1. verze hotove Simonovy casti

author Simon Suchomel <xsuchom1@anxur.fi.muni.cz>

Thu, 19 Sep 2013 13:19:02 +0000 (15:19 +0200)

committer Simon Suchomel <xsuchom1@anxur.fi.muni.cz>

Thu, 19 Sep 2013 13:19:02 +0000 (15:19 +0200)
author Simon Suchomel <xsuchom1@anxur.fi.muni.cz>
Thu, 19 Sep 2013 13:19:02 +0000 (15:19 +0200)
committer Simon Suchomel <xsuchom1@anxur.fi.muni.cz>
Thu, 19 Sep 2013 13:19:02 +0000 (15:19 +0200)
diff --git a/pan13-poster/img/document_awfc.pdf b/pan13-poster/img/document_awfc.pdf

index 71e5eff24ad5ead635929ff6c835515cff6fed80..0a48308dcf5b357cfa80d9e315ae3fd9f64ccc11 100755 (executable)

Binary files a/pan13-poster/img/document_awfc.pdf and b/pan13-poster/img/document_awfc.pdf differ
diff --git a/pan13-poster/img/document_keywords.pdf b/pan13-poster/img/document_keywords.pdf

new file mode 100755 (executable)

index 0000000..f60baf6

Binary files /dev/null and b/pan13-poster/img/document_keywords.pdf differ
diff --git a/pan13-poster/img/document_paragraphs.pdf b/pan13-poster/img/document_paragraphs.pdf

new file mode 100755 (executable)

index 0000000..38c4372

Binary files /dev/null and b/pan13-poster/img/document_paragraphs.pdf differ
diff --git a/pan13-poster/img/queryprocess.pdf b/pan13-poster/img/queryprocess.pdf

new file mode 100755 (executable)

index 0000000..e6d8a1a

Binary files /dev/null and b/pan13-poster/img/queryprocess.pdf differ
diff --git a/pan13-poster/poster.tex b/pan13-poster/poster.tex

index 42987ac9382911afc72bf535f6f37347ffc51ab8..5e3c9a095b4e30283e5c9b63b798d7325becc268 100755 (executable)
--- a/pan13-poster/poster.tex
+++ b/pan13-poster/poster.tex
@@ -116,41 +116,32 @@
  \r
  \r
  \begin{multicols}{2}\setlength{\columnseprule}{0pt}\r
-\r
-\r
  \section{Introduction}\r
-\r
+%\r
  PAN 2013 LOrem ipsum Lorem ipsum Lorem ipsumLorem ipsumLorem ipsumLorem ipsumLorem ipsum \r
-\r
-\r
+%\r
  \vfill\r
  \columnbreak\r
-\r
+%\r
  \begin{figure}\r
   \centering\r
-  \includegraphics[width=0.8\textwidth]{img/source_retrieval_process.pdf}\r
+  \includegraphics[width=0.6\textwidth]{img/source_retrieval_process.pdf}\r
    \caption{Plagiarism discovery process.}\r
    \label{fig:process}\r
  \end{figure} \r
-\r
-\r
  \end{multicols}\r
-\r
-\r
-\r
  \begin{multicols}{2}\r
-\r
  %\rm\r
-\r
  %%% Introduction\r
  \section{Querying}\r
  Querying means to effectively utilize the search engine in order to retrieve as many relevant\r
  documents as possible with the minimum amount of queries.\r
  %We consider the resulting document relevantif it shares some of text characteristics with the suspicious document.\r
-In real-world queries as such represent appreciable cost, therefore their minimization should be one of the top priorities. \\\r
-\subsection{Types of Queries}\r
-From the suspicious document, there were three diverse types of queries extracted.\r
-\subsubsection{Keywords Based Queries}\r
+In the real world, queries as such represent an appreciable cost; therefore, their minimization should be one of the top priorities. \r
+%\subsection{Types of Queries}\r
+Three diverse types of queries were extracted from the suspicious document.\\\r
+\begin{minipage}{0.55\linewidth}\r
+\subsection{Keywords Based Queries}\r
  \begin{ytemize}\r
  \item TF--IDF base automated keywords extraction;\r
  \item 5-token long; \r
@@ -158,9 +149,15 @@ From the suspicious document, there were three diverse types of queries extracte
  \item Non-positional;\r
  \item Non-phrasal.\r
  \end{ytemize}\r
-\r
+\end{minipage}\r
+\begin{minipage}{0.45\linewidth}\r
+\begin{figure}[h]\r
+ %\centering\r
+  \includegraphics[width=1\linewidth]{img/document_keywords.pdf}\r
+\end{figure}\r
+\end{minipage}\r
  \begin{minipage}{0.55\linewidth}\r
-\subsubsection{Intrinsic Plagiarism Based Queries}\r
+\subsection{Intrinsic Plagiarism Based Queries}\r
  \begin{ytemize}\r
  \item Averaged Word Frequency Class based chunking~\cite{AWFC};\r
  \item Random sentence selection from the chunk;\r
@@ -175,16 +172,35 @@ From the suspicious document, there were three diverse types of queries extracte
    \includegraphics[width=1\linewidth]{img/document_awfc.pdf}\r
  \end{figure}\r
  \end{minipage}\r
-\r
-\subsubsection{Paragraph Based Queries}\r
+\begin{minipage}{0.55\linewidth}\r
+\subsection{Paragraph Based Queries}\r
  \begin{ytemize}\r
  \item Longest sentences from miscellaneous paragraphs;\r
  \item Deterministic;\r
  \item Positional;\r
  \item Phrasal.\r
  \end{ytemize}\r
+\end{minipage}\r
+\begin{minipage}{0.45\linewidth}\r
+\begin{figure}[h]\r
+ %\centering\r
+  \includegraphics[width=1\linewidth]{img/document_paragraphs.pdf}\r
+\end{figure}\r
+\end{minipage}\r
+\r
+\begin{figure}[h]\r
+ \centering\r
+  \includegraphics[width=0.8\linewidth]{img/queryprocess.pdf}\r
+   \caption{Stepwise query execution process.}\r
+\end{figure}\r
  \r
  \section{Selecting}\r
+Document snippets were used to decide whether to download a document for text alignment.\r
+We used a 2-tuple measure, which indicates how many neighbouring word pairs occur in both the snippet and the suspicious document.\r
+The performance of this measure is depicted in Figure~\ref{fig:snippet_graph}.\r
+Given this measure, a threshold for the download decision needs to be set in order to maximize the number of discovered similarities\r
+and minimize the total number of downloads.\r
+A profitable threshold is one that corresponds to the largest distance between those two curves.\r
  \begin{figure}\r
    \centering\r
    \includegraphics[width=0.8\textwidth]{img/snippets_graph.pdf}\r
@@ -192,6 +208,7 @@ From the suspicious document, there were three diverse types of queries extracte
    \label{fig:snippet_graph}\r
  \end{figure}\r
  \r
+\r
  %\r
  % Yenyova cast\r
  %\r
author	Simon Suchomel <xsuchom1@anxur.fi.muni.cz>
	Thu, 19 Sep 2013 13:19:02 +0000 (15:19 +0200)
committer	Simon Suchomel <xsuchom1@anxur.fi.muni.cz>
	Thu, 19 Sep 2013 13:19:02 +0000 (15:19 +0200)
pan13-poster/img/document_awfc.pdf		patch \| blob \| history
pan13-poster/img/document_keywords.pdf	[new file with mode: 0755]	patch \| blob
pan13-poster/img/document_paragraphs.pdf	[new file with mode: 0755]	patch \| blob
pan13-poster/img/queryprocess.pdf	[new file with mode: 0755]	patch \| blob
pan13-poster/poster.tex		patch \| blob \| history