X-Git-Url: https://www.fi.muni.cz/~kas/git//home/kas/public_html/git/?a=blobdiff_plain;f=pan13-poster%2Fposter.tex;h=5e3c9a095b4e30283e5c9b63b798d7325becc268;hb=b92a122ec0f3aca815db768cbd5ff1cde427cd38;hp=da045962ab135c7d43dfeeae5ae1a2f1449f9c0b;hpb=5e0e3911225801250c236c193d7a8b2259c74cd9;p=pan13-paper.git

diff --git a/pan13-poster/poster.tex b/pan13-poster/poster.tex
index da04596..5e3c9a0 100755
--- a/pan13-poster/poster.tex
+++ b/pan13-poster/poster.tex
@@ -7,8 +7,8 @@
 \usepackage{bera}
 \usepackage[utf8]{inputenc}
 %\usepackage{fancybullets}
-\usepackage{floatflt}
-\usepackage{graphics}
+%\usepackage{floatflt}
+%\usepackage{graphics}
 
 \definecolor{BoxCol}{rgb}{0.9,0.9,1}
 % uncomment for light blue background to \section boxes 
@@ -116,38 +116,32 @@
 
 
 \begin{multicols}{2}\setlength{\columnseprule}{0pt}
-
-
 \section{Introduction}
+%
 PAN 2013 LOrem ipsum Lorem ipsum Lorem ipsumLorem ipsumLorem ipsumLorem ipsumLorem ipsum 
-
-
-
+%
+\vfill
+\columnbreak
+%
 \begin{figure}
  \centering
-  \includegraphics[width=0.8\textwidth]{img/source_retrieval_process.pdf}
+  \includegraphics[width=0.6\textwidth]{img/source_retrieval_process.pdf}
   \caption{Plagiarism discovery process.}
   \label{fig:process}
 \end{figure} 
-
-
 \end{multicols}
-
-
-
 \begin{multicols}{2}
-
 %\rm
-
 %%% Introduction
 \section{Querying}
 Querying means to effectively utilize the search engine in order to retrieve as many relevant
 documents as possible with the minimum amount of queries.
 %We consider the resulting document relevantif it shares some of text characteristics with the suspicious document.
-In real-world queries as such represent appreciable cost, therefore their minimization should be one of the top priorities. \\
-\subsection{Types of Queries}
-From the suspicious document, there were three diverse types of queries extracted.
-\subsubsection{Keywords Based Queries}
+In the real world, queries as such represent an appreciable cost; therefore, their minimization should be one of the top priorities.
+%\subsection{Types of Queries}
+Three diverse types of queries were extracted from the suspicious document.\\
+\begin{minipage}{0.55\linewidth}
+\subsection{Keywords Based Queries}
 \begin{ytemize}
 \item TF--IDF base automated keywords extraction;
 \item 5-token long; 
@@ -155,7 +149,15 @@ From the suspicious document, there were three diverse types of queries extracte
 \item Non-positional;
 \item Non-phrasal.
 \end{ytemize}
-\subsubsection{Intrinsic Plagiarism Based Queries}
+\end{minipage}
+\begin{minipage}{0.45\linewidth}
+\begin{figure}[h]
+ %\centering
+  \includegraphics[width=1\linewidth]{img/document_keywords.pdf}
+\end{figure}
+\end{minipage}
+\begin{minipage}{0.55\linewidth}
+\subsection{Intrinsic Plagiarism Based Queries}
 \begin{ytemize}
 \item Averaged Word Frequency Class based chunking~\cite{AWFC};
 \item Random sentence selection from the chunk;
@@ -163,21 +165,42 @@ From the suspicious document, there were three diverse types of queries extracte
 \item Positional;
 \item Phrasal.
 \end{ytemize}
-
-\begin{floatingfigure}[r]{100pt}
- \centering
-  \includegraphics[width=0.4\textwidth]{img/document_awfc.pdf}
-\end{floatingfigure}
-
-\subsubsection{Paragraph Based Queries}
+\end{minipage}
+\begin{minipage}{0.45\linewidth}
+\begin{figure}[h]
+ %\centering
+  \includegraphics[width=1\linewidth]{img/document_awfc.pdf}
+\end{figure}
+\end{minipage}
+\begin{minipage}{0.55\linewidth}
+\subsection{Paragraph Based Queries}
 \begin{ytemize}
 \item Longest sentences from miscellaneous paragraphs;
 \item Deterministic;
 \item Positional;
 \item Phrasal.
 \end{ytemize}
+\end{minipage}
+\begin{minipage}{0.45\linewidth}
+\begin{figure}[h]
+ %\centering
+  \includegraphics[width=1\linewidth]{img/document_paragraphs.pdf}
+\end{figure}
+\end{minipage}
+
+\begin{figure}[h]
+ \centering
+  \includegraphics[width=0.8\linewidth]{img/queryprocess.pdf}
+   \caption{Stepwise queries execution process.}
+\end{figure}
 
 \section{Selecting}
+Document snippets were used to decide whether to download a document for text alignment.
+We used a 2-tuple measure, which indicates how many neighbouring word pairs occur both in the snippet and in the suspicious document.
+The performance of this measure is depicted in Figure~\ref{fig:snippet_graph}.
+Given this measure, a threshold for the download decision needs to be set in order to maximize the number of discovered similarities
+and minimize the total number of downloads.
+A profitable threshold is one that corresponds to the largest distance between the two curves.
 \begin{figure}
   \centering
   \includegraphics[width=0.8\textwidth]{img/snippets_graph.pdf}
@@ -185,8 +208,17 @@ From the suspicious document, there were three diverse types of queries extracte
   \label{fig:snippet_graph}
 \end{figure}
 
+
+%
+% Yenyova cast
+%
+
 \section{Text Alignment}
 
+%
+% Spolecna cast
+%
+
 \section{Conclusion}
 
 NÄjakÃ½ zÃ¡vÄr