2 @INPROCEEDINGS{Kasprzak2008,
\r
3 AUTHOR = "Jan Kasprzak and Michal Brandejs and Miroslav K\v{r}ipa\v{c} and Pavel {\v S}merk",
\r
4 TITLE = "Distributed System for Discovering Similar Documents",
\r
5 SUBTITLE = "From a Relational Database to the Custom-Developed Parallel Solution",
\r
6 BOOKTITLE = "ICEIS 2008: Proceedings of the Tenth International Conference on Enterprise Information Systems, Vol. DISI---Databases and Information Systems Integration",
\r
8 publisher = "INSTICC (Institute for Systems and Technologies of Information, Control and Communication), Setúbal, Portugal",
\r
10 isbn = "978-989-8111-36-4",
\r
14 @INPROCEEDINGS{Kasprzak2009,
\r
15 AUTHOR = "Jan Kasprzak and Michal Brandejs and Jitka Brandejsová",
\r
16 TITLE = "Distributed Aspects of the System for Discovering Similar Documents",
\r
17 BOOKTITLE = "ITA 09: Proceedings of the Third International Conference on Internet Technology and Applications",
\r
22 @INPROCEEDINGS{Kasprzak2009a,
\r
23 AUTHOR = "Jan Kasprzak and Michal Brandejs and Miroslav Křipač",
\r
24 TITLE = "Finding Plagiarism by Evaluating Document Similarities",
\r
25 BOOKTITLE = "SEPLN'09: The 25th edition of the Annual Conference of the Spanish Society for Natural Language Processing",
\r
30 @INPROCEEDINGS{Monostori2002,
\r
31 author = {Kriszti\'{a}n Monostori and Raphael A. Finkel and Arkady B. Zaslavsky and G\'{a}bor Hod\'{a}sz and M\'{a}t\'{e} Pataki},
\r
32 title = {Comparison of Overlap Detection Techniques},
\r
33 booktitle = {ICCS '02: Proceedings of the International Conference on Computational Science-Part I},
\r
35 isbn = {3-540-43591-3},
\r
37 publisher = {Springer-Verlag},
\r
38 address = {London, UK},
\r
43 title = "{Czech National Archive of Graduate Theses}",
\r
44 howpublished = "\url{http://theses.cz/}",
\r
45 year = "2008--2011",
\r
50 title = "{Masaryk University Information System}",
\r
51 howpublished = "\url{http://is.muni.cz/}",
\r
52 year = "1999--2011",
\r
56 key = "{Odevzdej.CZ}",
\r
57 title = "{Odevzdej---the system for collecting seminar works}",
\r
58 howpublished = "\url{http://odevzdej.cz/}",
\r
59 year = "2009--2011",
\r
63 @inproceedings{finkel2002,
\r
64 author = {Finkel, Raphael A. and Zaslavsky, Arkady and Monostori, Kriszti\'{a}n and Schmidt, Heinz},
\r
65 title = {Signature extraction for overlap detection in documents},
\r
66 booktitle = {ACSC '02: Proceedings of the twenty-fifth Australasian conference on Computer science},
\r
68 isbn = {0-909925-82-8},
\r
70 location = {Melbourne, Victoria, Australia},
\r
71 publisher = {Australian Computer Society, Inc.},
\r
72 address = {Darlinghurst, Australia},
\r
75 @INPROCEEDINGS{broder97,
\r
76 title={On the resemblance and containment of documents},
\r
77 author={Broder, A. Z.},
\r
78 booktitle={Compression and Complexity of Sequences 1997. Proceedings},
\r
84 keywords={information retrieval, random processes, set theory, Rabin fingerprints, World Wide Web, containment, documents, fixed size sample, informal notions, information retrieval, intersection problems, mathematical notions, mathematical properties, random sampling, resemblance, roughly contained, roughly the same},
\r
85 doi={10.1109/SEQUEN.1997.666900},
\r
90 author="{Rivest, R.}",
\r
91 title="{RFC1321: The MD5 Message-Digest Algorithm}",
\r
93 publisher = {RFC Editor},
\r
94 address = {United States},
\r
95 note={\url{http://www.rfc-editor.org/rfc/rfc1321.txt}},
\r
98 @Misc{britannicaplagiarism,
\r
99 author = "{Encyclop\ae{}dia Britannica}",
\r
100 title = "Plagiarism",
\r
101 howpublished = "retrieved 2009--08--24 from \url{http://www.britannica.com/EBchecked/topic/462640/plagiarism}",
\r
106 author = "iDnes.CZ",
\r
107 title = "Zlínského děkana usvědčili z plagiátorství",
\r
108 howpublished = "retrieved 2009--08--25 from \url{http://zpravy.idnes.cz/studium.asp?c=A080709_085836_studium_bar}",
\r
112 @inproceedings{pomikalek2008,
\r
113 author = "Pomikálek, Jan and Rychlý, Pavel",
\r
114 title = "Detecting Co-Derivative Documents in Large Text Collections",
\r
115 booktitle = "Proceedings of the Sixth International Language Resources and Evaluation (LREC'08)",
\r
118 address = "Marrakech, Morocco",
\r
119 url = "http://www.lrec-conf.org/lrec2008/"
\r
122 @inproceedings{pomikalek2009,
\r
123 author = "Pomikálek, Jan and Rychlý, Pavel and Kilgarriff, Adam",
\r
124 title = "Scaling to Billion-plus Word Corpora",
\r
125 booktitle = "Advances in Computational Linguistics",
\r
128 address = "Mexico",
\r
129 issn = "1870-4069",
\r
130 publisher = "Instituto Politécnico Nacional",
\r
135 title = "Masaryk University: Full-text search",
\r
136 howpublished = "retrieved 2009--08--25 from \url{http://www.muni.cz/general/search}",
\r
141 author = "Alan Cox",
\r
142 title = "{Alan Cox talks about laws... and Linux}",
\r
143 howpublished = "retrieved 2009--08--27 from \url{http://interviews.slashdot.org/article.pl?sid=02/05/20/1314214}",
\r
147 @inproceedings{coderivative,
\r
148 Author = {Bernstein, Y and Zobel, J},
\r
149 Title = {{A Scalable System for Identifying Co-derivative Documents}},
\r
150 Booktitle = {{String Processing and Information Retrieval, Proceedings}},
\r
151 Series = {{Lecture Notes in Computer Science}},
\r
155 Publisher = {{Springer-Verlag Berlin}},
\r
156 Type = {{Proceedings Paper}},
\r
157 Language = {{English}},
\r
158 Affiliation = {{Bernstein, Y (Reprint Author), RMIT Univ, Sch Comp Sci \& Informat Technol, Melbourne, Vic, Australia.
\r
159 RMIT Univ, Sch Comp Sci \& Informat Technol, Melbourne, Vic, Australia.}},
\r
160 ISSN = {{0302-9743}},
\r
161 ISBN = {{3-540-23210-9}},
\r
162 Keywords-Plus = {{COMPRESSION}},
\r
163 Subject-Category = {{Computer Science, Theory \& Methods}},
\r
164 Author-Email = {{ybernste@cs.rmit.edu.au
\r
165 jz@cs.rmit.edu.au}},
\r
166 Number-of-Cited-References = {{17}},
\r
167 Times-Cited = {{2}},
\r
168 Doc-Delivery-Number = {{BBA15}},
\r
169 Unique-ID = {{ISI:000224377200006}},
\r
173 key = "{Turnitin.com}",
\r
174 title = "Turnitin",
\r
175 howpublished = "\url{http://turnitin.com/}, retrieved 2009--08--26",
\r
180 key = "{Copyscape.com}",
\r
181 title = "Copyscape",
\r
182 howpublished = "\url{http://copyscape.com/}, retrieved 2009--08--26",
\r
187 key = "{Doccop.com}",
\r
189 howpublished = "\url{http://doccop.com/}, retrieved 2009--08--26",
\r
193 @MISC{pan09competition,
\r
195 title = "1st International Competition on Plagiarism Detection",
\r
196 howpublished = "\url{http://www.uni-weimar.de/medien/webis/research/workshopseries/pan-09/competition.html}, retrieved 2009--08--26",
\r
200 @INPROCEEDINGS{Brin95copydetection,
\r
201 author = {Sergey Brin and James Davis and Hector Garcia-Molina},
\r
202 title = {Copy Detection Mechanisms for Digital Documents},
\r
203 booktitle = {Proceedings of the ACM SIGMOD Annual Conference},
\r
208 @INPROCEEDINGS{Shivakumar95scam,
\r
209 author = {Narayanan Shivakumar and Hector Garcia-Molina},
\r
210 title = {SCAM: A Copy Detection Mechanism for Digital Documents},
\r
211 booktitle = {Proceedings of the Second Annual Conference on the Theory and Practice of Digital Libraries},
\r
215 @INPROCEEDINGS{Garcia-Molina96dscam:finding,
\r
216 author = {Hector Garcia-Molina and Luis Gravano and Narayanan Shivakumar},
\r
217 title = {dSCAM: Finding Document Copies across Multiple Databases},
\r
218 booktitle = {Proceedings of the 4th International Conference on Parallel and Distributed Information Systems},
\r
222 @INPROCEEDINGS{LinuxDesktop,
\r
223 author = {Jan Kasprzak},
\r
224 title = "{Desktop a jádro Linuxu}",
\r
225 booktitle = {Proceedings of the XXXI EurOpen.CZ Conference},
\r
226 isbn = "978-80-86583-13-6",
\r
228 publisher = "EurOpen.CZ, Plzeň",
\r
232 @INPROCEEDINGS{Filesystems,
\r
233 author = {Jan Kasprzak},
\r
234 title = "{Co umí souborové systémy}",
\r
235 booktitle = {Proceedings of the XXXII EurOpen.CZ Conference},
\r
236 isbn = "978-80-86583-14-3",
\r
237 pages = {105--118},
\r
238 publisher = "EurOpen.CZ, Plzeň",
\r
242 @INPROCEEDINGS{GitEuropen,
\r
243 author = {Jan Kasprzak},
\r
244 title = "{Git aneb správa verzí trochu jinak}",
\r
245 booktitle = {Proceedings of the XXXIV EurOpen.CZ Conference},
\r
246 isbn = "978-80-86583-16-7",
\r
247 pages = {107--118},
\r
248 publisher = "EurOpen.CZ, Plzeň",
\r
252 @INPROCEEDINGS{Clusters,
\r
253 author = {Jan Kasprzak},
\r
254 title = "{Clusterová řešení pod Linuxem}",
\r
255 booktitle = {SLT 2001: Proceedings of the 2nd Seminar on Linux and \TeX},
\r
256 isbn = "80-7302-009-2",
\r
257 pages = {161--168},
\r
258 publisher = "Konvoj, Brno",
\r
263 AUTHOR = {{Webis at Bauhaus-Universität Weimar} and
\r
264 {NLEL at Universidad Politécnica de Valencia}},
\r
265 HOWPUBLISHED = {\url{http://www.webis.de/research/corpora}},
\r
266 TITLE = {{PAN Plagiarism Corpus 2009 (PAN-PC-09)}},
\r
268 NOTE = {{Martin Potthast, Andreas Eiselt, Benno Stein,
\r
269 Alberto Barrón-Cedeño, and Paolo Rosso (editors)}}
\r
272 @INPROCEEDINGS{ngram,
\r
273 author = {William B. Cavnar and John M. Trenkle},
\r
274 title = {N-Gram-Based Text Categorization},
\r
275 booktitle = {Proceedings of SDAIR-94, 3rd Annual Symposium on Document Analysis and Information Retrieval},
\r
280 @inproceedings{intrinsic,
\r
281 author = {Sven Meyer zu Eissen and Benno Stein},
\r
282 booktitle = {ECIR},
\r
283 editor = {Mounia Lalmas and Andy MacFarlane and Stefan M. Rüger and Anastasios Tombros and Theodora Tsikrika and Alexei Yavlinsky},
\r
285 publisher = {Springer},
\r
286 series = {Lecture Notes in Computer Science},
\r
287 title = {Intrinsic Plagiarism Detection.},
\r
288 url = {http://dblp.uni-trier.de/db/conf/ecir/ecir2006.html#EissenS06},
\r
291 ee = {http://dx.doi.org/10.1007/11735106_66},
\r
292 isbn = {3-540-33347-9},
\r
293 date = {2006-04-03}
\r
296 @INPROCEEDINGS{Heintze96scalabledocument,
\r
297 author = {Nevin Heintze},
\r
298 title = {Scalable Document Fingerprinting},
\r
299 booktitle = {Proc. USENIX Workshop on Electronic Commerce},
\r
303 @inproceedings{suffixtree,
\r
304 author = {Manber, Udi and Myers, Gene},
\r
305 title = {Suffix arrays: a new method for on-line string searches},
\r
306 booktitle = {SODA '90: Proceedings of the first annual ACM-SIAM symposium on Discrete algorithms},
\r
308 isbn = {0-89871-251-3},
\r
309 pages = {319--327},
\r
310 location = {San Francisco, California, United States},
\r
311 publisher = {Society for Industrial and Applied Mathematics},
\r
312 address = {Philadelphia, PA, USA}
\r
318 AUTHOR = "Andrew Tridgell and Paul Mackerras",
\r
319 TITLE = "The rsync algorithm",
\r
320 DATE = "2004--05--19",
\r
322 INSTITUTION = "Department of Computer Science, FEIT, Australian National University",
\r
324 NOTE = "\url{http://hdl.handle.net/1885/40765}",
\r
329 AUTHOR = "Linus Torvalds and others",
\r
330 TITLE = "{Git---the Fast Version Control System}",
\r
331 HOWPUBLISHED = "\url{http://git-scm.com/}, retrieved 2011--01--12",
\r
336 TITLE = "{The RPM Package Manager}",
\r
337 HOWPUBLISHED = "\url{http://www.rpm.org/}, retrieved 2011--01--12",
\r
342 TITLE = "{DeltaRPM}",
\r
343 HOWPUBLISHED = "\url{ftp://ftp.suse.com/pub/projects/deltarpm/}, retrieved 2011--01--12",
\r
346 @mastersthesis{zazrivec,
\r
347 AUTHOR = "Milan Zázrivec",
\r
348 TITLE = "Algoritmus a implementace software pro tvorbu binárních záplat",
\r
349 SCHOOL = "Faculty of Informatics, Masaryk University",
\r
350 NOTE = "\url{http://is.muni.cz/th/60716/fi_m/}",
\r
355 KEY = "Google Scholar",
\r
356 TITLE = "{Google Scholar}",
\r
357 HOWPUBLISHED = "\url{http://scholar.google.com/}, retrieved 2011--01--12",
\r
362 TITLE = "{American National Standard for Information Systems -- Coded Character Sets -- 7-Bit American National Standard Code for Information Interchange (7-Bit ASCII), ANSI X3.4-1986}",
\r
363 DATE="1986--03--26",
\r
365 INSTITUTION = "American National Standards Institute, Inc.",
\r
369 AUTHOR = "{The Unicode Consortium}",
\r
370 TITLE = "{The Unicode Standard -- Version 4.0}",
\r
371 PUBLISHER = "Addison-Wesley, Boston, MA",
\r
373 ISBN = "0-321-18578-1",
\r
374 NOTE = "\url{http://www.unicode.org/unicode/standard/versions/enumeratedversions.html#Unicode_4_0_0}",
\r
379 TITLE = "{RFC 3629: UTF-8, a transformation format of ISO 10646}",
\r
380 AUTHOR = "F. Yergeau",
\r
382 NOTE = "\url{http://tools.ietf.org/html/rfc3629}",
\r
385 @MISC{thesisproposal,
\r
386 TITLE = "{Systems for Discovering Similar Documents}",
\r
387 AUTHOR = "Jan Kasprzak",
\r
388 INSTITUTION = "Faculty of Informatics, Masaryk University",
\r
390 NOTE = "Ph.D. thesis proposal, \url{http://is.muni.cz/th/1885/fi_r/}",
\r
393 @inproceedings{Kasprzak2010,
\r
394 title={Improving the reliability of the plagiarism detection system},
\r
395 author={Kasprzak, J. and Brandejs, M.},
\r
396 booktitle={Notebook Papers of CLEF 2010 LABs and Workshops},
\r
398 organization={Citeseer}
\r
401 @article{stamatatos2011plagiarism,
\r
402 title={Plagiarism detection using stopword n-grams},
\r
403 author={Stamatatos, E.},
\r
404 journal={Journal of the American Society for Information Science and Technology},
\r
406 publisher={Wiley Online Library}
\r
408 @inproceedings{pan09stamatatos,
\r
409 author = {Efstathios Stamatatos},
\r
411 title = {Intrinsic Plagiarism Detection Using Character n-gram Profiles},
\r
412 booktitle = {Proceedings of the SEPLN'09 Workshop on Uncovering Plagiarism, Authorship and Social Software Misuse},
\r
414 location = {San Sebastian (Donostia), Spain},
\r
415 issn = {1613-0073},
\r
419 @book{zipf1935psycho,
\r
420 title={The psycho-biology of language.},
\r
421 author={Zipf, G. K.},
\r
423 publisher={Houghton, Mifflin}
\r
426 @INPROCEEDINGS{potthastframework,
\r
427 TITLE = {{An Evaluation Framework for Plagiarism Detection}
\r
429 AUTHOR = {Martin Potthast and Benno Stein and Alberto Barr{\'o}n-Cede{\~n}o and Paolo Rosso},
\r
430 BOOKTITLE = {Proceedings of the 23rd International Conference on Computational Linguistics (COLING 2010) (to appear)},
\r
433 ADDRESS = {Beijing, China},
\r
434 PUBLISHER = {Association for Computational Linguistics},
\r