1 //========================================================================
5 // Copyright 1997 Derek B. Noonburg
7 //========================================================================
13 #include "parseargs.h"
24 #include "TextOutputDev.h"
// Command-line option state. Except where noted, each variable below is the
// storage target of an entry in argDesc, and its initializer is the value
// in effect when the option is not given on the command line.
// First page to convert (-f).
29 static int firstPage = 1;
// Last page to convert (-l). main() replaces any value below 1 or above the
// document's page count with the page count, so the initial 0 ends up
// meaning "through the last page".
30 static int lastPage = 0;
// -ascii7: convert to 7-bit ASCII instead of 8-bit ISO Latin-1. main() also
// turns this on whenever useEUCJP is set.
31 static GBool useASCII7 = gFalse;
// -eucjp: convert Japanese text to EUC-JP.
33 static GBool useEUCJP = gFalse;
// -raw: keep strings in content stream order; passed to TextOutputDev.
35 static GBool rawOrder = gFalse;
// Not static and not bound to any option in the visible argDesc entries;
// presumably read from another translation unit -- TODO confirm.
36 GBool printCommands = gFalse;
// -h / -help: makes main() print the version, copyright and usage text.
37 static GBool printHelp = gFalse;
// Option table handed to parseArgs() and printUsage() in main(). Every
// entry visible here has the shape
//   {option name, argument kind, address of the variable to set, 0, help text}
// where the argument kind is argInt for the page numbers and argFlag for the
// boolean switches. The fourth field is 0 in all visible entries; its
// meaning is defined by ArgDesc in parseargs.h, which is not shown here.
// NOTE(review): the end of this initializer (any terminating entry and the
// closing brace) is not present in this listing.
39 static ArgDesc argDesc[] = {
40 {"-f", argInt, &firstPage, 0,
41 "first page to convert"},
42 {"-l", argInt, &lastPage, 0,
43 "last page to convert"},
44 {"-ascii7", argFlag, &useASCII7, 0,
45 "convert to 7-bit ASCII (default is 8-bit ISO Latin-1)"},
47 {"-eucjp", argFlag, &useEUCJP, 0,
48 "convert Japanese text to EUC-JP"},
50 {"-raw", argFlag, &rawOrder, 0,
51 "keep strings in content stream order"},
// errQuiet is not declared in this listing; presumably an external flag
// owned by the error-reporting code -- confirm.
52 {"-q", argFlag, &errQuiet, 0,
53 "don't print any messages or errors"},
// -h and -help are aliases: both set printHelp.
54 {"-h", argFlag, &printHelp, 0,
55 "print usage information"},
56 {"-help", argFlag, &printHelp, 0,
57 "print usage information"},
// Entry point of pdftotext: parses the command line, opens the PDF named by
// argv[1], chooses an output text file name, and sends the selected page
// range to a TextOutputDev.
// NOTE(review): this listing omits a number of lines (some declarations,
// else branches, closing braces, early exits, cleanup), so the comments
// below describe only the statements that are visible.
61 int main(int argc, char *argv[]) {
64 GString *textFileName;
65 TextOutputDev *textOut;
// parseArgs() receives &argc, so it can change the argument count;
// presumably it removes the options it recognised -- confirm in parseargs.h.
70 ok = parseArgs(argDesc, &argc, argv);
// Print version, copyright and usage when parsing failed, when the remaining
// argument count is not 2 or 3 (program name, PDF file, optional text file),
// or when help was requested.
71 if (!ok || argc < 2 || argc > 3 || printHelp) {
72 fprintf(stderr, "pdftotext version %s\n", xpdfVersion);
73 fprintf(stderr, "%s\n", xpdfCopyright);
74 printUsage("pdftotext", "<PDF-file> [<text-file>]", argDesc);
// The first remaining argument is the input PDF file name.
77 fileName = new GString(argv[1]);
83 initParams(xpdfConfigFile);
87 doc = new PDFDoc(fileName);
92 // check for copy permission
93 if (!doc->okToCopy()) {
94 error(-1, "Copying of text from this document is not allowed.");
98 // construct text file name
// Use argv[2] as the output name; presumably only when a second file
// argument was given (the guarding condition is not in this listing).
100 textFileName = new GString(argv[2]);
// Otherwise derive the name from the input file: p addresses the last four
// characters of the input name so they can be compared with ".pdf"/".PDF".
// NOTE(review): if the input name is shorter than four characters, p points
// before the start of the string buffer, unless a length check exists on a
// line not shown here -- confirm.
102 p = fileName->getCString() + fileName->getLength() - 4;
// Extension matched: start from the input name without its last four
// characters.
103 if (!strcmp(p, ".pdf") || !strcmp(p, ".PDF"))
104 textFileName = new GString(fileName->getCString(),
105 fileName->getLength() - 4);
// Presumably the else branch (the intervening line is not shown): start
// from the whole input name.
107 textFileName = fileName->copy();
// The derived name then gets a ".txt" suffix.
108 textFileName->append(".txt");
// An unset (0), non-positive, or too-large last page becomes the document's
// page count.
114 if (lastPage < 1 || lastPage > doc->getNumPages())
115 lastPage = doc->getNumPages();
// Requesting EUC-JP also sets the ASCII7 flag; the reason is not visible in
// this file.
119 useASCII7 |= useEUCJP;
// Create the output device for the chosen file name, passing the ASCII7 and
// raw-order flags.
121 textOut = new TextOutputDev(textFileName->getCString(), useASCII7, rawOrder);
// Send pages firstPage..lastPage to the text device. The trailing 72 and 0
// are presumably resolution (dpi) and rotation -- confirm against
// PDFDoc::displayPages.
123 doc->displayPages(textOut, firstPage, lastPage, 72, 0);
133 // check for memory leaks
134 Object::memCheck(stderr);