1 //========================================================================
5 // Copyright 1997 Derek B. Noonburg
7 //========================================================================
13 #include "parseargs.h"
24 #include "TextOutputDev.h"
// Command-line option storage. Each variable below is the destination of
// one entry in argDesc[] and holds its default until parseArgs() runs.

// Page range (-f / -l). main() replaces a lastPage that is < 1 or beyond
// the document's page count with the page count, so the default of 0
// ends up meaning "through the last page".
29 static int firstPage = 1;
30 static int lastPage = 0;
// Output character-set flags (-ascii7, -latin2, -latin5, -eucjp).
31 static GBool useASCII7 = gFalse;
32 static GBool useLatin2 = gFalse;
33 static GBool useLatin5 = gFalse;
35 static GBool useEUCJP = gFalse;
// -raw: passed through to the TextOutputDev constructor.
37 static GBool rawOrder = gFalse;
// -upw: user password buffer; its sizeof is handed to the option parser.
// An empty string means no password was supplied.
38 static char userPassword[33] = "";
// -v and -h / -help: either one makes main() take the usage/version path.
39 static GBool printVersion = gFalse;
40 static GBool printHelp = gFalse;
// Option table handed to parseArgs() and printUsage() in main().
// Each entry lists: option name, argument kind (argInt / argFlag /
// argString), address of the variable that receives the value, a size
// field, and the help text.
// NOTE(review): the size field is 0 everywhere except the argString entry,
// where it is sizeof(userPassword) -- presumably the destination buffer
// size; confirm against parseargs.h.
// "-q" writes to errQuiet, which is not defined in this file section.
// "-h" and "-help" both set printHelp.
42 static ArgDesc argDesc[] = {
43 {"-f", argInt, &firstPage, 0,
44 "first page to convert"},
45 {"-l", argInt, &lastPage, 0,
46 "last page to convert"},
47 {"-ascii7", argFlag, &useASCII7, 0,
48 "convert to 7-bit ASCII (default is 8-bit ISO Latin-1)"},
49 {"-latin2", argFlag, &useLatin2, 0,
50 "convert to ISO Latin-2 character set"},
51 {"-latin5", argFlag, &useLatin5, 0,
52 "convert to ISO Latin-5 character set"},
54 {"-eucjp", argFlag, &useEUCJP, 0,
55 "convert Japanese text to EUC-JP"},
57 {"-raw", argFlag, &rawOrder, 0,
58 "keep strings in content stream order"},
59 {"-upw", argString, userPassword, sizeof(userPassword),
60 "user password (for encrypted files)"},
61 {"-q", argFlag, &errQuiet, 0,
62 "don't print any messages or errors"},
63 {"-v", argFlag, &printVersion, 0,
64 "print copyright and version info"},
65 {"-h", argFlag, &printHelp, 0,
66 "print usage information"},
67 {"-help", argFlag, &printHelp, 0,
68 "print usage information"},
// pdftotext entry point: parses the command line, opens the PDF named by
// argv[1], picks an output file name and character set, and renders the
// selected page range through a TextOutputDev.
72 int main(int argc, char *argv[]) {
75 GString *textFileName;
77 TextOutputDev *textOut;
78 TextOutputCharSet charSet;
// Parse options. parseArgs() receives &argc; the check below then expects
// argc to be 2 or 3, matching the "<PDF-file> [<text-file>]" usage string.
// NOTE(review): presumably parseArgs() strips recognized options from
// argv/argc -- confirm against parseargs.h.
83 ok = parseArgs(argDesc, &argc, argv);
// Usage/version path: taken on a parse failure, a wrong positional
// argument count, or when -v / -h / -help was given. Output goes to stderr.
84 if (!ok || argc < 2 || argc > 3 || printVersion || printHelp) {
85 fprintf(stderr, "pdftotext version %s\n", xpdfVersion);
86 fprintf(stderr, "%s\n", xpdfCopyright);
88 printUsage("pdftotext", "<PDF-file> [<text-file>]", argDesc);
// First positional argument is the input PDF path.
92 fileName = new GString(argv[1]);
98 initParams(xpdfConfigFile);
// A password GString is built only when -upw supplied a non-empty string;
// the document is then opened with the file name and userPW.
102 if (userPassword[0]) {
103 userPW = new GString(userPassword);
107 doc = new PDFDoc(fileName, userPW);
115 // check for copy permission
116 if (!doc->okToCopy()) {
117 error(-1, "Copying of text from this document is not allowed.");
121 // construct text file name
// Either argv[2] is used verbatim, or the name is derived from the PDF
// name: a trailing ".pdf"/".PDF" is dropped and ".txt" is appended.
123 textFileName = new GString(argv[2]);
// p is set 4 characters before the end of fileName to test the extension.
// NOTE(review): if fileName is shorter than 4 characters this points
// before the start of the string buffer; no length guard is visible --
// confirm and guard with a getLength() >= 4 check if none exists.
125 p = fileName->getCString() + fileName->getLength() - 4;
126 if (!strcmp(p, ".pdf") || !strcmp(p, ".PDF")) {
127 textFileName = new GString(fileName->getCString(),
128 fileName->getLength() - 4);
130 textFileName = fileName->copy();
132 textFileName->append(".txt");
// Clamp lastPage: a value below 1 (including the default 0) or past the
// end of the document becomes the document's page count.
139 if (lastPage < 1 || lastPage > doc->getNumPages()) {
140 lastPage = doc->getNumPages();
// Requesting EUC-JP also switches on the 7-bit ASCII flag.
145 useASCII7 |= useEUCJP;
// Choose the output character set: textOutLatin1 is assigned first and
// then replaced by the ASCII7, Latin-2 or Latin-5 set according to the
// option flags.
147 charSet = textOutLatin1;
149 charSet = textOutASCII7;
150 } else if (useLatin2) {
151 charSet = textOutLatin2;
152 } else if (useLatin5) {
153 charSet = textOutLatin5;
// Create the text output device for textFileName and, if it reports ok,
// render pages firstPage..lastPage into it.
// NOTE(review): the 72 and 0 arguments are presumably resolution (dpi) and
// rotation -- confirm against PDFDoc::displayPages.
155 textOut = new TextOutputDev(textFileName->getCString(), charSet, rawOrder);
156 if (textOut->isOk()) {
157 doc->displayPages(textOut, firstPage, lastPage, 72, 0, gFalse);
167 // check for memory leaks
168 Object::memCheck(stderr);