//
// pdftotext.cc
//
-// Copyright 1997-2002 Glyph & Cog, LLC
+// Copyright 1997-2003 Glyph & Cog, LLC
//
//========================================================================
// File-scope storage for the command-line options.  The argDesc table
// holds the address of each of these variables, and the rest of this
// file reads them after parseArgs() has been called.
static int firstPage = 1;
static int lastPage = 0;
+static GBool physLayout = gFalse;
static GBool rawOrder = gFalse;
static GBool htmlMeta = gFalse;
static char textEncName[128] = "";
static char textEOL[16] = "";
-static char ownerPassword[33] = "";
-static char userPassword[33] = "";
+static GBool noPageBreaks = gFalse;
// The added password defaults start with the byte '\001'.  The code that
// opens the PDF file tests ownerPassword[0] / userPassword[0] against
// '\001' rather than testing for a non-empty string.
// NOTE(review): presumably this lets an explicitly empty password be
// told apart from "no password given" -- confirm.
+static char ownerPassword[33] = "\001";
+static char userPassword[33] = "\001";
static GBool quiet = gFalse;
static char cfgFileName[256] = "";
static GBool printVersion = gFalse;
static GBool printHelp = gFalse;
// Command-line option table handed to parseArgs() and printUsage().
// Each entry lists, in order: the option flag, the argument kind
// (argInt, argFlag or argString), the variable that receives the value,
// a size field (sizeof the destination buffer for argString entries,
// 0 for the others), and a description string.  The table is terminated
// by a {NULL} entry.  "-h", "-help", "--help" and "-?" all set printHelp.
static ArgDesc argDesc[] = {
- {"-f", argInt, &firstPage, 0,
+ {"-f", argInt, &firstPage, 0,
"first page to convert"},
- {"-l", argInt, &lastPage, 0,
+ {"-l", argInt, &lastPage, 0,
"last page to convert"},
- {"-raw", argFlag, &rawOrder, 0,
+ {"-layout", argFlag, &physLayout, 0,
+ "maintain original physical layout"},
+ {"-raw", argFlag, &rawOrder, 0,
"keep strings in content stream order"},
- {"-htmlmeta", argFlag, &htmlMeta, 0,
+ {"-htmlmeta", argFlag, &htmlMeta, 0,
"generate a simple HTML file, including the meta information"},
- {"-enc", argString, textEncName, sizeof(textEncName),
+ {"-enc", argString, textEncName, sizeof(textEncName),
"output text encoding name"},
- {"-eol", argString, textEOL, sizeof(textEOL),
+ {"-eol", argString, textEOL, sizeof(textEOL),
"output end-of-line convention (unix, dos, or mac)"},
- {"-opw", argString, ownerPassword, sizeof(ownerPassword),
+ {"-nopgbrk", argFlag, &noPageBreaks, 0,
+ "don't insert page breaks between pages"},
+ {"-opw", argString, ownerPassword, sizeof(ownerPassword),
"owner password (for encrypted files)"},
- {"-upw", argString, userPassword, sizeof(userPassword),
+ {"-upw", argString, userPassword, sizeof(userPassword),
"user password (for encrypted files)"},
- {"-q", argFlag, &quiet, 0,
+ {"-q", argFlag, &quiet, 0,
"don't print any messages or errors"},
- {"-cfg", argString, cfgFileName, sizeof(cfgFileName),
+ {"-cfg", argString, cfgFileName, sizeof(cfgFileName),
"configuration file to use in place of .xpdfrc"},
- {"-v", argFlag, &printVersion, 0,
+ {"-v", argFlag, &printVersion, 0,
"print copyright and version info"},
- {"-h", argFlag, &printHelp, 0,
+ {"-h", argFlag, &printHelp, 0,
"print usage information"},
- {"-help", argFlag, &printHelp, 0,
+ {"-help", argFlag, &printHelp, 0,
"print usage information"},
- {"--help", argFlag, &printHelp, 0,
+ {"--help", argFlag, &printHelp, 0,
"print usage information"},
- {"-?", argFlag, &printHelp, 0,
+ {"-?", argFlag, &printHelp, 0,
"print usage information"},
{NULL}
};
Object info;
GBool ok;
char *p;
+ int exitCode;
+
+ exitCode = 99;
// parse args
ok = parseArgs(argDesc, &argc, argv);
if (!printVersion) {
printUsage("pdftotext", "<PDF-file> [<text-file>]", argDesc);
}
- exit(1);
+ goto err0;
}
fileName = new GString(argv[1]);
fprintf(stderr, "Bad '-eol' value on command line\n");
}
}
+ if (noPageBreaks) {
+ globalParams->setTextPageBreaks(gFalse);
+ }
if (quiet) {
globalParams->setErrQuiet(quiet);
}
}
// open PDF file
- if (ownerPassword[0]) {
+ if (ownerPassword[0] != '\001') {
ownerPW = new GString(ownerPassword);
} else {
ownerPW = NULL;
}
- if (userPassword[0]) {
+ if (userPassword[0] != '\001') {
userPW = new GString(userPassword);
} else {
userPW = NULL;
delete ownerPW;
}
if (!doc->isOk()) {
+ exitCode = 1;
goto err2;
}
// check for copy permission
if (!doc->okToCopy()) {
error(-1, "Copying of text from this document is not allowed.");
+ exitCode = 3;
goto err2;
}
} else {
if (!(f = fopen(textFileName->getCString(), "wb"))) {
error(-1, "Couldn't open text file '%s'", textFileName->getCString());
+ exitCode = 2;
goto err3;
}
}
}
// write text file
- textOut = new TextOutputDev(textFileName->getCString(), rawOrder, htmlMeta);
+ textOut = new TextOutputDev(textFileName->getCString(),
+ physLayout, rawOrder, htmlMeta);
if (textOut->isOk()) {
- doc->displayPages(textOut, firstPage, lastPage, 72, 0, gFalse);
+ doc->displayPages(textOut, firstPage, lastPage, 72, 72, 0, gTrue, gFalse);
+ } else {
+ delete textOut;
+ exitCode = 2;
+ goto err3;
}
delete textOut;
} else {
if (!(f = fopen(textFileName->getCString(), "ab"))) {
error(-1, "Couldn't open text file '%s'", textFileName->getCString());
+ exitCode = 2;
goto err3;
}
}
}
}
+ exitCode = 0;
+
// clean up
err3:
delete textFileName;
uMap->decRefCnt();
err1:
delete globalParams;
+ err0:
// check for memory leaks
Object::memCheck(stderr);
gMemReport(stderr);
- return 0;
+ return exitCode;
}
static void printInfoString(FILE *f, Dict *infoDict, char *key,