//
// Parser.cc
//
-// Copyright 1996 Derek B. Noonburg
+// Copyright 1996-2003 Glyph & Cog, LLC
//
//========================================================================
-#ifdef __GNUC__
+#include <aconf.h>
+
+#ifdef USE_GCC_PRAGMAS
#pragma implementation
#endif
#include "Array.h"
#include "Dict.h"
#include "Parser.h"
+#include "XRef.h"
#include "Error.h"
+#ifndef NO_DECRYPTION
+#include "Decrypt.h"
+#endif
// Parser constructor, shown in both its removed (-) and added (+) forms.
// The added form takes an XRef in addition to the Lexer and stores both
// in the xref and lexer members.
-Parser::Parser(Lexer *lexer1) {
- lexer = lexer1;
+Parser::Parser(XRef *xrefA, Lexer *lexerA) {
+ xref = xrefA;
+ lexer = lexerA;
inlineImg = 0;
// read the first two objects from the lexer into buf1 and buf2
lexer->getObj(&buf1);
lexer->getObj(&buf2);
// NOTE(review): deleting the lexer right after reading from it looks like
// destructor context from an elided part of this excerpt rather than
// constructor code -- confirm against the full file.
delete lexer;
}
// Parser::getObj: fills *obj from the buffered tokens buf1/buf2.
// When NO_DECRYPTION is not defined, the signature also carries the file
// key, its length, and the object number/generation; the visible code
// passes these on to nested getObj calls, to stream decryption, and to
// string decryption.
// NOTE(review): this excerpt omits parts of the function body (no return
// statement or closing brace is visible), so the comments below describe
// only the lines that are shown -- confirm against the full file.
+#ifndef NO_DECRYPTION
+Object *Parser::getObj(Object *obj,
+ Guchar *fileKey, int keyLength,
+ int objNum, int objGen) {
+#else
Object *Parser::getObj(Object *obj) {
+#endif
char *key;
Stream *str;
Object obj2;
int num;
+#ifndef NO_DECRYPTION
+ Decrypt *decrypt;
+ GString *s;
+ char *p;
+ int i;
+#endif
// refill buffer after inline image data
if (inlineImg == 2) {
// array
if (buf1.isCmd("[")) {
shift();
- obj->initArray();
+ obj->initArray(xref);
// parse elements recursively until "]" or end of file
while (!buf1.isCmd("]") && !buf1.isEOF())
+#ifndef NO_DECRYPTION
+ obj->arrayAdd(getObj(&obj2, fileKey, keyLength, objNum, objGen));
+#else
obj->arrayAdd(getObj(&obj2));
+#endif
if (buf1.isEOF())
error(getPos(), "End of file inside array");
shift();
// dictionary or stream
} else if (buf1.isCmd("<<")) {
shift();
- obj->initDict();
+ obj->initDict(xref);
// read key/value pairs until ">>" or end of file
while (!buf1.isCmd(">>") && !buf1.isEOF()) {
if (!buf1.isName()) {
error(getPos(), "Dictionary key must be a name object");
} else {
key = copyString(buf1.getName());
shift();
// no usable value follows the key: the added lines release the copied
// key before leaving the loop, since it is not passed to dictAdd
- if (buf1.isEOF() || buf1.isError())
+ if (buf1.isEOF() || buf1.isError()) {
+ gfree(key);
break;
+ }
+#ifndef NO_DECRYPTION
+ obj->dictAdd(key, getObj(&obj2, fileKey, keyLength, objNum, objGen));
+#else
obj->dictAdd(key, getObj(&obj2));
+#endif
}
}
if (buf1.isEOF())
// a "stream" token after the dictionary turns the object into a stream
if (buf2.isCmd("stream")) {
if ((str = makeStream(obj))) {
obj->initStream(str);
+#ifndef NO_DECRYPTION
// decrypt the stream data only when a file key was supplied
+ if (fileKey) {
+ str->getBaseStream()->doDecryption(fileKey, keyLength,
+ objNum, objGen);
+ }
+#endif
} else {
obj->free();
obj->initError();
// NOTE(review): initInt(num) directly after initError() suggests lines
// are missing between them in this excerpt -- confirm.
obj->initInt(num);
}
+#ifndef NO_DECRYPTION
+ // string
// taken only when a file key is present; otherwise strings fall through
// to the simple-object case below
+ } else if (buf1.isString() && fileKey) {
+ buf1.copy(obj);
+ s = obj->getString();
// decrypt the copied string's bytes in place, one byte at a time
+ decrypt = new Decrypt(fileKey, keyLength, objNum, objGen);
+ for (i = 0, p = obj->getString()->getCString();
+ i < s->getLength();
+ ++i, ++p) {
+ *p = decrypt->decryptByte(*p);
+ }
+ delete decrypt;
+ shift();
+#endif
+
// simple object
} else {
buf1.copy(obj);
// Parser::makeStream: builds the Stream for the stream dictionary `dict`.
// Visible steps: skip to the next line in the lexer, read the integer
// "Length" entry from dict, create a substream of the lexer's base stream,
// apply the dictionary's filters, then consume the ">>" and "stream" tokens
// and check for "endstream".
// Returns the filtered stream, or NULL when "Length" is not an integer or
// (added lines) when the lexer has no current stream.
Stream *Parser::makeStream(Object *dict) {
Object obj;
Stream *str;
- int pos, length;
+ Guint pos, endPos, length;
// get stream start position
// NOTE(review): no assignment to pos is visible in this excerpt before it
// is used below -- presumably it is set in elided context; confirm.
lexer->skipToNextLine();
// get length
dict->dictLookup("Length", &obj);
if (obj.isInt()) {
- length = obj.getInt();
+ length = (Guint)obj.getInt();
obj.free();
} else {
// NOTE(review): obj is freed only on the isInt path above; this path
// returns without obj.free() -- confirm whether that can leak.
error(getPos(), "Bad 'Length' attribute in stream");
return NULL;
}
+ // check for length in damaged file
// when xref reports an end position for a stream starting at pos, that
// value overrides the "Length" entry read above
+ if (xref && xref->getStreamEnd(pos, &endPos)) {
+ length = endPos - pos;
+ }
+
+ // in badly damaged PDF files, we can run off the end of the input
+ // stream immediately after the "stream" token
+ if (!lexer->getStream()) {
+ return NULL;
+ }
+
// make base stream
- str = lexer->getStream()->getBaseStream()->makeSubStream(pos, length, dict);
+ str = lexer->getStream()->getBaseStream()->makeSubStream(pos, gTrue,
+ length, dict);
// get filters
str = str->addFilters(dict);
// refill token buffers and check for 'endstream'
shift(); // kill '>>'
shift(); // kill 'stream'
// a missing 'endstream' is reported but not fatal: the added lines call
// ignoreLength() on the stream and it is still returned
- if (buf1.isCmd("endstream"))
+ if (buf1.isCmd("endstream")) {
shift();
- else
+ } else {
error(getPos(), "Missing 'endstream'");
+ str->ignoreLength();
+ }
return str;
}
void Parser::shift() {
if (inlineImg > 0) {
- ++inlineImg;
+ if (inlineImg < 2) {
+ ++inlineImg;
+ } else {
+ // in a damaged content stream, if 'ID' shows up in the middle
+ // of a dictionary, we need to reset
+ inlineImg = 0;
+ }
} else if (buf2.isCmd("ID")) {
lexer->skipChar(); // skip char after 'ID' command
inlineImg = 1;