1 //========================================================================
5 // Copyright 1996-2003 Glyph & Cog, LLC
7 //========================================================================
11 #ifdef USE_GCC_PRAGMAS
12 #pragma implementation
29 #include "ErrorCodes.h"
32 //------------------------------------------------------------------------
34 #define xrefSearchSize 1024 // read this many bytes at end of file
35 // to look for 'startxref'
38 //------------------------------------------------------------------------
40 //------------------------------------------------------------------------
// Permission bits tested against permFlags.
// Checked by XRef::okToPrint().
42 #define permPrint (1<<2)
// Checked by XRef::okToChange().
43 #define permChange (1<<3)
// Checked by XRef::okToCopy().
44 #define permCopy (1<<4)
// Checked by XRef::okToAddNotes().
45 #define permNotes (1<<5)
// Value assigned to permFlags at the start of checkEncrypted(),
// before any "P" entry from the Encrypt dictionary is applied.
46 #define defPermFlags 0xfffc
49 //------------------------------------------------------------------------
51 //------------------------------------------------------------------------
56 // Create an object stream, using object number <objStrNum>,
// generation 0 (the constructor fetches <objStrNumA>, generation 0,
// through <xref>).
58 ObjectStream(XRef *xref, int objStrNumA);
62 // Return the object number of this object stream.
63 int getObjStrNum() { return objStrNum; }
65 // Get the <objIdx>th object from this stream, which should be
66 // object number <objNum>, generation 0.
// A copy is written into <obj>; if <objIdx> is out of range or
// <objNum> does not match the stored object number, <obj> is set
// to null instead.
67 Object *getObject(int objIdx, int objNum, Object *obj);
71 int objStrNum; // object number of the object stream
72 int nObjects; // number of objects in the stream
73 Object *objs; // the objects (length = nObjects)
74 int *objNums; // the object numbers (length = nObjects)
// Load object stream <objStrNumA> (generation 0) through <xref>.
// Reads the "N" (object count) and "First" entries of the stream
// dictionary, parses the header of (object number, offset) pairs,
// and then parses each contained object into objs[].
77 ObjectStream::ObjectStream(XRef *xref, int objStrNumA) {
81 Object objStr, obj1, obj2;
84 objStrNum = objStrNumA;
// the referenced object must be a stream
89 if (!xref->fetch(objStrNum, 0, &objStr)->isStream()) {
// "N" must be an integer: the number of objects in the stream
93 if (!objStr.streamGetDict()->lookup("N", &obj1)->isInt()) {
97 nObjects = obj1.getInt();
// "First" must be an integer; it is used below as the length of the
// header and as the starting position when skipping to offsets[0]
103 if (!objStr.streamGetDict()->lookup("First", &obj1)->isInt()) {
107 first = obj1.getInt();
// guard against overflow in the nObjects * sizeof(int) allocations
113 if (nObjects*sizeof(int)/sizeof(int) != nObjects) {
114 error(-1, "Invalid 'nObjects'");
118 objs = new Object[nObjects];
119 objNums = (int *)gmalloc(nObjects * sizeof(int));
120 offsets = (int *)gmalloc(nObjects * sizeof(int));
122 // parse the header: object numbers and offsets
123 objStr.streamReset();
// the header parser reads through an EmbedStream created with <first>
// as its length argument
125 str = new EmbedStream(objStr.getStream(), &obj1, gTrue, first);
126 parser = new Parser(xref, new Lexer(xref, str));
127 for (i = 0; i < nObjects; ++i) {
// each header record is two integers: object number, then offset
128 parser->getObj(&obj1);
129 parser->getObj(&obj2);
130 if (!obj1.isInt() || !obj2.isInt()) {
137 objNums[i] = obj1.getInt();
138 offsets[i] = obj2.getInt();
// reject negative values and offsets that decrease from one record
// to the next
141 if (objNums[i] < 0 || offsets[i] < 0 ||
142 (i > 0 && offsets[i] < offsets[i-1])) {
// consume whatever is left of the header stream
148 while (str->getChar() != EOF) ;
151 // skip to the first object - this shouldn't be necessary because
152 // the First key is supposed to be equal to offsets[0], but just in
154 for (i = first; i < offsets[0]; ++i) {
155 objStr.getStream()->getChar();
// parse the objects, one EmbedStream per object
159 for (i = 0; i < nObjects; ++i) {
// the last object's stream is created with gFalse/0; every other
// object's stream is created with gTrue and the distance to the next
// offset (presumably a length limit - confirm against EmbedStream)
161 if (i == nObjects - 1) {
162 str = new EmbedStream(objStr.getStream(), &obj1, gFalse, 0);
164 str = new EmbedStream(objStr.getStream(), &obj1, gTrue,
165 offsets[i+1] - offsets[i]);
167 parser = new Parser(xref, new Lexer(xref, str));
168 parser->getObj(&objs[i]);
// consume the rest of this object's stream before moving on
169 while (str->getChar() != EOF) ;
// Destructor. The visible loop walks all nObjects entries
// (presumably to release each parsed object - confirm).
180 ObjectStream::~ObjectStream() {
184 for (i = 0; i < nObjects; ++i) {
// Copy the object stored at index <objIdx> into <obj>.
// If <objIdx> is outside [0, nObjects) or the object number recorded
// for that index differs from <objNum>, <obj> is initialized to null
// instead. Returns the result of the copy / initNull call.
192 Object *ObjectStream::getObject(int objIdx, int objNum, Object *obj) {
193 if (objIdx < 0 || objIdx >= nObjects || objNum != objNums[objIdx]) {
194 return obj->initNull();
196 return objs[objIdx].copy(obj);
199 //------------------------------------------------------------------------
201 //------------------------------------------------------------------------
// Build the xref table: locate the 'startxref' position, read the
// xref sections from there, and fall back to constructXRef() when
// something is wrong (errCode becomes errDamaged if reconstruction
// also fails). Afterwards the catalog reference is taken from the
// trailer's "Root" entry and the file is checked for encryption
// using <ownerPassword> / <userPassword>.
203 XRef::XRef(BaseStream *strA, GString *ownerPassword, GString *userPassword) {
217 start = str->getStart();
218 pos = getStartXref();
220 // if there was a problem with the 'startxref' position, try to
221 // reconstruct the xref table
223 if (!(ok = constructXRef())) {
224 errCode = errDamaged;
228 // read the xref table
// readXRef() updates pos; the loop runs until it returns false
230 while (readXRef(&pos)) ;
232 // if there was a problem with the xref table,
233 // try to reconstruct it
235 if (!(ok = constructXRef())) {
236 errCode = errDamaged;
242 // get the root dictionary (catalog) object
// looked up without resolving the reference, so its number and
// generation can be recorded
243 trailerDict.dictLookupNF("Root", &obj);
245 rootNum = obj.getRefNum();
246 rootGen = obj.getRefGen();
250 if (!(ok = constructXRef())) {
251 errCode = errDamaged;
256 // now set the trailer dictionary's xref pointer so we can fetch
257 // indirect objects from it
258 trailerDict.getDict()->setXRef(this);
260 // check for encryption
261 #ifndef NO_DECRYPTION
// a true result from checkEncrypted() is recorded as errEncrypted
264 if (checkEncrypted(ownerPassword, userPassword)) {
266 errCode = errEncrypted;
282 // Read the 'startxref' position.
// Reads up to xrefSearchSize bytes from the end of the stream, scans
// that buffer backwards for the "startxref" keyword, and parses the
// number following it into lastXRefPos.
283 Guint XRef::getStartXref() {
284 char buf[xrefSearchSize+1];
288 // read last xrefSearchSize bytes
289 str->setPos(xrefSearchSize, -1);
290 for (n = 0; n < xrefSearchSize; ++n) {
// stop early if the stream ends before the buffer is full
291 if ((c = str->getChar()) == EOF) {
// search from the end of the buffer toward the start; "startxref"
// is 9 characters long
299 for (i = n - 9; i >= 0; --i) {
300 if (!strncmp(&buf[i], "startxref", 9)) {
// skip the whitespace between the keyword and the number
307 for (p = &buf[i+9]; isspace(*p); ++p) ;
308 lastXRefPos = strToUnsigned(p);
313 // Read one xref table section. Also reads the associated trailer
314 // dictionary, and returns the prev pointer (if any).
// Two layouts are recognized from the first token at start + *pos:
// the 'xref' command (old-style table) or an integer, which must be
// followed by another integer, the 'obj' command and a stream (xref
// stream).
315 GBool XRef::readXRef(Guint *pos) {
320 // start up a parser, parse one token
322 parser = new Parser(NULL,
324 str->makeSubStream(start + *pos, gFalse, 0, &obj)));
325 parser->getObj(&obj);
327 // parse an old-style xref table
328 if (obj.isCmd("xref")) {
330 more = readXRefTable(parser, pos);
332 // parse an xref stream
333 } else if (obj.isInt()) {
// expect "<num> <gen> obj" followed by the stream itself
335 if (!parser->getObj(&obj)->isInt()) {
339 if (!parser->getObj(&obj)->isCmd("obj")) {
343 if (!parser->getObj(&obj)->isStream()) {
346 more = readXRefStream(obj.getStream(), pos);
// Parse an old-style xref table through <parser>. Subsections are
// read as a (first, n) header followed by entries of the form
// "<offset> <gen> n|f" until the 'trailer' command is seen. The
// trailer dictionary is then read: its "Prev" value is stored in
// *pos, the first trailer dictionary encountered is kept in
// trailerDict, and an integer "XRefStm" entry is picked up.
363 GBool XRef::readXRefTable(Parser *parser, Guint *pos) {
368 int first, n, newSize, i;
371 parser->getObj(&obj);
// 'trailer' ends the list of subsections
372 if (obj.isCmd("trailer")) {
379 first = obj.getInt();
381 if (!parser->getObj(&obj)->isInt()) {
// reject negative values and a first + n sum that wrapped negative
386 if (first < 0 || n < 0 || first + n < 0) {
// grow the entries array by doubling (starting at 1024) until the
// subsection fits; newSize > 0 stops the loop if doubling overflows
389 if (first + n > size) {
390 for (newSize = size ? 2 * size : 1024;
391 first + n > newSize && newSize > 0;
// guard against overflow in newSize * sizeof(XRefEntry)
396 if (newSize*sizeof(XRefEntry)/sizeof(XRefEntry) != newSize) {
397 error(-1, "Invalid 'obj' parameters'");
401 entries = (XRefEntry *)grealloc(entries, newSize * sizeof(XRefEntry));
// new slots start as free entries with the 0xffffffff placeholder
// offset
402 for (i = size; i < newSize; ++i) {
403 entries[i].offset = 0xffffffff;
404 entries[i].type = xrefEntryFree;
408 for (i = first; i < first + n; ++i) {
409 if (!parser->getObj(&obj)->isInt()) {
412 entry.offset = (Guint)obj.getInt();
414 if (!parser->getObj(&obj)->isInt()) {
417 entry.gen = obj.getInt();
// the third token selects the entry type: 'n' = in use, 'f' = free
419 parser->getObj(&obj);
420 if (obj.isCmd("n")) {
421 entry.type = xrefEntryUncompressed;
422 } else if (obj.isCmd("f")) {
423 entry.type = xrefEntryFree;
// tests whether slot i still holds the placeholder offset assigned
// to newly allocated entries
428 if (entries[i].offset == 0xffffffff) {
430 // PDF files of patents from the IBM Intellectual Property
431 // Network have a bug: the xref table claims to start at 1
// detected as: subsection starts at 1 and entry 1 looks like the
// usual head of the free list (offset 0, generation 65535, free)
433 if (i == 1 && first == 1 &&
434 entries[1].offset == 0 && entries[1].gen == 65535 &&
435 entries[1].type == xrefEntryFree) {
// move that entry down to slot 0 and reset slot 1's offset
437 entries[0] = entries[1];
438 entries[1].offset = 0xffffffff;
444 // read the trailer dictionary
445 if (!parser->getObj(&obj)->isDict()) {
449 // get the 'Prev' pointer
450 obj.getDict()->lookupNF("Prev", &obj2);
452 *pos = (Guint)obj2.getInt();
454 } else if (obj2.isRef()) {
455 // certain buggy PDF generators generate "/Prev NNN 0 R" instead
// in that case the reference's object number is used as the position
457 *pos = (Guint)obj2.getRefNum();
464 // save the first trailer dictionary
465 if (trailerDict.isNone()) {
466 obj.copy(&trailerDict);
469 // check for an 'XRefStm' key
470 if (obj.getDict()->lookup("XRefStm", &obj2)->isInt()) {
471 pos2 = (Guint)obj2.getInt();
// Parse an xref stream. Uses the stream dictionary's "Size", "W" and
// "Index" entries to decide which entry ranges to read via
// readXRefStreamSection(), stores the "Prev" value in *pos, and keeps
// the first dictionary encountered as trailerDict.
488 GBool XRef::readXRefStream(Stream *xrefStr, Guint *pos) {
492 Object obj, obj2, idx;
493 int newSize, first, n, i;
495 dict = xrefStr->getDict();
// "Size" must be an integer
497 if (!dict->lookupNF("Size", &obj)->isInt()) {
500 newSize = obj.getInt();
// grow the entries array if "Size" exceeds the current size
505 if (newSize > size) {
// guard against overflow in newSize * sizeof(XRefEntry)
506 if (newSize * sizeof(XRefEntry)/sizeof(XRefEntry) != newSize) {
507 error(-1, "Invalid 'size' parameter.");
510 entries = (XRefEntry *)grealloc(entries, newSize * sizeof(XRefEntry));
// new slots start as free entries with the 0xffffffff placeholder
// offset
511 for (i = size; i < newSize; ++i) {
512 entries[i].offset = 0xffffffff;
513 entries[i].type = xrefEntryFree;
// "W" must be an array with at least three elements
518 if (!dict->lookupNF("W", &obj)->isArray() ||
519 obj.arrayGetLength() < 3) {
522 for (i = 0; i < 3; ++i) {
523 if (!obj.arrayGet(i, &obj2)->isInt()) {
527 w[i] = obj2.getInt();
// each field width must be between 0 and 4 bytes
529 if (w[i] < 0 || w[i] > 4) {
// "Index", when it is an array, lists (first, count) pairs
536 dict->lookupNF("Index", &idx);
538 for (i = 0; i+1 < idx.arrayGetLength(); i += 2) {
539 if (!idx.arrayGet(i, &obj)->isInt()) {
543 first = obj.getInt();
545 if (!idx.arrayGet(i+1, &obj)->isInt()) {
551 if (first < 0 || n < 0 ||
552 !readXRefStreamSection(xrefStr, w, first, n)) {
// otherwise a single section covering entries 0 .. newSize-1 is read
558 if (!readXRefStreamSection(xrefStr, w, 0, newSize)) {
565 dict->lookupNF("Prev", &obj);
567 *pos = (Guint)obj.getInt();
// keep the first trailer dictionary seen
573 if (trailerDict.isNone()) {
574 trailerDict.initDict(dict);
// Read <n> xref entries, starting at object number <first>, from
// <xrefStr>. Each entry consists of three fields of w[0], w[1] and
// w[2] bytes (type, offset, generation), each assembled with the
// most significant byte first.
586 GBool XRef::readXRefStreamSection(Stream *xrefStr, int *w, int first, int n) {
588 int type, gen, c, newSize, i, j;
// grow the entries array by doubling (starting at 1024) until the
// section fits; newSize > 0 stops the loop if doubling overflows
593 if (first + n > size) {
594 for (newSize = size ? 2 * size : 1024;
595 first + n > newSize && newSize > 0;
// guard against overflow in newSize * sizeof(XRefEntry)
600 if (newSize*sizeof(XRefEntry)/sizeof(XRefEntry) != newSize) {
601 error(-1, "Invalid 'size' inside xref table.");
604 entries = (XRefEntry *)grealloc(entries, newSize * sizeof(XRefEntry));
// new slots start as free entries with the 0xffffffff placeholder
// offset
605 for (i = size; i < newSize; ++i) {
606 entries[i].offset = 0xffffffff;
607 entries[i].type = xrefEntryFree;
611 for (i = first; i < first + n; ++i) {
// field 1: entry type, w[0] bytes (stays 0 when w[0] is 0)
615 for (type = 0, j = 0; j < w[0]; ++j) {
616 if ((c = xrefStr->getChar()) == EOF) {
619 type = (type << 8) + c;
// field 2: offset, w[1] bytes
622 for (offset = 0, j = 0; j < w[1]; ++j) {
623 if ((c = xrefStr->getChar()) == EOF) {
626 offset = (offset << 8) + c;
// field 3: generation, w[2] bytes
628 for (gen = 0, j = 0; j < w[2]; ++j) {
629 if ((c = xrefStr->getChar()) == EOF) {
632 gen = (gen << 8) + c;
// tests whether slot i still holds the placeholder offset assigned
// to newly allocated entries
634 if (entries[i].offset == 0xffffffff) {
// the entry is stored as free, uncompressed or compressed; all three
// variants record the same offset and gen fields
637 entries[i].offset = offset;
638 entries[i].gen = gen;
639 entries[i].type = xrefEntryFree;
642 entries[i].offset = offset;
643 entries[i].gen = gen;
644 entries[i].type = xrefEntryUncompressed;
647 entries[i].offset = offset;
648 entries[i].gen = gen;
649 entries[i].type = xrefEntryCompressed;
660 // Attempt to construct an xref table for a damaged file.
// The file is scanned line by line (up to 256 bytes per line) for
// three kinds of lines: "trailer" (trailer dictionary),
// "<num> <gen> obj" (object start) and "endstream" (recorded in
// streamEnds).
661 GBool XRef::constructXRef() {
663 Object newTrailerDict, obj;
677 error(0, "PDF file is damaged - attempting to reconstruct xref table...");
679 streamEndsLen = streamEndsSize = 0;
684 if (!str->getLine(buf, 256)) {
689 // got trailer dictionary
690 if (!strncmp(p, "trailer", 7)) {
// parse the dictionary that follows the 7-character keyword
692 parser = new Parser(NULL,
694 str->makeSubStream(start + pos + 7, gFalse, 0, &obj)));
695 parser->getObj(&newTrailerDict);
696 if (newTrailerDict.isDict()) {
// record the catalog reference from "Root" and adopt this dictionary
// as trailerDict
697 newTrailerDict.dictLookupNF("Root", &obj);
699 rootNum = obj.getRefNum();
700 rootGen = obj.getRefGen();
701 if (!trailerDict.isNone()) {
704 newTrailerDict.copy(&trailerDict);
709 newTrailerDict.free();
// a line starting with a digit may be "<num> <gen> obj"
713 } else if (isdigit(*p)) {
// scan: digits, whitespace, digits, whitespace
718 } while (*p && isdigit(*p));
722 } while (*p && isspace(*p));
727 } while (*p && isdigit(*p));
731 } while (*p && isspace(*p));
732 if (!strncmp(p, "obj", 3)) {
// round num + 1 up to a multiple of 256
734 newSize = (num + 1 + 255) & ~255;
736 error(-1, "Bad object number");
// guard against overflow in newSize * sizeof(XRefEntry)
739 if (newSize*sizeof(XRefEntry)/sizeof(XRefEntry) != newSize) {
740 error(-1, "Invalid 'obj' parameters.");
743 entries = (XRefEntry *)
744 grealloc(entries, newSize * sizeof(XRefEntry));
// new slots start as free entries with the 0xffffffff placeholder
// offset
745 for (i = size; i < newSize; ++i) {
746 entries[i].offset = 0xffffffff;
747 entries[i].type = xrefEntryFree;
// overwrite the slot when it is free or the generation found here is
// at least as high as the recorded one
751 if (entries[num].type == xrefEntryFree ||
752 gen >= entries[num].gen) {
// offsets are stored relative to start
753 entries[num].offset = pos - start;
754 entries[num].gen = gen;
755 entries[num].type = xrefEntryUncompressed;
763 } else if (!strncmp(p, "endstream", 9)) {
// grow the streamEnds array in steps of 64 entries when it is full
764 if (streamEndsLen == streamEndsSize) {
765 streamEndsSize += 64;
766 if (streamEndsSize*sizeof(int)/sizeof(int) != streamEndsSize) {
767 error(-1, "Invalid 'endstream' parameter.");
770 streamEnds = (Guint *)grealloc(streamEnds,
771 streamEndsSize * sizeof(int));
// remember where this "endstream" line was found
773 streamEnds[streamEndsLen++] = pos;
780 error(-1, "Couldn't find trailer dictionary");
784 #ifndef NO_DECRYPTION
// Inspect the trailer's "Encrypt" dictionary. For the "Standard"
// security handler this reads V, R, Length, O, U and P plus the
// trailer's "ID", and tries to derive the file key from
// <ownerPassword> / <userPassword> via Decrypt::makeFileKey().
// Problems are reported through error().
785 GBool XRef::checkEncrypted(GString *ownerPassword, GString *userPassword) {
786 Object encrypt, filterObj, versionObj, revisionObj, lengthObj;
787 Object ownerKey, userKey, permissions, fileID, fileID1;
792 encVersion = encRevision = 0;
// start from the default permissions with no owner password accepted
795 permFlags = defPermFlags;
796 ownerPasswordOk = gFalse;
797 trailerDict.dictLookup("Encrypt", &encrypt);
// the file counts as encrypted when "Encrypt" is a dictionary
798 if ((encrypted1 = encrypt.isDict())) {
800 encrypt.dictLookup("Filter", &filterObj);
801 if (filterObj.isName("Standard")) {
802 encrypt.dictLookup("V", &versionObj);
803 encrypt.dictLookup("R", &revisionObj);
804 encrypt.dictLookup("Length", &lengthObj);
805 encrypt.dictLookup("O", &ownerKey);
806 encrypt.dictLookup("U", &userKey);
807 encrypt.dictLookup("P", &permissions);
808 trailerDict.dictLookup("ID", &fileID);
// required shape: integer V, R and P; O and U strings of exactly 32
// bytes
809 if (versionObj.isInt() &&
810 revisionObj.isInt() &&
811 ownerKey.isString() && ownerKey.getString()->getLength() == 32 &&
812 userKey.isString() && userKey.getString()->getLength() == 32 &&
813 permissions.isInt() &&
815 encVersion = versionObj.getInt();
816 encRevision = revisionObj.getInt();
// "Length" is divided by 8 to get keyLength (presumably bits to
// bytes - confirm)
817 if (lengthObj.isInt()) {
818 keyLength = lengthObj.getInt() / 8;
822 permFlags = permissions.getInt();
// only versions 1-2 combined with revisions 2-3 proceed to key
// generation
823 if (encVersion >= 1 && encVersion <= 2 &&
824 encRevision >= 2 && encRevision <= 3) {
// the first element of the ID array is passed to the key derivation
825 fileID.arrayGet(0, &fileID1);
826 if (fileID1.isString()) {
827 if (Decrypt::makeFileKey(encVersion, encRevision, keyLength,
828 ownerKey.getString(), userKey.getString(),
829 permFlags, fileID1.getString(),
830 ownerPassword, userPassword, fileKey,
// an owner password was supplied but not accepted
832 if (ownerPassword && !ownerPasswordOk) {
833 error(-1, "Incorrect owner password");
837 error(-1, "Incorrect password");
840 error(-1, "Weird encryption info");
844 error(-1, "Unsupported version/revision (%d/%d) of Standard security handler",
845 encVersion, encRevision);
848 error(-1, "Weird encryption info");
// the message names the filter when it is a name object
858 error(-1, "Unknown security handler '%s'",
859 filterObj.isName() ? filterObj.getName() : "???");
865 // this flag has to be set *after* we read the O/U/P strings
866 encrypted = encrypted1;
// Variant of checkEncrypted() that only detects a non-null "Encrypt"
// entry in the trailer and reports that decryption support was not
// built in.
871 GBool XRef::checkEncrypted(GString *ownerPassword, GString *userPassword) {
875 trailerDict.dictLookup("Encrypt", &obj);
876 if ((encrypted = !obj.isNull())) {
877 error(-1, "PDF file is encrypted and this version of the Xpdf tools");
878 error(-1, "was built without decryption support.");
// With decryption support compiled in: printing is allowed when the
// owner password was accepted (unless <ignoreOwnerPW> is set) or the
// permPrint bit is set in permFlags.
885 GBool XRef::okToPrint(GBool ignoreOwnerPW) {
886 #ifndef NO_DECRYPTION
887 return (!ignoreOwnerPW && ownerPasswordOk) || (permFlags & permPrint);
// With decryption support compiled in: changes are allowed when the
// owner password was accepted (unless <ignoreOwnerPW> is set) or the
// permChange bit is set in permFlags.
893 GBool XRef::okToChange(GBool ignoreOwnerPW) {
894 #ifndef NO_DECRYPTION
895 return (!ignoreOwnerPW && ownerPasswordOk) || (permFlags & permChange);
// With decryption support compiled in: copying is allowed when the
// owner password was accepted (unless <ignoreOwnerPW> is set) or the
// permCopy bit is set in permFlags.
901 GBool XRef::okToCopy(GBool ignoreOwnerPW) {
902 #ifndef NO_DECRYPTION
903 return (!ignoreOwnerPW && ownerPasswordOk) || (permFlags & permCopy);
// With decryption support compiled in: adding notes is allowed when
// the owner password was accepted (unless <ignoreOwnerPW> is set) or
// the permNotes bit is set in permFlags.
909 GBool XRef::okToAddNotes(GBool ignoreOwnerPW) {
910 #ifndef NO_DECRYPTION
911 return (!ignoreOwnerPW && ownerPasswordOk) || (permFlags & permNotes);
// Fetch indirect object <num> <gen> into <obj>.
// Uncompressed entries are parsed from the file at start + offset and
// must begin with "<num> <gen> obj". Compressed entries are taken
// from an object stream: the entry's offset field holds the object
// stream number and its gen field the index within that stream.
// The final return yields a null object.
917 Object *XRef::fetch(int num, int gen, Object *obj) {
920 Object obj1, obj2, obj3;
922 // check for bogus ref - this can happen in corrupted PDF files
923 if (num < 0 || num >= size) {
930 case xrefEntryUncompressed:
935 parser = new Parser(this,
937 str->makeSubStream(start + e->offset, gFalse, 0, &obj1)));
// read the three header tokens and verify them against the request
938 parser->getObj(&obj1);
939 parser->getObj(&obj2);
940 parser->getObj(&obj3);
941 if (!obj1.isInt() || obj1.getInt() != num ||
942 !obj2.isInt() || obj2.getInt() != gen ||
943 !obj3.isCmd("obj")) {
946 #ifndef NO_DECRYPTION
// the file key is passed only when the document is encrypted
947 parser->getObj(obj, encrypted ? fileKey : (Guchar *)NULL, keyLength,
958 case xrefEntryCompressed:
// a new ObjectStream is created unless the current objStr already
// refers to the object stream number stored in e->offset
962 if (!objStr || objStr->getObjStrNum() != (int)e->offset) {
966 objStr = new ObjectStream(this, e->offset);
968 objStr->getObject(e->gen, num, obj);
978 return obj->initNull();
// Look up the "Info" entry of the trailer dictionary into <obj>,
// using dictLookup (see getDocInfoNF for the dictLookupNF variant).
981 Object *XRef::getDocInfo(Object *obj) {
982 return trailerDict.dictLookup("Info", obj);
985 // Added for the pdftex project.
// Same lookup as getDocInfo, but through dictLookupNF (the NF lookups
// in this file are used where the raw reference is wanted, e.g. for
// "Root").
986 Object *XRef::getDocInfoNF(Object *obj) {
987 return trailerDict.dictLookupNF("Info", obj);
// Look up, among the stream end positions recorded in streamEnds,
// the one that bounds <streamStart> from above, and store it in
// *streamEnd. The search is a bisection over streamEnds, which it
// treats as sorted in increasing order.
990 GBool XRef::getStreamEnd(Guint streamStart, Guint *streamEnd) {
// nothing to find when no ends were recorded or <streamStart> lies
// beyond the last recorded end
993 if (streamEndsLen == 0 ||
994 streamStart > streamEnds[streamEndsLen - 1]) {
999 b = streamEndsLen - 1;
1000 // invariant: streamEnds[a] < streamStart <= streamEnds[b]
1003 if (streamStart <= streamEnds[m]) {
// by the invariant, streamEnds[b] is at or after <streamStart>
1009 *streamEnd = streamEnds[b];
// Convert the leading decimal digits of <s> to an unsigned value.
// At most 10 digits are consumed; parsing stops at the first
// non-digit or at the end of the string.
// NOTE(review): no overflow check is visible here - a 10-digit value
// can exceed a 32-bit range; confirm whether callers rely on that.
1013 Guint XRef::strToUnsigned(char *s) {
1019 for (p = s, i = 0; *p && isdigit(*p) && i < 10; ++p, ++i) {
1020 x = 10 * x + (*p - '0');