pdf/xpdf/XRef.cc

   1 //========================================================================
   2 //
   3 // XRef.cc
   4 //
   5 // Copyright 1996 Derek B. Noonburg
   6 //
   7 //========================================================================
   8
   9 #ifdef __GNUC__
  10 #pragma implementation
  11 #endif
  12
  13 #include <stdlib.h>
  14 #include <stddef.h>
  15 #include <string.h>
  16 #include <ctype.h>
  17 #include "gmem.h"
  18 #include "Object.h"
  19 #include "Stream.h"
  20 #include "Lexer.h"
  21 #include "Parser.h"
  22 #include "Dict.h"
  23 #include "Error.h"
  24 #include "XRef.h"
  25
  26 //------------------------------------------------------------------------
  27
  28 #define xrefSearchSize 1024     // read this many bytes at end of file
  29                                 //   to look for 'startxref'
  30
  31 //------------------------------------------------------------------------
  32 // The global xref table
  33 //------------------------------------------------------------------------
  34
  35 XRef *xref = NULL;
  36
  37 //------------------------------------------------------------------------
  38 // XRef
  39 //------------------------------------------------------------------------
  40
  41 XRef::XRef(FileStream *str) {
  42   XRef *oldXref;
  43   int pos;
  44   int i;
  45
  46   ok = gTrue;
  47   size = 0;
  48   entries = NULL;
  49
  50   // get rid of old xref (otherwise it will try to fetch the Root object
  51   // in the new document, using the old xref)
  52   oldXref = xref;
  53   xref = NULL;
  54
  55   // read the trailer
  56   file = str->getFile();
  57   start = str->getStart();
  58   pos = readTrailer(str);
  59
  60   // if there was a problem with the trailer,
  61   // try to reconstruct the xref table
  62   if (pos == 0) {
  63     if (!(ok = constructXRef(str))) {
  64       xref = oldXref;
  65       return;
  66     }
  67
  68   // trailer is ok - read the xref table
  69   } else {
  70     entries = (XRefEntry *)gmalloc(size * sizeof(XRefEntry));
  71     for (i = 0; i < size; ++i) {
  72       entries[i].offset = -1;
  73       entries[i].used = gFalse;
  74     }
  75     while (readXRef(str, &pos)) ;
  76
  77     // if there was a problem with the xref table,
  78     // try to reconstruct it
  79     if (!ok) {
  80       gfree(entries);
  81       size = 0;
  82       entries = NULL;
  83       if (!(ok = constructXRef(str))) {
  84         xref = oldXref;
  85         return;
  86       }
  87     }
  88   }
  89
  90   // set up new xref table
  91   xref = this;
  92
  93   // check for encryption
  94   if (checkEncrypted()) {
  95     ok = gFalse;
  96     xref = oldXref;
  97     return;
  98   }
  99 }
 100
 101 XRef::~XRef() {
 102   gfree(entries);
 103   trailerDict.free();
 104 }
 105
 106 // Read startxref position, xref table size, and root.  Returns
 107 // first xref position.
 108 int XRef::readTrailer(FileStream *str) {
 109   Parser *parser;
 110   Object obj;
 111   char buf[xrefSearchSize+1];
 112   int n, pos, pos1;
 113   char *p;
 114   int c;
 115   int i;
 116
 117   // read last xrefSearchSize bytes
 118   str->setPos(-xrefSearchSize);
 119   for (n = 0; n < xrefSearchSize; ++n) {
 120     if ((c = str->getChar()) == EOF)
 121       break;
 122     buf[n] = c;
 123   }
 124   buf[n] = '\0';
 125
 126   // find startxref
 127   for (i = n - 9; i >= 0; --i) {
 128     if (!strncmp(&buf[i], "startxref", 9))
 129       break;
 130   }
 131   if (i < 0)
 132     return 0;
 133   for (p = &buf[i+9]; isspace(*p); ++p) ;
 134   pos = atoi(p);
 135
 136   // find trailer dict by looking after first xref table
 137   // (NB: we can't just use the trailer dict at the end of the file --
 138   // this won't work for linearized files.)
 139   str->setPos(start + pos);
 140   for (i = 0; i < 4; ++i)
 141     buf[i] = str->getChar();
 142   if (strncmp(buf, "xref", 4))
 143     return 0;
 144   pos1 = pos + 4;
 145   while (1) {
 146     str->setPos(start + pos1);
 147     for (i = 0; i < 35; ++i) {
 148       if ((c = str->getChar()) == EOF)
 149         return 0;
 150       buf[i] = c;
 151     }
 152     if (!strncmp(buf, "trailer", 7))
 153       break;
 154     p = buf;
 155     while (isspace(*p)) ++p;
 156     while ('0' <= *p && *p <= '9') ++p;
 157     while (isspace(*p)) ++p;
 158     n = atoi(p);
 159     while ('0' <= *p && *p <= '9') ++p;
 160     while (isspace(*p)) ++p;
 161     if (p == buf)
 162       return 0;
 163     pos1 += (p - buf) + n * 20;
 164   }
 165   pos1 += 7;
 166
 167   // read trailer dict
 168   obj.initNull();
 169   parser = new Parser(new Lexer(new FileStream(file, start + pos1, -1, &obj)));
 170   parser->getObj(&trailerDict);
 171   if (trailerDict.isDict()) {
 172     trailerDict.dictLookupNF("Size", &obj);
 173     if (obj.isInt())
 174       size = obj.getInt();
 175     else
 176       pos = 0;
 177     obj.free();
 178     trailerDict.dictLookupNF("Root", &obj);
 179     if (obj.isRef()) {
 180       rootNum = obj.getRefNum();
 181       rootGen = obj.getRefGen();
 182     } else {
 183       pos = 0;
 184     }
 185     obj.free();
 186   } else {
 187     pos = 0;
 188   }
 189   delete parser;
 190
 191   // return first xref position
 192   return pos;
 193 }
 194
 195 // Read an xref table and the prev pointer from the trailer.
 196 GBool XRef::readXRef(FileStream *str, int *pos) {
 197   Parser *parser;
 198   Object obj, obj2;
 199   char s[20];
 200   GBool more;
 201   int first, n, i, j;
 202   int c;
 203
 204   // seek to xref in stream
 205   str->setPos(start + *pos);
 206
 207   // make sure it's an xref table
 208   while ((c = str->getChar()) != EOF && isspace(c)) ;
 209   s[0] = (char)c;
 210   s[1] = (char)str->getChar();
 211   s[2] = (char)str->getChar();
 212   s[3] = (char)str->getChar();
 213   if (!(s[0] == 'x' && s[1] == 'r' && s[2] == 'e' && s[3] == 'f'))
 214     goto err2;
 215
 216   // read xref
 217   while (1) {
 218     while ((c = str->lookChar()) != EOF && isspace(c))
 219       str->getChar();
 220     if (c == 't')
 221       break;
 222     for (i = 0; (c = str->getChar()) != EOF && isdigit(c) && i < 20; ++i)
 223       s[i] = (char)c;
 224     if (i == 0)
 225       goto err2;
 226     s[i] = '\0';
 227     first = atoi(s);
 228     while ((c = str->lookChar()) != EOF && isspace(c))
 229       str->getChar();
 230     for (i = 0; (c = str->getChar()) != EOF && isdigit(c) && i < 20; ++i)
 231       s[i] = (char)c;
 232     if (i == 0)
 233       goto err2;
 234     s[i] = '\0';
 235     n = atoi(s);
 236     while ((c = str->lookChar()) != EOF && isspace(c))
 237       str->getChar();
 238     for (i = first; i < first + n; ++i) {
 239       for (j = 0; j < 20; ++j) {
 240         if ((c = str->getChar()) == EOF)
 241           goto err2;
 242         s[j] = (char)c;
 243       }
 244       if (entries[i].offset < 0) {
 245         s[10] = '\0';
 246         entries[i].offset = atoi(s);
 247         s[16] = '\0';
 248         entries[i].gen = atoi(&s[11]);
 249         if (s[17] == 'n')
 250           entries[i].used = gTrue;
 251         else if (s[17] == 'f')
 252           entries[i].used = gFalse;
 253         else
 254           goto err2;
 255       }
 256     }
 257   }
 258
 259   // read prev pointer from trailer dictionary
 260   obj.initNull();
 261   parser = new Parser(new Lexer(
 262     new FileStream(file, str->getPos(), -1, &obj)));
 263   parser->getObj(&obj);
 264   if (!obj.isCmd("trailer"))
 265     goto err1;
 266   obj.free();
 267   parser->getObj(&obj);
 268   if (!obj.isDict())
 269     goto err1;
 270   obj.getDict()->lookupNF("Prev", &obj2);
 271   if (obj2.isInt()) {
 272     *pos = obj2.getInt();
 273     more = gTrue;
 274   } else {
 275     more = gFalse;
 276   }
 277   obj.free();
 278   obj2.free();
 279
 280   delete parser;
 281   return more;
 282
 283  err1:
 284   obj.free();
 285  err2:
 286   ok = gFalse;
 287   return gFalse;
 288 }
 289
 290 // Attempt to construct an xref table for a damaged file.
 291 GBool XRef::constructXRef(FileStream *str) {
 292   Parser *parser;
 293   Object obj;
 294   char buf[256];
 295   int pos;
 296   int num, gen;
 297   int newSize;
 298   char *p;
 299   int i;
 300   GBool gotRoot;
 301
 302   error(0, "PDF file is damaged - attempting to reconstruct xref table...");
 303   gotRoot = gFalse;
 304
 305   str->reset();
 306   while (1) {
 307     pos = str->getPos();
 308     if (!str->getLine(buf, 256))
 309       break;
 310     p = buf;
 311
 312     // got trailer dictionary
 313     if (!strncmp(p, "trailer", 7)) {
 314       obj.initNull();
 315       parser = new Parser(new Lexer(
 316                  new FileStream(file, start + pos + 8, -1, &obj)));
 317       if (!trailerDict.isNone())
 318         trailerDict.free();
 319       parser->getObj(&trailerDict);
 320       if (trailerDict.isDict()) {
 321         trailerDict.dictLookupNF("Root", &obj);
 322         if (obj.isRef()) {
 323           rootNum = obj.getRefNum();
 324           rootGen = obj.getRefGen();
 325           gotRoot = gTrue;
 326         }
 327         obj.free();
 328       } else {
 329         pos = 0;
 330       }
 331       delete parser;
 332
 333     // look for object
 334     } else if (isdigit(*p)) {
 335       num = atoi(p);
 336       do {
 337         ++p;
 338       } while (*p && isdigit(*p));
 339       if (isspace(*p)) {
 340         do {
 341           ++p;
 342         } while (*p && isspace(*p));
 343         if (isdigit(*p)) {
 344           gen = atoi(p);
 345           do {
 346             ++p;
 347           } while (*p && isdigit(*p));
 348           if (isspace(*p)) {
 349             do {
 350               ++p;
 351             } while (*p && isspace(*p));
 352             if (!strncmp(p, "obj", 3)) {
 353               if (num >= size) {
 354                 newSize = (num + 1 + 255) & ~255;
 355                 entries = (XRefEntry *)
 356                             grealloc(entries, newSize * sizeof(XRefEntry));
 357                 for (i = size; i < newSize; ++i) {
 358                   entries[i].offset = -1;
 359                   entries[i].used = gFalse;
 360                 }
 361                 size = newSize;
 362               }
 363               if (!entries[num].used || gen >= entries[num].gen) {
 364                 entries[num].offset = pos - start;
 365                 entries[num].gen = gen;
 366                 entries[num].used = gTrue;
 367               }
 368             }
 369           }
 370         }
 371       }
 372     }
 373   }
 374
 375   if (gotRoot)
 376     return gTrue;
 377
 378   error(-1, "Couldn't find trailer dictionary");
 379   return gFalse;
 380 }
 381
 382 GBool XRef::checkEncrypted() {
 383   Object obj;
 384   GBool encrypted;
 385
 386   trailerDict.dictLookup("Encrypt", &obj);
 387   if ((encrypted = !obj.isNull())) {
 388     error(-1, "PDF file is encrypted and cannot be displayed");
 389     error(-1, "* Decryption support is currently not included in xpdf");
 390     error(-1, "* due to legal restrictions: the U.S.A. still has bogus");
 391     error(-1, "* export controls on cryptography software.");
 392   }
 393   obj.free();
 394   return encrypted;
 395 }
 396
 397 GBool XRef::okToPrint() {
 398   return gTrue;
 399 }
 400
 401 GBool XRef::okToCopy() {
 402   return gTrue;
 403 }
 404
 405 Object *XRef::fetch(int num, int gen, Object *obj) {
 406   XRefEntry *e;
 407   Parser *parser;
 408   Object obj1, obj2, obj3;
 409
 410   // check for bogus ref - this can happen in corrupted PDF files
 411   if (num < 0 || num >= size) {
 412     obj->initNull();
 413     return obj;
 414   }
 415
 416   e = &entries[num];
 417   if (e->gen == gen && e->offset >= 0) {
 418     obj1.initNull();
 419     parser = new Parser(new Lexer(
 420       new FileStream(file, start + e->offset, -1, &obj1)));
 421     parser->getObj(&obj1);
 422     parser->getObj(&obj2);
 423     parser->getObj(&obj3);
 424     if (obj1.isInt() && obj1.getInt() == num &&
 425         obj2.isInt() && obj2.getInt() == gen &&
 426         obj3.isCmd("obj")) {
 427       parser->getObj(obj);
 428     } else {
 429       obj->initNull();
 430     }
 431     obj1.free();
 432     obj2.free();
 433     obj3.free();
 434     delete parser;
 435   } else {
 436     obj->initNull();
 437   }
 438   return obj;
 439 }
 440
 441 Object *XRef::getDocInfo(Object *obj) {
 442   return trailerDict.dictLookup("Info", obj);
 443 }