pdf/xpdf/GfxFont.cc

   1 //========================================================================
   2 //
   3 // GfxFont.cc
   4 //
   5 // Copyright 1996-2002 Glyph & Cog, LLC
   6 //
   7 //========================================================================
   8
   9 #include <aconf.h>
  10
  11 #ifdef USE_GCC_PRAGMAS
  12 #pragma implementation
  13 #endif
  14
  15 #include <stdio.h>
  16 #include <stdlib.h>
  17 #include <string.h>
  18 #include <ctype.h>
  19 #include "gmem.h"
  20 #include "Error.h"
  21 #include "Object.h"
  22 #include "Dict.h"
  23 #include "GlobalParams.h"
  24 #include "CMap.h"
  25 #include "CharCodeToUnicode.h"
  26 #include "FontEncodingTables.h"
  27 #include "BuiltinFontTables.h"
  28 #include "FontFile.h"
  29 #include "GfxFont.h"
  30
  31 //------------------------------------------------------------------------
  32
  33 struct StdFontMapEntry {
  34   char *altName;
  35   char *properName;
  36 };
  37
  38 static StdFontMapEntry stdFontMap[] = {
  39   { "Arial",                        "Helvetica" },
  40   { "Arial,Bold",                   "Helvetica-Bold" },
  41   { "Arial,BoldItalic",             "Helvetica-BoldOblique" },
  42   { "Arial,Italic",                 "Helvetica-Oblique" },
  43   { "Arial-Bold",                   "Helvetica-Bold" },
  44   { "Arial-BoldItalic",             "Helvetica-BoldOblique" },
  45   { "Arial-BoldItalicMT",           "Helvetica-BoldOblique" },
  46   { "Arial-BoldMT",                 "Helvetica-Bold" },
  47   { "Arial-Italic",                 "Helvetica-Oblique" },
  48   { "Arial-ItalicMT",               "Helvetica-Oblique" },
  49   { "ArialMT",                      "Helvetica" },
  50   { "Courier,Bold",                 "Courier-Bold" },
  51   { "Courier,Italic",               "Courier-Oblique" },
  52   { "Courier,BoldItalic",           "Courier-BoldOblique" },
  53   { "CourierNew",                   "Courier" },
  54   { "CourierNew,Bold",              "Courier-Bold" },
  55   { "CourierNew,BoldItalic",        "Courier-BoldOblique" },
  56   { "CourierNew,Italic",            "Courier-Oblique" },
  57   { "CourierNew-Bold",              "Courier-Bold" },
  58   { "CourierNew-BoldItalic",        "Courier-BoldOblique" },
  59   { "CourierNew-Italic",            "Courier-Oblique" },
  60   { "CourierNewPS-BoldItalicMT",    "Courier-BoldOblique" },
  61   { "CourierNewPS-BoldMT",          "Courier-Bold" },
  62   { "CourierNewPS-ItalicMT",        "Courier-Oblique" },
  63   { "CourierNewPSMT",               "Courier" },
  64   { "Helvetica,Bold",               "Helvetica-Bold" },
  65   { "Helvetica,BoldItalic",         "Helvetica-BoldOblique" },
  66   { "Helvetica,Italic",             "Helvetica-Oblique" },
  67   { "Helvetica-BoldItalic",         "Helvetica-BoldOblique" },
  68   { "Helvetica-Italic",             "Helvetica-Oblique" },
  69   { "TimesNewRoman",                "Times-Roman" },
  70   { "TimesNewRoman,Bold",           "Times-Bold" },
  71   { "TimesNewRoman,BoldItalic",     "Times-BoldItalic" },
  72   { "TimesNewRoman,Italic",         "Times-Italic" },
  73   { "TimesNewRoman-Bold",           "Times-Bold" },
  74   { "TimesNewRoman-BoldItalic",     "Times-BoldItalic" },
  75   { "TimesNewRoman-Italic",         "Times-Italic" },
  76   { "TimesNewRomanPS",              "Times-Roman" },
  77   { "TimesNewRomanPS-Bold",         "Times-Bold" },
  78   { "TimesNewRomanPS-BoldItalic",   "Times-BoldItalic" },
  79   { "TimesNewRomanPS-BoldItalicMT", "Times-BoldItalic" },
  80   { "TimesNewRomanPS-BoldMT",       "Times-Bold" },
  81   { "TimesNewRomanPS-Italic",       "Times-Italic" },
  82   { "TimesNewRomanPS-ItalicMT",     "Times-Italic" },
  83   { "TimesNewRomanPSMT",            "Times-Roman" }
  84 };
  85
  86 //------------------------------------------------------------------------
  87 // GfxFont
  88 //------------------------------------------------------------------------
  89
  90 GfxFont *GfxFont::makeFont(XRef *xref, char *tagA, Ref idA, Dict *fontDict) {
  91   GString *nameA;
  92   GfxFont *font;
  93   Object obj1;
  94
  95   // get base font name
  96   nameA = NULL;
  97   fontDict->lookup("BaseFont", &obj1);
  98   if (obj1.isName()) {
  99     nameA = new GString(obj1.getName());
 100   }
 101   obj1.free();
 102
 103   // get font type
 104   font = NULL;
 105   fontDict->lookup("Subtype", &obj1);
 106   if (obj1.isName("Type1") || obj1.isName("MMType1")) {
 107     font = new Gfx8BitFont(xref, tagA, idA, nameA, fontType1, fontDict);
 108   } else if (obj1.isName("Type1C")) {
 109     font = new Gfx8BitFont(xref, tagA, idA, nameA, fontType1C, fontDict);
 110   } else if (obj1.isName("Type3")) {
 111     font = new Gfx8BitFont(xref, tagA, idA, nameA, fontType3, fontDict);
 112   } else if (obj1.isName("TrueType")) {
 113     font = new Gfx8BitFont(xref, tagA, idA, nameA, fontTrueType, fontDict);
 114   } else if (obj1.isName("Type0")) {
 115     font = new GfxCIDFont(xref, tagA, idA, nameA, fontDict);
 116   } else {
 117     error(-1, "Unknown font type: '%s'",
 118           obj1.isName() ? obj1.getName() : "???");
 119     font = new Gfx8BitFont(xref, tagA, idA, nameA, fontUnknownType, fontDict);
 120   }
 121   obj1.free();
 122
 123   return font;
 124 }
 125
 126 GfxFont::GfxFont(char *tagA, Ref idA, GString *nameA) {
 127   ok = gFalse;
 128   tag = new GString(tagA);
 129   id = idA;
 130   name = nameA;
 131   embFontName = NULL;
 132   extFontFile = NULL;
 133 }
 134
 135 GfxFont::~GfxFont() {
 136   delete tag;
 137   if (name) {
 138     delete name;
 139   }
 140   if (embFontName) {
 141     delete embFontName;
 142   }
 143   if (extFontFile) {
 144     delete extFontFile;
 145   }
 146 }
 147
 148 void GfxFont::readFontDescriptor(XRef *xref, Dict *fontDict) {
 149   Object obj1, obj2, obj3, obj4;
 150   double t;
 151   int i;
 152
 153   // assume Times-Roman by default (for substitution purposes)
 154   flags = fontSerif;
 155
 156   embFontID.num = -1;
 157   embFontID.gen = -1;
 158   missingWidth = 0;
 159
 160   if (fontDict->lookup("FontDescriptor", &obj1)->isDict()) {
 161
 162     // get flags
 163     if (obj1.dictLookup("Flags", &obj2)->isInt()) {
 164       flags = obj2.getInt();
 165     }
 166     obj2.free();
 167
 168     // get name
 169     obj1.dictLookup("FontName", &obj2);
 170     if (obj2.isName()) {
 171       embFontName = new GString(obj2.getName());
 172     }
 173     obj2.free();
 174
 175     // look for embedded font file
 176     if (obj1.dictLookupNF("FontFile", &obj2)->isRef()) {
 177       if (type == fontType1) {
 178         embFontID = obj2.getRef();
 179       } else {
 180         error(-1, "Mismatch between font type and embedded font file");
 181       }
 182     }
 183     obj2.free();
 184     if (embFontID.num == -1 &&
 185         obj1.dictLookupNF("FontFile2", &obj2)->isRef()) {
 186       if (type == fontTrueType || type == fontCIDType2) {
 187         embFontID = obj2.getRef();
 188       } else {
 189         error(-1, "Mismatch between font type and embedded font file");
 190       }
 191     }
 192     obj2.free();
 193     if (embFontID.num == -1 &&
 194         obj1.dictLookupNF("FontFile3", &obj2)->isRef()) {
 195       if (obj2.fetch(xref, &obj3)->isStream()) {
 196         obj3.streamGetDict()->lookup("Subtype", &obj4);
 197         if (obj4.isName("Type1")) {
 198           if (type == fontType1) {
 199             embFontID = obj2.getRef();
 200           } else {
 201             error(-1, "Mismatch between font type and embedded font file");
 202           }
 203         } else if (obj4.isName("Type1C")) {
 204           if (type == fontType1) {
 205             type = fontType1C;
 206             embFontID = obj2.getRef();
 207           } else if (type == fontType1C) {
 208             embFontID = obj2.getRef();
 209           } else {
 210             error(-1, "Mismatch between font type and embedded font file");
 211           }
 212         } else if (obj4.isName("TrueType")) {
 213           if (type == fontTrueType) {
 214             embFontID = obj2.getRef();
 215           } else {
 216             error(-1, "Mismatch between font type and embedded font file");
 217           }
 218         } else if (obj4.isName("CIDFontType0C")) {
 219           if (type == fontCIDType0) {
 220             type = fontCIDType0C;
 221             embFontID = obj2.getRef();
 222           } else {
 223             error(-1, "Mismatch between font type and embedded font file");
 224           }
 225         } else {
 226           error(-1, "Unknown embedded font type '%s'",
 227                 obj4.isName() ? obj4.getName() : "???");
 228         }
 229         obj4.free();
 230       }
 231       obj3.free();
 232     }
 233     obj2.free();
 234
 235     // look for MissingWidth
 236     obj1.dictLookup("MissingWidth", &obj2);
 237     if (obj2.isNum()) {
 238       missingWidth = obj2.getNum();
 239     }
 240     obj2.free();
 241
 242     // get Ascent and Descent
 243     obj1.dictLookup("Ascent", &obj2);
 244     if (obj2.isNum()) {
 245       t = 0.001 * obj2.getNum();
 246       // some broken font descriptors set ascent and descent to 0
 247       if (t != 0) {
 248         ascent = t;
 249       }
 250     }
 251     obj2.free();
 252     obj1.dictLookup("Descent", &obj2);
 253     if (obj2.isNum()) {
 254       t = 0.001 * obj2.getNum();
 255       // some broken font descriptors set ascent and descent to 0
 256       if (t != 0) {
 257         descent = t;
 258       }
 259     }
 260     obj2.free();
 261
 262     // font FontBBox
 263     if (obj1.dictLookup("FontBBox", &obj2)->isArray()) {
 264       for (i = 0; i < 4 && i < obj2.arrayGetLength(); ++i) {
 265         if (obj2.arrayGet(i, &obj3)->isNum()) {
 266           fontBBox[i] = 0.001 * obj3.getNum();
 267         }
 268         obj3.free();
 269       }
 270     }
 271     obj2.free();
 272
 273   }
 274   obj1.free();
 275 }
 276
 277 CharCodeToUnicode *GfxFont::readToUnicodeCMap(Dict *fontDict, int nBits) {
 278   CharCodeToUnicode *ctu;
 279   GString *buf;
 280   Object obj1;
 281   int c;
 282
 283   if (!fontDict->lookup("ToUnicode", &obj1)->isStream()) {
 284     obj1.free();
 285     return NULL;
 286   }
 287   buf = new GString();
 288   obj1.streamReset();
 289   while ((c = obj1.streamGetChar()) != EOF) {
 290     buf->append(c);
 291   }
 292   obj1.streamClose();
 293   obj1.free();
 294   ctu = CharCodeToUnicode::parseCMap(buf, nBits);
 295   delete buf;
 296   return ctu;
 297 }
 298
 299 void GfxFont::findExtFontFile() {
 300   if (name) {
 301     if (type == fontType1) {
 302       extFontFile = globalParams->findFontFile(name, ".pfa", ".pfb");
 303     } else if (type == fontTrueType) {
 304       extFontFile = globalParams->findFontFile(name, ".ttf", NULL);
 305     }
 306   }
 307 }
 308
 309 char *GfxFont::readExtFontFile(int *len) {
 310   FILE *f;
 311   char *buf;
 312
 313   if (!(f = fopen(extFontFile->getCString(), "rb"))) {
 314     error(-1, "External font file '%s' vanished", extFontFile->getCString());
 315     return NULL;
 316   }
 317   fseek(f, 0, SEEK_END);
 318   *len = (int)ftell(f);
 319   fseek(f, 0, SEEK_SET);
 320   buf = (char *)gmalloc(*len);
 321   if ((int)fread(buf, 1, *len, f) != *len) {
 322     error(-1, "Error reading external font file '%s'", extFontFile);
 323   }
 324   fclose(f);
 325   return buf;
 326 }
 327
 328 char *GfxFont::readEmbFontFile(XRef *xref, int *len) {
 329   char *buf;
 330   Object obj1, obj2;
 331   Stream *str;
 332   int c;
 333   int size, i;
 334
 335   obj1.initRef(embFontID.num, embFontID.gen);
 336   obj1.fetch(xref, &obj2);
 337   if (!obj2.isStream()) {
 338     error(-1, "Embedded font file is not a stream");
 339     obj2.free();
 340     obj1.free();
 341     embFontID.num = -1;
 342     return NULL;
 343   }
 344   str = obj2.getStream();
 345
 346   buf = NULL;
 347   i = size = 0;
 348   str->reset();
 349   while ((c = str->getChar()) != EOF) {
 350     if (i == size) {
 351       size += 4096;
 352       buf = (char *)grealloc(buf, size);
 353     }
 354     buf[i++] = c;
 355   }
 356   *len = i;
 357   str->close();
 358
 359   obj2.free();
 360   obj1.free();
 361
 362   return buf;
 363 }
 364
 365 //------------------------------------------------------------------------
 366 // Gfx8BitFont
 367 //------------------------------------------------------------------------
 368
 369 Gfx8BitFont::Gfx8BitFont(XRef *xref, char *tagA, Ref idA, GString *nameA,
 370                          GfxFontType typeA, Dict *fontDict):
 371   GfxFont(tagA, idA, nameA)
 372 {
 373   BuiltinFont *builtinFont;
 374   char **baseEnc;
 375   GBool baseEncFromFontFile;
 376   char *buf;
 377   int len;
 378   FontFile *fontFile;
 379   int code, code2;
 380   char *charName;
 381   GBool missing, hex;
 382   Unicode toUnicode[256];
 383   double mul;
 384   int firstChar, lastChar;
 385   Gushort w;
 386   Object obj1, obj2, obj3;
 387   int n, i, a, b, m;
 388
 389   type = typeA;
 390   ctu = NULL;
 391
 392   // Acrobat 4.0 and earlier substituted Base14-compatible fonts
 393   // without providing Widths and a FontDescriptor, so we munge the
 394   // names into the proper Base14 names.  (This table is from
 395   // implementation note 44 in the PDF 1.4 spec.)
 396   if (name) {
 397     a = 0;
 398     b = sizeof(stdFontMap) / sizeof(StdFontMapEntry);
 399     // invariant: stdFontMap[a].altName <= name < stdFontMap[b].altName
 400     while (b - a > 1) {
 401       m = (a + b) / 2;
 402       if (name->cmp(stdFontMap[m].altName) >= 0) {
 403         a = m;
 404       } else {
 405         b = m;
 406       }
 407     }
 408     if (!name->cmp(stdFontMap[a].altName)) {
 409       delete name;
 410       name = new GString(stdFontMap[a].properName);
 411     }
 412   }
 413
 414   // is it a built-in font?
 415   builtinFont = NULL;
 416   if (name) {
 417     for (i = 0; i < nBuiltinFonts; ++i) {
 418       if (!name->cmp(builtinFonts[i].name)) {
 419         builtinFont = &builtinFonts[i];
 420         break;
 421       }
 422     }
 423   }
 424
 425   // default ascent/descent values
 426   if (builtinFont) {
 427     ascent = 0.001 * builtinFont->ascent;
 428     descent = 0.001 * builtinFont->descent;
 429     fontBBox[0] = 0.001 * builtinFont->bbox[0];
 430     fontBBox[1] = 0.001 * builtinFont->bbox[1];
 431     fontBBox[2] = 0.001 * builtinFont->bbox[2];
 432     fontBBox[3] = 0.001 * builtinFont->bbox[3];
 433   } else {
 434     ascent = 0.95;
 435     descent = -0.35;
 436     fontBBox[0] = fontBBox[1] = fontBBox[2] = fontBBox[3] = 0;
 437   }
 438
 439   // get info from font descriptor
 440   readFontDescriptor(xref, fontDict);
 441
 442   // look for an external font file
 443   findExtFontFile();
 444
 445   // get font matrix
 446   fontMat[0] = fontMat[3] = 1;
 447   fontMat[1] = fontMat[2] = fontMat[4] = fontMat[5] = 0;
 448   if (fontDict->lookup("FontMatrix", &obj1)->isArray()) {
 449     for (i = 0; i < 6 && i < obj1.arrayGetLength(); ++i) {
 450       if (obj1.arrayGet(i, &obj2)->isNum()) {
 451         fontMat[i] = obj2.getNum();
 452       }
 453       obj2.free();
 454     }
 455   }
 456   obj1.free();
 457
 458   // get Type 3 bounding box, font definition, and resources
 459   if (type == fontType3) {
 460     if (fontDict->lookup("FontBBox", &obj1)->isArray()) {
 461       for (i = 0; i < 4 && i < obj1.arrayGetLength(); ++i) {
 462         if (obj1.arrayGet(i, &obj2)->isNum()) {
 463           fontBBox[i] = obj2.getNum();
 464         }
 465         obj2.free();
 466       }
 467     }
 468     obj1.free();
 469     if (!fontDict->lookup("CharProcs", &charProcs)->isDict()) {
 470       error(-1, "Missing or invalid CharProcs dictionary in Type 3 font");
 471       charProcs.free();
 472     }
 473     if (!fontDict->lookup("Resources", &resources)->isDict()) {
 474       resources.free();
 475     }
 476   }
 477
 478   //----- build the font encoding -----
 479
 480   // Encodings start with a base encoding, which can come from
 481   // (in order of priority):
 482   //   1. FontDict.Encoding or FontDict.Encoding.BaseEncoding
 483   //        - MacRoman / MacExpert / WinAnsi / Standard
 484   //   2. embedded or external font file
 485   //   3. default:
 486   //        - builtin --> builtin encoding
 487   //        - TrueType --> MacRomanEncoding
 488   //        - others --> StandardEncoding
 489   // and then add a list of differences (if any) from
 490   // FontDict.Encoding.Differences.
 491
 492   // check FontDict for base encoding
 493   hasEncoding = gFalse;
 494   baseEnc = NULL;
 495   baseEncFromFontFile = gFalse;
 496   fontDict->lookup("Encoding", &obj1);
 497   if (obj1.isDict()) {
 498     obj1.dictLookup("BaseEncoding", &obj2);
 499     if (obj2.isName("MacRomanEncoding")) {
 500       hasEncoding = gTrue;
 501       baseEnc = macRomanEncoding;
 502     } else if (obj2.isName("MacExpertEncoding")) {
 503       hasEncoding = gTrue;
 504       baseEnc = macExpertEncoding;
 505     } else if (obj2.isName("WinAnsiEncoding")) {
 506       hasEncoding = gTrue;
 507       baseEnc = winAnsiEncoding;
 508     } else if (obj2.isName("StandardEncoding")) {
 509       hasEncoding = gTrue;
 510       baseEnc = standardEncoding;
 511     }
 512     obj2.free();
 513   } else if (obj1.isName("MacRomanEncoding")) {
 514     hasEncoding = gTrue;
 515     baseEnc = macRomanEncoding;
 516   } else if (obj1.isName("MacExpertEncoding")) {
 517     hasEncoding = gTrue;
 518     baseEnc = macExpertEncoding;
 519   } else if (obj1.isName("WinAnsiEncoding")) {
 520     hasEncoding = gTrue;
 521     baseEnc = winAnsiEncoding;
 522   } else if (obj1.isName("StandardEncoding")) {
 523     hasEncoding = gTrue;
 524     baseEnc = standardEncoding;
 525   }
 526
 527   // check embedded or external font file for base encoding
 528   // (only for Type 1 fonts - trying to get an encoding out of a
 529   // TrueType font is a losing proposition)
 530   fontFile = NULL;
 531   buf = NULL;
 532   if ((type == fontType1 || type == fontType1C) &&
 533       (extFontFile || embFontID.num >= 0)) {
 534     if (extFontFile) {
 535       buf = readExtFontFile(&len);
 536     } else {
 537       buf = readEmbFontFile(xref, &len);
 538     }
 539     if (buf) {
 540       if (type == fontType1C && !strncmp(buf, "%!", 2)) {
 541         // various tools (including Adobe's) occasionally embed Type 1
 542         // fonts but label them Type 1C
 543         type = fontType1;
 544       }
 545       if (type == fontType1) {
 546         fontFile = new Type1FontFile(buf, len);
 547       } else {
 548         fontFile = new Type1CFontFile(buf, len);
 549       }
 550       if (fontFile->getName()) {
 551         if (embFontName) {
 552           delete embFontName;
 553         }
 554         embFontName = new GString(fontFile->getName());
 555       }
 556       if (!baseEnc) {
 557         baseEnc = fontFile->getEncoding();
 558         baseEncFromFontFile = gTrue;
 559       }
 560       gfree(buf);
 561     }
 562   }
 563
 564   // get default base encoding
 565   if (!baseEnc) {
 566     if (builtinFont) {
 567       baseEnc = builtinFont->defaultBaseEnc;
 568       hasEncoding = gTrue;
 569     } else if (type == fontTrueType) {
 570       baseEnc = winAnsiEncoding;
 571     } else {
 572       baseEnc = standardEncoding;
 573     }
 574   }
 575
 576   // copy the base encoding
 577   for (i = 0; i < 256; ++i) {
 578     enc[i] = baseEnc[i];
 579     if ((encFree[i] = baseEncFromFontFile) && enc[i]) {
 580       enc[i] = copyString(baseEnc[i]);
 581     }
 582   }
 583
 584   // merge differences into encoding
 585   if (obj1.isDict()) {
 586     obj1.dictLookup("Differences", &obj2);
 587     if (obj2.isArray()) {
 588       hasEncoding = gTrue;
 589       code = 0;
 590       for (i = 0; i < obj2.arrayGetLength(); ++i) {
 591         obj2.arrayGet(i, &obj3);
 592         if (obj3.isInt()) {
 593           code = obj3.getInt();
 594         } else if (obj3.isName()) {
 595           if (code < 256) {
 596             if (encFree[code]) {
 597               gfree(enc[code]);
 598             }
 599             enc[code] = copyString(obj3.getName());
 600             encFree[code] = gTrue;
 601           }
 602           ++code;
 603         } else {
 604           error(-1, "Wrong type in font encoding resource differences (%s)",
 605                 obj3.getTypeName());
 606         }
 607         obj3.free();
 608       }
 609     }
 610     obj2.free();
 611   }
 612   obj1.free();
 613   if (fontFile) {
 614     delete fontFile;
 615   }
 616
 617   //----- build the mapping to Unicode -----
 618
 619   // look for a ToUnicode CMap
 620   if (!(ctu = readToUnicodeCMap(fontDict, 8))) {
 621
 622     // no ToUnicode CMap, so use the char names
 623
 624     // pass 1: use the name-to-Unicode mapping table
 625     missing = hex = gFalse;
 626     for (code = 0; code < 256; ++code) {
 627       if ((charName = enc[code])) {
 628         if (!(toUnicode[code] = globalParams->mapNameToUnicode(charName)) &&
 629             strcmp(charName, ".notdef")) {
 630           // if it wasn't in the name-to-Unicode table, check for a
 631           // name that looks like 'Axx' or 'xx', where 'A' is any letter
 632           // and 'xx' is two hex digits
 633           if ((strlen(charName) == 3 &&
 634                isalpha(charName[0]) &&
 635                isxdigit(charName[1]) && isxdigit(charName[2]) &&
 636                ((charName[1] >= 'a' && charName[1] <= 'f') ||
 637                 (charName[1] >= 'A' && charName[1] <= 'F') ||
 638                 (charName[2] >= 'a' && charName[2] <= 'f') ||
 639                 (charName[2] >= 'A' && charName[2] <= 'F'))) ||
 640               (strlen(charName) == 2 &&
 641                isxdigit(charName[0]) && isxdigit(charName[1]) &&
 642                ((charName[0] >= 'a' && charName[0] <= 'f') ||
 643                 (charName[0] >= 'A' && charName[0] <= 'F') ||
 644                 (charName[1] >= 'a' && charName[1] <= 'f') ||
 645                 (charName[1] >= 'A' && charName[1] <= 'F')))) {
 646             hex = gTrue;
 647           }
 648           missing = gTrue;
 649         }
 650       } else {
 651         toUnicode[code] = 0;
 652       }
 653     }
 654
 655     // pass 2: try to fill in the missing chars, looking for names of
 656     // the form 'Axx', 'xx', 'Ann', 'ABnn', or 'nn', where 'A' and 'B'
 657     // are any letters, 'xx' is two hex digits, and 'nn' is 2-4
 658     // decimal digits
 659     if (missing && globalParams->getMapNumericCharNames()) {
 660       for (code = 0; code < 256; ++code) {
 661         if ((charName = enc[code]) && !toUnicode[code] &&
 662             strcmp(charName, ".notdef")) {
 663           n = strlen(charName);
 664           code2 = -1;
 665           if (hex && n == 3 && isalpha(charName[0]) &&
 666               isxdigit(charName[1]) && isxdigit(charName[2])) {
 667             sscanf(charName+1, "%x", &code2);
 668           } else if (hex && n == 2 &&
 669                      isxdigit(charName[0]) && isxdigit(charName[1])) {
 670             sscanf(charName, "%x", &code2);
 671           } else if (!hex && n >= 2 && n <= 4 &&
 672                      isdigit(charName[0]) && isdigit(charName[1])) {
 673             code2 = atoi(charName);
 674           } else if (n >= 3 && n <= 5 &&
 675                      isdigit(charName[1]) && isdigit(charName[2])) {
 676             code2 = atoi(charName+1);
 677           } else if (n >= 4 && n <= 6 &&
 678                      isdigit(charName[2]) && isdigit(charName[3])) {
 679             code2 = atoi(charName+2);
 680           }
 681           if (code2 >= 0 && code2 <= 0xff) {
 682             toUnicode[code] = (Unicode)code2;
 683           }
 684         }
 685       }
 686     }
 687
 688     ctu = CharCodeToUnicode::make8BitToUnicode(toUnicode);
 689   }
 690
 691   //----- get the character widths -----
 692
 693   // initialize all widths
 694   for (code = 0; code < 256; ++code) {
 695     widths[code] = missingWidth * 0.001;
 696   }
 697
 698   // use widths from font dict, if present
 699   fontDict->lookup("FirstChar", &obj1);
 700   firstChar = obj1.isInt() ? obj1.getInt() : 0;
 701   obj1.free();
 702   fontDict->lookup("LastChar", &obj1);
 703   lastChar = obj1.isInt() ? obj1.getInt() : 255;
 704   obj1.free();
 705   mul = (type == fontType3) ? fontMat[0] : 0.001;
 706   fontDict->lookup("Widths", &obj1);
 707   if (obj1.isArray()) {
 708     flags |= fontFixedWidth;
 709     for (code = firstChar; code <= lastChar; ++code) {
 710       obj1.arrayGet(code - firstChar, &obj2);
 711       if (obj2.isNum()) {
 712         widths[code] = obj2.getNum() * mul;
 713         if (widths[code] != widths[firstChar]) {
 714           flags &= ~fontFixedWidth;
 715         }
 716       }
 717       obj2.free();
 718     }
 719
 720   // use widths from built-in font
 721   } else if (builtinFont) {
 722     // this is a kludge for broken PDF files that encode char 32
 723     // as .notdef
 724     if (builtinFont->widths->getWidth("space", &w)) {
 725       widths[32] = 0.001 * w;
 726     }
 727     for (code = 0; code < 256; ++code) {
 728       if (enc[code] && builtinFont->widths->getWidth(enc[code], &w)) {
 729         widths[code] = 0.001 * w;
 730       }
 731     }
 732
 733   // couldn't find widths -- use defaults
 734   } else {
 735     // this is technically an error -- the Widths entry is required
 736     // for all but the Base-14 fonts -- but certain PDF generators
 737     // apparently don't include widths for Arial and TimesNewRoman
 738     if (isFixedWidth()) {
 739       i = 0;
 740     } else if (isSerif()) {
 741       i = 8;
 742     } else {
 743       i = 4;
 744     }
 745     if (isBold()) {
 746       i += 2;
 747     }
 748     if (isItalic()) {
 749       i += 1;
 750     }
 751     builtinFont = builtinFontSubst[i];
 752     // this is a kludge for broken PDF files that encode char 32
 753     // as .notdef
 754     if (builtinFont->widths->getWidth("space", &w)) {
 755       widths[32] = 0.001 * w;
 756     }
 757     for (code = 0; code < 256; ++code) {
 758       if (enc[code] && builtinFont->widths->getWidth(enc[code], &w)) {
 759         widths[code] = 0.001 * w;
 760       }
 761     }
 762   }
 763   obj1.free();
 764
 765   ok = gTrue;
 766 }
 767
 768 Gfx8BitFont::~Gfx8BitFont() {
 769   int i;
 770
 771   for (i = 0; i < 256; ++i) {
 772     if (encFree[i] && enc[i]) {
 773       gfree(enc[i]);
 774     }
 775   }
 776   ctu->decRefCnt();
 777   if (charProcs.isDict()) {
 778     charProcs.free();
 779   }
 780   if (resources.isDict()) {
 781     resources.free();
 782   }
 783 }
 784
 785 int Gfx8BitFont::getNextChar(char *s, int len, CharCode *code,
 786                              Unicode *u, int uSize, int *uLen,
 787                              double *dx, double *dy, double *ox, double *oy) {
 788   CharCode c;
 789
 790   *code = c = (CharCode)(*s & 0xff);
 791   *uLen = ctu->mapToUnicode(c, u, uSize);
 792   *dx = widths[c];
 793   *dy = *ox = *oy = 0;
 794   return 1;
 795 }
 796
 797 CharCodeToUnicode *Gfx8BitFont::getToUnicode() {
 798   ctu->incRefCnt();
 799   return ctu;
 800 }
 801
 802 Dict *Gfx8BitFont::getCharProcs() {
 803   return charProcs.isDict() ? charProcs.getDict() : (Dict *)NULL;
 804 }
 805
 806 Object *Gfx8BitFont::getCharProc(int code, Object *proc) {
 807   if (charProcs.isDict()) {
 808     charProcs.dictLookup(enc[code], proc);
 809   } else {
 810     proc->initNull();
 811   }
 812   return proc;
 813 }
 814
 815 Dict *Gfx8BitFont::getResources() {
 816   return resources.isDict() ? resources.getDict() : (Dict *)NULL;
 817 }
 818
 819 //------------------------------------------------------------------------
 820 // GfxCIDFont
 821 //------------------------------------------------------------------------
 822
 823 static int cmpWidthExcep(const void *w1, const void *w2) {
 824   return ((GfxFontCIDWidthExcep *)w1)->first -
 825          ((GfxFontCIDWidthExcep *)w2)->first;
 826 }
 827
 828 static int cmpWidthExcepV(const void *w1, const void *w2) {
 829   return ((GfxFontCIDWidthExcepV *)w1)->first -
 830          ((GfxFontCIDWidthExcepV *)w2)->first;
 831 }
 832
 833 GfxCIDFont::GfxCIDFont(XRef *xref, char *tagA, Ref idA, GString *nameA,
 834                        Dict *fontDict):
 835   GfxFont(tagA, idA, nameA)
 836 {
 837   Dict *desFontDict;
 838   GString *collection, *cMapName;
 839   Object desFontDictObj;
 840   Object obj1, obj2, obj3, obj4, obj5, obj6;
 841   int c1, c2;
 842   int excepsSize, i, j, k;
 843
 844   ascent = 0.95;
 845   descent = -0.35;
 846   fontBBox[0] = fontBBox[1] = fontBBox[2] = fontBBox[3] = 0;
 847   cMap = NULL;
 848   ctu = NULL;
 849   widths.defWidth = 1.0;
 850   widths.defHeight = -1.0;
 851   widths.defVY = 0.880;
 852   widths.exceps = NULL;
 853   widths.nExceps = 0;
 854   widths.excepsV = NULL;
 855   widths.nExcepsV = 0;
 856   cidToGID = NULL;
 857   cidToGIDLen = 0;
 858
 859   // get the descendant font
 860   if (!fontDict->lookup("DescendantFonts", &obj1)->isArray()) {
 861     error(-1, "Missing DescendantFonts entry in Type 0 font");
 862     obj1.free();
 863     goto err1;
 864   }
 865   if (!obj1.arrayGet(0, &desFontDictObj)->isDict()) {
 866     error(-1, "Bad descendant font in Type 0 font");
 867     goto err3;
 868   }
 869   obj1.free();
 870   desFontDict = desFontDictObj.getDict();
 871
 872   // font type
 873   if (!desFontDict->lookup("Subtype", &obj1)) {
 874     error(-1, "Missing Subtype entry in Type 0 descendant font");
 875     goto err3;
 876   }
 877   if (obj1.isName("CIDFontType0")) {
 878     type = fontCIDType0;
 879   } else if (obj1.isName("CIDFontType2")) {
 880     type = fontCIDType2;
 881   } else {
 882     error(-1, "Unknown Type 0 descendant font type '%s'",
 883           obj1.isName() ? obj1.getName() : "???");
 884     goto err3;
 885   }
 886   obj1.free();
 887
 888   // get info from font descriptor
 889   readFontDescriptor(xref, desFontDict);
 890
 891   // look for an external font file
 892   findExtFontFile();
 893
 894   //----- encoding info -----
 895
 896   // char collection
 897   if (!desFontDict->lookup("CIDSystemInfo", &obj1)->isDict()) {
 898     error(-1, "Missing CIDSystemInfo dictionary in Type 0 descendant font");
 899     goto err3;
 900   }
 901   obj1.dictLookup("Registry", &obj2);
 902   obj1.dictLookup("Ordering", &obj3);
 903   if (!obj2.isString() || !obj3.isString()) {
 904     error(-1, "Invalid CIDSystemInfo dictionary in Type 0 descendant font");
 905     goto err4;
 906   }
 907   collection = obj2.getString()->copy()->append('-')->append(obj3.getString());
 908   obj3.free();
 909   obj2.free();
 910   obj1.free();
 911
 912   // look for a ToUnicode CMap
 913   if (!(ctu = readToUnicodeCMap(fontDict, 16))) {
 914
 915     // the "Adobe-Identity" and "Adobe-UCS" collections don't have
 916     // cidToUnicode files
 917     if (collection->cmp("Adobe-Identity") &&
 918         collection->cmp("Adobe-UCS")) {
 919
 920       // look for a user-supplied .cidToUnicode file
 921       if (!(ctu = globalParams->getCIDToUnicode(collection))) {
 922         error(-1, "Unknown character collection '%s'",
 923               collection->getCString());
 924         delete collection;
 925         goto err2;
 926       }
 927     }
 928   }
 929
 930   // encoding (i.e., CMap)
 931   //~ need to handle a CMap stream here
 932   //~ also need to deal with the UseCMap entry in the stream dict
 933   if (!fontDict->lookup("Encoding", &obj1)->isName()) {
 934     error(-1, "Missing or invalid Encoding entry in Type 0 font");
 935     delete collection;
 936     goto err3;
 937   }
 938   cMapName = new GString(obj1.getName());
 939   obj1.free();
 940   if (!(cMap = globalParams->getCMap(collection, cMapName))) {
 941     error(-1, "Unknown CMap '%s' for character collection '%s'",
 942           cMapName->getCString(), collection->getCString());
 943     delete collection;
 944     delete cMapName;
 945     goto err2;
 946   }
 947   delete collection;
 948   delete cMapName;
 949
 950   // CIDToGIDMap (for embedded TrueType fonts)
 951   if (type == fontCIDType2) {
 952     fontDict->lookup("CIDToGIDMap", &obj1);
 953     if (obj1.isStream()) {
 954       cidToGIDLen = 0;
 955       i = 64;
 956       cidToGID = (Gushort *)gmalloc(i * sizeof(Gushort));
 957       obj1.streamReset();
 958       while ((c1 = obj1.streamGetChar()) != EOF &&
 959              (c2 = obj1.streamGetChar()) != EOF) {
 960         if (cidToGIDLen == i) {
 961           i *= 2;
 962           cidToGID = (Gushort *)grealloc(cidToGID, i * sizeof(Gushort));
 963         }
 964         cidToGID[cidToGIDLen++] = (Gushort)((c1 << 8) + c2);
 965       }
 966     } else if (!obj1.isName("Identity") && !obj1.isNull()) {
 967       error(-1, "Invalid CIDToGIDMap entry in CID font");
 968     }
 969     obj1.free();
 970   }
 971
 972   //----- character metrics -----
 973
 974   // default char width
 975   if (desFontDict->lookup("DW", &obj1)->isInt()) {
 976     widths.defWidth = obj1.getInt() * 0.001;
 977   }
 978   obj1.free();
 979
 980   // char width exceptions
 981   if (desFontDict->lookup("W", &obj1)->isArray()) {
 982     excepsSize = 0;
 983     i = 0;
 984     while (i + 1 < obj1.arrayGetLength()) {
 985       obj1.arrayGet(i, &obj2);
 986       obj1.arrayGet(i + 1, &obj3);
 987       if (obj2.isInt() && obj3.isInt() && i + 2 < obj1.arrayGetLength()) {
 988         if (obj1.arrayGet(i + 2, &obj4)->isNum()) {
 989           if (widths.nExceps == excepsSize) {
 990             excepsSize += 16;
 991             widths.exceps = (GfxFontCIDWidthExcep *)
 992               grealloc(widths.exceps,
 993                        excepsSize * sizeof(GfxFontCIDWidthExcep));
 994           }
 995           widths.exceps[widths.nExceps].first = obj2.getInt();
 996           widths.exceps[widths.nExceps].last = obj3.getInt();
 997           widths.exceps[widths.nExceps].width = obj4.getNum() * 0.001;
 998           ++widths.nExceps;
 999         } else {
1000           error(-1, "Bad widths array in Type 0 font");
1001         }
1002         obj4.free();
1003         i += 3;
1004       } else if (obj2.isInt() && obj3.isArray()) {
1005         if (widths.nExceps + obj3.arrayGetLength() > excepsSize) {
1006           excepsSize = (widths.nExceps + obj3.arrayGetLength() + 15) & ~15;
1007           widths.exceps = (GfxFontCIDWidthExcep *)
1008             grealloc(widths.exceps,
1009                      excepsSize * sizeof(GfxFontCIDWidthExcep));
1010         }
1011         j = obj2.getInt();
1012         for (k = 0; k < obj3.arrayGetLength(); ++k) {
1013           if (obj3.arrayGet(k, &obj4)->isNum()) {
1014             widths.exceps[widths.nExceps].first = j;
1015             widths.exceps[widths.nExceps].last = j;
1016             widths.exceps[widths.nExceps].width = obj4.getNum() * 0.001;
1017             ++j;
1018             ++widths.nExceps;
1019           } else {
1020             error(-1, "Bad widths array in Type 0 font");
1021           }
1022           obj4.free();
1023         }
1024         i += 2;
1025       } else {
1026         error(-1, "Bad widths array in Type 0 font");
1027         ++i;
1028       }
1029       obj3.free();
1030       obj2.free();
1031     }
1032     qsort(widths.exceps, widths.nExceps, sizeof(GfxFontCIDWidthExcep),
1033           &cmpWidthExcep);
1034   }
1035   obj1.free();
1036
1037   // default metrics for vertical font
1038   if (desFontDict->lookup("DW2", &obj1)->isArray() &&
1039       obj1.arrayGetLength() == 2) {
1040     if (obj1.arrayGet(0, &obj2)->isNum()) {
1041       widths.defVY = obj1.getNum() * 0.001;
1042     }
1043     obj2.free();
1044     if (obj1.arrayGet(1, &obj2)->isNum()) {
1045       widths.defHeight = obj1.getNum() * 0.001;
1046     }
1047     obj2.free();
1048   }
1049   obj1.free();
1050
1051   // char metric exceptions for vertical font
1052   if (desFontDict->lookup("W2", &obj1)->isArray()) {
1053     excepsSize = 0;
1054     i = 0;
1055     while (i + 1 < obj1.arrayGetLength()) {
1056       obj1.arrayGet(0, &obj2);
1057       obj2.arrayGet(0, &obj3);
1058       if (obj2.isInt() && obj3.isInt() && i + 4 < obj1.arrayGetLength()) {
1059         if (obj1.arrayGet(i + 2, &obj4)->isNum() &&
1060             obj1.arrayGet(i + 3, &obj5)->isNum() &&
1061             obj1.arrayGet(i + 4, &obj6)->isNum()) {
1062           if (widths.nExcepsV == excepsSize) {
1063             excepsSize += 16;
1064             widths.excepsV = (GfxFontCIDWidthExcepV *)
1065               grealloc(widths.excepsV,
1066                        excepsSize * sizeof(GfxFontCIDWidthExcepV));
1067           }
1068           widths.excepsV[widths.nExcepsV].first = obj2.getInt();
1069           widths.excepsV[widths.nExcepsV].last = obj3.getInt();
1070           widths.excepsV[widths.nExcepsV].height = obj4.getNum() * 0.001;
1071           widths.excepsV[widths.nExcepsV].vx = obj5.getNum() * 0.001;
1072           widths.excepsV[widths.nExcepsV].vy = obj6.getNum() * 0.001;
1073           ++widths.nExcepsV;
1074         } else {
1075           error(-1, "Bad widths (W2) array in Type 0 font");
1076         }
1077         obj6.free();
1078         obj5.free();
1079         obj4.free();
1080         i += 5;
1081       } else if (obj2.isInt() && obj3.isArray()) {
1082         if (widths.nExcepsV + obj3.arrayGetLength() / 3 > excepsSize) {
1083           excepsSize =
1084             (widths.nExcepsV + obj3.arrayGetLength() / 3 + 15) & ~15;
1085           widths.excepsV = (GfxFontCIDWidthExcepV *)
1086             grealloc(widths.excepsV,
1087                      excepsSize * sizeof(GfxFontCIDWidthExcepV));
1088         }
1089         j = obj2.getInt();
1090         for (k = 0; k < obj3.arrayGetLength(); ++k) {
1091           if (obj3.arrayGet(k, &obj4)->isNum() &&
1092               obj3.arrayGet(k, &obj5)->isNum() &&
1093               obj3.arrayGet(k, &obj6)->isNum()) {
1094             widths.excepsV[widths.nExceps].first = j;
1095             widths.excepsV[widths.nExceps].last = j;
1096             widths.excepsV[widths.nExceps].height = obj4.getNum() * 0.001;
1097             widths.excepsV[widths.nExceps].vx = obj5.getNum() * 0.001;
1098             widths.excepsV[widths.nExceps].vy = obj6.getNum() * 0.001;
1099             ++j;
1100             ++widths.nExcepsV;
1101           } else {
1102             error(-1, "Bad widths (W2) array in Type 0 font");
1103           }
1104           obj6.free();
1105           obj5.free();
1106           obj4.free();
1107         }
1108         i += 2;
1109       } else {
1110         error(-1, "Bad widths (W2) array in Type 0 font");
1111         ++i;
1112       }
1113       obj3.free();
1114       obj2.free();
1115     }
1116     qsort(widths.excepsV, widths.nExcepsV, sizeof(GfxFontCIDWidthExcepV),
1117           &cmpWidthExcepV);
1118   }
1119   obj1.free();
1120
1121   desFontDictObj.free();
1122   ok = gTrue;
1123   return;
1124
1125  err4:
1126   obj3.free();
1127   obj2.free();
1128  err3:
1129   obj1.free();
1130  err2:
1131   desFontDictObj.free();
1132  err1:;
1133 }
1134
1135 GfxCIDFont::~GfxCIDFont() {
1136   if (cMap) {
1137     cMap->decRefCnt();
1138   }
1139   if (ctu) {
1140     ctu->decRefCnt();
1141   }
1142   gfree(widths.exceps);
1143   gfree(widths.excepsV);
1144   if (cidToGID) {
1145     gfree(cidToGID);
1146   }
1147 }
1148
1149 int GfxCIDFont::getNextChar(char *s, int len, CharCode *code,
1150                             Unicode *u, int uSize, int *uLen,
1151                             double *dx, double *dy, double *ox, double *oy) {
1152   CID cid;
1153   double w, h, vx, vy;
1154   int n, a, b, m;
1155
1156   if (!cMap) {
1157     *code = 0;
1158     *uLen = 0;
1159     *dx = *dy = 0;
1160     return 1;
1161   }
1162
1163   *code = (CharCode)(cid = cMap->getCID(s, len, &n));
1164   if (ctu) {
1165     *uLen = ctu->mapToUnicode(cid, u, uSize);
1166   } else {
1167     *uLen = 0;
1168   }
1169
1170   // horizontal
1171   if (cMap->getWMode() == 0) {
1172     w = widths.defWidth;
1173     h = vx = vy = 0;
1174     if (widths.nExceps > 0 && cid >= widths.exceps[0].first) {
1175       a = 0;
1176       b = widths.nExceps;
1177       // invariant: widths.exceps[a].first <= cid < widths.exceps[b].first
1178       while (b - a > 1) {
1179         m = (a + b) / 2;
1180         if (widths.exceps[m].first <= cid) {
1181           a = m;
1182         } else {
1183           b = m;
1184         }
1185       }
1186       if (cid <= widths.exceps[a].last) {
1187         w = widths.exceps[a].width;
1188       }
1189     }
1190
1191   // vertical
1192   } else {
1193     w = 0;
1194     h = widths.defHeight;
1195     vx = widths.defWidth / 2;
1196     vy = widths.defVY;
1197     if (widths.nExcepsV > 0 && cid >= widths.excepsV[0].first) {
1198       a = 0;
1199       b = widths.nExcepsV;
1200       // invariant: widths.excepsV[a].first <= cid < widths.excepsV[b].first
1201       while (b - a > 1) {
1202         m = (a + b) / 2;
1203         if (widths.excepsV[m].last <= cid) {
1204           a = m;
1205         } else {
1206           b = m;
1207         }
1208       }
1209       if (cid <= widths.excepsV[a].last) {
1210         h = widths.excepsV[a].height;
1211         vx = widths.excepsV[a].vx;
1212         vy = widths.excepsV[a].vy;
1213       }
1214     }
1215   }
1216
1217   *dx = w;
1218   *dy = h;
1219   *ox = vx;
1220   *oy = vy;
1221
1222   return n;
1223 }
1224
1225 int GfxCIDFont::getWMode() {
1226   return cMap ? cMap->getWMode() : 0;
1227 }
1228
1229 CharCodeToUnicode *GfxCIDFont::getToUnicode() {
1230   ctu->incRefCnt();
1231   return ctu;
1232 }
1233
1234 GString *GfxCIDFont::getCollection() {
1235   return cMap ? cMap->getCollection() : (GString *)NULL;
1236 }
1237
1238 //------------------------------------------------------------------------
1239 // GfxFontDict
1240 //------------------------------------------------------------------------
1241
1242 GfxFontDict::GfxFontDict(XRef *xref, Dict *fontDict) {
1243   int i;
1244   Object obj1, obj2;
1245   Ref r;
1246
1247   numFonts = fontDict->getLength();
1248   fonts = (GfxFont **)gmalloc(numFonts * sizeof(GfxFont *));
1249   for (i = 0; i < numFonts; ++i) {
1250     fontDict->getValNF(i, &obj1);
1251     obj1.fetch(xref, &obj2);
1252     if (obj2.isDict()) {
1253       if (obj1.isRef()) {
1254         r = obj1.getRef();
1255       } else {
1256         // no indirect reference for this font, so invent a unique one
1257         // (legal generation numbers are five digits, so any 6-digit
1258         // number would be safe)
1259         r.num = i;
1260         r.gen = 999999;
1261       }
1262       fonts[i] = GfxFont::makeFont(xref, fontDict->getKey(i),
1263                                    r, obj2.getDict());
1264       if (fonts[i] && !fonts[i]->isOk()) {
1265         delete fonts[i];
1266         fonts[i] = NULL;
1267       }
1268     } else {
1269       error(-1, "font resource is not a dictionary");
1270       fonts[i] = NULL;
1271     }
1272     obj1.free();
1273     obj2.free();
1274   }
1275 }
1276
1277 GfxFontDict::~GfxFontDict() {
1278   int i;
1279
1280   for (i = 0; i < numFonts; ++i) {
1281     if (fonts[i]) {
1282       delete fonts[i];
1283     }
1284   }
1285   gfree(fonts);
1286 }
1287
1288 GfxFont *GfxFontDict::lookup(char *tag) {
1289   int i;
1290
1291   for (i = 0; i < numFonts; ++i) {
1292     if (fonts[i] && fonts[i]->matches(tag)) {
1293       return fonts[i];
1294     }
1295   }
1296   return NULL;
1297 }