1 //========================================================================
5 // Copyright 1997 Derek B. Noonburg
7 //========================================================================
10 #pragma implementation
23 #include "TextOutputDev.h"
25 #include "TextOutputFontInfo.h"
27 //------------------------------------------------------------------------
28 // Character substitutions
29 //------------------------------------------------------------------------
// Replacement strings for character codes that have no direct ISO Latin-1
// representation.  TextString::addChar indexes this table with (c1 - 256).
31 static char *isoLatin1Subst[] = {
46 "-", "-", // emdash, hyphen
47 "\"", "\"", // quotedblleft, quotedblright
// Plain-ASCII replacement strings (accented letters map to their base
// letter, dashes to "-", curly quotes to "\"").  TextString::addChar
// indexes this table with (c1 - 128).
52 static char *ascii7Subst[] = {
53 "A", "A", "A", "A", // A{acute,circumflex,dieresis,grave}
54 "A", "A", // A{ring,tilde}
57 "E", "E", "E", "E", // E{acute,circumflex,dieresis,grave}
58 "I", "I", "I", "I", // I{acute,circumflex,dieresis,grave}
61 "O", "O", "O", "O", // O{acute,circumflex,dieresis,grave}
62 "O", "O", // O{slash,tilde}
65 "U", "U", "U", "U", // U{acute,circumflex,dieresis,grave}
66 "Y", "Y", // Y{acute,dieresis}
68 "a", "a", "a", "a", // a{acute,circumflex,dieresis,grave}
69 "a", "a", // a{ring,tilde}
72 "e", "e", "e", "e", // e{acute,circumflex,dieresis,grave}
76 "i", "i", "i", "i", // i{acute,circumflex,dieresis,grave}
79 "o", "o", "o", "o", // o{acute,circumflex,dieresis,grave}
80 "o", "o", // o{slash,tilde}
83 "u", "u", "u", "u", // u{acute,circumflex,dieresis,grave}
84 "y", "y", // y{acute,dieresis}
89 "-", "-", "-", // emdash, endash, hyphen
90 "\"", "\"", // quotedblleft, quotedblright
96 //------------------------------------------------------------------------
98 //------------------------------------------------------------------------
// Construct an empty TextString located at the graphics state's current
// position.
//   state     - graphics state supplying the current point and font size
//   hexCodes1 - stored in hexCodes; addChar consults it when decoding
//               glyph names
100 TextString::TextString(GfxState *state, GBool hexCodes1) {
// run the state's current point through state->transform() into x, y
103 state->transform(state->getCurX(), state->getCurY(), &x, &y);
// h is the transformed font size; the vertical extent is derived from it
104 h = state->getTransformedFontSize();
105 //~ yMin/yMax computation should use font ascent/descent values
// the string's box is 1.3 * h tall, measured from yMin
107 yMax = yMin + 1.3 * h;
// start with an empty text buffer
109 text = new GString();
113 hexCodes = hexCodes1;
116 TextString::~TextString() {
// Append the text for character code c to this string and record, for
// every character appended, the x coordinate of its right edge in xRight.
//   state     - graphics state; its font supplies the glyph name for c
//   x, dx     - start position and advance used to fill in xRight
//   c         - character code to look up in the current font
//   useASCII7 - NOTE(review): presumably selects ascii7Encoding over
//               isoLatin1Encoding for the name lookup -- confirm
121 void TextString::addChar(GfxState *state, double x, double y,
122 double dx, double dy,
123 Guchar c, GBool useASCII7) {
124 char *charName, *sub;
// i = number of characters already in the string; the xRight entries for
// the characters added by this call start at index i
129 i = text->getLength();
131 // append translated character(s) to string
// look up the glyph name for c in the current font
134 if ((charName = state->getFont()->getCharName(c))) {
// map the glyph name to a character code in the ASCII-7 table ...
136 c1 = ascii7Encoding.getCharCode(charName);
// ... or in the ISO Latin-1 table
138 c1 = isoLatin1Encoding.getCharCode(charName);
140 m = strlen(charName);
// Hex-coded names: 'B', 'C' or 'G' followed by exactly two hex digits;
// the two digits are parsed as the character code.
// NOTE(review): isxdigit()/isdigit() are passed a plain char here; the C
// standard leaves negative arguments (other than EOF) undefined -- consider
// casting to unsigned char.  Also, "%x" expects an unsigned int *; confirm
// the declared type of c1.
141 if (hexCodes && m == 3 &&
142 (charName[0] == 'B' || charName[0] == 'C' ||
143 charName[0] == 'G') &&
144 isxdigit(charName[1]) && isxdigit(charName[2])) {
145 sscanf(charName+1, "%x", &c1);
// Decimal names, 2-3 characters long, beginning with two digits
146 } else if (!hexCodes && m >= 2 && m <= 3 &&
147 isdigit(charName[0]) && isdigit(charName[1])) {
// Names of 3-5 characters whose second character is a digit: skip the
// first character and parse the remainder as a decimal code
151 } else if (!hexCodes && m >= 3 && m <= 5 && isdigit(charName[1])) {
152 c1 = atoi(charName+1);
156 //~ this is a kludge -- is there a standard internal encoding
157 //~ used by all/most Type 1 fonts?
158 if (c1 == 262) // hyphen
160 else if (c1 == 266) // emdash
// re-map c1: take its ISO Latin-1 glyph name and look that name up in the
// ASCII-7 encoding
163 c1 = ascii7Encoding.getCharCode(isoLatin1Encoding.getCharName(c1));
// the ASCII-7 substitution table is indexed by (code - 128)
167 sub = ascii7Subst[c1 - 128];
// the ISO Latin-1 substitution table is indexed by (code - 256)
172 sub = isoLatin1Subst[c1 - 256];
// append the code itself as a single character
182 text->append((char)c1);
186 // update position information
// (v+15) & ~15 rounds v up to a multiple of 16, so xRight is grown in
// blocks of 16 entries and only when i+n exceeds the rounded-up size for i
187 if (i+n > ((i+15) & ~15))
188 xRight = (double *)grealloc(xRight, ((i+n+15) & ~15) * sizeof(double));
// split the advance dx evenly: the j-th new character ends at
// x + (j+1)/n of dx  (n is presumably the number of characters just
// appended -- TODO confirm)
191 for (j = 0; j < n; ++j)
192 xRight[i+j] = x + ((j+1) * dx) / n;
196 //------------------------------------------------------------------------
198 //------------------------------------------------------------------------
// Construct a text page.  useASCII71 is remembered in useASCII7 and later
// passed along to TextString::addChar.
200 TextPage::TextPage(GBool useASCII71) {
201 useASCII7 = useASCII71;
207 TextPage::~TextPage() {
// Begin a new string: allocate a TextString for the given graphics state
// and hex-code flag, and make it the current string (curStr).
211 void TextPage::beginString(GfxState *state, GString *s, GBool hexCodes) {
212 curStr = new TextString(state, hexCodes);
// Add character c to the current string.  The position (x, y) is passed
// through state->transform() and the advance (dx, dy) through
// state->transformDelta(); the transformed values are what
// TextString::addChar receives, together with this page's useASCII7 flag.
215 void TextPage::addChar(GfxState *state, double x, double y,
216 double dx, double dy, Guchar c) {
217 double x1, y1, w1, h1;
// (x1, y1) = transformed position, (w1, h1) = transformed advance
219 state->transform(x, y, &x1, &y1);
220 state->transformDelta(dx, dy, &w1, &h1);
221 curStr->addChar(state, x1, y1, w1, h1, c, useASCII7);
// Finish the current string (curStr) and place it in the y-major list
// headed by yxStrings.
224 void TextPage::endString() {
228 // throw away zero-length strings -- they don't have valid xMin/xMax
229 // values, and they're useless anyway
230 if (curStr->text->getLength() == 0) {
// strings whose box is more than 20 units tall are singled out here
// NOTE(review): units are presumably device coordinates -- confirm
237 if (curStr->yMax - curStr->yMin > 20) {
244 // insert string in y-major list
// y1 and y2 are reference heights at 50% and 80% of the string's box
245 h = curStr->yMax - curStr->yMin;
246 y1 = curStr->yMin + 0.5 * h;
247 y2 = curStr->yMin + 0.8 * h;
// walk the list with p1 trailing p2.  The test matches an existing string
// that starts below curStr's midpoint, or one whose yMax lies beyond y2 and
// that sits entirely to the right of curStr -- presumably the insertion
// point for curStr.
248 for (p1 = NULL, p2 = yxStrings; p2; p1 = p2, p2 = p2->yxNext) {
249 if (y1 < p2->yMin || (y2 < p2->yMax && curStr->xMax < p2->xMin))
// Merge neighbouring entries of the yxStrings list: when the next string
// overlaps the current one vertically and the horizontal gap between them
// is small relative to the current string's height, its text, xRight
// entries and bounds are folded into the current string and it is unlinked.
260 void TextPage::coalesce() {
261 TextString *str1, *str2;
265 #if 0 //~ for debugging
266 for (str1 = yxStrings; str1; str1 = str1->yxNext) {
267 printf("x=%3d..%3d y=%3d..%3d size=%2d '%s'\n",
268 (int)str1->xMin, (int)str1->xMax, (int)str1->yMin, (int)str1->yMax,
269 (int)(str1->yMax - str1->yMin), str1->text->getCString());
271 printf("\n------------------------------------------------------------\n\n");
// str2 is always the string that follows str1 in the list
274 while (str1 && (str2 = str1->yxNext)) {
// space = height of str1, used as the yardstick for the gap thresholds;
// d = horizontal gap from str1's right edge to str2's left edge
// (negative when the two overlap)
275 space = str1->yMax - str1->yMin;
276 d = str2->xMin - str1->xMax;
// two alternative merge tests, a tight one and a looser one
// NOTE(review): presumably selected by a preprocessor conditional -- confirm
278 if (str2->yMin < str1->yMax && d > -0.1 * space && d < 0.2 * space) {
280 if (str2->yMin < str1->yMax && d > -0.5 * space && d < space) {
// n = length of str1 before merging; used below as the write index into
// str1->xRight
282 n = str1->text->getLength();
284 str1->text->append(' ');
285 str1->text->append(str2->text);
// resize xRight to exactly one entry per character of the merged text
286 str1->xRight = (double *)
287 grealloc(str1->xRight, str1->text->getLength() * sizeof(double));
// a separating ' ' (when one was added) ends where str2 begins
289 str1->xRight[n++] = str2->xMin;
// carry over str2's per-character right edges
290 for (i = 0; i < str2->text->getLength(); ++i)
291 str1->xRight[n++] = str2->xRight[i];
// grow str1's bounds to cover str2
292 if (str2->xMax > str1->xMax)
293 str1->xMax = str2->xMax;
294 if (str2->yMax > str1->yMax)
295 str1->yMax = str2->yMax;
// unlink str2 from the list
296 str1->yxNext = str2->yxNext;
// Search the strings on this page for s, comparing characters with
// tolower() (i.e. ignoring case).
//   top    - when false, the search is limited by *xMin / *yMin
//   bottom - when false, the search is limited by *xMax / *yMax
//   xMin, xMax - on a match, receive the left and right edges of the
//                matched characters
// NOTE(review): n is presumably strlen(s) -- confirm.
304 GBool TextPage::findText(char *s, GBool top, GBool bottom,
305 double *xMin, double *yMin,
306 double *xMax, double *yMax) {
312 // scan all strings on page
314 for (str = yxStrings; str; str = str->yxNext) {
316 // check: above top limit?
// true if the string lies entirely above *yMin, or starts above it and
// ends at or left of *xMin
317 if (!top && (str->yMax < *yMin ||
318 (str->yMin < *yMin && str->xMax <= *xMin)))
321 // check: below bottom limit?
// mirror image of the test above, using *yMax and *xMax
322 if (!bottom && (str->yMin > *yMax ||
323 (str->yMax > *yMax && str->xMin >= *xMax)))
326 // search each position in this string
// i runs over every start index at which n characters still fit
327 m = str->text->getLength();
328 for (i = 0, p = str->text->getCString(); i <= m - n; ++i, ++p) {
330 // check: above top limit?
// x = horizontal midpoint of character i (left edge is xMin for the first
// character, otherwise the previous character's right edge)
331 if (!top && str->yMin < *yMin) {
332 x = (((i == 0) ? str->xMin : str->xRight[i-1]) + str->xRight[i]) / 2;
337 // check: below bottom limit?
338 if (!bottom && str->yMax > *yMax) {
339 x = (((i == 0) ? str->xMin : str->xRight[i-1]) + str->xRight[i]) / 2;
344 // compare the strings
// NOTE(review): tolower() is passed a plain char; negative values are
// undefined per the C standard -- consider casting to unsigned char
345 for (p1 = p, q = s; *q; ++p1, ++q) {
346 if (tolower(*p1) != tolower(*q))
// report the horizontal extent of the match: left edge of character i,
// right edge of character i+n-1
352 *xMin = (i == 0) ? str->xMin : str->xRight[i-1];
353 *xMax = str->xRight[i+n-1];
// Collect the text that falls inside the rectangle (xMin, yMin) -
// (xMax, yMax) and return it as a GString.  For each string that
// qualifies, only the characters whose horizontal midpoints lie within
// [xMin, xMax] are copied.
363 GString *TextPage::getText(double xMin, double yMin,
364 double xMax, double yMax) {
367 double x0, x1, x2, y;
375 for (str1 = yxStrings; str1; str1 = str1->yxNext) {
// y = vertical midpoint of the string
376 y = 0.5 * (str1->yMin + str1->yMax);
// the string qualifies if its midpoint is past yMin and it overlaps the
// rectangle horizontally
379 if (y > yMin && str1->xMin < xMax && str1->xMax > xMin) {
380 x0 = x1 = x2 = str1->xMin;
// i1 = first character whose midpoint is at or right of xMin
381 for (i1 = 0; i1 < str1->text->getLength(); ++i1) {
382 x0 = (i1==0) ? str1->xMin : str1->xRight[i1-1];
383 x1 = str1->xRight[i1];
384 if (0.5 * (x0 + x1) >= xMin)
// i2 = last character (scanning back from the end, never below i1) whose
// midpoint is at or left of xMax
387 for (i2 = str1->text->getLength() - 1; i2 > i1; --i2) {
388 x1 = (i2==0) ? str1->xMin : str1->xRight[i2-1];
389 x2 = str1->xRight[i2];
390 if (0.5 * (x1 + x2) <= xMax)
// something has already been collected: check whether this piece starts
// left of, or below, the previous one
// NOTE(review): presumably decides between a line break and a space -- confirm
393 if (s->getLength() > 0) {
394 if (x0 < xPrev || str1->yMin > yPrev) {
// copy characters i1..i2 inclusive
401 s->append(str1->text->getCString() + i1, i2 - i1 + 1);
// Write the page's text to f.  Strings are first ordered by x to assign
// each one an output column, then emitted in yxStrings order, padded out
// to their columns.
411 void TextPage::dump(FILE *f) {
412 TextString *str1, *str2, *str3;
417 // build x-major list
// insert each string into xyStrings, ordered by xMin and then by yMin
// (str2 trails str3 during the scan)
419 for (str1 = yxStrings; str1; str1 = str1->yxNext) {
420 for (str2 = NULL, str3 = xyStrings;
422 str2 = str3, str3 = str3->xyNext) {
423 if (str1->xMin < str3->xMin ||
424 (str1->xMin == str3->xMin && str1->yMin < str3->yMin))
434 // do column assignment
// every string is compared against all strings before it in x order
435 for (str1 = xyStrings; str1; str1 = str1->xyNext) {
437 for (str2 = xyStrings; str2 != str1; str2 = str2->xyNext) {
// str1 starts at or past the end of str2: candidate column is str2's
// column plus its length plus 4
438 if (str1->xMin >= str2->xMax) {
439 col2 = str2->col + str2->text->getLength() + 4;
// str1 starts inside str2: scale str1's horizontal offset within str2 to a
// character count (xMax > xMin holds here because
// str2->xMin < str1->xMin < str2->xMax)
442 } else if (str1->xMin > str2->xMin) {
444 (int)(((str1->xMin - str2->xMin) / (str2->xMax - str2->xMin)) *
445 str2->text->getLength());
454 #if 0 //~ for debugging
455 fprintf(f, "~~~~~~~~~~\n");
456 for (str1 = yxStrings; str1; str1 = str1->yxNext) {
457 fprintf(f, "(%4d,%4d) - (%4d,%4d) [%3d] %s\n",
458 (int)str1->xMin, (int)str1->yMin, (int)str1->xMax, (int)str1->yMax,
459 str1->col, str1->text->getCString());
461 fprintf(f, "~~~~~~~~~~\n");
// yMax tracks the bottom of the line being output; start with the first
// string's yMax (0 for an empty page)
466 yMax = yxStrings ? yxStrings->yMax : 0;
467 for (str1 = yxStrings; str1; str1 = str1->yxNext) {
469 // line this string up with the correct column
470 for (; col1 < str1->col; ++col1)
474 fputs(str1->text->getCString(), f);
// col1 = current output column after writing the string
477 col1 += str1->text->getLength();
479 // update yMax for this line
480 if (str1->yMax > yMax)
483 // if we've hit the end of the line...
// two alternative end-of-line tests; the line continues only if the next
// string overlaps this one vertically and starts to its right
// NOTE(review): presumably selected by a preprocessor conditional -- confirm
485 if (!(str1->yxNext && str1->yxNext->yMin < str1->yMax &&
486 str1->yxNext->xMin >= str1->xMax)) {
488 if (!(str1->yxNext &&
489 str1->yxNext->yMin < 0.2*str1->yMin + 0.8*str1->yMax &&
490 str1->yxNext->xMin >= str1->xMax)) {
496 // print extra vertical space if necessary
499 // find yMin for next line
500 yMin = str1->yxNext->yMin;
501 for (str2 = str1->yxNext; str2; str2 = str2->yxNext) {
502 if (str2->yMin < yMin)
504 if (!(str2->yxNext && str2->yxNext->yMin < str2->yMax &&
505 str2->yxNext->xMin >= str2->xMax))
// d = gap between this line's bottom and the next line's top, in units of
// str1's height, rounded to the nearest integer
510 d = (int)((yMin - yMax) / (str1->yMax - str1->yMin) + 0.5);
515 // set up for next line
517 yMax = str1->yxNext ? str1->yxNext->yMax : 0;
// Reset the page by walking the yxStrings list.
522 void TextPage::clear() {
// p2 is presumably set to p1's successor inside the loop before p1 is
// released -- TODO confirm
529 for (p1 = yxStrings; p1; p1 = p2) {
537 //------------------------------------------------------------------------
539 //------------------------------------------------------------------------
// Construct a text output device.
//   fileName  - output file name; "-" is treated specially
//               (NOTE(review): presumably selects stdout -- confirm)
//   useASCII7 - forwarded to the TextPage that accumulates the text
541 TextOutputDev::TextOutputDev(char *fileName, GBool useASCII7) {
548 if (!strcmp(fileName, "-")) {
// any other name is opened for writing (truncating an existing file)
550 } else if ((f = fopen(fileName, "w"))) {
// fopen failed: report it
553 error(-1, "Couldn't open text file '%s'", fileName);
561 // set up text object
562 text = new TextPage(useASCII7);
565 TextOutputDev::~TextOutputDev() {
572 void TextOutputDev::startPage(int pageNum, GfxState *state) {
576 void TextOutputDev::endPage() {
// Called when the font changes: inspect the new font's glyph names to see
// whether they look like hex character codes.
586 void TextOutputDev::updateFont(GfxState *state) {
591 // look for hex char codes in subsetted font
593 if ((font = state->getFont())) {
// examine the glyph name of every 8-bit character code
594 for (c = 0; c < 256; ++c) {
595 if ((charName = font->getCharName(c))) {
// a hit is a three-character name starting with 'B', 'C' or 'G' in which
// at least one of the two remaining characters is a hex letter
// (a-f / A-F); names made only of decimal digits therefore do not count
596 if ((charName[0] == 'B' || charName[0] == 'C' ||
597 charName[0] == 'G') &&
598 strlen(charName) == 3 &&
599 ((charName[1] >= 'a' && charName[1] <= 'f') ||
600 (charName[1] >= 'A' && charName[1] <= 'F') ||
601 (charName[2] >= 'a' && charName[2] <= 'f') ||
602 (charName[2] >= 'A' && charName[2] <= 'F'))) {
// Start a new string on the text page, passing along this device's
// hexCodes flag.
611 void TextOutputDev::beginString(GfxState *state, GString *s) {
612 text->beginString(state, s, hexCodes);
615 void TextOutputDev::endString(GfxState *state) {
// Forward one character, with its position (x, y) and advance (dx, dy),
// to the text page.
619 void TextOutputDev::drawChar(GfxState *state, double x, double y,
620 double dx, double dy, Guchar c) {
621 text->addChar(state, x, y, dx, dy, c);
// Search the current page for s; all arguments and the result are passed
// straight through to TextPage::findText.
624 GBool TextOutputDev::findText(char *s, GBool top, GBool bottom,
625 double *xMin, double *yMin,
626 double *xMax, double *yMax) {
627 return text->findText(s, top, bottom, xMin, yMin, xMax, yMax);