1 //========================================================================
5 // Copyright 1997-2002 Glyph & Cog, LLC
7 //========================================================================
10 #pragma implementation
23 #include "GlobalParams.h"
24 #include "UnicodeMap.h"
26 #include "TextOutputDev.h"
29 // needed for setting type/creator of MacOS files
30 #include "ICSupport.h"
33 //------------------------------------------------------------------------
35 //------------------------------------------------------------------------
// Creates a string positioned at the graphics state's current point.
// The point is mapped with state->transform(), and the vertical extent
// [yMin, yMax] is derived from the font's ascent and descent scaled by
// fontSize -- or from fixed fractions of fontSize when the state has no
// font.
37 TextString::TextString(GfxState *state, double fontSize) {
41 state->transform(state->getCurX(), state->getCurY(), &x, &y);
42 if ((font = state->getFont())) {
// the ascent side of the glyph box becomes yMin, the descent side yMax
43 yMin = y - font->getAscent() * fontSize;
44 yMax = y - font->getDescent() * fontSize;
46 // this means that the PDF file draws text without a current font,
47 // which should never happen
// fallback box: 0.95 * fontSize on the yMin side of y, 0.35 * fontSize
// on the yMax side
48 yMin = y - 0.95 * fontSize;
49 yMax = y + 0.35 * fontSize;
52 // this is a sanity check for a case that shouldn't happen -- but
53 // if it does happen, we want to avoid dividing by zero later
65 TextString::~TextString() {
// Appends the character u to this string.  The character's right edge
// (x + dx) is stored in xRight[len] and also becomes the string's xMax.
70 void TextString::addChar(GfxState *state, double x, double y,
71 double dx, double dy, Unicode u) {
// resize both parallel arrays (characters and right edges) to `size`
// entries
74 text = (Unicode *)grealloc(text, size * sizeof(Unicode));
75 xRight = (double *)grealloc(xRight, size * sizeof(double));
81 xMax = xRight[len] = x + dx;
85 //------------------------------------------------------------------------
87 //------------------------------------------------------------------------
// Creates a text page.  The yxCur1/yxCur2 pointers that endString()
// consults when placing a string in the y-major list start out NULL.
89 TextPage::TextPage(GBool rawOrderA) {
95 yxCur1 = yxCur2 = NULL;
99 TextPage::~TextPage() {
// Recomputes fontSize, the value handed to each new TextString by
// beginString().  It starts from state->getTransformedFontSize(); for
// Type 3 fonts it is then adjusted heuristically using the width of the
// glyph named "m" and the font matrix.
103 void TextPage::updateFont(GfxState *state) {
110 // adjust the font size
111 fontSize = state->getTransformedFontSize();
112 if ((font = state->getFont()) && font->getType() == fontType3) {
113 // This is a hack which makes it possible to deal with some Type 3
114 // fonts. The problem is that it's impossible to know what the
115 // base coordinate system used in the font is without actually
116 // rendering the font. This code tries to guess by looking at the
117 // width of the character 'm' (which breaks if the font is a
118 // subset that doesn't contain 'm').
// scan all 256 character codes for a glyph whose name is exactly "m"
119 for (code = 0; code < 256; ++code) {
120 if ((name = ((Gfx8BitFont *)font)->getCharName(code)) &&
121 name[0] == 'm' && name[1] == '\0') {
// w is the font's declared width for the code under consideration
126 w = ((Gfx8BitFont *)font)->getWidth(code);
128 // 600 is a generic average 'm' width -- yes, this is a hack
132 fm = font->getFontMatrix();
// scale by |fm[3] / fm[0]| -- presumably the ratio of the font matrix's
// vertical to horizontal scale; TODO confirm against GfxFont
134 fontSize *= fabs(fm[3] / fm[0]);
// Starts a new string: allocates a TextString for the current graphics
// state and fontSize and makes it the current string (curStr).
139 void TextPage::beginString(GfxState *state) {
140 // This check is needed because Type 3 characters can contain
141 // text-drawing operations.
147 curStr = new TextString(state, fontSize);
// Adds one drawn character, given as uLen Unicode values in u, to the
// current string.  (x, y) and (dx, dy) are mapped with
// state->transform() / state->transformDelta(); each Unicode value is
// then appended as its own character, the i-th one offset by i * (w1, h1)
// from the transformed origin.
150 void TextPage::addChar(GfxState *state, double x, double y,
151 double dx, double dy, Unicode *u, int uLen) {
152 double x1, y1, w1, h1, dx2, dy2;
155 state->transform(x, y, &x1, &y1);
// gap test: the new character starts more than 10% of the string's
// height to the right of the previous character's right edge
158 x1 - curStr->xRight[n-1] > 0.1 * (curStr->yMax - curStr->yMin)) {
// the character spacing, scaled by the horizontal scaling, is converted
// to a delta via textTransformDelta()
162 state->textTransformDelta(state->getCharSpace() * state->getHorizScaling(),
166 state->transformDelta(dx, dy, &w1, &h1);
171 for (i = 0; i < uLen; ++i) {
172 curStr->addChar(state, x1 + i*w1, y1 + i*h1, w1, h1, u[i]);
// Finishes the current string (curStr) and places it in the y-major
// list headed by yxStrings.  Zero-length strings are detected via
// curStr->len == 0 (see the comment on that test).
176 void TextPage::endString() {
180 // This check is needed because Type 3 characters can contain
181 // text-drawing operations.
187 // throw away zero-length strings -- they don't have valid xMin/xMax
188 // values, and they're useless anyway
189 if (curStr->len == 0) {
195 // insert string in y-major list
// h is the string's height; y1 and y2 lie 50% and 80% of the way from
// yMin to yMax and serve as the reference heights in the ordering tests
196 h = curStr->yMax - curStr->yMin;
197 y1 = curStr->yMin + 0.5 * h;
198 y2 = curStr->yMin + 0.8 * h;
// test whether the string belongs between yxCur1 and yxCur2 (a NULL
// yxCur1 imposes no lower bound)
202 } else if ((!yxCur1 ||
203 (y1 >= yxCur1->yMin &&
204 (y2 >= yxCur1->yMax || curStr->xMax >= yxCur1->xMin))) &&
206 (y1 < yxCur2->yMin ||
207 (y2 < yxCur2->yMax && curStr->xMax < yxCur2->xMin)))) {
// scan from the head of the list for the first entry p2 that the new
// string sorts before; p1 trails one node behind p2
211 for (p1 = NULL, p2 = yxStrings; p2; p1 = p2, p2 = p2->yxNext) {
212 if (y1 < p2->yMin || (y2 < p2->yMax && curStr->xMax < p2->xMin)) {
// Merges neighbouring strings in the y-major list.  For a pair
// (str1, str2 = str1->yxNext) that passes the vertical test and whose
// horizontal gap d lies strictly between -0.5 and 1.0 times str1's
// height, str2's characters are appended to str1 -- with a space (0x20)
// in between when d exceeds 0.1 times that height -- and str2 is
// unlinked from the list.
228 void TextPage::coalesce() {
229 TextString *str1, *str2;
// disabled debugging aid: prints each string's bounds, height and the
// low byte of every character to stdout
234 #if 0 //~ for debugging
235 for (str1 = yxStrings; str1; str1 = str1->yxNext) {
236 printf("x=%3d..%3d y=%3d..%3d size=%2d '",
237 (int)str1->xMin, (int)str1->xMax, (int)str1->yMin, (int)str1->yMax,
238 (int)(str1->yMax - str1->yMin));
239 for (i = 0; i < str1->len; ++i) {
240 fputc(str1->text[i] & 0xff, stdout);
244 printf("\n------------------------------------------------------------\n\n");
247 while (str1 && (str2 = str1->yxNext)) {
// space: str1's height, used as the unit for the gap thresholds
// d: horizontal gap from str1's right edge to str2's left edge
248 space = str1->yMax - str1->yMin;
249 d = str2->xMin - str1->xMax;
// vertical test: str2's top or bottom edge falls inside str1's vertical
// range, or (when not in raw order) str2 starts above str1's bottom
251 ((str2->yMin >= str1->yMin && str2->yMin <= str1->yMax) ||
252 (str2->yMax >= str1->yMin && str2->yMax <= str1->yMax))) ||
253 (!rawOrder && str2->yMin < str1->yMax)) &&
254 d > -0.5 * space && d < space) {
255 n = str1->len + str2->len;
// a gap wider than 10% of the height is rendered as a space character
256 if ((addSpace = d > 0.1 * space)) {
// round the required capacity up to a multiple of 16
259 str1->size = (n + 15) & ~15;
260 str1->text = (Unicode *)grealloc(str1->text,
261 str1->size * sizeof(Unicode));
262 str1->xRight = (double *)grealloc(str1->xRight,
263 str1->size * sizeof(double));
// the inserted space ends where str2 begins
265 str1->text[str1->len] = 0x20;
266 str1->xRight[str1->len] = str2->xMin;
// copy str2's characters and their right edges onto the end of str1
269 for (i = 0; i < str2->len; ++i) {
270 str1->text[str1->len] = str2->text[i];
271 str1->xRight[str1->len] = str2->xRight[i];
// extend str1's xMax / yMax so that they cover str2
274 if (str2->xMax > str1->xMax) {
275 str1->xMax = str2->xMax;
277 if (str2->yMax > str1->yMax) {
278 str1->yMax = str2->yMax;
// unlink str2 from the y-major list
280 str1->yxNext = str2->yxNext;
// Searches the page's strings, in y-major list order, for the Unicode
// sequence s[0..len-1].  Latin A-Z is folded to lower case on both sides
// before comparing (see the #if 1 block).  When top is false,
// *xMin / *yMin are used as the upper limit of the search region; when
// bottom is false, *xMax / *yMax are used as the lower limit.  The
// horizontal extent of characters i .. i+len-1 is written back to
// *xMin / *xMax.
288 GBool TextPage::findText(Unicode *s, int len,
289 GBool top, GBool bottom,
290 double *xMin, double *yMin,
291 double *xMax, double *yMax) {
298 // scan all strings on page
299 for (str = yxStrings; str; str = str->yxNext) {
301 // check: above top limit?
// true if the string lies wholly above *yMin, or starts above it and
// ends at or left of *xMin
302 if (!top && (str->yMax < *yMin ||
303 (str->yMin < *yMin && str->xMax <= *xMin))) {
307 // check: below bottom limit?
// true if the string lies wholly below *yMax, or ends below it and
// starts at or right of *xMax
308 if (!bottom && (str->yMin > *yMax ||
309 (str->yMax > *yMax && str->xMin >= *xMax))) {
313 // search each position in this string
315 for (i = 0, p = str->text; i <= m - len; ++i, ++p) {
317 // check: above top limit?
318 if (!top && str->yMin < *yMin) {
// x is the horizontal midpoint of character i
319 x = (((i == 0) ? str->xMin : str->xRight[i-1]) + str->xRight[i]) / 2;
325 // check: below bottom limit?
326 if (!bottom && str->yMax > *yMax) {
// x is the horizontal midpoint of character i
327 x = (((i == 0) ? str->xMin : str->xRight[i-1]) + str->xRight[i]) / 2;
333 // compare the strings
334 for (j = 0; j < len; ++j) {
335 #if 1 //~ this lowercases Latin A-Z only -- this will eventually be
336 //~ extended to handle other character sets
// 0x41..0x5a is 'A'..'Z'; both the page text and the search key are
// range-checked
337 if (p[j] >= 0x41 && p[j] <= 0x5a) {
342 if (s[j] >= 0x41 && s[j] <= 0x5a) {
// left edge of character i and right edge of character i+len-1
355 *xMin = (i == 0) ? str->xMin : str->xRight[i-1];
356 *xMax = str->xRight[i + len - 1];
// Collects text from the strings that intersect the rectangle
// (xMin, yMin)-(xMax, yMax) into the GString s.  Characters, spaces and
// end-of-line sequences are converted with the encoding returned by
// globalParams->getTextEncoding().
366 GString *TextPage::getText(double xMin, double yMin,
367 double xMax, double yMax) {
370 char space[8], eol[16], buf[8];
371 int spaceLen, eolLen, n;
373 double x0, x1, x2, y;
// get the output encoding
379 if (!(uMap = globalParams->getTextEncoding())) {
// pre-encode the space character and the end-of-line sequence selected
// by globalParams->getTextEOL(): 0x0a, 0x0d followed by 0x0a, or 0x0d
382 spaceLen = uMap->mapUnicode(0x20, space, sizeof(space));
383 eolLen = 0; // make gcc happy
384 switch (globalParams->getTextEOL()) {
386 eolLen = uMap->mapUnicode(0x0a, eol, sizeof(eol));
389 eolLen = uMap->mapUnicode(0x0d, eol, sizeof(eol));
390 eolLen += uMap->mapUnicode(0x0a, eol + eolLen, sizeof(eol) - eolLen);
393 eolLen = uMap->mapUnicode(0x0d, eol, sizeof(eol));
398 for (str1 = yxStrings; str1; str1 = str1->yxNext) {
// y is the vertical midpoint of the string
399 y = 0.5 * (str1->yMin + str1->yMax);
403 if (y > yMin && str1->xMin < xMax && str1->xMax > xMin) {
// i1: first character whose horizontal midpoint is at or right of xMin
404 x0 = x1 = x2 = str1->xMin;
405 for (i1 = 0; i1 < str1->len; ++i1) {
406 x0 = (i1==0) ? str1->xMin : str1->xRight[i1-1];
407 x1 = str1->xRight[i1];
408 if (0.5 * (x0 + x1) >= xMin) {
// i2: scanning back from the end, the last character whose horizontal
// midpoint is at or left of xMax
412 for (i2 = str1->len - 1; i2 > i1; --i2) {
413 x1 = (i2==0) ? str1->xMin : str1->xRight[i2-1];
414 x2 = str1->xRight[i2];
415 if (0.5 * (x1 + x2) <= xMax) {
// separator between pieces already collected and this one: an EOL when
// this piece starts left of xPrev or below yPrev
419 if (s->getLength() > 0) {
420 if (x0 < xPrev || str1->yMin > yPrev) {
421 s->append(eol, eolLen);
// four encoded spaces
424 for (i = 0; i < 4; ++i) {
425 s->append(space, spaceLen);
// encode characters i1..i2 of this string
429 for (i = i1; i <= i2; ++i) {
430 n = uMap->mapUnicode(str1->text[i], buf, sizeof(buf));
438 s->append(eol, eolLen);
// Writes the page's text through outputFunc(outputStream, ...).
// Characters are converted with the global text encoding.  The function
// builds an x-major list (xyStrings), assigns each string an output
// column, then walks the y-major list emitting padding spaces, the
// string's characters, end-of-line sequences and extra blank lines, and
// finally an end-of-page marker.
444 void TextPage::dump(void *outputStream, TextOutputFunc outputFunc) {
446 char space[8], eol[16], eop[8], buf[8];
447 int spaceLen, eolLen, eopLen, n;
448 TextString *str1, *str2, *str3;
450 int col1, col2, d, i;
452 // get the output encoding
453 if (!(uMap = globalParams->getTextEncoding())) {
// pre-encode the space character and the end-of-line sequence selected
// by globalParams->getTextEOL(): 0x0a, 0x0d followed by 0x0a, or 0x0d
456 spaceLen = uMap->mapUnicode(0x20, space, sizeof(space));
457 eolLen = 0; // make gcc happy
458 switch (globalParams->getTextEOL()) {
460 eolLen = uMap->mapUnicode(0x0a, eol, sizeof(eol));
463 eolLen = uMap->mapUnicode(0x0d, eol, sizeof(eol));
464 eolLen += uMap->mapUnicode(0x0a, eol + eolLen, sizeof(eol) - eolLen);
467 eolLen = uMap->mapUnicode(0x0d, eol, sizeof(eol));
// end-of-page marker: form feed (0x0c) in the output encoding
470 eopLen = uMap->mapUnicode(0x0c, eop, sizeof(eop));
472 // build x-major list
// each string from the y-major list is placed in xyStrings ahead of the
// first entry with a larger xMin (ties broken by smaller yMin)
474 for (str1 = yxStrings; str1; str1 = str1->yxNext) {
475 for (str2 = NULL, str3 = xyStrings;
477 str2 = str3, str3 = str3->xyNext) {
478 if (str1->xMin < str3->xMin ||
479 (str1->xMin == str3->xMin && str1->yMin < str3->yMin)) {
491 // do column assignment
// each string is compared against every string before it in x-major
// order to derive a candidate column col2
492 for (str1 = xyStrings; str1; str1 = str1->xyNext) {
494 for (str2 = xyStrings; str2 != str1; str2 = str2->xyNext) {
// str1 starts at or right of str2's right edge: place it after str2's
// text plus 4 columns
495 if (str1->xMin >= str2->xMax) {
496 col2 = str2->col + str2->len + 4;
// str1 starts inside str2's horizontal span: the column is derived from
// the fractional position of str1->xMin within that span
500 } else if (str1->xMin > str2->xMin) {
502 (int)(((str1->xMin - str2->xMin) / (str2->xMax - str2->xMin)) *
// NOTE(review): this disabled debug block sends its framing to
// outputStream (cast to FILE *) but the characters themselves to stdout
512 #if 0 //~ for debugging
513 fprintf((FILE *)outputStream, "~~~~~~~~~~\n");
514 for (str1 = yxStrings; str1; str1 = str1->yxNext) {
515 fprintf((FILE *)outputStream, "(%4d,%4d) - (%4d,%4d) [%3d] '",
516 (int)str1->xMin, (int)str1->yMin,
517 (int)str1->xMax, (int)str1->yMax, str1->col);
518 for (i = 0; i < str1->len; ++i) {
519 fputc(str1->text[i] & 0xff, stdout);
523 fprintf((FILE *)outputStream, "~~~~~~~~~~\n");
// yMax tracks the lowest bottom edge seen on the current output line
528 yMax = yxStrings ? yxStrings->yMax : 0;
529 for (str1 = yxStrings; str1; str1 = str1->yxNext) {
531 // line this string up with the correct column
532 if (rawOrder && col1 == 0) {
// pad with encoded spaces until the string's assigned column is reached
535 for (; col1 < str1->col; ++col1) {
536 (*outputFunc)(outputStream, space, spaceLen);
// emit each character that the encoding can represent (n > 0)
541 for (i = 0; i < str1->len; ++i) {
542 if ((n = uMap->mapUnicode(str1->text[i], buf, sizeof(buf))) > 0) {
543 (*outputFunc)(outputStream, buf, n);
550 // update yMax for this line
551 if (str1->yMax > yMax) {
555 // if we've hit the end of the line...
// the line continues only if a next string exists, it is not (in raw
// order) entirely above this one, its top is above the point 80% of the
// way down this string, and it starts at or right of this string's
// right edge
556 if (!(str1->yxNext &&
557 !(rawOrder && str1->yxNext->yMax < str1->yMin) &&
558 str1->yxNext->yMin < 0.2*str1->yMin + 0.8*str1->yMax &&
559 str1->yxNext->xMin >= str1->xMax)) {
562 (*outputFunc)(outputStream, eol, eolLen);
564 // print extra vertical space if necessary
567 // find yMin for next line
568 yMin = str1->yxNext->yMin;
569 for (str2 = str1->yxNext; str2; str2 = str2->yxNext) {
570 if (str2->yMin < yMin) {
// the following strings are examined only while they stay on one line
573 if (!(str2->yxNext && str2->yxNext->yMin < str2->yMax &&
574 str2->yxNext->xMin >= str2->xMax))
// d: vertical gap between the lines, in units of this string's height,
// rounded to the nearest integer
579 d = (int)((yMin - yMax) / (str1->yMax - str1->yMin) + 0.5);
580 // various things (weird font matrices) can result in bogus
581 // values here, so do a sanity check
582 if (rawOrder && d > 2) {
584 } else if (!rawOrder && d > 5) {
588 (*outputFunc)(outputStream, eol, eolLen);
592 // set up for next line
594 yMax = str1->yxNext ? str1->yxNext->yMax : 0;
// end of page: EOL, form feed, EOL
599 (*outputFunc)(outputStream, eol, eolLen);
600 (*outputFunc)(outputStream, eop, eopLen);
601 (*outputFunc)(outputStream, eol, eolLen);
// Resets the page: walks the y-major list and sets the yxCur1/yxCur2
// pointers back to NULL.
606 void TextPage::clear() {
// p2 presumably holds the saved next pointer so that p1 can be released
// inside the loop -- TODO confirm
613 for (p1 = yxStrings; p1; p1 = p2) {
619 yxCur1 = yxCur2 = NULL;
622 //------------------------------------------------------------------------
624 //------------------------------------------------------------------------
// Output callback installed by the file-name constructor of
// TextOutputDev: writes len bytes of text to stream, which is treated as
// a FILE *.  The fwrite() return value is not checked.
626 static void outputToFile(void *stream, char *text, int len) {
627 fwrite(text, 1, len, (FILE *)stream);
// Creates an output device that writes extracted text to fileName.
// A fileName of "-" selects stdout; otherwise the file is opened in
// binary append ("ab") or binary write ("wb") mode according to append,
// and a failure to open is reported through error().  Output is routed
// through outputToFile(), and a TextPage is created with the requested
// rawOrder setting.
630 TextOutputDev::TextOutputDev(char *fileName, GBool rawOrderA, GBool append) {
632 rawOrder = rawOrderA;
638 if (!strcmp(fileName, "-")) {
639 outputStream = stdout;
640 } else if ((outputStream = fopen(fileName, append ? "ab" : "wb"))) {
643 error(-1, "Couldn't open text file '%s'", fileName);
647 outputFunc = &outputToFile;
652 // set up text object
653 text = new TextPage(rawOrder);
// Creates an output device around a caller-supplied output function and
// stream.  The stream is stored as outputStream and a TextPage is
// created with the requested rawOrder setting.
656 TextOutputDev::TextOutputDev(TextOutputFunc func, void *stream,
659 outputStream = stream;
661 rawOrder = rawOrderA;
662 text = new TextPage(rawOrder);
// Destructor: passes the output FILE's handle to
// ICS_MapRefNumAndAssign() (from ICSupport.h, included for setting the
// type/creator of MacOS files) and closes the stream with fclose().
// NOTE(review): confirm both calls are guarded so that stdout and
// caller-supplied streams are never closed here.
666 TextOutputDev::~TextOutputDev() {
669 ICS_MapRefNumAndAssign((short)((FILE *)outputStream)->handle);
671 fclose((FILE *)outputStream);
678 void TextOutputDev::startPage(int pageNum, GfxState *state) {
// Called at the end of a page: writes the accumulated page text to the
// output via TextPage::dump().
682 void TextOutputDev::endPage() {
685 text->dump(outputStream, outputFunc);
// Forwards a font change to the TextPage so that it can recompute its
// font size.
689 void TextOutputDev::updateFont(GfxState *state) {
690 text->updateFont(state);
// Starts a new string on the TextPage.  Only state is forwarded; the
// string argument s is not passed on.
693 void TextOutputDev::beginString(GfxState *state, GString *s) {
694 text->beginString(state);
697 void TextOutputDev::endString(GfxState *state) {
// Forwards one drawn character to the TextPage: its position (x, y),
// displacement (dx, dy) and Unicode values u[0..uLen-1].  originX,
// originY and the character code c are not forwarded.
701 void TextOutputDev::drawChar(GfxState *state, double x, double y,
702 double dx, double dy,
703 double originX, double originY,
704 CharCode c, Unicode *u, int uLen) {
705 text->addChar(state, x, y, dx, dy, u, uLen);
// Searches the current page for the Unicode sequence s[0..len-1] by
// delegating to TextPage::findText() with the same arguments and
// returning its result.
708 GBool TextOutputDev::findText(Unicode *s, int len,
709 GBool top, GBool bottom,
710 double *xMin, double *yMin,
711 double *xMax, double *yMax) {
712 return text->findText(s, len, top, bottom, xMin, yMin, xMax, yMax);