1 //========================================================================
5 // Copyright 1997-2002 Glyph & Cog, LLC
7 //========================================================================
11 #ifdef USE_GCC_PRAGMAS
12 #pragma implementation
25 #include "GlobalParams.h"
26 #include "UnicodeMap.h"
28 #include "TextOutputDev.h"
31 // needed for setting type/creator of MacOS files
32 #include "ICSupport.h"
35 //------------------------------------------------------------------------
37 //------------------------------------------------------------------------
39 // Minimum and maximum inter-word spacing (as a fraction of the average
41 #define wordMinSpaceWidth 0.3
42 #define wordMaxSpaceWidth 2.0
44 // Default min and max inter-word spacing (when the average character
46 #define wordDefMinSpaceWidth 0.2
47 #define wordDefMaxSpaceWidth 1.5
49 // Max difference in x,y coordinates (as a fraction of the font size)
50 // allowed for duplicated text (fake boldface, drop shadows) which is
52 #define dupMaxDeltaX 0.2
53 #define dupMaxDeltaY 0.2
55 // Min overlap (as a fraction of the font size) required for two
56 // lines to be considered vertically overlapping.
57 #define lineOverlapSlack 0.5
59 // Max difference in baseline y coordinates (as a fraction of the font
60 // size) allowed for words which are to be grouped into a line, not
61 // including sub/superscripts.
62 #define lineMaxBaselineDelta 0.1
64 // Max ratio of font sizes allowed for words which are to be grouped
65 // into a line, not including sub/superscripts.
66 #define lineMaxFontSizeRatio 1.4
68 // Min spacing (as a fraction of the font size) allowed between words
69 // which are to be grouped into a line.
70 #define lineMinDeltaX -0.5
72 // Minimum vertical overlap (as a fraction of the font size) required
73 // for superscript and subscript words.
74 #define lineMinSuperscriptOverlap 0.3
75 #define lineMinSubscriptOverlap 0.3
77 // Min/max ratio of font sizes allowed for sub/superscripts compared to
79 #define lineMinSubscriptFontSizeRatio 0.4
80 #define lineMaxSubscriptFontSizeRatio 1.01
81 #define lineMinSuperscriptFontSizeRatio 0.4
82 #define lineMaxSuperscriptFontSizeRatio 1.01
84 // Max horizontal spacing (as a fraction of the font size) allowed
85 // before sub/superscripts.
86 #define lineMaxSubscriptDeltaX 0.2
87 #define lineMaxSuperscriptDeltaX 0.2
89 // Maximum vertical spacing (as a fraction of the font size) allowed
90 // for lines which are to be grouped into a block.
91 #define blkMaxSpacing 2.0
93 // Max ratio of primary font sizes allowed for lines which are to be
94 // grouped into a block.
95 #define blkMaxFontSizeRatio 1.3
97 // Min overlap (as a fraction of the font size) required for two
98 // blocks to be considered vertically overlapping.
99 #define blkOverlapSlack 0.5
101 // Max vertical spacing (as a fraction of the font size) allowed
102 // between blocks which are 'adjacent' when sorted by reading order.
103 #define blkMaxSortSpacing 2.0
105 // Max vertical offset (as a fraction of the font size) of the top and
106 // bottom edges allowed for blocks which are to be grouped into a
108 #define flowMaxDeltaY 1.0
110 //------------------------------------------------------------------------
112 //------------------------------------------------------------------------
114 TextFontInfo::TextFontInfo(GfxState *state) {
116 double t1, t2, avgWidth, w;
119 gfxFont = state->getFont();
120 textMat = state->getTextMat();
121 horizScaling = state->getHorizScaling();
122 if ((t1 = fabs(textMat[0])) > 0.01 &&
123 (t2 = fabs(textMat[3])) > 0.01) {
124 horizScaling *= t1 / t2;
128 minSpaceWidth = horizScaling * wordDefMinSpaceWidth;
129 maxSpaceWidth = horizScaling * wordDefMaxSpaceWidth;
130 } else if (gfxFont->isCIDFont()) {
131 //~ handle 16-bit fonts
132 minSpaceWidth = horizScaling * wordDefMinSpaceWidth;
133 maxSpaceWidth = horizScaling * wordDefMaxSpaceWidth;
137 for (i = 0; i < 256; ++i) {
138 w = ((Gfx8BitFont *)gfxFont)->getWidth(i);
145 minSpaceWidth = horizScaling * wordMinSpaceWidth * avgWidth;
146 maxSpaceWidth = horizScaling * wordMaxSpaceWidth * avgWidth;
151 TextFontInfo::~TextFontInfo() {
154 GBool TextFontInfo::matches(GfxState *state) {
158 textMat = state->getTextMat();
159 h = state->getHorizScaling();
160 if ((t1 = fabs(textMat[0])) > 0.01 &&
161 (t2 = fabs(textMat[3])) > 0.01) {
164 return state->getFont() == gfxFont &&
165 fabs(h - horizScaling) < 0.01;
168 //------------------------------------------------------------------------
170 //------------------------------------------------------------------------
172 TextWord::TextWord(GfxState *state, double x0, double y0,
173 TextFontInfo *fontA, double fontSizeA) {
178 fontSize = fontSizeA;
179 state->transform(x0, y0, &x, &y);
180 if ((gfxFont = font->gfxFont)) {
181 yMin = y - gfxFont->getAscent() * fontSize;
182 yMax = y - gfxFont->getDescent() * fontSize;
184 // this means that the PDF file draws text without a current font,
185 // which should never happen
186 yMin = y - 0.95 * fontSize;
187 yMax = y + 0.35 * fontSize;
190 // this is a sanity check for a case that shouldn't happen -- but
191 // if it does happen, we want to avoid dividing by zero later
205 TextWord::~TextWord() {
210 void TextWord::addChar(GfxState *state, double x, double y,
211 double dx, double dy, Unicode u) {
214 text = (Unicode *)grealloc(text, size * sizeof(Unicode));
215 xRight = (double *)grealloc(xRight, size * sizeof(double));
221 xMax = xRight[len] = x + dx;
225 // Returns true if <this> comes before <word2> in xy order.
226 GBool TextWord::xyBefore(TextWord *word2) {
227 return xMin < word2->xMin ||
228 (xMin == word2->xMin && yMin < word2->yMin);
231 // Merge another word onto the end of this one.
232 void TextWord::merge(TextWord *word2) {
236 if (word2->yMin < yMin) {
239 if (word2->yMax > yMax) {
242 if (len + word2->len > size) {
243 size = len + word2->len;
244 text = (Unicode *)grealloc(text, size * sizeof(Unicode));
245 xRight = (double *)grealloc(xRight, size * sizeof(double));
247 for (i = 0; i < word2->len; ++i) {
248 text[len + i] = word2->text[i];
249 xRight[len + i] = word2->xRight[i];
254 //------------------------------------------------------------------------
256 //------------------------------------------------------------------------
258 TextLine::TextLine() {
270 TextLine::~TextLine() {
273 for (w1 = words; w1; w1 = w2) {
282 // Returns true if <this> comes before <line2> in yx order, allowing
283 // slack for vertically overlapping lines.
284 GBool TextLine::yxBefore(TextLine *line2) {
287 dy = lineOverlapSlack * fontSize;
289 // non-overlapping case
290 if (line2->yMin > yMax - dy ||
291 line2->yMax < yMin + dy) {
292 return yMin < line2->yMin ||
293 (yMin == line2->yMin && xMin < line2->xMin);
297 return xMin < line2->xMin;
300 // Merge another line's words onto the end of this line.
301 void TextLine::merge(TextLine *line2) {
306 if (line2->yMin < yMin) {
309 if (line2->yMax > yMax) {
312 xSpaceR = line2->xSpaceR;
313 for (word = words; word->next; word = word->next) ;
314 word->spaceAfter = gTrue;
315 word->next = line2->words;
317 newLen = len + 1 + line2->len;
318 text = (Unicode *)grealloc(text, newLen * sizeof(Unicode));
319 xRight = (double *)grealloc(xRight, newLen * sizeof(double));
320 text[len] = (Unicode)0x0020;
321 xRight[len] = line2->xMin;
322 for (i = 0; i < line2->len; ++i) {
323 text[len + 1 + i] = line2->text[i];
324 xRight[len + 1 + i] = line2->xRight[i];
327 convertedLen += line2->convertedLen;
328 hyphenated = line2->hyphenated;
331 //------------------------------------------------------------------------
333 //------------------------------------------------------------------------
335 TextBlock::TextBlock() {
340 TextBlock::~TextBlock() {
343 for (l1 = lines; l1; l1 = l2) {
349 // Returns true if <this> comes before <blk2> in yx order, allowing
350 // slack for vertically overlapping blocks.
351 GBool TextBlock::yxBefore(TextBlock *blk2) {
354 dy = blkOverlapSlack * lines->fontSize;
356 // non-overlapping case
357 if (blk2->yMin > yMax - dy ||
358 blk2->yMax < yMin + dy) {
359 return yMin < blk2->yMin ||
360 (yMin == blk2->yMin && xMin < blk2->xMin);
364 return xMin < blk2->xMin;
367 // Merge another block's line onto the right of this one.
368 void TextBlock::mergeRight(TextBlock *blk2) {
369 lines->merge(blk2->lines);
373 xSpaceR = lines->xSpaceR;
376 // Merge another block's lines onto the bottom of this block.
377 void TextBlock::mergeBelow(TextBlock *blk2) {
380 if (blk2->xMin < xMin) {
383 if (blk2->xMax > xMax) {
387 if (blk2->xSpaceL > xSpaceL) {
388 xSpaceL = blk2->xSpaceL;
390 if (blk2->xSpaceR < xSpaceR) {
391 xSpaceR = blk2->xSpaceR;
393 if (blk2->maxFontSize > maxFontSize) {
394 maxFontSize = blk2->maxFontSize;
396 for (line = lines; line->next; line = line->next) ;
397 line->next = line->flowNext = blk2->lines;
401 //------------------------------------------------------------------------
403 //------------------------------------------------------------------------
405 TextFlow::TextFlow() {
410 TextFlow::~TextFlow() {
413 for (b1 = blocks; b1; b1 = b2) {
420 //------------------------------------------------------------------------
422 //------------------------------------------------------------------------
424 TextPage::TextPage(GBool rawOrderA) {
425 rawOrder = rawOrderA;
431 words = wordPtr = NULL;
437 TextPage::~TextPage() {
442 void TextPage::updateFont(GfxState *state) {
446 int code, mCode, letterCode, anyCode;
450 // get the font info object
452 for (i = 0; i < fonts->getLength(); ++i) {
453 font = (TextFontInfo *)fonts->get(i);
454 if (font->matches(state)) {
460 font = new TextFontInfo(state);
464 // adjust the font size
465 gfxFont = state->getFont();
466 fontSize = state->getTransformedFontSize();
467 if (gfxFont && gfxFont->getType() == fontType3) {
468 // This is a hack which makes it possible to deal with some Type 3
469 // fonts. The problem is that it's impossible to know what the
470 // base coordinate system used in the font is without actually
471 // rendering the font. This code tries to guess by looking at the
472 // width of the character 'm' (which breaks if the font is a
473 // subset that doesn't contain 'm').
474 mCode = letterCode = anyCode = -1;
475 for (code = 0; code < 256; ++code) {
476 name = ((Gfx8BitFont *)gfxFont)->getCharName(code);
477 if (name && name[0] == 'm' && name[1] == '\0') {
480 if (letterCode < 0 && name && name[1] == '\0' &&
481 ((name[0] >= 'A' && name[0] <= 'Z') ||
482 (name[0] >= 'a' && name[0] <= 'z'))) {
485 if (anyCode < 0 && name &&
486 ((Gfx8BitFont *)gfxFont)->getWidth(code) > 0) {
491 (w = ((Gfx8BitFont *)gfxFont)->getWidth(mCode)) > 0) {
492 // 0.6 is a generic average 'm' width -- yes, this is a hack
494 } else if (letterCode >= 0 &&
495 (w = ((Gfx8BitFont *)gfxFont)->getWidth(letterCode)) > 0) {
496 // even more of a hack: 0.5 is a generic letter width
498 } else if (anyCode >= 0 &&
499 (w = ((Gfx8BitFont *)gfxFont)->getWidth(anyCode)) > 0) {
500 // better than nothing: 0.5 is a generic character width
503 fm = gfxFont->getFontMatrix();
505 fontSize *= fabs(fm[3] / fm[0]);
510 void TextPage::beginWord(GfxState *state, double x0, double y0) {
511 // This check is needed because Type 3 characters can contain
512 // text-drawing operations (when TextPage is being used via
513 // XOutputDev rather than TextOutputDev).
519 curWord = new TextWord(state, x0, y0, font, fontSize);
522 void TextPage::addChar(GfxState *state, double x, double y,
523 double dx, double dy,
524 CharCode c, Unicode *u, int uLen) {
525 double x1, y1, w1, h1, dx2, dy2, sp;
528 // if the previous char was a space, addChar will have called
529 // endWord, so we need to start a new word
531 beginWord(state, x, y);
534 // throw away chars that aren't inside the page bounds
535 state->transform(x, y, &x1, &y1);
536 if (x1 < 0 || x1 > pageWidth ||
537 y1 < 0 || y1 > pageHeight) {
541 // subtract char and word spacing from the dx,dy values
542 sp = state->getCharSpace();
543 if (c == (CharCode)0x20) {
544 sp += state->getWordSpace();
546 state->textTransformDelta(sp * state->getHorizScaling(), 0, &dx2, &dy2);
549 state->transformDelta(dx, dy, &w1, &h1);
551 // check the tiny chars limit
552 if (!globalParams->getTextKeepTinyChars() &&
553 fabs(w1) < 3 && fabs(h1) < 3) {
554 if (++nTinyChars > 20000) {
559 // break words at space character
560 if (uLen == 1 && u[0] == (Unicode)0x20) {
565 // large char spacing is sometimes used to move text around -- in
566 // this case, break text into individual chars and let the coalesce
567 // function deal with it later
569 if (n > 0 && x1 - curWord->xRight[n-1] >
570 curWord->font->minSpaceWidth * curWord->fontSize) {
571 // large char spacing is sometimes used to move text around
573 beginWord(state, x, y);
576 // add the characters to the current word
581 for (i = 0; i < uLen; ++i) {
582 curWord->addChar(state, x1 + i*w1, y1 + i*h1, w1, h1, u[i]);
586 void TextPage::endWord() {
587 // This check is needed because Type 3 characters can contain
588 // text-drawing operations (when TextPage is being used via
589 // XOutputDev rather than TextOutputDev).
601 void TextPage::addWord(TextWord *word) {
604 // throw away zero-length words -- they don't have valid xMin/xMax
605 // values, and they're useless anyway
606 if (word->len == 0) {
611 // insert word in xy list
616 if (wordPtr && wordPtr->xyBefore(word)) {
623 for (; p2; p1 = p2, p2 = p2->next) {
624 if (word->xyBefore(p2)) {
638 void TextPage::coalesce() {
639 TextWord *word0, *word1, *word2, *word3, *word4;
640 TextLine *line0, *line1, *line2, *line3, *line4, *lineList;
641 TextBlock *blk0, *blk1, *blk2, *blk3, *blk4, *blk5, *blk6;
642 TextBlock *yxBlocks, *blocks, *blkStack;
643 TextFlow *flow0, *flow1;
644 double sz, xLimit, minSpace, maxSpace, yLimit;
650 int col1, col2, d, i, j;
652 #if 0 // for debugging
653 printf("*** initial word list ***\n");
654 for (word0 = words; word0; word0 = word0->next) {
655 printf("word: x=%.2f..%.2f y=%.2f..%.2f base=%.2f: '",
656 word0->xMin, word0->xMax, word0->yMin, word0->yMax, word0->yBase);
657 for (i = 0; i < word0->len; ++i) {
658 fputc(word0->text[i] & 0xff, stdout);
666 //----- discard duplicated text (fake boldface, drop shadows)
670 sz = word0->fontSize;
671 xLimit = word0->xMin + sz * dupMaxDeltaX;
673 for (word1 = word0, word2 = word0->next;
674 word2 && word2->xMin < xLimit;
675 word1 = word2, word2 = word2->next) {
676 if (word2->len == word0->len &&
677 !memcmp(word2->text, word0->text, word0->len * sizeof(Unicode)) &&
678 fabs(word2->yMin - word0->yMin) < sz * dupMaxDeltaY &&
679 fabs(word2->yMax - word0->yMax) < sz * dupMaxDeltaY &&
680 fabs(word2->xMax - word0->xMax) < sz * dupMaxDeltaX) {
686 word1->next = word2->next;
693 #if 0 // for debugging
694 printf("*** words after removing duplicate text ***\n");
695 for (word0 = words; word0; word0 = word0->next) {
696 printf("word: x=%.2f..%.2f y=%.2f..%.2f base=%.2f: '",
697 word0->xMin, word0->xMax, word0->yMin, word0->yMax, word0->yBase);
698 for (i = 0; i < word0->len; ++i) {
699 fputc(word0->text[i] & 0xff, stdout);
711 sz = word0->fontSize;
713 // look for adjacent text which is part of the same word, and
714 // merge it into this word
715 xLimit = word0->xMax + sz * word0->font->minSpaceWidth;
720 word2->xMin < xLimit &&
721 word2->font == word0->font &&
722 fabs(word2->fontSize - sz) < 0.05 &&
723 fabs(word2->yBase - word0->yBase) < 0.05;
726 for (word1 = word0, word2 = word0->next;
727 word2 && word2->xMin < xLimit;
728 word1 = word2, word2 = word2->next) {
729 if (word2->font == word0->font &&
730 fabs(word2->fontSize - sz) < 0.05 &&
731 fabs(word2->yBase - word0->yBase) < 0.05) {
739 word1->next = word2->next;
747 #if 0 // for debugging
748 printf("*** after merging words ***\n");
749 for (word0 = words; word0; word0 = word0->next) {
750 printf("word: x=%.2f..%.2f y=%.2f..%.2f base=%.2f: '",
751 word0->xMin, word0->xMax, word0->yMin, word0->yMax, word0->yBase);
752 for (i = 0; i < word0->len; ++i) {
753 fputc(word0->text[i] & 0xff, stdout);
761 //----- assemble words into lines
763 uMap = globalParams->getTextEncoding();
764 isUnicode = uMap ? uMap->isUnicode() : gFalse;
770 // build a new line object
774 line1 = new TextLine();
775 line1->words = word0;
776 line1->xMin = word0->xMin;
777 line1->xMax = word0->xMax;
778 line1->yMin = word0->yMin;
779 line1->yMax = word0->yMax;
780 line1->yBase = word0->yBase;
781 line1->font = word0->font;
782 line1->fontSize = word0->fontSize;
783 line1->len = word0->len;
784 minSpace = line1->fontSize * word0->font->minSpaceWidth;
785 maxSpace = line1->fontSize * word0->font->maxSpaceWidth;
787 // find subsequent words in the line
789 xLimit = line1->xMax + maxSpace;
791 word3 = word4 = NULL;
794 words->xMin < xLimit &&
795 ((fit1 = lineFit(line1, word0, words)) >= 0)) {
800 for (word1 = NULL, word2 = words;
801 word2 && word2->xMin < xLimit;
802 word1 = word2, word2 = word2->next) {
803 fit2 = lineFit(line1, word0, word2);
804 if (fit2 >= 0 && (!word4 ||
805 (word4 && fit2 < fit1))) {
814 word3->next = word4->next;
820 if (word4->xMax > line1->xMax) {
821 line1->xMax = word4->xMax;
823 if (word4->yMin < line1->yMin) {
824 line1->yMin = word4->yMin;
826 if (word4->yMax > line1->yMax) {
827 line1->yMax = word4->yMax;
829 line1->len += word4->len;
830 if (fit1 > minSpace) {
831 word0->spaceAfter = gTrue;
840 // build the line text
841 line1->text = (Unicode *)gmalloc(line1->len * sizeof(Unicode));
842 line1->xRight = (double *)gmalloc(line1->len * sizeof(double));
843 line1->col = (int *)gmalloc(line1->len * sizeof(int));
845 for (word1 = line1->words; word1; word1 = word1->next) {
846 for (j = 0; j < word1->len; ++j) {
847 line1->text[i] = word1->text[j];
848 line1->xRight[i] = word1->xRight[j];
851 if (word1->spaceAfter && word1->next) {
852 line1->text[i] = (Unicode)0x0020;
853 line1->xRight[i] = word1->next->xMin;
857 line1->convertedLen = 0;
858 for (j = 0; j < line1->len; ++j) {
859 line1->col[j] = line1->convertedLen;
861 ++line1->convertedLen;
863 line1->convertedLen +=
864 uMap->mapUnicode(line1->text[j], buf, sizeof(buf));
868 // check for hyphen at end of line
869 //~ need to check for other chars used as hyphens
870 if (line1->text[line1->len - 1] == (Unicode)'-') {
871 line1->hyphenated = gTrue;
874 // insert line on list
887 #if 0 // for debugging
888 printf("*** lines in xy order ***\n");
889 for (line0 = lineList; line0; line0 = line0->next) {
890 printf("[line: x=%.2f..%.2f y=%.2f..%.2f base=%.2f len=%d]\n",
891 line0->xMin, line0->xMax, line0->yMin, line0->yMax,
892 line0->yBase, line0->len);
893 for (word0 = line0->words; word0; word0 = word0->next) {
894 printf(" word: x=%.2f..%.2f y=%.2f..%.2f base=%.2f fontSz=%.2f space=%d: '",
895 word0->xMin, word0->xMax, word0->yMin, word0->yMax,
896 word0->yBase, word0->fontSize, word0->spaceAfter);
897 for (i = 0; i < word0->len; ++i) {
898 fputc(word0->text[i] & 0xff, stdout);
907 //----- column assignment
909 for (line1 = lineList; line1; line1 = line1->next) {
911 for (line2 = lineList; line2 != line1; line2 = line2->next) {
912 if (line1->xMin >= line2->xMax) {
913 d = (int)((line1->xMin - line2->xMax) /
914 (line1->font->maxSpaceWidth * line1->fontSize));
918 col2 = line2->col[0] + line2->convertedLen + d;
922 } else if (line1->xMin > line2->xMin) {
923 for (i = 0; i < line2->len && line1->xMin >= line2->xRight[i]; ++i) ;
924 col2 = line2->col[i];
930 for (j = 0; j < line1->len; ++j) {
931 line1->col[j] += col1;
935 //----- assemble lines into blocks
940 for (line1 = lines; line1; line1 = line1->next) {
942 line1->xSpaceR = pageWidth;
947 // sort lines into yx order
951 lineList = lineList->next;
952 for (line1 = NULL, line2 = lines;
953 line2 && !line0->yxBefore(line2);
954 line1 = line2, line2 = line2->next) ;
963 // compute whitespace to left and right of each line
965 for (line1 = lines; line1; line1 = line1->next) {
967 // find the first vertically overlapping line
968 for (; line0 && line0->yMax < line1->yMin; line0 = line0->next) ;
970 // check each vertically overlapping line -- look for the nearest
973 line1->xSpaceR = pageWidth;
975 line2 && line2->yMin < line1->yMax;
976 line2 = line2->next) {
977 if (line2->yMax > line1->yMin) {
978 if (line2->xMax < line1->xMin) {
979 if (line2->xMax > line1->xSpaceL) {
980 line1->xSpaceL = line2->xMax;
982 } else if (line2->xMin > line1->xMax) {
983 if (line2->xMin < line1->xSpaceR) {
984 line1->xSpaceR = line2->xMin;
992 #if 0 // for debugging
993 printf("*** lines in yx order ***\n");
994 for (line0 = lines; line0; line0 = line0->next) {
995 printf("[line: x=%.2f..%.2f y=%.2f..%.2f base=%.2f xSpaceL=%.2f xSpaceR=%.2f len=%d]\n",
996 line0->xMin, line0->xMax, line0->yMin, line0->yMax,
997 line0->yBase, line0->xSpaceL, line0->xSpaceR, line0->len);
998 for (word0 = line0->words; word0; word0 = word0->next) {
999 printf(" word: x=%.2f..%.2f y=%.2f..%.2f base=%.2f fontSz=%.2f space=%d: '",
1000 word0->xMin, word0->xMax, word0->yMin, word0->yMax,
1001 word0->yBase, word0->fontSize, word0->spaceAfter);
1002 for (i = 0; i < word0->len; ++i) {
1003 fputc(word0->text[i] & 0xff, stdout);
1017 // build a new block object
1019 lineList = lineList->next;
1021 blk1 = new TextBlock();
1022 blk1->lines = line0;
1023 blk1->xMin = line0->xMin;
1024 blk1->xMax = line0->xMax;
1025 blk1->yMin = line0->yMin;
1026 blk1->yMax = line0->yMax;
1027 blk1->xSpaceL = line0->xSpaceL;
1028 blk1->xSpaceR = line0->xSpaceR;
1029 blk1->maxFontSize = line0->fontSize;
1031 // find subsequent lines in the block
1034 // look for the first horizontally overlapping line below this
1036 yLimit = line0->yMax + blkMaxSpacing * line0->fontSize;
1037 line3 = line4 = NULL;
1039 if (lineList->yMin < yLimit &&
1040 lineList->xMax > blk1->xMin &&
1041 lineList->xMin < blk1->xMax) {
1046 for (line1 = NULL, line2 = lineList;
1047 line2 && line2->yMin < yLimit;
1048 line1 = line2, line2 = line2->next) {
1049 if (line2->xMax > blk1->xMin &&
1050 line2->xMin < blk1->xMax) {
1058 // if there is an overlapping line and it fits in the block, add
1060 if (line4 && blockFit(blk1, line4)) {
1062 line3->next = line4->next;
1064 lineList = line4->next;
1066 line0->next = line0->flowNext = line4;
1068 if (line4->xMin < blk1->xMin) {
1069 blk1->xMin = line4->xMin;
1070 } else if (line4->xMax > blk1->xMax) {
1071 blk1->xMax = line4->xMax;
1073 if (line4->yMax > blk1->yMax) {
1074 blk1->yMax = line4->yMax;
1076 if (line4->xSpaceL > blk1->xSpaceL) {
1077 blk1->xSpaceL = line4->xSpaceL;
1079 if (line4->xSpaceR < blk1->xSpaceR) {
1080 blk1->xSpaceR = line4->xSpaceR;
1082 if (line4->fontSize > blk1->maxFontSize) {
1083 blk1->maxFontSize = line4->fontSize;
1087 // otherwise, we're done with this block
1093 // insert block on list, in yx order
1099 for (blk2 = NULL, blk3 = yxBlocks;
1100 blk3 && !blk1->yxBefore(blk3);
1101 blk2 = blk3, blk3 = blk3->next) ;
1111 #if 0 // for debugging
1112 printf("*** blocks in yx order ***\n");
1113 for (blk0 = yxBlocks; blk0; blk0 = blk0->next) {
1114 printf("[block: x=%.2f..%.2f y=%.2f..%.2f]\n",
1115 blk0->xMin, blk0->xMax, blk0->yMin, blk0->yMax);
1116 for (line0 = blk0->lines; line0; line0 = line0->next) {
1117 printf(" [line: x=%.2f..%.2f y=%.2f..%.2f base=%.2f len=%d]\n",
1118 line0->xMin, line0->xMax, line0->yMin, line0->yMax,
1119 line0->yBase, line0->len);
1120 for (word0 = line0->words; word0; word0 = word0->next) {
1121 printf(" word: x=%.2f..%.2f y=%.2f..%.2f base=%.2f space=%d: '",
1122 word0->xMin, word0->xMax, word0->yMin, word0->yMax,
1123 word0->yBase, word0->spaceAfter);
1124 for (i = 0; i < word0->len; ++i) {
1125 fputc(word0->text[i] & 0xff, stdout);
1135 //----- merge lines and blocks, sort blocks into reading order
1146 // find the next two blocks:
1147 // - if the depth-first traversal stack is empty, take the first
1148 // (upper-left-most) two blocks on the yx-sorted block list
1149 // - otherwise, find the two upper-left-most blocks under the top
1150 // block on the stack
1152 blk3 = blk4 = blk5 = blk6 = NULL;
1153 for (blk1 = NULL, blk2 = yxBlocks;
1155 blk1 = blk2, blk2 = blk2->next) {
1156 if (blk2->yMin > blkStack->yMin &&
1157 blk2->xMax > blkStack->xMin &&
1158 blk2->xMin < blkStack->xMax) {
1159 if (!blk4 || blk2->yxBefore(blk4)) {
1164 } else if (!blk6 || blk2->yxBefore(blk6)) {
1174 blk6 = yxBlocks->next;
1179 // | blkStack | | blkStack
1180 // +---------------------+ --> +--------------
1181 // +------+ +------+ +-----------+
1182 // | blk4 | | blk6 | ... | blk4+blk6 |
1183 // +------+ +------+ +-----------+
1185 yLimit = blkStack->yMax + blkMaxSpacing * blkStack->lines->fontSize;
1187 if (blkStack && blk4 && blk6 &&
1188 !blk4->lines->next && !blk6->lines->next &&
1189 lineFit2(blk4->lines, blk6->lines) &&
1190 blk4->yMin < yLimit &&
1191 blk4->xMin > blkStack->xSpaceL &&
1192 blkStack->xMin > blk4->xSpaceL &&
1193 blk6->xMax < blkStack->xSpaceR) {
1194 blk4->mergeRight(blk6);
1196 blk5->next = blk6->next;
1198 yxBlocks = blk6->next;
1205 // +---------------------+ --> | blkStack+blk2 |
1206 // +---------------------+ | |
1209 } else if (blkStack && blk4 &&
1210 blk4->yMin < yLimit &&
1211 blockFit2(blkStack, blk4)) {
1212 blkStack->mergeBelow(blk4);
1214 blk3->next = blk4->next;
1216 yxBlocks = blk4->next;
1221 // 1. no block found
1222 // 2. non-fully overlapping block found
1223 // 3. large vertical gap above the overlapping block
1224 // then pop the stack and try again
1226 (blkStack && (blk4->xMin < blkStack->xSpaceL ||
1227 blk4->xMax > blkStack->xSpaceR ||
1228 blk4->yMin - blkStack->yMax >
1229 blkMaxSortSpacing * blkStack->maxFontSize))) {
1230 blkStack = blkStack->stackNext;
1232 // add a block to the sorted list
1235 // remove the block from the yx-sorted list
1237 blk3->next = blk4->next;
1239 yxBlocks = blk4->next;
1243 // append the block to the reading-order list
1251 // push the block on the traversal stack
1252 blk4->stackNext = blkStack;
1258 #if 0 // for debugging
1259 printf("*** blocks in reading order (after merging) ***\n");
1260 for (blk0 = blocks; blk0; blk0 = blk0->next) {
1261 printf("[block: x=%.2f..%.2f y=%.2f..%.2f]\n",
1262 blk0->xMin, blk0->xMax, blk0->yMin, blk0->yMax);
1263 for (line0 = blk0->lines; line0; line0 = line0->next) {
1264 printf(" [line: x=%.2f..%.2f y=%.2f..%.2f base=%.2f len=%d]\n",
1265 line0->xMin, line0->xMax, line0->yMin, line0->yMax,
1266 line0->yBase, line0->len);
1267 for (word0 = line0->words; word0; word0 = word0->next) {
1268 printf(" word: x=%.2f..%.2f y=%.2f..%.2f base=%.2f space=%d: '",
1269 word0->xMin, word0->xMax, word0->yMin, word0->yMax,
1270 word0->yBase, word0->spaceAfter);
1271 for (i = 0; i < word0->len; ++i) {
1272 fputc(word0->text[i] & 0xff, stdout);
1282 //----- assemble blocks into flows
1286 // one flow per block
1289 flow1 = new TextFlow();
1290 flow1->blocks = blocks;
1291 flow1->lines = blocks->lines;
1292 flow1->yMin = blocks->yMin;
1293 flow1->yMax = blocks->yMax;
1294 blocks = blocks->next;
1295 flow1->blocks->next = NULL;
1297 flow0->next = flow1;
1306 // compute whitespace above and below each block
1307 for (blk0 = blocks; blk0; blk0 = blk0->next) {
1309 blk0->ySpaceB = pageHeight;
1311 // check each horizontally overlapping block
1312 for (blk1 = blocks; blk1; blk1 = blk1->next) {
1314 blk1->xMin < blk0->xMax &&
1315 blk1->xMax > blk0->xMin) {
1316 if (blk1->yMax < blk0->yMin) {
1317 if (blk1->yMax > blk0->ySpaceT) {
1318 blk0->ySpaceT = blk1->yMax;
1320 } else if (blk1->yMin > blk0->yMax) {
1321 if (blk1->yMin < blk0->ySpaceB) {
1322 blk0->ySpaceB = blk1->yMin;
1332 // build a new flow object
1333 flow1 = new TextFlow();
1334 flow1->blocks = blocks;
1335 flow1->lines = blocks->lines;
1336 flow1->yMin = blocks->yMin;
1337 flow1->yMax = blocks->yMax;
1338 flow1->ySpaceT = blocks->ySpaceT;
1339 flow1->ySpaceB = blocks->ySpaceB;
1341 // find subsequent blocks in the flow
1342 for (blk1 = blocks, blk2 = blocks->next;
1343 blk2 && flowFit(flow1, blk2);
1344 blk1 = blk2, blk2 = blk2->next) {
1345 if (blk2->yMin < flow1->yMin) {
1346 flow1->yMin = blk2->yMin;
1348 if (blk2->yMax > flow1->yMax) {
1349 flow1->yMax = blk2->yMax;
1351 if (blk2->ySpaceT > flow1->ySpaceT) {
1352 flow1->ySpaceT = blk2->ySpaceT;
1354 if (blk2->ySpaceB < flow1->ySpaceB) {
1355 flow1->ySpaceB = blk2->ySpaceB;
1357 for (line1 = blk1->lines; line1->next; line1 = line1->next) ;
1358 line1->flowNext = blk2->lines;
1361 // chop the block list
1362 blocks = blk1->next;
1365 // append the flow to the list
1367 flow0->next = flow1;
1375 #if 0 // for debugging
1376 printf("*** flows ***\n");
1377 for (flow0 = flows; flow0; flow0 = flow0->next) {
1379 for (blk0 = flow0->blocks; blk0; blk0 = blk0->next) {
1380 printf(" [block: x=%.2f..%.2f y=%.2f..%.2f ySpaceT=%.2f ySpaceB=%.2f]\n",
1381 blk0->xMin, blk0->xMax, blk0->yMin, blk0->yMax,
1382 blk0->ySpaceT, blk0->ySpaceB);
1383 for (line0 = blk0->lines; line0; line0 = line0->next) {
1384 printf(" [line: x=%.2f..%.2f y=%.2f..%.2f base=%.2f len=%d]\n",
1385 line0->xMin, line0->xMax, line0->yMin, line0->yMax,
1386 line0->yBase, line0->len);
1387 for (word0 = line0->words; word0; word0 = word0->next) {
1388 printf(" word: x=%.2f..%.2f y=%.2f..%.2f base=%.2f space=%d: '",
1389 word0->xMin, word0->xMax, word0->yMin, word0->yMax,
1390 word0->yBase, word0->spaceAfter);
1391 for (i = 0; i < word0->len; ++i) {
1392 fputc(word0->text[i] & 0xff, stdout);
1403 //----- sort lines into yx order
1405 // (the block/line merging process doesn't maintain the full-page
1406 // linked list of lines)
1411 for (flow0 = flows; flow0; flow0 = flow0->next) {
1412 for (line1 = flow0->lines; line1; line1 = line1->flowNext) {
1414 line0->pageNext = line1;
1422 for (flow0 = flows; flow0; flow0 = flow0->next) {
1423 for (line0 = flow0->lines; line0; line0 = line0->flowNext) {
1424 for (line1 = NULL, line2 = lines;
1425 line2 && !line0->yxBefore(line2);
1426 line1 = line2, line2 = line2->pageNext) ;
1428 line1->pageNext = line0;
1432 line0->pageNext = line2;
1437 #if 0 // for debugging
1438 printf("*** lines in yx order ***\n");
1439 for (line0 = lines; line0; line0 = line0->pageNext) {
1440 printf("[line: x=%.2f..%.2f y=%.2f..%.2f base=%.2f xSpaceL=%.2f xSpaceR=%.2f col=%d len=%d]\n",
1441 line0->xMin, line0->xMax, line0->yMin, line0->yMax,
1442 line0->yBase, line0->xSpaceL, line0->xSpaceR, line0->col[0],
1444 for (word0 = line0->words; word0; word0 = word0->next) {
1445 printf(" word: x=%.2f..%.2f y=%.2f..%.2f base=%.2f space=%d: '",
1446 word0->xMin, word0->xMax, word0->yMin, word0->yMax,
1447 word0->yBase, word0->spaceAfter);
1448 for (i = 0; i < word0->len; ++i) {
1449 fputc(word0->text[i] & 0xff, stdout);
// Scores how well <word> continues <line>.
//   line     - line being extended; its fontSize, font->maxSpaceWidth,
//              yBase and yMax are read here
//   lastWord - the word currently at the end of <line>
//   word     - the candidate word
1459 // Returns a non-negative number if <word> can be added to <line>
1460 // (whose last word is <lastWord>). A smaller return value indicates
1461 // a better fit. If <word> cannot be added to <line> at all, returns
1462 // a negative number.
1463 double TextPage::lineFit(TextLine *line, TextWord *lastWord, TextWord *word) {
1464 double fontSize0, fontSize1;
// fontSize0 is the line's font size, fontSize1 the candidate word's
1467 fontSize0 = line->fontSize;
1468 fontSize1 = word->fontSize;
// dx: horizontal gap from the end of <lastWord> to the start of <word>
// (negative when the two overlap)
1469 dx = word->xMin - lastWord->xMax;
// dxLimit: the font's max space width scaled by the line's font size
1470 dxLimit = fontSize0 * line->font->maxSpaceWidth;
1472 // check inter-word spacing
// lineMinDeltaX is negative, so a limited amount of overlap is tolerated
1473 if (dx < fontSize0 * lineMinDeltaX ||
1478 // ensure a non-negative return value
1483 // look for adjacent words with close baselines and close font sizes
// the two size comparisons together bound the font size ratio in
// either direction
1484 if (fabs(line->yBase - word->yBase) < lineMaxBaselineDelta * fontSize0 &&
1485 fontSize0 < lineMaxFontSizeRatio * fontSize1 &&
1486 fontSize1 < lineMaxFontSizeRatio * fontSize0) {
1490 // look for a superscript
// conditions: font size ratio within the superscript bounds, <word>'s
// yMax or baseline has a smaller y than <lastWord>'s, enough vertical
// overlap with <lastWord>, and a small enough horizontal gap
1491 if (fontSize1 > lineMinSuperscriptFontSizeRatio * fontSize0 &&
1492 fontSize1 < lineMaxSuperscriptFontSizeRatio * fontSize0 &&
1493 (word->yMax < lastWord->yMax ||
1494 word->yBase < lastWord->yBase) &&
1495 word->yMax - lastWord->yMin > lineMinSuperscriptOverlap * fontSize0 &&
1496 dx < fontSize0 * lineMaxSuperscriptDeltaX) {
1500 // look for a subscript
// same shape as the superscript test with the y comparisons reversed
// NOTE(review): the overlap term here uses line->yMax, whereas the
// superscript test uses lastWord->yMin -- confirm the asymmetry is
// intentional
1501 if (fontSize1 > lineMinSubscriptFontSizeRatio * fontSize0 &&
1502 fontSize1 < lineMaxSubscriptFontSizeRatio * fontSize0 &&
1503 (word->yMin > lastWord->yMin ||
1504 word->yBase > lastWord->yBase) &&
1505 line->yMax - word->yMin > lineMinSubscriptOverlap * fontSize0 &&
1506 dx < fontSize0 * lineMaxSubscriptDeltaX) {
// Line-to-line counterpart of lineFit(): <line0> is the left-hand
// line and <line1> the one that would follow it.
1513 // Returns true if <line0> and <line1> can be merged into a single
1514 // line, ignoring max word spacing.
1515 GBool TextPage::lineFit2(TextLine *line0, TextLine *line1) {
1516 double fontSize0, fontSize1;
1519 fontSize0 = line0->fontSize;
1520 fontSize1 = line1->fontSize;
// dx: horizontal gap from the right edge of <line0> to the left edge
// of <line1> (negative when they overlap)
1521 dx = line1->xMin - line0->xMax;
1523 // check inter-word spacing
// only the lower bound is tested here; there is no upper limit on dx
1524 if (dx < fontSize0 * lineMinDeltaX) {
1528 // look for close baselines and close font sizes
// baseline tolerance is scaled by <line0>'s font size; the two size
// comparisons bound the font size ratio in either direction
1529 if (fabs(line0->yBase - line1->yBase) < lineMaxBaselineDelta * fontSize0 &&
1530 fontSize0 < lineMaxFontSizeRatio * fontSize1 &&
1531 fontSize1 < lineMaxFontSizeRatio * fontSize0) {
1538 // Returns true if <line> can be added to <blk>. Assumes the y
1539 // coordinates are within range.
// Two tests are visible: a horizontal whitespace test and a font size
// ratio test.
1540 GBool TextPage::blockFit(TextBlock *blk, TextLine *line) {
1541 double fontSize0, fontSize1;
// horizontal test, applied symmetrically: the line's x extent is
// compared against the block's xSpaceL/xSpaceR limits, and the
// block's x extent against the line's limits
1544 if (line->xMin < blk->xSpaceL ||
1545 line->xMax > blk->xSpaceR ||
1546 blk->xMin < line->xSpaceL ||
1547 blk->xMax > line->xSpaceR) {
// font size test: the block is represented by the font size of the
// first line in its list
1552 fontSize0 = blk->lines->fontSize;
1553 fontSize1 = line->fontSize;
// true when either size exceeds the other by more than
// blkMaxFontSizeRatio
1554 if (fontSize0 > blkMaxFontSizeRatio * fontSize1 ||
1555 fontSize1 > blkMaxFontSizeRatio * fontSize0) {
1562 // Returns true if <blk0> and <blk1> can be merged into a single
1563 // block. Assumes the y coordinates are within range.
// Block-to-block counterpart of blockFit(), with the same two tests.
1564 GBool TextPage::blockFit2(TextBlock *blk0, TextBlock *blk1) {
1565 double fontSize0, fontSize1;
// horizontal test, applied symmetrically: each block's x extent is
// compared against the other block's xSpaceL/xSpaceR limits
1568 if (blk1->xMin < blk0->xSpaceL ||
1569 blk1->xMax > blk0->xSpaceR ||
1570 blk0->xMin < blk1->xSpaceL ||
1571 blk0->xMax > blk1->xSpaceR) {
// font size test: each block is represented by the font size of the
// first line in its list
1576 fontSize0 = blk0->lines->fontSize;
1577 fontSize1 = blk1->lines->fontSize;
// true when either size exceeds the other by more than
// blkMaxFontSizeRatio
1578 if (fontSize0 > blkMaxFontSizeRatio * fontSize1 ||
1579 fontSize1 > blkMaxFontSizeRatio * fontSize0) {
1586 // Returns true if <blk> can be added to <flow>.
// In this function yMin is treated as the top edge and yMax as the
// bottom edge of a block or flow.
1587 GBool TextPage::flowFit(TextFlow *flow, TextBlock *blk) {
1590 // check whitespace above and below
// applied symmetrically: the block's y extent is compared against the
// flow's ySpaceT/ySpaceB limits, and the flow's against the block's
1591 if (blk->yMin < flow->ySpaceT ||
1592 blk->yMax > flow->ySpaceB ||
1593 flow->yMin < blk->ySpaceT ||
1594 flow->yMax > blk->ySpaceB) {
1598 // check that block top edge is within +/- dy of flow top edge,
1599 // and that block bottom edge is above flow bottom edge + dy
// dy is flowMaxDeltaY scaled by the max font size of the first block
// in the flow
1600 dy = flowMaxDeltaY * flow->blocks->maxFontSize;
1601 return blk->yMin > flow->yMin - dy &&
1602 blk->yMin < flow->yMin + dy &&
1603 blk->yMax < flow->yMax + dy;
// Searches the text lines of the page for the Unicode string <s> of
// length <len>.
//   top    - when false, *xMin / *yMin act as an upper search limit
//   bottom - when false, *xMax / *yMax act as a lower search limit
//   xMin, yMin, xMax, yMax - read as the search limits described
//            above; *xMax is written when a match is found (see the
//            assignment at the end of this function)
1607 GBool TextPage::findText(Unicode *s, int len,
1608 GBool top, GBool bottom,
1609 double *xMin, double *yMin,
1610 double *xMax, double *yMax) {
1617 // scan all text on the page
1618 for (line = lines; line; line = line->pageNext) {
1620 // check: above top limit?
// whole-line test: the line lies entirely above *yMin, or starts
// above it and ends at or left of *xMin
1621 if (!top && (line->yMax < *yMin ||
1622 (line->yMin < *yMin && line->xMax <= *xMin))) {
1626 // check: below bottom limit?
// whole-line test: the line lies entirely below *yMax, or ends below
// it and starts at or right of *xMax
1627 if (!bottom && (line->yMin > *yMax ||
1628 (line->yMax > *yMax && line->xMin >= *xMax))) {
1632 // search each position in this line
// i is the candidate start offset and p points at the character at
// that offset; m is assigned on a line not shown in this listing
1634 for (i = 0, p = line->text; i <= m - len; ++i, ++p) {
// x0 / x1 are the left and right edges of character i, and x is its
// horizontal midpoint
1636 x0 = (i == 0) ? line->xMin : line->xRight[i-1];
1637 x1 = line->xRight[i];
1638 x = 0.5 * (x0 + x1);
1640 // check: above top limit?
1641 if (!top && line->yMin < *yMin) {
1647 // check: below bottom limit?
1648 if (!bottom && line->yMax > *yMax) {
1654 // compare the strings
1655 for (j = 0; j < len; ++j) {
1656 #if 1 //~ this lowercases Latin A-Z only -- this will eventually be
1657 //~ extended to handle other character sets
// 0x41..0x5a is the range 'A'..'Z'; the same range test is applied to
// the page text (p) and to the search string (s)
1658 if (p[j] >= 0x41 && p[j] <= 0x5a) {
1663 if (s[j] >= 0x41 && s[j] <= 0x5a) {
// the right edge of the match is the right edge of its last character
1677 *xMax = line->xRight[i + len - 1];
// Collects the page text that falls inside the rectangle
// (xMin, yMin)-(xMax, yMax) and returns it as a GString, converted
// with the global text encoding.  Two passes over the page's lines are
// visible: the first finds the leftmost column, the second appends the
// text to the result string <s>.
1688 GString *TextPage::getText(double xMin, double yMin,
1689 double xMax, double yMax) {
1693 char space[8], eol[16], buf[8];
1694 int spaceLen, eolLen, len;
1695 TextLine *line, *prevLine;
1697 int firstCol, col, i;
1702 // get the output encoding
1703 if (!(uMap = globalParams->getTextEncoding())) {
1706 isUnicode = uMap->isUnicode();
// pre-convert the space and end-of-line sequences to the output
// encoding
1707 spaceLen = uMap->mapUnicode(0x20, space, sizeof(space));
1708 eolLen = 0; // make gcc happy
// three end-of-line forms are built below: LF alone, CR followed by
// LF, and CR alone (the case labels are not shown in this listing)
1709 switch (globalParams->getTextEOL()) {
1711 eolLen = uMap->mapUnicode(0x0a, eol, sizeof(eol));
1714 eolLen = uMap->mapUnicode(0x0d, eol, sizeof(eol));
1715 eolLen += uMap->mapUnicode(0x0a, eol + eolLen, sizeof(eol) - eolLen);
1718 eolLen = uMap->mapUnicode(0x0d, eol, sizeof(eol));
1722 // find the leftmost column
1724 for (line = lines; line; line = line->pageNext) {
// line starts below the rectangle
1725 if (line->yMin > yMax) {
// line does not intersect the rectangle
1728 if (line->yMax < yMin ||
1729 line->xMax < xMin ||
1730 line->xMin > xMax) {
// the line's vertical midpoint must lie inside the rectangle
1734 y = 0.5 * (line->yMin + line->yMax);
1735 if (y < yMin || y > yMax) {
// x0 / x1 are the left and right edges of character i; the test is on
// the character's horizontal midpoint
1741 x0 = (i==0) ? line->xMin : line->xRight[i-1];
1742 x1 = line->xRight[i];
1743 if (0.5 * (x0 + x1) > xMin) {
1750 if (firstCol < 0 || col < firstCol) {
// second pass: same line filters as above, this time producing output
1758 for (prevLine = NULL, line = lines;
1760 prevLine = line, line = line->pageNext) {
1761 if (line->yMin > yMax) {
1764 if (line->yMax < yMin ||
1765 line->xMax < xMin ||
1766 line->xMin > xMax) {
1770 y = 0.5 * (line->yMin + line->yMax);
1771 if (y < yMin || y > yMax) {
1777 x0 = (i==0) ? line->xMin : line->xRight[i-1];
1778 x1 = line->xRight[i];
1779 if (0.5 * (x0 + x1) > xMin) {
// an end-of-line is appended when the output column is already past
// this character's column, or based on the vertical position of the
// previous line (part of this condition is not shown in this listing)
1786 if (col > line->col[i] ||
1789 prevLine->yMax - lineOverlapSlack * prevLine->fontSize)) {
1790 s->append(eol, eolLen);
1795 // line this block up with the correct column
1796 for (; col < line->col[i]; ++col) {
1797 s->append(space, spaceLen);
1800 // print the portion of the line
1801 for (; i < line->len; ++i) {
// test on the character's horizontal midpoint against the right edge
// of the rectangle
1803 x0 = (i==0) ? line->xMin : line->xRight[i-1];
1804 x1 = line->xRight[i];
1805 if (0.5 * (x0 + x1) > xMax) {
1809 len = uMap->mapUnicode(line->text[i], buf, sizeof(buf));
1810 s->append(buf, len);
// the column advances by one per character for Unicode encodings and
// by the converted length otherwise
1811 col += isUnicode ? 1 : len;
1816 s->append(eol, eolLen);
// Writes the text of the page through <outputFunc>, passing
// <outputStream> as its first argument, after converting each
// character with the global text encoding.  Two output modes are
// visible: one that keeps the physical layout (used when physLayout or
// rawOrder is set) and one that walks the flows instead.
1824 void TextPage::dump(void *outputStream, TextOutputFunc outputFunc,
1827 char space[8], eol[16], eop[8], buf[8];
1828 int spaceLen, eolLen, eopLen, len;
1833 // get the output encoding
1834 if (!(uMap = globalParams->getTextEncoding())) {
// pre-convert the space, end-of-line and end-of-page sequences to the
// output encoding
1837 spaceLen = uMap->mapUnicode(0x20, space, sizeof(space));
1838 eolLen = 0; // make gcc happy
// three end-of-line forms are built below: LF alone, CR followed by
// LF, and CR alone (the case labels are not shown in this listing)
1839 switch (globalParams->getTextEOL()) {
1841 eolLen = uMap->mapUnicode(0x0a, eol, sizeof(eol));
1844 eolLen = uMap->mapUnicode(0x0d, eol, sizeof(eol));
1845 eolLen += uMap->mapUnicode(0x0a, eol + eolLen, sizeof(eol) - eolLen);
1848 eolLen = uMap->mapUnicode(0x0d, eol, sizeof(eol));
// end-of-page marker is a form feed (0x0c)
1851 eopLen = uMap->mapUnicode(0x0c, eop, sizeof(eop));
1853 // output the page, maintaining the original physical layout
1854 if (physLayout || rawOrder) {
1856 for (line = lines; line; line = line->pageNext) {
1858 // line this block up with the correct column
1860 for (; col < line->col[0]; ++col) {
1861 (*outputFunc)(outputStream, space, spaceLen);
// emit every character of the line
1866 for (i = 0; i < line->len; ++i) {
1867 len = uMap->mapUnicode(line->text[i], buf, sizeof(buf));
1868 (*outputFunc)(outputStream, buf, len);
1870 col += line->convertedLen;
1872 // print one or more returns if necessary
// a return is needed at the last line, when the next line starts at a
// column left of the current one, or when the next line's top is not
// within the overlap slack of this line's bottom
1873 if (!line->pageNext ||
1874 line->pageNext->col[0] < col ||
1875 line->pageNext->yMin >
1876 line->yMax - lineOverlapSlack * line->fontSize) {
1878 // compute number of returns
1880 if (line->pageNext) {
// vertical gap to the next line in units of this line's font size;
// adding 0.5 before the int cast rounds to the nearest integer for
// non-negative values
1881 d += (int)((line->pageNext->yMin - line->yMax) /
1882 line->fontSize + 0.5);
1885 // various things (weird font matrices) can result in bogus
1886 // values here, so do a sanity check
1892 for (; d > 0; --d) {
1893 (*outputFunc)(outputStream, eol, eolLen);
1900 // output the page, "undoing" the layout
1902 for (flow = flows; flow; flow = flow->next) {
1903 for (line = flow->lines; line; line = line->flowNext) {
// NOTE(review): n is assigned on lines not shown in this listing;
// presumably it is shortened for a hyphenated line that has a
// successor -- confirm
1905 if (line->flowNext && line->hyphenated) {
1908 for (i = 0; i < n; ++i) {
1909 len = uMap->mapUnicode(line->text[i], buf, sizeof(buf));
1910 (*outputFunc)(outputStream, buf, len);
// a non-hyphenated line that has a successor is followed by a space
1912 if (line->flowNext && !line->hyphenated) {
1913 (*outputFunc)(outputStream, space, spaceLen);
1916 (*outputFunc)(outputStream, eol, eolLen);
1917 (*outputFunc)(outputStream, eol, eolLen);
// end of page: form feed followed by an end-of-line
1922 (*outputFunc)(outputStream, eop, eopLen);
1923 (*outputFunc)(outputStream, eol, eolLen);
// Begins a new page: records the page dimensions reported by <state>
// in pageWidth and pageHeight.
1928 void TextPage::startPage(GfxState *state) {
1930 pageWidth = state->getPageWidth();
1931 pageHeight = state->getPageHeight();
// Resets the page's text state: iterates over the word list and the
// flow list, deletes the font list, clears the word pointers and
// installs a fresh, empty font list.
1934 void TextPage::clear() {
// w2 / f2 hold the successor so the loop can advance after the
// current node has been handled (loop bodies not shown in this listing)
1943 for (w1 = words; w1; w1 = w2) {
1948 for (f1 = flows; f1; f1 = f2) {
1953 deleteGList(fonts, TextFontInfo);
1960 words = wordPtr = NULL;
1963 fonts = new GList();
1968 //------------------------------------------------------------------------
1970 //------------------------------------------------------------------------
// Output callback that writes <len> bytes of <text> to <stream>,
// which is treated as a FILE *.  The fwrite() result is not checked.
1972 static void outputToFile(void *stream, char *text, int len) {
1973 fwrite(text, 1, len, (FILE *)stream);
// Creates a text output device that writes to a file.
//   fileName    - output file name; "-" selects stdout
//   physLayoutA - stored in physLayout
//   rawOrderA   - stored in rawOrder and passed on to the TextPage
//   append      - selects fopen mode "ab" (append) instead of "wb"
1976 TextOutputDev::TextOutputDev(char *fileName, GBool physLayoutA,
1977 GBool rawOrderA, GBool append) {
1979 physLayout = physLayoutA;
1980 rawOrder = rawOrderA;
1986 if (!strcmp(fileName, "-")) {
1987 outputStream = stdout;
// the file is opened in binary mode in both cases
1988 } else if ((outputStream = fopen(fileName, append ? "ab" : "wb"))) {
1991 error(-1, "Couldn't open text file '%s'", fileName);
// file output goes through the outputToFile() callback
1995 outputFunc = &outputToFile;
// NOTE(review): the branch that leads here is not shown in this
// listing; presumably the case where no file name was given -- confirm
1997 outputStream = NULL;
2000 // set up text object
2001 text = new TextPage(rawOrderA);
// Creates a text output device that delivers its output through a
// caller-supplied callback.
//   func        - output callback
//   stream      - stored in outputStream
//   physLayoutA - stored in physLayout
//   rawOrderA   - stored in rawOrder and passed on to the TextPage
2004 TextOutputDev::TextOutputDev(TextOutputFunc func, void *stream,
2005 GBool physLayoutA, GBool rawOrderA) {
2007 outputStream = stream;
2009 physLayout = physLayoutA;
2010 rawOrder = rawOrderA;
2011 text = new TextPage(rawOrderA);
// Destructor: closes the output stream as a FILE *.  The conditions
// guarding the two calls below are not shown in this listing.
2015 TextOutputDev::~TextOutputDev() {
// presumably MacOS-only (ICSupport.h is included for setting the
// type/creator of MacOS files) -- confirm the surrounding #ifdef
2018 ICS_MapRefNumAndAssign((short)((FILE *)outputStream)->handle);
2020 fclose((FILE *)outputStream);
// Forwards the start of a page to the TextPage.  <pageNum> is not used
// by the visible code.
2027 void TextOutputDev::startPage(int pageNum, GfxState *state) {
2028 text->startPage(state);
// Ends the current page.  The visible code dumps the accumulated text
// through the configured output callback and stream, honouring
// physLayout.
2031 void TextOutputDev::endPage() {
2034 text->dump(outputStream, outputFunc, physLayout);
// Forwards a font change in <state> to the TextPage.
2038 void TextOutputDev::updateFont(GfxState *state) {
2039 text->updateFont(state);
// Starts a new word in the TextPage at the current position taken
// from <state>.  <s> is not used by the visible code.
2042 void TextOutputDev::beginString(GfxState *state, GString *s) {
2043 text->beginWord(state, state->getCurX(), state->getCurY());
// Counterpart of beginString().  The body is not shown in this
// listing.
2046 void TextOutputDev::endString(GfxState *state) {
// Adds one character to the TextPage.  All arguments except
// <originX> and <originY> are forwarded to TextPage::addChar().
2050 void TextOutputDev::drawChar(GfxState *state, double x, double y,
2051 double dx, double dy,
2052 double originX, double originY,
2053 CharCode c, Unicode *u, int uLen) {
2054 text->addChar(state, x, y, dx, dy, c, u, uLen);
// Thin wrapper: forwards all arguments to TextPage::findText() on the
// current page and returns its result.
2057 GBool TextOutputDev::findText(Unicode *s, int len,
2058 GBool top, GBool bottom,
2059 double *xMin, double *yMin,
2060 double *xMax, double *yMax) {
2061 return text->findText(s, len, top, bottom, xMin, yMin, xMax, yMax);
// Thin wrapper: forwards the rectangle to TextPage::getText() on the
// current page and returns its result.
2064 GString *TextOutputDev::getText(double xMin, double yMin,
2065 double xMax, double yMax) {
2066 return text->getText(xMin, yMin, xMax, yMax);