+ font = NULL;
+ }
+ if (!font) {
+ font = new TextFontInfo(state);
+ fonts->append(font);
+ }
+
+ // adjust the font size
+ gfxFont = state->getFont();
+ fontSize = state->getTransformedFontSize();
+ if (gfxFont && gfxFont->getType() == fontType3) {
+ // This is a hack which makes it possible to deal with some Type 3
+ // fonts. The problem is that it's impossible to know what the
+ // base coordinate system used in the font is without actually
+ // rendering the font. This code tries to guess by looking at the
+ // width of the character 'm' (which breaks if the font is a
+ // subset that doesn't contain 'm').
+ mCode = letterCode = anyCode = -1;
+ for (code = 0; code < 256; ++code) {
+ name = ((Gfx8BitFont *)gfxFont)->getCharName(code);
+ if (name && name[0] == 'm' && name[1] == '\0') {
+ mCode = code;
+ }
+ if (letterCode < 0 && name && name[1] == '\0' &&
+ ((name[0] >= 'A' && name[0] <= 'Z') ||
+ (name[0] >= 'a' && name[0] <= 'z'))) {
+ letterCode = code;
+ }
+ if (anyCode < 0 && name &&
+ ((Gfx8BitFont *)gfxFont)->getWidth(code) > 0) {
+ anyCode = code;
+ }
+ }
+ if (mCode >= 0 &&
+ (w = ((Gfx8BitFont *)gfxFont)->getWidth(mCode)) > 0) {
+ // 0.6 is a generic average 'm' width -- yes, this is a hack
+ fontSize *= w / 0.6;
+ } else if (letterCode >= 0 &&
+ (w = ((Gfx8BitFont *)gfxFont)->getWidth(letterCode)) > 0) {
+ // even more of a hack: 0.5 is a generic letter width
+ fontSize *= w / 0.5;
+ } else if (anyCode >= 0 &&
+ (w = ((Gfx8BitFont *)gfxFont)->getWidth(anyCode)) > 0) {
+ // better than nothing: 0.5 is a generic character width
+ fontSize *= w / 0.5;
+ }
+ fm = gfxFont->getFontMatrix();
+ if (fm[0] != 0) {
+ fontSize *= fabs(fm[3] / fm[0]);
+ }
+ }
+}
+
+// Opens a new TextWord at (x0, y0), handing it the page's current
+// charPos, font and fontSize.  When a word is already open, the call
+// only increments the nesting counter and leaves that word untouched.
+void TextPage::beginWord(GfxState *state, double x0, double y0) {
+  // Type 3 glyph procedures can issue text-drawing operations of their
+  // own (this occurs when TextPage is driven through XOutputDev rather
+  // than TextOutputDev), so a begin inside an open word is just counted.
+  if (!curWord) {
+    curWord = new TextWord(state, x0, y0, charPos, font, fontSize);
+  } else {
+    ++nest;
+  }
+}
+
+// Adds one character to the page.
+//   state   graphics state used for all coordinate transforms
+//   x, y    character origin, passed through state->transform()
+//   dx, dy  character advance; char/word spacing is subtracted from it
+//   c       character code (0x20 also picks up the word spacing)
+//   u, uLen Unicode value(s) for the character
+// The character may be dropped (outside the page, or past the tiny-char
+// limit), may close the current word (single space), or may force a
+// word break before being appended to the current word.
+void TextPage::addChar(GfxState *state, double x, double y,
+                       double dx, double dy,
+                       CharCode c, Unicode *u, int uLen) {
+  // a preceding space makes addChar call endWord, so there may be no
+  // open word at this point -- start one if needed
+  if (curWord == NULL) {
+    beginWord(state, x, y);
+  }
+
+  // ignore characters whose transformed origin is not on the page
+  // NOTE(review): this early return (and the tiny-char one below) does
+  // not advance charPos -- confirm that this is intended.
+  double devX, devY;
+  state->transform(x, y, &devX, &devY);
+  if (devX < 0 || devY < 0 || devX > pageWidth || devY > pageHeight) {
+    return;
+  }
+
+  // remove character spacing (plus word spacing for code 0x20) from
+  // the advance, then transform the advance as a delta
+  double spacing = state->getCharSpace();
+  if (c == (CharCode)0x20) {
+    spacing += state->getWordSpace();
+  }
+  double spDx, spDy;
+  state->textTransformDelta(spacing * state->getHorizScaling(), 0,
+                            &spDx, &spDy);
+  dx -= spDx;
+  dy -= spDy;
+  double advX, advY;
+  state->transformDelta(dx, dy, &advX, &advY);
+
+  // tiny-char limit: unless tiny chars are kept, characters under 3
+  // units in both directions are counted, and dropped once the count
+  // has passed 20000
+  if (!globalParams->getTextKeepTinyChars() &&
+      fabs(advX) < 3 && fabs(advY) < 3 &&
+      ++nTinyChars > 20000) {
+    return;
+  }
+
+  // a lone space ends the word; it is counted in the word's charLen
+  // but not stored as text
+  if (uLen == 1 && u[0] == (Unicode)0x20) {
+    ++curWord->charLen;
+    ++charPos;
+    endWord();
+    return;
+  }
+
+  // large char spacing is sometimes used to move text around -- if the
+  // gap to the previous char is too wide, start a new word and leave
+  // the joining to the coalesce function
+  const int nChars = curWord->len;
+  if (nChars > 0) {
+    if (devX - curWord->xRight[nChars - 1] >
+        curWord->font->minSpaceWidth * curWord->fontSize) {
+      endWord();
+      beginWord(state, x, y);
+    }
+  }
+
+  // a negative horizontal advance means the text is being drawn in
+  // reverse order (page rotation and/or transform matrices): break the
+  // word and flip the character box so its width is positive
+  if (advX < 0) {
+    endWord();
+    beginWord(state, x + dx, y + dy);
+    devX += advX;
+    devY += advY;
+    advX = -advX;
+    advY = -advY;
+  }
+
+  // split the advance evenly over the Unicode values and append them
+  if (uLen != 0) {
+    advX /= uLen;
+    advY /= uLen;
+  }
+  for (int k = 0; k < uLen; ++k) {
+    curWord->addChar(state, devX + k*advX, devY + k*advY,
+                     advX, advY, u[k]);
+  }
+  ++curWord->charLen;
+  ++charPos;
+}
+
+// Closes the current word.  If a nested beginWord is pending
+// (nest > 0), only the nesting counter is decremented; otherwise the
+// open word, if any, is handed to addWord and curWord is cleared.
+void TextPage::endWord() {
+  if (nest <= 0) {
+    if (curWord != NULL) {
+      addWord(curWord);
+      curWord = NULL;
+    }
+  } else {
+    // Type 3 characters can contain text-drawing operations (when
+    // TextPage is being used via XOutputDev rather than
+    // TextOutputDev), so this end only balances a nested begin.
+    --nest;
+  }
+}
+
+// Links a finished word into the page's word list (head: words) and
+// records it in wordPtr as the most recently added word.  Words with
+// no characters are deleted instead of being inserted.
+void TextPage::addWord(TextWord *word) {
+  // zero-length words don't have valid xMin/xMax values and are
+  // useless anyway -- discard them
+  if (word->len == 0) {
+    delete word;
+    return;
+  }
+
+  // find the neighbours between which the word is inserted
+  TextWord *prev, *succ;
+  if (rawOrder) {
+    // raw order: link directly after the most recently added word
+    prev = wordPtr;
+    succ = NULL;
+  } else {
+    // xy order: begin the scan at the most recently added word when it
+    // sorts before the new one, otherwise at the head of the list
+    if (wordPtr && wordPtr->xyBefore(word)) {
+      prev = wordPtr;
+      succ = wordPtr->next;
+    } else {
+      prev = NULL;
+      succ = words;
+    }
+    // advance until reaching a word that the new word sorts before
+    while (succ && !word->xyBefore(succ)) {
+      prev = succ;
+      succ = succ->next;
+    }
+  }
+
+  // splice the word in between prev and succ
+  if (!prev) {
+    words = word;
+  } else {
+    prev->next = word;
+  }
+  word->next = succ;
+  wordPtr = word;
+}
+
+void TextPage::coalesce(GBool physLayout) {
+ TextWord *word0, *word1, *word2;
+ TextLine *line0, *line1, *line2, *line3, *line4, *lineList;
+ TextBlock *blk0, *blk1, *blk2, *blk3, *blk4, *blk5, *blk6;
+ TextBlock *yxBlocks, *blocks, *blkStack;
+ TextFlow *flow0, *flow1;
+ double sz, xLimit, yLimit;
+ double fit1, fit2, sp1, sp2;
+ GBool found;
+ UnicodeMap *uMap;
+ GBool isUnicode;
+ char buf[8];
+ int col1, col2, d, i, j;
+
+#if 0 // for debugging
+ printf("*** initial word list ***\n");
+ for (word0 = words; word0; word0 = word0->next) {
+ printf("word: x=%.2f..%.2f y=%.2f..%.2f base=%.2f: '",
+ word0->xMin, word0->xMax, word0->yMin, word0->yMax, word0->yBase);
+ for (i = 0; i < word0->len; ++i) {
+ fputc(word0->text[i] & 0xff, stdout);
+ }
+ printf("'\n");
+ }
+ printf("\n");
+ fflush(stdout);
+#endif
+
+ //----- discard duplicated text (fake boldface, drop shadows)
+
+ word0 = words;
+ while (word0) {
+ sz = word0->fontSize;
+ xLimit = word0->xMin + sz * dupMaxDeltaX;
+ found = gFalse;
+ for (word1 = word0, word2 = word0->next;
+ word2 && word2->xMin < xLimit;
+ word1 = word2, word2 = word2->next) {
+ if (word2->len == word0->len &&
+ !memcmp(word2->text, word0->text, word0->len * sizeof(Unicode)) &&
+ fabs(word2->yMin - word0->yMin) < sz * dupMaxDeltaY &&
+ fabs(word2->yMax - word0->yMax) < sz * dupMaxDeltaY &&
+ fabs(word2->xMax - word0->xMax) < sz * dupMaxDeltaX) {
+ found = gTrue;
+ break;
+ }
+ }
+ if (found) {
+ word1->next = word2->next;
+ delete word2;
+ } else {
+ word0 = word0->next;
+ }
+ }
+
+#if 0 // for debugging
+ printf("*** words after removing duplicate text ***\n");
+ for (word0 = words; word0; word0 = word0->next) {
+ printf("word: x=%.2f..%.2f y=%.2f..%.2f base=%.2f: '",
+ word0->xMin, word0->xMax, word0->yMin, word0->yMax, word0->yBase);
+ for (i = 0; i < word0->len; ++i) {
+ fputc(word0->text[i] & 0xff, stdout);
+ }
+ printf("'\n");
+ }
+ printf("\n");
+ fflush(stdout);
+#endif
+
+ //----- merge words
+
+ word0 = words;
+ while (word0) {
+ sz = word0->fontSize;
+
+ // look for adjacent text which is part of the same word, and
+ // merge it into this word
+ xLimit = word0->xMax + sz * word0->font->minSpaceWidth;
+ if (rawOrder) {
+ word1 = word0;
+ word2 = word0->next;
+ found = word2 &&
+ word2->xMin < xLimit &&
+ word2->font == word0->font &&
+ fabs(word2->fontSize - sz) < 0.05 &&
+ fabs(word2->yBase - word0->yBase) < 0.05 &&
+ word2->charPos == word0->charPos + word0->charLen;
+ } else {
+ found = gFalse;
+ for (word1 = word0, word2 = word0->next;
+ word2 && word2->xMin < xLimit;
+ word1 = word2, word2 = word2->next) {
+ if (word2->font == word0->font &&
+ fabs(word2->fontSize - sz) < 0.05 &&
+ fabs(word2->yBase - word0->yBase) < 0.05 &&
+ word2->charPos == word0->charPos + word0->charLen) {
+ found = gTrue;
+ break;
+ }
+ }
+ }
+ if (found) {
+ word0->merge(word2);
+ word1->next = word2->next;
+ delete word2;
+ continue;
+ }
+
+ word0 = word0->next;
+ }
+
+#if 0 // for debugging
+ printf("*** after merging words ***\n");
+ for (word0 = words; word0; word0 = word0->next) {
+ printf("word: x=%.2f..%.2f y=%.2f..%.2f base=%.2f: '",
+ word0->xMin, word0->xMax, word0->yMin, word0->yMax, word0->yBase);
+ for (i = 0; i < word0->len; ++i) {
+ fputc(word0->text[i] & 0xff, stdout);
+ }
+ printf("'\n");
+ }
+ printf("\n");
+ fflush(stdout);
+#endif
+
+ //----- assemble words into lines
+
+ lineList = line0 = NULL;
+ while (words) {
+
+ // remove the first word from the word list
+ word0 = words;
+ words = words->next;
+ word0->next = NULL;
+
+ // find the best line (if any) for the word
+ if (rawOrder) {
+ if (line0 && lineFit(line0, word0, &sp2) >= 0) {
+ line1 = line0;
+ sp1 = sp2;
+ } else {
+ line1 = NULL;
+ sp1 = 0;
+ }
+ } else {
+ line1 = NULL;
+ fit1 = 0;
+ sp1 = 0;
+ for (line2 = lineList; line2; line2 = line2->next) {
+ fit2 = lineFit(line2, word0, &sp2);
+ if (fit2 >= 0 && (!line1 || fit2 < fit1)) {
+ line1 = line2;
+ fit1 = fit2;
+ sp1 = sp2;
+ }
+ }
+ }
+
+ // found a line: append the word
+ if (line1) {
+ word1 = line1->lastWord;
+ word1->next = word0;
+ line1->lastWord = word0;
+ if (word0->xMax > line1->xMax) {
+ line1->xMax = word0->xMax;
+ }
+ if (word0->yMin < line1->yMin) {
+ line1->yMin = word0->yMin;
+ }
+ if (word0->yMax > line1->yMax) {
+ line1->yMax = word0->yMax;
+ }
+ line1->len += word0->len;
+ if (sp1 > line1->fontSize * line1->font->minSpaceWidth) {
+ word1->spaceAfter = gTrue;
+ ++line1->len;
+ }
+
+ // didn't find a line: create a new line
+ } else {
+ line1 = new TextLine();
+ line1->words = line1->lastWord = word0;
+ line1->xMin = word0->xMin;
+ line1->xMax = word0->xMax;
+ line1->yMin = word0->yMin;
+ line1->yMax = word0->yMax;
+ line1->yBase = word0->yBase;
+ line1->font = word0->font;
+ line1->fontSize = word0->fontSize;
+ line1->len = word0->len;
+ if (line0) {
+ line0->next = line1;
+ } else {
+ lineList = line1;
+ }
+ line0 = line1;
+ }
+ }
+
+ // build the line text
+ uMap = globalParams->getTextEncoding();
+ isUnicode = uMap ? uMap->isUnicode() : gFalse;
+
+ for (line1 = lineList; line1; line1 = line1->next) {
+ line1->text = (Unicode *)gmalloc(line1->len * sizeof(Unicode));
+ line1->xRight = (double *)gmalloc(line1->len * sizeof(double));
+ line1->col = (int *)gmalloc(line1->len * sizeof(int));
+ i = 0;
+ for (word1 = line1->words; word1; word1 = word1->next) {
+ for (j = 0; j < word1->len; ++j) {
+ line1->text[i] = word1->text[j];
+ line1->xRight[i] = word1->xRight[j];
+ ++i;
+ }
+ if (word1->spaceAfter && word1->next) {
+ line1->text[i] = (Unicode)0x0020;
+ line1->xRight[i] = word1->next->xMin;
+ ++i;
+ }
+ }
+ line1->convertedLen = 0;
+ for (j = 0; j < line1->len; ++j) {
+ line1->col[j] = line1->convertedLen;
+ if (isUnicode) {
+ ++line1->convertedLen;
+ } else if (uMap) {
+ line1->convertedLen +=
+ uMap->mapUnicode(line1->text[j], buf, sizeof(buf));
+ }
+ }
+
+ // check for hyphen at end of line
+ //~ need to check for other chars used as hyphens
+ if (line1->text[line1->len - 1] == (Unicode)'-') {
+ line1->hyphenated = gTrue;
+ }
+
+ }
+
+ if (uMap) {
+ uMap->decRefCnt();
+ }
+
+#if 0 // for debugging
+ printf("*** lines in xy order ***\n");
+ for (line0 = lineList; line0; line0 = line0->next) {
+ printf("[line: x=%.2f..%.2f y=%.2f..%.2f base=%.2f len=%d]\n",
+ line0->xMin, line0->xMax, line0->yMin, line0->yMax,
+ line0->yBase, line0->len);
+ for (word0 = line0->words; word0; word0 = word0->next) {
+ printf(" word: x=%.2f..%.2f y=%.2f..%.2f base=%.2f fontSz=%.2f space=%d: '",
+ word0->xMin, word0->xMax, word0->yMin, word0->yMax,
+ word0->yBase, word0->fontSize, word0->spaceAfter);
+ for (i = 0; i < word0->len; ++i) {
+ fputc(word0->text[i] & 0xff, stdout);
+ }
+ printf("'\n");
+ }
+ }
+ printf("\n");
+ fflush(stdout);
+#endif
+
+ //----- column assignment
+
+ for (line1 = lineList; line1; line1 = line1->next) {
+ col1 = 0;
+ for (line2 = lineList; line2 != line1; line2 = line2->next) {
+ if (line1->xMin >= line2->xMax) {
+ d = (int)((line1->xMin - line2->xMax) /
+ (line1->font->maxSpaceWidth * line1->fontSize));
+ if (d > 4) {
+ d = 4;
+ }
+ col2 = line2->col[0] + line2->convertedLen + d;
+ if (col2 > col1) {
+ col1 = col2;
+ }
+ } else if (line1->xMin > line2->xMin) {
+ for (i = 0; i < line2->len && line1->xMin >= line2->xRight[i]; ++i) ;
+ col2 = line2->col[i];
+ if (col2 > col1) {
+ col1 = col2;
+ }
+ }
+ }
+ for (j = 0; j < line1->len; ++j) {
+ line1->col[j] += col1;
+ }
+ }
+
+#if 0 // for debugging
+ printf("*** lines in xy order, after column assignment ***\n");
+ for (line0 = lineList; line0; line0 = line0->next) {
+ printf("[line: x=%.2f..%.2f y=%.2f..%.2f base=%.2f col=%d len=%d]\n",
+ line0->xMin, line0->xMax, line0->yMin, line0->yMax,
+ line0->yBase, line0->col[0], line0->len);
+ for (word0 = line0->words; word0; word0 = word0->next) {
+ printf(" word: x=%.2f..%.2f y=%.2f..%.2f base=%.2f fontSz=%.2f space=%d: '",
+ word0->xMin, word0->xMax, word0->yMin, word0->yMax,
+ word0->yBase, word0->fontSize, word0->spaceAfter);
+ for (i = 0; i < word0->len; ++i) {
+ fputc(word0->text[i] & 0xff, stdout);
+ }
+ printf("'\n");
+ }
+ }
+ printf("\n");
+ fflush(stdout);
+#endif
+
+ //----- assemble lines into blocks
+
+ if (rawOrder) {
+
+ lines = lineList;
+ for (line1 = lines; line1; line1 = line1->next) {
+ line1->xSpaceL = 0;
+ line1->xSpaceR = pageWidth;
+ }
+
+ } else {
+
+ // sort lines into yx order
+ lines = NULL;
+ while (lineList) {
+ line0 = lineList;
+ lineList = lineList->next;
+ for (line1 = NULL, line2 = lines;
+ line2 && !line0->yxBefore(line2);
+ line1 = line2, line2 = line2->next) ;
+ if (line1) {
+ line1->next = line0;
+ } else {
+ lines = line0;
+ }
+ line0->next = line2;
+ }
+
+ // compute whitespace to left and right of each line
+ line0 = lines;
+ for (line1 = lines; line1; line1 = line1->next) {
+
+ // find the first vertically overlapping line
+ for (; line0 && line0->yMax < line1->yMin; line0 = line0->next) ;
+
+ // check each vertically overlapping line -- look for the nearest
+ // on each side
+ line1->xSpaceL = 0;
+ line1->xSpaceR = pageWidth;
+ for (line2 = line0;
+ line2 && line2->yMin < line1->yMax;
+ line2 = line2->next) {
+ if (line2->yMax > line1->yMin) {
+ if (line2->xMax < line1->xMin) {
+ if (line2->xMax > line1->xSpaceL) {
+ line1->xSpaceL = line2->xMax;
+ }
+ } else if (line2->xMin > line1->xMax) {
+ if (line2->xMin < line1->xSpaceR) {
+ line1->xSpaceR = line2->xMin;
+ }
+ }
+ }
+ }
+ }
+ } // (!rawOrder)
+
+#if 0 // for debugging
+ printf("*** lines in yx order ***\n");
+ for (line0 = lines; line0; line0 = line0->next) {
+ printf("[line: x=%.2f..%.2f y=%.2f..%.2f base=%.2f xSpaceL=%.2f xSpaceR=%.2f len=%d]\n",
+ line0->xMin, line0->xMax, line0->yMin, line0->yMax,
+ line0->yBase, line0->xSpaceL, line0->xSpaceR, line0->len);
+ for (word0 = line0->words; word0; word0 = word0->next) {
+ printf(" word: x=%.2f..%.2f y=%.2f..%.2f base=%.2f fontSz=%.2f space=%d: '",
+ word0->xMin, word0->xMax, word0->yMin, word0->yMax,
+ word0->yBase, word0->fontSize, word0->spaceAfter);
+ for (i = 0; i < word0->len; ++i) {
+ fputc(word0->text[i] & 0xff, stdout);
+ }
+ printf("'\n");
+ }
+ }
+ printf("\n");
+ fflush(stdout);
+#endif
+
+ lineList = lines;
+ yxBlocks = NULL;
+ blk0 = NULL;
+ while (lineList) {
+
+ // build a new block object
+ line0 = lineList;
+ lineList = lineList->next;
+ line0->next = NULL;
+ blk1 = new TextBlock();
+ blk1->lines = line0;
+ blk1->xMin = line0->xMin;
+ blk1->xMax = line0->xMax;
+ blk1->yMin = line0->yMin;
+ blk1->yMax = line0->yMax;
+ blk1->xSpaceL = line0->xSpaceL;
+ blk1->xSpaceR = line0->xSpaceR;
+ blk1->maxFontSize = line0->fontSize;
+
+ // find subsequent lines in the block
+ while (lineList) {
+
+ // look for the first horizontally overlapping line below this
+ // one
+ yLimit = line0->yMax + blkMaxSpacing * line0->fontSize;
+ line3 = line4 = NULL;
+ if (rawOrder) {
+ if (lineList->yMin < yLimit &&
+ lineList->xMax > blk1->xMin &&
+ lineList->xMin < blk1->xMax) {
+ line3 = NULL;
+ line4 = lineList;
+ }
+ } else {
+ for (line1 = NULL, line2 = lineList;
+ line2 && line2->yMin < yLimit;
+ line1 = line2, line2 = line2->next) {
+ if (line2->xMax > blk1->xMin &&
+ line2->xMin < blk1->xMax) {
+ line3 = line1;
+ line4 = line2;
+ break;
+ }
+ }
+ }
+
+ // if there is an overlapping line and it fits in the block, add
+ // it to the block
+ if (line4 && blockFit(blk1, line4)) {
+ if (line3) {
+ line3->next = line4->next;
+ } else {
+ lineList = line4->next;
+ }
+ line0->next = line0->flowNext = line4;
+ line4->next = NULL;
+ if (line4->xMin < blk1->xMin) {
+ blk1->xMin = line4->xMin;
+ } else if (line4->xMax > blk1->xMax) {
+ blk1->xMax = line4->xMax;
+ }
+ if (line4->yMax > blk1->yMax) {
+ blk1->yMax = line4->yMax;
+ }
+ if (line4->xSpaceL > blk1->xSpaceL) {
+ blk1->xSpaceL = line4->xSpaceL;
+ }
+ if (line4->xSpaceR < blk1->xSpaceR) {
+ blk1->xSpaceR = line4->xSpaceR;
+ }
+ if (line4->fontSize > blk1->maxFontSize) {
+ blk1->maxFontSize = line4->fontSize;
+ }
+ line0 = line4;
+
+ // otherwise, we're done with this block
+ } else {
+ break;
+ }
+ }
+
+ // insert block on list, in yx order
+ if (rawOrder) {
+ blk2 = blk0;
+ blk3 = NULL;
+ blk0 = blk1;
+ } else {
+ for (blk2 = NULL, blk3 = yxBlocks;
+ blk3 && !blk1->yxBefore(blk3);
+ blk2 = blk3, blk3 = blk3->next) ;
+ }
+ blk1->next = blk3;
+ if (blk2) {
+ blk2->next = blk1;
+ } else {
+ yxBlocks = blk1;
+ }
+ }
+
+#if 0 // for debugging
+ printf("*** blocks in yx order ***\n");
+ for (blk0 = yxBlocks; blk0; blk0 = blk0->next) {
+ printf("[block: x=%.2f..%.2f y=%.2f..%.2f]\n",
+ blk0->xMin, blk0->xMax, blk0->yMin, blk0->yMax);
+ for (line0 = blk0->lines; line0; line0 = line0->next) {
+ printf(" [line: x=%.2f..%.2f y=%.2f..%.2f base=%.2f len=%d]\n",
+ line0->xMin, line0->xMax, line0->yMin, line0->yMax,
+ line0->yBase, line0->len);
+ for (word0 = line0->words; word0; word0 = word0->next) {
+ printf(" word: x=%.2f..%.2f y=%.2f..%.2f base=%.2f space=%d: '",
+ word0->xMin, word0->xMax, word0->yMin, word0->yMax,
+ word0->yBase, word0->spaceAfter);
+ for (i = 0; i < word0->len; ++i) {
+ fputc(word0->text[i] & 0xff, stdout);
+ }
+ printf("'\n");
+ }
+ }
+ }
+ printf("\n");
+ fflush(stdout);
+#endif
+
+ //----- merge lines and blocks, sort blocks into reading order
+
+ if (rawOrder) {
+ blocks = yxBlocks;
+
+ } else {
+ blocks = NULL;
+ blk0 = NULL;
+ blkStack = NULL;
+ while (yxBlocks) {
+
+ // find the next two blocks:
+ // - if the depth-first traversal stack is empty, take the first
+ // (upper-left-most) two blocks on the yx-sorted block list
+ // - otherwise, find the two upper-left-most blocks under the top
+ // block on the stack
+ if (blkStack) {
+ blk3 = blk4 = blk5 = blk6 = NULL;
+ for (blk1 = NULL, blk2 = yxBlocks;
+ blk2;
+ blk1 = blk2, blk2 = blk2->next) {
+ if (blk2->yMin > blkStack->yMin &&
+ blk2->xMax > blkStack->xMin &&
+ blk2->xMin < blkStack->xMax) {
+ if (!blk4 || blk2->yxBefore(blk4)) {
+ blk5 = blk3;
+ blk6 = blk4;
+ blk3 = blk1;
+ blk4 = blk2;
+ } else if (!blk6 || blk2->yxBefore(blk6)) {
+ blk5 = blk1;
+ blk6 = blk2;
+ }
+ }
+ }
+ } else {
+ blk3 = NULL;
+ blk4 = yxBlocks;
+ blk5 = yxBlocks;
+ blk6 = yxBlocks->next;
+ }
+
+ // merge case 1:
+ // | | |
+ // | blkStack | | blkStack
+ // +---------------------+ --> +--------------
+ // +------+ +------+ +-----------+
+ // | blk4 | | blk6 | ... | blk4+blk6 |
+ // +------+ +------+ +-----------+
+ yLimit = 0; // make gcc happy
+ if (blkStack) {
+ yLimit = blkStack->yMax + blkMaxSpacing * blkStack->lines->fontSize;
+ }
+ if (blkStack && blk4 && blk6 &&
+ !blk4->lines->next && !blk6->lines->next &&
+ lineFit2(blk4->lines, blk6->lines) &&
+ blk4->yMin < yLimit &&
+ blk4->xMin > blkStack->xSpaceL &&
+ blkStack->xMin > blk4->xSpaceL &&
+ blk6->xMax < blkStack->xSpaceR) {
+ blk4->mergeRight(blk6);
+ if (blk5) {
+ blk5->next = blk6->next;
+ } else {
+ yxBlocks = blk6->next;
+ }
+ delete blk6;
+
+ // merge case 2:
+ // | | | |
+ // | blkStack | | |
+ // +---------------------+ --> | blkStack+blk2 |
+ // +---------------------+ | |
+ // | blk4 | | |
+ // | | | |
+ } else if (blkStack && blk4 &&
+ blk4->yMin < yLimit &&
+ blockFit2(blkStack, blk4)) {
+ blkStack->mergeBelow(blk4);
+ if (blk3) {
+ blk3->next = blk4->next;
+ } else {
+ yxBlocks = blk4->next;
+ }
+ delete blk4;
+
+ // if any of:
+ // 1. no block found
+ // 2. non-fully overlapping block found
+ // 3. large vertical gap above the overlapping block
+ // then pop the stack and try again
+ } else if (!blk4 ||
+ (blkStack && (blk4->xMin < blkStack->xSpaceL ||
+ blk4->xMax > blkStack->xSpaceR ||
+ blk4->yMin - blkStack->yMax >
+ blkMaxSortSpacing * blkStack->maxFontSize))) {
+ blkStack = blkStack->stackNext;
+
+ // add a block to the sorted list
+ } else {
+
+ // remove the block from the yx-sorted list
+ if (blk3) {
+ blk3->next = blk4->next;
+ } else {
+ yxBlocks = blk4->next;
+ }
+ blk4->next = NULL;
+
+ // append the block to the reading-order list
+ if (blk0) {
+ blk0->next = blk4;
+ } else {
+ blocks = blk4;
+ }
+ blk0 = blk4;
+
+ // push the block on the traversal stack
+ if (!physLayout) {
+ blk4->stackNext = blkStack;
+ blkStack = blk4;