+void TextLineFrag::init(TextLine *lineA, int startA, int lenA) {
+ line = lineA;
+ start = startA;
+ len = lenA;
+ col = line->col[start];
+}
+
+void TextLineFrag::computeCoords(GBool oneRot) {
+ TextBlock *blk;
+ double d0, d1, d2, d3, d4;
+
+ if (oneRot) {
+
+ switch (line->rot) {
+ case 0:
+ xMin = line->edge[start];
+ xMax = line->edge[start + len];
+ yMin = line->yMin;
+ yMax = line->yMax;
+ break;
+ case 1:
+ xMin = line->xMin;
+ xMax = line->xMax;
+ yMin = line->edge[start];
+ yMax = line->edge[start + len];
+ break;
+ case 2:
+ xMin = line->edge[start + len];
+ xMax = line->edge[start];
+ yMin = line->yMin;
+ yMax = line->yMax;
+ break;
+ case 3:
+ xMin = line->xMin;
+ xMax = line->xMax;
+ yMin = line->edge[start + len];
+ yMax = line->edge[start];
+ break;
+ }
+ base = line->base;
+
+ } else {
+
+ if (line->rot == 0 && line->blk->page->primaryRot == 0) {
+
+ xMin = line->edge[start];
+ xMax = line->edge[start + len];
+ yMin = line->yMin;
+ yMax = line->yMax;
+ base = line->base;
+
+ } else {
+
+ blk = line->blk;
+ d0 = line->edge[start];
+ d1 = line->edge[start + len];
+ d2 = d3 = d4 = 0; // make gcc happy
+
+ switch (line->rot) {
+ case 0:
+ d2 = line->yMin;
+ d3 = line->yMax;
+ d4 = line->base;
+ d0 = (d0 - blk->xMin) / (blk->xMax - blk->xMin);
+ d1 = (d1 - blk->xMin) / (blk->xMax - blk->xMin);
+ d2 = (d2 - blk->yMin) / (blk->yMax - blk->yMin);
+ d3 = (d3 - blk->yMin) / (blk->yMax - blk->yMin);
+ d4 = (d4 - blk->yMin) / (blk->yMax - blk->yMin);
+ break;
+ case 1:
+ d2 = line->xMax;
+ d3 = line->xMin;
+ d4 = line->base;
+ d0 = (d0 - blk->yMin) / (blk->yMax - blk->yMin);
+ d1 = (d1 - blk->yMin) / (blk->yMax - blk->yMin);
+ d2 = (blk->xMax - d2) / (blk->xMax - blk->xMin);
+ d3 = (blk->xMax - d3) / (blk->xMax - blk->xMin);
+ d4 = (blk->xMax - d4) / (blk->xMax - blk->xMin);
+ break;
+ case 2:
+ d2 = line->yMax;
+ d3 = line->yMin;
+ d4 = line->base;
+ d0 = (blk->xMax - d0) / (blk->xMax - blk->xMin);
+ d1 = (blk->xMax - d1) / (blk->xMax - blk->xMin);
+ d2 = (blk->yMax - d2) / (blk->yMax - blk->yMin);
+ d3 = (blk->yMax - d3) / (blk->yMax - blk->yMin);
+ d4 = (blk->yMax - d4) / (blk->yMax - blk->yMin);
+ break;
+ case 3:
+ d2 = line->xMin;
+ d3 = line->xMax;
+ d4 = line->base;
+ d0 = (blk->yMax - d0) / (blk->yMax - blk->yMin);
+ d1 = (blk->yMax - d1) / (blk->yMax - blk->yMin);
+ d2 = (d2 - blk->xMin) / (blk->xMax - blk->xMin);
+ d3 = (d3 - blk->xMin) / (blk->xMax - blk->xMin);
+ d4 = (d4 - blk->xMin) / (blk->xMax - blk->xMin);
+ break;
+ }
+
+ switch (line->blk->page->primaryRot) {
+ case 0:
+ xMin = blk->xMin + d0 * (blk->xMax - blk->xMin);
+ xMax = blk->xMin + d1 * (blk->xMax - blk->xMin);
+ yMin = blk->yMin + d2 * (blk->yMax - blk->yMin);
+ yMax = blk->yMin + d3 * (blk->yMax - blk->yMin);
+ base = blk->yMin + base * (blk->yMax - blk->yMin);
+ break;
+ case 1:
+ xMin = blk->xMax - d3 * (blk->xMax - blk->xMin);
+ xMax = blk->xMax - d2 * (blk->xMax - blk->xMin);
+ yMin = blk->yMin + d0 * (blk->yMax - blk->yMin);
+ yMax = blk->yMin + d1 * (blk->yMax - blk->yMin);
+ base = blk->xMax - d4 * (blk->xMax - blk->xMin);
+ break;
+ case 2:
+ xMin = blk->xMax - d1 * (blk->xMax - blk->xMin);
+ xMax = blk->xMax - d0 * (blk->xMax - blk->xMin);
+ yMin = blk->yMax - d3 * (blk->yMax - blk->yMin);
+ yMax = blk->yMax - d2 * (blk->yMax - blk->yMin);
+ base = blk->yMax - d4 * (blk->yMax - blk->yMin);
+ break;
+ case 3:
+ xMin = blk->xMin + d2 * (blk->xMax - blk->xMin);
+ xMax = blk->xMin + d3 * (blk->xMax - blk->xMin);
+ yMin = blk->yMax - d1 * (blk->yMax - blk->yMin);
+ yMax = blk->yMax - d0 * (blk->yMax - blk->yMin);
+ base = blk->xMin + d4 * (blk->xMax - blk->xMin);
+ break;
+ }
+
+ }
+ }
+}
+
+int TextLineFrag::cmpYXPrimaryRot(const void *p1, const void *p2) {
+ TextLineFrag *frag1 = (TextLineFrag *)p1;
+ TextLineFrag *frag2 = (TextLineFrag *)p2;
+ double cmp;
+
+ cmp = 0; // make gcc happy
+ switch (frag1->line->blk->page->primaryRot) {
+ case 0:
+ if ((cmp = frag1->yMin - frag2->yMin) == 0) {
+ cmp = frag1->xMin - frag2->xMin;
+ }
+ break;
+ case 1:
+ if ((cmp = frag2->xMax - frag1->xMax) == 0) {
+ cmp = frag1->yMin - frag2->yMin;
+ }
+ break;
+ case 2:
+ if ((cmp = frag2->yMin - frag1->yMin) == 0) {
+ cmp = frag2->xMax - frag1->xMax;
+ }
+ break;
+ case 3:
+ if ((cmp = frag1->xMax - frag2->xMax) == 0) {
+ cmp = frag2->yMax - frag1->yMax;
+ }
+ break;
+ }
+ return cmp < 0 ? -1 : cmp > 0 ? 1 : 0;
+}
+
+int TextLineFrag::cmpYXLineRot(const void *p1, const void *p2) {
+ TextLineFrag *frag1 = (TextLineFrag *)p1;
+ TextLineFrag *frag2 = (TextLineFrag *)p2;
+ double cmp;
+
+ cmp = 0; // make gcc happy
+ switch (frag1->line->rot) {
+ case 0:
+ if ((cmp = frag1->yMin - frag2->yMin) == 0) {
+ cmp = frag1->xMin - frag2->xMin;
+ }
+ break;
+ case 1:
+ if ((cmp = frag2->xMax - frag1->xMax) == 0) {
+ cmp = frag1->yMin - frag2->yMin;
+ }
+ break;
+ case 2:
+ if ((cmp = frag2->yMin - frag1->yMin) == 0) {
+ cmp = frag2->xMax - frag1->xMax;
+ }
+ break;
+ case 3:
+ if ((cmp = frag1->xMax - frag2->xMax) == 0) {
+ cmp = frag2->yMax - frag1->yMax;
+ }
+ break;
+ }
+ return cmp < 0 ? -1 : cmp > 0 ? 1 : 0;
+}
+
+int TextLineFrag::cmpXYLineRot(const void *p1, const void *p2) {
+ TextLineFrag *frag1 = (TextLineFrag *)p1;
+ TextLineFrag *frag2 = (TextLineFrag *)p2;
+ double cmp;
+
+ cmp = 0; // make gcc happy
+ switch (frag1->line->rot) {
+ case 0:
+ if ((cmp = frag1->xMin - frag2->xMin) == 0) {
+ cmp = frag1->yMin - frag2->yMin;
+ }
+ break;
+ case 1:
+ if ((cmp = frag1->yMin - frag2->yMin) == 0) {
+ cmp = frag2->xMax - frag1->xMax;
+ }
+ break;
+ case 2:
+ if ((cmp = frag2->xMax - frag1->xMax) == 0) {
+ cmp = frag2->yMin - frag1->yMin;
+ }
+ break;
+ case 3:
+ if ((cmp = frag2->yMax - frag1->yMax) == 0) {
+ cmp = frag1->xMax - frag2->xMax;
+ }
+ break;
+ }
+ return cmp < 0 ? -1 : cmp > 0 ? 1 : 0;
+}
+
+//------------------------------------------------------------------------
+// TextBlock
+//------------------------------------------------------------------------
+
+TextBlock::TextBlock(TextPage *pageA, int rotA) {
+ page = pageA;
+ rot = rotA;
+ xMin = yMin = 0;
+ xMax = yMax = -1;
+ priMin = 0;
+ priMax = page->pageWidth;
+ pool = new TextPool();
+ lines = NULL;
+ curLine = NULL;
+ next = NULL;
+ stackNext = NULL;
+}
+
+TextBlock::~TextBlock() {
+ TextLine *line;
+
+ delete pool;
+ while (lines) {
+ line = lines;
+ lines = lines->next;
+ delete line;
+ }
+}
+
+void TextBlock::addWord(TextWord *word) {
+ pool->addWord(word);
+ if (xMin > xMax) {
+ xMin = word->xMin;
+ xMax = word->xMax;
+ yMin = word->yMin;
+ yMax = word->yMax;
+ } else {
+ if (word->xMin < xMin) {
+ xMin = word->xMin;
+ }
+ if (word->xMax > xMax) {
+ xMax = word->xMax;
+ }
+ if (word->yMin < yMin) {
+ yMin = word->yMin;
+ }
+ if (word->yMax > yMax) {
+ yMax = word->yMax;
+ }
+ }
+}
+
+void TextBlock::coalesce(UnicodeMap *uMap) {
+ TextWord *word0, *word1, *word2, *bestWord0, *bestWord1, *lastWord;
+ TextLine *line, *line0, *line1;
+ int poolMinBaseIdx, startBaseIdx, minBaseIdx, maxBaseIdx;
+ int baseIdx, bestWordBaseIdx, idx0, idx1;
+ double minBase, maxBase;
+ double fontSize, delta, priDelta, secDelta;
+ TextLine **lineArray;
+ GBool found;
+ int col1, col2;
+ int i, j, k;
+
+ // discard duplicated text (fake boldface, drop shadows)
+ for (idx0 = pool->minBaseIdx; idx0 <= pool->maxBaseIdx; ++idx0) {
+ word0 = pool->getPool(idx0);
+ while (word0) {
+ priDelta = dupMaxPriDelta * word0->fontSize;
+ secDelta = dupMaxSecDelta * word0->fontSize;
+ if (rot == 0 || rot == 3) {
+ maxBaseIdx = pool->getBaseIdx(word0->base + secDelta);
+ } else {
+ maxBaseIdx = pool->getBaseIdx(word0->base - secDelta);
+ }
+ found = gFalse;
+ word1 = word2 = NULL; // make gcc happy
+ for (idx1 = idx0; idx1 <= maxBaseIdx; ++idx1) {
+ if (idx1 == idx0) {
+ word1 = word0;
+ word2 = word0->next;
+ } else {
+ word1 = NULL;
+ word2 = pool->getPool(idx1);
+ }
+ for (; word2; word1 = word2, word2 = word2->next) {
+ if (word2->len == word0->len &&
+ !memcmp(word2->text, word0->text,
+ word0->len * sizeof(Unicode))) {
+ switch (rot) {
+ case 0:
+ case 2:
+ found = fabs(word0->xMin - word2->xMin) < priDelta &&
+ fabs(word0->xMax - word2->xMax) < priDelta &&
+ fabs(word0->yMin - word2->yMin) < secDelta &&
+ fabs(word0->yMax - word2->yMax) < secDelta;
+ break;
+ case 1:
+ case 3:
+ found = fabs(word0->xMin - word2->xMin) < secDelta &&
+ fabs(word0->xMax - word2->xMax) < secDelta &&
+ fabs(word0->yMin - word2->yMin) < priDelta &&
+ fabs(word0->yMax - word2->yMax) < priDelta;
+ break;
+ }
+ }
+ if (found) {
+ break;
+ }
+ }
+ if (found) {
+ break;
+ }
+ }
+ if (found) {
+ if (word1) {
+ word1->next = word2->next;
+ } else {
+ pool->setPool(idx1, word2->next);
+ }
+ delete word2;
+ } else {
+ word0 = word0->next;
+ }
+ }
+ }
+
+ // build the lines
+ curLine = NULL;
+ poolMinBaseIdx = pool->minBaseIdx;
+ charCount = 0;
+ nLines = 0;
+ while (1) {
+
+ // find the first non-empty line in the pool
+ for (;
+ poolMinBaseIdx <= pool->maxBaseIdx && !pool->getPool(poolMinBaseIdx);
+ ++poolMinBaseIdx) ;
+ if (poolMinBaseIdx > pool->maxBaseIdx) {
+ break;
+ }
+
+ // look for the left-most word in the first four lines of the
+ // pool -- this avoids starting with a superscript word
+ startBaseIdx = poolMinBaseIdx;
+ for (baseIdx = poolMinBaseIdx + 1;
+ baseIdx < poolMinBaseIdx + 4 && baseIdx <= pool->maxBaseIdx;
+ ++baseIdx) {
+ if (!pool->getPool(baseIdx)) {
+ continue;
+ }
+ if (pool->getPool(baseIdx)->primaryCmp(pool->getPool(startBaseIdx))
+ < 0) {
+ startBaseIdx = baseIdx;
+ }
+ }
+
+ // create a new line
+ word0 = pool->getPool(startBaseIdx);
+ pool->setPool(startBaseIdx, word0->next);
+ word0->next = NULL;
+ line = new TextLine(this, word0->rot, word0->base);
+ line->addWord(word0);
+ lastWord = word0;
+
+ // compute the search range
+ fontSize = word0->fontSize;
+ minBase = word0->base - maxIntraLineDelta * fontSize;
+ maxBase = word0->base + maxIntraLineDelta * fontSize;
+ minBaseIdx = pool->getBaseIdx(minBase);
+ maxBaseIdx = pool->getBaseIdx(maxBase);
+
+ // find the rest of the words in this line
+ while (1) {
+
+ // find the left-most word whose baseline is in the range for
+ // this line
+ bestWordBaseIdx = 0;
+ bestWord0 = bestWord1 = NULL;
+ for (baseIdx = minBaseIdx; baseIdx <= maxBaseIdx; ++baseIdx) {
+ for (word0 = NULL, word1 = pool->getPool(baseIdx);
+ word1;
+ word0 = word1, word1 = word1->next) {
+ if (word1->base >= minBase &&
+ word1->base <= maxBase &&
+ (delta = lastWord->primaryDelta(word1)) >=
+ minCharSpacing * fontSize) {
+ if (delta < maxWordSpacing * fontSize &&
+ (!bestWord1 || word1->primaryCmp(bestWord1) < 0)) {
+ bestWordBaseIdx = baseIdx;
+ bestWord0 = word0;
+ bestWord1 = word1;
+ }
+ break;
+ }
+ }
+ }
+ if (!bestWord1) {
+ break;
+ }
+
+ // remove it from the pool, and add it to the line
+ if (bestWord0) {
+ bestWord0->next = bestWord1->next;
+ } else {
+ pool->setPool(bestWordBaseIdx, bestWord1->next);
+ }
+ bestWord1->next = NULL;
+ line->addWord(bestWord1);
+ lastWord = bestWord1;
+ }
+
+ // add the line
+ if (curLine && line->cmpYX(curLine) > 0) {
+ line0 = curLine;
+ line1 = curLine->next;
+ } else {
+ line0 = NULL;
+ line1 = lines;
+ }
+ for (;
+ line1 && line->cmpYX(line1) > 0;
+ line0 = line1, line1 = line1->next) ;
+ if (line0) {
+ line0->next = line;
+ } else {
+ lines = line;
+ }
+ line->next = line1;
+ curLine = line;
+ line->coalesce(uMap);
+ charCount += line->len;
+ ++nLines;
+ }
+
+ // sort lines into xy order for column assignment
+ lineArray = (TextLine **)gmalloc(nLines * sizeof(TextLine *));
+ for (line = lines, i = 0; line; line = line->next, ++i) {
+ lineArray[i] = line;
+ }
+ qsort(lineArray, nLines, sizeof(TextLine *), &TextLine::cmpXY);
+
+ // column assignment
+ nColumns = 0;
+ for (i = 0; i < nLines; ++i) {
+ line0 = lineArray[i];
+ col1 = 0;
+ for (j = 0; j < i; ++j) {
+ line1 = lineArray[j];
+ if (line1->primaryDelta(line0) >= 0) {
+ col2 = line1->col[line1->len] + 1;
+ } else {
+ k = 0; // make gcc happy
+ switch (rot) {
+ case 0:
+ for (k = 0;
+ k < line1->len &&
+ line0->xMin >= 0.5 * (line1->edge[k] + line1->edge[k+1]);
+ ++k) ;
+ break;
+ case 1:
+ for (k = 0;
+ k < line1->len &&
+ line0->yMin >= 0.5 * (line1->edge[k] + line1->edge[k+1]);
+ ++k) ;
+ break;
+ case 2:
+ for (k = 0;
+ k < line1->len &&
+ line0->xMax <= 0.5 * (line1->edge[k] + line1->edge[k+1]);
+ ++k) ;
+ break;
+ case 3:
+ for (k = 0;
+ k < line1->len &&
+ line0->yMax <= 0.5 * (line1->edge[k] + line1->edge[k+1]);
+ ++k) ;
+ break;
+ }
+ col2 = line1->col[k];
+ }
+ if (col2 > col1) {
+ col1 = col2;
+ }
+ }
+ for (k = 0; k <= line0->len; ++k) {
+ line0->col[k] += col1;
+ }
+ if (line0->col[line0->len] > nColumns) {
+ nColumns = line0->col[line0->len];
+ }
+ }
+ gfree(lineArray);
+}
+
+void TextBlock::updatePriMinMax(TextBlock *blk) {
+ double newPriMin, newPriMax;
+ GBool gotPriMin, gotPriMax;
+
+ gotPriMin = gotPriMax = gFalse;
+ newPriMin = newPriMax = 0; // make gcc happy
+ switch (page->primaryRot) {
+ case 0:
+ case 2:
+ if (blk->yMin < yMax && blk->yMax > yMin) {
+ if (blk->xMin < xMin) {
+ newPriMin = blk->xMax;
+ gotPriMin = gTrue;
+ }
+ if (blk->xMax > xMax) {
+ newPriMax = blk->xMin;
+ gotPriMax = gTrue;
+ }
+ }
+ break;
+ case 1:
+ case 3:
+ if (blk->xMin < xMax && blk->xMax > xMin) {
+ if (blk->yMin < yMin) {
+ newPriMin = blk->yMax;
+ gotPriMin = gTrue;
+ }
+ if (blk->yMax > yMax) {
+ newPriMax = blk->yMin;
+ gotPriMax = gTrue;
+ }
+ }
+ break;
+ }
+ if (gotPriMin) {
+ if (newPriMin > xMin) {
+ newPriMin = xMin;
+ }
+ if (newPriMin > priMin) {
+ priMin = newPriMin;
+ }
+ }
+ if (gotPriMax) {
+ if (newPriMax < xMax) {
+ newPriMax = xMax;
+ }
+ if (newPriMax < priMax) {
+ priMax = newPriMax;
+ }
+ }
+}
+
+int TextBlock::cmpXYPrimaryRot(const void *p1, const void *p2) {
+ TextBlock *blk1 = *(TextBlock **)p1;
+ TextBlock *blk2 = *(TextBlock **)p2;
+ double cmp;
+
+ cmp = 0; // make gcc happy
+ switch (blk1->page->primaryRot) {
+ case 0:
+ if ((cmp = blk1->xMin - blk2->xMin) == 0) {
+ cmp = blk1->yMin - blk2->yMin;
+ }
+ break;
+ case 1:
+ if ((cmp = blk1->yMin - blk2->yMin) == 0) {
+ cmp = blk2->xMax - blk1->xMax;
+ }
+ break;
+ case 2:
+ if ((cmp = blk2->xMax - blk1->xMax) == 0) {
+ cmp = blk2->yMin - blk1->yMin;
+ }
+ break;
+ case 3:
+ if ((cmp = blk2->yMax - blk1->yMax) == 0) {
+ cmp = blk1->xMax - blk2->xMax;
+ }
+ break;
+ }
+ return cmp < 0 ? -1 : cmp > 0 ? 1 : 0;
+}
+
+int TextBlock::cmpYXPrimaryRot(const void *p1, const void *p2) {
+ TextBlock *blk1 = *(TextBlock **)p1;
+ TextBlock *blk2 = *(TextBlock **)p2;
+ double cmp;
+
+ cmp = 0; // make gcc happy
+ switch (blk1->page->primaryRot) {
+ case 0:
+ if ((cmp = blk1->yMin - blk2->yMin) == 0) {
+ cmp = blk1->xMin - blk2->xMin;
+ }
+ break;
+ case 1:
+ if ((cmp = blk2->xMax - blk1->xMax) == 0) {
+ cmp = blk1->yMin - blk2->yMin;
+ }
+ break;
+ case 2:
+ if ((cmp = blk2->yMin - blk1->yMin) == 0) {
+ cmp = blk2->xMax - blk1->xMax;
+ }
+ break;
+ case 3:
+ if ((cmp = blk1->xMax - blk2->xMax) == 0) {
+ cmp = blk2->yMax - blk1->yMax;
+ }
+ break;
+ }
+ return cmp < 0 ? -1 : cmp > 0 ? 1 : 0;
+}
+
+int TextBlock::primaryCmp(TextBlock *blk) {
+ double cmp;
+
+ cmp = 0; // make gcc happy
+ switch (rot) {
+ case 0:
+ cmp = xMin - blk->xMin;
+ break;
+ case 1:
+ cmp = yMin - blk->yMin;
+ break;
+ case 2:
+ cmp = blk->xMax - xMax;
+ break;
+ case 3:
+ cmp = blk->yMax - yMax;
+ break;
+ }
+ return cmp < 0 ? -1 : cmp > 0 ? 1 : 0;
+}
+
+double TextBlock::secondaryDelta(TextBlock *blk) {
+ double delta;
+
+ delta = 0; // make gcc happy
+ switch (rot) {
+ case 0:
+ delta = blk->yMin - yMax;
+ break;
+ case 1:
+ delta = xMin - blk->xMax;
+ break;
+ case 2:
+ delta = yMin - blk->yMax;
+ break;
+ case 3:
+ delta = blk->xMin - xMax;
+ break;
+ }
+ return delta;
+}
+
+GBool TextBlock::isBelow(TextBlock *blk) {
+ GBool below;
+
+ below = gFalse; // make gcc happy
+ switch (page->primaryRot) {
+ case 0:
+ below = xMin >= blk->priMin && xMax <= blk->priMax &&
+ yMin > blk->yMin;
+ break;
+ case 1:
+ below = yMin >= blk->priMin && yMax <= blk->priMax &&
+ xMax < blk->xMax;
+ break;
+ case 2:
+ below = xMin >= blk->priMin && xMax <= blk->priMax &&
+ yMax < blk->yMax;
+ break;
+ case 3:
+ below = yMin >= blk->priMin && yMax <= blk->priMax &&
+ xMin > blk->xMin;
+ break;
+ }
+
+ return below;
+}
+
+//------------------------------------------------------------------------
+// TextFlow
+//------------------------------------------------------------------------
+
+TextFlow::TextFlow(TextPage *pageA, TextBlock *blk) {
+ page = pageA;
+ xMin = blk->xMin;
+ xMax = blk->xMax;
+ yMin = blk->yMin;
+ yMax = blk->yMax;
+ priMin = blk->priMin;
+ priMax = blk->priMax;
+ blocks = lastBlk = blk;