+
+ //----- merge words (join list entries that are pieces of the same word)
+
+ word0 = words;
+ while (word0) {
+ sz = word0->fontSize;
+
+ // look for adjacent text which is part of the same word, and
+ // merge it into this word
+ xLimit = word0->xMax + sz * word0->font->minSpaceWidth; // a candidate must start left of this x
+ if (rawOrder) {
+ word1 = word0; // raw order: only the immediate successor is tested
+ word2 = word0->next;
+ found = word2 &&
+ word2->xMin < xLimit &&
+ word2->font == word0->font &&
+ fabs(word2->fontSize - sz) < 0.05 &&
+ fabs(word2->yBase - word0->yBase) < 0.05 &&
+ word2->charPos == word0->charPos + word0->charLen; // must start where word0's char range ends
+ } else {
+ found = gFalse;
+ for (word1 = word0, word2 = word0->next; // word1 trails word2 so word2 can be unlinked below
+ word2 && word2->xMin < xLimit; // scan stops at the first word starting at/after xLimit
+ word1 = word2, word2 = word2->next) {
+ if (word2->font == word0->font && // same four tests as the rawOrder branch
+ fabs(word2->fontSize - sz) < 0.05 &&
+ fabs(word2->yBase - word0->yBase) < 0.05 &&
+ word2->charPos == word0->charPos + word0->charLen) {
+ found = gTrue;
+ break;
+ }
+ }
+ }
+ if (found) {
+ word0->merge(word2);
+ word1->next = word2->next; // unlink word2 from the list before freeing it
+ delete word2;
+ continue; // word0 is not advanced: the loop re-runs on the same word
+ }
+
+ word0 = word0->next; // nothing merged: move to the next word
+ }
+
+#if 0 // for debugging: compiled out; dumps every word's bounds and text after merging
+ printf("*** after merging words ***\n");
+ for (word0 = words; word0; word0 = word0->next) {
+ printf("word: x=%.2f..%.2f y=%.2f..%.2f base=%.2f: '",
+ word0->xMin, word0->xMax, word0->yMin, word0->yMax, word0->yBase);
+ for (i = 0; i < word0->len; ++i) {
+ fputc(word0->text[i] & 0xff, stdout); // only the low 8 bits of each character code are printed
+ }
+ printf("'\n");
+ }
+ printf("\n");
+ fflush(stdout);
+#endif
+
+ //----- assemble words into lines (consumes the 'words' list; result is 'lineList')
+
+ lineList = line0 = NULL; // lineList: head of the line list; line0: most recently created line
+ while (words) {
+
+ // remove the first word from the word list
+ word0 = words;
+ words = words->next;
+ word0->next = NULL;
+
+ // find the best line (if any) for the word
+ if (rawOrder) {
+ if (line0 && lineFit(line0, word0, &sp2) >= 0) { // raw order: only the newest line is tried
+ line1 = line0;
+ sp1 = sp2; // sp2 is filled in by lineFit; presumably the gap before the word -- confirm
+ } else {
+ line1 = NULL;
+ sp1 = 0;
+ }
+ } else {
+ line1 = NULL;
+ fit1 = 0;
+ sp1 = 0;
+ for (line2 = lineList; line2; line2 = line2->next) {
+ fit2 = lineFit(line2, word0, &sp2);
+ if (fit2 >= 0 && (!line1 || fit2 < fit1)) { // negative = no fit; keep the smallest non-negative value
+ line1 = line2;
+ fit1 = fit2;
+ sp1 = sp2;
+ }
+ }
+ }
+
+ // found a line: append the word
+ if (line1) {
+ word1 = line1->lastWord; // word1 = previous last word of the line
+ word1->next = word0;
+ line1->lastWord = word0;
+ if (word0->xMax > line1->xMax) { // grow the line's bounds; xMin is not updated here
+ line1->xMax = word0->xMax;
+ }
+ if (word0->yMin < line1->yMin) {
+ line1->yMin = word0->yMin;
+ }
+ if (word0->yMax > line1->yMax) {
+ line1->yMax = word0->yMax;
+ }
+ line1->len += word0->len;
+ if (sp1 > line1->fontSize * line1->font->minSpaceWidth) { // gap is wide enough to count as a space
+ word1->spaceAfter = gTrue;
+ ++line1->len; // the space takes one extra slot in the line's length
+ }
+
+ // didn't find a line: create a new line
+ } else {
+ line1 = new TextLine();
+ line1->words = line1->lastWord = word0;
+ line1->xMin = word0->xMin; // the line's bounds, baseline and font start as the first word's
+ line1->xMax = word0->xMax;
+ line1->yMin = word0->yMin;
+ line1->yMax = word0->yMax;
+ line1->yBase = word0->yBase;
+ line1->font = word0->font;
+ line1->fontSize = word0->fontSize;
+ line1->len = word0->len;
+ if (line0) { // append the new line at the tail of lineList
+ line0->next = line1;
+ } else {
+ lineList = line1;
+ }
+ line0 = line1;
+ }
+ }
+
+ // build the line text: per-line arrays text[], xRight[] and col[], each with 'len' entries
+ uMap = globalParams->getTextEncoding(); // may be NULL; released with decRefCnt() after the loop
+ isUnicode = uMap ? uMap->isUnicode() : gFalse;
+
+ for (line1 = lineList; line1; line1 = line1->next) {
+ line1->text = (Unicode *)gmalloc(line1->len * sizeof(Unicode));
+ line1->xRight = (double *)gmalloc(line1->len * sizeof(double));
+ line1->col = (int *)gmalloc(line1->len * sizeof(int));
+ i = 0; // i = next free slot in text[] / xRight[]
+ for (word1 = line1->words; word1; word1 = word1->next) {
+ for (j = 0; j < word1->len; ++j) {
+ line1->text[i] = word1->text[j];
+ line1->xRight[i] = word1->xRight[j];
+ ++i;
+ }
+ if (word1->spaceAfter && word1->next) { // a space is emitted only when another word follows
+ line1->text[i] = (Unicode)0x0020;
+ line1->xRight[i] = word1->next->xMin; // the space's right edge is the next word's left edge
+ ++i;
+ }
+ }
+ line1->convertedLen = 0;
+ for (j = 0; j < line1->len; ++j) {
+ line1->col[j] = line1->convertedLen; // col[j] = converted length accumulated before char j
+ if (isUnicode) {
+ ++line1->convertedLen; // Unicode output: each character adds exactly one
+ } else if (uMap) {
+ line1->convertedLen +=
+ uMap->mapUnicode(line1->text[j], buf, sizeof(buf)); // presumably the number of output bytes -- confirm
+ }
+ } // with no uMap, convertedLen stays 0 and every col[j] is 0
+
+ // check for hyphen at end of line
+ //~ need to check for other chars used as hyphens
+ if (line1->text[line1->len - 1] == (Unicode)'-') { // NOTE(review): assumes len > 0 -- confirm no empty lines
+ line1->hyphenated = gTrue;
+ }
+
+ }
+
+ if (uMap) {
+ uMap->decRefCnt();
+ }
+
+#if 0 // for debugging: compiled out; dumps each line and its words after line assembly
+ printf("*** lines in xy order ***\n");
+ for (line0 = lineList; line0; line0 = line0->next) {
+ printf("[line: x=%.2f..%.2f y=%.2f..%.2f base=%.2f len=%d]\n",
+ line0->xMin, line0->xMax, line0->yMin, line0->yMax,
+ line0->yBase, line0->len);
+ for (word0 = line0->words; word0; word0 = word0->next) {
+ printf(" word: x=%.2f..%.2f y=%.2f..%.2f base=%.2f fontSz=%.2f space=%d: '",
+ word0->xMin, word0->xMax, word0->yMin, word0->yMax,
+ word0->yBase, word0->fontSize, word0->spaceAfter);
+ for (i = 0; i < word0->len; ++i) {
+ fputc(word0->text[i] & 0xff, stdout); // only the low 8 bits of each character code are printed
+ }
+ printf("'\n");
+ }
+ }
+ printf("\n");
+ fflush(stdout);
+#endif
+
+ //----- column assignment (shift each line's col[] by a start column derived from earlier lines)
+
+ for (line1 = lineList; line1; line1 = line1->next) {
+ col1 = 0; // col1 = largest start column demanded by any earlier line
+ for (line2 = lineList; line2 != line1; line2 = line2->next) { // only lines before line1 in the list
+ if (line1->xMin >= line2->xMax) { // line1 starts at or right of line2's right edge
+ d = (int)((line1->xMin - line2->xMax) /
+ (line1->font->maxSpaceWidth * line1->fontSize)); // gap measured in maximum-width spaces
+ if (d > 4) { // the gap contributes at most 4 columns
+ d = 4;
+ }
+ col2 = line2->col[0] + line2->convertedLen + d; // line2's end column plus the gap
+ if (col2 > col1) {
+ col1 = col2;
+ }
+ } else if (line1->xMin > line2->xMin) { // line1 starts strictly inside line2's x range
+ for (i = 0; i < line2->len && line1->xMin >= line2->xRight[i]; ++i) ; // first char whose right edge is past line1->xMin
+ col2 = line2->col[i]; // NOTE(review): i == line2->len if the scan never stops -- confirm xRight[] always reaches xMax
+ if (col2 > col1) {
+ col1 = col2;
+ }
+ }
+ }
+ for (j = 0; j < line1->len; ++j) { // apply the start column to every character of line1
+ line1->col[j] += col1;
+ }
+ }
+
+#if 0 // for debugging: compiled out; dumps each line with its first column after column assignment
+ printf("*** lines in xy order, after column assignment ***\n");
+ for (line0 = lineList; line0; line0 = line0->next) {
+ printf("[line: x=%.2f..%.2f y=%.2f..%.2f base=%.2f col=%d len=%d]\n",
+ line0->xMin, line0->xMax, line0->yMin, line0->yMax,
+ line0->yBase, line0->col[0], line0->len);
+ for (word0 = line0->words; word0; word0 = word0->next) {
+ printf(" word: x=%.2f..%.2f y=%.2f..%.2f base=%.2f fontSz=%.2f space=%d: '",
+ word0->xMin, word0->xMax, word0->yMin, word0->yMax,
+ word0->yBase, word0->fontSize, word0->spaceAfter);
+ for (i = 0; i < word0->len; ++i) {
+ fputc(word0->text[i] & 0xff, stdout); // only the low 8 bits of each character code are printed
+ }
+ printf("'\n");
+ }
+ }
+ printf("\n");
+ fflush(stdout);
+#endif
+
+ //----- assemble lines into blocks (first step: order the lines and set xSpaceL / xSpaceR)
+
+ if (rawOrder) {
+
+ lines = lineList; // raw order: keep the list as is
+ for (line1 = lines; line1; line1 = line1->next) {
+ line1->xSpaceL = 0; // every line gets the full range 0..pageWidth
+ line1->xSpaceR = pageWidth;
+ }
+
+ } else {
+
+ // sort lines into yx order (insertion sort from lineList into 'lines')
+ lines = NULL;
+ while (lineList) {
+ line0 = lineList; // pop the head of lineList
+ lineList = lineList->next;
+ for (line1 = NULL, line2 = lines; // line1 trails line2; stop at the first line2 that line0 is yxBefore
+ line2 && !line0->yxBefore(line2);
+ line1 = line2, line2 = line2->next) ;
+ if (line1) { // insert line0 between line1 and line2
+ line1->next = line0;
+ } else {
+ lines = line0;
+ }
+ line0->next = line2;
+ }
+
+ // compute whitespace to left and right of each line
+ line0 = lines; // scan start; never reset, so it only moves forward as line1 advances
+ for (line1 = lines; line1; line1 = line1->next) {
+
+ // find the first vertically overlapping line
+ for (; line0 && line0->yMax < line1->yMin; line0 = line0->next) ;
+
+ // check each vertically overlapping line -- look for the nearest
+ // on each side
+ line1->xSpaceL = 0; // defaults when no neighbour is found: 0 and pageWidth
+ line1->xSpaceR = pageWidth;
+ for (line2 = line0;
+ line2 && line2->yMin < line1->yMax; // scan ends at the first line starting at/after line1->yMax
+ line2 = line2->next) {
+ if (line2->yMax > line1->yMin) { // line2 overlaps line1 vertically
+ if (line2->xMax < line1->xMin) { // line2 lies entirely to the left
+ if (line2->xMax > line1->xSpaceL) {
+ line1->xSpaceL = line2->xMax;
+ }
+ } else if (line2->xMin > line1->xMax) { // line2 lies entirely to the right
+ if (line2->xMin < line1->xSpaceR) {
+ line1->xSpaceR = line2->xMin;
+ }
+ }
+ }
+ }
+ }
+ } // (!rawOrder)
+
+#if 0 // for debugging: compiled out; dumps each line with its xSpaceL/xSpaceR once 'lines' is built
+ printf("*** lines in yx order ***\n");
+ for (line0 = lines; line0; line0 = line0->next) {
+ printf("[line: x=%.2f..%.2f y=%.2f..%.2f base=%.2f xSpaceL=%.2f xSpaceR=%.2f len=%d]\n",
+ line0->xMin, line0->xMax, line0->yMin, line0->yMax,
+ line0->yBase, line0->xSpaceL, line0->xSpaceR, line0->len);
+ for (word0 = line0->words; word0; word0 = word0->next) {
+ printf(" word: x=%.2f..%.2f y=%.2f..%.2f base=%.2f fontSz=%.2f space=%d: '",
+ word0->xMin, word0->xMax, word0->yMin, word0->yMax,
+ word0->yBase, word0->fontSize, word0->spaceAfter);
+ for (i = 0; i < word0->len; ++i) {
+ fputc(word0->text[i] & 0xff, stdout); // only the low 8 bits of each character code are printed
+ }
+ printf("'\n");
+ }
+ }
+ printf("\n");
+ fflush(stdout);
+#endif
+
+ lineList = lines;
+ yxBlocks = NULL;
+ blk0 = NULL;
+ while (lineList) {
+
+ // build a new block object
+ line0 = lineList;
+ lineList = lineList->next;
+ line0->next = NULL;
+ blk1 = new TextBlock();
+ blk1->lines = line0;
+ blk1->xMin = line0->xMin;
+ blk1->xMax = line0->xMax;
+ blk1->yMin = line0->yMin;
+ blk1->yMax = line0->yMax;
+ blk1->xSpaceL = line0->xSpaceL;
+ blk1->xSpaceR = line0->xSpaceR;
+ blk1->maxFontSize = line0->fontSize;
+
+ // find subsequent lines in the block
+ while (lineList) {
+
+ // look for the first horizontally overlapping line below this
+ // one
+ yLimit = line0->yMax + blkMaxSpacing * line0->fontSize;
+ line3 = line4 = NULL;
+ if (rawOrder) {
+ if (lineList->yMin < yLimit &&
+ lineList->xMax > blk1->xMin &&
+ lineList->xMin < blk1->xMax) {
+ line3 = NULL;
+ line4 = lineList;
+ }
+ } else {
+ for (line1 = NULL, line2 = lineList;
+ line2 && line2->yMin < yLimit;
+ line1 = line2, line2 = line2->next) {
+ if (line2->xMax > blk1->xMin &&
+ line2->xMin < blk1->xMax) {
+ line3 = line1;
+ line4 = line2;
+ break;
+ }
+ }
+ }
+
+ // if there is an overlapping line and it fits in the block, add
+ // it to the block
+ if (line4 && blockFit(blk1, line4)) {
+ if (line3) {
+ line3->next = line4->next;
+ } else {
+ lineList = line4->next;
+ }
+ line0->next = line0->flowNext = line4;
+ line4->next = NULL;
+ if (line4->xMin < blk1->xMin) {
+ blk1->xMin = line4->xMin;
+ } else if (line4->xMax > blk1->xMax) {
+ blk1->xMax = line4->xMax;
+ }
+ if (line4->yMax > blk1->yMax) {
+ blk1->yMax = line4->yMax;
+ }
+ if (line4->xSpaceL > blk1->xSpaceL) {
+ blk1->xSpaceL = line4->xSpaceL;
+ }
+ if (line4->xSpaceR < blk1->xSpaceR) {
+ blk1->xSpaceR = line4->xSpaceR;
+ }
+ if (line4->fontSize > blk1->maxFontSize) {
+ blk1->maxFontSize = line4->fontSize;
+ }
+ line0 = line4;
+
+ // otherwise, we're done with this block
+ } else {
+ break;