+ yxBlocks = blk1;
+ }
+ }
+
+#if 0 // for debugging
+ printf("*** blocks in yx order ***\n");
+ for (blk0 = yxBlocks; blk0; blk0 = blk0->next) {
+ printf("[block: x=%.2f..%.2f y=%.2f..%.2f]\n",
+ blk0->xMin, blk0->xMax, blk0->yMin, blk0->yMax);
+ for (line0 = blk0->lines; line0; line0 = line0->next) {
+ printf(" [line: x=%.2f..%.2f y=%.2f..%.2f base=%.2f len=%d]\n",
+ line0->xMin, line0->xMax, line0->yMin, line0->yMax,
+ line0->yBase, line0->len);
+ for (word0 = line0->words; word0; word0 = word0->next) {
+ printf(" word: x=%.2f..%.2f y=%.2f..%.2f base=%.2f space=%d: '",
+ word0->xMin, word0->xMax, word0->yMin, word0->yMax,
+ word0->yBase, word0->spaceAfter);
+ for (i = 0; i < word0->len; ++i) {
+ fputc(word0->text[i] & 0xff, stdout);
+ }
+ printf("'\n");
+ }
+ }
+ }
+ printf("\n");
+ fflush(stdout);
+#endif
+
+ //----- merge lines and blocks, sort blocks into reading order
+
+ if (rawOrder) {
+ blocks = yxBlocks;
+
+ } else {
+ blocks = NULL;
+ blk0 = NULL;
+ blkStack = NULL;
+ while (yxBlocks) {
+
+ // find the next two blocks:
+ // - if the depth-first traversal stack is empty, take the first
+ // (upper-left-most) two blocks on the yx-sorted block list
+ // - otherwise, find the two upper-left-most blocks under the top
+ // block on the stack
+ if (blkStack) {
+ blk3 = blk4 = blk5 = blk6 = NULL;
+ for (blk1 = NULL, blk2 = yxBlocks;
+ blk2;
+ blk1 = blk2, blk2 = blk2->next) {
+ if (blk2->yMin > blkStack->yMin &&
+ blk2->xMax > blkStack->xMin &&
+ blk2->xMin < blkStack->xMax) {
+ if (!blk4 || blk2->yxBefore(blk4)) {
+ blk5 = blk3;
+ blk6 = blk4;
+ blk3 = blk1;
+ blk4 = blk2;
+ } else if (!blk6 || blk2->yxBefore(blk6)) {
+ blk5 = blk1;
+ blk6 = blk2;
+ }
+ }
+ }
+ } else {
+ blk3 = NULL;
+ blk4 = yxBlocks;
+ blk5 = yxBlocks;
+ blk6 = yxBlocks->next;
+ }
+
+ // merge case 1:
+ // | | |
+ // | blkStack | | blkStack
+ // +---------------------+ --> +--------------
+ // +------+ +------+ +-----------+
+ // | blk4 | | blk6 | ... | blk4+blk6 |
+ // +------+ +------+ +-----------+
+ yLimit = 0; // make gcc happy
+ if (blkStack) {
+ yLimit = blkStack->yMax + blkMaxSpacing * blkStack->lines->fontSize;
+ }
+ if (blkStack && blk4 && blk6 &&
+ !blk4->lines->next && !blk6->lines->next &&
+ lineFit2(blk4->lines, blk6->lines) &&
+ blk4->yMin < yLimit &&
+ blk4->xMin > blkStack->xSpaceL &&
+ blkStack->xMin > blk4->xSpaceL &&
+ blk6->xMax < blkStack->xSpaceR) {
+ blk4->mergeRight(blk6);
+ if (blk5) {
+ blk5->next = blk6->next;
+ } else {
+ yxBlocks = blk6->next;
+ }
+ delete blk6;
+
+ // merge case 2:
+ // | | | |
+ // | blkStack | | |
+ // +---------------------+ --> | blkStack+blk2 |
+ // +---------------------+ | |
+ // | blk4 | | |
+ // | | | |
+ } else if (blkStack && blk4 &&
+ blk4->yMin < yLimit &&
+ blockFit2(blkStack, blk4)) {
+ blkStack->mergeBelow(blk4);
+ if (blk3) {
+ blk3->next = blk4->next;
+ } else {
+ yxBlocks = blk4->next;
+ }
+ delete blk4;
+
+ // if any of:
+ // 1. no block found
+ // 2. non-fully overlapping block found
+ // 3. large vertical gap above the overlapping block
+ // then pop the stack and try again
+ } else if (!blk4 ||
+ (blkStack && (blk4->xMin < blkStack->xSpaceL ||
+ blk4->xMax > blkStack->xSpaceR ||
+ blk4->yMin - blkStack->yMax >
+ blkMaxSortSpacing * blkStack->maxFontSize))) {
+ blkStack = blkStack->stackNext;
+
+ // add a block to the sorted list
+ } else {
+
+ // remove the block from the yx-sorted list
+ if (blk3) {
+ blk3->next = blk4->next;
+ } else {
+ yxBlocks = blk4->next;
+ }
+ blk4->next = NULL;
+
+ // append the block to the reading-order list
+ if (blk0) {
+ blk0->next = blk4;
+ } else {
+ blocks = blk4;
+ }
+ blk0 = blk4;
+
+ // push the block on the traversal stack
+ if (!physLayout) {
+ blk4->stackNext = blkStack;
+ blkStack = blk4;
+ }
+ }
+ }
+ } // (!rawOrder)
+
+#if 0 // for debugging
+ printf("*** blocks in reading order (after merging) ***\n");
+ for (blk0 = blocks; blk0; blk0 = blk0->next) {
+ printf("[block: x=%.2f..%.2f y=%.2f..%.2f]\n",
+ blk0->xMin, blk0->xMax, blk0->yMin, blk0->yMax);
+ for (line0 = blk0->lines; line0; line0 = line0->next) {
+ printf(" [line: x=%.2f..%.2f y=%.2f..%.2f base=%.2f len=%d]\n",
+ line0->xMin, line0->xMax, line0->yMin, line0->yMax,
+ line0->yBase, line0->len);
+ for (word0 = line0->words; word0; word0 = word0->next) {
+ printf(" word: x=%.2f..%.2f y=%.2f..%.2f base=%.2f space=%d: '",
+ word0->xMin, word0->xMax, word0->yMin, word0->yMax,
+ word0->yBase, word0->spaceAfter);
+ for (i = 0; i < word0->len; ++i) {
+ fputc(word0->text[i] & 0xff, stdout);
+ }
+ printf("'\n");
+ }
+ }
+ }
+ printf("\n");
+ fflush(stdout);
+#endif
+
+ //----- assemble blocks into flows
+
+ if (rawOrder) {
+
+ // one flow per block
+ flow0 = NULL;
+ while (blocks) {
+ flow1 = new TextFlow();
+ flow1->blocks = blocks;
+ flow1->lines = blocks->lines;
+ flow1->yMin = blocks->yMin;
+ flow1->yMax = blocks->yMax;
+ blocks = blocks->next;
+ flow1->blocks->next = NULL;
+ if (flow0) {
+ flow0->next = flow1;
+ } else {
+ flows = flow1;
+ }
+ flow0 = flow1;
+ }
+
+ } else {
+
+ // compute whitespace above and below each block
+ for (blk0 = blocks; blk0; blk0 = blk0->next) {
+ blk0->ySpaceT = 0;
+ blk0->ySpaceB = pageHeight;
+
+ // check each horizontally overlapping block
+ for (blk1 = blocks; blk1; blk1 = blk1->next) {
+ if (blk1 != blk0 &&
+ blk1->xMin < blk0->xMax &&
+ blk1->xMax > blk0->xMin) {
+ if (blk1->yMax < blk0->yMin) {
+ if (blk1->yMax > blk0->ySpaceT) {
+ blk0->ySpaceT = blk1->yMax;
+ }
+ } else if (blk1->yMin > blk0->yMax) {
+ if (blk1->yMin < blk0->ySpaceB) {
+ blk0->ySpaceB = blk1->yMin;
+ }
+ }
+ }
+ }
+ }
+
+ flow0 = NULL;
+ while (blocks) {
+
+ // build a new flow object
+ flow1 = new TextFlow();
+ flow1->blocks = blocks;
+ flow1->lines = blocks->lines;
+ flow1->yMin = blocks->yMin;
+ flow1->yMax = blocks->yMax;
+ flow1->ySpaceT = blocks->ySpaceT;
+ flow1->ySpaceB = blocks->ySpaceB;
+
+ // find subsequent blocks in the flow
+ for (blk1 = blocks, blk2 = blocks->next;
+ blk2 && flowFit(flow1, blk2);
+ blk1 = blk2, blk2 = blk2->next) {
+ if (blk2->yMin < flow1->yMin) {
+ flow1->yMin = blk2->yMin;
+ }
+ if (blk2->yMax > flow1->yMax) {
+ flow1->yMax = blk2->yMax;
+ }
+ if (blk2->ySpaceT > flow1->ySpaceT) {
+ flow1->ySpaceT = blk2->ySpaceT;
+ }
+ if (blk2->ySpaceB < flow1->ySpaceB) {
+ flow1->ySpaceB = blk2->ySpaceB;
+ }
+ for (line1 = blk1->lines; line1->next; line1 = line1->next) ;
+ line1->flowNext = blk2->lines;
+ }
+
+ // chop the block list
+ blocks = blk1->next;
+ blk1->next = NULL;
+
+ // append the flow to the list
+ if (flow0) {
+ flow0->next = flow1;
+ } else {
+ flows = flow1;
+ }
+ flow0 = flow1;
+ }
+ }
+
+#if 0 // for debugging
+ printf("*** flows ***\n");
+ for (flow0 = flows; flow0; flow0 = flow0->next) {
+ printf("[flow]\n");
+ for (blk0 = flow0->blocks; blk0; blk0 = blk0->next) {
+ printf(" [block: x=%.2f..%.2f y=%.2f..%.2f ySpaceT=%.2f ySpaceB=%.2f]\n",
+ blk0->xMin, blk0->xMax, blk0->yMin, blk0->yMax,
+ blk0->ySpaceT, blk0->ySpaceB);
+ for (line0 = blk0->lines; line0; line0 = line0->next) {
+ printf(" [line: x=%.2f..%.2f y=%.2f..%.2f base=%.2f len=%d]\n",
+ line0->xMin, line0->xMax, line0->yMin, line0->yMax,
+ line0->yBase, line0->len);
+ for (word0 = line0->words; word0; word0 = word0->next) {
+ printf(" word: x=%.2f..%.2f y=%.2f..%.2f base=%.2f space=%d: '",
+ word0->xMin, word0->xMax, word0->yMin, word0->yMax,
+ word0->yBase, word0->spaceAfter);
+ for (i = 0; i < word0->len; ++i) {
+ fputc(word0->text[i] & 0xff, stdout);
+ }
+ printf("'\n");
+ }
+ }
+ }
+ }
+ printf("\n");
+ fflush(stdout);
+#endif
+
+ //----- sort lines into yx order
+
+ // (the block/line merging process doesn't maintain the full-page
+ // linked list of lines)
+
+ lines = NULL;
+ if (rawOrder) {
+ line0 = NULL;
+ for (flow0 = flows; flow0; flow0 = flow0->next) {
+ for (line1 = flow0->lines; line1; line1 = line1->flowNext) {
+ if (line0) {
+ line0->pageNext = line1;
+ } else {
+ lines = line1;
+ }
+ line0 = line1;
+ }
+ }
+ } else {
+ for (flow0 = flows; flow0; flow0 = flow0->next) {
+ for (line0 = flow0->lines; line0; line0 = line0->flowNext) {
+ for (line1 = NULL, line2 = lines;
+ line2 && !line0->yxBefore(line2);
+ line1 = line2, line2 = line2->pageNext) ;
+ if (line1) {
+ line1->pageNext = line0;
+ } else {
+ lines = line0;
+ }
+ line0->pageNext = line2;
+ }
+ }
+ }
+
+#if 0 // for debugging
+ printf("*** lines in yx order ***\n");
+ for (line0 = lines; line0; line0 = line0->pageNext) {
+ printf("[line: x=%.2f..%.2f y=%.2f..%.2f base=%.2f xSpaceL=%.2f xSpaceR=%.2f col=%d len=%d]\n",
+ line0->xMin, line0->xMax, line0->yMin, line0->yMax,
+ line0->yBase, line0->xSpaceL, line0->xSpaceR, line0->col[0],
+ line0->len);
+ for (word0 = line0->words; word0; word0 = word0->next) {
+ printf(" word: x=%.2f..%.2f y=%.2f..%.2f base=%.2f space=%d: '",
+ word0->xMin, word0->xMax, word0->yMin, word0->yMax,
+ word0->yBase, word0->spaceAfter);
+ for (i = 0; i < word0->len; ++i) {
+ fputc(word0->text[i] & 0xff, stdout);
+ }
+ printf("'\n");