1 //========================================================================
5 // Copyright 1997 Derek B. Noonburg
7 //========================================================================
10 #pragma implementation
22 #include "FontEncoding.h"
23 #include "TextOutputDev.h"
26 // needed for setting type/creator of MacOS files
27 #include "ICSupport.h"
30 #include "TextOutputFontInfo.h"
32 //------------------------------------------------------------------------
33 // Character substitutions
34 //------------------------------------------------------------------------
// Character names that receive a general substitution.  This array is the
// name list handed to generalSubstEncoding, which TextString::addChar
// queries with getCharCode().
36 static char *generalSubstNames[] = {
// Encoding object built over generalSubstNames.  TextString::addChar calls
// getCharCode() on it and uses a non-negative result as an index into
// generalSubst.
// NOTE(review): the second argument starts with sizeof(generalSubstNames) /
// ... -- presumably the element count; confirm against the full statement.
59 static FontEncoding generalSubstEncoding(generalSubstNames,
60 sizeof(generalSubstNames) /
// Replacement character names, indexed by the code that
// generalSubstEncoding.getCharCode() returns (see TextString::addChar).
63 static char *generalSubst[] = {
// Replacement strings for characters that have no plain 7-bit form.
// TextString::addChar indexes this table with (c1 - 128).
86 static char *ascii7Subst[] = {
87 "A", "A", "A", "A", // A{acute,circumflex,dieresis,grave}
88 "A", "A", // A{ring,tilde}
91 "E", "E", "E", "E", // E{acute,circumflex,dieresis,grave}
92 "I", "I", "I", "I", // I{acute,circumflex,dieresis,grave}
95 "O", "O", "O", "O", // O{acute,circumflex,dieresis,grave}
96 "O", "O", // O{slash,tilde}
99 "U", "U", "U", "U", // U{acute,circumflex,dieresis,grave}
100 "Y", "Y", // Y{acute,dieresis}
102 "a", "a", "a", "a", // a{acute,circumflex,dieresis,grave}
103 "a", "a", // a{ring,tilde}
106 "e", "e", "e", "e", // e{acute,circumflex,dieresis,grave}
107 "fi", "fl", // ligatures
108 "ff", "ffi", "ffl", // ligatures
110 "i", "i", "i", "i", // i{acute,circumflex,dieresis,grave}
113 "o", "o", "o", "o", // o{acute,circumflex,dieresis,grave}
114 "o", "o", // o{slash,tilde}
117 "u", "u", "u", "u", // u{acute,circumflex,dieresis,grave}
118 "y", "y", // y{acute,dieresis}
123 "-", "-", "-", // emdash, endash, hyphen
124 "\"", "\"", // quotedblleft, quotedblright
// Replacement strings used for ISO Latin-1 output.  TextString::addChar
// indexes this table with (c1 - 256).
130 static char *isoLatin1Subst[] = {
136 "fi", "fl", // ligatures
137 "ff", "ffi", "ffl", // ligatures
145 "-", "-", // emdash, hyphen
146 "\"", "\"", // quotedblleft, quotedblright
// Replacement strings used for ISO Latin-2 output.  TextString::addChar
// indexes this table with (c1 - 256).
151 static char *isoLatin2Subst[] = {
152 "fi", "fl", // ligatures
153 "ff", "ffi", "ffl", // ligatures
156 "-", "-", // emdash, hyphen
157 "\"", "\"", // quotedblleft, quotedblright
// ISO Latin-5 shares the Latin-1 replacement table: this is an alias for
// isoLatin1Subst, not a separate array.
162 static char **isoLatin5Subst = isoLatin1Subst;
164 //------------------------------------------------------------------------
166 //------------------------------------------------------------------------
// 96-entry lookup table used by the Adobe-Japan1-2 branch of
// TextString::addChar16, which indexes it with c and with (c - 230) and adds
// 0x8080 to the entry to form the output code.  The trailing comment on each
// row gives the hexadecimal index range that row covers.
171 static Gushort japan12Map[96] = {
172 0x2121, 0x2121, 0x212a, 0x2149, 0x2174, 0x2170, 0x2173, 0x2175, // 00 .. 07
173 0x2147, 0x214a, 0x214b, 0x2176, 0x215c, 0x2124, 0x213e, 0x2123, // 08 .. 0f
174 0x213f, 0x2330, 0x2331, 0x2332, 0x2333, 0x2334, 0x2335, 0x2336, // 10 .. 17
175 0x2337, 0x2338, 0x2339, 0x2127, 0x2128, 0x2163, 0x2161, 0x2164, // 18 .. 1f
176 0x2129, 0x2177, 0x2341, 0x2342, 0x2343, 0x2344, 0x2345, 0x2346, // 20 .. 27
177 0x2347, 0x2348, 0x2349, 0x234a, 0x234b, 0x234c, 0x234d, 0x234e, // 28 .. 2f
178 0x234f, 0x2350, 0x2351, 0x2352, 0x2353, 0x2354, 0x2355, 0x2356, // 30 .. 37
179 0x2357, 0x2358, 0x2359, 0x235a, 0x214e, 0x216f, 0x214f, 0x2130, // 38 .. 3f
180 0x2132, 0x2146, 0x2361, 0x2362, 0x2363, 0x2364, 0x2365, 0x2366, // 40 .. 47
181 0x2367, 0x2368, 0x2369, 0x236a, 0x236b, 0x236c, 0x236d, 0x236e, // 48 .. 4f
182 0x236f, 0x2370, 0x2371, 0x2372, 0x2373, 0x2374, 0x2375, 0x2376, // 50 .. 57
183 0x2377, 0x2378, 0x2379, 0x237a, 0x2150, 0x2143, 0x2151, 0x2141 // 58 .. 5f
// Lookup table declared with 97 entries.  TextString::addChar16 indexes it
// with (c - 325) and adds 0x8080 to the entry to form the output code.
187 static Gushort japan12KanaMap1[97] = {
188 0x2131, 0x2121, 0x2123, 0x2156, 0x2157, 0x2122, 0x2126, 0x2572,
189 0x2521, 0x2523, 0x2525, 0x2527, 0x2529, 0x2563, 0x2565, 0x2567,
190 0x2543, 0x213c, 0x2522, 0x2524, 0x2526, 0x2528, 0x252a, 0x252b,
191 0x252d, 0x252f, 0x2531, 0x2533, 0x2535, 0x2537, 0x2539, 0x253b,
192 0x253d, 0x253f, 0x2541, 0x2544, 0x2546, 0x2548, 0x254a, 0x254b,
193 0x254c, 0x254d, 0x254e, 0x254f, 0x2552, 0x2555, 0x2558, 0x255b,
194 0x255e, 0x255f, 0x2560, 0x2561, 0x2562, 0x2564, 0x2566, 0x2568,
195 0x2569, 0x256a, 0x256b, 0x256c, 0x256d, 0x256f, 0x2573, 0x212b,
196 0x212c, 0x212e, 0x2570, 0x2571, 0x256e, 0x2575, 0x2576, 0x2574,
197 0x252c, 0x252e, 0x2530, 0x2532, 0x2534, 0x2536, 0x2538, 0x253a,
198 0x253c, 0x253e, 0x2540, 0x2542, 0x2545, 0x2547, 0x2549, 0x2550,
199 0x2551, 0x2553, 0x2554, 0x2556, 0x2557, 0x2559, 0x255a, 0x255c,
// Lookup table declared with 98 entries.  TextString::addChar16 indexes it
// with (c - 501) and adds 0x8080 to the entry to form the output code.
204 static Gushort japan12KanaMap2[98] = {
205 0x212d, 0x212f, 0x216d, 0x214c, 0x214d, 0x2152, 0x2153, 0x2154,
206 0x2155, 0x2158, 0x2159, 0x215a, 0x215b, 0x213d, 0x2121, 0x2472,
207 0x2421, 0x2423, 0x2425, 0x2427, 0x2429, 0x2463, 0x2465, 0x2467,
208 0x2443, 0x2422, 0x2424, 0x2426, 0x2428, 0x242a, 0x242b, 0x242d,
209 0x242f, 0x2431, 0x2433, 0x2435, 0x2437, 0x2439, 0x243b, 0x243d,
210 0x243f, 0x2441, 0x2444, 0x2446, 0x2448, 0x244a, 0x244b, 0x244c,
211 0x244d, 0x244e, 0x244f, 0x2452, 0x2455, 0x2458, 0x245b, 0x245e,
212 0x245f, 0x2460, 0x2461, 0x2462, 0x2464, 0x2466, 0x2468, 0x2469,
213 0x246a, 0x246b, 0x246c, 0x246d, 0x246f, 0x2473, 0x2470, 0x2471,
214 0x246e, 0x242c, 0x242e, 0x2430, 0x2432, 0x2434, 0x2436, 0x2438,
215 0x243a, 0x243c, 0x243e, 0x2440, 0x2442, 0x2445, 0x2447, 0x2449,
216 0x2450, 0x2451, 0x2453, 0x2454, 0x2456, 0x2457, 0x2459, 0x245a,
// ASCII spellings of the Roman numerals I..X.  TextString::addChar16 selects
// an entry with (c - 7575) and copies it character by character into its
// substitute buffer.
220 static char *japan12Roman[10] = {
221 "I", "II", "III", "IV", "V", "VI", "VII", "VIII", "IX", "X"
// ASCII spellings of six unit abbreviations.  TextString::addChar16 selects
// an entry with (c - 7601) and copies it character by character into its
// substitute buffer.
224 static char *japan12Abbrev1[6] = {
225 "mm", "cm", "km", "mg", "kg", "cc"
230 #if CHINESE_CNS_SUPPORT
// Lookup table declared with 99 entries, compiled only when
// CHINESE_CNS_SUPPORT is set.  Entry 0 is 0.
// NOTE(review): presumably consumed by the Adobe-CNS1-3 branch of
// TextString::addChar16 -- confirm the index expression at the use site.
232 static Gushort cns13Map1[99] = {
234 0, 0xa140, 0xa149, 0xa1a8, 0xa1ad, 0xa243, 0xa248, 0xa1ae,
235 0xa1a6, 0xa15d, 0xa15e, 0xa1af, 0xa1cf, 0xa141, 0xa1df, 0xa144,
236 0xa241, 0xa2af, 0xa2b0, 0xa2b1, 0xa2b2, 0xa2b3, 0xa2b4, 0xa2b5,
237 0xa2b6, 0xa2b7, 0xa2b8, 0xa147, 0xa146, 0xa1d5, 0xa1d7, 0xa1d6,
238 0xa148, 0xa249, 0xa2cf, 0xa2d0, 0xa2d1, 0xa2d2, 0xa2d3, 0xa2d4,
239 0xa2d5, 0xa2d6, 0xa2d7, 0xa2d8, 0xa2d9, 0xa2da, 0xa2db, 0xa2dc,
240 0xa2dd, 0xa2de, 0xa2df, 0xa2e0, 0xa2e1, 0xa2e2, 0xa2e3, 0xa2e4,
241 0xa2e5, 0xa2e6, 0xa2e7, 0xa2e8, 0xa165, 0xa242, 0xa166, 0xa173,
242 0xa15a, 0xa1a5, 0xa2e9, 0xa2ea, 0xa2eb, 0xa2ec, 0xa2ed, 0xa2ee,
243 0xa2ef, 0xa2f0, 0xa2f1, 0xa2f2, 0xa2f3, 0xa2f4, 0xa2f5, 0xa2f6,
244 0xa2f7, 0xa2f8, 0xa2f9, 0xa2fa, 0xa2fb, 0xa2fc, 0xa2fd, 0xa2fe,
245 0xa340, 0xa341, 0xa342, 0xa343, 0xa161, 0xa159, 0xa162, 0xa1e3,
// Lookup table declared with 95 entries.  The Adobe-CNS1-3 branch of
// TextString::addChar16 indexes it with (c - 13648) and uses the entry
// directly as the output code (no offset is added).
249 static Gushort cns13Map2[95] = {
251 0xa140, 0xa149, 0xa1a8, 0xa1ad, 0xa244, 0xa248, 0xa1ae,
252 0xa1a6, 0xa15d, 0xa15e, 0xa1af, 0xa1cf, 0xa141, 0xa1df, 0xa144,
253 0xa241, 0xa2af, 0xa2b0, 0xa2b1, 0xa2b2, 0xa2b3, 0xa2b4, 0xa2b5,
254 0xa2b6, 0xa2b7, 0xa2b8, 0xa147, 0xa146, 0xa1d5, 0xa1d7, 0xa1d6,
255 0xa148, 0xa249, 0xa2cf, 0xa2d0, 0xa2d1, 0xa2d2, 0xa2d3, 0xa2d4,
256 0xa2d5, 0xa2d6, 0xa2d7, 0xa2d8, 0xa2d9, 0xa2da, 0xa2db, 0xa2dc,
257 0xa2dd, 0xa2de, 0xa2df, 0xa2e0, 0xa2e1, 0xa2e2, 0xa2e3, 0xa2e4,
258 0xa2e5, 0xa2e6, 0xa2e7, 0xa2e8, 0xa165, 0xa242, 0xa166, 0xa173,
259 0xa15a, 0xa1a5, 0xa2e9, 0xa2ea, 0xa2eb, 0xa2ec, 0xa2ed, 0xa2ee,
260 0xa2ef, 0xa2f0, 0xa2f1, 0xa2f2, 0xa2f3, 0xa2f4, 0xa2f5, 0xa2f6,
261 0xa2f7, 0xa2f8, 0xa2f9, 0xa2fa, 0xa2fb, 0xa2fc, 0xa2fd, 0xa2fe,
262 0xa340, 0xa341, 0xa342, 0xa343, 0xa161, 0xa159, 0xa162, 0xa1c3
267 //------------------------------------------------------------------------
269 //------------------------------------------------------------------------
// Creates an empty text string positioned at the graphics state's current
// point.
//   state     - supplies the current position, the transform, and the
//               transformed font size
//   hexCodes1 - stored in hexCodes; TextString::addChar consults it when
//               interpreting character names
271 TextString::TextString(GfxState *state, GBool hexCodes1) {
// map the current point through the state's transform into (x, y)
274 state->transform(state->getCurX(), state->getCurY(), &x, &y);
275 h = state->getTransformedFontSize();
276 //~ yMin/yMax computation should use font ascent/descent values
// vertical extent is estimated as 1.3 times the transformed font size
278 yMax = yMin + 1.3 * h;
// start with an empty character buffer
280 text = new GString();
284 hexCodes = hexCodes1;
// Destructor for TextString.
// NOTE(review): presumably releases text and xRight -- confirm against the
// body.
287 TextString::~TextString() {
// Appends the translation of 8-bit character code c to this string and
// records a right-edge x coordinate for every character appended.
//   state   - supplies the current font (used to look up the name of c)
//   x, y    - position of the character
//   dx, dy  - advance of the character
//   c       - character code in the font's encoding
//   charSet - output character set
292 void TextString::addChar(GfxState *state, double x, double y,
293 double dx, double dy,
294 Guchar c, TextOutputCharSet charSet) {
295 char *charName, *sub;
// i is the string length before anything is appended for this character
300 i = text->getLength();
302 // append translated character(s) to string
305 if ((charName = state->getFont()->getCharName(c))) {
// a name found in the general substitution table is replaced by the
// corresponding entry of generalSubst
306 if ((c1 = generalSubstEncoding.getCharCode(charName)) >= 0) {
307 charName = generalSubst[c1];
// look the (possibly substituted) name up in one of the output encodings
311 c1 = ascii7Encoding.getCharCode(charName);
314 c1 = isoLatin1Encoding.getCharCode(charName);
317 c1 = isoLatin2Encoding.getCharCode(charName);
320 c1 = isoLatin5Encoding.getCharCode(charName);
// The tests below try to read a numeric character code out of the name.
// NOTE(review): charName elements are plain char; the <ctype.h> functions
// require a value representable as unsigned char, so a name containing
// bytes >= 0x80 would need a cast -- confirm names are always ASCII.
// NOTE(review): sscanf "%x" stores through an unsigned int pointer; confirm
// the declared type of c1.
324 m = strlen(charName);
// 'B', 'C' or 'G' followed by two hex digits: parse the digits as hex
325 if (hexCodes && m == 3 &&
326 (charName[0] == 'B' || charName[0] == 'C' ||
327 charName[0] == 'G') &&
328 isxdigit(charName[1]) && isxdigit(charName[2])) {
329 sscanf(charName+1, "%x", &c1);
// exactly two hex digits: parse the whole name as hex
330 } else if (hexCodes && m == 2 &&
331 isxdigit(charName[0]) && isxdigit(charName[1])) {
332 sscanf(charName, "%x", &c1);
// hex codes disabled, two or three characters starting with two decimal digits
333 } else if (!hexCodes && m >= 2 && m <= 3 &&
334 isdigit(charName[0]) && isdigit(charName[1])) {
// three to five characters with a digit in second position: decimal number
// after the first character
339 } else if (m >= 3 && m <= 5 && isdigit(charName[1])) {
340 c1 = atoi(charName+1);
345 //~ this is a kludge -- is there a standard internal encoding
346 //~ used by all/most Type 1 fonts?
347 if (c1 == 262) // hyphen
349 else if (c1 == 266) // emdash
// map the recovered code to a Latin-1 name, then to the output encoding
352 charName = isoLatin1Encoding.getCharName(c1);
356 c1 = ascii7Encoding.getCharCode(charName);
362 c1 = isoLatin2Encoding.getCharCode(charName);
365 c1 = isoLatin5Encoding.getCharCode(charName);
// pick the replacement string from the table for the output character set;
// each table is indexed relative to its own base code (128 or 256)
376 sub = ascii7Subst[c1 - 128];
382 sub = isoLatin1Subst[c1 - 256];
388 sub = isoLatin2Subst[c1 - 256];
394 sub = isoLatin5Subst[c1 - 256];
405 text->append((char)c1);
409 // update position information
// ((i+15) & ~15) is i rounded up to a multiple of 16; the array is regrown
// when the new length i+n exceeds that rounded size
410 if (i+n > ((i+15) & ~15))
411 xRight = (double *)grealloc(xRight, ((i+n+15) & ~15) * sizeof(double));
// the advance dx is split evenly across the n new entries
414 for (j = 0; j < n; ++j)
415 xRight[i+j] = x + ((j+1) * dx) / n;
// Appends the conversion of 16-bit character c to this string as byte pairs
// and records a right-edge x coordinate for every byte appended.
//   state   - graphics state
//   x, y    - position of the character
//   dx, dy  - advance of the character
//   c       - character code in the collection named by charSet
//   charSet - 16-bit character collection of the font
419 void TextString::addChar16(GfxState *state, double x, double y,
420 double dx, double dy,
421 int c, GfxFontCharSet16 charSet) {
// i is the string length before anything is appended for this character
429 i = text->getLength();
431 // convert the 16-bit character
436 // convert Adobe-Japan1-2 to JIS X 0208-1983
437 case font16AdobeJapan12:
// table-driven ranges: each table entry is offset by 0x8080
440 c1 = 0x8080 + japan12Map[c];
441 } else if (c <= 632) {
445 c1 = 0x8080 + japan12Map[c - 230];
447 c1 = 0x8080 + japan12KanaMap1[c - 325];
451 c1 = 0x8080 + japan12KanaMap2[c - 501];
// contiguous ranges: output code is a base value plus the offset of c
// within the range
454 } else if (c <= 1124) {
457 c1 = 0xa1a1 + (c - 633);
459 c1 = 0xa2a1 + (c - 727);
461 c1 = 0xa2ba + (c - 741);
463 c1 = 0xa2ca + (c - 749);
465 c1 = 0xa2dc + (c - 756);
467 c1 = 0xa2f2 + (c - 771);
470 } else if (c <= 841) {
472 c1 = 0xa3b0 + (c - 780);
474 c1 = 0xa3c1 + (c - 790);
476 c1 = 0xa3e1 + (c - 816);
477 } else if (c <= 1010) {
479 c1 = 0xa4a1 + (c - 842);
481 c1 = 0xa5a1 + (c - 925);
484 c1 = 0xa6a1 + (c - 1011);
486 c1 = 0xa6c1 + (c - 1035);
488 c1 = 0xa7a1 + (c - 1059);
490 c1 = 0xa7d1 + (c - 1092);
// the offset from 1125 is split into a row t1 and a column t2 of 94 columns;
// the row is added to the high byte and the column to the low byte
492 } else if (c <= 4089) {
493 t1 = (c - 1125) / 94;
494 t2 = (c - 1125) % 94;
495 c1 = 0xb0a1 + (t1 << 8) + t2;
// same row/column split, counted from 4090
496 } else if (c <= 7477) {
497 t1 = (c - 4090) / 94;
498 t2 = (c - 4090) % 94;
499 c1 = 0xd0a1 + (t1 << 8) + t2;
500 } else if (c <= 7554) {
502 } else if (c <= 7563) { // circled Arabic numbers 1..9
503 c1 = 0xa3b1 + (c - 7555);
504 } else if (c <= 7574) { // circled Arabic numbers 10..20
// two-digit value: tens digit and units digit are emitted as separate
// characters, each offset from 0xa3b0
506 sub[0] = 0xa3b0 + (t1 / 10);
507 sub[1] = 0xa3b0 + (t1 % 10);
510 } else if (c <= 7584) { // Roman numbers I..X
// walk the ASCII spelling, writing one substitute character per letter
511 for (p = japan12Roman[c - 7575], q = sub; *p; ++p, ++q) {
516 } else if (c <= 7632) {
519 } else if (c <= 7606) {
// walk the ASCII abbreviation, writing one substitute character per letter
520 for (p = japan12Abbrev1[c - 7601], q = sub; *p; ++p, ++q) {
533 error(-1, "Unsupported Adobe-Japan1-2 character: %d", c);
536 #endif // JAPANESE_SUPPORT
539 case font16AdobeGB12:
540 #if CHINESE_GB_SUPPORT
544 case font16AdobeCNS13:
545 #if CHINESE_CNS_SUPPORT
548 } else if (c <= 502) {
551 } else if (c == 248) {
557 c1 = 0xa140 + (t1 << 8) + t2;
559 c1 = 0xa162 + (t1 << 8) + t2;
562 } else if (c <= 505) {
563 c1 = 0xa3bd + (c - 503);
564 } else if (c <= 594) {
566 } else if (c <= 5995) {
// individual codes that are handled separately from the ranges below
569 } else if (c == 4308) {
571 } else if (c == 5221) {
573 } else if (c == 5495) {
575 } else if (c == 5550) {
577 } else if (c == 5551) {
580 if (c >= 2007 && c <= 2430) {
582 } else if (c >= 4309 && c <= 4695) {
584 } else if (c >= 5222 && c <= 5410) {
586 } else if (c >= 5496 && c <= 5641) {
594 c1 = 0xa440 + (t1 << 8) + t2;
596 c1 = 0xa462 + (t1 << 8) + t2;
599 } else if (c <= 13645) {
// individual codes that are handled separately from the ranges below
602 } else if (c == 6134) {
604 } else if (c == 8142) {
606 } else if (c == 8788) {
608 } else if (c == 8889) {
610 } else if (c == 10926) {
612 } else if (c == 11073) {
614 } else if (c == 11361) {
616 } else if (c == 11719) {
618 } else if (c == 12308) {
620 } else if (c == 12526) {
622 } else if (c == 12640) {
624 } else if (c == 12783) {
626 } else if (c == 12900) {
628 } else if (c == 13585) {
630 } else if (c == 13641) {
633 if (c >= 6006 && c <= 6038) {
635 } else if (c >= 6088 && c <= 6133) {
637 } else if (c >= 6302 && c <= 8250) {
639 } else if (c >= 8251 && c <= 8888) {
641 } else if (c >= 8890 && c <= 9288) {
643 } else if (c >= 9289 && c <= 10925) {
645 } else if (c >= 10927 && c <= 11072) {
647 } else if (c >= 11362 && c <= 11477) {
649 } else if (c >= 11615 && c <= 11718) {
651 } else if (c >= 11942 && c <= 12139) {
653 } else if (c >= 12140 && c <= 12221) {
655 } else if (c >= 12222 && c <= 12307) {
657 } else if (c >= 12309 && c <= 12316) {
659 } else if (c >= 12317 && c <= 12469) {
661 } else if (c >= 12470 && c <= 12525) {
663 } else if (c >= 12527 && c <= 12639) {
665 } else if (c >= 12641 && c <= 12782) {
667 } else if (c >= 12784 && c <= 12828) {
669 } else if (c >= 12829 && c <= 12899) {
671 } else if (c >= 12901 && c <= 13094) {
673 } else if (c >= 13095 && c <= 13584) {
675 } else if (c >= 13586 && c <= 13628) {
677 } else if (c == 13629) {
679 } else if (c >= 13630 && c <= 13640) {
681 } else if (c >= 13642 && c <= 13645) {
689 c1 = 0xc940 + (t1 << 8) + t2;
691 c1 = 0xc962 + (t1 << 8) + t2;
694 } else if (c == 13646) {
696 } else if (c == 13647) {
// table-driven range: the table entry is the output code itself
698 } else if (c <= 13742) {
699 c1 = cns13Map2[c - 13648];
700 } else if (c <= 13746) {
701 c1 = 0xa159 + (c - 13743);
702 } else if (c <= 14055) {
704 } else if (c <= 14062) {
705 c1 = 0xf9d6 + (c - 14056);
709 error(-1, "Unsupported Adobe-CNS1-3 character: %d", c);
716 // append converted character to string
// single converted code: high byte first, then low byte
721 text->append(c1 >> 8);
722 text->append(c1 & 0xff);
// multi-character substitute: every element of sub is emitted the same way,
// stopping at the zero terminator
726 for (q = sub; *q; ++q) {
727 text->append(*q >> 8);
728 text->append(*q & 0xff);
733 // update position information
// ((i+15) & ~15) is i rounded up to a multiple of 16; the array is regrown
// when the new length i+n exceeds that rounded size
734 if (i+n > ((i+15) & ~15)) {
735 xRight = (double *)grealloc(xRight, ((i+n+15) & ~15) * sizeof(double));
// every new entry gets the same right edge, x + dx
740 for (j = 0; j < n; ++j) {
741 xRight[i+j] = x + dx;
746 //------------------------------------------------------------------------
748 //------------------------------------------------------------------------
// Creates a text page.
//   charSet  - output character set, stored and later passed to
//              TextString::addChar
//   rawOrder - stored flag; coalesce() and dump() branch on it
750 TextPage::TextPage(TextOutputCharSet charSet, GBool rawOrder) {
751 this->charSet = charSet;
752 this->rawOrder = rawOrder;
// no insertion-position cache yet
756 yxCur1 = yxCur2 = NULL;
// Destructor for TextPage.
// NOTE(review): presumably frees the stored strings -- confirm against the
// body.
760 TextPage::~TextPage() {
// Starts a new current string at the graphics state's current position.
//   state    - graphics state handed to the TextString constructor
//   s        - string about to be drawn; this function itself also calls
//              beginString with NULL here (see TextPage::addChar)
//   hexCodes - forwarded to the TextString constructor
764 void TextPage::beginString(GfxState *state, GString *s, GBool hexCodes) {
765 // This check is needed because Type 3 characters can contain
766 // text-drawing operations.
772 curStr = new TextString(state, hexCodes);
// Adds one 8-bit character to the current string, first starting a fresh
// string when the character sits too far to the right of the previous one.
//   state   - graphics state used for the coordinate transforms
//   x, y    - character position before transformation
//   dx, dy  - character advance before transformation
//   c       - character code
775 void TextPage::addChar(GfxState *state, double x, double y,
776 double dx, double dy, Guchar c) {
777 double x1, y1, w1, h1, dx2, dy2;
// position after transformation
781 state->transform(x, y, &x1, &y1);
// character spacing expressed as a transformed delta
782 state->textTransformDelta(state->getCharSpace(), 0, &dx2, &dy2);
// advance after transformation
785 state->transformDelta(dx, dy, &w1, &h1);
786 n = curStr->text->getLength();
// gap test: distance from the previous character's right edge exceeds 10% of
// the current string's height
788 x1 - curStr->xRight[n-1] > 0.1 * (curStr->yMax - curStr->yMin)) {
// carry the hex-code setting over to the replacement string
789 hexCodes = curStr->hexCodes;
791 beginString(state, NULL, hexCodes);
793 curStr->addChar(state, x1, y1, w1, h1, c, charSet);
// Adds one 16-bit character to the current string, first starting a fresh
// string when the character sits too far to the right of the previous one.
//   state   - graphics state used for the coordinate transforms
//   x, y    - character position before transformation
//   dx, dy  - character advance before transformation
//   c       - character code
//   charSet - 16-bit character collection, forwarded to
//             TextString::addChar16
796 void TextPage::addChar16(GfxState *state, double x, double y,
797 double dx, double dy, int c,
798 GfxFontCharSet16 charSet) {
799 double x1, y1, w1, h1, dx2, dy2;
// position after transformation
803 state->transform(x, y, &x1, &y1);
// character spacing expressed as a transformed delta
804 state->textTransformDelta(state->getCharSpace(), 0, &dx2, &dy2);
// advance after transformation
807 state->transformDelta(dx, dy, &w1, &h1);
808 n = curStr->text->getLength();
// gap test: distance from the previous character's right edge exceeds 10% of
// the current string's height
810 x1 - curStr->xRight[n-1] > 0.1 * (curStr->yMax - curStr->yMin)) {
// carry the hex-code setting over to the replacement string
811 hexCodes = curStr->hexCodes;
813 beginString(state, NULL, hexCodes);
815 curStr->addChar16(state, x1, y1, w1, h1, c, charSet);
// Finishes the current string: empty strings are discarded, all others are
// placed into the y-major list at their sorted position.
818 void TextPage::endString() {
827 // throw away zero-length strings -- they don't have valid xMin/xMax
828 // values, and they're useless anyway
829 if (curStr->text->getLength() == 0) {
835 // insert string in y-major list
// y1 and y2 lie 50% and 80% of the string height below yMin; they are the
// reference heights for the ordering tests that follow
836 h = curStr->yMax - curStr->yMin;
837 y1 = curStr->yMin + 0.5 * h;
838 y2 = curStr->yMin + 0.8 * h;
// cached position: the string sorts at or after yxCur1 (or there is none)
// and strictly before yxCur2
842 } else if ((!yxCur1 ||
843 (y1 >= yxCur1->yMin &&
844 (y2 >= yxCur1->yMax || curStr->xMax >= yxCur1->xMin))) &&
846 (y1 < yxCur2->yMin ||
847 (y2 < yxCur2->yMax && curStr->xMax < yxCur2->xMin)))) {
// otherwise scan from the head for the first string this one sorts before,
// using the same "before" test as for yxCur2 above
851 for (p1 = NULL, p2 = yxStrings; p2; p1 = p2, p2 = p2->yxNext) {
852 if (y1 < p2->yMin || (y2 < p2->yMax && curStr->xMax < p2->xMin))
// Walks the y-major list and merges a string into its predecessor when the
// two overlap vertically and the horizontal gap between them is small.
866 void TextPage::coalesce() {
867 TextString *str1, *str2;
871 #if 0 //~ for debugging
872 for (str1 = yxStrings; str1; str1 = str1->yxNext) {
873 printf("x=%3d..%3d y=%3d..%3d size=%2d '%s'\n",
874 (int)str1->xMin, (int)str1->xMax, (int)str1->yMin, (int)str1->yMax,
875 (int)(str1->yMax - str1->yMin), str1->text->getCString());
877 printf("\n------------------------------------------------------------\n\n");
880 while (str1 && (str2 = str1->yxNext)) {
// space = height of str1, used as the unit for the gap test
// d     = horizontal gap from the end of str1 to the start of str2
881 space = str1->yMax - str1->yMin;
882 d = str2->xMin - str1->xMax;
// vertical test: str2's top or bottom edge lies within str1's vertical range,
// or (when not in raw order) str2 starts above str1's bottom edge
884 ((str2->yMin >= str1->yMin && str2->yMin <= str1->yMax) ||
885 (str2->yMax >= str1->yMin && str2->yMax <= str1->yMax))) ||
886 (!rawOrder && str2->yMin < str1->yMax)) &&
// horizontal test: overlap of less than half a height, gap of less than one
887 d > -0.5 * space && d < space) {
// n = length of str1 before the merge; index of the first new xRight entry
888 n = str1->text->getLength();
890 str1->text->append(' ');
891 str1->text->append(str2->text);
// regrow xRight to the merged length rounded up to a multiple of 16
892 str1->xRight = (double *)
893 grealloc(str1->xRight,
894 ((str1->text->getLength() + 15) & ~15) * sizeof(double));
896 str1->xRight[n++] = str2->xMin;
// copy str2's right edges after str1's
897 for (i = 0; i < str2->text->getLength(); ++i)
898 str1->xRight[n++] = str2->xRight[i];
// extend str1's bounding box to cover str2
899 if (str2->xMax > str1->xMax)
900 str1->xMax = str2->xMax;
901 if (str2->yMax > str1->yMax)
902 str1->yMax = str2->yMax;
// unlink str2 from the y-major list
903 str1->yxNext = str2->yxNext;
// Searches the strings on the page for s, comparing characters
// case-insensitively.
//   s            - text to look for
//   top          - when false, *xMin/*yMin give the upper limit of the search
//   bottom       - when false, *xMax/*yMax give the lower limit of the search
//   xMin..yMax   - in: search limits as described above; out: *xMin and
//                  *xMax receive the horizontal extent of the match
// NOTE(review): tolower() is called on plain char values; <ctype.h> requires
// a value representable as unsigned char, so text with bytes >= 0x80 would
// need a cast -- confirm.
911 GBool TextPage::findText(char *s, GBool top, GBool bottom,
912 double *xMin, double *yMin,
913 double *xMax, double *yMax) {
919 // scan all strings on page
921 for (str = yxStrings; str; str = str->yxNext) {
923 // check: above top limit?
924 if (!top && (str->yMax < *yMin ||
925 (str->yMin < *yMin && str->xMax <= *xMin)))
928 // check: below bottom limit?
929 if (!bottom && (str->yMin > *yMax ||
930 (str->yMax > *yMax && str->xMin >= *xMax)))
933 // search each position in this string
// m = length of this string; start positions run while at least n characters
// remain (NOTE(review): n is presumably strlen(s) -- confirm)
934 m = str->text->getLength();
935 for (i = 0, p = str->text->getCString(); i <= m - n; ++i, ++p) {
937 // check: above top limit?
938 if (!top && str->yMin < *yMin) {
// x = horizontal midpoint of character i
939 x = (((i == 0) ? str->xMin : str->xRight[i-1]) + str->xRight[i]) / 2;
944 // check: below bottom limit?
945 if (!bottom && str->yMax > *yMax) {
// x = horizontal midpoint of character i
946 x = (((i == 0) ? str->xMin : str->xRight[i-1]) + str->xRight[i]) / 2;
951 // compare the strings
952 for (p1 = p, q = s; *q; ++p1, ++q) {
953 if (tolower(*p1) != tolower(*q))
// match: left edge of character i, right edge of character i+n-1
959 *xMin = (i == 0) ? str->xMin : str->xRight[i-1];
960 *xMax = str->xRight[i+n-1];
// Collects the text that falls inside the rectangle (xMin, yMin) ..
// (xMax, yMax) into a GString.  A string is tested by its vertical midpoint;
// each of its characters is tested by its horizontal midpoint.
970 GString *TextPage::getText(double xMin, double yMin,
971 double xMax, double yMax) {
974 double x0, x1, x2, y;
982 for (str1 = yxStrings; str1; str1 = str1->yxNext) {
// y = vertical midpoint of this string
983 y = 0.5 * (str1->yMin + str1->yMax);
986 if (y > yMin && str1->xMin < xMax && str1->xMax > xMin) {
987 x0 = x1 = x2 = str1->xMin;
// i1 = first character whose horizontal midpoint is at or right of xMin
988 for (i1 = 0; i1 < str1->text->getLength(); ++i1) {
989 x0 = (i1==0) ? str1->xMin : str1->xRight[i1-1];
990 x1 = str1->xRight[i1];
991 if (0.5 * (x0 + x1) >= xMin)
// i2 = last character whose horizontal midpoint is at or left of xMax
994 for (i2 = str1->text->getLength() - 1; i2 > i1; --i2) {
995 x1 = (i2==0) ? str1->xMin : str1->xRight[i2-1];
996 x2 = str1->xRight[i2];
997 if (0.5 * (x1 + x2) <= xMax)
// something was collected already: compare this piece's position with the
// previous one
1000 if (s->getLength() > 0) {
1001 if (x0 < xPrev || str1->yMin > yPrev) {
// append characters i1..i2 inclusive
1012 s->append(str1->text->getCString() + i1, i2 - i1 + 1);
// Writes the page's text to f.  Three passes: build an x-major list of the
// strings, assign each string a column, then print the strings in y-major
// order padded out to their columns.
//   f - output stream
1027 void TextPage::dump(FILE *f) {
1028 TextString *str1, *str2, *str3;
1033 // build x-major list
// insertion sort: each string goes before the first entry with a larger
// xMin, ties broken by smaller yMin
1035 for (str1 = yxStrings; str1; str1 = str1->yxNext) {
1036 for (str2 = NULL, str3 = xyStrings;
1038 str2 = str3, str3 = str3->xyNext) {
1039 if (str1->xMin < str3->xMin ||
1040 (str1->xMin == str3->xMin && str1->yMin < str3->yMin))
1044 str2->xyNext = str1;
1047 str1->xyNext = str3;
1050 // do column assignment
// each string is compared against every string that precedes it in the
// x-major list
1051 for (str1 = xyStrings; str1; str1 = str1->xyNext) {
1053 for (str2 = xyStrings; str2 != str1; str2 = str2->xyNext) {
// str1 starts at or past the end of str2: candidate column is str2's column
// plus its length plus 4
1054 if (str1->xMin >= str2->xMax) {
1055 col2 = str2->col + str2->text->getLength() + 4;
// str1 starts inside str2: offset is proportional to how far into str2 it
// starts, scaled by str2's length in characters
1058 } else if (str1->xMin > str2->xMin) {
1060 (int)(((str1->xMin - str2->xMin) / (str2->xMax - str2->xMin)) *
1061 str2->text->getLength());
1070 #if 0 //~ for debugging
1071 fprintf(f, "~~~~~~~~~~\n");
1072 for (str1 = yxStrings; str1; str1 = str1->yxNext) {
1073 fprintf(f, "(%4d,%4d) - (%4d,%4d) [%3d] %s\n",
1074 (int)str1->xMin, (int)str1->yMin, (int)str1->xMax, (int)str1->yMax,
1075 str1->col, str1->text->getCString());
1077 fprintf(f, "~~~~~~~~~~\n");
// yMax tracks the bottom edge of the line being printed
1082 yMax = yxStrings ? yxStrings->yMax : 0;
1083 for (str1 = yxStrings; str1; str1 = str1->yxNext) {
1085 // line this string up with the correct column
1086 if (rawOrder && col1 == 0) {
1089 for (; col1 < str1->col; ++col1) {
1095 fputs(str1->text->getCString(), f);
// advance the running output column past the text just written
1098 col1 += str1->text->getLength();
1100 // update yMax for this line
1101 if (str1->yMax > yMax)
1104 // if we've hit the end of the line...
// the line continues only if there is a next string that starts above 80% of
// this string's height and at or right of this string's end (and, in raw
// order, does not lie entirely above this string)
1105 if (!(str1->yxNext &&
1106 !(rawOrder && str1->yxNext->yMax < str1->yMin) &&
1107 str1->yxNext->yMin < 0.2*str1->yMin + 0.8*str1->yMax &&
1108 str1->yxNext->xMin >= str1->xMax)) {
1113 // print extra vertical space if necessary
1116 // find yMin for next line
1117 yMin = str1->yxNext->yMin;
1118 for (str2 = str1->yxNext; str2; str2 = str2->yxNext) {
1119 if (str2->yMin < yMin)
1121 if (!(str2->yxNext && str2->yxNext->yMin < str2->yMax &&
1122 str2->yxNext->xMin >= str2->xMax))
// d = vertical gap between the lines, in units of this string's height,
// rounded to the nearest integer
1127 d = (int)((yMin - yMax) / (str1->yMax - str1->yMin) + 0.5);
1128 if (rawOrder && d > 2) {
1131 for (; d > 0; --d) {
1136 // set up for next line
1138 yMax = str1->yxNext ? str1->yxNext->yMax : 0;
// Empties the page: walks the y-major string list and resets the cached
// insertion position.
// NOTE(review): the loop presumably deletes each string -- confirm against
// the loop body.
1143 void TextPage::clear() {
1144 TextString *p1, *p2;
// p2 carries the successor across the iteration (the loop steps with p1 = p2)
1150 for (p1 = yxStrings; p1; p1 = p2) {
1156 yxCur1 = yxCur2 = NULL;
1159 //------------------------------------------------------------------------
1161 //------------------------------------------------------------------------
// Creates a text output device.
//   fileName - output file name; the name "-" is special-cased instead of
//              being opened with fopen
//              (NOTE(review): presumably selects stdout -- confirm)
//   charSet  - output character set, handed to the TextPage
// rawOrder is also stored and handed to the TextPage.
1163 TextOutputDev::TextOutputDev(char *fileName, TextOutputCharSet charSet,
1166 this->rawOrder = rawOrder;
1172 if (!strcmp(fileName, "-")) {
// any other name is opened for writing, replacing existing content
1174 } else if ((f = fopen(fileName, "w"))) {
1177 error(-1, "Couldn't open text file '%s'", fileName);
1185 // set up text object
1186 text = new TextPage(charSet, rawOrder);
// Destructor for the text output device.
1189 TextOutputDev::~TextOutputDev() {
// ICSupport call on the output file's handle; the include comment at the top
// of the file says ICSupport is needed for setting the type/creator of MacOS
// files.
// NOTE(review): presumably guarded by a MacOS-only #ifdef -- confirm.
1192 ICS_MapRefNumAndAssign((short)f->handle);
// Page-start notification; receives the page number and the graphics state.
// NOTE(review): presumably resets the accumulated page text -- confirm
// against the body.
1201 void TextOutputDev::startPage(int pageNum, GfxState *state) {
// Page-end notification.
// NOTE(review): presumably coalesces and dumps the page text -- confirm
// against the body.
1205 void TextOutputDev::endPage() {
// Inspects the newly selected font and looks for character names that are
// really hexadecimal character codes.
// NOTE(review): a match presumably sets the hexCodes member that
// beginString() passes on -- confirm against the branch bodies.
//   state - graphics state supplying the current font
1215 void TextOutputDev::updateFont(GfxState *state) {
1220 // look for hex char codes in subsetted font
// only 8-bit fonts are scanned; every one of the 256 codes is checked
1222 if ((font = state->getFont()) && !font->is16Bit()) {
1223 for (c = 0; c < 256; ++c) {
1224 if ((charName = font->getCharName(c))) {
// form 1: 'B', 'C' or 'G' followed by exactly two hex digits, at least one of
// which is a letter a-f/A-F (so pure decimal names do not qualify)
1225 if ((charName[0] == 'B' || charName[0] == 'C' ||
1226 charName[0] == 'G') &&
1227 strlen(charName) == 3 &&
1228 isxdigit(charName[1]) && isxdigit(charName[2]) &&
1229 ((charName[1] >= 'a' && charName[1] <= 'f') ||
1230 (charName[1] >= 'A' && charName[1] <= 'F') ||
1231 (charName[2] >= 'a' && charName[2] <= 'f') ||
1232 (charName[2] >= 'A' && charName[2] <= 'F'))) {
// form 2: exactly two hex digits, at least one of which is a letter
1235 } else if ((strlen(charName) == 2) &&
1236 isxdigit(charName[0]) && isxdigit(charName[1]) &&
1237 ((charName[0] >= 'a' && charName[0] <= 'f') ||
1238 (charName[0] >= 'A' && charName[0] <= 'F') ||
1239 (charName[1] >= 'a' && charName[1] <= 'f') ||
1240 (charName[1] >= 'A' && charName[1] <= 'F'))) {
// Forwards the start of a string to the TextPage, adding this device's
// hexCodes setting.
1249 void TextOutputDev::beginString(GfxState *state, GString *s) {
1250 text->beginString(state, s, hexCodes);
// End-of-string notification.
// NOTE(review): presumably forwards to TextPage::endString -- confirm
// against the body.
1253 void TextOutputDev::endString(GfxState *state) {
// Forwards one 8-bit character, with its position and advance, to
// TextPage::addChar.
1257 void TextOutputDev::drawChar(GfxState *state, double x, double y,
1258 double dx, double dy, Guchar c) {
1259 text->addChar(state, x, y, dx, dy, c);
// Forwards one 16-bit character to TextPage::addChar16, passing the 16-bit
// character set reported by the current font.
1262 void TextOutputDev::drawChar16(GfxState *state, double x, double y,
1263 double dx, double dy, int c) {
1264 text->addChar16(state, x, y, dx, dy, c, state->getFont()->getCharSet16());
// Thin wrapper: passes all arguments to TextPage::findText unchanged and
// returns its result.
1267 GBool TextOutputDev::findText(char *s, GBool top, GBool bottom,
1268 double *xMin, double *yMin,
1269 double *xMax, double *yMax) {
1270 return text->findText(s, top, bottom, xMin, yMin, xMax, yMax);