1 //========================================================================
3 // CharCodeToUnicode.cc
5 // Copyright 2001-2003 Glyph & Cog, LLC
7 //========================================================================
11 #ifdef USE_GCC_PRAGMAS
12 #pragma implementation
21 #include "GlobalParams.h"
22 #include "PSTokenizer.h"
23 #include "CharCodeToUnicode.h"
25 //------------------------------------------------------------------------
// Capacity of the per-entry Unicode array in CharCodeToUnicodeString.
27 #define maxUnicodeString 8
// One entry of the string map (sMap): a character code that maps to a
// sequence of Unicode values rather than a single one. Besides the u array,
// the parsing code in this file reads and writes members named c (the
// character code) and len (the number of values) on this struct.
29 struct CharCodeToUnicodeString {
// Unicode values for this entry; holds at most maxUnicodeString of them.
31 Unicode u[maxUnicodeString];
35 //------------------------------------------------------------------------
// Character-source callback for in-memory input. parseCMap hands this
// function to parseCMap1 together with the address of a char pointer into
// the CMap text, so data is presumably a char ** that is advanced as
// characters are consumed -- TODO confirm against the body.
37 static int getCharFromString(void *data) {
// Character-source callback for file input: data is treated as a FILE * and
// the next character is fetched with fgetc, which yields EOF at end of file
// or on a read error.
51 static int getCharFromFile(void *data) {
52 return fgetc((FILE *)data);
55 //------------------------------------------------------------------------
// Builds a CharCodeToUnicode for a CID collection from its cidToUnicode
// file. The file is located through globalParams, read line by line, and
// each line is scanned as one hexadecimal value; lines that do not scan are
// reported through error() together with their 1-based line number.
// collectionA is only read here: the new object receives a copy of it.
57 CharCodeToUnicode *CharCodeToUnicode::parseCIDToUnicode(GString *collectionA) {
// size is the allocated capacity of mapA, mapLenA the number of lines read.
60 CharCode size, mapLenA;
63 CharCodeToUnicode *ctu;
// No cidToUnicode file is configured for this collection.
65 if (!(f = globalParams->getCIDToUnicodeFile(collectionA))) {
66 error(-1, "Couldn't find cidToUnicode file for the '%s' collection",
67 collectionA->getCString());
72 mapA = (Unicode *)gmalloc(size * sizeof(Unicode));
75 while (getLine(buf, sizeof(buf), f)) {
// Table is full: enlarge it before storing the next entry.
76 if (mapLenA == size) {
78 mapA = (Unicode *)grealloc(mapA, size * sizeof(Unicode));
// NOTE(review): %x requires an unsigned int *, so this assumes Unicode is
// an unsigned int typedef -- confirm.
80 if (sscanf(buf, "%x", &u) == 1) {
83 error(-1, "Bad line (%d) in cidToUnicode file for the '%s' collection",
84 (int)(mapLenA + 1), collectionA->getCString());
// The fourth argument (copyMap) is gTrue, so the new object makes its own
// copy of the table; presumably mapA is released afterwards -- TODO confirm.
91 ctu = new CharCodeToUnicode(collectionA->copy(), mapA, mapLenA, gTrue,
// Wraps a 256-entry table for an 8-bit font in a new CharCodeToUnicode.
// The object is created with no collection name, with copyMap set to gTrue
// and with an empty string map, so toUnicode must provide at least 256
// entries.
97 CharCodeToUnicode *CharCodeToUnicode::make8BitToUnicode(Unicode *toUnicode) {
98 return new CharCodeToUnicode(NULL, toUnicode, 256, gTrue, NULL, 0);
// Builds a CharCodeToUnicode from ToUnicode CMap text held in buf. nBits is
// passed through unchanged to parseCMap1.
101 CharCodeToUnicode *CharCodeToUnicode::parseCMap(GString *buf, int nBits) {
102 CharCodeToUnicode *ctu;
// Start from an object that has no collection name.
105 ctu = new CharCodeToUnicode(NULL);
// p is a cursor into the buffer; its address is given to the tokenizer
// callback so that the callback can move it forward.
106 p = buf->getCString();
107 ctu->parseCMap1(&getCharFromString, &p, nBits);
// Parses ToUnicode CMap text, pulled one character at a time from
// getCharFunc(data) through a PSTokenizer, growing the map table and
// appending entries to sMap as mappings are found. Tokens are read in a
// sliding fashion: tok1 holds the token that preceded the keyword in tok2.
//   usecmap      - tok1 is a /Name; the matching ToUnicode file is looked up
//                  through globalParams and parsed recursively.
//   beginbfchar  - <srcCode> <dstHex> pairs, terminated by endbfchar.
//   beginbfrange - <lo> <hi> <dstHex> triples, terminated by endbfrange.
// Malformed entries are reported through error().
111 void CharCodeToUnicode::parseCMap1(int (*getCharFunc)(void *), void *data,
114 char tok1[256], tok2[256], tok3[256];
// n1, n2, n3 receive the lengths of tok1, tok2, tok3 from getToken.
// nDigits is the required number of hex digits in a source code;
// presumably derived from nBits -- TODO confirm.
115 int nDigits, n1, n2, n3;
117 CharCode code1, code2;
125 pst = new PSTokenizer(getCharFunc, data);
126 pst->getToken(tok1, sizeof(tok1), &n1);
127 while (pst->getToken(tok2, sizeof(tok2), &n2)) {
128 if (!strcmp(tok2, "usecmap")) {
// The operand of usecmap must be a name; skip its leading slash.
129 if (tok1[0] == '/') {
130 name = new GString(tok1 + 1);
131 if ((f = globalParams->findToUnicodeFile(name))) {
// Recursive parse of the referenced CMap file.
// NOTE(review): check that f is closed and name is deleted afterwards.
132 parseCMap1(&getCharFromFile, f, nBits);
135 error(-1, "Couldn't find ToUnicode CMap file for '%s'",
140 pst->getToken(tok1, sizeof(tok1), &n1);
141 } else if (!strcmp(tok2, "beginbfchar")) {
142 while (pst->getToken(tok1, sizeof(tok1), &n1)) {
143 if (!strcmp(tok1, "endbfchar")) {
// A source code with no destination, or a block that ends in mid-pair.
146 if (!pst->getToken(tok2, sizeof(tok2), &n2) ||
147 !strcmp(tok2, "endbfchar")) {
148 error(-1, "Illegal entry in bfchar block in ToUnicode CMap");
// Source must be exactly nDigits hex digits in angle brackets; the
// destination only needs the enclosing angle brackets.
151 if (!(n1 == 2 + nDigits && tok1[0] == '<' && tok1[n1 - 1] == '>' &&
152 tok2[0] == '<' && tok2[n2 - 1] == '>')) {
153 error(-1, "Illegal entry in bfchar block in ToUnicode CMap");
// Overwrite the closing brackets so that tokN + 1 is a plain hex string.
156 tok1[n1 - 1] = tok2[n2 - 1] = '\0';
// NOTE(review): %x requires an unsigned int *; this assumes CharCode is an
// unsigned int typedef -- confirm.
157 if (sscanf(tok1 + 1, "%x", &code1) != 1) {
158 error(-1, "Illegal entry in bfchar block in ToUnicode CMap");
161 if (code1 >= mapLen) {
// Grow the table to the next multiple of 256 above code1.
// NOTE(review): for a code1 close to the maximum CharCode value the
// addition can wrap and produce a table that is too small -- confirm that
// the input range is bounded.
163 mapLen = (code1 + 256) & ~255;
164 map = (Unicode *)grealloc(map, mapLen * sizeof(Unicode));
165 for (i = oldLen; i < mapLen; ++i) {
170 if (sscanf(tok2 + 1, "%x", &u) != 1) {
171 error(-1, "Illegal entry in bfchar block in ToUnicode CMap");
// String map is full: enlarge it before appending.
177 if (sMapLen == sMapSize) {
179 sMap = (CharCodeToUnicodeString *)
180 grealloc(sMap, sMapSize * sizeof(CharCodeToUnicodeString));
182 sMap[sMapLen].c = code1;
// One Unicode value per group of four hex digits in the destination.
// NOTE(review): the loop below stops at maxUnicodeString, but len itself is
// stored unclamped here; readers that trust len could run past the u array
// unless it is clamped elsewhere -- confirm.
183 sMap[sMapLen].len = (n2 - 2) / 4;
184 for (j = 0; j < sMap[sMapLen].len && j < maxUnicodeString; ++j) {
// Copy one group of four hex digits; strncpy does not terminate uHex here,
// so the terminator is presumably written right after -- TODO confirm.
185 strncpy(uHex, tok2 + 1 + j*4, 4);
187 if (sscanf(uHex, "%x", &sMap[sMapLen].u[j]) != 1) {
188 error(-1, "Illegal entry in bfchar block in ToUnicode CMap");
194 pst->getToken(tok1, sizeof(tok1), &n1);
195 } else if (!strcmp(tok2, "beginbfrange")) {
196 while (pst->getToken(tok1, sizeof(tok1), &n1)) {
197 if (!strcmp(tok1, "endbfrange")) {
// Each entry needs three tokens; the block must not end part-way through.
200 if (!pst->getToken(tok2, sizeof(tok2), &n2) ||
201 !strcmp(tok2, "endbfrange") ||
202 !pst->getToken(tok3, sizeof(tok3), &n3) ||
203 !strcmp(tok3, "endbfrange")) {
204 error(-1, "Illegal entry in bfrange block in ToUnicode CMap");
// Both range bounds must be exactly nDigits hex digits in angle brackets;
// the destination only needs the enclosing angle brackets.
207 if (!(n1 == 2 + nDigits && tok1[0] == '<' && tok1[n1 - 1] == '>' &&
208 n2 == 2 + nDigits && tok2[0] == '<' && tok2[n2 - 1] == '>' &&
209 tok3[0] == '<' && tok3[n3 - 1] == '>')) {
210 error(-1, "Illegal entry in bfrange block in ToUnicode CMap");
213 tok1[n1 - 1] = tok2[n2 - 1] = tok3[n3 - 1] = '\0';
214 if (sscanf(tok1 + 1, "%x", &code1) != 1 ||
215 sscanf(tok2 + 1, "%x", &code2) != 1) {
216 error(-1, "Illegal entry in bfrange block in ToUnicode CMap");
// Only the upper bound is compared with the table size.
219 if (code2 >= mapLen) {
// NOTE(review): same wrap-around concern as in the bfchar branch.
221 mapLen = (code2 + 256) & ~255;
222 map = (Unicode *)grealloc(map, mapLen * sizeof(Unicode));
223 for (i = oldLen; i < mapLen; ++i) {
228 if (sscanf(tok3 + 1, "%x", &u) != 1) {
229 error(-1, "Illegal entry in bfrange block in ToUnicode CMap");
// NOTE(review): if CharCode is unsigned and code2 is its maximum value,
// code1 <= code2 is always true and this loop cannot terminate -- confirm.
232 for (; code1 <= code2; ++code1) {
// Make room for one string-map entry per code in the range.
236 if (sMapLen + (int)(code2 - code1 + 1) > sMapSize) {
237 sMapSize = (sMapSize + (code2 - code1 + 1) + 7) & ~7;
238 sMap = (CharCodeToUnicodeString *)
239 grealloc(sMap, sMapSize * sizeof(CharCodeToUnicodeString));
// i is the offset of the current code from the start of the range.
241 for (i = 0; code1 <= code2; ++code1, ++i) {
243 sMap[sMapLen].c = code1;
244 sMap[sMapLen].len = (n3 - 2) / 4;
245 for (j = 0; j < sMap[sMapLen].len && j < maxUnicodeString; ++j) {
246 strncpy(uHex, tok3 + 1 + j*4, 4);
248 if (sscanf(uHex, "%x", &sMap[sMapLen].u[j]) != 1) {
249 error(-1, "Illegal entry in bfrange block in ToUnicode CMap");
// Add the offset within the range to the last value of the string.
// NOTE(review): len - 1 lies outside the u array when len is 0 or greater
// than maxUnicodeString, unless len is clamped or checked elsewhere --
// confirm.
252 sMap[sMapLen].u[sMap[sMapLen].len - 1] += i;
257 pst->getToken(tok1, sizeof(tok1), &n1);
// Creates an object with a freshly allocated map of mapLen entries and an
// empty string map. The collectionA pointer is stored as-is (no copy is
// made here); parseCMap passes NULL for it.
265 CharCodeToUnicode::CharCodeToUnicode(GString *collectionA) {
268 collection = collectionA;
270 map = (Unicode *)gmalloc(mapLen * sizeof(Unicode));
// Fill in every entry of the new table.
271 for (i = 0; i < mapLen; ++i) {
// No multi-value entries yet.
275 sMapLen = sMapSize = 0;
// Creates an object from a prebuilt table. The collectionA pointer is stored
// as-is. The allocate-and-memcpy pair below duplicates mapLen entries of
// mapA; presumably this happens only when copyMap is true and mapA is
// adopted directly otherwise -- TODO confirm. The string map is taken to be
// exactly full: both its length and its capacity are set to sMapLenA.
279 CharCodeToUnicode::CharCodeToUnicode(GString *collectionA, Unicode *mapA,
280 CharCode mapLenA, GBool copyMap,
281 CharCodeToUnicodeString *sMapA,
283 collection = collectionA;
286 map = (Unicode *)gmalloc(mapLen * sizeof(Unicode));
287 memcpy(map, mapA, mapLen * sizeof(Unicode));
292 sMapLen = sMapSize = sMapLenA;
// Destructor. NOTE(review): presumably releases collection, map and sMap,
// which the constructors set up -- confirm.
296 CharCodeToUnicode::~CharCodeToUnicode() {
// Presumably adds one reference to this object. CIDToUnicodeCache calls it
// on a cached entry that matches a lookup.
306 void CharCodeToUnicode::incRefCnt() {
// Presumably drops one reference to this object. CIDToUnicodeCache calls it
// on entries it evicts and on its remaining entries when it is destroyed.
310 void CharCodeToUnicode::decRefCnt() {
// Returns true when this object has a collection name and that name
// compares equal to collectionA (cmp returns 0). Objects created without a
// collection never match.
316 GBool CharCodeToUnicode::match(GString *collectionA) {
317 return collection && !collection->cmp(collectionA);
// Looks up character code c; u is the output buffer and size its capacity.
// The part shown here handles multi-value mappings with a linear scan of
// the string map.
320 int CharCodeToUnicode::mapToUnicode(CharCode c, Unicode *u, int size) {
330 for (i = 0; i < sMapLen; ++i) {
331 if (sMap[i].c == c) {
// Walk the values of the entry, limited by the capacity of the output.
// NOTE(review): the bound is the stored len and size, not maxUnicodeString;
// this relies on len never exceeding the capacity of the u array -- confirm.
332 for (j = 0; j < sMap[i].len && j < size; ++j) {
341 //------------------------------------------------------------------------
// Sets up the cache by visiting each of its cidToUnicodeCacheSize slots;
// presumably every slot is cleared to NULL, since the other members test
// slots for non-null before using them -- TODO confirm.
343 CIDToUnicodeCache::CIDToUnicodeCache() {
346 for (i = 0; i < cidToUnicodeCacheSize; ++i) {
// Gives up the cache's hold on its entries by calling decRefCnt on cache
// slots.
351 CIDToUnicodeCache::~CIDToUnicodeCache() {
354 for (i = 0; i < cidToUnicodeCacheSize; ++i) {
// Presumably guarded by a non-null check on the slot -- TODO confirm.
356 cache[i]->decRefCnt();
361 CharCodeToUnicode *CIDToUnicodeCache::getCIDToUnicode(GString *collection) {
362 CharCodeToUnicode *ctu;
365 if (cache[0] && cache[0]->match(collection)) {
366 cache[0]->incRefCnt();
369 for (i = 1; i < cidToUnicodeCacheSize; ++i) {
370 if (cache[i] && cache[i]->match(collection)) {
372 for (j = i; j >= 1; --j) {
373 cache[j] = cache[j - 1];
380 if ((ctu = CharCodeToUnicode::parseCIDToUnicode(collection))) {
381 if (cache[cidToUnicodeCacheSize - 1]) {
382 cache[cidToUnicodeCacheSize - 1]->decRefCnt();
384 for (j = cidToUnicodeCacheSize - 1; j >= 1; --j) {
385 cache[j] = cache[j - 1];