1 //========================================================================
5 // Copyright 1997 Derek B. Noonburg
7 //========================================================================
9 #ifndef TEXTOUTPUTDEV_H
10 #define TEXTOUTPUTDEV_H
19 #include "OutputDev.h"
24 //------------------------------------------------------------------------
26 enum TextOutputCharSet {
33 //------------------------------------------------------------------------
35 //------------------------------------------------------------------------
// Create a text string from the graphics state <state> and the flag
// <hexCodes1>.
// NOTE(review): <hexCodes1> presumably initializes the hexCodes member
// (subsetted font with hex char codes) -- confirm in the implementation.
41 TextString(GfxState *state, GBool hexCodes1);
46 // Add a character to the string.  <c> is the character (a Guchar)
// and <charSet> is the TextOutputCharSet value supplied by the caller.
// NOTE(review): <x>,<y> are presumably the character's position, used
// to maintain the bounding box and xRight -- confirm in the
// implementation.
47 void addChar(GfxState *state, double x, double y,
49 Guchar c, TextOutputCharSet charSet);
51 // Add a 16-bit character to the string.  <c> is the character code
// (passed as an int) and <charSet> is a GfxFontCharSet16 value.
// NOTE(review): <x>,<y> are presumably the character's position, as in
// addChar() -- confirm in the implementation.
52 void addChar16(GfxState *state, double x, double y,
54 int c, GfxFontCharSet16 charSet);
// Per-string data: bounding box, starting column, text, and list links.
58 double xMin, xMax; // bounding box x coordinates
59 double yMin, yMax; // bounding box y coordinates
60 int col; // starting column
61 GString *text; // the text
62 double *xRight; // right-hand x coord of each char (one entry per char)
// Each string carries two "next" links, one per ordering; TextPage
// holds the corresponding list heads (yxStrings and xyStrings).
63 TextString *yxNext; // next string in y-major order
64 TextString *xyNext; // next string in x-major order
65 GBool hexCodes; // subsetted font with hex char codes
// TextPage is granted access to the non-public members of this class.
67 friend class TextPage;
70 //------------------------------------------------------------------------
72 //------------------------------------------------------------------------
// Create a text page.  <charSet> is the output character set and
// <rawOrder> is a flag; the class has members of the same names,
// described as the character set and "keep strings in content stream
// order".
// NOTE(review): presumably the constructor stores both -- confirm in
// the implementation.
78 TextPage(TextOutputCharSet charSet, GBool rawOrder);
83 // Begin a new string.
// NOTE(review): <hex1> is presumably forwarded as the new TextString's
// hex-codes flag, and <s> is presumably the string about to be drawn --
// confirm in the implementation.
84 void beginString(GfxState *state, GString *s, GBool hex1);
86 // Add a character to the current string.  <c> is the character (a
// Guchar).
// NOTE(review): <x>,<y> are presumably the character position and
// <dx>,<dy> its displacement -- confirm in the implementation.
87 void addChar(GfxState *state, double x, double y,
88 double dx, double dy, Guchar c);
90 // Add a 16-bit character to the current string.  <c> is the
// character code (passed as an int) and <charSet> is a
// GfxFontCharSet16 value.
// NOTE(review): <x>,<y> are presumably the character position and
// <dx>,<dy> its displacement -- confirm in the implementation.
91 void addChar16(GfxState *state, double x, double y,
92 double dx, double dy, int c,
93 GfxFontCharSet16 charSet);
95 // End the current string, sorting it into the list of strings.
98 // Coalesce strings that look like parts of the same line.
101 // Find a string.  If <top> is true, starts looking at top of page;
102 // otherwise starts looking at <xMin>,<yMin>.  If <bottom> is true,
103 // stops looking at bottom of page; otherwise stops looking at
104 // <xMax>,<yMax>.  If found, sets the text bounding rectangle and
105 // returns true; otherwise returns false.
// <xMin>, <yMin>, <xMax>, <yMax> are therefore in/out parameters: they
// are read as search limits when <top>/<bottom> are false, and written
// with the bounding rectangle of the match on success.
106 GBool findText(char *s, GBool top, GBool bottom,
107 double *xMin, double *yMin,
108 double *xMax, double *yMax);
110 // Get the text which is inside the specified rectangle, given by
// <xMin>,<yMin> and <xMax>,<yMax>.  The result is returned as a GString
// pointer.
// NOTE(review): ownership of the returned GString is not visible here;
// presumably the caller must free it -- confirm in the implementation.
111 GString *getText(double xMin, double yMin,
112 double xMax, double yMax);
114 // Dump contents of page to a file.
// Page-level state.
122 TextOutputCharSet charSet; // character set
123 GBool rawOrder; // keep strings in content stream order
125 TextString *curStr; // currently active string
// Heads of the two string lists; the strings are chained through
// TextString::yxNext and TextString::xyNext respectively.
127 TextString *yxStrings; // strings in y-major order
128 TextString *xyStrings; // strings in x-major order
129 TextString *yxCur1, *yxCur2; // cursors for yxStrings list
131 int nest; // current nesting level (for Type 3 fonts)
134 //------------------------------------------------------------------------
136 //------------------------------------------------------------------------
138 class TextOutputDev: public OutputDev {
141 // Open a text output file. If <fileName> is NULL, no file is
142 // written (this is useful, e.g., for searching text). Text is
143 // converted to the character set specified by <charSet>. This
144 // should be set to textOutASCII7 for Japanese (EUC-JP) text. If
145 // <rawOrder> is true, the text is kept in content stream order.
146 TextOutputDev(char *fileName, TextOutputCharSet charSet,
// Destructor.
// NOTE(review): presumably closes the output file when needClose is
// set and releases the current page's text -- confirm in the
// implementation.
150 virtual ~TextOutputDev();
152 // Check if file was successfully created.  Returns the value of the
// <ok> member ("set up ok?").
153 virtual GBool isOk() { return ok; }
155 //---- get info about output device
157 // Does this device use upside-down coordinates?
158 // (Upside-down means (0,0) is the top left corner of the page.)
// This device always answers gTrue.
159 virtual GBool upsideDown() { return gTrue; }
161 // Does this device use drawChar() or drawString()?
// This device always answers gTrue, i.e. it uses drawChar().
162 virtual GBool useDrawChar() { return gTrue; }
164 //----- initialization and control
// Page-start hook; receives the page number and the graphics state.
// NOTE(review): presumably overrides OutputDev::startPage and resets
// the per-page text -- confirm against OutputDev.h and the
// implementation.
167 virtual void startPage(int pageNum, GfxState *state);
// Page-end hook.
// NOTE(review): presumably overrides OutputDev::endPage and is where
// the collected text is written to the output file, if one is open --
// confirm in the implementation.
170 virtual void endPage();
172 //----- update text state
// Font-change hook; receives the graphics state.
// NOTE(review): presumably recomputes the hexCodes member from the
// state's current font -- confirm in the implementation.
173 virtual void updateFont(GfxState *state);
// String-start hook; receives the graphics state and the string <s>.
// NOTE(review): presumably forwards to TextPage::beginString with the
// current hexCodes flag -- confirm in the implementation.
176 virtual void beginString(GfxState *state, GString *s);
// String-end hook; receives the graphics state.
// NOTE(review): presumably ends the TextPage's current string --
// confirm in the implementation.
177 virtual void endString(GfxState *state);
// Per-character hook for 8-bit characters; <c> is the character (a
// Guchar).  The parameter list matches TextPage::addChar.
// NOTE(review): presumably forwards to TextPage::addChar -- confirm in
// the implementation.
178 virtual void drawChar(GfxState *state, double x, double y,
179 double dx, double dy, Guchar c);
// Per-character hook for 16-bit characters; <c> is the character code
// (passed as an int).  Unlike TextPage::addChar16, no character set is
// passed in.
// NOTE(review): the 16-bit character set is presumably obtained from
// the state's font before forwarding to TextPage::addChar16 -- confirm
// in the implementation.
180 virtual void drawChar16(GfxState *state, double x, double y,
181 double dx, double dy, int c);
183 //----- special access
185 // Find a string.  If <top> is true, starts looking at top of page;
186 // otherwise starts looking at <xMin>,<yMin>.  If <bottom> is true,
187 // stops looking at bottom of page; otherwise stops looking at
188 // <xMax>,<yMax>.  If found, sets the text bounding rectangle and
189 // returns true; otherwise returns false.
// <xMin>, <yMin>, <xMax>, <yMax> are therefore in/out parameters: they
// are read as search limits when <top>/<bottom> are false, and written
// with the bounding rectangle of the match on success.  The signature
// is the same as TextPage::findText.
190 GBool findText(char *s, GBool top, GBool bottom,
191 double *xMin, double *yMin,
192 double *xMax, double *yMax);
// Device state: output file, current page text, and option flags.
196 FILE *f; // text file
197 GBool needClose; // need to close the file?
198 TextPage *text; // text for the current page
199 GBool rawOrder; // keep text in content stream order
200 GBool hexCodes; // subsetted font with hex char codes
201 GBool ok; // set up ok?  (value returned by isOk())