X-Git-Url: https://www.fi.muni.cz/~kas/git//home/kas/public_html/git/?a=blobdiff_plain;f=pdf%2Fxpdf%2FTextOutputDev.h;h=e0c22c2270277f2ec7020c9345f29a8e9384c64b;hb=884f739665dc56e66f51e104350f2affd33f2dd8;hp=daab3c4ebb93a05f669694d9f17b7b1f02a5d40a;hpb=2a393c134fe3fe8eb85bf818cb7ad6ae4396322a;p=evince.git diff --git a/pdf/xpdf/TextOutputDev.h b/pdf/xpdf/TextOutputDev.h index daab3c4e..e0c22c22 100644 --- a/pdf/xpdf/TextOutputDev.h +++ b/pdf/xpdf/TextOutputDev.h @@ -2,14 +2,16 @@ // // TextOutputDev.h // -// Copyright 1997-2002 Glyph & Cog, LLC +// Copyright 1997-2003 Glyph & Cog, LLC // //======================================================================== #ifndef TEXTOUTPUTDEV_H #define TEXTOUTPUTDEV_H -#ifdef __GNUC__ +#include <aconf.h> + +#ifdef USE_GCC_PRAGMAS #pragma interface #endif @@ -18,45 +20,179 @@ #include "GfxFont.h" #include "OutputDev.h" -class GfxState; class GString; +class GList; +class GfxFont; +class GfxState; //------------------------------------------------------------------------ typedef void (*TextOutputFunc)(void *stream, char *text, int len); + +//------------------------------------------------------------------------ +// TextFontInfo +//------------------------------------------------------------------------ + +class TextFontInfo { +public: + + TextFontInfo(GfxState *state); + ~TextFontInfo(); + + GBool matches(GfxState *state); + +private: + + GfxFont *gfxFont; + double horizScaling; + + double minSpaceWidth; // min width for inter-word space, as a + // fraction of the font size + double maxSpaceWidth; // max width for inter-word space, as a + // fraction of the font size + + + friend class TextWord; + friend class TextPage; +}; + //------------------------------------------------------------------------ -// TextString +// TextWord //------------------------------------------------------------------------ -class TextString { +class TextWord { public: // Constructor. 
- TextString(GfxState *state, double fontSize); + TextWord(GfxState *state, double x0, double y0, int charPosA, + TextFontInfo *fontA, double fontSize); + // Destructor. - ~TextString(); + ~TextWord(); - // Add a character to the string. + // Add a character to the word. void addChar(GfxState *state, double x, double y, double dx, double dy, Unicode u); + private: + GBool xyBefore(TextWord *word2); + void merge(TextWord *word2); + double xMin, xMax; // bounding box x coordinates double yMin, yMax; // bounding box y coordinates - int col; // starting column + double yBase; // baseline y coordinate Unicode *text; // the text double *xRight; // right-hand x coord of each char int len; // length of text and xRight int size; // size of text and xRight arrays - TextString *yxNext; // next string in y-major order - TextString *xyNext; // next string in x-major order + int charPos; // character position (within content stream) + int charLen; // number of content stream characters in + // this word + TextFontInfo *font; // font information + double fontSize; // font size + GBool spaceAfter; // set if there is a space between this + // word and the next word on the line + TextWord *next; // next word in line (before lines are + // assembled: next word in xy order) + + + friend class TextLine; + friend class TextPage; +}; + +//------------------------------------------------------------------------ +// TextLine +//------------------------------------------------------------------------ + +class TextLine { +public: + + TextLine(); + ~TextLine(); + +private: + + GBool yxBefore(TextLine *line2); + void merge(TextLine *line2); + double xMin, xMax; // bounding box x coordinates + double yMin, yMax; // bounding box y coordinates + double yBase; // primary baseline y coordinate + double xSpaceL, xSpaceR; // whitespace to left and right of this line + TextFontInfo *font; // primary font + double fontSize; // primary font size + TextWord *words; // words in this line + TextWord 
*lastWord; // last word in this line + Unicode *text; // Unicode text of the line, including + // spaces between words + double *xRight; // right-hand x coord of each Unicode char + int *col; // starting column number of each Unicode char + int len; // number of Unicode chars + int convertedLen; // total number of converted characters + GBool hyphenated; // set if last char is a hyphen + TextLine *pageNext; // next line on page + TextLine *next; // next line in block + TextLine *flowNext; // next line in flow + + friend class TextBlock; friend class TextPage; }; +//------------------------------------------------------------------------ +// TextBlock +//------------------------------------------------------------------------ + +class TextBlock { +public: + + TextBlock(); + ~TextBlock(); + +private: + + GBool yxBefore(TextBlock *blk2); + void mergeRight(TextBlock *blk2); + void mergeBelow(TextBlock *blk2); + + double xMin, xMax; // bounding box x coordinates + double yMin, yMax; // bounding box y coordinates + double xSpaceL, xSpaceR; // whitespace to left and right of this block + double ySpaceT, ySpaceB; // whitespace above and below this block + double maxFontSize; // max primary font size + TextLine *lines; // lines in block + TextBlock *next; // next block in flow + TextBlock *stackNext; // next block on traversal stack + + friend class TextFlow; + friend class TextPage; +}; + +//------------------------------------------------------------------------ +// TextFlow +//------------------------------------------------------------------------ + +class TextFlow { +public: + + TextFlow(); + ~TextFlow(); + +private: + + double yMin, yMax; // bounding box y coordinates + double ySpaceT, ySpaceB; // whitespace above and below this flow + TextBlock *blocks; // blocks in flow + TextLine *lines; // lines in flow + TextFlow *next; // next flow on page + + friend class TextPage; +}; + + //------------------------------------------------------------------------ // TextPage 
//------------------------------------------------------------------------ @@ -65,7 +201,7 @@ class TextPage { public: // Constructor. - TextPage(GBool rawOrderA); + TextPage(GBool rawOrder); // Destructor. ~TextPage(); @@ -73,23 +209,29 @@ public: // Update the current font. void updateFont(GfxState *state); - // Begin a new string. - void beginString(GfxState *state); - // Add a character to the current string. + // Begin a new word. + void beginWord(GfxState *state, double x0, double y0); + + // Add a character to the current word. void addChar(GfxState *state, double x, double y, - double dx, double dy, Unicode *u, int uLen); + double dx, double dy, + CharCode c, Unicode *u, int uLen); + + // End the current word, sorting it into the list of words. + void endWord(); + + // Add a word, sorting it into the list of words. + void addWord(TextWord *word); - // End the current string, sorting it into the list of strings. - void endString(); // Coalesce strings that look like parts of the same line. - void coalesce(); + void coalesce(GBool physLayout); // Find a string. If <top> is true, starts looking at top of page; // otherwise starts looking at <xMin>,<yMin>. If <bottom> is true, // stops looking at bottom of page; otherwise stops looking at - // <xMax>,<yMax>. If found, sets the text bounding rectange and + // <xMax>,<yMax>. If found, sets the text bounding rectangle and // returns true; otherwise returns false. GBool findText(Unicode *s, int len, GBool top, GBool bottom, @@ -100,24 +242,52 @@ public: GString *getText(double xMin, double yMin, double xMax, double yMax); + // Find a string by character position and length. If found, sets + // the text bounding rectangle and returns true; otherwise returns + // false. + GBool findCharRange(int pos, int length, + double *xMin, double *yMin, + double *xMax, double *yMax); + // Dump contents of page to a file. 
- void dump(void *outputStream, TextOutputFunc outputFunc); + void dump(void *outputStream, TextOutputFunc outputFunc, + GBool physLayout); + + // Start a new page. + void startPage(GfxState *state); - // Clear the page. - void clear(); private: - GBool rawOrder; // keep strings in content stream order + void clear(); + double lineFit(TextLine *line, TextWord *word, double *space); + GBool lineFit2(TextLine *line0, TextLine *line1); + GBool blockFit(TextBlock *blk, TextLine *line); + GBool blockFit2(TextBlock *blk0, TextBlock *blk1); + GBool flowFit(TextFlow *flow, TextBlock *blk); - TextString *curStr; // currently active string + GBool rawOrder; // keep text in content stream order + + double pageWidth, pageHeight; // width and height of current page + TextWord *curWord; // currently active string + int charPos; // next character position (within content + // stream) + TextFontInfo *font; // current font double fontSize; // current font size + int nest; // current nesting level (for Type 3 fonts) + int nTinyChars; // number of "tiny" chars seen so far + + TextWord *words; // words, in xy order (before they're + // sorted into lines) + TextWord *wordPtr; // cursor for the word list + + TextLine *lines; // lines, in xy order + TextFlow *flows; // flows, in reading order + + GList *fonts; // all font info objects used on this + // page [TextFontInfo] - TextString *yxStrings; // strings in y-major order - TextString *xyStrings; // strings in x-major order - TextString *yxCur1, *yxCur2; // cursors for yxStrings list - int nest; // current nesting level (for Type 3 fonts) }; //------------------------------------------------------------------------ @@ -129,12 +299,18 @@ public: // Open a text output file. If <fileName> is NULL, no file is // written (this is useful, e.g., for searching text). If - // <rawOrderA> is true, the text is kept in content stream order. 
- TextOutputDev(char *fileName, GBool rawOrderA, GBool append); + // <physLayoutA> is true, the original physical layout of the text + // is maintained. If <rawOrderA> is true, the text is kept in + // content stream order. + TextOutputDev(char *fileName, GBool physLayoutA, + GBool rawOrderA, GBool append); // Create a TextOutputDev which will write to a generic stream. If - // <rawOrderA> is true, the text is kept in content stream order. - TextOutputDev(TextOutputFunc func, void *stream, GBool rawOrderA); + // <physLayoutA> is true, the original physical layout of the text + // is maintained. If <rawOrderA> is true, the text is kept in + // content stream order. + TextOutputDev(TextOutputFunc func, void *stream, + GBool physLayoutA, GBool rawOrderA); // Destructor. virtual ~TextOutputDev(); @@ -177,18 +353,32 @@ public: double originX, double originY, CharCode c, Unicode *u, int uLen); + //----- path painting + //----- special access // Find a string. If <top> is true, starts looking at top of page; // otherwise starts looking at <xMin>,<yMin>. If <bottom> is true, // stops looking at bottom of page; otherwise stops looking at - // <xMax>,<yMax>. If found, sets the text bounding rectange and + // <xMax>,<yMax>. If found, sets the text bounding rectangle and // returns true; otherwise returns false. GBool findText(Unicode *s, int len, GBool top, GBool bottom, double *xMin, double *yMin, double *xMax, double *yMax); + // Get the text which is inside the specified rectangle. + GString *getText(double xMin, double yMin, + double xMax, double yMax); + + // Find a string by character position and length. If found, sets + // the text bounding rectangle and returns true; otherwise returns + // false. + GBool findCharRange(int pos, int length, + double *xMin, double *yMin, + double *xMax, double *yMax); + + private: TextOutputFunc outputFunc; // output function @@ -196,8 +386,11 @@ private: GBool needClose; // need to close the output file? 
// (only if outputStream is a FILE*) TextPage *text; // text for the current page + GBool physLayout; // maintain original physical layout when + // dumping text GBool rawOrder; // keep text in content stream order GBool ok; // set up ok? + }; #endif