00001
00002
00003
00004
00005
00006
00007
00008
00009 #ifndef TEXTOUTPUTDEV_H
00010 #define TEXTOUTPUTDEV_H
00011
00012 #include <aconf.h>
00013
00014 #ifdef USE_GCC_PRAGMAS
00015 #pragma interface
00016 #endif
00017
00018 #include <stdio.h>
00019 #include "gtypes.h"
00020 #include "GfxFont.h"
00021 #include "OutputDev.h"
00022 // Forward declarations. A friend declaration does not make a class name visible to ordinary lookup, so every class named before its definition is declared here.
00023 class GString;
00024 class GList;
00025 class GfxFont;
00026 class GfxState;
00027 class UnicodeMap;
00028 class TextBlock; // named by TextLine before TextBlock is defined
00029 class TextLineFrag; // named by TextPage::assignColumns; not defined in this header
00030 class TextPage; // named by TextBlock, TextFlow and TextWordList before TextPage is defined
00031 typedef void (*TextOutputFunc)(void *stream, char *text, int len); // callback type taken by TextPage::dump and TextOutputDev: (stream, text, len)
00032
00033 //------------------------------------------------------------------------
00034 // TextFontInfo
00035 //------------------------------------------------------------------------
00036 // Holds a GfxFont pointer (plus a font name in TEXTOUT_WORD_LIST builds).
00037 class TextFontInfo {
00038 public:
00039 // Construct from a graphics state / destroy; both are defined outside this header.
00040 TextFontInfo(GfxState *state);
00041 ~TextFontInfo();
00042 // Returns a GBool for the given state. NOTE(review): presumably true when the state's font equals gfxFont -- confirm in the implementation.
00043 GBool matches(GfxState *state);
00044
00045 private:
00046
00047 GfxFont *gfxFont; // font pointer held by this record
00048 #if TEXTOUT_WORD_LIST
00049 GString *fontName; // returned by TextWord::getFontName()
00050 #endif
00051
00052 friend class TextWord;
00053 friend class TextPage;
00054 };
00055
00056 //------------------------------------------------------------------------
00057 // TextWord
00058 //------------------------------------------------------------------------
00059 // A run of Unicode characters with a bounding box, a font record and a link to another TextWord.
00060 class TextWord {
00061 public:
00062
00063 // Constructor: takes a graphics state, a rotation, a point (x0, y0), a character position, a font record and a font size.
00064 TextWord(GfxState *state, int rotA, double x0, double y0,
00065 int charPosA, TextFontInfo *fontA, double fontSize);
00066
00067 // Destructor.
00068 ~TextWord();
00069
00070 // Add the Unicode character u, given a position (x, y) and deltas (dx, dy).
00071 void addChar(GfxState *state, double x, double y,
00072 double dx, double dy, Unicode u);
00073
00074 // Merge the given word into this one (defined outside this header).
00075 void merge(TextWord *word);
00076
00077 // Compare this word with the given word; returns an int.
00078 // NOTE(review): presumably orders along the primary axis for rot -- confirm.
00079 int primaryCmp(TextWord *word);
00080
00081 // Returns a double computed from this word and the given word.
00082 // NOTE(review): presumably the gap along the primary axis -- confirm.
00083 double primaryDelta(TextWord *word);
00084 // Static comparator over two untyped pointers. NOTE(review): signature matches a qsort callback; presumably compares y then x -- confirm.
00085 static int cmpYX(const void *p1, const void *p2);
00086 // Accessors below exist only when TEXTOUT_WORD_LIST is enabled; none of them checks its arguments.
00087 #if TEXTOUT_WORD_LIST
00088 int getLength() { return len; }
00089 Unicode getChar(int idx) { return text[idx]; }
00090 GString *getText();
00091 GString *getFontName() { return font->fontName; }
00092 void getColor(double *r, double *g, double *b)
00093 { *r = colorR; *g = colorG; *b = colorB; }
00094 void getBBox(double *xMinA, double *yMinA, double *xMaxA, double *yMaxA)
00095 { *xMinA = xMin; *yMinA = yMin; *xMaxA = xMax; *yMaxA = yMax; }
00096 int getCharPos() { return charPos; }
00097 int getCharLen() { return charLen; }
00098 #endif
00099
00100 private:
00101
00102 int rot; // rotation value. NOTE(review): presumably 0..3 in quarter turns -- confirm
00103
00104 double xMin, xMax; // bounding box x range (reported by getBBox)
00105 double yMin, yMax; // bounding box y range (reported by getBBox)
00106 double base; // NOTE(review): presumably the baseline coordinate -- confirm
00107 Unicode *text; // character array indexed by getChar()
00108 double *edge; // NOTE(review): presumably per-character edge coordinates -- confirm
00109
00110 int len; // value reported by getLength()
00111 int size; // NOTE(review): presumably the allocated capacity of text/edge -- confirm
00112 int charPos; // value reported by getCharPos()
00113 int charLen; // value reported by getCharLen()
00114
00115 TextFontInfo *font; // font record; getFontName() reads font->fontName
00116 double fontSize; // font size
00117 GBool spaceAfter; // NOTE(review): presumably set when a space follows this word -- confirm
00118
00119 TextWord *next; // link to another TextWord
00120
00121 #if TEXTOUT_WORD_LIST
00122 double colorR, // color components reported by getColor()
00123 colorG,
00124 colorB;
00125 #endif
00126
00127 friend class TextPool;
00128 friend class TextLine;
00129 friend class TextBlock;
00130 friend class TextFlow;
00131 friend class TextWordList;
00132 friend class TextPage;
00133 };
00134
00135 //------------------------------------------------------------------------
00136 // TextPool
00137 //------------------------------------------------------------------------
00138 // Array of TextWord pointers addressed by an integer base index.
00139 class TextPool {
00140 public:
00141
00142 TextPool();
00143 ~TextPool();
00144 // Read / write the slot for baseIdx; slots live at index (baseIdx - minBaseIdx). No range check is performed here.
00145 TextWord *getPool(int baseIdx) { return pool[baseIdx - minBaseIdx]; }
00146 void setPool(int baseIdx, TextWord *p) { pool[baseIdx - minBaseIdx] = p; }
00147 // Convert a base value to an int index (defined outside this header).
00148 int getBaseIdx(double base);
00149 // Add a word to the pool (defined outside this header).
00150 void addWord(TextWord *word);
00151
00152 private:
00153
00154 int minBaseIdx; // offset subtracted from baseIdx in getPool/setPool
00155 int maxBaseIdx; // NOTE(review): presumably the largest base index currently covered -- confirm
00156 TextWord **pool; // slot array indexed by (baseIdx - minBaseIdx)
00157
00158 TextWord *cursor; // NOTE(review): presumably a cached insertion position -- confirm
00159 int cursorBaseIdx; // NOTE(review): presumably the base index that cursor refers to -- confirm
00160
00161 friend class TextBlock;
00162 friend class TextPage;
00163 };
00164
00165 //------------------------------------------------------------------------
00166 // TextLine
00167 //------------------------------------------------------------------------
00168 // A list of TextWords belonging to a TextBlock, with a bounding box and text arrays.
00169 class TextLine {
00170 public:
00171 // Constructor takes the owning block, a rotation and a base value.
00172 TextLine(TextBlock *blkA, int rotA, double baseA);
00173 ~TextLine();
00174 // Add a word to this line (defined outside this header).
00175 void addWord(TextWord *word);
00176
00177 // Returns a double computed from this line and the given line.
00178 // NOTE(review): presumably the gap along the primary axis -- confirm.
00179 double primaryDelta(TextLine *line);
00180
00181 // Compare this line with the given line; returns an int.
00182 // NOTE(review): presumably orders along the primary axis -- confirm.
00183 int primaryCmp(TextLine *line);
00184
00185
00186 // Compare this line with the given line; returns an int.
00187 // NOTE(review): presumably orders along the secondary axis -- confirm.
00188 int secondaryCmp(TextLine *line);
00189 // Member comparison against another line. NOTE(review): presumably y first, then x -- confirm.
00190 int cmpYX(TextLine *line);
00191 // Static comparator over two untyped pointers (signature matches a qsort callback).
00192 static int cmpXY(const void *p1, const void *p2);
00193 // Takes a UnicodeMap; NOTE(review): presumably fills text/edge/col from the word list -- confirm.
00194 void coalesce(UnicodeMap *uMap);
00195
00196 private:
00197
00198 TextBlock *blk; // block pointer (constructor parameter blkA, presumably)
00199 int rot; // rotation value
00200 double xMin, xMax; // bounding box x range
00201 double yMin, yMax; // bounding box y range
00202 double base; // NOTE(review): presumably the baseline coordinate -- confirm
00203 TextWord *words; // word list pointer
00204 TextWord *lastWord; // NOTE(review): presumably the tail of the word list -- confirm
00205 Unicode *text; // Unicode character array
00206
00207 double *edge; // NOTE(review): presumably per-character edge coordinates -- confirm
00208
00209 int *col; // NOTE(review): presumably per-character column indexes -- confirm
00210 int len; // NOTE(review): presumably the number of entries in text -- confirm
00211 int convertedLen; // NOTE(review): presumably the length after UnicodeMap conversion -- confirm
00212 GBool hyphenated; // NOTE(review): presumably set when the line ends in a hyphen -- confirm
00213 TextLine *next; // link to another TextLine
00214
00215 friend class TextLineFrag;
00216 friend class TextBlock;
00217 friend class TextFlow;
00218 friend class TextWordList;
00219 friend class TextPage;
00220 };
00221
00222 //------------------------------------------------------------------------
00223 // TextBlock
00224 //------------------------------------------------------------------------
00225 // A group of TextLines on a TextPage, with a bounding box and its own TextPool.
00226 class TextBlock {
00227 public:
00228 // Constructor takes the owning page and a rotation.
00229 TextBlock(TextPage *pageA, int rotA);
00230 ~TextBlock();
00231 // Add a word to this block (defined outside this header).
00232 void addWord(TextWord *word);
00233 // Takes a UnicodeMap; NOTE(review): presumably builds the lines from the pooled words -- confirm.
00234 void coalesce(UnicodeMap *uMap);
00235
00236 // Takes another block; NOTE(review): presumably updates priMin/priMax from it -- confirm.
00237 void updatePriMinMax(TextBlock *blk);
00238 // Static comparators over two untyped pointers (signatures match a qsort callback).
00239 static int cmpXYPrimaryRot(const void *p1, const void *p2);
00240
00241 static int cmpYXPrimaryRot(const void *p1, const void *p2);
00242 // Compare this block with the given block; returns an int.
00243 int primaryCmp(TextBlock *blk);
00244 // Returns a double computed from this block and the given block.
00245 double secondaryDelta(TextBlock *blk);
00246
00247 // Returns a GBool for the given block.
00248 // NOTE(review): presumably true when this block lies below blk -- confirm.
00249 GBool isBelow(TextBlock *blk);
00250
00251 private:
00252
00253 TextPage *page; // page pointer (constructor parameter pageA, presumably)
00254 int rot; // rotation value
00255 double xMin, xMax; // bounding box x range
00256 double yMin, yMax; // bounding box y range
00257 double priMin, priMax; // NOTE(review): presumably the extent along the primary axis -- confirm
00258
00259 TextPool *pool; // word pool for this block
00260
00261 TextLine *lines; // line list pointer
00262 TextLine *curLine; // NOTE(review): presumably the line currently being built -- confirm
00263 int nLines; // NOTE(review): presumably the number of lines -- confirm
00264 int charCount; // NOTE(review): presumably the number of characters in the block -- confirm
00265 int col; // NOTE(review): presumably the starting column -- confirm
00266 int nColumns; // NOTE(review): presumably the number of columns -- confirm
00267
00268 TextBlock *next; // link to another TextBlock
00269 TextBlock *stackNext; // second link to another TextBlock. NOTE(review): purpose not visible here
00270
00271 friend class TextLine;
00272 friend class TextLineFrag;
00273 friend class TextFlow;
00274 friend class TextWordList;
00275 friend class TextPage;
00276 };
00277
00278 //------------------------------------------------------------------------
00279 // TextFlow
00280 //------------------------------------------------------------------------
00281 // A list of TextBlocks on a TextPage, with a bounding box.
00282 class TextFlow {
00283 public:
00284 // Constructor takes the owning page and a block.
00285 TextFlow(TextPage *pageA, TextBlock *blk);
00286 ~TextFlow();
00287
00288 // Add a block to this flow (defined outside this header).
00289 void addBlock(TextBlock *blk);
00290
00291
00292
00293 // Returns a GBool for blk given prevBlk.
00294 // NOTE(review): presumably true when blk may follow prevBlk in this flow -- confirm.
00295 GBool blockFits(TextBlock *blk, TextBlock *prevBlk);
00296
00297 private:
00298
00299 TextPage *page; // page pointer (constructor parameter pageA, presumably)
00300 double xMin, xMax; // bounding box x range
00301 double yMin, yMax; // bounding box y range
00302 double priMin, priMax; // NOTE(review): presumably the extent along the primary axis -- confirm
00303 TextBlock *blocks; // block list pointer
00304 TextBlock *lastBlk; // NOTE(review): presumably the tail of the block list -- confirm
00305 TextFlow *next; // link to another TextFlow
00306
00307 friend class TextWordList;
00308 friend class TextPage;
00309 };
00310
00311 #if TEXTOUT_WORD_LIST
00312
00313 //------------------------------------------------------------------------
00314 // TextWordList
00315 //------------------------------------------------------------------------
00316 // Word list built from a TextPage; compiled only when TEXTOUT_WORD_LIST is enabled.
00317 class TextWordList {
00318 public:
00319
00320
00321
00322 // Build the list from the given page.
00323 // NOTE(review): physLayout presumably selects the word ordering -- confirm in the implementation.
00324 TextWordList(TextPage *text, GBool physLayout);
00325
00326 ~TextWordList();
00327
00328 // Returns an int length. NOTE(review): presumably the number of entries in words -- confirm.
00329 int getLength();
00330
00331 // Returns the TextWord for index idx. NOTE(review): out-of-range behavior is not visible here.
00332 TextWord *get(int idx);
00333
00334 private:
00335
00336 GList *words; // list container behind this word list
00337 };
00338
00339 #endif // TEXTOUT_WORD_LIST
00340
00341 //------------------------------------------------------------------------
00342 // TextPage
00343 //------------------------------------------------------------------------
00344 // Per-page text container: receives characters and words, and offers search, extraction and dump entry points.
00345 class TextPage {
00346 public:
00347
00348 // Constructor. NOTE(review): rawOrderA is presumably stored in rawOrder -- confirm.
00349 TextPage(GBool rawOrderA);
00350
00351 // Destructor.
00352 ~TextPage();
00353
00354 // Start a page, given a graphics state.
00355 void startPage(GfxState *state);
00356
00357 // End the page.
00358 void endPage();
00359
00360 // Notification carrying the graphics state. NOTE(review): presumably refreshes curFont/curFontSize -- confirm.
00361 void updateFont(GfxState *state);
00362
00363 // Begin a word at (x0, y0).
00364 void beginWord(GfxState *state, double x0, double y0);
00365
00366 // Add a character: code c with uLen Unicode values in u, given a position (x, y) and deltas (dx, dy).
00367 void addChar(GfxState *state, double x, double y,
00368 double dx, double dy,
00369 CharCode c, Unicode *u, int uLen);
00370
00371 // End the word started by beginWord (presumably curWord -- confirm).
00372 void endWord();
00373
00374 // Add a word to the page (defined outside this header).
00375 void addWord(TextWord *word);
00376
00377 // Takes the physLayout flag. NOTE(review): presumably groups words into lines, blocks and flows -- confirm.
00378 void coalesce(GBool physLayout);
00379
00380
00381 // Search for the Unicode string s of length len.
00382 // The four GBool flags control where the search starts and stops, and the
00383 // xMin/yMin/xMax/yMax pointers carry coordinates in and/or out.
00384 // NOTE(review): the exact flag and coordinate semantics live in the
00385 // implementation, which is not visible here; presumably returns gTrue on a
00386 // match -- confirm.
00387 GBool findText(Unicode *s, int len,
00388 GBool startAtTop, GBool stopAtBottom,
00389 GBool startAtLast, GBool stopAtLast,
00390 double *xMin, double *yMin,
00391 double *xMax, double *yMax);
00392
00393 // Returns a GString for the rectangle (xMin, yMin)-(xMax, yMax). NOTE(review): ownership of the result is not visible here.
00394 GString *getText(double xMin, double yMin,
00395 double xMax, double yMax);
00396
00397
00398 // Takes a character position and length, plus four output coordinate pointers.
00399 // NOTE(review): presumably reports the bounding box of that character range and gTrue on success -- confirm.
00400 GBool findCharRange(int pos, int length,
00401 double *xMin, double *yMin,
00402 double *xMax, double *yMax);
00403
00404 // Takes a TextOutputFunc callback, a stream pointer and the physLayout flag. NOTE(review): presumably writes the page text through the callback -- confirm.
00405 void dump(void *outputStream, TextOutputFunc outputFunc,
00406 GBool physLayout);
00407
00408 #if TEXTOUT_WORD_LIST
00409
00410
00411 // Returns a TextWordList pointer; available only when TEXTOUT_WORD_LIST is enabled.
00412 // NOTE(review): ownership of the result is not visible here.
00413 TextWordList *makeWordList(GBool physLayout);
00414 #endif
00415
00416 private:
00417 // Private helpers, defined outside this header.
00418 void clear();
00419 void assignColumns(TextLineFrag *frags, int nFrags, int rot);
00420 int dumpFragment(Unicode *text, int len, UnicodeMap *uMap, GString *s);
00421
00422 GBool rawOrder; // NOTE(review): presumably keeps words in content-stream order -- confirm
00423
00424 double pageWidth, pageHeight; // page dimensions
00425 TextWord *curWord; // NOTE(review): presumably the word currently being built -- confirm
00426 int charPos; // NOTE(review): presumably the next character position -- confirm
00427
00428 TextFontInfo *curFont; // current font record
00429 double curFontSize; // current font size
00430 int nest; // NOTE(review): presumably a nesting depth counter -- confirm
00431 int nTinyChars; // NOTE(review): presumably a count of very small characters -- confirm
00432 GBool lastCharOverlap; // NOTE(review): presumably set when the last character overlapped its predecessor -- confirm
00433
00434
00435 TextPool *pools[4]; // four word pools. NOTE(review): presumably one per rotation value -- confirm
00436 TextFlow *flows; // flow list pointer
00437 TextBlock **blocks; // array of block pointers
00438 int nBlocks; // NOTE(review): presumably the number of entries in blocks -- confirm
00439 int primaryRot; // NOTE(review): presumably the dominant rotation on the page -- confirm
00440 GBool primaryLR; // NOTE(review): presumably set when the primary direction is left-to-right -- confirm
00441
00442 TextWord *rawWords; // word list pointer. NOTE(review): presumably used when rawOrder is set -- confirm
00443
00444 TextWord *rawLastWord; // NOTE(review): presumably the tail of rawWords -- confirm
00445
00446 GList *fonts; // list container. NOTE(review): presumably holds TextFontInfo objects -- confirm
00447
00448
00449 double lastFindXMin, // NOTE(review): presumably the position of the last findText result -- confirm
00450 lastFindYMin;
00451 GBool haveLastFind; // NOTE(review): presumably set when lastFindXMin/lastFindYMin are valid -- confirm
00452
00453 friend class TextLine;
00454 friend class TextLineFrag;
00455 friend class TextBlock;
00456 friend class TextFlow;
00457 friend class TextWordList;
00458 };
00459
00460 //------------------------------------------------------------------------
00461 // TextOutputDev
00462 //------------------------------------------------------------------------
00463 // OutputDev subclass that holds a TextPage and either an output callback or a file name target.
00464 class TextOutputDev: public OutputDev {
00465 public:
00466
00467
00468
00469
00470 // Constructor taking an output file name.
00471 // NOTE(review): handling of a null fileName and the meaning of append are defined in the implementation -- confirm.
00472 TextOutputDev(char *fileName, GBool physLayoutA,
00473 GBool rawOrderA, GBool append);
00474
00475
00476
00477
00478 // Constructor taking an output callback and the stream pointer that goes with it.
00479 TextOutputDev(TextOutputFunc func, void *stream,
00480 GBool physLayoutA, GBool rawOrderA);
00481
00482 // Destructor.
00483 virtual ~TextOutputDev();
00484
00485 // Returns the ok flag.
00486 virtual GBool isOk() { return ok; }
00487
00488
00489
00490 // Capability queries with fixed answers (virtual; presumably overriding OutputDev -- confirm).
00491 // upsideDown() always returns gTrue.
00492 virtual GBool upsideDown() { return gTrue; }
00493
00494 // useDrawChar() always returns gTrue.
00495 virtual GBool useDrawChar() { return gTrue; }
00496
00497
00498 // interpretType3Chars() always returns gFalse.
00499 virtual GBool interpretType3Chars() { return gFalse; }
00500
00501 // needNonText() always returns gFalse.
00502 virtual GBool needNonText() { return gFalse; }
00503
00504
00505
00506 // Start a page, given its number and a graphics state.
00507 virtual void startPage(int pageNum, GfxState *state);
00508
00509 // End the page.
00510 virtual void endPage();
00511
00512 // Notification carrying the graphics state. NOTE(review): presumably forwarded to the TextPage -- confirm.
00513 virtual void updateFont(GfxState *state);
00514
00515 // String and character callbacks; drawChar receives code c with uLen Unicode values in u.
00516 virtual void beginString(GfxState *state, GString *s);
00517 virtual void endString(GfxState *state);
00518 virtual void drawChar(GfxState *state, double x, double y,
00519 double dx, double dy,
00520 double originX, double originY,
00521 CharCode c, Unicode *u, int uLen);
00522
00523
00524
00525
00526
00527
00528
00529
00530 // Same parameter list as TextPage::findText.
00531 // NOTE(review): presumably forwards to the held TextPage -- confirm.
00532 GBool findText(Unicode *s, int len,
00533 GBool startAtTop, GBool stopAtBottom,
00534 GBool startAtLast, GBool stopAtLast,
00535 double *xMin, double *yMin,
00536 double *xMax, double *yMax);
00537
00538 // Same parameter list as TextPage::getText. NOTE(review): ownership of the result is not visible here.
00539 GString *getText(double xMin, double yMin,
00540 double xMax, double yMax);
00541
00542
00543
00544 // Same parameter list as TextPage::findCharRange.
00545 GBool findCharRange(int pos, int length,
00546 double *xMin, double *yMin,
00547 double *xMax, double *yMax);
00548
00549 #if TEXTOUT_WORD_LIST
00550
00551
00552 // Returns a TextWordList pointer; available only when TEXTOUT_WORD_LIST is enabled.
00553 // NOTE(review): ownership of the result is not visible here.
00554 TextWordList *makeWordList();
00555 #endif
00556
00557 private:
00558
00559 TextOutputFunc outputFunc; // output callback
00560 void *outputStream; // stream pointer that accompanies outputFunc
00561 GBool needClose; // NOTE(review): presumably set when this object opened the stream itself -- confirm
00562
00563 TextPage *text; // page text container
00564 GBool physLayout; // physical-layout flag (constructor parameter physLayoutA, presumably)
00565
00566 GBool rawOrder; // raw-order flag (constructor parameter rawOrderA, presumably)
00567 GBool ok; // value returned by isOk()
00568 };
00569
00570 #endif