Main Page   Namespace List   Class Hierarchy   Alphabetical List   Compound List   File List   Namespace Members   Compound Members   File Members   Related Pages  

DocOffsetParser.hpp

Go to the documentation of this file.
00001 /*==========================================================================
00002  * Copyright (c) 2002-2003 University of Massachusetts.  All Rights Reserved.
00003  *
00004  * Use of the Lemur Toolkit for Language Modeling and Information Retrieval
00005  * is subject to the terms of the software license set forth in the LICENSE
00006  * file included with this software, and also available at
00007  * http://www.lemurproject.org/license.html
00008  *
00009  *==========================================================================
00010 */
00011 
00012 #ifndef _DOCOFFSETPARSER_HPP
00013 #define _DOCOFFSETPARSER_HPP
00014 #include "Parser.hpp"
00015 #include "Match.hpp"
00016 
00019 class DocOffsetParser : public TextHandler {
00020 
00021 public:
00023   DocOffsetParser(Parser *parser) : p(parser) { 
00024     p->setTextHandler(this);
00025   }
00026   
00027   virtual ~DocOffsetParser(){}
00028 
00030   virtual char *handleWord(char * word) {
00031     if (word != NULL) {
00032       int end = p->fileTell() - 1;
00033       int start = (end - strlen(word)) + 1;
00034       Match m;
00035       m.start = start;
00036       m.end = end;    
00037       offsets.push_back(m);
00038     }
00039     return word;
00040   }
00042   virtual void parseString(char *buffer) {
00043     offsets.clear();
00044     p->parseBuffer(buffer, strlen(buffer));
00045   }
00047   vector <Match> getOffsets() { return offsets; }
00048 private:
00050   vector <Match> offsets;
00051   Parser *p;
00052 };
00053 
00054 #endif

Generated on Wed Nov 3 12:58:54 2004 for Lemur Toolkit by doxygen 1.2.18