00001 /*========================================================================== 00002 * Copyright (c) 2002-2003 University of Massachusetts. All Rights Reserved. 00003 * 00004 * Use of the Lemur Toolkit for Language Modeling and Information Retrieval 00005 * is subject to the terms of the software license set forth in the LICENSE 00006 * file included with this software, and also available at 00007 * http://www.lemurproject.org/license.html 00008 * 00009 *========================================================================== 00010 */ 00011 00012 #ifndef _DOCOFFSETPARSER_HPP 00013 #define _DOCOFFSETPARSER_HPP 00014 #include "Parser.hpp" 00015 #include "Match.hpp" 00016 00019 class DocOffsetParser : public TextHandler { 00020 00021 public: 00023 DocOffsetParser(Parser *parser) : p(parser) { 00024 p->setTextHandler(this); 00025 } 00026 00027 virtual ~DocOffsetParser(){} 00028 00030 virtual char *handleWord(char * word) { 00031 if (word != NULL) { 00032 int end = p->fileTell() - 1; 00033 int start = (end - strlen(word)) + 1; 00034 Match m; 00035 m.start = start; 00036 m.end = end; 00037 offsets.push_back(m); 00038 } 00039 return word; 00040 } 00042 virtual void parseString(char *buffer) { 00043 offsets.clear(); 00044 p->parseBuffer(buffer, strlen(buffer)); 00045 } 00047 vector <Match> getOffsets() { return offsets; } 00048 private: 00050 vector <Match> offsets; 00051 Parser *p; 00052 }; 00053 00054 #endif