Main Page   Namespace List   Class Hierarchy   Alphabetical List   Compound List   File List   Namespace Members   Compound Members   File Members   Related Pages  

ContextCountAccumulator.hpp

Go to the documentation of this file.
00001 /*==========================================================================
00002  * Copyright (c) 2004 University of Massachusetts.  All Rights Reserved.
00003  *
00004  * Use of the Lemur Toolkit for Language Modeling and Information Retrieval
00005  * is subject to the terms of the software license set forth in the LICENSE
00006  * file included with this software, and also available at
00007  * http://www.lemurproject.org/license.html
00008  *
00009  *==========================================================================
00010 */
00011 
00012 
00013 //
00014 // ContextCountAccumulator
00015 //
00016 // 24 February 2004 -- tds
00017 //
00018 
00019 #ifndef INDRI_CONTEXTCOUNTACCUMULATOR_HPP
00020 #define INDRI_CONTEXTCOUNTACCUMULATOR_HPP
00021 
00022 #include "indri/ListIteratorNode.hpp"
00023 #include "lemur-platform.h"
00024 #include "lemur-compat.hpp"
00025 #include <vector>
00026 #include "indri/EvaluatorNode.hpp"
00027 #include "indri/IndriIndex.hpp"
00028 #include "indri/QuerySpec.hpp"
00029 #include "indri/DocumentCount.hpp"
00030 #include "indri/ListCache.hpp"
00031 
00032 class ContextCountAccumulator : public EvaluatorNode { // EvaluatorNode subclass holding a name, an occurrence count and a context size, plus the iterator nodes those figures describe
00033 private:
00034   // _context is a ListIteratorNode that contains the extents
00035   // that compose the context of the operation. _contextSize
00036   // is the sum of the lengths of the extents in _context.
00037   ListIteratorNode* _context;
00038 
00039   // _matches is a ListIteratorNode that emits an extent only for a
00040   // true query match; therefore _occurrences is just a simple count
00041   // of the number of extents in _matches.
00042   ListIteratorNode* _matches; 
00043 
00044   std::string _name; // presumably the `name` constructor argument -- confirm in the .cpp
00045   UINT64 _occurrences; // count of extents in _matches (see comment above)
00046   UINT64 _contextSize; // sum of extent lengths in _context (see comment above)
00047 
00048   UINT64 _maximumContextSize; // bounds: names mirror the 7-argument constructor's parameters; how they are used is not visible in this header
00049   UINT64 _minimumContextSize;
00050   UINT64 _maximumOccurrences;
00051 
00052   double _maximumContextFraction;
00053 
00054   EvaluatorNode::MResults _results; // presumably what getResults() returns -- TODO confirm
00055 
00056   // cache support
00057   ListCache* _listCache;
00058   ListCache::CachedList* _cache;
00059   SimpleCopier _query; // NOTE(review): role of _query is not evident from this header
00060 
00061 public:
00062   ContextCountAccumulator( const std::string& name, UINT64 occurrences, UINT64 contextSize ); // takes the two counts directly; no iterator nodes
00063   ContextCountAccumulator( const std::string& name, UINT64 occurrences, UINT64 contextSize, UINT64 maximumOccurrences, UINT64 minimumContextSize, UINT64 maximumContextSize, double maximumContextFraction ); // as above, plus the bound values
00064   ContextCountAccumulator( const std::string& name, ListCache* listCache, ListCache::CachedList* cache, ListIteratorNode* matches, UINT64 collectionSize, UINT64 maxDocumentLength ); // matches node only; collection-level sizes are passed instead of a context node
00065   ContextCountAccumulator( const std::string& name, ListCache* listCache, ListCache::CachedList* cache, ListIteratorNode* matches, ListIteratorNode* context ); // both a matches node and a context node
00066 
00067   ~ContextCountAccumulator(); // NOTE(review): destructor is declared but no copy constructor/assignment; confirm who owns the raw pointers above and that instances are never copied
00068 
00069   UINT64 getOccurrences() const;
00070   UINT64 getContextSize() const;
00071 
00072   const ListIteratorNode* getContextNode() const;
00073   const ListIteratorNode* getMatchesNode() const;
00074   const std::string& getName() const;
00075 
00076   const EvaluatorNode::MResults& getResults(); // non-const, unlike the getters above
00077   void evaluate( int documentID, int documentLength );
00078   int nextCandidateDocument();
00079 };
00080 
00081 #endif // INDRI_CONTEXTCOUNTACCUMULATOR_HPP
00082 
00083 

Generated on Wed Nov 3 12:58:52 2004 for Lemur Toolkit by doxygen 1.2.18