Main Page   Namespace List   Class Hierarchy   Alphabetical List   Compound List   File List   Namespace Members   Compound Members   File Members   Related Pages  

FrequencyListCopier.hpp

Go to the documentation of this file.
00001 /*==========================================================================
00002  * Copyright (c) 2004 University of Massachusetts.  All Rights Reserved.
00003  *
00004  * Use of the Lemur Toolkit for Language Modeling and Information Retrieval
00005  * is subject to the terms of the software license set forth in the LICENSE
00006  * file included with this software, and also available at
00007  * http://www.lemurproject.org/license.html
00008  *
00009  *==========================================================================
00010 */
00011 
00012 //
00013 // FrequencyListCopier
00014 //
00015 // 24 August 2004 -- tds
00016 //
00017 // Finds RawScorerNodes whose IndexTerm only needs to return frequency information,
00018 // not positions, and replaces them with TermFrequencyScorerNodes instead.
00019 //
00020 
00021 #ifndef INDRI_FREQUENCYLISTCOPIER_HPP
00022 #define INDRI_FREQUENCYLISTCOPIER_HPP
00023 
00024 #include "ListCache.hpp"
00025 
00026 class FrequencyListCopier : public indri::lang::Copier {
00027 private:
00028   std::vector<indri::lang::Node*> _nodes;
00029   std::stack<indri::lang::Node*> _disqualifiers;
00030   indri::lang::IndexTerm* _lastTerm;
00031   bool _disqualifiedTree;
00032 
00033   ListCache* _listCache;
00034 
00035 public:
00036   FrequencyListCopier( ListCache* listCache ) : _listCache(listCache), _lastTerm(0), _disqualifiedTree(false) {}
00037 
00038   indri::lang::Node* defaultAfter( indri::lang::Node* oldNode, indri::lang::Node* newNode ) {
00039     if( _disqualifiers.size() && oldNode == _disqualifiers.top() )
00040       _disqualifiers.pop();
00041     
00042     _nodes.push_back( newNode );
00043     return newNode;
00044   }
00045 
00046   void before( indri::lang::ExtentAnd* exAnd ) {
00047     _disqualifiers.push(exAnd);
00048   }
00049 
00050   void before( indri::lang::ExtentOr* exOr ) {
00051     _disqualifiedTree = true;
00052   }
00053 
00054   void before( indri::lang::ExtentInside* exInside ) {
00055     _disqualifiedTree = true;
00056   }
00057 
00058   void before( indri::lang::ExtentRestriction* exRestrict ) {
00059     _disqualifiers.push(exRestrict);
00060   }
00061 
00062   void before( indri::lang::ContextCounterNode* context ) {
00063     if( context->getContext() != NULL ) {
00064       _disqualifiedTree = true;
00065     }
00066   }
00067 
00068   void before( indri::lang::ODNode* odNode ) {
00069     _disqualifiedTree = true;
00070   }
00071 
00072   void before( indri::lang::UWNode* uwNode ) {
00073     _disqualifiedTree = true;
00074   }
00075 
00076   indri::lang::Node* after( indri::lang::IndexTerm* oldNode, indri::lang::IndexTerm* newNode ) {
00077     _lastTerm = newNode;
00078     return defaultAfter( oldNode, newNode );
00079   }
00080 
00081   void before( indri::lang::RawScorerNode* oldNode, indri::lang::RawScorerNode* newNode ) {
00082     _lastTerm = 0;
00083     _disqualifiedTree = false;
00084   }
00085 
00086   indri::lang::Node* after( indri::lang::RawScorerNode* oldNode, indri::lang::RawScorerNode* newNode ) {
00087     indri::lang::Node* result = 0;
00088 
00089     if( _lastTerm && !_disqualifiers.size() && !_disqualifiedTree && oldNode->getContext() == NULL ) {
00090       indri::lang::TermFrequencyScorerNode* scorerNode;
00091       // there's a term to score, and nothing to disqualify us from doing frequency scoring
00092       scorerNode = new indri::lang::TermFrequencyScorerNode( _lastTerm->getText(),
00093                                                              _lastTerm->getStemmed() );
00094 
00095       scorerNode->setNodeName( oldNode->nodeName() );
00096       scorerNode->setSmoothing( oldNode->getSmoothing() );
00097       scorerNode->setStatistics( oldNode->getOccurrences(), oldNode->getContextSize(),
00098                                  oldNode->getMaxOccurrences(), oldNode->getMinContextLength(),
00099                                  oldNode->getMaxContextLength(),
00100                                  oldNode->getMaxContextFraction() );
00101 
00102       delete newNode;
00103       result = defaultAfter( oldNode, scorerNode );
00104     } else if( !_disqualifiers.size() ) {
00105       ListCache::CachedList* list = 0; 
00106 
00107       if( _listCache )
00108         list = _listCache->find( newNode->getRawExtent(), newNode->getContext() );
00109       
00110       if( list ) {
00111         indri::lang::CachedFrequencyScorerNode* cachedNode;
00112         cachedNode = new indri::lang::CachedFrequencyScorerNode( newNode->getRawExtent(), newNode->getContext() );
00113         cachedNode->setNodeName( newNode->nodeName() );
00114         cachedNode->setSmoothing( newNode->getSmoothing() );
00115         cachedNode->setList( list );
00116 
00117         delete newNode;
00118         result = defaultAfter( oldNode, cachedNode );
00119       } else {
00120         result = defaultAfter( oldNode, newNode );
00121       }
00122     } else {
00123       result = defaultAfter( oldNode, newNode );
00124     }
00125 
00126     _disqualifiedTree = false;
00127     return result; 
00128   }
00129 };
00130 
00131 #endif // INDRI_FREQUENCYLISTCOPIER_HPP
00132 

Generated on Wed Nov 3 12:58:55 2004 for Lemur Toolkit by doxygen 1.2.18