Main Page   Namespace List   Class Hierarchy   Alphabetical List   Compound List   File List   Namespace Members   Compound Members   File Members   Related Pages  

UnknownRemoverCopier.hpp

Go to the documentation of this file.
00001 /*==========================================================================
00002  * Copyright (c) 2004 University of Massachusetts.  All Rights Reserved.
00003  *
00004  * Use of the Lemur Toolkit for Language Modeling and Information Retrieval
00005  * is subject to the terms of the software license set forth in the LICENSE
00006  * file included with this software, and also available at
00007  * http://www.lemurproject.org/license.html
00008  *
00009  *==========================================================================
00010 */
00011 
00012 
00013 //
00014 // UnknownRemoverCopier
00015 // 
00016 // 8 March 2004 -- tds
00017 //
00018 // Makes a copy of a query tree that does not contain
00019 // any fields or terms that don't exist in the index.
00020 // This may propagate up the tree and delete
00021 // proximity operators that will obviously not match
00022 // anything anymore.
00023 //
00024 
00025 #ifndef INDRI_UNKNOWNREMOVERCOPIER_HPP
00026 #define INDRI_UNKNOWNREMOVERCOPIER_HPP
00027 
00028 #include "indri/Repository.hpp"
00029 #include "indri/QuerySpec.hpp"
00030 #include "indri/Copier.hpp"
00031 
00032 class UnknownRemoverCopier : public indri::lang::Copier {
00033 private:
00034   Repository& _repository;
00035   std::vector<indri::lang::Node*> _nodes;
00036 
00037   template<class T>
00038   indri::lang::Node* afterExtentCombine( T* oldNode, T* newNode ) {
00039     std::vector<indri::lang::RawExtentNode*>& children = newNode->getChildren();
00040     std::vector<indri::lang::RawExtentNode*> nonNullChildren;
00041 
00042     for( unsigned int i=0; i<children.size(); i++ ) {
00043       if( children[i] )
00044         nonNullChildren.push_back( children[i] );
00045     }
00046 
00047     indri::lang::Node* result;
00048 
00049     if( children.size() == nonNullChildren.size() ) {
00050       // all children are legit, so there's no need to change this node
00051       result = newNode;
00052       _nodes.push_back( result );
00053     } else if( nonNullChildren.size() >= 2 ) {
00054       // some of the children aren't legitimate, so make a new node with fewer children
00055       result = new T( nonNullChildren );
00056       _nodes.push_back( result );
00057     } else if( nonNullChildren.size() == 1 ) {
00058       // only one child remains, so there's no need for an and/or node
00059       result = nonNullChildren[0];
00060     } else {
00061       // no children remain, so propogate 0 up the chain
00062       result = 0;
00063     }
00064 
00065     if( result != newNode ) {
00066       delete newNode;
00067     }
00068 
00069     return result;
00070   }
00071 
00072 
00073   template<class T>
00074   indri::lang::Node* afterWindow( T* oldNode, T* newNode ) {
00075     const std::vector<indri::lang::RawExtentNode*>& children = newNode->getChildren();
00076     std::vector<indri::lang::RawExtentNode*> nonNullChildren;
00077 
00078     for( unsigned int i=0; i<children.size(); i++ ) {
00079       if( children[i] )
00080         nonNullChildren.push_back( children[i] );
00081     }
00082 
00083     indri::lang::Node* result;
00084 
00085     if( children.size() == nonNullChildren.size() ) {
00086       // all children are legit, so there's no need to change this node
00087       result = newNode;
00088       _nodes.push_back(result);
00089     } else if( nonNullChildren.size() >= 2 ) {
00090       // some of the children aren't legitimate, so make a new node with fewer children
00091       result = new T( newNode->getWindowSize(), nonNullChildren );
00092       _nodes.push_back(result);
00093     } else if( nonNullChildren.size() == 1 ) {
00094       // only one child remains, so there's no need for a window node
00095       result = nonNullChildren[0];
00096     } else {
00097       // no children remain, so propogate 0 up the chain
00098       result = 0;
00099     }
00100 
00101     if( result != newNode ) {
00102       delete newNode;
00103     }
00104 
00105     return result;
00106   }
00107 
00108 public:
00109   UnknownRemoverCopier( Repository& repository ) :
00110     _repository(repository)
00111   {
00112   }
00113 
00114   indri::lang::Node* defaultAfter( indri::lang::Node* oldNode, indri::lang::Node* newNode ) {
00115     _nodes.push_back( newNode );
00116     return newNode;
00117   }
00118 
00119   indri::lang::Node* after( indri::lang::IndexTerm* oldTerm, indri::lang::IndexTerm* newTerm ) {
00120     std::string processed = newTerm->getText();
00121     if( newTerm->getStemmed() == false )
00122       processed = _repository.processTerm( newTerm->getText() );
00123 
00124     if( processed.length() == 0 ) {
00125       delete newTerm;
00126       newTerm = 0;
00127     } else {
00128       _nodes.push_back( newTerm );
00129     }
00130 
00131     return newTerm;
00132   }
00133 
00134   indri::lang::Node* after( indri::lang::Field* oldField, indri::lang::Field* newField ) {
00135     int fieldID = _repository.index()->field( newField->getFieldName().c_str() );
00136 
00137     if( !fieldID ) {
00138       delete newField;
00139       newField = 0;
00140     } else {
00141       _nodes.push_back( newField );
00142     }
00143 
00144     return newField;
00145   }
00146 
00147   indri::lang::Node* after( indri::lang::ExtentAnd* oldNode, indri::lang::ExtentAnd* newNode ) {
00148     return afterExtentCombine<indri::lang::ExtentAnd>( oldNode, newNode );
00149   }
00150 
00151   indri::lang::Node* after( indri::lang::ExtentOr* oldNode, indri::lang::ExtentOr* newNode ) {
00152     return afterExtentCombine<indri::lang::ExtentOr>( oldNode, newNode );
00153   }
00154 
00155   indri::lang::Node* after( indri::lang::ExtentInside* oldNode, indri::lang::ExtentInside* newNode ) {
00156     if( newNode->getInner() && newNode->getOuter() ) {
00157       _nodes.push_back( newNode );
00158       return newNode;
00159     } else {
00160       delete newNode;
00161       return 0;
00162     }
00163   }
00164 
00165   indri::lang::Node* after( indri::lang::ODNode* oldNode, indri::lang::ODNode* newNode ) {
00166     return afterWindow<indri::lang::ODNode>(oldNode, newNode);
00167   }
00168 
00169   indri::lang::Node* after( indri::lang::UWNode* oldNode, indri::lang::UWNode* newNode ) {
00170     return afterWindow<indri::lang::UWNode>(oldNode, newNode);
00171   }
00172 
00173   indri::lang::Node* after( indri::lang::RawScorerNode* oldNode, indri::lang::RawScorerNode* newNode ) {
00174     _nodes.push_back( newNode );
00175     return newNode;
00176   }
00177 
00178   indri::lang::Node* after( indri::lang::ContextCounterNode* oldNode, indri::lang::ContextCounterNode* newNode ) {
00179     _nodes.push_back( newNode );
00180     return newNode;
00181   }
00182 
00183   indri::lang::Node* after( indri::lang::ScoredExtentNode* oldNode, indri::lang::ScoredExtentNode* newNode ) {
00184     _nodes.push_back( newNode );
00185     return newNode;
00186   }
00187 };
00188 
00189 #endif // INDRI_UNKNOWNREMOVERCOPIER_HPP
00190 
00191 

Generated on Wed Nov 3 12:59:07 2004 for Lemur Toolkit by doxygen1.2.18