00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013 #ifndef INDRI_QUERYSPEC_HPP
00014 #define INDRI_QUERYSPEC_HPP
00015
00016 #include <vector>
00017 #include <string>
00018 #include <sstream>
00019 #include <indri/greedy_vector>
00020 #include <algorithm>
00021 #include "lemur-platform.h"
00022
00023 #include "indri/Walker.hpp"
00024 #include "indri/Copier.hpp"
00025 #include "indri/Packer.hpp"
00026 #include "indri/Unpacker.hpp"
00027
00028 #include "Exception.hpp"
00029
// Ordered comparison of two vectors whose elements are pointer-like.
// Returns true only when both vectors have the same length and, at every
// index, the pointed-to objects compare equal through operator==.
template<class T>
bool equal( const std::vector<T>& one, const std::vector<T>& two ) {
  const typename std::vector<T>::size_type count = one.size();

  if( count != two.size() )
    return false;

  typename std::vector<T>::size_type index = 0;
  while( index < count ) {
    // compare the pointees, not the pointers themselves
    if( !( *one[index] == *two[index] ) )
      return false;
    ++index;
  }

  return true;
}
00044
// Order-insensitive comparison of two vectors whose elements are
// pointer-like. Returns true when the vectors have the same length and every
// pointee in `two` can be paired with a distinct, equal (operator==) pointee
// in `one`. Neither argument is modified, so const vectors are accepted.
template<class T>
bool unordered_equal( const std::vector<T>& one, const std::vector<T>& two ) {
  typedef typename std::vector<T>::size_type size_type;

  if( one.size() != two.size() )
    return false;

  // working copy of `one`; matched entries are removed so that each element
  // can be paired at most once (this is what makes duplicates count)
  std::vector<T> unmatched( one );

  for( size_type i = 0; i < two.size(); i++ ) {
    bool found = false;

    for( size_type j = 0; j < unmatched.size(); j++ ) {
      if( *unmatched[j] == *two[i] ) {
        unmatched.erase( unmatched.begin() + j );
        found = true;
        break;
      }
    }

    // an element of `two` with no partner settles the answer; since the
    // sizes are equal, `unmatched` could never become empty after this
    if( !found )
      return false;
  }

  return unmatched.empty();
}
00069
00070 namespace indri {
00071 namespace lang {
00072 class Node {
00073 protected:
00074 std::string _name;
00075
00076 public:
00077 Node() {
00078 std::stringstream s;
00079 s << PTR_TO_INT(this);
00080 _name = s.str();
00081 }
00082
00083 virtual ~Node() {
00084 }
00085
00086 void setNodeName( const std::string& name ) {
00087 _name = name;
00088 }
00089
00090 const std::string& nodeName() const {
00091 return _name;
00092 }
00093
00094 virtual std::string typeName() const {
00095 return "Node";
00096 }
00097
00098 virtual std::string queryText() const = 0;
00099
00100 virtual bool operator < ( Node& other ) {
00101
00102 if( typeName() != other.typeName() )
00103 return typeName() < other.typeName();
00104
00105 return queryText() < other.queryText();
00106 }
00107
00108 virtual bool operator== ( Node& other ) {
00109 return &other == this;
00110 }
00111
00112 virtual void pack( Packer& packer ) = 0;
00113 virtual void walk( Walker& walker ) = 0;
00114 virtual Node* copy( Copier& copier ) = 0;
00115 };
00116
00117 class RawExtentNode : public Node {};
00118 class ScoredExtentNode : public Node {};
00119 class AccumulatorNode : public Node {};
00120
00121 class IndexTerm : public RawExtentNode {
00122 private:
00123 std::string _text;
00124 bool _stemmed;
00125
00126 public:
00127 IndexTerm( const std::string& text, bool stemmed = false ) : _text(text), _stemmed(stemmed)
00128 {
00129 }
00130
00131 IndexTerm( Unpacker& unpacker ) {
00132 _text = unpacker.getString( "termName" );
00133 _stemmed = unpacker.getBoolean( "stemmed" );
00134 }
00135
00136 const std::string& getText() { return _text; }
00137
00138 bool operator==( Node& node ) {
00139 IndexTerm* other = dynamic_cast<IndexTerm*>(&node);
00140
00141 if( !other )
00142 return false;
00143
00144 if( other == this )
00145 return true;
00146
00147 return other->_text == _text;
00148 }
00149
00150 std::string typeName() const {
00151 return "IndexTerm";
00152 }
00153
00154 std::string queryText() const {
00155 std::stringstream qtext;
00156
00157 if( _stemmed ) {
00158 qtext << '"' << _text << '"';
00159 } else {
00160 qtext << _text;
00161 }
00162
00163 return qtext.str();
00164 }
00165
00166 void setStemmed(bool stemmed) {
00167 _stemmed = stemmed;
00168 }
00169
00170 bool getStemmed() const {
00171 return _stemmed;
00172 }
00173
00174 void pack( Packer& packer ) {
00175 packer.before(this);
00176 packer.put( "termName", _text );
00177 packer.put( "stemmed", _stemmed );
00178 packer.after(this);
00179 }
00180
00181 void walk( Walker& walker ) {
00182 walker.before(this);
00183 walker.after(this);
00184 }
00185
00186 Node* copy( Copier& copier ) {
00187 copier.before(this);
00188 IndexTerm* termCopy = new IndexTerm(*this);
00189 return copier.after(this, termCopy);
00190 }
00191 };
00192
00193 class Field : public RawExtentNode {
00194 private:
00195 std::string _fieldName;
00196
00197 public:
00198 Field( const std::string& name ) : _fieldName(name)
00199 {
00200 }
00201
00202 Field( Unpacker& unpacker ) {
00203 _fieldName = unpacker.getString( "fieldName" );
00204 }
00205
00206 const std::string& getFieldName() const { return _fieldName; }
00207
00208 std::string typeName() const {
00209 return "Field";
00210 }
00211
00212 std::string queryText() const {
00213 return _fieldName;
00214 }
00215
00216 void pack( Packer& packer ) {
00217 packer.before(this);
00218 packer.put( "fieldName", _fieldName );
00219 packer.after(this);
00220 }
00221
00222 void walk( Walker& walker ) {
00223 walker.before(this);
00224 walker.after(this);
00225 }
00226
00227 Node* copy( Copier& copier ) {
00228 copier.before(this);
00229 Field* newField = new Field(*this);
00230 return copier.after(this, newField);
00231 }
00232
00233 bool operator== ( Node& other ) {
00234 Field* otherField = dynamic_cast<Field*>(&other);
00235
00236 if( !otherField )
00237 return false;
00238
00239 return otherField->getFieldName() == getFieldName();
00240 }
00241 };
00242
00243 class ExtentInside : public RawExtentNode {
00244 private:
00245 RawExtentNode* _inner;
00246 RawExtentNode* _outer;
00247
00248 public:
00249 ExtentInside( RawExtentNode* inner, RawExtentNode* outer ) :
00250 _inner(inner),
00251 _outer(outer)
00252 {
00253 }
00254
00255 ExtentInside( Unpacker& unpacker ) {
00256 _inner = unpacker.getRawExtentNode( "inner" );
00257 _outer = unpacker.getRawExtentNode( "outer" );
00258 }
00259
00260 bool operator== ( Node& o ) {
00261 ExtentInside* other = dynamic_cast<ExtentInside*>(&o);
00262
00263 return other &&
00264 *_inner == *other->_inner &&
00265 *_outer == *other->_outer;
00266 }
00267
00268 std::string typeName() const {
00269 return "ExtentInside";
00270 }
00271
00272 std::string queryText() const {
00273 std::stringstream qtext;
00274 qtext << _inner->queryText()
00275 << "."
00276 << _outer->queryText();
00277
00278 return qtext.str();
00279 }
00280
00281 RawExtentNode* getInner() {
00282 return _inner;
00283 }
00284
00285 RawExtentNode* getOuter() {
00286 return _outer;
00287 }
00288
00289 void pack( Packer& packer ) {
00290 packer.before(this);
00291 packer.put( "inner", _inner );
00292 packer.put( "outer", _outer );
00293 packer.after(this);
00294 }
00295
00296 void walk( Walker& walker ) {
00297 walker.before(this);
00298 _inner->walk(walker);
00299 _outer->walk(walker);
00300 walker.after(this);
00301 }
00302
00303 Node* copy( Copier& copier ) {
00304 copier.before(this);
00305
00306 RawExtentNode* newInner = dynamic_cast<RawExtentNode*>(_inner->copy(copier));
00307 RawExtentNode* newOuter = dynamic_cast<RawExtentNode*>(_outer->copy(copier));
00308 ExtentInside* extentInsideCopy = new ExtentInside( newInner, newOuter );
00309 extentInsideCopy->setNodeName( nodeName() );
00310
00311 return copier.after(this, extentInsideCopy);
00312 }
00313 };
00314
00315 class ExtentOr : public RawExtentNode {
00316 private:
00317 std::vector<RawExtentNode*> _children;
00318
00319 public:
00320 ExtentOr() {}
00321 ExtentOr( const std::vector<RawExtentNode*>& children ) :
00322 _children(children)
00323 {
00324 }
00325
00326 ExtentOr( Unpacker& unpacker ) {
00327 _children = unpacker.getRawExtentVector( "children" );
00328 }
00329
00330 std::string typeName() const {
00331 return "ExtentOr";
00332 }
00333
00334 std::string queryText() const {
00335 std::stringstream qtext;
00336
00337 for( unsigned int i=0; i<_children.size(); i++ ) {
00338 if(i>0) qtext << " ";
00339 qtext << _children[i]->queryText();
00340 }
00341
00342 return qtext.str();
00343 }
00344
00345 void addChild( RawExtentNode* node ) {
00346 _children.push_back(node);
00347 }
00348
00349 std::vector<RawExtentNode*>& getChildren() {
00350 return _children;
00351 }
00352
00353 bool operator== ( Node& node ) {
00354 ExtentOr* other = dynamic_cast<ExtentOr*>(&node);
00355
00356 if( other == this )
00357 return true;
00358
00359 if( !other )
00360 return false;
00361
00362 return unordered_equal( other->_children, _children );
00363 }
00364
00365 void pack( Packer& packer ) {
00366 packer.before(this);
00367 packer.put( "children", _children );
00368 packer.after(this);
00369 }
00370
00371 void walk( Walker& walker ) {
00372 walker.before(this);
00373 for( unsigned int i=0; i<_children.size(); i++ ) {
00374 _children[i]->walk(walker);
00375 }
00376 walker.after(this);
00377 }
00378
00379 Node* copy( Copier& copier ) {
00380 copier.before(this);
00381
00382 ExtentOr* duplicate = new ExtentOr();
00383 duplicate->setNodeName( nodeName() );
00384 for( unsigned int i=0; i<_children.size(); i++ ) {
00385 indri::lang::Node* childNode = _children[i]->copy(copier);
00386 duplicate->addChild( dynamic_cast<RawExtentNode*>(childNode) );
00387 }
00388
00389 return copier.after(this, duplicate);
00390 }
00391 };
00392
00393 class ExtentAnd : public RawExtentNode {
00394 private:
00395 std::vector<RawExtentNode*> _children;
00396
00397 public:
00398 ExtentAnd() {}
00399 ExtentAnd( const std::vector<RawExtentNode*>& children ) :
00400 _children(children)
00401 {
00402 }
00403
00404 ExtentAnd( Unpacker& unpacker ) {
00405 _children = unpacker.getRawExtentVector( "children" );
00406 }
00407
00408 std::string typeName() const {
00409 return "ExtentAnd";
00410 }
00411
00412 std::string queryText() const {
00413 std::stringstream qtext;
00414
00415 for( unsigned int i=0; i<_children.size(); i++ ) {
00416 if(i>0) qtext << ",";
00417 qtext << _children[i]->queryText();
00418 }
00419
00420 return qtext.str();
00421 }
00422
00423 void addChild( RawExtentNode* node ) {
00424 _children.push_back(node);
00425 }
00426
00427 std::vector<RawExtentNode*>& getChildren() {
00428 return _children;
00429 }
00430
00431 bool operator== ( Node& node ) {
00432 ExtentAnd* other = dynamic_cast<ExtentAnd*>(&node);
00433
00434 if( other == this )
00435 return true;
00436
00437 if( !other )
00438 return false;
00439
00440 return unordered_equal( other->_children, _children );
00441 }
00442
00443 void pack( Packer& packer ) {
00444 packer.before(this);
00445 packer.put( "children", _children );
00446 packer.after(this);
00447 }
00448
00449 void walk( Walker& walker ) {
00450 walker.before(this);
00451 for( unsigned int i=0; i<_children.size(); i++ ) {
00452 _children[i]->walk(walker);
00453 }
00454 walker.after(this);
00455 }
00456
00457 Node* copy( Copier& copier ) {
00458 copier.before(this);
00459
00460 ExtentAnd* duplicate = new ExtentAnd();
00461 duplicate->setNodeName( nodeName() );
00462 for( unsigned int i=0; i<_children.size(); i++ ) {
00463 Node* child = _children[i]->copy(copier);
00464 duplicate->addChild( dynamic_cast<RawExtentNode*>(child) );
00465 }
00466
00467 return copier.after(this, duplicate);
00468 }
00469 };
00470
00471 class UWNode : public RawExtentNode {
00472 private:
00473 std::vector<RawExtentNode*> _children;
00474 int _windowSize;
00475
00476 public:
00477 UWNode() :
00478 _windowSize(-1)
00479 {
00480 }
00481
00482 UWNode( int windowSize, std::vector<RawExtentNode*>& children ) :
00483 _windowSize(windowSize),
00484 _children(children)
00485 {
00486 }
00487
00488 UWNode( Unpacker& unpacker ) {
00489 _windowSize = (int) unpacker.getInteger( "windowSize" );
00490 _children = unpacker.getRawExtentVector( "children" );
00491 }
00492
00493 std::string typeName() const {
00494 return "UWNode";
00495 }
00496
00497 std::string queryText() const {
00498 std::stringstream qtext;
00499
00500 if( _windowSize >= 0 )
00501 qtext << "#uw" << _windowSize << "( ";
00502 else
00503 qtext << "#uw( ";
00504
00505 for( unsigned int i=0; i<_children.size(); i++ ) {
00506 qtext << _children[i]->queryText() << " ";
00507 }
00508 qtext << ")";
00509 return qtext.str();
00510 }
00511
00512 void setWindowSize( int windowSize ) {
00513 _windowSize = windowSize;
00514 }
00515
00516 void setWindowSize( const std::string& windowSize ) {
00517 setWindowSize( atoi( windowSize.c_str() ) );
00518 }
00519
00520 int getWindowSize() const {
00521 return _windowSize;
00522 }
00523
00524 const std::vector<RawExtentNode*>& getChildren() const {
00525 return _children;
00526 }
00527
00528 void addChild( RawExtentNode* node ) {
00529 _children.push_back( node );
00530 }
00531
00532 bool operator== ( Node& node ) {
00533 UWNode* other = dynamic_cast<UWNode*>(&node);
00534
00535 if( !other )
00536 return false;
00537
00538 if( other == this )
00539 return true;
00540
00541 if( other->_windowSize != _windowSize ) {
00542 return false;
00543 }
00544
00545 return unordered_equal( _children, other->_children );
00546 }
00547
00548 void pack( Packer& packer ) {
00549 packer.before(this);
00550 packer.put( "windowSize", _windowSize );
00551 packer.put( "children", _children );
00552 packer.after(this);
00553 }
00554
00555 void walk( Walker& walker ) {
00556 walker.before(this);
00557 for(unsigned int i=0; i<_children.size(); i++) {
00558 _children[i]->walk(walker);
00559 }
00560 walker.after(this);
00561 }
00562
00563 Node* copy( Copier& copier ) {
00564 copier.before(this);
00565
00566 UWNode* duplicate = new UWNode();
00567 duplicate->setNodeName( nodeName() );
00568 duplicate->setWindowSize( _windowSize );
00569 for(unsigned int i=0; i<_children.size(); i++) {
00570 Node* child = _children[i]->copy(copier);
00571 duplicate->addChild( dynamic_cast<RawExtentNode*>(child) );
00572 }
00573
00574 return copier.after(this, duplicate);
00575 }
00576 };
00577
00578 class ODNode : public RawExtentNode {
00579 private:
00580 int _windowSize;
00581 std::vector<RawExtentNode*> _children;
00582
00583 public:
00584 ODNode( int windowSize, std::vector<RawExtentNode*>& children ) :
00585 _windowSize(windowSize),
00586 _children(children)
00587 {
00588 }
00589
00590 ODNode() :
00591 _windowSize(-1)
00592 {
00593 }
00594
00595 ODNode( Unpacker& unpacker ) {
00596 _windowSize = (int) unpacker.getInteger( "windowSize" );
00597 _children = unpacker.getRawExtentVector( "children" );
00598 }
00599
00600 std::string typeName() const {
00601 return "ODNode";
00602 }
00603
00604 std::string queryText() const {
00605 std::stringstream qtext;
00606 if( _windowSize >= 0 )
00607 qtext << "#" << _windowSize << "( ";
00608 else
00609 qtext << "#od( ";
00610
00611 for( unsigned int i=0; i<_children.size(); i++ ) {
00612 qtext << _children[i]->queryText() << " ";
00613 }
00614 qtext << ")";
00615 return qtext.str();
00616 }
00617
00618 const std::vector<RawExtentNode*>& getChildren() const {
00619 return _children;
00620 }
00621
00622 void setWindowSize( int windowSize ) {
00623 _windowSize = windowSize;
00624 }
00625
00626 void setWindowSize( const std::string& windowSize ) {
00627 setWindowSize( atoi( windowSize.c_str() ) );
00628 }
00629
00630 int getWindowSize() const {
00631 return _windowSize;
00632 }
00633
00634 void addChild( RawExtentNode* node ) {
00635 _children.push_back( node );
00636 }
00637
00638 bool operator== ( Node& node ) {
00639 ODNode* other = dynamic_cast<ODNode*>(&node);
00640
00641 if( ! other )
00642 return false;
00643
00644 if( other == this )
00645 return true;
00646
00647 if( other->_windowSize != _windowSize )
00648 return false;
00649
00650 if( _children.size() != other->_children.size() )
00651 return false;
00652
00653 return equal( _children, other->_children );
00654 }
00655
00656 void pack( Packer& packer ) {
00657 packer.before(this);
00658 packer.put( "windowSize", _windowSize );
00659 packer.put( "children", _children );
00660 packer.after(this);
00661 }
00662
00663 void walk( Walker& walker ) {
00664 walker.before(this);
00665 for(unsigned int i=0; i<_children.size(); i++) {
00666 _children[i]->walk(walker);
00667 }
00668 walker.after(this);
00669 }
00670
00671 Node* copy( Copier& copier ) {
00672 copier.before(this);
00673
00674 ODNode* duplicate = new ODNode();
00675 duplicate->setWindowSize( _windowSize );
00676 for(unsigned int i=0; i<_children.size(); i++) {
00677 Node* child = _children[i]->copy(copier);
00678 duplicate->addChild( dynamic_cast<RawExtentNode*>(child) );
00679 }
00680
00681 return copier.after(this, duplicate);
00682 }
00683 };
00684
00685 class FilReqNode : public RawExtentNode {
00686 private:
00687 RawExtentNode* _filtered;
00688 RawExtentNode* _required;
00689
00690 public:
00691 FilReqNode( RawExtentNode* filtered, RawExtentNode* required ) {
00692 _filtered = filtered;
00693 _required = required;
00694 }
00695
00696 FilReqNode( Unpacker& unpacker ) {
00697 _filtered = unpacker.getRawExtentNode( "filtered" );
00698 _required = unpacker.getRawExtentNode( "required" );
00699 }
00700
00701 std::string typeName() const {
00702 return "FilReqNode";
00703 }
00704
00705 std::string queryText() const {
00706 std::stringstream qtext;
00707
00708 qtext << "#filreq("
00709 << _filtered->queryText()
00710 << " "
00711 << _required->queryText()
00712 << ")";
00713 return qtext.str();
00714 }
00715
00716 RawExtentNode* getFiltered() {
00717 return _filtered;
00718 }
00719
00720 RawExtentNode* getRequired() {
00721 return _required;
00722 }
00723
00724 bool operator== ( Node& node ) {
00725 FilReqNode* other = dynamic_cast<FilReqNode*>(&node);
00726
00727 if( !other )
00728 return false;
00729
00730 return (*_filtered) == (*other->getFiltered()) &&
00731 (*_required) == (*other->getRequired());
00732 }
00733
00734 void pack( Packer& packer ) {
00735 packer.before(this);
00736 packer.put("filtered", _filtered);
00737 packer.put("required", _required);
00738 packer.after(this);
00739 }
00740
00741 void walk( Walker& walker ) {
00742 walker.before(this);
00743 _filtered->walk(walker);
00744 _required->walk(walker);
00745 walker.after(this);
00746 }
00747
00748 Node* copy( Copier& copier ) {
00749 copier.before(this);
00750 RawExtentNode* filteredDuplicate = dynamic_cast<RawExtentNode*>(_filtered->copy(copier));
00751 RawExtentNode* requiredDuplicate = dynamic_cast<RawExtentNode*>(_required->copy(copier));
00752 FilReqNode* duplicate = new FilReqNode( filteredDuplicate, requiredDuplicate );
00753 return copier.after(this, duplicate);
00754 }
00755 };
00756
00757 class FilRejNode : public RawExtentNode {
00758 private:
00759 RawExtentNode* _filtered;
00760 RawExtentNode* _disallowed;
00761
00762 public:
00763 FilRejNode( RawExtentNode* filtered, RawExtentNode* disallowed ) {
00764 _filtered = filtered;
00765 _disallowed = disallowed;
00766 }
00767
00768 FilRejNode( Unpacker& unpacker ) {
00769 _filtered = unpacker.getRawExtentNode( "filtered" );
00770 _disallowed = unpacker.getRawExtentNode( "disallowed" );
00771 }
00772
00773 std::string typeName() const {
00774 return "FilRejNode";
00775 }
00776
00777 std::string queryText() const {
00778 std::stringstream qtext;
00779
00780 qtext << "#filrej("
00781 << _filtered->queryText()
00782 << " "
00783 << _disallowed->queryText()
00784 << ")";
00785
00786 return qtext.str();
00787 }
00788
00789 RawExtentNode* getFiltered() {
00790 return _filtered;
00791 }
00792
00793 RawExtentNode* getDisallowed() {
00794 return _disallowed;
00795 }
00796
00797 bool operator== ( Node& node ) {
00798 FilRejNode* other = dynamic_cast<FilRejNode*>(&node);
00799
00800 if( !other )
00801 return false;
00802
00803 return (*_filtered) == (*other->getFiltered()) &&
00804 (*_disallowed) == (*other->getDisallowed());
00805 }
00806
00807 void pack( Packer& packer ) {
00808 packer.before(this);
00809 packer.put("filtered", _filtered);
00810 packer.put("disallowed", _disallowed);
00811 packer.after(this);
00812 }
00813
00814 void walk( Walker& walker ) {
00815 walker.before(this);
00816 _filtered->walk(walker);
00817 _disallowed->walk(walker);
00818 walker.after(this);
00819 }
00820
00821 Node* copy( Copier& copier ) {
00822 copier.before(this);
00823 RawExtentNode* filteredDuplicate = dynamic_cast<RawExtentNode*>(_filtered->copy(copier));
00824 RawExtentNode* disallowedDuplicate = dynamic_cast<RawExtentNode*>(_disallowed->copy(copier));
00825 FilRejNode* duplicate = new FilRejNode( filteredDuplicate, disallowedDuplicate );
00826 return copier.after(this, duplicate);
00827 }
00828 };
00829
00830 class FieldLessNode : public RawExtentNode {
00831 private:
00832 RawExtentNode* _field;
00833 UINT64 _constant;
00834
00835 public:
00836 FieldLessNode( RawExtentNode* field, UINT64 constant ) :
00837 _field(field),
00838 _constant(constant) {
00839 }
00840
00841 FieldLessNode( Unpacker& unpacker ) {
00842 _field = unpacker.getRawExtentNode( "field" );
00843 _constant = unpacker.getInteger("constant");
00844 }
00845
00846 std::string typeName() const {
00847 return "FieldLessNode";
00848 }
00849
00850 std::string queryText() const {
00851 std::stringstream qtext;
00852 qtext << "#less(" << _field->queryText() << " " << _constant << ")";
00853 return qtext.str();
00854 }
00855
00856 UINT64 getConstant() const {
00857 return _constant;
00858 }
00859
00860 RawExtentNode* getField() {
00861 return _field;
00862 }
00863
00864 bool operator== ( Node& node ) {
00865 FieldLessNode* other = dynamic_cast<FieldLessNode*>(&node);
00866
00867 return other &&
00868 other->getConstant() == _constant &&
00869 *other->getField() == *_field;
00870 }
00871
00872 Node* copy( Copier& copier ) {
00873 copier.before(this);
00874 RawExtentNode* fieldDuplicate = dynamic_cast<RawExtentNode*>(_field->copy(copier));
00875 FieldLessNode* duplicate = new FieldLessNode( fieldDuplicate, _constant );
00876 return copier.after(this, duplicate);
00877 }
00878
00879 void walk( Walker& walker ) {
00880 walker.before(this);
00881 _field->walk(walker);
00882 walker.after(this);
00883 }
00884
00885 void pack( Packer& packer ) {
00886 packer.before(this);
00887 packer.put("field", _field);
00888 packer.put("constant", _constant);
00889 packer.after(this);
00890 }
00891 };
00892
00893 class FieldGreaterNode : public RawExtentNode {
00894 private:
00895 RawExtentNode* _field;
00896 UINT64 _constant;
00897
00898 public:
00899 FieldGreaterNode( RawExtentNode* field, UINT64 constant ) :
00900 _field(field),
00901 _constant(constant) {
00902 }
00903
00904 FieldGreaterNode( Unpacker& unpacker ) {
00905 _field = unpacker.getRawExtentNode( "field" );
00906 _constant = unpacker.getInteger("constant");
00907 }
00908
00909 std::string typeName() const {
00910 return "FieldGreaterNode";
00911 }
00912
00913 std::string queryText() const {
00914 std::stringstream qtext;
00915 qtext << "#greater(" << _field->queryText() << " " << _constant << ")";
00916 return qtext.str();
00917 }
00918
00919 UINT64 getConstant() const {
00920 return _constant;
00921 }
00922
00923 RawExtentNode* getField() {
00924 return _field;
00925 }
00926
00927 bool operator== ( Node& node ) {
00928 FieldGreaterNode* other = dynamic_cast<FieldGreaterNode*>(&node);
00929
00930 return other &&
00931 other->getConstant() == _constant &&
00932 *other->getField() == *_field;
00933 }
00934
00935 Node* copy( Copier& copier ) {
00936 copier.before(this);
00937 RawExtentNode* fieldDuplicate = dynamic_cast<RawExtentNode*>(_field->copy(copier));
00938 FieldGreaterNode* duplicate = new FieldGreaterNode( fieldDuplicate, _constant );
00939 return copier.after(this, duplicate);
00940 }
00941
00942 void walk( Walker& walker ) {
00943 walker.before(this);
00944 _field->walk(walker);
00945 walker.after(this);
00946 }
00947
00948 void pack( Packer& packer ) {
00949 packer.before(this);
00950 packer.put("field", _field);
00951 packer.put("constant", _constant);
00952 packer.after(this);
00953 }
00954 };
00955
00956 class FieldBetweenNode : public RawExtentNode {
00957 private:
00958 RawExtentNode* _field;
00959 UINT64 _low;
00960 UINT64 _high;
00961
00962 public:
00963 FieldBetweenNode( RawExtentNode* field, UINT64 low, UINT64 high ) :
00964 _field(field),
00965 _low(low),
00966 _high(high) {
00967 }
00968
00969 FieldBetweenNode( Unpacker& unpacker ) {
00970 _field = unpacker.getRawExtentNode( "field" );
00971 _low = unpacker.getInteger("low");
00972 _high = unpacker.getInteger("high");
00973 }
00974
00975 std::string typeName() const {
00976 return "FieldBetweenNode";
00977 }
00978
00979 std::string queryText() const {
00980 std::stringstream qtext;
00981 qtext << "#between(" << _field->queryText() << " " << _low << " " << _high << ")";
00982 return qtext.str();
00983 }
00984
00985 UINT64 getLow() const {
00986 return _low;
00987 }
00988
00989 UINT64 getHigh() const {
00990 return _high;
00991 }
00992
00993 RawExtentNode* getField() {
00994 return _field;
00995 }
00996
00997 bool operator== ( Node& node ) {
00998 FieldBetweenNode* other = dynamic_cast<FieldBetweenNode*>(&node);
00999
01000 return other &&
01001 other->getLow() == _low &&
01002 other->getHigh() == _high &&
01003 *other->getField() == *_field;
01004 }
01005
01006 Node* copy( Copier& copier ) {
01007 copier.before(this);
01008 RawExtentNode* fieldDuplicate = dynamic_cast<RawExtentNode*>(_field->copy(copier));
01009 FieldBetweenNode* duplicate = new FieldBetweenNode( fieldDuplicate, _low, _high );
01010 return copier.after(this, duplicate);
01011 }
01012
01013 void walk( Walker& walker ) {
01014 walker.before(this);
01015 _field->walk(walker);
01016 walker.after(this);
01017 }
01018
01019 void pack( Packer& packer ) {
01020 packer.before(this);
01021 packer.put("field", _field);
01022 packer.put("low", _low);
01023 packer.put("high", _high);
01024 packer.after(this);
01025 }
01026 };
01027
01028 class FieldEqualsNode : public RawExtentNode {
01029 private:
01030 RawExtentNode* _field;
01031 UINT64 _constant;
01032
01033 public:
01034 FieldEqualsNode( RawExtentNode* field, UINT64 constant ) :
01035 _field(field),
01036 _constant(constant) {
01037 }
01038
01039 FieldEqualsNode( Unpacker& unpacker ) {
01040 _field = unpacker.getRawExtentNode("field");
01041 _constant = unpacker.getInteger("constant");
01042 }
01043
01044 std::string typeName() const {
01045 return "FieldEqualsNode";
01046 }
01047
01048 std::string queryText() const {
01049 std::stringstream qtext;
01050 qtext << "#equals(" << _field->queryText() << " " << _constant << ")";
01051 return qtext.str();
01052 }
01053
01054 UINT64 getConstant() const {
01055 return _constant;
01056 }
01057
01058 RawExtentNode* getField() {
01059 return _field;
01060 }
01061
01062 bool operator== ( Node& node ) {
01063 FieldEqualsNode* other = dynamic_cast<FieldEqualsNode*>(&node);
01064
01065 return other &&
01066 other->getConstant() == _constant &&
01067 *other->getField() == *_field;
01068 }
01069
01070 Node* copy( Copier& copier ) {
01071 copier.before(this);
01072 RawExtentNode* fieldDuplicate = dynamic_cast<RawExtentNode*>(_field->copy(copier));
01073 FieldEqualsNode* duplicate = new FieldEqualsNode( fieldDuplicate, _constant );
01074 return copier.after(this, duplicate);
01075 }
01076
01077 void walk( Walker& walker ) {
01078 walker.before(this);
01079 _field->walk(walker);
01080 walker.after(this);
01081 }
01082
01083 void pack( Packer& packer ) {
01084 packer.before(this);
01085 packer.put("field", _field);
01086 packer.put("constant", _constant);
01087 packer.after(this);
01088 }
01089 };
01090
01091 class RawScorerNode : public ScoredExtentNode {
01092 private:
01093 UINT64 _occurrences;
01094 UINT64 _contextSize;
01095 UINT64 _maximumContextLength;
01096 UINT64 _minimumContextLength;
01097 UINT64 _maximumOccurrences;
01098 double _maximumContextFraction;
01099
01100 RawExtentNode* _raw;
01101 RawExtentNode* _context;
01102 std::string _smoothing;
01103
01104 public:
01105 RawScorerNode( RawExtentNode* raw, RawExtentNode* context, std::string smoothing = "method:dirichlet,mu:2500" ) {
01106 _raw = raw;
01107 _context = context;
01108
01109 _occurrences = 0;
01110 _contextSize = 0;
01111 _maximumContextLength = MAX_INT32;
01112 _maximumOccurrences = MAX_INT32;
01113 _minimumContextLength = 1;
01114 _maximumContextFraction = 1;
01115 _smoothing = smoothing;
01116 }
01117
01118 RawScorerNode( Unpacker& unpacker ) {
01119 _raw = unpacker.getRawExtentNode( "raw" );
01120 _context = unpacker.getRawExtentNode( "context" );
01121
01122 _occurrences = unpacker.getInteger( "occurrences" );
01123 _contextSize = unpacker.getInteger( "contextSize" );
01124 _maximumContextLength = unpacker.getInteger( "maximumContextLength" );
01125 _minimumContextLength = unpacker.getInteger( "minimumContextLength" );
01126 _maximumOccurrences = unpacker.getInteger( "maximumOccurrences" );
01127 _maximumContextFraction = unpacker.getDouble( "maximumContextFraction" );
01128 _smoothing = unpacker.getString( "smoothing" );
01129 }
01130
01131 std::string typeName() const {
01132 return "RawScorerNode";
01133 }
01134
01135 std::string queryText() const {
01136 std::stringstream qtext;
01137
01138 qtext << _raw->queryText();
01139 if( _context ) {
01140
01141 int dot = qtext.str().find('.');
01142 if( dot < 0 )
01143 qtext << '.';
01144
01145 qtext << "(" << _context->queryText() << ")";
01146 }
01147
01148 return qtext.str();
01149 }
01150
01151 UINT64 getOccurrences() const {
01152 return _occurrences;
01153 }
01154
01155 UINT64 getContextSize() const {
01156 return _contextSize;
01157 }
01158
01159 const std::string& getSmoothing() const {
01160 return _smoothing;
01161 }
01162
01163 UINT64 getMaxContextLength() const {
01164 return _maximumContextLength;
01165 }
01166
01167 UINT64 getMinContextLength() const {
01168 return _minimumContextLength;
01169 }
01170
01171 UINT64 getMaxOccurrences() const {
01172 return _maximumOccurrences;
01173 }
01174
01175 double getMaxContextFraction() const {
01176 return _maximumContextFraction;
01177 }
01178
01179 void setStatistics( UINT64 occurrences, UINT64 contextSize ) {
01180 _occurrences = occurrences;
01181 _contextSize = contextSize;
01182 }
01183
01184 void setStatistics( UINT64 occurrences, UINT64 contextSize,
01185 UINT64 maxOccurrences, UINT64 minContextLength, UINT64 maxContextLength,
01186 double maxContextFraction ) {
01187 _occurrences = occurrences;
01188 _contextSize = contextSize;
01189
01190 _maximumOccurrences = maxOccurrences;
01191 _minimumContextLength = minContextLength;
01192 _maximumContextLength = maxContextLength;
01193 _maximumContextFraction = maxContextFraction;
01194 }
01195
01196 void setContext( RawExtentNode* context ) {
01197 _context = context;
01198 }
01199
01200 void setRawExtent( RawExtentNode* rawExtent ) {
01201 _raw = rawExtent;
01202 }
01203
01204 void setSmoothing( const std::string& smoothing ) {
01205 _smoothing = smoothing;
01206 }
01207
01208 RawExtentNode* getContext() {
01209 return _context;
01210 }
01211
01212 RawExtentNode* getRawExtent() {
01213 return _raw;
01214 }
01215
01216 void pack( Packer& packer ) {
01217 packer.before(this);
01218 packer.put( "raw", _raw );
01219 packer.put( "context", _context );
01220
01221 packer.put( "occurrences", _occurrences );
01222 packer.put( "contextSize", _contextSize );
01223 packer.put( "minimumContextLength", _minimumContextLength );
01224 packer.put( "maximumContextLength", _maximumContextLength );
01225 packer.put( "maximumOccurrences", _maximumOccurrences );
01226 packer.put( "maximumContextFraction", _maximumContextFraction );
01227 packer.put( "smoothing", _smoothing );
01228 packer.after(this);
01229 }
01230
01231 void walk( Walker& walker ) {
01232 walker.before(this);
01233 if( _raw )
01234 _raw->walk(walker);
01235 if( _context )
01236 _context->walk(walker);
01237 walker.after(this);
01238 }
01239
01240 Node* copy( Copier& copier ) {
01241 copier.before(this);
01242
01243 RawExtentNode* duplicateContext = _context ? dynamic_cast<RawExtentNode*>(_context->copy(copier)) : 0;
01244 RawExtentNode* duplicateRaw = _raw ? dynamic_cast<RawExtentNode*>(_raw->copy(copier)) : 0;
01245 RawScorerNode* duplicate = new RawScorerNode(*this);
01246 duplicate->setRawExtent( duplicateRaw );
01247 duplicate->setContext( duplicateContext );
01248
01249 return copier.after(this, duplicate);
01250 }
01251 };
01252
01253 class TermFrequencyScorerNode : public ScoredExtentNode {
01254 private:
01255 UINT64 _occurrences;
01256 UINT64 _contextSize;
01257 UINT64 _maximumContextLength;
01258 UINT64 _minimumContextLength;
01259 UINT64 _maximumOccurrences;
01260 double _maximumContextFraction;
01261
01262 std::string _text;
01263 std::string _smoothing;
01264 bool _stemmed;
01265
01266 public:
01267 TermFrequencyScorerNode( const std::string& text, bool stemmed ) {
01268 _occurrences = 0;
01269 _contextSize = 0;
01270 _maximumContextLength = MAX_INT32;
01271 _maximumOccurrences = MAX_INT32;
01272 _minimumContextLength = 1;
01273 _maximumContextFraction = 1;
01274 _smoothing = "";
01275 _text = text;
01276 _stemmed = stemmed;
01277 }
01278
01279 TermFrequencyScorerNode( Unpacker& unpacker ) {
01280 _occurrences = unpacker.getInteger( "occurrences" );
01281 _contextSize = unpacker.getInteger( "contextSize" );
01282 _maximumContextLength = unpacker.getInteger( "maximumContextLength" );
01283 _minimumContextLength = unpacker.getInteger( "minimumContextLength" );
01284 _maximumOccurrences = unpacker.getInteger( "maximumOccurrences" );
01285 _maximumContextFraction = unpacker.getDouble( "maximumContextFraction" );
01286 _smoothing = unpacker.getString( "smoothing" );
01287 _text = unpacker.getString( "text" );
01288 _stemmed = unpacker.getBoolean( "stemmed" );
01289 }
01290
01291 const std::string& getText() const {
01292 return _text;
01293 }
01294
01295 bool getStemmed() const {
01296 return _stemmed;
01297 }
01298
01299 std::string typeName() const {
01300 return "TermFrequencyScorerNode";
01301 }
01302
01303 std::string queryText() const {
01304 std::stringstream qtext;
01305
01306 if( !_stemmed )
01307 qtext << _text;
01308 else
01309 qtext << "\"" << _text << "\"";
01310
01311 return qtext.str();
01312 }
01313
01314 UINT64 getOccurrences() const {
01315 return _occurrences;
01316 }
01317
01318 UINT64 getContextSize() const {
01319 return _contextSize;
01320 }
01321
01322 const std::string& getSmoothing() const {
01323 return _smoothing;
01324 }
01325
01326 UINT64 getMaxContextLength() const {
01327 return _maximumContextLength;
01328 }
01329
01330 UINT64 getMinContextLength() const {
01331 return _minimumContextLength;
01332 }
01333
01334 UINT64 getMaxOccurrences() const {
01335 return _maximumOccurrences;
01336 }
01337
01338 double getMaxContextFraction() const {
01339 return _maximumContextFraction;
01340 }
01341
01342 void setStatistics( UINT64 occurrences, UINT64 contextSize,
01343 UINT64 maxOccurrences,
01344 UINT64 minContextLength, UINT64 maxContextLength,
01345 double maxContextFraction ) {
01346 _occurrences = occurrences;
01347 _contextSize = contextSize;
01348
01349 _maximumOccurrences = maxOccurrences;
01350 _minimumContextLength = minContextLength;
01351 _maximumContextLength = maxContextLength;
01352 _maximumContextFraction = maxContextFraction;
01353 }
01354
01355 void setSmoothing( const std::string& smoothing ) {
01356 _smoothing = smoothing;
01357 }
01358
01359 void pack( Packer& packer ) {
01360 packer.before(this);
01361 packer.put( "occurrences", _occurrences );
01362 packer.put( "contextSize", _contextSize );
01363 packer.put( "minimumContextLength", _minimumContextLength );
01364 packer.put( "maximumContextLength", _maximumContextLength );
01365 packer.put( "maximumOccurrences", _maximumOccurrences );
01366 packer.put( "maximumContextFraction", _maximumContextFraction );
01367 packer.put( "text", _text );
01368 packer.put( "stemmed", _stemmed );
01369 packer.put( "smoothing", _smoothing );
01370 packer.after(this);
01371 }
01372
01373 void walk( Walker& walker ) {
01374 walker.before(this);
01375 walker.after(this);
01376 }
01377
01378 Node* copy( Copier& copier ) {
01379 copier.before(this);
01380 TermFrequencyScorerNode* duplicate = new TermFrequencyScorerNode(*this);
01381 return copier.after(this, duplicate);
01382 }
01383 };
01384
01385
01386
01387 class CachedFrequencyScorerNode : public indri::lang::ScoredExtentNode {
01388 private:
01389 indri::lang::Node* _raw;
01390 indri::lang::Node* _context;
01391 std::string _smoothing;
01392 void* _list;
01393
01394 public:
01395 CachedFrequencyScorerNode( indri::lang::Node* raw, indri::lang::Node* context )
01396 :
01397 _raw(raw),
01398 _context(context),
01399 _list(0)
01400 {
01401 }
01402
01403 CachedFrequencyScorerNode( Unpacker& unpacker ) {
01404 LEMUR_THROW( LEMUR_RUNTIME_ERROR, "CachedFrequencyScorerNode should not be used on the network" );
01405
01406 _raw = unpacker.getRawExtentNode( "raw" );
01407 _context = unpacker.getRawExtentNode( "context" );
01408 _smoothing = unpacker.getString( "smoothing" );
01409 }
01410
01411 void setList( void* list ) {
01412 _list = list;
01413 }
01414
01415 void* getList() {
01416 return _list;
01417 }
01418
01419 std::string typeName() const {
01420 return "CachedFrequencyScorerNode";
01421 }
01422
01423 std::string queryText() const {
01424 std::stringstream qtext;
01425
01426 qtext << _raw->queryText();
01427 if( _context ) {
01428
01429 int dot = qtext.str().find('.');
01430 if( dot < 0 )
01431 qtext << '.';
01432
01433 qtext << "(" << _context->queryText() << ")";
01434 }
01435
01436 return qtext.str();
01437 }
01438
01439 void setSmoothing( const std::string& smoothing ) {
01440 _smoothing = smoothing;
01441 }
01442
01443 const std::string& getSmoothing() const {
01444 return _smoothing;
01445 }
01446
01447 indri::lang::Node* getRaw() {
01448 return _raw;
01449 }
01450
01451 indri::lang::Node* getContext() {
01452 return _context;
01453 }
01454
01455 void pack( Packer& packer ) {
01456 packer.before(this);
01457 packer.put( "raw", _raw );
01458 packer.put( "context", _context );
01459 packer.put( "smoothing", _smoothing );
01460 packer.after(this);
01461 }
01462
01463 void walk( Walker& walker ) {
01464 walker.before(this);
01465 _raw->walk( walker );
01466 if( _context )
01467 _context->walk( walker );
01468 walker.after(this);
01469 }
01470
01471 indri::lang::Node* copy( Copier& copier ) {
01472 copier.before(this);
01473
01474 indri::lang::RawExtentNode* duplicateRaw = dynamic_cast<indri::lang::RawExtentNode*>(_raw->copy(copier));
01475 indri::lang::RawExtentNode* duplicateContext = 0;
01476
01477 if( _context )
01478 duplicateContext = dynamic_cast<indri::lang::RawExtentNode*>(_context->copy(copier));
01479
01480 CachedFrequencyScorerNode* duplicate = new CachedFrequencyScorerNode( duplicateRaw,
01481 duplicateContext );
01482 duplicate->setNodeName( nodeName() );
01483 duplicate->setSmoothing( _smoothing );
01484 duplicate->setList( getList() );
01485
01486 return copier.after( this, duplicate );
01487 }
01488 };
01489
01490 class PriorNode : public ScoredExtentNode {
01491 public:
01492 struct tuple_type {
01493 int begin;
01494 int end;
01495 double score;
01496 };
01497
01498 private:
01499 std::map<int,tuple_type> _table;
01500 Field* _field;
01501 std::string _fieldName;
01502
01503 public:
01504 PriorNode() :
01505 _field(0)
01506 {
01507 }
01508
01509 PriorNode( indri::lang::Field* field, const std::map<int, tuple_type>& table ) :
01510 _table(table),
01511 _field(field)
01512 {
01513 _fieldName = _field->getFieldName();
01514 }
01515
01516 PriorNode( std::string& fieldName, const std::map<int, tuple_type>& table ) :
01517 _fieldName(fieldName),
01518 _field(0),
01519 _table(table)
01520 {
01521 }
01522
01523 std::string queryText() const {
01524 std::stringstream qtext;
01525
01526
01527 qtext << "#prior(" << _fieldName << ")";
01528 return qtext.str();
01529 }
01530
01531 PriorNode( Unpacker& unpacker ) {
01532 std::vector<int> beginList = unpacker.getIntVector( "begin" );
01533 std::vector<int> endList = unpacker.getIntVector( "end" );
01534 std::vector<double> scoreList = unpacker.getDoubleVector( "score" );
01535 assert( beginList.size() == endList.size() );
01536 assert( scoreList.size() == endList.size() );
01537
01538 for( size_t i=0; i<beginList.size(); i++ ) {
01539 tuple_type t;
01540 t.begin = beginList[i];
01541 t.end = endList[i];
01542 t.score = scoreList[i];
01543
01544 _table[ beginList[i] ] = t;
01545 }
01546
01547 _field = dynamic_cast<Field*>(unpacker.getRawExtentNode( "field" ));
01548 }
01549
01550 std::string nodeType() {
01551 return "PriorNode";
01552 }
01553
01554 const std::map<int,tuple_type>& getTable() const {
01555 return _table;
01556 }
01557
01558 const std::string& getFieldName() const {
01559 return _fieldName;
01560 }
01561
01562 indri::lang::Field* getField() const {
01563 return _field;
01564 }
01565
01566 void setField( indri::lang::Field* field ) {
01567 _field = field;
01568 }
01569
01570 void walk( Walker& walker ) {
01571 walker.before(this);
01572 _field->walk(walker);
01573 walker.after(this);
01574 }
01575
01576 indri::lang::Node* copy( Copier& copier ) {
01577 copier.before(this);
01578 Field* duplicateField = dynamic_cast<Field*>(_field->copy(copier));
01579 PriorNode* duplicate = new PriorNode( duplicateField, getTable() );
01580 return copier.after(this, duplicate);
01581 }
01582
01583 void pack( Packer& packer ) {
01584 packer.before(this);
01585
01586 std::vector<int> beginList;
01587 std::vector<int> endList;
01588 std::vector<double> scoreList;
01589
01590 for( std::map<int,tuple_type>::iterator iter;
01591 iter != _table.end();
01592 iter++ )
01593 {
01594 beginList.push_back( (*iter).second.begin );
01595 endList.push_back( (*iter).second.end );
01596 scoreList.push_back( (*iter).second.score );
01597 }
01598
01599 packer.put( "begin", beginList );
01600 packer.put( "end", endList );
01601 packer.put( "score", scoreList );
01602 packer.put( "field", _field );
01603 packer.after(this);
01604 }
01605 };
01606
01607 class UnweightedCombinationNode : public ScoredExtentNode {
01608 protected:
01609 std::vector<ScoredExtentNode*> _children;
01610
01611 void _unpack( Unpacker& unpacker ) {
01612 _children = unpacker.getScoredExtentVector( "children" );
01613 }
01614
01615 template<class _ThisType>
01616 void _walk( _ThisType* ptr, Walker& walker ) {
01617 walker.before(ptr);
01618
01619 for( unsigned int i=0; i<_children.size(); i++ ) {
01620 _children[i]->walk(walker);
01621 }
01622
01623 walker.after(ptr);
01624 }
01625
01626 template<class _ThisType>
01627 Node* _copy( _ThisType* ptr, Copier& copier ) {
01628 copier.before(ptr);
01629
01630 _ThisType* duplicate = new _ThisType();
01631 duplicate->setNodeName( nodeName() );
01632 for( unsigned int i=0; i<_children.size(); i++ ) {
01633 duplicate->addChild( dynamic_cast<ScoredExtentNode*>(_children[i]->copy(copier)) );
01634 }
01635
01636 return copier.after(ptr, duplicate);
01637 }
01638
01639 void _childText( std::stringstream& qtext ) const {
01640 for( unsigned int i=0; i<_children.size(); i++ ) {
01641 if(i>0) qtext << " ";
01642 qtext << _children[i]->queryText();
01643 }
01644 }
01645
01646 public:
01647 const std::vector<ScoredExtentNode*>& getChildren() {
01648 return _children;
01649 }
01650
01651 void addChild( ScoredExtentNode* scoredNode ) {
01652 _children.push_back( scoredNode );
01653 }
01654
01655 void pack( Packer& packer ) {
01656 packer.before(this);
01657 packer.put( "children", _children );
01658 packer.after(this);
01659 }
01660 };
01661
01662 class WeightedCombinationNode : public ScoredExtentNode {
01663 protected:
01664 std::vector< std::pair<double, ScoredExtentNode*> > _children;
01665
01666 void _unpack( Unpacker& unpacker ) {
01667 std::vector<double> weights = unpacker.getDoubleVector( "weights" );
01668 std::vector<ScoredExtentNode*> nodes = unpacker.getScoredExtentVector( "children" );
01669
01670 for( unsigned int i=0; i<weights.size(); i++ ) {
01671 _children.push_back( std::make_pair( weights[i], nodes[i] ) );
01672 }
01673 }
01674
01675 template<class _ThisType>
01676 void _walk( _ThisType* ptr, Walker& walker ) {
01677 walker.before(ptr);
01678 for( unsigned int i=0; i<_children.size(); i++ ) {
01679 _children[i].second->walk(walker);
01680 }
01681 walker.after(ptr);
01682 }
01683
01684 template<class _ThisType>
01685 Node* _copy( _ThisType* ptr, Copier& copier ) {
01686 copier.before(ptr);
01687
01688 _ThisType* duplicate = new _ThisType;
01689 duplicate->setNodeName( nodeName() );
01690 for( unsigned int i=0; i<_children.size(); i++ ) {
01691 double childWeight = _children[i].first;
01692 Node* childCopy = _children[i].second->copy( copier );
01693
01694 duplicate->addChild( childWeight, dynamic_cast<ScoredExtentNode*>(childCopy) );
01695 }
01696 return copier.after(ptr, duplicate);
01697 }
01698
01699 void _childText( std::stringstream& qtext ) const {
01700 for( unsigned int i=0; i<_children.size(); i++ ) {
01701 if(i>0) qtext << " ";
01702 qtext << _children[i].first
01703 << " "
01704 << _children[i].second->queryText();
01705 }
01706 }
01707
01708 public:
01709 const std::vector< std::pair<double, ScoredExtentNode*> >& getChildren() {
01710 return _children;
01711 }
01712
01713 void addChild( double weight, ScoredExtentNode* scoredNode ) {
01714 _children.push_back( std::make_pair( weight, scoredNode) );
01715 }
01716
01717 void addChild( const std::string& weight, ScoredExtentNode* scoredNode ) {
01718 addChild( atof( weight.c_str() ), scoredNode );
01719 }
01720
01721 void pack( Packer& packer ) {
01722 packer.before(this);
01723
01724 std::vector<double> weights;
01725 std::vector<ScoredExtentNode*> nodes;
01726
01727 for( unsigned int i=0; i<_children.size(); i++ ) {
01728 weights.push_back( _children[i].first );
01729 nodes.push_back( _children[i].second );
01730 }
01731
01732 packer.put( "weights", weights );
01733 packer.put( "children", nodes );
01734 packer.after(this);
01735 }
01736 };
01737
01738 class OrNode : public UnweightedCombinationNode {
01739 public:
01740 OrNode() {}
01741 OrNode( Unpacker& unpacker ) {
01742 _unpack( unpacker );
01743 }
01744
01745 std::string typeName() const {
01746 return "OrNode";
01747 }
01748
01749 std::string queryText() const {
01750 std::stringstream qtext;
01751 qtext << "#or(";
01752 _childText(qtext);
01753 qtext << ")";
01754
01755 return qtext.str();
01756 }
01757
01758 void walk( Walker& walker ) {
01759 _walk( this, walker );
01760 }
01761
01762 Node* copy( Copier& copier ) {
01763 return _copy( this, copier );
01764 }
01765 };
01766
01767 class NotNode : public ScoredExtentNode {
01768 private:
01769 ScoredExtentNode* _child;
01770
01771 public:
01772 NotNode() {
01773 _child = 0;
01774 }
01775
01776 NotNode( ScoredExtentNode* child ) {
01777 _child = child;
01778 }
01779
01780 NotNode( Unpacker& unpacker ) {
01781 _child = unpacker.getScoredExtentNode( "child" );
01782 }
01783
01784 std::string typeName() const {
01785 return "NotNode";
01786 }
01787
01788 ScoredExtentNode* getChild() {
01789 return _child;
01790 }
01791
01792 void setChild( ScoredExtentNode* child ) {
01793 _child = child;
01794 }
01795
01796 std::string queryText() const {
01797 std::stringstream qtext;
01798 qtext << "#not(";
01799 qtext << _child->queryText();
01800 qtext << ")";
01801
01802 return qtext.str();
01803 }
01804
01805 void walk( Walker& walker ) {
01806 walker.before(this);
01807 _child->walk(walker);
01808 walker.after(this);
01809 }
01810
01811 void pack( Packer& packer ) {
01812 packer.before(this);
01813 packer.put( "child", _child );
01814 packer.after(this);
01815 }
01816
01817 Node* copy( Copier& copier ) {
01818 copier.before( this );
01819 ScoredExtentNode* childDuplicate = dynamic_cast<ScoredExtentNode*>(_child->copy(copier));
01820 NotNode* duplicate = new NotNode( childDuplicate );
01821 duplicate->setNodeName( nodeName() );
01822 return copier.after( this, duplicate );
01823 }
01824 };
01825
01826 class MaxNode : public UnweightedCombinationNode {
01827 public:
01828 MaxNode() {}
01829 MaxNode( Unpacker& unpacker ) {
01830 _unpack( unpacker );
01831 }
01832
01833 std::string typeName() const {
01834 return "MaxNode";
01835 }
01836
01837 std::string queryText() const {
01838 std::stringstream qtext;
01839 qtext << "#max(";
01840 _childText(qtext);
01841 qtext << ")";
01842
01843 return qtext.str();
01844 }
01845
01846
01847 void walk( Walker& walker ) {
01848 _walk(this, walker);
01849 }
01850
01851 Node* copy( Copier& copier ) {
01852 return _copy( this, copier );
01853 }
01854 };
01855
01856 class CombineNode : public UnweightedCombinationNode {
01857 public:
01858 CombineNode() {}
01859 CombineNode( Unpacker& unpacker ) {
01860 _unpack( unpacker );
01861 }
01862
01863 std::string typeName() const {
01864 return "CombineNode";
01865 }
01866
01867 std::string queryText() const {
01868 std::stringstream qtext;
01869 qtext << "#combine(";
01870 _childText(qtext);
01871 qtext << ")";
01872
01873 return qtext.str();
01874 }
01875
01876 void walk( Walker& walker ) {
01877 _walk( this, walker );
01878 }
01879
01880 Node* copy( Copier& copier ) {
01881 return _copy( this, copier );
01882 }
01883 };
01884
01885 class WAndNode : public WeightedCombinationNode {
01886 public:
01887 WAndNode() {}
01888 WAndNode( Unpacker& unpacker ) {
01889 _unpack( unpacker );
01890 }
01891
01892 std::string typeName() const {
01893 return "WAndNode";
01894 }
01895
01896 std::string queryText() const {
01897 std::stringstream qtext;
01898 qtext << "#wand(";
01899 _childText(qtext);
01900 qtext << ")";
01901
01902 return qtext.str();
01903 }
01904
01905 void walk( Walker& walker ) {
01906 _walk( this, walker );
01907 }
01908
01909 Node* copy( Copier& copier ) {
01910 return _copy( this, copier );
01911 }
01912 };
01913
01914 class WSumNode : public WeightedCombinationNode {
01915 public:
01916 WSumNode() {}
01917 WSumNode( Unpacker& unpacker ) {
01918 _unpack( unpacker );
01919 }
01920
01921 std::string typeName() const {
01922 return "WSumNode";
01923 }
01924
01925 std::string queryText() const {
01926 std::stringstream qtext;
01927 qtext << "#wsum(";
01928 _childText(qtext);
01929 qtext << ")";
01930
01931 return qtext.str();
01932 }
01933
01934 void walk( Walker& walker ) {
01935 _walk( this, walker );
01936 }
01937
01938 Node* copy( Copier& copier ) {
01939 return _copy( this, copier );
01940 }
01941 };
01942
01943 class WeightNode : public WeightedCombinationNode {
01944 public:
01945 WeightNode() {}
01946 WeightNode( Unpacker& unpacker ) {
01947 _unpack( unpacker );
01948 }
01949
01950 std::string typeName() const {
01951 return "WeightNode";
01952 }
01953
01954 std::string queryText() const {
01955 std::stringstream qtext;
01956 qtext << "#weight(";
01957 _childText(qtext);
01958 qtext << ")";
01959
01960 return qtext.str();
01961 }
01962
01963 void walk( Walker& walker ) {
01964 _walk( this, walker );
01965 }
01966
01967 Node* copy( Copier& copier ) {
01968 return _copy( this, copier );
01969 }
01970 };
01971
01972 class ExtentRestriction : public ScoredExtentNode {
01973 private:
01974 ScoredExtentNode* _child;
01975 RawExtentNode* _field;
01976
01977 public:
01978 ExtentRestriction( Unpacker& unpacker ) {
01979 _child = unpacker.getScoredExtentNode("child");
01980 _field = unpacker.getRawExtentNode("field");
01981 }
01982
01983 ExtentRestriction( ScoredExtentNode* child, RawExtentNode* field ) :
01984 _child(child),
01985 _field(field)
01986 {
01987 }
01988
01989 std::string typeName() const {
01990 return "ExtentRestriction";
01991 }
01992
01993 std::string queryText() const {
01994 std::stringstream qtext;
01995
01996
01997
01998
01999 std::string childText = _child->queryText();
02000 std::string::size_type pos = childText.find( '(' );
02001
02002 if( pos != std::string::npos ) {
02003 qtext << childText.substr(0,pos)
02004 << "["
02005 << _field->queryText()
02006 << "]"
02007 << childText.substr(pos);
02008 } else {
02009
02010 qtext << "["
02011 << _field->queryText()
02012 << "]"
02013 << childText;
02014 }
02015
02016 return qtext.str();
02017 }
02018
02019 ScoredExtentNode* getChild() {
02020 return _child;
02021 }
02022
02023 RawExtentNode* getField() {
02024 return _field;
02025 }
02026
02027 void setChild( ScoredExtentNode* child ) {
02028 _child = child;
02029 }
02030
02031 void setField( RawExtentNode* field ) {
02032 _field = field;
02033 }
02034
02035 void pack( Packer& packer ) {
02036 packer.before(this);
02037 packer.put("child", _child);
02038 packer.put("field", _field);
02039 packer.after(this);
02040 }
02041
02042 void walk( Walker& walker ) {
02043 walker.before(this);
02044 _child->walk(walker);
02045 _field->walk(walker);
02046 walker.after(this);
02047 }
02048
02049 Node* copy( Copier& copier ) {
02050 copier.before(this);
02051
02052 ScoredExtentNode* duplicateChild = dynamic_cast<indri::lang::ScoredExtentNode*>(_child->copy(copier));
02053 RawExtentNode* duplicateField = dynamic_cast<indri::lang::RawExtentNode*>(_field->copy(copier));
02054 ExtentRestriction* duplicate = new ExtentRestriction( duplicateChild, duplicateField );
02055 duplicate->setNodeName( nodeName() );
02056
02057 return copier.after(this, duplicate);
02058 }
02059 };
02060
02061 class FilterNode : public ScoredExtentNode {
02062 private:
02063 ScoredExtentNode* _child;
02064 std::vector<int> _documents;
02065
02066 public:
02067 FilterNode( ScoredExtentNode* child, std::vector<int>& documents ) :
02068 _child(child),
02069 _documents(documents)
02070 {
02071 }
02072
02073 FilterNode( Unpacker& unpacker ) {
02074 _child = unpacker.getScoredExtentNode( "child" );
02075 _documents = unpacker.getIntVector( "documents" );
02076 }
02077
02078 std::string typeName() const {
02079 return "FilterNode";
02080 }
02081
02082 ScoredExtentNode* getChild() {
02083 return _child;
02084 }
02085
02086 const std::vector<int>& getDocuments() const {
02087 return _documents;
02088 }
02089
02090 void pack( Packer& packer ) {
02091 packer.before(this);
02092 packer.put("child", _child);
02093 packer.put("documents", _documents);
02094 packer.after(this);
02095 }
02096
02097 void walk( Walker& walker ) {
02098 walker.before(this);
02099 _child->walk(walker);
02100 walker.after(this);
02101 }
02102
02103 std::string queryText() const {
02104
02105
02106 return _child->queryText();
02107 }
02108
02109 Node* copy( Copier& copier ) {
02110 copier.before(this);
02111
02112 ScoredExtentNode* duplicateChild = dynamic_cast<ScoredExtentNode*>(_child->copy(copier));
02113 FilterNode* duplicate = new FilterNode( duplicateChild, _documents );
02114 duplicate->setNodeName( nodeName() );
02115
02116 return copier.after(this, duplicate);
02117 }
02118 };
02119
02120 class ContextCounterNode : public AccumulatorNode {
02121 private:
02122 RawExtentNode* _raw;
02123 RawExtentNode* _context;
02124 bool _hasCounts;
02125 bool _hasContextSize;
02126 bool _hasMaxScore;
02127 UINT64 _occurrences;
02128 UINT64 _contextSize;
02129
02130 UINT64 _minimumContextLength;
02131 UINT64 _maximumContextLength;
02132 UINT64 _maximumOccurrences;
02133 double _maximumContextFraction;
02134
02135 public:
02136 ContextCounterNode( RawExtentNode* raw, RawExtentNode* context ) :
02137 _hasCounts(false),
02138 _hasContextSize(false),
02139 _hasMaxScore(false),
02140 _occurrences(0),
02141 _contextSize(0),
02142 _minimumContextLength(1),
02143 _maximumContextLength(MAX_INT32),
02144 _maximumOccurrences(MAX_INT32),
02145 _maximumContextFraction(1.0)
02146 {
02147 _raw = raw;
02148 _context = context;
02149 }
02150
02151 ContextCounterNode( Unpacker& unpacker ) {
02152 _raw = unpacker.getRawExtentNode( "raw" );
02153 _context = unpacker.getRawExtentNode( "context" );
02154 _occurrences = unpacker.getInteger( "occurrences" );
02155 _contextSize = unpacker.getInteger( "contextSize" );
02156
02157 _minimumContextLength = unpacker.getInteger( "minimumContextLength" );
02158 _maximumContextLength = unpacker.getInteger( "maximumContextLength" );
02159 _maximumOccurrences = unpacker.getInteger( "maximumOccurrences" );
02160 _maximumContextFraction = unpacker.getDouble( "maximumContextFraction" );
02161
02162 _hasCounts = unpacker.getBoolean( "hasCounts" );
02163 _hasContextSize = unpacker.getBoolean( "hasContextSize" );
02164 _hasMaxScore = unpacker.getBoolean( "hasMaxScore" );
02165 }
02166
02167 std::string typeName() const {
02168 return "ContextCounterNode";
02169 }
02170
02171 std::string queryText() const {
02172 std::stringstream qtext;
02173
02174 if( _raw )
02175 qtext << _raw->queryText();
02176
02177 if( _context ) {
02178
02179 int dot = qtext.str().find('.');
02180 if( dot < 0 )
02181 qtext << '.';
02182
02183 qtext << "(" << _context->queryText() << ")";
02184 }
02185
02186 return qtext.str();
02187 }
02188
02189 RawExtentNode* getContext() {
02190 return _context;
02191 }
02192
02193 RawExtentNode* getRawExtent() {
02194 return _raw;
02195 }
02196
02197 void setRawExtent( RawExtentNode* rawExtent ) {
02198 _raw = rawExtent;
02199 }
02200
02201 void setContext( RawExtentNode* context ) {
02202 _context = context;
02203 }
02204
02205 void pack( Packer& packer ) {
02206 packer.before(this);
02207 packer.put( "raw", _raw );
02208 packer.put( "context", _context );
02209 packer.put( "occurrences", _occurrences );
02210 packer.put( "contextSize", _contextSize );
02211
02212 packer.put( "maximumOccurrences", _maximumOccurrences );
02213 packer.put( "maximumContextFraction", _maximumContextFraction );
02214 packer.put( "maximumContextLength", _maximumContextLength );
02215 packer.put( "minimumContextLength", _minimumContextLength );
02216
02217 packer.put( "hasCounts", _hasCounts );
02218 packer.put( "hasContextSize", _hasContextSize );
02219 packer.put( "hasMaxScore", _hasMaxScore );
02220 packer.after(this);
02221 }
02222
02223 void walk( Walker& walker ) {
02224 walker.before(this);
02225 if( _raw ) _raw->walk(walker);
02226 if( _context ) _context->walk(walker);
02227 walker.after(this);
02228 }
02229
02230 Node* copy( Copier& copier ) {
02231 copier.before(this);
02232 RawExtentNode* duplicateRaw = _raw ? dynamic_cast<RawExtentNode*>(_raw->copy(copier)) : 0;
02233 RawExtentNode* duplicateContext = _context ? dynamic_cast<RawExtentNode*>(_context->copy(copier)) : 0;
02234 ContextCounterNode* duplicate = new ContextCounterNode(*this);
02235 duplicate->setContext(duplicateContext);
02236 duplicate->setRawExtent(duplicateRaw);
02237 return copier.after(this, duplicate);
02238 }
02239
02240 bool hasCounts() const {
02241 return _hasCounts;
02242 }
02243
02244 bool hasContextSize() const {
02245 return _hasContextSize;
02246 }
02247
02248 bool hasMaxScore() const {
02249 return _hasMaxScore;
02250 }
02251
02252 UINT64 getOccurrences() const {
02253 return _occurrences;
02254 }
02255
02256 UINT64 getContextSize() const {
02257 return _contextSize;
02258 }
02259
02260 UINT64 getMaximumOccurrences() const {
02261 return _maximumOccurrences;
02262 }
02263
02264 UINT64 getMinimumContextLength() const {
02265 return _minimumContextLength;
02266 }
02267
02268 UINT64 getMaximumContextLength() const {
02269 return _maximumContextLength;
02270 }
02271
02272 double getMaximumContextFraction() const {
02273 return _maximumContextFraction;
02274 }
02275
02276 void setContextSize( UINT64 contextSize ) {
02277 _contextSize = contextSize;
02278 _hasContextSize = true;
02279 }
02280
02281 void setCounts( UINT64 occurrences,
02282 UINT64 contextSize ) {
02283 _hasCounts = true;
02284 _occurrences = occurrences;
02285 setContextSize( contextSize );
02286 }
02287
02288 void setCounts( UINT64 occurrences,
02289 UINT64 contextSize,
02290 UINT64 maximumOccurrences,
02291 UINT64 minimumContextLength,
02292 UINT64 maximumContextLength,
02293 double maximumContextFraction )
02294 {
02295 setCounts( occurrences, contextSize );
02296 _hasMaxScore = true;
02297 _maximumOccurrences = maximumOccurrences;
02298 _minimumContextLength = minimumContextLength;
02299 _maximumContextLength = maximumContextLength;
02300 _maximumContextFraction = maximumContextFraction;
02301 }
02302 };
02303
02304 class ScoreAccumulatorNode : public AccumulatorNode {
02305 private:
02306 ScoredExtentNode* _scoredNode;
02307
02308 public:
02309 ScoreAccumulatorNode( ScoredExtentNode* scoredNode ) :
02310 _scoredNode(scoredNode)
02311 {
02312 }
02313
02314 ScoreAccumulatorNode( Unpacker& unpacker ) {
02315 _scoredNode = unpacker.getScoredExtentNode( "scoredNode" );
02316 }
02317
02318 std::string typeName() const {
02319 return "ScoreAccumulatorNode";
02320 }
02321
02322 std::string queryText() const {
02323
02324 return _scoredNode->queryText();
02325 }
02326
02327 ScoredExtentNode* getChild() {
02328 return _scoredNode;
02329 }
02330
02331 void pack( Packer& packer ) {
02332 packer.before(this);
02333 packer.put( "scoredNode", _scoredNode );
02334 packer.after(this);
02335 }
02336
02337 void walk( Walker& walker ) {
02338 walker.before(this);
02339 _scoredNode->walk(walker);
02340 walker.after(this);
02341 }
02342
02343 Node* copy( Copier& copier ) {
02344 copier.before(this);
02345 ScoredExtentNode* duplicateChild = dynamic_cast<ScoredExtentNode*>(_scoredNode->copy(copier));
02346 ScoreAccumulatorNode* duplicate = new ScoreAccumulatorNode(duplicateChild);
02347 duplicate->setNodeName( nodeName() );
02348 return copier.after(this, duplicate);
02349 }
02350 };
02351
02352 class AnnotatorNode : public AccumulatorNode {
02353 private:
02354 ScoredExtentNode* _scoredNode;
02355
02356 public:
02357 AnnotatorNode( ScoredExtentNode* scoredNode ) :
02358 _scoredNode(scoredNode)
02359 {
02360 }
02361
02362 AnnotatorNode( Unpacker& unpacker ) {
02363 _scoredNode = unpacker.getScoredExtentNode( "scoredNode" );
02364 }
02365
02366 std::string typeName() const {
02367 return "AnnotatorNode";
02368 }
02369
02370 std::string queryText() const {
02371 return _scoredNode->queryText();
02372 }
02373
02374 ScoredExtentNode* getChild() {
02375 return _scoredNode;
02376 }
02377
02378 void pack( Packer& packer ) {
02379 packer.before(this);
02380 packer.put( "scoredNode", _scoredNode );
02381 packer.after(this);
02382 }
02383
02384 void walk( Walker& walker ) {
02385 walker.before(this);
02386 _scoredNode->walk(walker);
02387 walker.after(this);
02388 }
02389
02390 Node* copy( Copier& copier ) {
02391 copier.before(this);
02392 ScoredExtentNode* duplicateChild = dynamic_cast<ScoredExtentNode*>(_scoredNode->copy(copier));
02393 AnnotatorNode* duplicate = new AnnotatorNode(duplicateChild);
02394 duplicate->setNodeName( nodeName() );
02395 return copier.after(this, duplicate);
02396 }
02397 };
02398 }
02399 }
02400
02401 #endif // INDRI_QUERYSPEC_HPP
02402
02403
02404