Main Page Namespace List Class Hierarchy Alphabetical List Compound List File List Namespace Members Compound Members File Members Related Pages

IndriIndex Class Reference

#include <IndriIndex.hpp>

List of all members.

Public Methods

IndriIndex (size_t memorySize=INDRI_DEFAULT_MEMORY_SIZE, float queryProportion=INDRI_DEFAULT_QUERY_PROPORTION)

~IndriIndex ()

void setName (const std::string &prefix)

sets the name for this index

DOCID_T addDocument (struct ParsedDocument *document)

DOCID_T addDocument (const char *documentName, const greedy_vector< char * > &words, const greedy_vector< TagExtent > &tagExtents)

DocInfoList * docInfoList (TERMID_T termID) const

doc entries in a term index,
See also:
DocList

DocPositionInfoList * docPositionInfoList (TERMID_T termID)

doc entries in a term index with positions

indri::index::DocListFrequencyIterator * docFrequencyInfoList (TERMID_T termID)

doc entries in a term index without positions

TermInfoList * termInfoList (DOCID_T docID) const

word entries in a document index (bag of words),
See also:
TermList

TermInfoList * termInfoListSeq (DOCID_T docID) const

word entries in a document index (sequence of words),
See also:
TermList

indri::index::TermListBuilder * termPositionList (DOCID_T docID)

internal IndriIndex term list representation

indri::index::FieldListIterator * fieldPositionListIterator (int fieldID)

field list

Open index

bool open (const std::string &indexName)

Open previously created Index with given prefix.

bool open (const char *indexName)

Open previously created Index with given prefix.

bool openRead (const std::string &indexName)

Open previously created Index with given prefix in read only mode.

bool create (const std::string &indexName)

Create a new index with the given prefix.

bool create (const std::string &indexName, const std::vector< FieldDescription > &fields)

Create a new index with the given prefix and tag set.

void close ()

Close the index.

Spelling and index conversion

TERMID_T term (const TERM_T &word) const

Convert a term spelling to a termID.

const TERM_T term (TERMID_T termID) const

Convert a termID to its spelling.

DOCID_T document (const EXDOCID_T &docIDStr) const

Convert a spelling to docID.

const EXDOCID_T document (DOCID_T docID) const

Convert a docID to its spelling.

const char * field (int fieldID)

Convert a fieldID to its name.

int field (const char *fieldName)

Convert a field name to its fieldID.

int field (const std::string &fieldName)

Convert a field name to its fieldID.

Summary counts

COUNT_T docCount () const

Total count (i.e., number) of documents in collection.

COUNT_T termCountUnique () const

Total count of unique terms in collection.

INT64 termCount (TERMID_T termID) const

Total counts of a term in collection.

INT64 termCount () const

Total counts of all terms in collection.

INT64 fieldTermCount (int fieldID, TERMID_T termID) const

Total counts of a term in a field.

INT64 fieldTermCount (int fieldID) const

Total counts of all terms in a field.

INT64 fieldDocCount (int fieldID) const

Total count of documents that contain a given field.

INT64 fieldDocCount (int fieldID, TERMID_T termID) const

Total count of documents that contain a given term in a given field.

double docLengthAvg () const

Average document length.

COUNT_T docCount (TERMID_T termID) const

Total count of documents that contain a given term.

COUNT_T docIndexedLength (DOCID_T documentID) const

return indexed length of the document

COUNT_T docLength (DOCID_T documentID) const

return length of the document

int termMaxDocumentFrequency (TERMID_T termID)

Maximum number of times this term occurs in any single document.

int termMinDocumentLength (TERMID_T termID)

Minimum length of any document containing this term.

double termMaxDocumentFraction (TERMID_T termID)

Maximum over documents of (termCount/documentLength).

int maxDocumentLength ()

Maximum length of any document in the corpus.

Protected Methods

void _writeCache ()

void _writeAndMerge ()

void _writeBatchSegment ()

void _mergeBatch ()

void _mergeBatchSegments (int start, int end, int newNumber, bool finalMerge)

void _mergeBatchTermLists (const std::vector< int > &segmentMapping)

void _writeIncrementalSegment ()

void _mergeIncrementalSegments ()

void _readTermMapping (greedy_vector< int > &mapping, int segment, int secondSegment)

void _openMergeFiles (int startSegment, int endSegment, std::vector< File * > &listFiles, std::vector< File * > &statsFiles, std::vector< File * > &mappingFiles, std::vector< WriteBuffer * > &mappingBuffers, std::vector< ReadBuffer * > &statsBuffers, std::vector< indri::index::DocListFileIterator * > &listIterators, std::vector< char * > &terms, std::vector< indri::index::TermData * > &termDatas, bool finalMerge)

void _openDBs ()

void _openReadOnlyDBs ()

void _openSegments ()

void _createDBs ()

void _createFields (const std::vector< FieldDescription > &fieldNames)

void _closeFields ()

indri::index::DocumentData fetchDocumentData (int key) const

int fetchDocumentLength (int key) const

void _updateTermlist (TERMID_T termID, int position)

int _updateTermData (int documentLength)

size_t _cacheSize ()

void _computeMemoryBounds (size_t memorySize, float queryProportion)

void _resetEstimatePoint ()

indri::index::TermData * _createTermData ()

indri::index::TermData * _fetchTermData (TERMID_T termID)

indri::index::TermData * _lookupTermData (TERMID_T termID)

void _cleanCache ()

void _deleteTermData (indri::index::TermData *termData)

size_t _sizeTermData ()

void _clearTermData ()

void _clearTermCache ()

void _storeTermCache (const char *term, TERMID_T termID, indri::index::TermData *&termData)

void _flushTermStatistics (TERMID_T termID, const indri::index::TermFieldStatistics &statistics)

void _addTermDataToBuilder (indri::index::DocListDiskBuilder &builder, indri::index::DocListFileIterator &iterator, int writingID, int readingID)

void _addOpenTags (greedy_vector< indri::index::FieldExtent > &indexedTags, greedy_vector< indri::index::FieldExtent > &openTags, const greedy_vector< TagExtent > &extents, unsigned int &extentIndex, unsigned int position)

void _removeClosedTags (greedy_vector< indri::index::FieldExtent > &tags, unsigned int position)

void _lookupTerm (const char *term, TERMID_T &termID, indri::index::TermData *&termData)

void _finishDocument (greedy_vector< indri::index::TermFieldStatistics * > &seenStatistics)

void _writeDocumentTermList (File::offset_type &offset, int &byteLength, DOCID_T documentID, int documentLength, indri::index::TermListBuilder &locatedTerms)

void _writeDocumentStatistics (File::offset_type offset, int byteLength, int indexedLength, int totalLength, int uniqueTerms)

void _handleCache ()

int _lookupTag (const char *tag)

bool _readTermData (TERMID_T &termID, char *termBuffer, indri::index::TermData *termData, ReadBuffer *termDataFile)

void _incrementalWriteTermData (TERMID_T termID, indri::index::TermData *termData)

void _batchWriteTermData (TERMID_T termID, indri::index::TermData *termData, WriteBuffer *file)

int _compressTermData (char *buffer, int size, indri::index::TermData *termData)

void _decompressTermData (const char *buffer, int size, indri::index::TermData *termData)

void _writeParameters (const std::string &fileName)

bool _readParameters (const std::string &fileName)

void _openDocumentFiles ()

std::string _buildFileName (const char *suffix)

std::string _buildFileName (const char *suffix, int index)

Protected Attributes

bool _readOnly

indri::index::CorpusStatistics _corpusStatistics

std::vector< indri::index::FieldData * > _fieldData

std::map< const char *, int,
less_string > _fieldLookup

std::string _baseName

bool _writingDocTermLists

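the prefix name (this description appears to apply to _baseName rather than _writingDocTermLists; verify against IndriIndex.hpp)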

Keyfile _termDataStore

KeyfileWordMap _documentMap

KeyfileWordMap _termMap

File * _documentStatisticsFile

File _documentLengthFile

std::vector< File * > _segments

int _batchSegmentCount

File * _documentTermLocationsFile

indri::index::TermListBuilder _termList

Buffer _termListBuffer

greedy_vector< indri::index::TermData * > _seenTerms

HashTable< int, indri::index::TermData * > * _termDataTable

HashTable< const char *, term_cache_entry * > * _cache

in memory storage of data relating to terms -- partial inverted lists and statistics

ReadBuffer * _documentStatisticsBuffer

ReadBuffer * _documentLengthBuffer

size_t _listsSize

size_t _memorySize

size_t _termDataSize

size_t _termCacheSize

size_t _statisticsBufferSize

size_t _lengthBufferSize

float _queryProportion

bool _batchBuild

INT64 _estimatePoint

INT64 _lastCacheFlush

number of terms in the index when we should next check on flushing the inverted lists

Detailed Description

Indri internal index class.

Constructor & Destructor Documentation

IndriIndex::IndriIndex ( size_t memorySize = INDRI_DEFAULT_MEMORY_SIZE,

float queryProportion = INDRI_DEFAULT_QUERY_PROPORTION

)

Create.
Parameters:

memorySize how much memory to use

queryProportion proportion of load assumed to be queries

IndriIndex::~IndriIndex ( )

Member Function Documentation

void IndriIndex::_addOpenTags ( greedy_vector< indri::index::FieldExtent > & indexedTags,

greedy_vector< indri::index::FieldExtent > & openTags,

const greedy_vector< TagExtent > & extents,

unsigned int & extentIndex,

unsigned int position

) [protected]

void IndriIndex::_addTermDataToBuilder ( indri::index::DocListDiskBuilder & builder,

indri::index::DocListFileIterator & iterator,

int writingID,

int readingID

) [protected]

void IndriIndex::_batchWriteTermData ( TERMID_T termID,

indri::index::TermData * termData,

WriteBuffer * file

) [protected]

std::string IndriIndex::_buildFileName ( const char * suffix,

int index

) [protected]

std::string IndriIndex::_buildFileName ( const char * suffix ) [protected]

size_t IndriIndex::_cacheSize ( ) [protected]

void IndriIndex::_cleanCache ( ) [protected]

void IndriIndex::_clearTermCache ( ) [protected]

void IndriIndex::_clearTermData ( ) [protected]

void IndriIndex::_closeFields ( ) [protected]

int IndriIndex::_compressTermData ( char * buffer,

int size,

indri::index::TermData * termData

) [protected]

void IndriIndex::_computeMemoryBounds ( size_t memorySize,

float queryProportion

) [protected]

void IndriIndex::_createDBs ( ) [protected]

void IndriIndex::_createFields ( const std::vector< FieldDescription > & fieldNames ) [protected]

indri::index::TermData * IndriIndex::_createTermData ( ) [protected]

void IndriIndex::_decompressTermData ( const char * buffer,

int size,

indri::index::TermData * termData

) [protected]

void IndriIndex::_deleteTermData ( indri::index::TermData * termData ) [protected]

indri::index::TermData * IndriIndex::_fetchTermData ( TERMID_T termID ) [protected]

void IndriIndex::_finishDocument ( greedy_vector< indri::index::TermFieldStatistics * > & seenStatistics ) [protected]

void IndriIndex::_flushTermStatistics ( TERMID_T termID,

const indri::index::TermFieldStatistics & statistics

) [protected]

void IndriIndex::_handleCache ( ) [protected]

void IndriIndex::_incrementalWriteTermData ( TERMID_T termID,

indri::index::TermData * termData

) [protected]

int IndriIndex::_lookupTag ( const char * tag ) [protected]

void IndriIndex::_lookupTerm ( const char * term,

TERMID_T & termID,

indri::index::TermData *& termData

) [protected]

indri::index::TermData * IndriIndex::_lookupTermData ( TERMID_T termID ) [protected]

void IndriIndex::_mergeBatch ( ) [protected]

void IndriIndex::_mergeBatchSegments ( int start,

int end,

int newNumber,

bool finalMerge

) [protected]

void IndriIndex::_mergeBatchTermLists ( const std::vector< int > & segmentMapping ) [protected]

void IndriIndex::_mergeIncrementalSegments ( ) [protected]

void IndriIndex::_openDBs ( ) [protected]

void IndriIndex::_openDocumentFiles ( ) [protected]

void IndriIndex::_openMergeFiles ( int startSegment,

int endSegment,

std::vector< File * > & listFiles,

std::vector< File * > & statsFiles,

std::vector< File * > & mappingFiles,

std::vector< WriteBuffer * > & mappingBuffers,

std::vector< ReadBuffer * > & statsBuffers,

std::vector< indri::index::DocListFileIterator * > & listIterators,

std::vector< char * > & terms,

std::vector< indri::index::TermData * > & termDatas,

bool finalMerge

) [protected]

void IndriIndex::_openReadOnlyDBs ( ) [protected]

void IndriIndex::_openSegments ( ) [protected]

bool IndriIndex::_readParameters ( const std::string & fileName ) [protected]

bool IndriIndex::_readTermData ( TERMID_T & termID,

char * termBuffer,

indri::index::TermData * termData,

ReadBuffer * termDataFile

) [protected]

void IndriIndex::_readTermMapping ( greedy_vector< int > & mapping,

int segment,

int secondSegment

) [protected]

void IndriIndex::_removeClosedTags ( greedy_vector< indri::index::FieldExtent > & tags,

unsigned int position

) [protected]

void IndriIndex::_resetEstimatePoint ( ) [protected]

size_t IndriIndex::_sizeTermData ( ) [protected]

void IndriIndex::_storeTermCache ( const char * term,

TERMID_T termID,

indri::index::TermData *& termData

) [protected]

int IndriIndex::_updateTermData ( int documentLength ) [protected]

void IndriIndex::_updateTermlist ( TERMID_T termID,

int position

) [protected]

void IndriIndex::_writeAndMerge ( ) [protected]

void IndriIndex::_writeBatchSegment ( ) [protected]

void IndriIndex::_writeCache ( ) [protected]

void IndriIndex::_writeDocumentStatistics ( File::offset_type offset,

int byteLength,

int indexedLength,

int totalLength,

int uniqueTerms

) [protected]

void IndriIndex::_writeDocumentTermList ( File::offset_type & offset,

int & byteLength,

DOCID_T documentID,

int documentLength,

indri::index::TermListBuilder & locatedTerms

) [protected]

void IndriIndex::_writeIncrementalSegment ( ) [protected]

void IndriIndex::_writeParameters ( const std::string & fileName ) [protected]

DOCID_T IndriIndex::addDocument ( const char * documentName,

const greedy_vector< char * > & words,

const greedy_vector< TagExtent > & tagExtents

)

add a parsed document to the index.
Parameters:

documentName the name of the document to add

words greedy vector of the terms in the document

tagExtents greedy vector of the tag extents for the document.

Returns:
the internal document id of the document

DOCID_T IndriIndex::addDocument ( struct ParsedDocument * document )

add a parsed document to the index.
Parameters:

document the document to add

Returns:
the internal document id of the document

void IndriIndex::close ( )

Close the index.

bool IndriIndex::create ( const std::string & indexName,

const std::vector< FieldDescription > & fields

)

Create a new index with the given prefix and tag set.

bool IndriIndex::create ( const std::string & indexName )

Create a new index with the given prefix.

COUNT_T IndriIndex::docCount ( TERMID_T termID ) const

Total count of documents that contain a given term.

COUNT_T IndriIndex::docCount ( ) const [inline]

Total count (i.e., number) of documents in collection.

indri::index::DocListFrequencyIterator * IndriIndex::docFrequencyInfoList ( TERMID_T termID )

doc entries in a term index without positions

COUNT_T IndriIndex::docIndexedLength ( DOCID_T documentID ) const

return indexed length of the document

DocInfoList * IndriIndex::docInfoList ( TERMID_T termID ) const

doc entries in a term index,
See also:
DocList

COUNT_T IndriIndex::docLength ( DOCID_T documentID ) const

return length of the document

double IndriIndex::docLengthAvg ( ) const

Average document length.

DocPositionInfoList * IndriIndex::docPositionInfoList ( TERMID_T termID )

doc entries in a term index with positions

const EXDOCID_T IndriIndex::document ( DOCID_T docID ) const

Convert a docID to its spelling.

DOCID_T IndriIndex::document ( const EXDOCID_T & docIDStr ) const

Convert a spelling to docID.

indri::index::DocumentData IndriIndex::fetchDocumentData ( int key ) const [protected]

int IndriIndex::fetchDocumentLength ( int key ) const [protected]

int IndriIndex::field ( const std::string & fieldName )

Convert a field name to its fieldID.

int IndriIndex::field ( const char * fieldName )

Convert a field name to its fieldID.

const char * IndriIndex::field ( int fieldID )

Convert a fieldID to its name.

INT64 IndriIndex::fieldDocCount ( int fieldID,

TERMID_T termID

) const

Total count of documents that contain a given term in a given field.

INT64 IndriIndex::fieldDocCount ( int fieldID ) const

Total count of documents that contain a given field.

indri::index::FieldListIterator * IndriIndex::fieldPositionListIterator ( int fieldID )

field list

INT64 IndriIndex::fieldTermCount ( int fieldID ) const

Total counts of all terms in a field.

INT64 IndriIndex::fieldTermCount ( int fieldID,

TERMID_T termID

) const

Total counts of a term in a field.

int IndriIndex::maxDocumentLength ( )

Maximum length of any document in the corpus.

bool IndriIndex::open ( const char * indexName )

Open previously created Index with given prefix.

bool IndriIndex::open ( const std::string & indexName )

Open previously created Index with given prefix.

bool IndriIndex::openRead ( const std::string & indexName )

Open previously created Index with given prefix in read only mode.

void IndriIndex::setName ( const std::string & prefix )

sets the name for this index

const TERM_T IndriIndex::term ( TERMID_T termID ) const

Convert a termID to its spelling.

TERMID_T IndriIndex::term ( const TERM_T & word ) const

Convert a term spelling to a termID.

INT64 IndriIndex::termCount ( ) const [inline]

Total counts of all terms in collection.

INT64 IndriIndex::termCount ( TERMID_T termID ) const

Total counts of a term in collection.

COUNT_T IndriIndex::termCountUnique ( ) const [inline]

Total count of unique terms in collection.

TermInfoList * IndriIndex::termInfoList ( DOCID_T docID ) const

word entries in a document index (bag of words),
See also:
TermList

TermInfoList * IndriIndex::termInfoListSeq ( DOCID_T docID ) const

word entries in a document index (sequence of words),
See also:
TermList

double IndriIndex::termMaxDocumentFraction ( TERMID_T termID )

Maximum over documents of (termCount/documentLength).

int IndriIndex::termMaxDocumentFrequency ( TERMID_T termID )

Maximum number of times this term occurs in any single document.

int IndriIndex::termMinDocumentLength ( TERMID_T termID )

Minimum length of any document containing this term.

indri::index::TermListBuilder * IndriIndex::termPositionList ( DOCID_T docID )

internal IndriIndex term list representation

Member Data Documentation

std::string IndriIndex::_baseName [protected]

bool IndriIndex::_batchBuild [protected]

int IndriIndex::_batchSegmentCount [protected]

HashTable<const char*, term_cache_entry*>* IndriIndex::_cache [protected]

in memory storage of data relating to terms -- partial inverted lists and statistics

indri::index::CorpusStatistics IndriIndex::_corpusStatistics [protected]

ReadBuffer* IndriIndex::_documentLengthBuffer [protected]

File IndriIndex::_documentLengthFile [protected]

KeyfileWordMap IndriIndex::_documentMap [protected]

ReadBuffer* IndriIndex::_documentStatisticsBuffer [protected]

File* IndriIndex::_documentStatisticsFile [protected]

File* IndriIndex::_documentTermLocationsFile [protected]

INT64 IndriIndex::_estimatePoint [protected]

std::vector<indri::index::FieldData*> IndriIndex::_fieldData [protected]

std::map<const char*, int, less_string> IndriIndex::_fieldLookup [protected]

INT64 IndriIndex::_lastCacheFlush [protected]

number of terms in the index when we should next check on flushing the inverted lists

size_t IndriIndex::_lengthBufferSize [protected]

size_t IndriIndex::_listsSize [protected]

size_t IndriIndex::_memorySize [protected]

float IndriIndex::_queryProportion [protected]

bool IndriIndex::_readOnly [protected]

greedy_vector<indri::index::TermData*> IndriIndex::_seenTerms [protected]

std::vector<File*> IndriIndex::_segments [protected]

size_t IndriIndex::_statisticsBufferSize [protected]

size_t IndriIndex::_termCacheSize [protected]

size_t IndriIndex::_termDataSize [protected]

Keyfile IndriIndex::_termDataStore [protected]

HashTable<int, indri::index::TermData*>* IndriIndex::_termDataTable [protected]

indri::index::TermListBuilder IndriIndex::_termList [protected]

Buffer IndriIndex::_termListBuffer [protected]

KeyfileWordMap IndriIndex::_termMap [protected]

bool IndriIndex::_writingDocTermLists [protected]

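the prefix name (this description appears to apply to _baseName rather than _writingDocTermLists; verify against IndriIndex.hpp)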

The documentation for this class was generated from the following files:

Generated on Wed Nov 3 12:59:39 2004 for Lemur Toolkit by

Doxygen 1.2.18


Public Methods
	IndriIndex (size_t memorySize=INDRI_DEFAULT_MEMORY_SIZE, float queryProportion=INDRI_DEFAULT_QUERY_PROPORTION)
	~IndriIndex ()
void	setName (const std::string &prefix)
	sets the name for this index
DOCID_T	addDocument (struct ParsedDocument *document)
DOCID_T	addDocument (const char *documentName, const greedy_vector< char * > &words, const greedy_vector< TagExtent > &tagExtents)
DocInfoList *	docInfoList (TERMID_T termID) const
	doc entries in a term index, See also: DocList
DocPositionInfoList *	docPositionInfoList (TERMID_T termID)
	doc entries in a term index with positions
indri::index::DocListFrequencyIterator *	docFrequencyInfoList (TERMID_T termID)
	doc entries in a term index without positions
TermInfoList *	termInfoList (DOCID_T docID) const
	word entries in a document index (bag of words), See also: TermList
TermInfoList *	termInfoListSeq (DOCID_T docID) const
	word entries in a document index (sequence of words), See also: TermList
indri::index::TermListBuilder *	termPositionList (DOCID_T docID)
	internal IndriIndex term list representation
indri::index::FieldListIterator *	fieldPositionListIterator (int fieldID)
	field list
Open index
bool	open (const std::string &indexName)
	Open previously created Index with given prefix.
bool	open (const char *indexName)
	Open previously created Index with given prefix.
bool	openRead (const std::string &indexName)
	Open previously created Index with given prefix in read only mode.
bool	create (const std::string &indexName)
	Create a new index with the given prefix.
bool	create (const std::string &indexName, const std::vector< FieldDescription > &fields)
	Create a new index with the given prefix and tag set.
void	close ()
	Close the index.
Spelling and index conversion
TERMID_T	term (const TERM_T &word) const
	Convert a term spelling to a termID.
const TERM_T	term (TERMID_T termID) const
	Convert a termID to its spelling.
DOCID_T	document (const EXDOCID_T &docIDStr) const
	Convert a spelling to docID.
const EXDOCID_T	document (DOCID_T docID) const
	Convert a docID to its spelling.
const char *	field (int fieldID)
	Convert a fieldID to its name.
int	field (const char *fieldName)
	Convert a field name to its fieldID.
int	field (const std::string &fieldName)
	Convert a field name to its fieldID.
Summary counts
COUNT_T	docCount () const
	Total count (i.e., number) of documents in collection.
COUNT_T	termCountUnique () const
	Total count of unique terms in collection.
INT64	termCount (TERMID_T termID) const
	Total counts of a term in collection.
INT64	termCount () const
	Total counts of all terms in collection.
INT64	fieldTermCount (int fieldID, TERMID_T termID) const
	Total counts of a term in a field.
INT64	fieldTermCount (int fieldID) const
	Total counts of all terms in a field.
INT64	fieldDocCount (int fieldID) const
	Total count of documents that contain a given field.
INT64	fieldDocCount (int fieldID, TERMID_T termID) const
	Total count of documents that contain a given term in a given field.
double	docLengthAvg () const
	Average document length.
COUNT_T	docCount (TERMID_T termID) const
	Total count of documents that contain a given term.
COUNT_T	docIndexedLength (DOCID_T documentID) const
	return indexed length of the document
COUNT_T	docLength (DOCID_T documentID) const
	return length of the document
int	termMaxDocumentFrequency (TERMID_T termID)
	Maximum number of times this term occurs in any single document.
int	termMinDocumentLength (TERMID_T termID)
	Minimum length of any document containing this term.
double	termMaxDocumentFraction (TERMID_T termID)
	Maximum over documents of (termCount/documentLength).
int	maxDocumentLength ()
	Maximum length of any document in the corpus.
Protected Methods
void	_writeCache ()
void	_writeAndMerge ()
void	_writeBatchSegment ()
void	_mergeBatch ()
void	_mergeBatchSegments (int start, int end, int newNumber, bool finalMerge)
void	_mergeBatchTermLists (const std::vector< int > &segmentMapping)
void	_writeIncrementalSegment ()
void	_mergeIncrementalSegments ()
void	_readTermMapping (greedy_vector< int > &mapping, int segment, int secondSegment)
void	_openMergeFiles (int startSegment, int endSegment, std::vector< File * > &listFiles, std::vector< File * > &statsFiles, std::vector< File * > &mappingFiles, std::vector< WriteBuffer * > &mappingBuffers, std::vector< ReadBuffer * > &statsBuffers, std::vector< indri::index::DocListFileIterator * > &listIterators, std::vector< char * > &terms, std::vector< indri::index::TermData * > &termDatas, bool finalMerge)
void	_openDBs ()
void	_openReadOnlyDBs ()
void	_openSegments ()
void	_createDBs ()
void	_createFields (const std::vector< FieldDescription > &fieldNames)
void	_closeFields ()
indri::index::DocumentData	fetchDocumentData (int key) const
int	fetchDocumentLength (int key) const
void	_updateTermlist (TERMID_T termID, int position)
int	_updateTermData (int documentLength)
size_t	_cacheSize ()
void	_computeMemoryBounds (size_t memorySize, float queryProportion)
void	_resetEstimatePoint ()
indri::index::TermData *	_createTermData ()
indri::index::TermData *	_fetchTermData (TERMID_T termID)
indri::index::TermData *	_lookupTermData (TERMID_T termID)
void	_cleanCache ()
void	_deleteTermData (indri::index::TermData *termData)
size_t	_sizeTermData ()
void	_clearTermData ()
void	_clearTermCache ()
void	_storeTermCache (const char *term, TERMID_T termID, indri::index::TermData *&termData)
void	_flushTermStatistics (TERMID_T termID, const indri::index::TermFieldStatistics &statistics)
void	_addTermDataToBuilder (indri::index::DocListDiskBuilder &builder, indri::index::DocListFileIterator &iterator, int writingID, int readingID)
void	_addOpenTags (greedy_vector< indri::index::FieldExtent > &indexedTags, greedy_vector< indri::index::FieldExtent > &openTags, const greedy_vector< TagExtent > &extents, unsigned int &extentIndex, unsigned int position)
void	_removeClosedTags (greedy_vector< indri::index::FieldExtent > &tags, unsigned int position)
void	_lookupTerm (const char *term, TERMID_T &termID, indri::index::TermData *&termData)
void	_finishDocument (greedy_vector< indri::index::TermFieldStatistics * > &seenStatistics)
void	_writeDocumentTermList (File::offset_type &offset, int &byteLength, DOCID_T documentID, int documentLength, indri::index::TermListBuilder &locatedTerms)
void	_writeDocumentStatistics (File::offset_type offset, int byteLength, int indexedLength, int totalLength, int uniqueTerms)
void	_handleCache ()
int	_lookupTag (const char *tag)
bool	_readTermData (TERMID_T &termID, char *termBuffer, indri::index::TermData *termData, ReadBuffer *termDataFile)
void	_incrementalWriteTermData (TERMID_T termID, indri::index::TermData *termData)
void	_batchWriteTermData (TERMID_T termID, indri::index::TermData *termData, WriteBuffer *file)
int	_compressTermData (char *buffer, int size, indri::index::TermData *termData)
void	_decompressTermData (const char *buffer, int size, indri::index::TermData *termData)
void	_writeParameters (const std::string &fileName)
bool	_readParameters (const std::string &fileName)
void	_openDocumentFiles ()
std::string	_buildFileName (const char *suffix)
std::string	_buildFileName (const char *suffix, int index)
Protected Attributes
bool	_readOnly
indri::index::CorpusStatistics	_corpusStatistics
std::vector< indri::index::FieldData * >	_fieldData
std::map< const char *, int, less_string >	_fieldLookup
std::string	_baseName
bool	_writingDocTermLists
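	the prefix name (this description appears to apply to _baseName rather than _writingDocTermLists; verify against IndriIndex.hpp)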
Keyfile	_termDataStore
KeyfileWordMap	_documentMap
KeyfileWordMap	_termMap
File *	_documentStatisticsFile
File	_documentLengthFile
std::vector< File * >	_segments
int	_batchSegmentCount
File *	_documentTermLocationsFile
indri::index::TermListBuilder	_termList
Buffer	_termListBuffer
greedy_vector< indri::index::TermData * >	_seenTerms
HashTable< int, indri::index::TermData * > *	_termDataTable
HashTable< const char *, term_cache_entry * > *	_cache
	in memory storage of data relating to terms -- partial inverted lists and statistics
ReadBuffer *	_documentStatisticsBuffer
ReadBuffer *	_documentLengthBuffer
size_t	_listsSize
size_t	_memorySize
size_t	_termDataSize
size_t	_termCacheSize
size_t	_statisticsBufferSize
size_t	_lengthBufferSize
float	_queryProportion
bool	_batchBuild
INT64	_estimatePoint
INT64	_lastCacheFlush
	number of terms in the index when we should next check on flushing the inverted lists