2 StandardAnalyzer, RAMDirectory, Document, Field, Version, \
3 IndexWriter, IndexReader, TermPositionVector, initVM
if __name__ == '__main__':
    # Demo: index two short texts in memory with term vectors enabled, then
    # dump the positions and character offsets of every term of document 0.
    #
    # NOTE(review): this block was recovered from a numbered listing with
    # lost indentation and missing lines. Statements marked NOTE(review)
    # below are not visible in the excerpt and were reconstructed from how
    # the visible statements use their results -- confirm against the
    # full file.

    # NOTE(review): initVM is imported and PyLucene needs the JVM started
    # before any Lucene class is used; the call itself is not visible.
    initVM()

    directory = RAMDirectory()
    # Third argument True = create a fresh index in `directory`.
    iwriter = IndexWriter(directory, StandardAnalyzer(Version.LUCENE_CURRENT),
                          True, IndexWriter.MaxFieldLength.LIMITED)
    texts = ["this bernhard is the text to be index text",
             "this claudia is the text to be index"]

    # NOTE(review): the loop header and Document() construction are not
    # visible; inferred from the per-text `doc.add(...)`/`addDocument(doc)`.
    for text in texts:
        doc = Document()
        # WITH_POSITIONS_OFFSETS is what makes the position and offset
        # lookups further down possible.
        doc.add(Field("fieldname", text,
                      Field.Store.YES, Field.Index.ANALYZED,
                      Field.TermVector.WITH_POSITIONS_OFFSETS))
        iwriter.addDocument(doc)

    # NOTE(review): not visible in the excerpt. The writer has to be
    # committed/closed before a reader opened afterwards sees the documents.
    iwriter.close()

    # Second argument True = open the reader read-only.
    ireader = IndexReader.open(directory, True)
    try:
        # getTermFreqVector returns the base vector type; cast_ narrows the
        # wrapper so the position/offset accessors become available.
        tpv = TermPositionVector.cast_(
            ireader.getTermFreqVector(0, 'fieldname'))

        # enumerate() supplies the term index directly instead of zipping
        # against an arbitrarily capped xrange(100000).
        term_stats = zip(tpv.getTerms(), tpv.getTermFrequencies())
        for i, (term, freq) in enumerate(term_stats):
            # NOTE(review): the lines consuming the term and its frequency
            # are not visible; echoed here so each pos/off line is
            # attributable to a term.
            print('term %s freq %i' % (term, freq))
            print(' pos: ' + str(list(tpv.getTermPositions(i))))
            # NOTE(review): the statement consuming the offsets expression
            # is not visible; the ' off: ' label mirrors ' pos: '.
            print(' off: ' +
                  str(["%i-%i" % (o.getStartOffset(), o.getEndOffset())
                       for o in tpv.getOffsets(i)]))
    finally:
        ireader.close()