X-Git-Url: https://git.mdrn.pl/pylucene.git/blobdiff_plain/a2e61f0c04805cfcb8706176758d1283c7e3a55c..aaeed5504b982cf3545252ab528713250aa33eed:/lucene-java-3.4.0/lucene/src/test/org/apache/lucene/index/TestTermVectorsWriter.java diff --git a/lucene-java-3.4.0/lucene/src/test/org/apache/lucene/index/TestTermVectorsWriter.java b/lucene-java-3.4.0/lucene/src/test/org/apache/lucene/index/TestTermVectorsWriter.java deleted file mode 100644 index b6a250c..0000000 --- a/lucene-java-3.4.0/lucene/src/test/org/apache/lucene/index/TestTermVectorsWriter.java +++ /dev/null @@ -1,475 +0,0 @@ -package org.apache.lucene.index; - -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import java.io.IOException; -import java.io.StringReader; - -import org.apache.lucene.analysis.Analyzer; -import org.apache.lucene.analysis.CachingTokenFilter; -import org.apache.lucene.analysis.MockAnalyzer; -import org.apache.lucene.analysis.TeeSinkTokenFilter; -import org.apache.lucene.analysis.TokenStream; -import org.apache.lucene.analysis.WhitespaceAnalyzer; -import org.apache.lucene.analysis.standard.StandardAnalyzer; -import org.apache.lucene.document.Document; -import org.apache.lucene.document.Field; -import org.apache.lucene.store.Directory; -import org.apache.lucene.store.MockDirectoryWrapper; -import org.apache.lucene.store.RAMDirectory; -import org.apache.lucene.util.LuceneTestCase; - -/** tests for writing term vectors */ -public class TestTermVectorsWriter extends LuceneTestCase { - // LUCENE-1442 - public void testDoubleOffsetCounting() throws Exception { - Directory dir = newDirectory(); - IndexWriter w = new IndexWriter(dir, newIndexWriterConfig( - TEST_VERSION_CURRENT, new MockAnalyzer(random))); - Document doc = new Document(); - Field f = newField("field", "abcd", Field.Store.NO, Field.Index.NOT_ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS); - doc.add(f); - doc.add(f); - Field f2 = newField("field", "", Field.Store.NO, Field.Index.NOT_ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS); - doc.add(f2); - doc.add(f); - w.addDocument(doc); - w.close(); - - IndexReader r = IndexReader.open(dir, true); - TermVectorOffsetInfo[] termOffsets = ((TermPositionVector) r.getTermFreqVector(0, "field")).getOffsets(0); - - // Token "" occurred once - assertEquals(1, termOffsets.length); - assertEquals(8, termOffsets[0].getStartOffset()); - assertEquals(8, termOffsets[0].getEndOffset()); - - // Token "abcd" occurred three times - termOffsets = ((TermPositionVector) r.getTermFreqVector(0, "field")).getOffsets(1); - assertEquals(3, termOffsets.length); - assertEquals(0, termOffsets[0].getStartOffset()); - assertEquals(4, termOffsets[0].getEndOffset()); - assertEquals(4, termOffsets[1].getStartOffset()); - assertEquals(8, termOffsets[1].getEndOffset()); - assertEquals(8, termOffsets[2].getStartOffset()); - assertEquals(12, termOffsets[2].getEndOffset()); - r.close(); - dir.close(); - } - - // LUCENE-1442 - public void testDoubleOffsetCounting2() throws Exception { - Directory dir = newDirectory(); - IndexWriter w = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random))); - Document doc = new Document(); - Field f = newField("field", "abcd", Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS); - doc.add(f); - doc.add(f); - w.addDocument(doc); - w.close(); - - IndexReader r = IndexReader.open(dir, true); - TermVectorOffsetInfo[] termOffsets = ((TermPositionVector) r.getTermFreqVector(0, "field")).getOffsets(0); - assertEquals(2, termOffsets.length); - assertEquals(0, termOffsets[0].getStartOffset()); - assertEquals(4, termOffsets[0].getEndOffset()); - assertEquals(5, termOffsets[1].getStartOffset()); - assertEquals(9, termOffsets[1].getEndOffset()); - r.close(); - dir.close(); - } - - // LUCENE-1448 - public void testEndOffsetPositionCharAnalyzer() throws Exception { - Directory dir = newDirectory(); - IndexWriter w = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random))); - Document doc = new Document(); - Field f = newField("field", "abcd ", Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS); - doc.add(f); - doc.add(f); - w.addDocument(doc); - w.close(); - - IndexReader r = IndexReader.open(dir, true); - TermVectorOffsetInfo[] termOffsets = ((TermPositionVector) r.getTermFreqVector(0, "field")).getOffsets(0); - assertEquals(2, termOffsets.length); - assertEquals(0, termOffsets[0].getStartOffset()); - assertEquals(4, termOffsets[0].getEndOffset()); - assertEquals(8, termOffsets[1].getStartOffset()); - assertEquals(12, termOffsets[1].getEndOffset()); - r.close(); - dir.close(); - } - - // LUCENE-1448 - public void testEndOffsetPositionWithCachingTokenFilter() throws Exception { - Directory dir = newDirectory(); - Analyzer analyzer = new MockAnalyzer(random); - IndexWriter w = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, analyzer)); - Document doc = new Document(); - TokenStream stream = analyzer.tokenStream("field", new StringReader("abcd ")); - stream.reset(); // TODO: wierd to reset before wrapping with CachingTokenFilter... correct? - stream = new CachingTokenFilter(stream); - Field f = new Field("field", stream, Field.TermVector.WITH_POSITIONS_OFFSETS); - doc.add(f); - doc.add(f); - w.addDocument(doc); - w.close(); - - IndexReader r = IndexReader.open(dir, true); - TermVectorOffsetInfo[] termOffsets = ((TermPositionVector) r.getTermFreqVector(0, "field")).getOffsets(0); - assertEquals(2, termOffsets.length); - assertEquals(0, termOffsets[0].getStartOffset()); - assertEquals(4, termOffsets[0].getEndOffset()); - assertEquals(8, termOffsets[1].getStartOffset()); - assertEquals(12, termOffsets[1].getEndOffset()); - r.close(); - dir.close(); - } - - // LUCENE-1448 - public void testEndOffsetPositionWithTeeSinkTokenFilter() throws Exception { - MockDirectoryWrapper dir = newDirectory(); - Analyzer analyzer = new WhitespaceAnalyzer(TEST_VERSION_CURRENT); - IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, analyzer)); - Document doc = new Document(); - TeeSinkTokenFilter tee = new TeeSinkTokenFilter(analyzer.tokenStream("field", new StringReader("abcd "))); - TokenStream sink = tee.newSinkTokenStream(); - Field f1 = new Field("field", tee, Field.TermVector.WITH_POSITIONS_OFFSETS); - Field f2 = new Field("field", sink, Field.TermVector.WITH_POSITIONS_OFFSETS); - doc.add(f1); - doc.add(f2); - w.addDocument(doc); - w.close(); - - IndexReader r = IndexReader.open(dir, true); - TermVectorOffsetInfo[] termOffsets = ((TermPositionVector) r.getTermFreqVector(0, "field")).getOffsets(0); - assertEquals(2, termOffsets.length); - assertEquals(0, termOffsets[0].getStartOffset()); - assertEquals(4, termOffsets[0].getEndOffset()); - assertEquals(8, termOffsets[1].getStartOffset()); - assertEquals(12, termOffsets[1].getEndOffset()); - r.close(); - dir.close(); - } - - // LUCENE-1448 - public void testEndOffsetPositionStopFilter() throws Exception { - Directory dir = newDirectory(); - IndexWriter w = new IndexWriter(dir, newIndexWriterConfig( - TEST_VERSION_CURRENT, new StandardAnalyzer(TEST_VERSION_CURRENT))); - Document doc = new Document(); - Field f = newField("field", "abcd the", Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS); - doc.add(f); - doc.add(f); - w.addDocument(doc); - w.close(); - - IndexReader r = IndexReader.open(dir, true); - TermVectorOffsetInfo[] termOffsets = ((TermPositionVector) r.getTermFreqVector(0, "field")).getOffsets(0); - assertEquals(2, termOffsets.length); - assertEquals(0, termOffsets[0].getStartOffset()); - assertEquals(4, termOffsets[0].getEndOffset()); - assertEquals(9, termOffsets[1].getStartOffset()); - assertEquals(13, termOffsets[1].getEndOffset()); - r.close(); - dir.close(); - } - - // LUCENE-1448 - public void testEndOffsetPositionStandard() throws Exception { - Directory dir = newDirectory(); - IndexWriter w = new IndexWriter(dir, newIndexWriterConfig( - TEST_VERSION_CURRENT, new MockAnalyzer(random))); - Document doc = new Document(); - Field f = newField("field", "abcd the ", Field.Store.NO, - Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS); - Field f2 = newField("field", "crunch man", Field.Store.NO, - Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS); - doc.add(f); - doc.add(f2); - w.addDocument(doc); - w.close(); - - IndexReader r = IndexReader.open(dir, true); - TermPositionVector tpv = ((TermPositionVector) r.getTermFreqVector(0, "field")); - TermVectorOffsetInfo[] termOffsets = tpv.getOffsets(0); - assertEquals(1, termOffsets.length); - assertEquals(0, termOffsets[0].getStartOffset()); - assertEquals(4, termOffsets[0].getEndOffset()); - termOffsets = tpv.getOffsets(1); - assertEquals(11, termOffsets[0].getStartOffset()); - assertEquals(17, termOffsets[0].getEndOffset()); - termOffsets = tpv.getOffsets(2); - assertEquals(18, termOffsets[0].getStartOffset()); - assertEquals(21, termOffsets[0].getEndOffset()); - r.close(); - dir.close(); - } - - // LUCENE-1448 - public void testEndOffsetPositionStandardEmptyField() throws Exception { - Directory dir = newDirectory(); - IndexWriter w = new IndexWriter(dir, newIndexWriterConfig( - TEST_VERSION_CURRENT, new MockAnalyzer(random))); - Document doc = new Document(); - Field f = newField("field", "", Field.Store.NO, - Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS); - Field f2 = newField("field", "crunch man", Field.Store.NO, - Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS); - doc.add(f); - doc.add(f2); - w.addDocument(doc); - w.close(); - - IndexReader r = IndexReader.open(dir, true); - TermPositionVector tpv = ((TermPositionVector) r.getTermFreqVector(0, "field")); - TermVectorOffsetInfo[] termOffsets = tpv.getOffsets(0); - assertEquals(1, termOffsets.length); - assertEquals(1, termOffsets[0].getStartOffset()); - assertEquals(7, termOffsets[0].getEndOffset()); - termOffsets = tpv.getOffsets(1); - assertEquals(8, termOffsets[0].getStartOffset()); - assertEquals(11, termOffsets[0].getEndOffset()); - r.close(); - dir.close(); - } - - // LUCENE-1448 - public void testEndOffsetPositionStandardEmptyField2() throws Exception { - Directory dir = newDirectory(); - IndexWriter w = new IndexWriter(dir, newIndexWriterConfig( - TEST_VERSION_CURRENT, new MockAnalyzer(random))); - Document doc = new Document(); - - Field f = newField("field", "abcd", Field.Store.NO, - Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS); - doc.add(f); - doc.add(newField("field", "", Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS)); - - Field f2 = newField("field", "crunch", Field.Store.NO, - Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS); - doc.add(f2); - - w.addDocument(doc); - w.close(); - - IndexReader r = IndexReader.open(dir, true); - TermPositionVector tpv = ((TermPositionVector) r.getTermFreqVector(0, "field")); - TermVectorOffsetInfo[] termOffsets = tpv.getOffsets(0); - assertEquals(1, termOffsets.length); - assertEquals(0, termOffsets[0].getStartOffset()); - assertEquals(4, termOffsets[0].getEndOffset()); - termOffsets = tpv.getOffsets(1); - assertEquals(6, termOffsets[0].getStartOffset()); - assertEquals(12, termOffsets[0].getEndOffset()); - r.close(); - dir.close(); - } - - // LUCENE-1168 - public void testTermVectorCorruption() throws IOException { - - Directory dir = newDirectory(); - for(int iter=0;iter<2;iter++) { - IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig( - TEST_VERSION_CURRENT, new MockAnalyzer(random)) - .setMaxBufferedDocs(2).setRAMBufferSizeMB( - IndexWriterConfig.DISABLE_AUTO_FLUSH).setMergeScheduler( - new SerialMergeScheduler()).setMergePolicy( - new LogDocMergePolicy())); - - Document document = new Document(); - - Field storedField = newField("stored", "stored", Field.Store.YES, - Field.Index.NO); - document.add(storedField); - writer.addDocument(document); - writer.addDocument(document); - - document = new Document(); - document.add(storedField); - Field termVectorField = newField("termVector", "termVector", - Field.Store.NO, Field.Index.NOT_ANALYZED, - Field.TermVector.WITH_POSITIONS_OFFSETS); - - document.add(termVectorField); - writer.addDocument(document); - writer.optimize(); - writer.close(); - - IndexReader reader = IndexReader.open(dir, true); - for(int i=0;i