package org.apache.lucene.search.regex;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.IOException;

import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MultiSearcher;
import org.apache.lucene.search.spans.SpanFirstQuery;
import org.apache.lucene.search.spans.SpanMultiTermQueryWrapper;
import org.apache.lucene.search.spans.SpanNearQuery;
import org.apache.lucene.search.spans.SpanQuery;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.LockObtainFailedException;
import org.apache.lucene.util.LuceneTestCase;

/**
 * Tests regular-expression span queries, both through
 * {@link SpanMultiTermQueryWrapper} around a {@link RegexQuery} and through
 * the deprecated {@link SpanRegexQuery}.
 *
 * <p>Each scenario exists twice (current API and deprecated API); the shared
 * index/search plumbing lives in private helpers so both variants exercise
 * exactly the same data.
 */
public class TestSpanRegexQuery extends LuceneTestCase {

  /** First of the two single-document indexes searched together. */
  Directory indexStoreA;

  /** Second of the two single-document indexes searched together. */
  Directory indexStoreB;

  @Override
  public void setUp() throws Exception {
    super.setUp();
    indexStoreA = newDirectory();
    indexStoreB = newDirectory();
  }

  @Override
  public void tearDown() throws Exception {
    // Chain with finally so a failure while closing one store cannot skip
    // closing the other store or the superclass tear-down.
    try {
      indexStoreA.close();
    } finally {
      try {
        indexStoreB.close();
      } finally {
        super.tearDown();
      }
    }
  }

  /**
   * A regex span restricted to the first position must match only the
   * document that starts with a term matching the regex.
   */
  public void testSpanRegex() throws Exception {
    SpanQuery srq = new SpanMultiTermQueryWrapper<RegexQuery>(
        new RegexQuery(new Term("field", "aut.*")));
    assertEquals(1, countSpanFirstHits(srq));
  }

  /**
   * Two regex spans combined in an ordered {@link SpanNearQuery} must match
   * in both indexes when searched through a {@link MultiSearcher}.
   */
  public void testSpanRegexBug() throws CorruptIndexException, IOException {
    SpanQuery srq = new SpanMultiTermQueryWrapper<RegexQuery>(
        new RegexQuery(new Term("field", "a.*")));
    SpanQuery stq = new SpanMultiTermQueryWrapper<RegexQuery>(
        new RegexQuery(new Term("field", "b.*")));
    assertEquals(2, countSpanNearHitsAcrossStores(srq, stq));
  }

  /** remove in lucene 4.0 */
  @Deprecated
  public void testSpanRegexOld() throws Exception {
    SpanRegexQuery srq = new SpanRegexQuery(new Term("field", "aut.*"));
    assertEquals(1, countSpanFirstHits(srq));
  }

  /** remove in lucene 4.0 */
  @Deprecated
  public void testSpanRegexBugOld() throws CorruptIndexException, IOException {
    SpanRegexQuery srq = new SpanRegexQuery(new Term("field", "a.*"));
    SpanRegexQuery stq = new SpanRegexQuery(new Term("field", "b.*"));
    assertEquals(2, countSpanNearHitsAcrossStores(srq, stq));
  }

  /**
   * Indexes "auto update" and "first auto update" into a fresh directory and
   * counts the documents where {@code regexSpans} matches within the first
   * position.
   *
   * <p>The searcher and the directory are closed before the count is
   * returned, so a failing assertion in the caller cannot leave them open.
   *
   * @param regexSpans the span query to wrap in a {@link SpanFirstQuery}
   * @return the total number of hits
   */
  private int countSpanFirstHits(SpanQuery regexSpans) throws Exception {
    Directory directory = newDirectory();
    IndexWriter writer = new IndexWriter(directory, newIndexWriterConfig(
        TEST_VERSION_CURRENT, new MockAnalyzer(random)));

    Document doc = new Document();
    doc.add(newField("field", "auto update", Field.Store.NO,
        Field.Index.ANALYZED));
    writer.addDocument(doc);

    doc = new Document();
    doc.add(newField("field", "first auto update", Field.Store.NO,
        Field.Index.ANALYZED));
    writer.addDocument(doc);

    writer.optimize();
    writer.close();

    IndexSearcher searcher = new IndexSearcher(directory, true);
    int numHits;
    try {
      SpanFirstQuery sfq = new SpanFirstQuery(regexSpans, 1);
      numHits = searcher.search(sfq, null, 1000).totalHits;
    } finally {
      searcher.close();
    }
    directory.close();
    return numHits;
  }

  /**
   * Populates both index stores and counts the hits of an ordered
   * {@link SpanNearQuery} (slop 6) over {@code first} and {@code second}
   * when both stores are searched through one {@link MultiSearcher}.
   *
   * <p>Store A holds "a1 b1" and store B holds "a2 b2", so a query that is
   * expanded correctly against each index matches one document per store.
   * Per the notes in the original test, the failure this guards against was
   * the query being rewritten only once, against the first searcher, so the
   * expanded terms (a1, b1) were reused on the second searcher and its
   * document was missed.
   *
   * @param first  span query for the leading term
   * @param second span query for the trailing term
   * @return the total number of hits across both stores
   */
  private int countSpanNearHitsAcrossStores(SpanQuery first, SpanQuery second)
      throws IOException {
    createRAMDirectories();

    SpanNearQuery query = new SpanNearQuery(new SpanQuery[] { first, second },
        6, true);

    // Close both sub-searchers even when opening the second one or running
    // the search throws.
    IndexSearcher searcherA = new IndexSearcher(indexStoreA, true);
    try {
      IndexSearcher searcherB = new IndexSearcher(indexStoreB, true);
      try {
        MultiSearcher searcher = new MultiSearcher(
            new IndexSearcher[] { searcherA, searcherB });
        return searcher.search(query, null, 1000).totalHits;
      } finally {
        searcherB.close();
      }
    } finally {
      searcherA.close();
    }
  }

  /**
   * Writes one document into each of {@link #indexStoreA} ("a1 b1") and
   * {@link #indexStoreB} ("a2 b2"), recreating any existing index.
   */
  private void createRAMDirectories() throws CorruptIndexException,
      LockObtainFailedException, IOException {
    // Build both documents before opening any writer, as the original did.
    Document lDoc = new Document();
    lDoc.add(newField("field", "a1 b1", Field.Store.NO,
        Field.Index.ANALYZED_NO_NORMS));

    Document lDoc2 = new Document();
    lDoc2.add(newField("field", "a2 b2", Field.Store.NO,
        Field.Index.ANALYZED_NO_NORMS));

    writeSingleDocumentIndex(indexStoreA, lDoc);
    writeSingleDocumentIndex(indexStoreB, lDoc2);
  }

  /**
   * Creates (overwriting) an optimized index in {@code store} that contains
   * only {@code document}.
   */
  private void writeSingleDocumentIndex(Directory store, Document document)
      throws IOException {
    IndexWriter writer = new IndexWriter(store, newIndexWriterConfig(
        TEST_VERSION_CURRENT, new MockAnalyzer(random)).setOpenMode(OpenMode.CREATE));
    writer.addDocument(document);
    writer.optimize();
    writer.close();
  }
}