pylucene 3.5.0-3

[pylucene.git] / lucene-java-3.5.0 / lucene / contrib / queries / src / test / org / apache / lucene / search / regex / TestSpanRegexQuery.java
diff --git a/lucene-java-3.5.0/lucene/contrib/queries/src/test/org/apache/lucene/search/regex/TestSpanRegexQuery.java b/lucene-java-3.5.0/lucene/contrib/queries/src/test/org/apache/lucene/search/regex/TestSpanRegexQuery.java

new file mode 100644 (file)

index 0000000..13e256e
--- /dev/null
+++ b/lucene-java-3.5.0/lucene/contrib/queries/src/test/org/apache/lucene/search/regex/TestSpanRegexQuery.java
@@ -0,0 +1,198 @@
+package org.apache.lucene.search.regex;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+
+import org.apache.lucene.analysis.MockAnalyzer;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.index.CorruptIndexException;
+import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.index.IndexWriterConfig.OpenMode;
+import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.MultiSearcher;
+import org.apache.lucene.search.spans.SpanFirstQuery;
+import org.apache.lucene.search.spans.SpanMultiTermQueryWrapper;
+import org.apache.lucene.search.spans.SpanNearQuery;
+import org.apache.lucene.search.spans.SpanQuery;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.LockObtainFailedException;
+import org.apache.lucene.util.LuceneTestCase;
+
+public class TestSpanRegexQuery extends LuceneTestCase {
+  
+  Directory indexStoreA;
+  Directory indexStoreB;
+  
+  @Override
+  public void setUp() throws Exception {
+    super.setUp();
+    indexStoreA = newDirectory();
+    indexStoreB = newDirectory();
+  }
+  
+  @Override
+  public void tearDown() throws Exception {
+    indexStoreA.close();
+    indexStoreB.close();
+    super.tearDown();
+  }
+  
+  public void testSpanRegex() throws Exception {
+    Directory directory = newDirectory();
+    IndexWriter writer = new IndexWriter(directory, newIndexWriterConfig(
+        TEST_VERSION_CURRENT, new MockAnalyzer(random)));
+    Document doc = new Document();
+    // doc.add(newField("field", "the quick brown fox jumps over the lazy dog",
+    // Field.Store.NO, Field.Index.ANALYZED));
+    // writer.addDocument(doc);
+    // doc = new Document();
+    doc.add(newField("field", "auto update", Field.Store.NO,
+        Field.Index.ANALYZED));
+    writer.addDocument(doc);
+    doc = new Document();
+    doc.add(newField("field", "first auto update", Field.Store.NO,
+        Field.Index.ANALYZED));
+    writer.addDocument(doc);
+    writer.optimize();
+    writer.close();
+
+    IndexSearcher searcher = new IndexSearcher(directory, true);
+    SpanQuery srq = new SpanMultiTermQueryWrapper<RegexQuery>(new RegexQuery(new Term("field", "aut.*")));
+    SpanFirstQuery sfq = new SpanFirstQuery(srq, 1);
+    // SpanNearQuery query = new SpanNearQuery(new SpanQuery[] {srq, stq}, 6,
+    // true);
+    int numHits = searcher.search(sfq, null, 1000).totalHits;
+    assertEquals(1, numHits);
+    searcher.close();
+    directory.close();
+  }
+  
+  public void testSpanRegexBug() throws CorruptIndexException, IOException {
+    createRAMDirectories();
+
+    SpanQuery srq = new SpanMultiTermQueryWrapper<RegexQuery>(new RegexQuery(new Term("field", "a.*")));
+    SpanQuery stq = new SpanMultiTermQueryWrapper<RegexQuery>(new RegexQuery(new Term("field", "b.*")));
+    SpanNearQuery query = new SpanNearQuery(new SpanQuery[] { srq, stq }, 6,
+        true);
+
+    // 1. Search the same store which works
+    IndexSearcher[] arrSearcher = new IndexSearcher[2];
+    arrSearcher[0] = new IndexSearcher(indexStoreA, true);
+    arrSearcher[1] = new IndexSearcher(indexStoreB, true);
+    MultiSearcher searcher = new MultiSearcher(arrSearcher);
+    int numHits = searcher.search(query, null, 1000).totalHits;
+    arrSearcher[0].close();
+    arrSearcher[1].close();
+
+    // Will fail here
+    // We expect 2 but only one matched
+    // The rewriter function only write it once on the first IndexSearcher
+    // So it's using term: a1 b1 to search on the second IndexSearcher
+    // As a result, it won't match the document in the second IndexSearcher
+    assertEquals(2, numHits);
+  }
+  
+  /** remove in lucene 4.0 */
+  @Deprecated
+  public void testSpanRegexOld() throws Exception {
+    Directory directory = newDirectory();
+    IndexWriter writer = new IndexWriter(directory, newIndexWriterConfig(
+        TEST_VERSION_CURRENT, new MockAnalyzer(random)));
+    Document doc = new Document();
+    // doc.add(newField("field", "the quick brown fox jumps over the lazy dog",
+    // Field.Store.NO, Field.Index.ANALYZED));
+    // writer.addDocument(doc);
+    // doc = new Document();
+    doc.add(newField("field", "auto update", Field.Store.NO,
+        Field.Index.ANALYZED));
+    writer.addDocument(doc);
+    doc = new Document();
+    doc.add(newField("field", "first auto update", Field.Store.NO,
+        Field.Index.ANALYZED));
+    writer.addDocument(doc);
+    writer.optimize();
+    writer.close();
+
+    IndexSearcher searcher = new IndexSearcher(directory, true);
+    SpanRegexQuery srq = new SpanRegexQuery(new Term("field", "aut.*"));
+    SpanFirstQuery sfq = new SpanFirstQuery(srq, 1);
+    // SpanNearQuery query = new SpanNearQuery(new SpanQuery[] {srq, stq}, 6,
+    // true);
+    int numHits = searcher.search(sfq, null, 1000).totalHits;
+    assertEquals(1, numHits);
+    searcher.close();
+    directory.close();
+  }
+
+  /** remove in lucene 4.0 */
+  @Deprecated
+  public void testSpanRegexBugOld() throws CorruptIndexException, IOException {
+    createRAMDirectories();
+
+    SpanRegexQuery srq = new SpanRegexQuery(new Term("field", "a.*"));
+    SpanRegexQuery stq = new SpanRegexQuery(new Term("field", "b.*"));
+    SpanNearQuery query = new SpanNearQuery(new SpanQuery[] { srq, stq }, 6,
+        true);
+
+    // 1. Search the same store which works
+    IndexSearcher[] arrSearcher = new IndexSearcher[2];
+    arrSearcher[0] = new IndexSearcher(indexStoreA, true);
+    arrSearcher[1] = new IndexSearcher(indexStoreB, true);
+    MultiSearcher searcher = new MultiSearcher(arrSearcher);
+    int numHits = searcher.search(query, null, 1000).totalHits;
+    arrSearcher[0].close();
+    arrSearcher[1].close();
+
+    // Will fail here
+    // We expect 2 but only one matched
+    // The rewriter function only write it once on the first IndexSearcher
+    // So it's using term: a1 b1 to search on the second IndexSearcher
+    // As a result, it won't match the document in the second IndexSearcher
+    assertEquals(2, numHits);
+  }
+
+  private void createRAMDirectories() throws CorruptIndexException,
+      LockObtainFailedException, IOException {
+    // creating a document to store
+    Document lDoc = new Document();
+    lDoc.add(newField("field", "a1 b1", Field.Store.NO,
+        Field.Index.ANALYZED_NO_NORMS));
+
+    // creating a document to store
+    Document lDoc2 = new Document();
+    lDoc2.add(newField("field", "a2 b2", Field.Store.NO,
+        Field.Index.ANALYZED_NO_NORMS));
+
+    // creating first index writer
+    IndexWriter writerA = new IndexWriter(indexStoreA, newIndexWriterConfig(
+        TEST_VERSION_CURRENT, new MockAnalyzer(random)).setOpenMode(OpenMode.CREATE));
+    writerA.addDocument(lDoc);
+    writerA.optimize();
+    writerA.close();
+
+    // creating second index writer
+    IndexWriter writerB = new IndexWriter(indexStoreB, newIndexWriterConfig(
+        TEST_VERSION_CURRENT, new MockAnalyzer(random)).setOpenMode(OpenMode.CREATE));
+    writerB.addDocument(lDoc2);
+    writerB.optimize();
+    writerB.close();
+  }
+}