X-Git-Url: https://git.mdrn.pl/pylucene.git/blobdiff_plain/a2e61f0c04805cfcb8706176758d1283c7e3a55c..aaeed5504b982cf3545252ab528713250aa33eed:/lucene-java-3.5.0/lucene/src/test/org/apache/lucene/search/spans/TestBasics.java diff --git a/lucene-java-3.5.0/lucene/src/test/org/apache/lucene/search/spans/TestBasics.java b/lucene-java-3.5.0/lucene/src/test/org/apache/lucene/search/spans/TestBasics.java new file mode 100644 index 0000000..a8a86f6 --- /dev/null +++ b/lucene-java-3.5.0/lucene/src/test/org/apache/lucene/search/spans/TestBasics.java @@ -0,0 +1,632 @@ +package org.apache.lucene.search.spans; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import java.io.IOException; +import java.io.Reader; +import java.util.ArrayList; +import java.util.Collection; +import java.util.Collections; +import java.util.List; + +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.analysis.LowerCaseTokenizer; +import org.apache.lucene.analysis.MockAnalyzer; +import org.apache.lucene.analysis.TokenFilter; +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; +import org.apache.lucene.analysis.tokenattributes.PayloadAttribute; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.Payload; +import org.apache.lucene.index.RandomIndexWriter; +import org.apache.lucene.index.Term; +import org.apache.lucene.search.BooleanClause; +import org.apache.lucene.search.BooleanQuery; +import org.apache.lucene.search.CheckHits; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.PhraseQuery; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.QueryUtils; +import org.apache.lucene.search.TermQuery; +import org.apache.lucene.store.Directory; +import org.apache.lucene.util.English; +import org.apache.lucene.util.LuceneTestCase; +import org.apache.lucene.util._TestUtil; +import org.junit.AfterClass; +import org.junit.BeforeClass; +import org.junit.Test; + +/** + * Tests basic search capabilities. + * + *
Uses a collection of 2000 documents, each the English rendition of its + * document number. For example, the document numbered 333 has text "three + * hundred thirty three". + * + *
Tests are each a single query, and its hits are checked to ensure that
+ * all and only the correct documents are returned, thus providing end-to-end
+ * testing of the indexing and search code.
+ *
+ */
+public class TestBasics extends LuceneTestCase {
+ private static IndexSearcher searcher; // shared searcher; created from reader in beforeClass(), closed in afterClass()
+ private static IndexReader reader; // shared reader; obtained from the index writer in beforeClass()
+ private static Directory directory; // directory holding the test index; created in beforeClass(), closed last in afterClass()
+
+  /**
+   * Token filter that attaches a payload to every token it passes through. The
+   * payload is the bytes of the string "pos: N", where N counts the tokens seen
+   * since construction or the last {@link #reset()}, starting at 0.
+   */
+  static final class SimplePayloadFilter extends TokenFilter {
+    String fieldName;
+    int pos;
+    final PayloadAttribute payloadAttr;
+    final CharTermAttribute termAttr;
+
+    public SimplePayloadFilter(TokenStream input, String fieldName) {
+      super(input);
+      this.payloadAttr = input.addAttribute(PayloadAttribute.class);
+      this.termAttr = input.addAttribute(CharTermAttribute.class);
+      this.fieldName = fieldName;
+      this.pos = 0;
+    }
+
+    @Override
+    public boolean incrementToken() throws IOException {
+      if (!input.incrementToken()) {
+        return false;
+      }
+      // Tag the current token with its running index, then advance the counter.
+      final byte[] marker = ("pos: " + pos).getBytes();
+      payloadAttr.setPayload(new Payload(marker));
+      pos++;
+      return true;
+    }
+
+    @Override
+    public void reset() throws IOException {
+      super.reset();
+      this.pos = 0;
+    }
+  }
+
+  /**
+   * Analyzer whose token stream is a {@link LowerCaseTokenizer} over the reader,
+   * wrapped in a {@link SimplePayloadFilter}.
+   */
+  static final Analyzer simplePayloadAnalyzer = new Analyzer() {
+    @Override
+    public TokenStream tokenStream(String fieldName, Reader reader) {
+      final TokenStream tokenizer = new LowerCaseTokenizer(TEST_VERSION_CURRENT, reader);
+      return new SimplePayloadFilter(tokenizer, fieldName);
+    }
+  };
+
+ @BeforeClass
+ public static void beforeClass() throws Exception {
+ directory = newDirectory();
+ RandomIndexWriter writer = new RandomIndexWriter(random, directory,
+ newIndexWriterConfig(TEST_VERSION_CURRENT, simplePayloadAnalyzer)
+ .setMaxBufferedDocs(_TestUtil.nextInt(random, 50, 1000)).setMergePolicy(newLogMergePolicy()));
+ //writer.infoStream = System.out;
+ for (int i = 0; i < 2000; i++) {
+ Document doc = new Document();
+ doc.add(newField("field", English.intToEnglish(i), Field.Store.YES, Field.Index.ANALYZED));
+ writer.addDocument(doc);
+ }
+ reader = writer.getReader();
+ searcher = newSearcher(reader);
+ writer.close();
+ }
+
+  /**
+   * Releases the shared searcher, reader and directory, in that order, and
+   * clears the static references.
+   *
+   * Each resource is closed only if it was actually created, and a failure while
+   * closing one does not prevent the remaining ones from being closed. This
+   * keeps a partially failed {@link #beforeClass()} from being masked by a
+   * NullPointerException here and from leaking the directory.
+   */
+  @AfterClass
+  public static void afterClass() throws Exception {
+    try {
+      if (searcher != null) {
+        searcher.close();
+      }
+    } finally {
+      try {
+        if (reader != null) {
+          reader.close();
+        }
+      } finally {
+        try {
+          if (directory != null) {
+            directory.close();
+          }
+        } finally {
+          // Always drop the static references so nothing outlives the class.
+          searcher = null;
+          reader = null;
+          directory = null;
+        }
+      }
+    }
+  }
+
+  /**
+   * Runs a {@link TermQuery} for "seventy" and checks the hits against the
+   * listed document ids.
+   */
+  @Test
+  public void testTerm() throws Exception {
+    final Term seventy = new Term("field", "seventy");
+    final int[] expected = {
+        70, 71, 72, 73, 74, 75, 76, 77, 78, 79,
+        170, 171, 172, 173, 174, 175, 176, 177, 178, 179,
+        270, 271, 272, 273, 274, 275, 276, 277, 278, 279,
+        370, 371, 372, 373, 374, 375, 376, 377, 378, 379,
+        470, 471, 472, 473, 474, 475, 476, 477, 478, 479,
+        570, 571, 572, 573, 574, 575, 576, 577, 578, 579,
+        670, 671, 672, 673, 674, 675, 676, 677, 678, 679,
+        770, 771, 772, 773, 774, 775, 776, 777, 778, 779,
+        870, 871, 872, 873, 874, 875, 876, 877, 878, 879,
+        970, 971, 972, 973, 974, 975, 976, 977, 978, 979,
+        1070, 1071, 1072, 1073, 1074, 1075, 1076, 1077, 1078, 1079,
+        1170, 1171, 1172, 1173, 1174, 1175, 1176, 1177, 1178, 1179,
+        1270, 1271, 1272, 1273, 1274, 1275, 1276, 1277, 1278, 1279,
+        1370, 1371, 1372, 1373, 1374, 1375, 1376, 1377, 1378, 1379,
+        1470, 1471, 1472, 1473, 1474, 1475, 1476, 1477, 1478, 1479,
+        1570, 1571, 1572, 1573, 1574, 1575, 1576, 1577, 1578, 1579,
+        1670, 1671, 1672, 1673, 1674, 1675, 1676, 1677, 1678, 1679,
+        1770, 1771, 1772, 1773, 1774, 1775, 1776, 1777, 1778, 1779,
+        1870, 1871, 1872, 1873, 1874, 1875, 1876, 1877, 1878, 1879,
+        1970, 1971, 1972, 1973, 1974, 1975, 1976, 1977, 1978, 1979
+    };
+    checkHits(new TermQuery(seventy), expected);
+  }
+
+  /** Runs a {@link TermQuery} for "seventish" and expects an empty hit list. */
+  @Test
+  public void testTerm2() throws Exception {
+    final Term absent = new Term("field", "seventish");
+    final int[] noHits = new int[0];
+    checkHits(new TermQuery(absent), noHits);
+  }
+
+  /**
+   * Runs a {@link PhraseQuery} for "seventy" followed by "seven" and checks the
+   * hits against the listed document ids.
+   */
+  @Test
+  public void testPhrase() throws Exception {
+    PhraseQuery phrase = new PhraseQuery();
+    for (String word : new String[] {"seventy", "seven"}) {
+      phrase.add(new Term("field", word));
+    }
+    final int[] expected = {
+        77, 177, 277, 377, 477, 577, 677, 777, 877, 977,
+        1077, 1177, 1277, 1377, 1477, 1577, 1677, 1777, 1877, 1977
+    };
+    checkHits(phrase, expected);
+  }
+
+  /**
+   * Runs a {@link PhraseQuery} for "seventish" followed by "sevenon" and expects
+   * an empty hit list.
+   */
+  @Test
+  public void testPhrase2() throws Exception {
+    PhraseQuery phrase = new PhraseQuery();
+    for (String word : new String[] {"seventish", "sevenon"}) {
+      phrase.add(new Term("field", word));
+    }
+    checkHits(phrase, new int[0]);
+  }
+
+  /**
+   * Runs a {@link BooleanQuery} with "seventy" and "seven" both as MUST clauses
+   * and checks the hits against the listed document ids.
+   */
+  @Test
+  public void testBoolean() throws Exception {
+    final TermQuery seventy = new TermQuery(new Term("field", "seventy"));
+    final TermQuery seven = new TermQuery(new Term("field", "seven"));
+    BooleanQuery both = new BooleanQuery();
+    both.add(seventy, BooleanClause.Occur.MUST);
+    both.add(seven, BooleanClause.Occur.MUST);
+    final int[] expected = {
+        77, 177, 277, 377, 477, 577, 677,
+        770, 771, 772, 773, 774, 775, 776, 777, 778, 779,
+        877, 977, 1077, 1177, 1277, 1377, 1477, 1577, 1677,
+        1770, 1771, 1772, 1773, 1774, 1775, 1776, 1777, 1778, 1779,
+        1877, 1977
+    };
+    checkHits(both, expected);
+  }
+
+  /**
+   * Runs a {@link BooleanQuery} with "sevento" and "sevenly" both as MUST clauses
+   * and expects an empty hit list.
+   */
+  @Test
+  public void testBoolean2() throws Exception {
+    final TermQuery first = new TermQuery(new Term("field", "sevento"));
+    final TermQuery second = new TermQuery(new Term("field", "sevenly"));
+    BooleanQuery both = new BooleanQuery();
+    both.add(first, BooleanClause.Occur.MUST);
+    both.add(second, BooleanClause.Occur.MUST);
+    checkHits(both, new int[0]);
+  }
+
+ @Test
+ public void testSpanNearExact() throws Exception {
+ SpanTermQuery term1 = new SpanTermQuery(new Term("field", "seventy"));
+ SpanTermQuery term2 = new SpanTermQuery(new Term("field", "seven"));
+ SpanNearQuery query = new SpanNearQuery(new SpanQuery[] {term1, term2},
+ 0, true);
+ checkHits(query, new int[]
+ {77, 177, 277, 377, 477, 577, 677, 777, 877, 977, 1077, 1177, 1277, 1377, 1477, 1577, 1677, 1777, 1877, 1977});
+
+ assertTrue(searcher.explain(query, 77).getValue() > 0.0f);
+ assertTrue(searcher.explain(query, 977).getValue() > 0.0f);
+
+ QueryUtils.check(term1);
+ QueryUtils.check(term2);
+ QueryUtils.checkUnequal(term1,term2);
+ }
+
+  /**
+   * Runs a {@link SpanTermQuery} for "seventy" and checks the hits against the
+   * listed document ids.
+   *
+   * Annotated with {@code @Test} like every other test in this class, so that a
+   * runner which discovers tests only by annotation does not skip it.
+   */
+  @Test
+  public void testSpanTermQuery() throws Exception {
+    SpanTermQuery term1 = new SpanTermQuery(new Term("field", "seventy"));
+    checkHits(term1, new int[] {
+        70, 71, 72, 73, 74, 75, 76, 77, 78, 79,
+        170, 171, 172, 173, 174, 175, 176, 177, 178, 179,
+        270, 271, 272, 273, 274, 275, 276, 277, 278, 279,
+        370, 371, 372, 373, 374, 375, 376, 377, 378, 379,
+        470, 471, 472, 473, 474, 475, 476, 477, 478, 479,
+        570, 571, 572, 573, 574, 575, 576, 577, 578, 579,
+        670, 671, 672, 673, 674, 675, 676, 677, 678, 679,
+        770, 771, 772, 773, 774, 775, 776, 777, 778, 779,
+        870, 871, 872, 873, 874, 875, 876, 877, 878, 879,
+        970, 971, 972, 973, 974, 975, 976, 977, 978, 979,
+        1070, 1071, 1072, 1073, 1074, 1075, 1076, 1077, 1078, 1079,
+        1170, 1270, 1370, 1470, 1570, 1670, 1770, 1870, 1970,
+        1171, 1172, 1173, 1174, 1175, 1176, 1177, 1178, 1179,
+        1271, 1272, 1273, 1274, 1275, 1276, 1277, 1278, 1279,
+        1371, 1372, 1373, 1374, 1375, 1376, 1377, 1378, 1379,
+        1471, 1472, 1473, 1474, 1475, 1476, 1477, 1478, 1479,
+        1571, 1572, 1573, 1574, 1575, 1576, 1577, 1578, 1579,
+        1671, 1672, 1673, 1674, 1675, 1676, 1677, 1678, 1679,
+        1771, 1772, 1773, 1774, 1775, 1776, 1777, 1778, 1779,
+        1871, 1872, 1873, 1874, 1875, 1876, 1877, 1878, 1879,
+        1971, 1972, 1973, 1974, 1975, 1976, 1977, 1978, 1979
+    });
+  }
+
+ @Test
+ public void testSpanNearUnordered() throws Exception {
+ SpanTermQuery term1 = new SpanTermQuery(new Term("field", "nine"));
+ SpanTermQuery term2 = new SpanTermQuery(new Term("field", "six"));
+ SpanNearQuery query = new SpanNearQuery(new SpanQuery[] {term1, term2},
+ 4, false);
+
+ checkHits(query, new int[]
+ {609, 629, 639, 649, 659, 669, 679, 689, 699, 906, 926, 936, 946, 956,
+ 966, 976, 986, 996, 1609, 1629, 1639, 1649, 1659, 1669,
+ 1679, 1689, 1699, 1906, 1926, 1936, 1946, 1956, 1966, 1976, 1986,
+ 1996});
+ }
+
+ @Test
+ public void testSpanNearOrdered() throws Exception {
+ SpanTermQuery term1 = new SpanTermQuery(new Term("field", "nine"));
+ SpanTermQuery term2 = new SpanTermQuery(new Term("field", "six"));
+ SpanNearQuery query = new SpanNearQuery(new SpanQuery[] {term1, term2},
+ 4, true);
+ checkHits(query, new int[]
+ {906, 926, 936, 946, 956, 966, 976, 986, 996, 1906, 1926, 1936, 1946, 1956, 1966, 1976, 1986, 1996});
+ }
+
+  /**
+   * Runs a {@link SpanNotQuery} that includes an ordered near query ("eight"
+   * then "one", slop 4) and excludes "forty". Checks the hits and that two of
+   * them explain with a positive value.
+   */
+  @Test
+  public void testSpanNot() throws Exception {
+    SpanTermQuery eight = new SpanTermQuery(new Term("field", "eight"));
+    SpanTermQuery one = new SpanTermQuery(new Term("field", "one"));
+    SpanNearQuery include = new SpanNearQuery(new SpanQuery[] {eight, one}, 4, true);
+    SpanTermQuery forty = new SpanTermQuery(new Term("field", "forty"));
+    SpanNotQuery notForty = new SpanNotQuery(include, forty);
+
+    final int[] expected = {
+        801, 821, 831, 851, 861, 871, 881, 891,
+        1801, 1821, 1831, 1851, 1861, 1871, 1881, 1891
+    };
+    checkHits(notForty, expected);
+
+    assertTrue(0.0f < searcher.explain(notForty, 801).getValue());
+    assertTrue(0.0f < searcher.explain(notForty, 891).getValue());
+  }
+
+  /**
+   * Same shape as {@code testSpanNot}, but the excluded "forty" term is wrapped
+   * in a single-clause {@link SpanOrQuery}. Checks the hits and that two of them
+   * explain with a positive value.
+   */
+  @Test
+  public void testSpanWithMultipleNotSingle() throws Exception {
+    SpanTermQuery eight = new SpanTermQuery(new Term("field", "eight"));
+    SpanTermQuery one = new SpanTermQuery(new Term("field", "one"));
+    SpanNearQuery include = new SpanNearQuery(new SpanQuery[] {eight, one}, 4, true);
+    SpanTermQuery forty = new SpanTermQuery(new Term("field", "forty"));
+
+    SpanOrQuery exclude = new SpanOrQuery(forty);
+    SpanNotQuery notExcluded = new SpanNotQuery(include, exclude);
+
+    final int[] expected = {
+        801, 821, 831, 851, 861, 871, 881, 891,
+        1801, 1821, 1831, 1851, 1861, 1871, 1881, 1891
+    };
+    checkHits(notExcluded, expected);
+
+    assertTrue(0.0f < searcher.explain(notExcluded, 801).getValue());
+    assertTrue(0.0f < searcher.explain(notExcluded, 891).getValue());
+  }
+
+  /**
+   * Runs a {@link SpanNotQuery} that includes an ordered near query ("eight"
+   * then "one", slop 4) and excludes a {@link SpanOrQuery} over "forty",
+   * "sixty" and "eighty". Checks the hits and that two of them explain with a
+   * positive value.
+   */
+  @Test
+  public void testSpanWithMultipleNotMany() throws Exception {
+    SpanTermQuery eight = new SpanTermQuery(new Term("field", "eight"));
+    SpanTermQuery one = new SpanTermQuery(new Term("field", "one"));
+    SpanNearQuery include = new SpanNearQuery(new SpanQuery[] {eight, one}, 4, true);
+    SpanTermQuery forty = new SpanTermQuery(new Term("field", "forty"));
+    SpanTermQuery sixty = new SpanTermQuery(new Term("field", "sixty"));
+    SpanTermQuery eighty = new SpanTermQuery(new Term("field", "eighty"));
+
+    SpanOrQuery exclude = new SpanOrQuery(forty, sixty, eighty);
+    SpanNotQuery notExcluded = new SpanNotQuery(include, exclude);
+
+    final int[] expected = {
+        801, 821, 831, 851, 871, 891,
+        1801, 1821, 1831, 1851, 1871, 1891
+    };
+    checkHits(notExcluded, expected);
+
+    assertTrue(0.0f < searcher.explain(notExcluded, 801).getValue());
+    assertTrue(0.0f < searcher.explain(notExcluded, 891).getValue());
+  }
+
+  /**
+   * Runs a {@link SpanNotQuery} whose exclude clause is itself an ordered
+   * {@link SpanNearQuery} ("hundred" then "forty", slop 1). Checks the hits and
+   * that two of them explain with a positive value.
+   */
+  @Test
+  public void testNpeInSpanNearWithSpanNot() throws Exception {
+    SpanTermQuery eight = new SpanTermQuery(new Term("field", "eight"));
+    SpanTermQuery one = new SpanTermQuery(new Term("field", "one"));
+    SpanNearQuery include = new SpanNearQuery(new SpanQuery[] {eight, one}, 4, true);
+    SpanTermQuery hundred = new SpanTermQuery(new Term("field", "hundred"));
+    SpanTermQuery forty = new SpanTermQuery(new Term("field", "forty"));
+    SpanNearQuery hundredForty = new SpanNearQuery(new SpanQuery[] {hundred, forty}, 1, true);
+
+    SpanNotQuery notExcluded = new SpanNotQuery(include, hundredForty);
+
+    final int[] expected = {
+        801, 821, 831, 851, 861, 871, 881, 891,
+        1801, 1821, 1831, 1851, 1861, 1871, 1881, 1891
+    };
+    checkHits(notExcluded, expected);
+
+    assertTrue(0.0f < searcher.explain(notExcluded, 801).getValue());
+    assertTrue(0.0f < searcher.explain(notExcluded, 891).getValue());
+  }
+
+  /**
+   * Runs a {@link SpanNotQuery} whose include clause is a {@link SpanFirstQuery}
+   * over "forty" (end 5) and whose exclude clause is a {@link SpanFirstQuery}
+   * (end 4) over an ordered near query of "hundred" then a clone of "forty"
+   * (slop 4). Checks the hits against the listed document ids.
+   */
+  @Test
+  public void testNpeInSpanNearInSpanFirstInSpanNot() throws Exception {
+    final int limit = 5;
+    SpanTermQuery hundred = new SpanTermQuery(new Term("field", "hundred"));
+    SpanTermQuery forty = new SpanTermQuery(new Term("field", "forty"));
+    SpanTermQuery fortyClone = (SpanTermQuery) forty.clone();
+
+    SpanFirstQuery include = new SpanFirstQuery(forty, limit);
+    SpanQuery[] nearClauses = {hundred, fortyClone};
+    SpanNearQuery near = new SpanNearQuery(nearClauses, limit - 1, true);
+    SpanFirstQuery exclude = new SpanFirstQuery(near, limit - 1);
+    SpanNotQuery notExcluded = new SpanNotQuery(include, exclude);
+
+    final int[] expected = {
+        40, 41, 42, 43, 44, 45, 46, 47, 48, 49,
+        1040, 1041, 1042, 1043, 1044, 1045, 1046, 1047, 1048, 1049,
+        1140, 1141, 1142, 1143, 1144, 1145, 1146, 1147, 1148, 1149,
+        1240, 1241, 1242, 1243, 1244, 1245, 1246, 1247, 1248, 1249,
+        1340, 1341, 1342, 1343, 1344, 1345, 1346, 1347, 1348, 1349,
+        1440, 1441, 1442, 1443, 1444, 1445, 1446, 1447, 1448, 1449,
+        1540, 1541, 1542, 1543, 1544, 1545, 1546, 1547, 1548, 1549,
+        1640, 1641, 1642, 1643, 1644, 1645, 1646, 1647, 1648, 1649,
+        1740, 1741, 1742, 1743, 1744, 1745, 1746, 1747, 1748, 1749,
+        1840, 1841, 1842, 1843, 1844, 1845, 1846, 1847, 1848, 1849,
+        1940, 1941, 1942, 1943, 1944, 1945, 1946, 1947, 1948, 1949
+    };
+    checkHits(notExcluded, expected);
+  }
+
+  /**
+   * Runs a {@link SpanFirstQuery} over "five" with end 1, checks the hits, and
+   * checks that two of them explain with a positive value.
+   */
+  @Test
+  public void testSpanFirst() throws Exception {
+    SpanTermQuery five = new SpanTermQuery(new Term("field", "five"));
+    SpanFirstQuery leadingFive = new SpanFirstQuery(five, 1);
+
+    final int[] expected = {
+        5,
+        500, 501, 502, 503, 504, 505, 506, 507, 508, 509,
+        510, 511, 512, 513, 514, 515, 516, 517, 518, 519,
+        520, 521, 522, 523, 524, 525, 526, 527, 528, 529,
+        530, 531, 532, 533, 534, 535, 536, 537, 538, 539,
+        540, 541, 542, 543, 544, 545, 546, 547, 548, 549,
+        550, 551, 552, 553, 554, 555, 556, 557, 558, 559,
+        560, 561, 562, 563, 564, 565, 566, 567, 568, 569,
+        570, 571, 572, 573, 574, 575, 576, 577, 578, 579,
+        580, 581, 582, 583, 584, 585, 586, 587, 588, 589,
+        590, 591, 592, 593, 594, 595, 596, 597, 598, 599
+    };
+    checkHits(leadingFive, expected);
+
+    assertTrue(0.0f < searcher.explain(leadingFive, 5).getValue());
+    assertTrue(0.0f < searcher.explain(leadingFive, 599).getValue());
+  }
+
+  /**
+   * Runs three {@link SpanPositionRangeQuery} instances over "five" with
+   * (start, end) of (1, 2), (0, 1) and (6, 7), checking the hits of each. The
+   * first one is also checked to explain two of its hits with a positive value.
+   */
+  @Test
+  public void testSpanPositionRange() throws Exception {
+    SpanTermQuery five = new SpanTermQuery(new Term("field", "five"));
+
+    // start = 1, end = 2
+    SpanPositionRangeQuery secondSlot = new SpanPositionRangeQuery(five, 1, 2);
+    checkHits(secondSlot, new int[] {25, 35, 45, 55, 65, 75, 85, 95});
+    assertTrue(0.0f < searcher.explain(secondSlot, 25).getValue());
+    assertTrue(0.0f < searcher.explain(secondSlot, 95).getValue());
+
+    // start = 0, end = 1
+    SpanPositionRangeQuery firstSlot = new SpanPositionRangeQuery(five, 0, 1);
+    final int[] firstSlotHits = {
+        5,
+        500, 501, 502, 503, 504, 505, 506, 507, 508, 509,
+        510, 511, 512, 513, 514, 515, 516, 517, 518, 519,
+        520, 521, 522, 523, 524, 525, 526, 527, 528, 529,
+        530, 531, 532, 533, 534, 535, 536, 537, 538, 539,
+        540, 541, 542, 543, 544, 545, 546, 547, 548, 549,
+        550, 551, 552, 553, 554, 555, 556, 557, 558, 559,
+        560, 561, 562, 563, 564, 565, 566, 567, 568, 569,
+        570, 571, 572, 573, 574, 575, 576, 577, 578, 579,
+        580, 581, 582, 583, 584, 585, 586, 587, 588, 589,
+        590, 591, 592, 593, 594, 595, 596, 597, 598, 599
+    };
+    checkHits(firstSlot, firstSlotHits);
+
+    // start = 6, end = 7: no hits expected
+    SpanPositionRangeQuery seventhSlot = new SpanPositionRangeQuery(five, 6, 7);
+    checkHits(seventhSlot, new int[0]);
+  }
+
+ @Test
+ public void testSpanPayloadCheck() throws Exception {
+ SpanTermQuery term1 = new SpanTermQuery(new Term("field", "five"));
+ Payload pay = new Payload(("pos: " + 5).getBytes());
+ SpanQuery query = new SpanPayloadCheckQuery(term1, Collections.singletonList(pay.getData()));
+ checkHits(query, new int[]
+ {1125, 1135, 1145, 1155, 1165, 1175, 1185, 1195, 1225, 1235, 1245, 1255, 1265, 1275, 1285, 1295, 1325, 1335, 1345, 1355, 1365, 1375, 1385, 1395, 1425, 1435, 1445, 1455, 1465, 1475, 1485, 1495, 1525, 1535, 1545, 1555, 1565, 1575, 1585, 1595, 1625, 1635, 1645, 1655, 1665, 1675, 1685, 1695, 1725, 1735, 1745, 1755, 1765, 1775, 1785, 1795, 1825, 1835, 1845, 1855, 1865, 1875, 1885, 1895, 1925, 1935, 1945, 1955, 1965, 1975, 1985, 1995});
+ assertTrue(searcher.explain(query, 1125).getValue() > 0.0f);
+
+ SpanTermQuery term2 = new SpanTermQuery(new Term("field", "hundred"));
+ SpanNearQuery snq;
+ SpanQuery[] clauses;
+ List Returns true iff there is such
+ * a match. Behaves as if written:
+ * boolean skipTo(int target) {
+ * do {
+ * if (!next())
+ * return false;
+ * } while (target > doc());
+ * return true;
+ * }
+ *
+ */
+  private boolean skipToAccoringToJavaDocs(Spans s, int target)
+      throws Exception {
+    // Advance one span at a time and stop at the first one whose document is
+    // not before target; report false once the spans are exhausted.
+    while (s.next()) {
+      if (s.doc() >= target) {
+        return true;
+      }
+    }
+    return false;
+  }
+
+ private void checkHits(Query query, int[] results) throws IOException {
+ CheckHits.checkHits(random, query, "field", searcher, results);
+ }
+}