X-Git-Url: https://git.mdrn.pl/pylucene.git/blobdiff_plain/a2e61f0c04805cfcb8706176758d1283c7e3a55c..aaeed5504b982cf3545252ab528713250aa33eed:/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/search/spans/TestPayloadSpans.java diff --git a/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/search/spans/TestPayloadSpans.java b/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/search/spans/TestPayloadSpans.java deleted file mode 100644 index d2793e6..0000000 --- a/lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/search/spans/TestPayloadSpans.java +++ /dev/null @@ -1,543 +0,0 @@ -package org.apache.lucene.search.spans; - -/** - * Copyright 2004 The Apache Software Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import java.io.IOException; -import java.io.Reader; -import java.io.StringReader; -import java.util.Collection; -import java.util.HashSet; -import java.util.Set; - -import org.apache.lucene.analysis.Analyzer; -import org.apache.lucene.analysis.LowerCaseTokenizer; -import org.apache.lucene.analysis.TokenFilter; -import org.apache.lucene.analysis.TokenStream; -import org.apache.lucene.analysis.tokenattributes.PayloadAttribute; -import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; -import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; -import org.apache.lucene.document.Document; -import org.apache.lucene.document.Field; -import org.apache.lucene.index.CorruptIndexException; -import org.apache.lucene.index.RandomIndexWriter; -import org.apache.lucene.index.IndexReader; -import org.apache.lucene.index.Payload; -import org.apache.lucene.index.Term; -import org.apache.lucene.search.DefaultSimilarity; -import org.apache.lucene.search.IndexSearcher; -import org.apache.lucene.search.Similarity; -import org.apache.lucene.search.TermQuery; -import org.apache.lucene.search.TopDocs; -import org.apache.lucene.search.payloads.PayloadHelper; -import org.apache.lucene.search.payloads.PayloadSpanUtil; -import org.apache.lucene.store.Directory; -import org.apache.lucene.store.LockObtainFailedException; -import org.apache.lucene.util.LuceneTestCase; - -public class TestPayloadSpans extends LuceneTestCase { - private IndexSearcher searcher; - private Similarity similarity = new DefaultSimilarity(); - protected IndexReader indexReader; - private IndexReader closeIndexReader; - private Directory directory; - - @Override - public void setUp() throws Exception { - super.setUp(); - PayloadHelper helper = new PayloadHelper(); - searcher = helper.setUp(random, similarity, 1000); - indexReader = searcher.getIndexReader(); - } - - public void testSpanTermQuery() throws Exception { - SpanTermQuery stq; - Spans spans; - stq = new SpanTermQuery(new Term(PayloadHelper.FIELD, "seventy")); - spans = stq.getSpans(indexReader); - assertTrue("spans is null and it shouldn't be", spans != null); - checkSpans(spans, 100, 1, 1, 1); - - stq = new SpanTermQuery(new Term(PayloadHelper.NO_PAYLOAD_FIELD, "seventy")); - spans = stq.getSpans(indexReader); - assertTrue("spans is null and it shouldn't be", spans != null); - checkSpans(spans, 100, 0, 0, 0); - } - - public void testSpanFirst() throws IOException { - - SpanQuery match; - SpanFirstQuery sfq; - match = new SpanTermQuery(new Term(PayloadHelper.FIELD, "one")); - sfq = new SpanFirstQuery(match, 2); - Spans spans = sfq.getSpans(indexReader); - checkSpans(spans, 109, 1, 1, 1); - //Test more complicated subclause - SpanQuery[] clauses = new SpanQuery[2]; - clauses[0] = new SpanTermQuery(new Term(PayloadHelper.FIELD, "one")); - clauses[1] = new SpanTermQuery(new Term(PayloadHelper.FIELD, "hundred")); - match = new SpanNearQuery(clauses, 0, true); - sfq = new SpanFirstQuery(match, 2); - checkSpans(sfq.getSpans(indexReader), 100, 2, 1, 1); - - match = new SpanNearQuery(clauses, 0, false); - sfq = new SpanFirstQuery(match, 2); - checkSpans(sfq.getSpans(indexReader), 100, 2, 1, 1); - - } - - public void testSpanNot() throws Exception { - SpanQuery[] clauses = new SpanQuery[2]; - clauses[0] = new SpanTermQuery(new Term(PayloadHelper.FIELD, "one")); - clauses[1] = new SpanTermQuery(new Term(PayloadHelper.FIELD, "three")); - SpanQuery spq = new SpanNearQuery(clauses, 5, true); - SpanNotQuery snq = new SpanNotQuery(spq, new SpanTermQuery(new Term(PayloadHelper.FIELD, "two"))); - - - - Directory directory = newDirectory(); - RandomIndexWriter writer = new RandomIndexWriter(random, directory, - newIndexWriterConfig(TEST_VERSION_CURRENT, new PayloadAnalyzer()).setSimilarity(similarity)); - - Document doc = new Document(); - doc.add(newField(PayloadHelper.FIELD, "one two three one four three", - Field.Store.YES, Field.Index.ANALYZED)); - writer.addDocument(doc); - IndexReader reader = writer.getReader(); - writer.close(); - - checkSpans(snq.getSpans(reader), 1,new int[]{2}); - reader.close(); - directory.close(); - } - - public void testNestedSpans() throws Exception { - SpanTermQuery stq; - Spans spans; - IndexSearcher searcher = getSearcher(); - stq = new SpanTermQuery(new Term(PayloadHelper.FIELD, "mark")); - spans = stq.getSpans(searcher.getIndexReader()); - assertTrue("spans is null and it shouldn't be", spans != null); - checkSpans(spans, 0, null); - - - SpanQuery[] clauses = new SpanQuery[3]; - clauses[0] = new SpanTermQuery(new Term(PayloadHelper.FIELD, "rr")); - clauses[1] = new SpanTermQuery(new Term(PayloadHelper.FIELD, "yy")); - clauses[2] = new SpanTermQuery(new Term(PayloadHelper.FIELD, "xx")); - SpanNearQuery spanNearQuery = new SpanNearQuery(clauses, 12, false); - - spans = spanNearQuery.getSpans(searcher.getIndexReader()); - assertTrue("spans is null and it shouldn't be", spans != null); - checkSpans(spans, 2, new int[]{3,3}); - - - clauses[0] = new SpanTermQuery(new Term(PayloadHelper.FIELD, "xx")); - clauses[1] = new SpanTermQuery(new Term(PayloadHelper.FIELD, "rr")); - clauses[2] = new SpanTermQuery(new Term(PayloadHelper.FIELD, "yy")); - - spanNearQuery = new SpanNearQuery(clauses, 6, true); - - - spans = spanNearQuery.getSpans(searcher.getIndexReader()); - assertTrue("spans is null and it shouldn't be", spans != null); - checkSpans(spans, 1, new int[]{3}); - - clauses = new SpanQuery[2]; - - clauses[0] = new SpanTermQuery(new Term(PayloadHelper.FIELD, "xx")); - clauses[1] = new SpanTermQuery(new Term(PayloadHelper.FIELD, "rr")); - - spanNearQuery = new SpanNearQuery(clauses, 6, true); - - // xx within 6 of rr - - SpanQuery[] clauses2 = new SpanQuery[2]; - - clauses2[0] = new SpanTermQuery(new Term(PayloadHelper.FIELD, "yy")); - clauses2[1] = spanNearQuery; - - SpanNearQuery nestedSpanNearQuery = new SpanNearQuery(clauses2, 6, false); - - // yy within 6 of xx within 6 of rr - - spans = nestedSpanNearQuery.getSpans(searcher.getIndexReader()); - assertTrue("spans is null and it shouldn't be", spans != null); - checkSpans(spans, 2, new int[]{3,3}); - searcher.close(); - closeIndexReader.close(); - directory.close(); - } - - public void testFirstClauseWithoutPayload() throws Exception { - Spans spans; - IndexSearcher searcher = getSearcher(); - - SpanQuery[] clauses = new SpanQuery[3]; - clauses[0] = new SpanTermQuery(new Term(PayloadHelper.FIELD, "nopayload")); - clauses[1] = new SpanTermQuery(new Term(PayloadHelper.FIELD, "qq")); - clauses[2] = new SpanTermQuery(new Term(PayloadHelper.FIELD, "ss")); - - SpanNearQuery spanNearQuery = new SpanNearQuery(clauses, 6, true); - - SpanQuery[] clauses2 = new SpanQuery[2]; - - clauses2[0] = new SpanTermQuery(new Term(PayloadHelper.FIELD, "pp")); - clauses2[1] = spanNearQuery; - - SpanNearQuery snq = new SpanNearQuery(clauses2, 6, false); - - SpanQuery[] clauses3 = new SpanQuery[2]; - - clauses3[0] = new SpanTermQuery(new Term(PayloadHelper.FIELD, "np")); - clauses3[1] = snq; - - SpanNearQuery nestedSpanNearQuery = new SpanNearQuery(clauses3, 6, false); - - spans = nestedSpanNearQuery.getSpans(searcher.getIndexReader()); - assertTrue("spans is null and it shouldn't be", spans != null); - checkSpans(spans, 1, new int[]{3}); - searcher.close(); - closeIndexReader.close(); - directory.close(); - } - - public void testHeavilyNestedSpanQuery() throws Exception { - Spans spans; - IndexSearcher searcher = getSearcher(); - - SpanQuery[] clauses = new SpanQuery[3]; - clauses[0] = new SpanTermQuery(new Term(PayloadHelper.FIELD, "one")); - clauses[1] = new SpanTermQuery(new Term(PayloadHelper.FIELD, "two")); - clauses[2] = new SpanTermQuery(new Term(PayloadHelper.FIELD, "three")); - - SpanNearQuery spanNearQuery = new SpanNearQuery(clauses, 5, true); - - clauses = new SpanQuery[3]; - clauses[0] = spanNearQuery; - clauses[1] = new SpanTermQuery(new Term(PayloadHelper.FIELD, "five")); - clauses[2] = new SpanTermQuery(new Term(PayloadHelper.FIELD, "six")); - - SpanNearQuery spanNearQuery2 = new SpanNearQuery(clauses, 6, true); - - SpanQuery[] clauses2 = new SpanQuery[2]; - clauses2[0] = new SpanTermQuery(new Term(PayloadHelper.FIELD, "eleven")); - clauses2[1] = new SpanTermQuery(new Term(PayloadHelper.FIELD, "ten")); - SpanNearQuery spanNearQuery3 = new SpanNearQuery(clauses2, 2, false); - - SpanQuery[] clauses3 = new SpanQuery[3]; - clauses3[0] = new SpanTermQuery(new Term(PayloadHelper.FIELD, "nine")); - clauses3[1] = spanNearQuery2; - clauses3[2] = spanNearQuery3; - - SpanNearQuery nestedSpanNearQuery = new SpanNearQuery(clauses3, 6, false); - - spans = nestedSpanNearQuery.getSpans(searcher.getIndexReader()); - assertTrue("spans is null and it shouldn't be", spans != null); - checkSpans(spans, 2, new int[]{8, 8}); - searcher.close(); - closeIndexReader.close(); - directory.close(); - } - - public void testShrinkToAfterShortestMatch() throws CorruptIndexException, - LockObtainFailedException, IOException { - Directory directory = newDirectory(); - RandomIndexWriter writer = new RandomIndexWriter(random, directory, - newIndexWriterConfig(TEST_VERSION_CURRENT, new TestPayloadAnalyzer())); - - Document doc = new Document(); - doc.add(new Field("content", new StringReader("a b c d e f g h i j a k"))); - writer.addDocument(doc); - - IndexReader reader = writer.getReader(); - IndexSearcher is = newSearcher(reader); - writer.close(); - - SpanTermQuery stq1 = new SpanTermQuery(new Term("content", "a")); - SpanTermQuery stq2 = new SpanTermQuery(new Term("content", "k")); - SpanQuery[] sqs = { stq1, stq2 }; - SpanNearQuery snq = new SpanNearQuery(sqs, 1, true); - Spans spans = snq.getSpans(is.getIndexReader()); - - TopDocs topDocs = is.search(snq, 1); - Set payloadSet = new HashSet(); - for (int i = 0; i < topDocs.scoreDocs.length; i++) { - while (spans.next()) { - Collection payloads = spans.getPayload(); - - for (final byte [] payload : payloads) { - payloadSet.add(new String(payload)); - } - } - } - assertEquals(2, payloadSet.size()); - assertTrue(payloadSet.contains("a:Noise:10")); - assertTrue(payloadSet.contains("k:Noise:11")); - is.close(); - reader.close(); - directory.close(); - } - - public void testShrinkToAfterShortestMatch2() throws CorruptIndexException, - LockObtainFailedException, IOException { - Directory directory = newDirectory(); - RandomIndexWriter writer = new RandomIndexWriter(random, directory, - newIndexWriterConfig(TEST_VERSION_CURRENT, new TestPayloadAnalyzer())); - - Document doc = new Document(); - doc.add(new Field("content", new StringReader("a b a d k f a h i k a k"))); - writer.addDocument(doc); - IndexReader reader = writer.getReader(); - IndexSearcher is = newSearcher(reader); - writer.close(); - - SpanTermQuery stq1 = new SpanTermQuery(new Term("content", "a")); - SpanTermQuery stq2 = new SpanTermQuery(new Term("content", "k")); - SpanQuery[] sqs = { stq1, stq2 }; - SpanNearQuery snq = new SpanNearQuery(sqs, 0, true); - Spans spans = snq.getSpans(is.getIndexReader()); - - TopDocs topDocs = is.search(snq, 1); - Set payloadSet = new HashSet(); - for (int i = 0; i < topDocs.scoreDocs.length; i++) { - while (spans.next()) { - Collection payloads = spans.getPayload(); - for (final byte[] payload : payloads) { - payloadSet.add(new String(payload)); - } - } - } - assertEquals(2, payloadSet.size()); - assertTrue(payloadSet.contains("a:Noise:10")); - assertTrue(payloadSet.contains("k:Noise:11")); - is.close(); - reader.close(); - directory.close(); - } - - public void testShrinkToAfterShortestMatch3() throws CorruptIndexException, - LockObtainFailedException, IOException { - Directory directory = newDirectory(); - RandomIndexWriter writer = new RandomIndexWriter(random, directory, - newIndexWriterConfig(TEST_VERSION_CURRENT, new TestPayloadAnalyzer())); - - Document doc = new Document(); - doc.add(new Field("content", new StringReader("j k a l f k k p a t a k l k t a"))); - writer.addDocument(doc); - IndexReader reader = writer.getReader(); - IndexSearcher is = newSearcher(reader); - writer.close(); - - SpanTermQuery stq1 = new SpanTermQuery(new Term("content", "a")); - SpanTermQuery stq2 = new SpanTermQuery(new Term("content", "k")); - SpanQuery[] sqs = { stq1, stq2 }; - SpanNearQuery snq = new SpanNearQuery(sqs, 0, true); - Spans spans = snq.getSpans(is.getIndexReader()); - - TopDocs topDocs = is.search(snq, 1); - Set payloadSet = new HashSet(); - for (int i = 0; i < topDocs.scoreDocs.length; i++) { - while (spans.next()) { - Collection payloads = spans.getPayload(); - - for (final byte [] payload : payloads) { - payloadSet.add(new String(payload)); - } - } - } - assertEquals(2, payloadSet.size()); - if(VERBOSE) { - for (final String payload : payloadSet) - System.out.println("match:" + payload); - - } - assertTrue(payloadSet.contains("a:Noise:10")); - assertTrue(payloadSet.contains("k:Noise:11")); - is.close(); - reader.close(); - directory.close(); - } - - public void testPayloadSpanUtil() throws Exception { - Directory directory = newDirectory(); - RandomIndexWriter writer = new RandomIndexWriter(random, directory, - newIndexWriterConfig(TEST_VERSION_CURRENT, new PayloadAnalyzer()).setSimilarity(similarity)); - - Document doc = new Document(); - doc.add(newField(PayloadHelper.FIELD,"xx rr yy mm pp", Field.Store.YES, Field.Index.ANALYZED)); - writer.addDocument(doc); - - IndexReader reader = writer.getReader(); - writer.close(); - IndexSearcher searcher = newSearcher(reader); - - PayloadSpanUtil psu = new PayloadSpanUtil(searcher.getIndexReader()); - - Collection payloads = psu.getPayloadsForQuery(new TermQuery(new Term(PayloadHelper.FIELD, "rr"))); - if(VERBOSE) - System.out.println("Num payloads:" + payloads.size()); - for (final byte [] bytes : payloads) { - if(VERBOSE) - System.out.println(new String(bytes)); - } - searcher.close(); - reader.close(); - directory.close(); - } - - private void checkSpans(Spans spans, int expectedNumSpans, int expectedNumPayloads, - int expectedPayloadLength, int expectedFirstByte) throws IOException { - assertTrue("spans is null and it shouldn't be", spans != null); - //each position match should have a span associated with it, since there is just one underlying term query, there should - //only be one entry in the span - int seen = 0; - while (spans.next() == true) - { - //if we expect payloads, then isPayloadAvailable should be true - if (expectedNumPayloads > 0) { - assertTrue("isPayloadAvailable is not returning the correct value: " + spans.isPayloadAvailable() - + " and it should be: " + (expectedNumPayloads > 0), - spans.isPayloadAvailable() == true); - } else { - assertTrue("isPayloadAvailable should be false", spans.isPayloadAvailable() == false); - } - //See payload helper, for the PayloadHelper.FIELD field, there is a single byte payload at every token - if (spans.isPayloadAvailable()) { - Collection payload = spans.getPayload(); - assertTrue("payload Size: " + payload.size() + " is not: " + expectedNumPayloads, payload.size() == expectedNumPayloads); - for (final byte [] thePayload : payload) { - assertTrue("payload[0] Size: " + thePayload.length + " is not: " + expectedPayloadLength, - thePayload.length == expectedPayloadLength); - assertTrue(thePayload[0] + " does not equal: " + expectedFirstByte, thePayload[0] == expectedFirstByte); - - } - - } - seen++; - } - assertTrue(seen + " does not equal: " + expectedNumSpans, seen == expectedNumSpans); - } - - private IndexSearcher getSearcher() throws Exception { - directory = newDirectory(); - String[] docs = new String[]{"xx rr yy mm pp","xx yy mm rr pp", "nopayload qq ss pp np", "one two three four five six seven eight nine ten eleven", "nine one two three four five six seven eight eleven ten"}; - RandomIndexWriter writer = new RandomIndexWriter(random, directory, - newIndexWriterConfig(TEST_VERSION_CURRENT, new PayloadAnalyzer()).setSimilarity(similarity)); - - Document doc = null; - for(int i = 0; i < docs.length; i++) { - doc = new Document(); - String docText = docs[i]; - doc.add(newField(PayloadHelper.FIELD,docText, Field.Store.YES, Field.Index.ANALYZED)); - writer.addDocument(doc); - } - - closeIndexReader = writer.getReader(); - writer.close(); - - IndexSearcher searcher = newSearcher(closeIndexReader); - return searcher; - } - - private void checkSpans(Spans spans, int numSpans, int[] numPayloads) throws IOException { - int cnt = 0; - - while (spans.next() == true) { - if(VERBOSE) - System.out.println("\nSpans Dump --"); - if (spans.isPayloadAvailable()) { - Collection payload = spans.getPayload(); - if(VERBOSE) - System.out.println("payloads for span:" + payload.size()); - for (final byte [] bytes : payload) { - if(VERBOSE) - System.out.println("doc:" + spans.doc() + " s:" + spans.start() + " e:" + spans.end() + " " - + new String(bytes)); - } - - assertEquals(numPayloads[cnt],payload.size()); - } else { - assertFalse("Expected spans:" + numPayloads[cnt] + " found: 0",numPayloads.length > 0 && numPayloads[cnt] > 0 ); - } - cnt++; - } - - assertEquals(numSpans, cnt); - } - - final class PayloadAnalyzer extends Analyzer { - - @Override - public TokenStream tokenStream(String fieldName, Reader reader) { - TokenStream result = new LowerCaseTokenizer(TEST_VERSION_CURRENT, reader); - result = new PayloadFilter(result, fieldName); - return result; - } - } - - final class PayloadFilter extends TokenFilter { - String fieldName; - int numSeen = 0; - Set entities = new HashSet(); - Set nopayload = new HashSet(); - int pos; - PayloadAttribute payloadAtt; - CharTermAttribute termAtt; - PositionIncrementAttribute posIncrAtt; - - public PayloadFilter(TokenStream input, String fieldName) { - super(input); - this.fieldName = fieldName; - pos = 0; - entities.add("xx"); - entities.add("one"); - nopayload.add("nopayload"); - nopayload.add("np"); - termAtt = addAttribute(CharTermAttribute.class); - posIncrAtt = addAttribute(PositionIncrementAttribute.class); - payloadAtt = addAttribute(PayloadAttribute.class); - } - - @Override - public boolean incrementToken() throws IOException { - if (input.incrementToken()) { - String token = termAtt.toString(); - - if (!nopayload.contains(token)) { - if (entities.contains(token)) { - payloadAtt.setPayload(new Payload((token + ":Entity:"+ pos ).getBytes())); - } else { - payloadAtt.setPayload(new Payload((token + ":Noise:" + pos ).getBytes())); - } - } - pos += posIncrAtt.getPositionIncrement(); - return true; - } - return false; - } - } - - public final class TestPayloadAnalyzer extends Analyzer { - - @Override - public TokenStream tokenStream(String fieldName, Reader reader) { - TokenStream result = new LowerCaseTokenizer(TEST_VERSION_CURRENT, reader); - result = new PayloadFilter(result, fieldName); - return result; - } - } -}