package org.apache.lucene.search.highlight;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.IOException;

import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Field.Index;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.Field.TermVector;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermPositionVector;
import org.apache.lucene.search.Collector;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TopDocs;

import org.apache.lucene.search.spans.SpanNearQuery;
import org.apache.lucene.search.spans.SpanQuery;
import org.apache.lucene.search.spans.SpanTermQuery;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.LockObtainFailedException;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.FixedBitSet;

/**
 * Checks phrase and span highlighting against token streams rebuilt from
 * stored term vectors.
 *
 * <p>Most tests index one document from a hand-built token stream, then assert
 * that highlighting the stream rebuilt from the term vector yields the same
 * best fragment as highlighting a fresh copy of the hand-built stream.
 */
public class HighlighterPhraseTest extends LuceneTestCase {
  private static final String FIELD = "text";

  /**
   * Exact phrase "fox jumped" over a stream in which "jump" and "jumped"
   * occupy the same position: one hit, and identical fragments from both
   * token sources.
   */
  public void testConcurrentPhrase() throws CorruptIndexException,
      LockObtainFailedException, IOException, InvalidTokenOffsetsException {
    final String TEXT = "the fox jumped";
    final Directory directory = newDirectory();
    try {
      indexSingleDocument(directory, new Field(FIELD,
          new TokenStreamConcurrent(), TermVector.WITH_POSITIONS_OFFSETS));
      final IndexReader indexReader = IndexReader.open(directory, true);
      try {
        assertEquals(1, indexReader.numDocs());
        final IndexSearcher indexSearcher = newSearcher(indexReader);
        try {
          final PhraseQuery phraseQuery = new PhraseQuery();
          phraseQuery.add(new Term(FIELD, "fox"));
          phraseQuery.add(new Term(FIELD, "jumped"));
          phraseQuery.setSlop(0);
          final TopDocs hits = indexSearcher.search(phraseQuery, 1);
          assertEquals(1, hits.totalHits);

          final Highlighter highlighter = newHighlighter(phraseQuery);
          final TokenStream tokenStream =
              termVectorTokenStream(indexReader, 0, false);
          assertEquals(
              highlighter.getBestFragment(new TokenStreamConcurrent(), TEXT),
              highlighter.getBestFragment(tokenStream, TEXT));
        } finally {
          indexSearcher.close();
        }
      } finally {
        indexReader.close();
      }
    } finally {
      // Closed in its own finally so a failing reader close cannot leak it.
      directory.close();
    }
  }

  /**
   * Same scenario as {@link #testConcurrentPhrase()} but expressed as an
   * ordered, zero-slop {@link SpanNearQuery}; matching documents are gathered
   * with a custom {@link Collector} and each one is highlighted.
   */
  public void testConcurrentSpan() throws CorruptIndexException,
      LockObtainFailedException, IOException, InvalidTokenOffsetsException {
    final String TEXT = "the fox jumped";
    final Directory directory = newDirectory();
    try {
      indexSingleDocument(directory, new Field(FIELD,
          new TokenStreamConcurrent(), TermVector.WITH_POSITIONS_OFFSETS));
      final IndexReader indexReader = IndexReader.open(directory, true);
      try {
        assertEquals(1, indexReader.numDocs());
        final IndexSearcher indexSearcher = newSearcher(indexReader);
        try {
          final Query phraseQuery = new SpanNearQuery(new SpanQuery[] {
              new SpanTermQuery(new Term(FIELD, "fox")),
              new SpanTermQuery(new Term(FIELD, "jumped")) }, 0, true);
          final FixedBitSet bitset = new FixedBitSet(indexReader.maxDoc());
          indexSearcher.search(phraseQuery, new Collector() {
            private int baseDoc;

            @Override
            public boolean acceptsDocsOutOfOrder() {
              return true;
            }

            @Override
            public void collect(int i) throws IOException {
              // Doc ids arrive relative to the current reader; rebase them.
              bitset.set(this.baseDoc + i);
            }

            @Override
            public void setNextReader(IndexReader indexreader, int i)
                throws IOException {
              this.baseDoc = i;
            }

            @Override
            public void setScorer(org.apache.lucene.search.Scorer scorer)
                throws IOException {
              // Do Nothing
            }
          });
          assertEquals(1, bitset.cardinality());

          final int maxDoc = indexReader.maxDoc();
          final Highlighter highlighter = newHighlighter(phraseQuery);
          // Visit EVERY collected document. The previous bound
          // (docId < maxDoc - 1) excluded the last document, so with a single
          // document the assertions below never executed.
          int docId = bitset.nextSetBit(0);
          while (docId >= 0) {
            assertEquals(0, docId);
            final TokenStream tokenStream =
                termVectorTokenStream(indexReader, docId, false);
            assertEquals(
                highlighter.getBestFragment(new TokenStreamConcurrent(), TEXT),
                highlighter.getBestFragment(tokenStream, TEXT));
            // Only ask for the next bit while the start index is still inside
            // the bitset (it was sized to maxDoc).
            final int next = docId + 1;
            docId = next < maxDoc ? bitset.nextSetBit(next) : -1;
          }
        } finally {
          indexSearcher.close();
        }
      } finally {
        indexReader.close();
      }
    } finally {
      directory.close();
    }
  }

  /**
   * Exact phrase "did jump" over a stream where "jump" is two positions after
   * "did": no hit is expected, and both token sources must still produce the
   * same fragment.
   */
  public void testSparsePhrase() throws CorruptIndexException,
      LockObtainFailedException, IOException, InvalidTokenOffsetsException {
    final String TEXT = "the fox did not jump";
    final Directory directory = newDirectory();
    try {
      indexSingleDocument(directory, new Field(FIELD, new TokenStreamSparse(),
          TermVector.WITH_POSITIONS_OFFSETS));
      final IndexReader indexReader = IndexReader.open(directory, true);
      try {
        assertEquals(1, indexReader.numDocs());
        final IndexSearcher indexSearcher = newSearcher(indexReader);
        try {
          final PhraseQuery phraseQuery = new PhraseQuery();
          phraseQuery.add(new Term(FIELD, "did"));
          phraseQuery.add(new Term(FIELD, "jump"));
          phraseQuery.setSlop(0);
          final TopDocs hits = indexSearcher.search(phraseQuery, 1);
          assertEquals(0, hits.totalHits);

          final Highlighter highlighter = newHighlighter(phraseQuery);
          final TokenStream tokenStream =
              termVectorTokenStream(indexReader, 0, false);
          assertEquals(
              highlighter.getBestFragment(new TokenStreamSparse(), TEXT),
              highlighter.getBestFragment(tokenStream, TEXT));
        } finally {
          indexSearcher.close();
        }
      } finally {
        indexReader.close();
      }
    } finally {
      directory.close();
    }
  }

  /**
   * Indexes the plain text with offsets-only term vectors and searches "did
   * jump" with slop 1: one hit, and the term-vector stream (requested with
   * the "contiguous positions" flag set) must highlight both query terms.
   */
  public void testSparsePhraseWithNoPositions() throws CorruptIndexException,
      LockObtainFailedException, IOException, InvalidTokenOffsetsException {
    final String TEXT = "the fox did not jump";
    final Directory directory = newDirectory();
    try {
      indexSingleDocument(directory, new Field(FIELD, TEXT, Store.YES,
          Index.ANALYZED, TermVector.WITH_OFFSETS));
      final IndexReader indexReader = IndexReader.open(directory, true);
      try {
        assertEquals(1, indexReader.numDocs());
        final IndexSearcher indexSearcher = newSearcher(indexReader);
        try {
          final PhraseQuery phraseQuery = new PhraseQuery();
          phraseQuery.add(new Term(FIELD, "did"));
          phraseQuery.add(new Term(FIELD, "jump"));
          phraseQuery.setSlop(1);
          final TopDocs hits = indexSearcher.search(phraseQuery, 1);
          assertEquals(1, hits.totalHits);

          final Highlighter highlighter = newHighlighter(phraseQuery);
          final TokenStream tokenStream =
              termVectorTokenStream(indexReader, 0, true);
          // The expected value carries the <B>...</B> tags; they had been
          // stripped from this literal.
          // NOTE(review): assumes SimpleHTMLFormatter's no-arg default tags
          // are <B> and </B> - confirm against the formatter.
          assertEquals("the fox <B>did</B> not <B>jump</B>",
              highlighter.getBestFragment(tokenStream, TEXT));
        } finally {
          indexSearcher.close();
        }
      } finally {
        indexReader.close();
      }
    } finally {
      directory.close();
    }
  }

  /**
   * Same scenario as {@link #testSparsePhrase()} but expressed as an ordered,
   * zero-slop {@link SpanNearQuery}: no hit, identical fragments.
   */
  public void testSparseSpan() throws CorruptIndexException,
      LockObtainFailedException, IOException, InvalidTokenOffsetsException {
    final String TEXT = "the fox did not jump";
    final Directory directory = newDirectory();
    try {
      indexSingleDocument(directory, new Field(FIELD, new TokenStreamSparse(),
          TermVector.WITH_POSITIONS_OFFSETS));
      final IndexReader indexReader = IndexReader.open(directory, true);
      try {
        assertEquals(1, indexReader.numDocs());
        final IndexSearcher indexSearcher = newSearcher(indexReader);
        try {
          final Query phraseQuery = new SpanNearQuery(new SpanQuery[] {
              new SpanTermQuery(new Term(FIELD, "did")),
              new SpanTermQuery(new Term(FIELD, "jump")) }, 0, true);

          final TopDocs hits = indexSearcher.search(phraseQuery, 1);
          assertEquals(0, hits.totalHits);

          final Highlighter highlighter = newHighlighter(phraseQuery);
          final TokenStream tokenStream =
              termVectorTokenStream(indexReader, 0, false);
          assertEquals(
              highlighter.getBestFragment(new TokenStreamSparse(), TEXT),
              highlighter.getBestFragment(tokenStream, TEXT));
        } finally {
          indexSearcher.close();
        }
      } finally {
        indexReader.close();
      }
    } finally {
      directory.close();
    }
  }

  /** Writes one document holding only {@code field} into {@code directory}. */
  private void indexSingleDocument(Directory directory, Field field)
      throws IOException {
    final IndexWriter indexWriter = new IndexWriter(directory,
        newIndexWriterConfig(TEST_VERSION_CURRENT,
            new MockAnalyzer(random, MockTokenizer.WHITESPACE, false)));
    try {
      final Document document = new Document();
      document.add(field);
      indexWriter.addDocument(document);
    } finally {
      indexWriter.close();
    }
  }

  /** Builds a highlighter that scores fragments against {@code query}. */
  private static Highlighter newHighlighter(Query query) {
    return new Highlighter(new SimpleHTMLFormatter(), new SimpleHTMLEncoder(),
        new QueryScorer(query));
  }

  /**
   * Rebuilds a token stream from the {@link #FIELD} term vector of
   * {@code docId}; {@code positionsContiguous} is passed straight through to
   * {@code TokenSources.getTokenStream}.
   */
  private static TokenStream termVectorTokenStream(IndexReader reader,
      int docId, boolean positionsContiguous) throws IOException {
    final TermPositionVector vector =
        (TermPositionVector) reader.getTermFreqVector(docId, FIELD);
    return TokenSources.getTokenStream(vector, positionsContiguous);
  }

  /**
   * Token stream that replays a fixed token array, copying each token's term
   * text, offsets and position increment into the stream's attributes.
   */
  private abstract static class FixedTokenStream extends TokenStream {
    private final Token[] tokens;

    /** Index of the token most recently emitted; -1 before the first. */
    private int i = -1;

    private final CharTermAttribute termAttribute = addAttribute(CharTermAttribute.class);
    private final OffsetAttribute offsetAttribute = addAttribute(OffsetAttribute.class);
    private final PositionIncrementAttribute positionIncrementAttribute = addAttribute(PositionIncrementAttribute.class);

    FixedTokenStream(Token[] tokens) {
      this.tokens = tokens;
    }

    @Override
    public final boolean incrementToken() throws IOException {
      this.i++;
      if (this.i >= this.tokens.length) {
        return false;
      }
      clearAttributes();
      final Token token = this.tokens[this.i];
      termAttribute.setEmpty().append(token);
      offsetAttribute.setOffset(token.startOffset(), token.endOffset());
      positionIncrementAttribute.setPositionIncrement(token
          .getPositionIncrement());
      return true;
    }

    @Override
    public void reset() {
      // The tokens are never modified while streaming, so rewinding the
      // cursor is enough to replay them.
      this.i = -1;
    }
  }

  /**
   * Tokens "the", "fox", "did", "jump" for the text "the fox did not jump":
   * "not" is omitted and "jump" carries a position increment of 2.
   */
  private static final class TokenStreamSparse extends FixedTokenStream {
    public TokenStreamSparse() {
      super(createTokens());
    }

    private static Token[] createTokens() {
      final Token[] tokens = new Token[] {
          new Token(new char[] { 't', 'h', 'e' }, 0, 3, 0, 3),
          new Token(new char[] { 'f', 'o', 'x' }, 0, 3, 4, 7),
          new Token(new char[] { 'd', 'i', 'd' }, 0, 3, 8, 11),
          new Token(new char[] { 'j', 'u', 'm', 'p' }, 0, 4, 16, 20) };
      tokens[3].setPositionIncrement(2);
      return tokens;
    }
  }

  /**
   * Tokens "the", "fox", "jump", "jumped" for the text "the fox jumped":
   * "jump" and "jumped" share offsets 8-14 and "jumped" has a position
   * increment of 0, so the two are stacked at the same position.
   */
  private static final class TokenStreamConcurrent extends FixedTokenStream {
    public TokenStreamConcurrent() {
      super(createTokens());
    }

    private static Token[] createTokens() {
      final Token[] tokens = new Token[] {
          new Token(new char[] { 't', 'h', 'e' }, 0, 3, 0, 3),
          new Token(new char[] { 'f', 'o', 'x' }, 0, 3, 4, 7),
          new Token(new char[] { 'j', 'u', 'm', 'p' }, 0, 4, 8, 14),
          new Token(new char[] { 'j', 'u', 'm', 'p', 'e', 'd' }, 0, 6, 8, 14) };
      tokens[3].setPositionIncrement(0);
      return tokens;
    }
  }

}