X-Git-Url: https://git.mdrn.pl/pylucene.git/blobdiff_plain/a2e61f0c04805cfcb8706176758d1283c7e3a55c..aaeed5504b982cf3545252ab528713250aa33eed:/lucene-java-3.4.0/lucene/contrib/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterPhraseTest.java?ds=inline
diff --git a/lucene-java-3.4.0/lucene/contrib/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterPhraseTest.java b/lucene-java-3.4.0/lucene/contrib/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterPhraseTest.java
deleted file mode 100644
index 805283c..0000000
--- a/lucene-java-3.4.0/lucene/contrib/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterPhraseTest.java
+++ /dev/null
@@ -1,377 +0,0 @@
-package org.apache.lucene.search.highlight;
-
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import java.io.IOException;
-
-import org.apache.lucene.analysis.MockAnalyzer;
-import org.apache.lucene.analysis.MockTokenizer;
-import org.apache.lucene.analysis.Token;
-import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
-import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
-import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
-import org.apache.lucene.document.Document;
-import org.apache.lucene.document.Field;
-import org.apache.lucene.document.Field.Index;
-import org.apache.lucene.document.Field.Store;
-import org.apache.lucene.document.Field.TermVector;
-import org.apache.lucene.index.CorruptIndexException;
-import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.index.IndexWriter;
-import org.apache.lucene.index.Term;
-import org.apache.lucene.index.TermPositionVector;
-import org.apache.lucene.search.Collector;
-import org.apache.lucene.search.IndexSearcher;
-import org.apache.lucene.search.PhraseQuery;
-import org.apache.lucene.search.Query;
-import org.apache.lucene.search.TopDocs;
-
-import org.apache.lucene.search.spans.SpanNearQuery;
-import org.apache.lucene.search.spans.SpanQuery;
-import org.apache.lucene.search.spans.SpanTermQuery;
-import org.apache.lucene.store.Directory;
-import org.apache.lucene.store.LockObtainFailedException;
-import org.apache.lucene.util.LuceneTestCase;
-import org.apache.lucene.util.FixedBitSet;
-
-public class HighlighterPhraseTest extends LuceneTestCase {
- private static final String FIELD = "text";
- public void testConcurrentPhrase() throws CorruptIndexException,
- LockObtainFailedException, IOException, InvalidTokenOffsetsException {
- final String TEXT = "the fox jumped";
- final Directory directory = newDirectory();
- final IndexWriter indexWriter = new IndexWriter(directory,
- newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random, MockTokenizer.WHITESPACE, false)));
- try {
- final Document document = new Document();
- document.add(new Field(FIELD, new TokenStreamConcurrent(),
- TermVector.WITH_POSITIONS_OFFSETS));
- indexWriter.addDocument(document);
- } finally {
- indexWriter.close();
- }
- final IndexReader indexReader = IndexReader.open(directory, true);
- try {
- assertEquals(1, indexReader.numDocs());
- final IndexSearcher indexSearcher = newSearcher(indexReader);
- try {
- final PhraseQuery phraseQuery = new PhraseQuery();
- phraseQuery.add(new Term(FIELD, "fox"));
- phraseQuery.add(new Term(FIELD, "jumped"));
- phraseQuery.setSlop(0);
- TopDocs hits = indexSearcher.search(phraseQuery, 1);
- assertEquals(1, hits.totalHits);
- final Highlighter highlighter = new Highlighter(
- new SimpleHTMLFormatter(), new SimpleHTMLEncoder(),
- new QueryScorer(phraseQuery));
-
- final TokenStream tokenStream = TokenSources
- .getTokenStream((TermPositionVector) indexReader.getTermFreqVector(
- 0, FIELD), false);
- assertEquals(highlighter.getBestFragment(new TokenStreamConcurrent(),
- TEXT), highlighter.getBestFragment(tokenStream, TEXT));
-
- } finally {
- indexSearcher.close();
- }
- } finally {
- indexReader.close();
- directory.close();
- }
- }
-
- public void testConcurrentSpan() throws CorruptIndexException,
- LockObtainFailedException, IOException, InvalidTokenOffsetsException {
- final String TEXT = "the fox jumped";
- final Directory directory = newDirectory();
- final IndexWriter indexWriter = new IndexWriter(directory,
- newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random, MockTokenizer.WHITESPACE, false)));
- try {
- final Document document = new Document();
- document.add(new Field(FIELD, new TokenStreamConcurrent(),
- TermVector.WITH_POSITIONS_OFFSETS));
- indexWriter.addDocument(document);
- } finally {
- indexWriter.close();
- }
- final IndexReader indexReader = IndexReader.open(directory, true);
- try {
- assertEquals(1, indexReader.numDocs());
- final IndexSearcher indexSearcher = newSearcher(indexReader);
- try {
- final Query phraseQuery = new SpanNearQuery(new SpanQuery[] {
- new SpanTermQuery(new Term(FIELD, "fox")),
- new SpanTermQuery(new Term(FIELD, "jumped")) }, 0, true);
- final FixedBitSet bitset = new FixedBitSet(indexReader.maxDoc());
- indexSearcher.search(phraseQuery, new Collector() {
- private int baseDoc;
-
- @Override
- public boolean acceptsDocsOutOfOrder() {
- return true;
- }
-
- @Override
- public void collect(int i) throws IOException {
- bitset.set(this.baseDoc + i);
- }
-
- @Override
- public void setNextReader(IndexReader indexreader, int i)
- throws IOException {
- this.baseDoc = i;
- }
-
- @Override
- public void setScorer(org.apache.lucene.search.Scorer scorer)
- throws IOException {
- // Do Nothing
- }
- });
- assertEquals(1, bitset.cardinality());
- final int maxDoc = indexReader.maxDoc();
- final Highlighter highlighter = new Highlighter(
- new SimpleHTMLFormatter(), new SimpleHTMLEncoder(),
- new QueryScorer(phraseQuery));
- for (int position = bitset.nextSetBit(0); position >= 0 && position < maxDoc-1; position = bitset
- .nextSetBit(position + 1)) {
- assertEquals(0, position);
- final TokenStream tokenStream = TokenSources.getTokenStream(
- (TermPositionVector) indexReader.getTermFreqVector(position,
- FIELD), false);
- assertEquals(highlighter.getBestFragment(new TokenStreamConcurrent(),
- TEXT), highlighter.getBestFragment(tokenStream, TEXT));
- }
- } finally {
- indexSearcher.close();
- }
- } finally {
- indexReader.close();
- directory.close();
- }
- }
-
- public void testSparsePhrase() throws CorruptIndexException,
- LockObtainFailedException, IOException, InvalidTokenOffsetsException {
- final String TEXT = "the fox did not jump";
- final Directory directory = newDirectory();
- final IndexWriter indexWriter = new IndexWriter(directory,
- newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random, MockTokenizer.WHITESPACE, false)));
- try {
- final Document document = new Document();
- document.add(new Field(FIELD, new TokenStreamSparse(),
- TermVector.WITH_POSITIONS_OFFSETS));
- indexWriter.addDocument(document);
- } finally {
- indexWriter.close();
- }
- final IndexReader indexReader = IndexReader.open(directory, true);
- try {
- assertEquals(1, indexReader.numDocs());
- final IndexSearcher indexSearcher = newSearcher(indexReader);
- try {
- final PhraseQuery phraseQuery = new PhraseQuery();
- phraseQuery.add(new Term(FIELD, "did"));
- phraseQuery.add(new Term(FIELD, "jump"));
- phraseQuery.setSlop(0);
- TopDocs hits = indexSearcher.search(phraseQuery, 1);
- assertEquals(0, hits.totalHits);
- final Highlighter highlighter = new Highlighter(
- new SimpleHTMLFormatter(), new SimpleHTMLEncoder(),
- new QueryScorer(phraseQuery));
- final TokenStream tokenStream = TokenSources
- .getTokenStream((TermPositionVector) indexReader.getTermFreqVector(
- 0, FIELD), false);
- assertEquals(
- highlighter.getBestFragment(new TokenStreamSparse(), TEXT),
- highlighter.getBestFragment(tokenStream, TEXT));
- } finally {
- indexSearcher.close();
- }
- } finally {
- indexReader.close();
- directory.close();
- }
- }
-
- public void testSparsePhraseWithNoPositions() throws CorruptIndexException,
- LockObtainFailedException, IOException, InvalidTokenOffsetsException {
- final String TEXT = "the fox did not jump";
- final Directory directory = newDirectory();
- final IndexWriter indexWriter = new IndexWriter(directory,
- newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random, MockTokenizer.WHITESPACE, false)));
- try {
- final Document document = new Document();
- document.add(new Field(FIELD, TEXT, Store.YES, Index.ANALYZED,
- TermVector.WITH_OFFSETS));
- indexWriter.addDocument(document);
- } finally {
- indexWriter.close();
- }
- final IndexReader indexReader = IndexReader.open(directory, true);
- try {
- assertEquals(1, indexReader.numDocs());
- final IndexSearcher indexSearcher = newSearcher(indexReader);
- try {
- final PhraseQuery phraseQuery = new PhraseQuery();
- phraseQuery.add(new Term(FIELD, "did"));
- phraseQuery.add(new Term(FIELD, "jump"));
- phraseQuery.setSlop(1);
- TopDocs hits = indexSearcher.search(phraseQuery, 1);
- assertEquals(1, hits.totalHits);
- final Highlighter highlighter = new Highlighter(
- new SimpleHTMLFormatter(), new SimpleHTMLEncoder(),
- new QueryScorer(phraseQuery));
- final TokenStream tokenStream = TokenSources.getTokenStream(
- (TermPositionVector) indexReader.getTermFreqVector(0, FIELD), true);
- assertEquals("the fox did not jump", highlighter
- .getBestFragment(tokenStream, TEXT));
- } finally {
- indexSearcher.close();
- }
- } finally {
- indexReader.close();
- directory.close();
- }
- }
-
- public void testSparseSpan() throws CorruptIndexException,
- LockObtainFailedException, IOException, InvalidTokenOffsetsException {
- final String TEXT = "the fox did not jump";
- final Directory directory = newDirectory();
- final IndexWriter indexWriter = new IndexWriter(directory,
- newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random, MockTokenizer.WHITESPACE, false)));
- try {
- final Document document = new Document();
- document.add(new Field(FIELD, new TokenStreamSparse(),
- TermVector.WITH_POSITIONS_OFFSETS));
- indexWriter.addDocument(document);
- } finally {
- indexWriter.close();
- }
- final IndexReader indexReader = IndexReader.open(directory, true);
- try {
- assertEquals(1, indexReader.numDocs());
- final IndexSearcher indexSearcher = newSearcher(indexReader);
- try {
- final Query phraseQuery = new SpanNearQuery(new SpanQuery[] {
- new SpanTermQuery(new Term(FIELD, "did")),
- new SpanTermQuery(new Term(FIELD, "jump")) }, 0, true);
-
- TopDocs hits = indexSearcher.search(phraseQuery, 1);
- assertEquals(0, hits.totalHits);
- final Highlighter highlighter = new Highlighter(
- new SimpleHTMLFormatter(), new SimpleHTMLEncoder(),
- new QueryScorer(phraseQuery));
- final TokenStream tokenStream = TokenSources
- .getTokenStream((TermPositionVector) indexReader.getTermFreqVector(
- 0, FIELD), false);
- assertEquals(
- highlighter.getBestFragment(new TokenStreamSparse(), TEXT),
- highlighter.getBestFragment(tokenStream, TEXT));
- } finally {
- indexSearcher.close();
- }
- } finally {
- indexReader.close();
- directory.close();
- }
- }
-
- private static final class TokenStreamSparse extends TokenStream {
- private Token[] tokens;
-
- private int i = -1;
-
- private final CharTermAttribute termAttribute = addAttribute(CharTermAttribute.class);
- private final OffsetAttribute offsetAttribute = addAttribute(OffsetAttribute.class);
- private final PositionIncrementAttribute positionIncrementAttribute = addAttribute(PositionIncrementAttribute.class);
-
- public TokenStreamSparse() {
- reset();
- }
-
- @Override
- public boolean incrementToken() throws IOException {
- this.i++;
- if (this.i >= this.tokens.length) {
- return false;
- }
- clearAttributes();
- termAttribute.setEmpty().append(this.tokens[i]);
- offsetAttribute.setOffset(this.tokens[i].startOffset(), this.tokens[i]
- .endOffset());
- positionIncrementAttribute.setPositionIncrement(this.tokens[i]
- .getPositionIncrement());
- return true;
- }
-
- @Override
- public void reset() {
- this.i = -1;
- this.tokens = new Token[] {
- new Token(new char[] { 't', 'h', 'e' }, 0, 3, 0, 3),
- new Token(new char[] { 'f', 'o', 'x' }, 0, 3, 4, 7),
- new Token(new char[] { 'd', 'i', 'd' }, 0, 3, 8, 11),
- new Token(new char[] { 'j', 'u', 'm', 'p' }, 0, 4, 16, 20) };
- this.tokens[3].setPositionIncrement(2);
- }
- }
-
- private static final class TokenStreamConcurrent extends TokenStream {
- private Token[] tokens;
-
- private int i = -1;
-
- private final CharTermAttribute termAttribute = addAttribute(CharTermAttribute.class);
- private final OffsetAttribute offsetAttribute = addAttribute(OffsetAttribute.class);
- private final PositionIncrementAttribute positionIncrementAttribute = addAttribute(PositionIncrementAttribute.class);
-
- public TokenStreamConcurrent() {
- reset();
- }
-
- @Override
- public boolean incrementToken() throws IOException {
- this.i++;
- if (this.i >= this.tokens.length) {
- return false;
- }
- clearAttributes();
- termAttribute.setEmpty().append(this.tokens[i]);
- offsetAttribute.setOffset(this.tokens[i].startOffset(), this.tokens[i]
- .endOffset());
- positionIncrementAttribute.setPositionIncrement(this.tokens[i]
- .getPositionIncrement());
- return true;
- }
-
- @Override
- public void reset() {
- this.i = -1;
- this.tokens = new Token[] {
- new Token(new char[] { 't', 'h', 'e' }, 0, 3, 0, 3),
- new Token(new char[] { 'f', 'o', 'x' }, 0, 3, 4, 7),
- new Token(new char[] { 'j', 'u', 'm', 'p' }, 0, 4, 8, 14),
- new Token(new char[] { 'j', 'u', 'm', 'p', 'e', 'd' }, 0, 6, 8, 14) };
- this.tokens[3].setPositionIncrement(0);
- }
- }
-
-}