X-Git-Url: https://git.mdrn.pl/pylucene.git/blobdiff_plain/a2e61f0c04805cfcb8706176758d1283c7e3a55c..aaeed5504b982cf3545252ab528713250aa33eed:/lucene-java-3.4.0/lucene/contrib/highlighter/src/test/org/apache/lucene/search/vectorhighlight/AbstractTestCase.java

diff --git a/lucene-java-3.4.0/lucene/contrib/highlighter/src/test/org/apache/lucene/search/vectorhighlight/AbstractTestCase.java b/lucene-java-3.4.0/lucene/contrib/highlighter/src/test/org/apache/lucene/search/vectorhighlight/AbstractTestCase.java
deleted file mode 100644
index 0f19ebf..0000000
--- a/lucene-java-3.4.0/lucene/contrib/highlighter/src/test/org/apache/lucene/search/vectorhighlight/AbstractTestCase.java
+++ /dev/null
@@ -1,438 +0,0 @@
-package org.apache.lucene.search.vectorhighlight;
-
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import java.io.IOException;
-import java.io.Reader;
-import java.util.Collection;
-
-import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.MockAnalyzer;
-import org.apache.lucene.analysis.MockTokenizer;
-import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.Tokenizer;
-import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
-import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
-import org.apache.lucene.document.Document;
-import org.apache.lucene.document.Field;
-import org.apache.lucene.document.Field.Index;
-import org.apache.lucene.document.Field.Store;
-import org.apache.lucene.document.Field.TermVector;
-import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.index.IndexWriter;
-import org.apache.lucene.index.IndexWriterConfig;
-import org.apache.lucene.index.Term;
-import org.apache.lucene.index.IndexWriterConfig.OpenMode;
-import org.apache.lucene.queryParser.QueryParser;
-import org.apache.lucene.search.DisjunctionMaxQuery;
-import org.apache.lucene.search.PhraseQuery;
-import org.apache.lucene.search.Query;
-import org.apache.lucene.search.TermQuery;
-import org.apache.lucene.store.Directory;
-import org.apache.lucene.util.LuceneTestCase;
-
-public abstract class AbstractTestCase extends LuceneTestCase {
-
-  protected final String F = "f";
-  protected final String F1 = "f1";
-  protected final String F2 = "f2";
-  protected Directory dir;
-  protected Analyzer analyzerW;
-  protected Analyzer analyzerB;
-  protected Analyzer analyzerK;
-  protected IndexReader reader;
-  protected QueryParser paW;
-  protected QueryParser paB;
-
-  protected static final String[] shortMVValues = {
-    "",
-    "",
-    "a b c",
-    "",   // empty data in multi valued field
-    "d e"
-  };
-
-  protected static final String[] longMVValues = {
-    "Followings are the examples of customizable parameters and actual examples of customization:",
-    "The most search engines use only one of these methods. Even the search engines that says they can use the both methods basically"
-  };
-
-  // test data for LUCENE-1448 bug
-  protected static final String[] biMVValues = {
-    "\nLucene/Solr does not require such additional hardware.",
-    "\nWhen you talk about processing speed, the"
-  };
-
-  protected static final String[] strMVValues = {
-    "abc",
-    "defg",
-    "hijkl"
-  };
-
-  @Override
-  public void setUp() throws Exception {
-    super.setUp();
-    analyzerW = new MockAnalyzer(random, MockTokenizer.WHITESPACE, false);
-    analyzerB = new BigramAnalyzer();
-    analyzerK = new MockAnalyzer(random, MockTokenizer.KEYWORD, false);
-    paW = new QueryParser(TEST_VERSION_CURRENT, F, analyzerW );
-    paB = new QueryParser(TEST_VERSION_CURRENT, F, analyzerB );
-    dir = newDirectory();
-  }
-
-  @Override
-  public void tearDown() throws Exception {
-    if( reader != null ){
-      reader.close();
-      reader = null;
-    }
-    dir.close();
-    super.tearDown();
-  }
-
-  protected Query tq( String text ){
-    return tq( 1F, text );
-  }
-
-  protected Query tq( float boost, String text ){
-    return tq( boost, F, text );
-  }
-
-  protected Query tq( String field, String text ){
-    return tq( 1F, field, text );
-  }
-
-  protected Query tq( float boost, String field, String text ){
-    Query query = new TermQuery( new Term( field, text ) );
-    query.setBoost( boost );
-    return query;
-  }
-
-  protected Query pqF( String... texts ){
-    return pqF( 1F, texts );
-  }
-
-  protected Query pqF( float boost, String... texts ){
-    return pqF( boost, 0, texts );
-  }
-
-  protected Query pqF( float boost, int slop, String... texts ){
-    return pq( boost, slop, F, texts );
-  }
-
-  protected Query pq( String field, String... texts ){
-    return pq( 1F, 0, field, texts );
-  }
-
-  protected Query pq( float boost, String field, String... texts ){
-    return pq( boost, 0, field, texts );
-  }
-
-  protected Query pq( float boost, int slop, String field, String... texts ){
-    PhraseQuery query = new PhraseQuery();
-    for( String text : texts ){
-      query.add( new Term( field, text ) );
-    }
-    query.setBoost( boost );
-    query.setSlop( slop );
-    return query;
-  }
-
-  protected Query dmq( Query... queries ){
-    return dmq( 0.0F, queries );
-  }
-
-  protected Query dmq( float tieBreakerMultiplier, Query... queries ){
-    DisjunctionMaxQuery query = new DisjunctionMaxQuery( tieBreakerMultiplier );
-    for( Query q : queries ){
-      query.add( q );
-    }
-    return query;
-  }
-
-  protected void assertCollectionQueries( Collection<Query> actual, Query... expected ){
-    assertEquals( expected.length, actual.size() );
-    for( Query query : expected ){
-      assertTrue( actual.contains( query ) );
-    }
-  }
-
-  static final class BigramAnalyzer extends Analyzer {
-    @Override
-    public TokenStream tokenStream(String fieldName, Reader reader) {
-      return new BasicNGramTokenizer( reader );
-    }
-  }
-
-  static final class BasicNGramTokenizer extends Tokenizer {
-
-    public static final int DEFAULT_N_SIZE = 2;
-    public static final String DEFAULT_DELIMITERS = " \t\n.,";
-    private final int n;
-    private final String delimiters;
-    private int startTerm;
-    private int lenTerm;
-    private int startOffset;
-    private int nextStartOffset;
-    private int ch;
-    private String snippet;
-    private StringBuilder snippetBuffer;
-    private static final int BUFFER_SIZE = 4096;
-    private char[] charBuffer;
-    private int charBufferIndex;
-    private int charBufferLen;
-
-    public BasicNGramTokenizer( Reader in ){
-      this( in, DEFAULT_N_SIZE );
-    }
-
-    public BasicNGramTokenizer( Reader in, int n ){
-      this( in, n, DEFAULT_DELIMITERS );
-    }
-
-    public BasicNGramTokenizer( Reader in, String delimiters ){
-      this( in, DEFAULT_N_SIZE, delimiters );
-    }
-
-    public BasicNGramTokenizer( Reader in, int n, String delimiters ){
-      super(in);
-      this.n = n;
-      this.delimiters = delimiters;
-      startTerm = 0;
-      nextStartOffset = 0;
-      snippet = null;
-      snippetBuffer = new StringBuilder();
-      charBuffer = new char[BUFFER_SIZE];
-      charBufferIndex = BUFFER_SIZE;
-      charBufferLen = 0;
-      ch = 0;
-    }
-
-    CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
-    OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
-    @Override
-    public boolean incrementToken() throws IOException {
-      if( !getNextPartialSnippet() )
-        return false;
-      clearAttributes();
-      termAtt.setEmpty().append(snippet, startTerm, startTerm + lenTerm);
-      offsetAtt.setOffset(correctOffset(startOffset), correctOffset(startOffset + lenTerm));
-      return true;
-    }
-
-    private int getFinalOffset() {
-      return nextStartOffset;
-    }
-
-    @Override
-    public final void end(){
-      offsetAtt.setOffset(getFinalOffset(), getFinalOffset());
-    }
-
-    protected boolean getNextPartialSnippet() throws IOException {
-      if( snippet != null && snippet.length() >= startTerm + 1 + n ){
-        startTerm++;
-        startOffset++;
-        lenTerm = n;
-        return true;
-      }
-      return getNextSnippet();
-    }
-
-    protected boolean getNextSnippet() throws IOException {
-      startTerm = 0;
-      startOffset = nextStartOffset;
-      snippetBuffer.delete( 0, snippetBuffer.length() );
-      while( true ){
-        if( ch != -1 )
-          ch = readCharFromBuffer();
-        if( ch == -1 ) break;
-        else if( !isDelimiter( ch ) )
-          snippetBuffer.append( (char)ch );
-        else if( snippetBuffer.length() > 0 )
-          break;
-        else
-          startOffset++;
-      }
-      if( snippetBuffer.length() == 0 )
-        return false;
-      snippet = snippetBuffer.toString();
-      lenTerm = snippet.length() >= n ? n : snippet.length();
-      return true;
-    }
-
-    protected int readCharFromBuffer() throws IOException {
-      if( charBufferIndex >= charBufferLen ){
-        charBufferLen = input.read( charBuffer );
-        if( charBufferLen == -1 ){
-          return -1;
-        }
-        charBufferIndex = 0;
-      }
-      int c = charBuffer[charBufferIndex++];
-      nextStartOffset++;
-      return c;
-    }
-
-    protected boolean isDelimiter( int c ){
-      return delimiters.indexOf( c ) >= 0;
-    }
-
-    @Override
-    public void reset( Reader input ) throws IOException {
-      super.reset( input );
-      reset();
-    }
-
-    @Override
-    public void reset() throws IOException {
-      startTerm = 0;
-      nextStartOffset = 0;
-      snippet = null;
-      snippetBuffer.setLength( 0 );
-      charBufferIndex = BUFFER_SIZE;
-      charBufferLen = 0;
-      ch = 0;
-    }
-  }
-
-  protected void make1d1fIndex( String value ) throws Exception {
-    make1dmfIndex( value );
-  }
-
-  protected void make1d1fIndexB( String value ) throws Exception {
-    make1dmfIndexB( value );
-  }
-
-  protected void make1dmfIndex( String... values ) throws Exception {
-    make1dmfIndex( analyzerW, values );
-  }
-
-  protected void make1dmfIndexB( String... values ) throws Exception {
-    make1dmfIndex( analyzerB, values );
-  }
-
-  // make 1 doc with multi valued field
-  protected void make1dmfIndex( Analyzer analyzer, String... values ) throws Exception {
-    IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(
-        TEST_VERSION_CURRENT, analyzer).setOpenMode(OpenMode.CREATE));
-    Document doc = new Document();
-    for( String value: values )
-      doc.add( new Field( F, value, Store.YES, Index.ANALYZED, TermVector.WITH_POSITIONS_OFFSETS ) );
-    writer.addDocument( doc );
-    writer.close();
-    if (reader != null) reader.close();
-    reader = IndexReader.open( dir, true );
-  }
-
-  // make 1 doc with multi valued & not analyzed field
-  protected void make1dmfIndexNA( String... values ) throws Exception {
-    IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(
-        TEST_VERSION_CURRENT, analyzerK).setOpenMode(OpenMode.CREATE));
-    Document doc = new Document();
-    for( String value: values )
-      doc.add( new Field( F, value, Store.YES, Index.NOT_ANALYZED, TermVector.WITH_POSITIONS_OFFSETS ) );
-    writer.addDocument( doc );
-    writer.close();
-    if (reader != null) reader.close();
-    reader = IndexReader.open( dir, true );
-  }
-
-  protected void makeIndexShortMV() throws Exception {
-
-    //  0
-    // ""
-    //  1
-    // ""
-
-    //  234567
-    // "a b c"
-    //  0 1 2
-
-    //  8
-    // ""
-
-    //   111
-    //  9012
-    // "d e"
-    //  3 4
-    make1dmfIndex( shortMVValues );
-  }
-
-  protected void makeIndexLongMV() throws Exception {
-    //           11111111112222222222333333333344444444445555555555666666666677777777778888888888999
-    // 012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012
-    // Followings are the examples of customizable parameters and actual examples of customization:
-    // 0          1   2   3        4  5            6          7   8      9        10 11
-
-    //        1                                                                                                   2
-    // 999999900000000001111111111222222222233333333334444444444555555555566666666667777777777888888888899999999990000000000111111111122
-    // 345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901
-    // The most search engines use only one of these methods. Even the search engines that says they can use the both methods basically
-    // 12  13   (14)   (15)    16  17   18  19 20    21       22   23  (24)   (25)    26   27   28   29  30  31  32   33      34
-
-    make1dmfIndex( longMVValues );
-  }
-
-  protected void makeIndexLongMVB() throws Exception {
-    // "*" ... LF
-
-    //           1111111111222222222233333333334444444444555555
-    // 01234567890123456789012345678901234567890123456789012345
-    // *Lucene/Solr does not require such additional hardware.
-    // Lu  0 do 10 re 15 su 21 na 31
-    // uc  1 oe 11 eq 16 uc 22 al 32
-    // ce  2 es 12 qu 17 ch 23 ha 33
-    // en  3 no 13 ui 18 ad 24 ar 34
-    // ne  4 ot 14 ir 19 dd 25 rd 35
-    // e/  5       re 20 di 26 dw 36
-    // /S  6             it 27 wa 37
-    // So  7             ti 28 ar 38
-    // ol  8             io 29 re 39
-    // lr  9             on 30
-
-    // 5555666666666677777777778888888888999999999
-    // 6789012345678901234567890123456789012345678
-    // *When you talk about processing speed, the
-    // Wh 40 ab 48 es 56 th 65
-    // he 41 bo 49 ss 57 he 66
-    // en 42 ou 50 si 58
-    // yo 43 ut 51 in 59
-    // ou 44 pr 52 ng 60
-    // ta 45 ro 53 sp 61
-    // al 46 oc 54 pe 62
-    // lk 47 ce 55 ee 63
-    //             ed 64
-
-    make1dmfIndexB( biMVValues );
-  }
-
-  protected void makeIndexStrMV() throws Exception {
-
-    //  0123
-    // "abc"
-
-    //  34567
-    // "defg"
-
-    //     111
-    //  789012
-    // "hijkl"
-    make1dmfIndexNA( strMVValues );
-  }
-}