pylucene 3.5.0-3

[pylucene.git] / lucene-java-3.4.0 / lucene / contrib / highlighter / src / test / org / apache / lucene / search / vectorhighlight / AbstractTestCase.java
diff --git a/lucene-java-3.4.0/lucene/contrib/highlighter/src/test/org/apache/lucene/search/vectorhighlight/AbstractTestCase.java b/lucene-java-3.4.0/lucene/contrib/highlighter/src/test/org/apache/lucene/search/vectorhighlight/AbstractTestCase.java

deleted file mode 100644 (file)

index 0f19ebf..0000000
--- a/lucene-java-3.4.0/lucene/contrib/highlighter/src/test/org/apache/lucene/search/vectorhighlight/AbstractTestCase.java
+++ /dev/null
@@ -1,438 +0,0 @@
-package org.apache.lucene.search.vectorhighlight;
-
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import java.io.IOException;
-import java.io.Reader;
-import java.util.Collection;
-
-import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.MockAnalyzer;
-import org.apache.lucene.analysis.MockTokenizer;
-import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.Tokenizer;
-import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
-import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
-import org.apache.lucene.document.Document;
-import org.apache.lucene.document.Field;
-import org.apache.lucene.document.Field.Index;
-import org.apache.lucene.document.Field.Store;
-import org.apache.lucene.document.Field.TermVector;
-import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.index.IndexWriter;
-import org.apache.lucene.index.IndexWriterConfig;
-import org.apache.lucene.index.Term;
-import org.apache.lucene.index.IndexWriterConfig.OpenMode;
-import org.apache.lucene.queryParser.QueryParser;
-import org.apache.lucene.search.DisjunctionMaxQuery;
-import org.apache.lucene.search.PhraseQuery;
-import org.apache.lucene.search.Query;
-import org.apache.lucene.search.TermQuery;
-import org.apache.lucene.store.Directory;
-import org.apache.lucene.util.LuceneTestCase;
-
-public abstract class AbstractTestCase extends LuceneTestCase {
-
-  protected final String F = "f";
-  protected final String F1 = "f1";
-  protected final String F2 = "f2";
-  protected Directory dir;
-  protected Analyzer analyzerW;
-  protected Analyzer analyzerB;
-  protected Analyzer analyzerK;
-  protected IndexReader reader;  
-  protected QueryParser paW;
-  protected QueryParser paB;
-  
-  protected static final String[] shortMVValues = {
-    "",
-    "",
-    "a b c",
-    "",   // empty data in multi valued field
-    "d e"
-  };
-  
-  protected static final String[] longMVValues = {
-    "Followings are the examples of customizable parameters and actual examples of customization:",
-    "The most search engines use only one of these methods. Even the search engines that says they can use the both methods basically"
-  };
-  
-  // test data for LUCENE-1448 bug
-  protected static final String[] biMVValues = {
-    "\nLucene/Solr does not require such additional hardware.",
-    "\nWhen you talk about processing speed, the"
-  };
-  
-  protected static final String[] strMVValues = {
-    "abc",
-    "defg",
-    "hijkl"
-  };
-
-  @Override
-  public void setUp() throws Exception {
-    super.setUp();
-    analyzerW = new MockAnalyzer(random, MockTokenizer.WHITESPACE, false);
-    analyzerB = new BigramAnalyzer();
-    analyzerK = new MockAnalyzer(random, MockTokenizer.KEYWORD, false);
-    paW = new QueryParser(TEST_VERSION_CURRENT,  F, analyzerW );
-    paB = new QueryParser(TEST_VERSION_CURRENT,  F, analyzerB );
-    dir = newDirectory();
-  }
-  
-  @Override
-  public void tearDown() throws Exception {
-    if( reader != null ){
-      reader.close();
-      reader = null;
-    }
-    dir.close();
-    super.tearDown();
-  }
-
-  protected Query tq( String text ){
-    return tq( 1F, text );
-  }
-
-  protected Query tq( float boost, String text ){
-    return tq( boost, F, text );
-  }
-  
-  protected Query tq( String field, String text ){
-    return tq( 1F, field, text );
-  }
-  
-  protected Query tq( float boost, String field, String text ){
-    Query query = new TermQuery( new Term( field, text ) );
-    query.setBoost( boost );
-    return query;
-  }
-  
-  protected Query pqF( String... texts ){
-    return pqF( 1F, texts );
-  }
-  
-  protected Query pqF( float boost, String... texts ){
-    return pqF( boost, 0, texts );
-  }
-  
-  protected Query pqF( float boost, int slop, String... texts ){
-    return pq( boost, slop, F, texts );
-  }
-  
-  protected Query pq( String field, String... texts ){
-    return pq( 1F, 0, field, texts );
-  }
-  
-  protected Query pq( float boost, String field, String... texts ){
-    return pq( boost, 0, field, texts );
-  }
-  
-  protected Query pq( float boost, int slop, String field, String... texts ){
-    PhraseQuery query = new PhraseQuery();
-    for( String text : texts ){
-      query.add( new Term( field, text ) );
-    }
-    query.setBoost( boost );
-    query.setSlop( slop );
-    return query;
-  }
-  
-  protected Query dmq( Query... queries ){
-    return dmq( 0.0F, queries );
-  }
-  
-  protected Query dmq( float tieBreakerMultiplier, Query... queries ){
-    DisjunctionMaxQuery query = new DisjunctionMaxQuery( tieBreakerMultiplier );
-    for( Query q : queries ){
-      query.add( q );
-    }
-    return query;
-  }
-  
-  protected void assertCollectionQueries( Collection<Query> actual, Query... expected ){
-    assertEquals( expected.length, actual.size() );
-    for( Query query : expected ){
-      assertTrue( actual.contains( query ) );
-    }
-  }
-
-  static final class BigramAnalyzer extends Analyzer {
-    @Override
-    public TokenStream tokenStream(String fieldName, Reader reader) {
-      return new BasicNGramTokenizer( reader );
-    }
-  }
-  
-  static final class BasicNGramTokenizer extends Tokenizer {
-
-    public static final int DEFAULT_N_SIZE = 2;
-    public static final String DEFAULT_DELIMITERS = " \t\n.,";
-    private final int n;
-    private final String delimiters;
-    private int startTerm;
-    private int lenTerm;
-    private int startOffset;
-    private int nextStartOffset;
-    private int ch;
-    private String snippet;
-    private StringBuilder snippetBuffer;
-    private static final int BUFFER_SIZE = 4096;
-    private char[] charBuffer;
-    private int charBufferIndex;
-    private int charBufferLen;
-    
-    public BasicNGramTokenizer( Reader in ){
-      this( in, DEFAULT_N_SIZE );
-    }
-    
-    public BasicNGramTokenizer( Reader in, int n ){
-      this( in, n, DEFAULT_DELIMITERS );
-    }
-    
-    public BasicNGramTokenizer( Reader in, String delimiters ){
-      this( in, DEFAULT_N_SIZE, delimiters );
-    }
-    
-    public BasicNGramTokenizer( Reader in, int n, String delimiters ){
-      super(in);
-      this.n = n;
-      this.delimiters = delimiters;
-      startTerm = 0;
-      nextStartOffset = 0;
-      snippet = null;
-      snippetBuffer = new StringBuilder();
-      charBuffer = new char[BUFFER_SIZE];
-      charBufferIndex = BUFFER_SIZE;
-      charBufferLen = 0;
-      ch = 0;
-    }
-
-    CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
-    OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
-    @Override
-    public boolean incrementToken() throws IOException {
-      if( !getNextPartialSnippet() )
-        return false;
-      clearAttributes();
-      termAtt.setEmpty().append(snippet, startTerm, startTerm + lenTerm);
-      offsetAtt.setOffset(correctOffset(startOffset), correctOffset(startOffset + lenTerm));
-      return true;
-    }
-
-    private int getFinalOffset() {
-      return nextStartOffset;
-    }
-    
-    @Override
-    public final void end(){
-      offsetAtt.setOffset(getFinalOffset(),getFinalOffset());
-    }
-    
-    protected boolean getNextPartialSnippet() throws IOException {
-      if( snippet != null && snippet.length() >= startTerm + 1 + n ){
-        startTerm++;
-        startOffset++;
-        lenTerm = n;
-        return true;
-      }
-      return getNextSnippet();
-    }
-    
-    protected boolean getNextSnippet() throws IOException {
-      startTerm = 0;
-      startOffset = nextStartOffset;
-      snippetBuffer.delete( 0, snippetBuffer.length() );
-      while( true ){
-        if( ch != -1 )
-          ch = readCharFromBuffer();
-        if( ch == -1 ) break;
-        else if( !isDelimiter( ch ) )
-          snippetBuffer.append( (char)ch );
-        else if( snippetBuffer.length() > 0 )
-          break;
-        else
-          startOffset++;
-      }
-      if( snippetBuffer.length() == 0 )
-        return false;
-      snippet = snippetBuffer.toString();
-      lenTerm = snippet.length() >= n ? n : snippet.length();
-      return true;
-    }
-    
-    protected int readCharFromBuffer() throws IOException {
-      if( charBufferIndex >= charBufferLen ){
-        charBufferLen = input.read( charBuffer );
-        if( charBufferLen == -1 ){
-          return -1;
-        }
-        charBufferIndex = 0;
-      }
-      int c = charBuffer[charBufferIndex++];
-      nextStartOffset++;
-      return c;
-    }
-    
-    protected boolean isDelimiter( int c ){
-      return delimiters.indexOf( c ) >= 0;
-    }
-    
-    @Override
-    public void reset( Reader input ) throws IOException {
-      super.reset( input );
-      reset();
-    }
-    
-    @Override
-    public void reset() throws IOException {
-      startTerm = 0;
-      nextStartOffset = 0;
-      snippet = null;
-      snippetBuffer.setLength( 0 );
-      charBufferIndex = BUFFER_SIZE;
-      charBufferLen = 0;
-      ch = 0;
-    }
-  }
-
-  protected void make1d1fIndex( String value ) throws Exception {
-    make1dmfIndex( value );
-  }
-  
-  protected void make1d1fIndexB( String value ) throws Exception {
-    make1dmfIndexB( value );
-  }
-  
-  protected void make1dmfIndex( String... values ) throws Exception {
-    make1dmfIndex( analyzerW, values );
-  }
-  
-  protected void make1dmfIndexB( String... values ) throws Exception {
-    make1dmfIndex( analyzerB, values );
-  }
-  
-  // make 1 doc with multi valued field
-  protected void make1dmfIndex( Analyzer analyzer, String... values ) throws Exception {
-    IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(
-        TEST_VERSION_CURRENT, analyzer).setOpenMode(OpenMode.CREATE));
-    Document doc = new Document();
-    for( String value: values )
-      doc.add( new Field( F, value, Store.YES, Index.ANALYZED, TermVector.WITH_POSITIONS_OFFSETS ) );
-    writer.addDocument( doc );
-    writer.close();
-    if (reader != null) reader.close();
-    reader = IndexReader.open( dir, true );
-  }
-  
-  // make 1 doc with multi valued & not analyzed field
-  protected void make1dmfIndexNA( String... values ) throws Exception {
-    IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(
-        TEST_VERSION_CURRENT, analyzerK).setOpenMode(OpenMode.CREATE));
-    Document doc = new Document();
-    for( String value: values )
-      doc.add( new Field( F, value, Store.YES, Index.NOT_ANALYZED, TermVector.WITH_POSITIONS_OFFSETS ) );
-    writer.addDocument( doc );
-    writer.close();
-    if (reader != null) reader.close();
-    reader = IndexReader.open( dir, true );
-  }
-  
-  protected void makeIndexShortMV() throws Exception {
-    
-    //  0
-    // ""
-    //  1
-    // ""
-
-    //  234567
-    // "a b c"
-    //  0 1 2
-
-    //  8
-    // ""
-
-    //   111
-    //  9012
-    // "d e"
-    //  3 4
-    make1dmfIndex( shortMVValues );
-  }
-  
-  protected void makeIndexLongMV() throws Exception {
-    //           11111111112222222222333333333344444444445555555555666666666677777777778888888888999
-    // 012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012
-    // Followings are the examples of customizable parameters and actual examples of customization:
-    // 0          1   2   3        4  5            6          7   8      9        10 11
-    
-    //        1                                                                                                   2
-    // 999999900000000001111111111222222222233333333334444444444555555555566666666667777777777888888888899999999990000000000111111111122
-    // 345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901
-    // The most search engines use only one of these methods. Even the search engines that says they can use the both methods basically
-    // 12  13  (14)   (15)     16  17   18  19 20    21       22   23 (24)   (25)     26   27   28   29  30  31  32   33      34
-
-    make1dmfIndex( longMVValues );
-  }
-  
-  protected void makeIndexLongMVB() throws Exception {
-    // "*" ... LF
-    
-    //           1111111111222222222233333333334444444444555555
-    // 01234567890123456789012345678901234567890123456789012345
-    // *Lucene/Solr does not require such additional hardware.
-    //  Lu 0        do 10    re 15   su 21       na 31
-    //   uc 1        oe 11    eq 16   uc 22       al 32
-    //    ce 2        es 12    qu 17   ch 23         ha 33
-    //     en 3          no 13  ui 18     ad 24       ar 34
-    //      ne 4          ot 14  ir 19     dd 25       rd 35
-    //       e/ 5                 re 20     di 26       dw 36
-    //        /S 6                           it 27       wa 37
-    //         So 7                           ti 28       ar 38
-    //          ol 8                           io 29       re 39
-    //           lr 9                           on 30
-
-    // 5555666666666677777777778888888888999999999
-    // 6789012345678901234567890123456789012345678
-    // *When you talk about processing speed, the
-    //  Wh 40         ab 48     es 56         th 65
-    //   he 41         bo 49     ss 57         he 66
-    //    en 42         ou 50     si 58
-    //       yo 43       ut 51     in 59
-    //        ou 44         pr 52   ng 60
-    //           ta 45       ro 53     sp 61
-    //            al 46       oc 54     pe 62
-    //             lk 47       ce 55     ee 63
-    //                                    ed 64
-
-    make1dmfIndexB( biMVValues );
-  }
-  
-  protected void makeIndexStrMV() throws Exception {
-
-    //  0123
-    // "abc"
-    
-    //  34567
-    // "defg"
-
-    //     111
-    //  789012
-    // "hijkl"
-    make1dmfIndexNA( strMVValues );
-  }
-}