lucene-java-3.4.0/lucene/src/test/org/apache/lucene/analysis/TestCachingTokenFilter.java

   1 package org.apache.lucene.analysis;
   2
   3 /**
   4  * Licensed to the Apache Software Foundation (ASF) under one or more
   5  * contributor license agreements.  See the NOTICE file distributed with
   6  * this work for additional information regarding copyright ownership.
   7  * The ASF licenses this file to You under the Apache License, Version 2.0
   8  * (the "License"); you may not use this file except in compliance with
   9  * the License.  You may obtain a copy of the License at
  10  *
  11  *     http://www.apache.org/licenses/LICENSE-2.0
  12  *
  13  * Unless required by applicable law or agreed to in writing, software
  14  * distributed under the License is distributed on an "AS IS" BASIS,
  15  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  16  * See the License for the specific language governing permissions and
  17  * limitations under the License.
  18  */
  19
  20
  21 import java.io.IOException;
  22
  23 import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
  24 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
  25 import org.apache.lucene.document.Document;
  26 import org.apache.lucene.document.Field;
  27 import org.apache.lucene.document.Field.TermVector;
  28 import org.apache.lucene.index.IndexReader;
  29 import org.apache.lucene.index.Term;
  30 import org.apache.lucene.index.TermPositions;
  31 import org.apache.lucene.index.RandomIndexWriter;
  32 import org.apache.lucene.store.Directory;
  33
  34 public class TestCachingTokenFilter extends BaseTokenStreamTestCase {
  35   private String[] tokens = new String[] {"term1", "term2", "term3", "term2"};
  36
  37   public void testCaching() throws IOException {
  38     Directory dir = newDirectory();
  39     RandomIndexWriter writer = new RandomIndexWriter(random, dir);
  40
  41     Document doc = new Document();
  42     TokenStream stream = new TokenStream() {
  43       private int index = 0;
  44       private CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
  45       private OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
  46
  47       @Override
  48       public boolean incrementToken() throws IOException {
  49         if (index == tokens.length) {
  50           return false;
  51         } else {
  52           clearAttributes();
  53           termAtt.append(tokens[index++]);
  54           offsetAtt.setOffset(0,0);
  55           return true;
  56         }
  57       }
  58
  59     };
  60
  61     stream = new CachingTokenFilter(stream);
  62
  63     doc.add(new Field("preanalyzed", stream, TermVector.NO));
  64
  65     // 1) we consume all tokens twice before we add the doc to the index
  66     checkTokens(stream);
  67     stream.reset();
  68     checkTokens(stream);
  69
  70     // 2) now add the document to the index and verify if all tokens are indexed
  71     //    don't reset the stream here, the DocumentWriter should do that implicitly
  72     writer.addDocument(doc);
  73
  74     IndexReader reader = writer.getReader();
  75     TermPositions termPositions = reader.termPositions(new Term("preanalyzed", "term1"));
  76     assertTrue(termPositions.next());
  77     assertEquals(1, termPositions.freq());
  78     assertEquals(0, termPositions.nextPosition());
  79
  80     termPositions.seek(new Term("preanalyzed", "term2"));
  81     assertTrue(termPositions.next());
  82     assertEquals(2, termPositions.freq());
  83     assertEquals(1, termPositions.nextPosition());
  84     assertEquals(3, termPositions.nextPosition());
  85
  86     termPositions.seek(new Term("preanalyzed", "term3"));
  87     assertTrue(termPositions.next());
  88     assertEquals(1, termPositions.freq());
  89     assertEquals(2, termPositions.nextPosition());
  90     reader.close();
  91     writer.close();
  92     // 3) reset stream and consume tokens again
  93     stream.reset();
  94     checkTokens(stream);
  95     dir.close();
  96   }
  97
  98   private void checkTokens(TokenStream stream) throws IOException {
  99     int count = 0;
 100
 101     CharTermAttribute termAtt = stream.getAttribute(CharTermAttribute.class);
 102     while (stream.incrementToken()) {
 103       assertTrue(count < tokens.length);
 104       assertEquals(tokens[count], termAtt.toString());
 105       count++;
 106     }
 107
 108     assertEquals(tokens.length, count);
 109   }
 110 }