lucene-java-3.4.0/lucene/backwards/src/test/org/apache/lucene/analysis/VocabularyAssert.java

   1 package org.apache.lucene.analysis;
   2
   3 /**
   4  * Licensed to the Apache Software Foundation (ASF) under one or more
   5  * contributor license agreements.  See the NOTICE file distributed with
   6  * this work for additional information regarding copyright ownership.
   7  * The ASF licenses this file to You under the Apache License, Version 2.0
   8  * (the "License"); you may not use this file except in compliance with
   9  * the License.  You may obtain a copy of the License at
  10  *
  11  *     http://www.apache.org/licenses/LICENSE-2.0
  12  *
  13  * Unless required by applicable law or agreed to in writing, software
  14  * distributed under the License is distributed on an "AS IS" BASIS,
  15  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  16  * See the License for the specific language governing permissions and
  17  * limitations under the License.
  18  */
  19
  20 import java.io.BufferedReader;
  21 import java.io.File;
  22 import java.io.IOException;
  23 import java.io.InputStream;
  24 import java.io.InputStreamReader;
  25 import java.util.zip.ZipFile;
  26
  27 import org.apache.lucene.analysis.Analyzer;
  28 import org.junit.Assert;
  29
  30 /** Utility class for doing vocabulary-based stemming tests */
  31 public class VocabularyAssert {
  32   /** Run a vocabulary test against two data files. */
  33   public static void assertVocabulary(Analyzer a, InputStream voc, InputStream out)
  34   throws IOException {
  35     BufferedReader vocReader = new BufferedReader(
  36         new InputStreamReader(voc, "UTF-8"));
  37     BufferedReader outputReader = new BufferedReader(
  38         new InputStreamReader(out, "UTF-8"));
  39     String inputWord = null;
  40     while ((inputWord = vocReader.readLine()) != null) {
  41       String expectedWord = outputReader.readLine();
  42       Assert.assertNotNull(expectedWord);
  43       BaseTokenStreamTestCase.checkOneTermReuse(a, inputWord, expectedWord);
  44     }
  45   }
  46
  47   /** Run a vocabulary test against one file: tab separated. */
  48   public static void assertVocabulary(Analyzer a, InputStream vocOut)
  49   throws IOException {
  50     BufferedReader vocReader = new BufferedReader(
  51         new InputStreamReader(vocOut, "UTF-8"));
  52     String inputLine = null;
  53     while ((inputLine = vocReader.readLine()) != null) {
  54       if (inputLine.startsWith("#") || inputLine.trim().length() == 0)
  55         continue; /* comment */
  56       String words[] = inputLine.split("\t");
  57       BaseTokenStreamTestCase.checkOneTermReuse(a, words[0], words[1]);
  58     }
  59   }
  60
  61   /** Run a vocabulary test against two data files inside a zip file */
  62   public static void assertVocabulary(Analyzer a, File zipFile, String voc, String out)
  63   throws IOException {
  64     ZipFile zip = new ZipFile(zipFile);
  65     InputStream v = zip.getInputStream(zip.getEntry(voc));
  66     InputStream o = zip.getInputStream(zip.getEntry(out));
  67     assertVocabulary(a, v, o);
  68     v.close();
  69     o.close();
  70     zip.close();
  71   }
  72
  73   /** Run a vocabulary test against a tab-separated data file inside a zip file */
  74   public static void assertVocabulary(Analyzer a, File zipFile, String vocOut)
  75   throws IOException {
  76     ZipFile zip = new ZipFile(zipFile);
  77     InputStream vo = zip.getInputStream(zip.getEntry(vocOut));
  78     assertVocabulary(a, vo);
  79     vo.close();
  80     zip.close();
  81   }
  82 }