lucene-java-3.4.0/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/ar/TestArabicStemFilter.java

   1 package org.apache.lucene.analysis.ar;
   2
   3 /**
   4  * Licensed to the Apache Software Foundation (ASF) under one or more
   5  * contributor license agreements.  See the NOTICE file distributed with
   6  * this work for additional information regarding copyright ownership.
   7  * The ASF licenses this file to You under the Apache License, Version 2.0
   8  * (the "License"); you may not use this file except in compliance with
   9  * the License.  You may obtain a copy of the License at
  10  *
  11  *     http://www.apache.org/licenses/LICENSE-2.0
  12  *
  13  * Unless required by applicable law or agreed to in writing, software
  14  * distributed under the License is distributed on an "AS IS" BASIS,
  15  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  16  * See the License for the specific language governing permissions and
  17  * limitations under the License.
  18  */
  19
  20 import java.io.IOException;
  21 import java.io.StringReader;
  22
  23 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
  24 import org.apache.lucene.analysis.CharArraySet;
  25 import org.apache.lucene.analysis.KeywordMarkerFilter;
  26
  27 /**
  28  * Test the Arabic Stem Filter
  29  *
  30  */
  31 public class TestArabicStemFilter extends BaseTokenStreamTestCase {
  32
  33   public void testAlPrefix() throws IOException {
  34     check("الحسن", "حسن");
  35   }
  36
  37   public void testWalPrefix() throws IOException {
  38     check("والحسن", "حسن");
  39   }
  40
  41   public void testBalPrefix() throws IOException {
  42     check("بالحسن", "حسن");
  43   }
  44
  45   public void testKalPrefix() throws IOException {
  46     check("كالحسن", "حسن");
  47   }
  48
  49   public void testFalPrefix() throws IOException {
  50     check("فالحسن", "حسن");
  51   }
  52
  53   public void testLlPrefix() throws IOException {
  54     check("للاخر", "اخر");
  55   }
  56
  57   public void testWaPrefix() throws IOException {
  58     check("وحسن", "حسن");
  59   }
  60
  61   public void testAhSuffix() throws IOException {
  62     check("زوجها", "زوج");
  63   }
  64
  65   public void testAnSuffix() throws IOException {
  66     check("ساهدان", "ساهد");
  67   }
  68
  69   public void testAtSuffix() throws IOException {
  70     check("ساهدات", "ساهد");
  71   }
  72
  73   public void testWnSuffix() throws IOException {
  74     check("ساهدون", "ساهد");
  75   }
  76
  77   public void testYnSuffix() throws IOException {
  78     check("ساهدين", "ساهد");
  79   }
  80
  81   public void testYhSuffix() throws IOException {
  82     check("ساهديه", "ساهد");
  83   }
  84
  85   public void testYpSuffix() throws IOException {
  86     check("ساهدية", "ساهد");
  87   }
  88
  89   public void testHSuffix() throws IOException {
  90     check("ساهده", "ساهد");
  91   }
  92
  93   public void testPSuffix() throws IOException {
  94     check("ساهدة", "ساهد");
  95   }
  96
  97   public void testYSuffix() throws IOException {
  98     check("ساهدي", "ساهد");
  99   }
 100
 101   public void testComboPrefSuf() throws IOException {
 102     check("وساهدون", "ساهد");
 103   }
 104
 105   public void testComboSuf() throws IOException {
 106     check("ساهدهات", "ساهد");
 107   }
 108
 109   public void testShouldntStem() throws IOException {
 110     check("الو", "الو");
 111   }
 112
 113   public void testNonArabic() throws IOException {
 114     check("English", "English");
 115   }
 116
 117   public void testWithKeywordAttribute() throws IOException {
 118     CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, 1, true);
 119     set.add("ساهدهات");
 120     ArabicLetterTokenizer tokenStream  = new ArabicLetterTokenizer(TEST_VERSION_CURRENT, new StringReader("ساهدهات"));
 121
 122     ArabicStemFilter filter = new ArabicStemFilter(new KeywordMarkerFilter(tokenStream, set));
 123     assertTokenStreamContents(filter, new String[]{"ساهدهات"});
 124   }
 125
 126   private void check(final String input, final String expected) throws IOException {
 127     ArabicLetterTokenizer tokenStream  = new ArabicLetterTokenizer(TEST_VERSION_CURRENT, new StringReader(input));
 128     ArabicStemFilter filter = new ArabicStemFilter(tokenStream);
 129     assertTokenStreamContents(filter, new String[]{expected});
 130   }
 131 }