2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements. See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
9 * http://www.apache.org/licenses/LICENSE-2.0
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
18 package org.apache.lucene.analysis.synonym;
20 import java.io.Reader;
21 import java.io.StringReader;
22 import java.util.ArrayList;
23 import java.util.Arrays;
24 import java.util.HashMap;
25 import java.util.HashSet;
26 import java.util.List;
30 import org.apache.lucene.analysis.Analyzer;
31 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
32 import org.apache.lucene.analysis.MockAnalyzer;
33 import org.apache.lucene.analysis.MockTokenizer;
34 import org.apache.lucene.analysis.Tokenizer;
35 import org.apache.lucene.analysis.tokenattributes.*;
36 import org.apache.lucene.analysis.ReusableAnalyzerBase;
37 import org.apache.lucene.util.CharsRef;
38 import org.apache.lucene.util._TestUtil;
40 public class TestSynonymMapFilter extends BaseTokenStreamTestCase {
// Shared per-test fixtures: the synonym map being built, the input
// tokenizer, the filter under test, and cached attribute views used
// by verify() to inspect each emitted token.
42 private SynonymMap.Builder b;
43 private Tokenizer tokensIn;
44 private SynonymFilter tokensOut;
45 private CharTermAttribute termAtt;
46 private PositionIncrementAttribute posIncrAtt;
47 private OffsetAttribute offsetAtt;
// Registers one synonym rule in the builder. Multi-word entries use
// '\u0000' as the word separator (the form SynonymMap expects).
// NOTE(review): the trailing builder argument(s) and closing lines of
// this method are not visible in this chunk — presumably keepOrig is
// passed through; confirm against the full file.
49 private void add(String input, String output, boolean keepOrig) {
50 b.add(new CharsRef(input.replaceAll(" +", "\u0000")),
51 new CharsRef(output.replaceAll(" +", "\u0000")),
// Asserts that the term attribute's contents equal the expected string,
// comparing length first and then each character of the backing buffer.
55 private void assertEquals(CharTermAttribute term, String expected) {
56 assertEquals(expected.length(), term.length());
57 final char[] buffer = term.buffer();
58 for(int chIDX=0;chIDX<expected.length();chIDX++) {
59 assertEquals(expected.charAt(chIDX), buffer[chIDX]);
63 // todo: we should probably refactor this guy to use/take analyzer,
64 // the tests are a little messy
// Runs `input` through tokensIn/tokensOut and checks the stream against
// `output`, a space-separated list of positions where each position is
// one or more expected terms joined by '/'. At a given position the
// first token must have posIncr 1, stacked tokens posIncr 0, and all
// tokens at the position must share the same start/end offsets.
// NOTE(review): several interior lines (loop/if closers, expectedUpto
// declaration) are missing from this chunk.
65 private void verify(String input, String output) throws Exception {
67 System.out.println("TEST: verify input=" + input + " expectedOutput=" + output);
70 tokensIn.reset(new StringReader(input));
72 final String[] expected = output.split(" ");
74 while(tokensOut.incrementToken()) {
77 System.out.println(" incr token=" + termAtt.toString() + " posIncr=" + posIncrAtt.getPositionIncrement());
80 assertTrue(expectedUpto < expected.length);
81 final int startOffset = offsetAtt.startOffset();
82 final int endOffset = offsetAtt.endOffset();
// Tokens stacked at this position are separated by '/':
84 final String[] expectedAtPos = expected[expectedUpto++].split("/");
85 for(int atPos=0;atPos<expectedAtPos.length;atPos++) {
87 assertTrue(tokensOut.incrementToken());
89 System.out.println(" incr token=" + termAtt.toString() + " posIncr=" + posIncrAtt.getPositionIncrement());
92 assertEquals(termAtt, expectedAtPos[atPos]);
93 assertEquals(atPos == 0 ? 1 : 0,
94 posIncrAtt.getPositionIncrement());
95 // start/end offset of all tokens at same pos should
97 assertEquals(startOffset, offsetAtt.startOffset());
98 assertEquals(endOffset, offsetAtt.endOffset());
104 System.out.println(" incr: END");
// Every expected position must have been consumed:
106 assertEquals(expectedUpto, expected.length);
// Hand-written synonym scenarios: overlapping rules, outputs longer
// than the input, multiple outputs for one input, and mixed keepOrig.
// NOTE(review): the MockTokenizer/SynonymFilter constructor argument
// lines are missing from this chunk — presumably b.build() and an
// ignoreCase flag; confirm against the full file.
109 public void testBasic() throws Exception {
110 b = new SynonymMap.Builder(true);
111 add("a", "foo", true);
112 add("a b", "bar fee", true);
113 add("b c", "dog collar", true);
114 add("c d", "dog harness holder extras", true);
115 add("m c e", "dog barks loudly", false);
117 add("e f", "foo bar", false);
118 add("e f", "baz bee", false);
120 add("z", "boo", false);
121 add("y", "bee", true);
123 tokensIn = new MockTokenizer(new StringReader("a"),
124 MockTokenizer.WHITESPACE,
// Prime the tokenizer once so it is in a reusable state:
127 assertTrue(tokensIn.incrementToken());
128 assertFalse(tokensIn.incrementToken());
132 tokensOut = new SynonymFilter(tokensIn,
135 termAtt = tokensOut.addAttribute(CharTermAttribute.class);
136 posIncrAtt = tokensOut.addAttribute(PositionIncrementAttribute.class);
137 offsetAtt = tokensOut.addAttribute(OffsetAttribute.class);
139 verify("a b c", "a/bar b/fee c");
141 // syn output extends beyond input tokens
142 verify("x a b c d", "x a/bar b/fee c/dog d/harness holder extras");
144 verify("a b a", "a/bar b/fee a/foo");
146 // outputs that add to one another:
147 verify("c d c d", "c/dog d/harness c/holder/dog d/extras/harness holder extras");
149 // two outputs for same input
150 verify("e f", "foo/baz bar/bee");
152 // mixed keepOrig true/false:
153 verify("a m c e x", "a/foo dog barks loudly x");
154 verify("c d m c e x", "c/dog d/harness holder/dog extras/barks loudly x");
155 assertTrue(tokensOut.getCaptureCount() > 0);
157 // no captureStates when no syns matched
158 verify("p q r s t", "p q r s t");
159 assertEquals(0, tokensOut.getCaptureCount());
161 // no captureStates when only single-input syns, w/ no
162 // lookahead needed, matched
163 verify("p q z y t", "p q boo y/bee t");
164 assertEquals(0, tokensOut.getCaptureCount());
// Builds a random "document" of `length` single-character tokens drawn
// from [start, start+alphabetSize), each token followed by a space —
// so the result is 2*length chars and ends with a trailing space.
167 private String getRandomString(char start, int alphabetSize, int length) {
168 assert alphabetSize <= 26;
169 char[] s = new char[2*length];
170 for(int charIDX=0;charIDX<length;charIDX++) {
171 s[2*charIDX] = (char) (start + random.nextInt(alphabetSize));
172 s[2*charIDX+1] = ' ';
174 return new String(s);
// Holder for one synonym rule used by slowSynMatcher. Its fields
// (`in`, `out`, `keepOrig`, judging by the usage sites below) are not
// visible in this chunk.
177 private static class OneSyn {
// Brute-force reference implementation: given a doc of single-char
// space-separated tokens and a list of synonym rules, computes the
// expected SynonymFilter output string (positions separated by spaces,
// stacked tokens joined by '/') using greedy, longest-match-wins
// conflict resolution — the same policy the real filter uses.
// NOTE(review): numerous interior lines (idx init/advance, else
// branches, loop closers, synUpto declaration) are missing from this
// chunk; comments below describe only the visible logic.
183 public String slowSynMatcher(String doc, List<OneSyn> syns, int maxOutputLength) {
184 assertTrue(doc.length() % 2 == 0);
185 final int numInputs = doc.length()/2;
186 boolean[] keepOrigs = new boolean[numInputs];
187 boolean[] hasMatch = new boolean[numInputs];
188 Arrays.fill(keepOrigs, false);
189 String[] outputs = new String[numInputs + maxOutputLength];
190 OneSyn[] matches = new OneSyn[numInputs];
// Pass 1: find every occurrence of every rule; at each start position
// keep only the longest-input rule.
191 for(OneSyn syn : syns) {
194 idx = doc.indexOf(syn.in, 1+idx);
198 assertTrue(idx % 2 == 0);
199 final int matchIDX = idx/2;
200 assertTrue(syn.in.length() % 2 == 1);
201 if (matches[matchIDX] == null) {
202 matches[matchIDX] = syn;
203 } else if (syn.in.length() > matches[matchIDX].in.length()) {
204 // Greedy conflict resolution: longer match wins:
205 matches[matchIDX] = syn;
207 assertTrue(syn.in.length() < matches[matchIDX].in.length());
212 // Greedy conflict resolution: if syn matches a range of inputs,
213 // it prevents other syns from matching that range
214 for(int inputIDX=0;inputIDX<numInputs;inputIDX++) {
215 final OneSyn match = matches[inputIDX];
217 final int synInLength = (1+match.in.length())/2;
218 for(int nextInputIDX=inputIDX+1;nextInputIDX<numInputs && nextInputIDX<(inputIDX+synInLength);nextInputIDX++) {
219 matches[nextInputIDX] = null;
224 // Fill overlapping outputs:
225 for(int inputIDX=0;inputIDX<numInputs;inputIDX++) {
226 final OneSyn syn = matches[inputIDX];
// Mark every input position covered by this match and record whether
// the original tokens should be kept:
230 for(int idx=0;idx<(1+syn.in.length())/2;idx++) {
231 hasMatch[inputIDX+idx] = true;
232 keepOrigs[inputIDX+idx] |= syn.keepOrig;
// Spread each output's tokens across consecutive positions, stacking
// with '/' where a position already has output:
234 for(String synOut : syn.out) {
235 final String[] synOutputs = synOut.split(" ");
236 assertEquals(synOutputs.length, (1+synOut.length())/2);
237 final int matchEnd = inputIDX + synOutputs.length;
239 for(int matchIDX=inputIDX;matchIDX<matchEnd;matchIDX++) {
240 if (outputs[matchIDX] == null) {
241 outputs[matchIDX] = synOutputs[synUpto++];
243 outputs[matchIDX] = outputs[matchIDX] + "/" + synOutputs[synUpto++];
// Final pass: render kept originals and synonym outputs position by
// position into the expected string:
249 StringBuilder sb = new StringBuilder();
250 String[] inputTokens = doc.split(" ");
251 final int limit = inputTokens.length + maxOutputLength;
252 for(int inputIDX=0;inputIDX<limit;inputIDX++) {
253 boolean posHasOutput = false;
254 if (inputIDX >= numInputs && outputs[inputIDX] == null) {
257 if (inputIDX < numInputs && (!hasMatch[inputIDX] || keepOrigs[inputIDX])) {
258 assertTrue(inputTokens[inputIDX].length() != 0);
259 sb.append(inputTokens[inputIDX]);
263 if (outputs[inputIDX] != null) {
267 sb.append(outputs[inputIDX]);
268 } else if (!posHasOutput) {
271 if (inputIDX < limit-1) {
276 return sb.toString();
// Randomized test: builds a random document and random synonym rules,
// computes the expected output with slowSynMatcher, and verifies the
// real SynonymFilter agrees.
// NOTE(review): several lines are missing from this chunk, including
// the OneSyn construction branch, the pruneDups call presumably made
// when dedup is on, and the filter constructor arguments.
279 public void testRandom() throws Exception {
281 final int alphabetSize = _TestUtil.nextInt(random, 2, 7);
283 final int docLen = atLeast(3000);
284 //final int docLen = 50;
286 final String document = getRandomString('a', alphabetSize, docLen);
289 System.out.println("TEST: doc=" + document);
292 final int numSyn = atLeast(5);
293 //final int numSyn = 2;
295 final Map<String,OneSyn> synMap = new HashMap<String,OneSyn>();
296 final List<OneSyn> syns = new ArrayList<OneSyn>();
297 final boolean dedup = random.nextBoolean();
299 System.out.println(" dedup=" + dedup);
301 b = new SynonymMap.Builder(dedup);
302 for(int synIDX=0;synIDX<numSyn;synIDX++) {
303 final String synIn = getRandomString('a', alphabetSize, _TestUtil.nextInt(random, 1, 5)).trim();
304 OneSyn s = synMap.get(synIn);
// New rule for this input: fresh output list, random keepOrig:
309 s.out = new ArrayList<String>();
310 synMap.put(synIn, s);
311 s.keepOrig = random.nextBoolean();
313 final String synOut = getRandomString('0', 10, _TestUtil.nextInt(random, 1, 5)).trim();
315 add(synIn, synOut, s.keepOrig);
317 System.out.println(" syns[" + synIDX + "] = " + s.in + " -> " + s.out + " keepOrig=" + s.keepOrig);
321 tokensIn = new MockTokenizer(new StringReader("a"),
322 MockTokenizer.WHITESPACE,
// Prime the tokenizer once so it is in a reusable state:
325 assertTrue(tokensIn.incrementToken());
326 assertFalse(tokensIn.incrementToken());
330 tokensOut = new SynonymFilter(tokensIn,
333 termAtt = tokensOut.addAttribute(CharTermAttribute.class);
334 posIncrAtt = tokensOut.addAttribute(PositionIncrementAttribute.class);
335 offsetAtt = tokensOut.addAttribute(OffsetAttribute.class);
341 final String expected = slowSynMatcher(document, syns, 5);
344 System.out.println("TEST: expected=" + expected);
347 verify(document, expected);
// Removes duplicate output strings across all rules, mimicking the
// builder's dedup mode for the slow reference matcher.
// NOTE(review): the branch that removes/advances past a seen output is
// missing from this chunk.
350 private void pruneDups(List<OneSyn> syns) {
351 Set<String> seen = new HashSet<String>();
352 for(OneSyn syn : syns) {
354 while(idx < syn.out.size()) {
355 String out = syn.out.get(idx);
356 if (!seen.contains(out)) {
// Returns a random unicode string that is non-empty after trimming and
// contains no '\u0000' (which add() reserves as the word separator).
// NOTE(review): the surrounding retry loop / return lines are missing
// from this chunk.
367 private String randomNonEmptyString() {
369 final String s = _TestUtil.randomUnicodeString(random).trim();
370 if (s.length() != 0 && s.indexOf('\u0000') == -1) {
376 /** simple random test, doesn't verify correctness.
377 * does verify it doesnt throw exceptions, or that the stream doesn't misbehave
379 public void testRandom2() throws Exception {
380 final int numIters = atLeast(10);
381 for (int i = 0; i < numIters; i++) {
// Random map, random dedup/ignoreCase; checkRandomData exercises the
// stream contract (reset/incrementToken/end) without asserting output.
382 b = new SynonymMap.Builder(random.nextBoolean());
383 final int numEntries = atLeast(10);
384 for (int j = 0; j < numEntries; j++) {
385 add(randomNonEmptyString(), randomNonEmptyString(), random.nextBoolean());
387 final SynonymMap map = b.build();
388 final boolean ignoreCase = random.nextBoolean();
390 final Analyzer analyzer = new ReusableAnalyzerBase() {
392 protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
393 Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.SIMPLE, true);
394 return new TokenStreamComponents(tokenizer, new SynonymFilter(tokenizer, map, ignoreCase));
398 checkRandomData(random, analyzer, 1000*RANDOM_MULTIPLIER);
// Verifies that replacement synonyms (=>, keepOrig=false) overwrite the
// positions of following input tokens ("pot", "of") rather than pushing
// them out of the stream, using the Solr synonym file format.
// NOTE(review): the String testFile declaration line is missing from
// this chunk.
403 public void testVanishingTerms() throws Exception {
405 "aaa => aaaa1 aaaa2 aaaa3\n" +
406 "bbb => bbbb1 bbbb2\n";
408 SolrSynonymParser parser = new SolrSynonymParser(true, true, new MockAnalyzer(random));
409 parser.add(new StringReader(testFile));
410 final SynonymMap map = parser.build();
412 Analyzer analyzer = new ReusableAnalyzerBase() {
414 protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
415 Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, true);
416 return new TokenStreamComponents(tokenizer, new SynonymFilter(tokenizer, map, true));
420 // where did my pot go?!
421 assertAnalyzesTo(analyzer, "xyzzy bbb pot of gold",
422 new String[] { "xyzzy", "bbbb1", "pot", "bbbb2", "of", "gold" });
424 // this one nukes 'pot' and 'of'
425 // xyzzy aaa pot of gold -> xyzzy aaaa1 aaaa2 aaaa3 gold
426 assertAnalyzesTo(analyzer, "xyzzy aaa pot of gold",
427 new String[] { "xyzzy", "aaaa1", "pot", "aaaa2", "of", "aaaa3", "gold" });
// Same vanishing-terms scenario as testVanishingTerms but driven through
// verify(), checking both the stacked and keepOrig=false expectations.
// NOTE(review): the MockTokenizer/SynonymFilter constructor argument
// lines and the branch selecting between the two verify() pairs are
// missing from this chunk.
430 public void testBasic2() throws Exception {
431 b = new SynonymMap.Builder(true);
432 final boolean keepOrig = false;
433 add("aaa", "aaaa1 aaaa2 aaaa3", keepOrig);
434 add("bbb", "bbbb1 bbbb2", keepOrig);
435 tokensIn = new MockTokenizer(new StringReader("a"),
436 MockTokenizer.WHITESPACE,
// Prime the tokenizer once so it is in a reusable state:
439 assertTrue(tokensIn.incrementToken());
440 assertFalse(tokensIn.incrementToken());
444 tokensOut = new SynonymFilter(tokensIn,
447 termAtt = tokensOut.addAttribute(CharTermAttribute.class);
448 posIncrAtt = tokensOut.addAttribute(PositionIncrementAttribute.class);
449 offsetAtt = tokensOut.addAttribute(OffsetAttribute.class);
452 verify("xyzzy bbb pot of gold", "xyzzy bbb/bbbb1 pot/bbbb2 of gold");
453 verify("xyzzy aaa pot of gold", "xyzzy aaa/aaaa1 pot/aaaa2 of/aaaa3 gold");
455 verify("xyzzy bbb pot of gold", "xyzzy bbbb1 pot/bbbb2 of gold");
456 verify("xyzzy aaa pot of gold", "xyzzy aaaa1 pot/aaaa2 of/aaaa3 gold");
// keepOrig=false matching: checks longest-match ("a b" beats "a"),
// fallback when a long rule fails partway ("z x c $" matches "x c"
// instead of "z x c v"), and non-matching tokens passing through.
460 public void testMatching() throws Exception {
461 b = new SynonymMap.Builder(true);
462 final boolean keepOrig = false;
463 add("a b", "ab", keepOrig);
464 add("a c", "ac", keepOrig);
465 add("a", "aa", keepOrig);
466 add("b", "bb", keepOrig);
467 add("z x c v", "zxcv", keepOrig);
468 add("x c", "xc", keepOrig);
469 final SynonymMap map = b.build();
470 Analyzer a = new ReusableAnalyzerBase() {
472 protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
473 Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
474 return new TokenStreamComponents(tokenizer, new SynonymFilter(tokenizer, map, true));
478 checkOneTerm(a, "$", "$");
479 checkOneTerm(a, "a", "aa");
480 checkOneTerm(a, "b", "bb");
482 assertAnalyzesTo(a, "a $",
483 new String[] { "aa", "$" },
486 assertAnalyzesTo(a, "$ a",
487 new String[] { "$", "aa" },
490 assertAnalyzesTo(a, "a a",
491 new String[] { "aa", "aa" },
494 assertAnalyzesTo(a, "z x c v",
495 new String[] { "zxcv" },
// Partial match of "z x c v" falls back to the shorter "x c" rule:
498 assertAnalyzesTo(a, "z x c $",
499 new String[] { "z", "xc", "$" },
500 new int[] { 1, 1, 1 });
// With dedup=true in the builder, identical repeated rules collapse to
// a single output token.
503 public void testRepeatsOff() throws Exception {
504 b = new SynonymMap.Builder(true);
505 final boolean keepOrig = false;
506 add("a b", "ab", keepOrig);
507 add("a b", "ab", keepOrig);
508 add("a b", "ab", keepOrig);
509 final SynonymMap map = b.build();
510 Analyzer a = new ReusableAnalyzerBase() {
512 protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
513 Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
514 return new TokenStreamComponents(tokenizer, new SynonymFilter(tokenizer, map, true));
518 assertAnalyzesTo(a, "a b",
519 new String[] { "ab" },
// With dedup=false, the same rule added three times emits three stacked
// "ab" tokens (posIncr 1, 0, 0) — the counterpart to testRepeatsOff.
523 public void testRepeatsOn() throws Exception {
524 b = new SynonymMap.Builder(false);
525 final boolean keepOrig = false;
526 add("a b", "ab", keepOrig);
527 add("a b", "ab", keepOrig);
528 add("a b", "ab", keepOrig);
529 final SynonymMap map = b.build();
530 Analyzer a = new ReusableAnalyzerBase() {
532 protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
533 Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
534 return new TokenStreamComponents(tokenizer, new SynonymFilter(tokenizer, map, true));
538 assertAnalyzesTo(a, "a b",
539 new String[] { "ab", "ab", "ab" },
540 new int[] { 1, 0, 0 });
// A rule mapping a token to itself ("zoo" -> "zoo") must not recurse:
// the output is not re-fed through the synonym map.
543 public void testRecursion() throws Exception {
544 b = new SynonymMap.Builder(true);
545 final boolean keepOrig = false;
546 add("zoo", "zoo", keepOrig);
547 final SynonymMap map = b.build();
548 Analyzer a = new ReusableAnalyzerBase() {
550 protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
551 Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
552 return new TokenStreamComponents(tokenizer, new SynonymFilter(tokenizer, map, true));
556 assertAnalyzesTo(a, "zoo zoo $ zoo",
557 new String[] { "zoo", "zoo", "$", "zoo" },
558 new int[] { 1, 1, 1, 1 });
// Self-mapping plus an expanding rule ("zoo" -> "zoo zoo"): both
// outputs are emitted and stacked, again without recursive rewriting.
561 public void testRecursion2() throws Exception {
562 b = new SynonymMap.Builder(true);
563 final boolean keepOrig = false;
564 add("zoo", "zoo", keepOrig);
565 add("zoo", "zoo zoo", keepOrig);
566 final SynonymMap map = b.build();
567 Analyzer a = new ReusableAnalyzerBase() {
569 protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
570 Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
571 return new TokenStreamComponents(tokenizer, new SynonymFilter(tokenizer, map, true));
575 // verify("zoo zoo $ zoo", "zoo/zoo zoo/zoo/zoo $/zoo zoo/zoo zoo");
576 assertAnalyzesTo(a, "zoo zoo $ zoo",
577 new String[] { "zoo", "zoo", "zoo", "zoo", "zoo", "$", "zoo", "zoo", "zoo", "zoo" },
578 new int[] { 1, 0, 1, 0, 0, 1, 0, 1, 0, 1 });
// Same rule set as testMatching but with keepOrig=true: originals are
// emitted at posIncr 1 with the synonym stacked at posIncr 0.
581 public void testIncludeOrig() throws Exception {
582 b = new SynonymMap.Builder(true);
583 final boolean keepOrig = true;
584 add("a b", "ab", keepOrig);
585 add("a c", "ac", keepOrig);
586 add("a", "aa", keepOrig);
587 add("b", "bb", keepOrig);
588 add("z x c v", "zxcv", keepOrig);
589 add("x c", "xc", keepOrig);
590 final SynonymMap map = b.build();
591 Analyzer a = new ReusableAnalyzerBase() {
593 protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
594 Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
595 return new TokenStreamComponents(tokenizer, new SynonymFilter(tokenizer, map, true));
599 assertAnalyzesTo(a, "$",
600 new String[] { "$" },
602 assertAnalyzesTo(a, "a",
603 new String[] { "a", "aa" },
605 assertAnalyzesTo(a, "a",
606 new String[] { "a", "aa" },
608 assertAnalyzesTo(a, "$ a",
609 new String[] { "$", "a", "aa" },
610 new int[] { 1, 1, 0 });
611 assertAnalyzesTo(a, "a $",
612 new String[] { "a", "aa", "$" },
613 new int[] { 1, 0, 1 });
614 assertAnalyzesTo(a, "$ a !",
615 new String[] { "$", "a", "aa", "!" },
616 new int[] { 1, 1, 0, 1 });
617 assertAnalyzesTo(a, "a a",
618 new String[] { "a", "aa", "a", "aa" },
619 new int[] { 1, 0, 1, 0 });
620 assertAnalyzesTo(a, "b",
621 new String[] { "b", "bb" },
// Multi-token match with keepOrig: synonym stacked on the first
// original token, remaining originals follow at their own positions:
623 assertAnalyzesTo(a, "z x c v",
624 new String[] { "z", "zxcv", "x", "c", "v" },
625 new int[] { 1, 0, 1, 1, 1 });
626 assertAnalyzesTo(a, "z x c $",
627 new String[] { "z", "x", "xc", "c", "$" },
628 new int[] { 1, 1, 0, 1, 1 });
// Contracting self-referential rule ("zoo zoo" -> "zoo") with keepOrig:
// the shorter synonym stacks over the first original token.
631 public void testRecursion3() throws Exception {
632 b = new SynonymMap.Builder(true);
633 final boolean keepOrig = true;
634 add("zoo zoo", "zoo", keepOrig);
635 final SynonymMap map = b.build();
636 Analyzer a = new ReusableAnalyzerBase() {
638 protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
639 Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
640 return new TokenStreamComponents(tokenizer, new SynonymFilter(tokenizer, map, true));
644 assertAnalyzesTo(a, "zoo zoo $ zoo",
645 new String[] { "zoo", "zoo", "zoo", "$", "zoo" },
646 new int[] { 1, 0, 1, 1, 1 });
// Both contracting ("zoo zoo" -> "zoo") and expanding ("zoo" ->
// "zoo zoo") self-referential rules together; greedy matching consumes
// "zoo zoo" first, then the single trailing "zoo" expands.
// NOTE(review): the method's and class's closing braces fall outside
// this chunk.
649 public void testRecursion4() throws Exception {
650 b = new SynonymMap.Builder(true);
651 final boolean keepOrig = true;
652 add("zoo zoo", "zoo", keepOrig);
653 add("zoo", "zoo zoo", keepOrig);
654 final SynonymMap map = b.build();
655 Analyzer a = new ReusableAnalyzerBase() {
657 protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
658 Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
659 return new TokenStreamComponents(tokenizer, new SynonymFilter(tokenizer, map, true));
663 assertAnalyzesTo(a, "zoo zoo $ zoo",
664 new String[] { "zoo", "zoo", "zoo", "$", "zoo", "zoo", "zoo" },
665 new int[] { 1, 0, 1, 1, 1, 0, 1 });