lucene-java-3.4.0/lucene/contrib/spellchecker/src/java/org/apache/lucene/search/spell/SpellChecker.java

   1 package org.apache.lucene.search.spell;
   2
   3 /**
   4  * Licensed to the Apache Software Foundation (ASF) under one or more
   5  * contributor license agreements.  See the NOTICE file distributed with
   6  * this work for additional information regarding copyright ownership.
   7  * The ASF licenses this file to You under the Apache License, Version 2.0
   8  * (the "License"); you may not use this file except in compliance with
   9  * the License.  You may obtain a copy of the License at
  10  *
  11  *     http://www.apache.org/licenses/LICENSE-2.0
  12  *
  13  * Unless required by applicable law or agreed to in writing, software
  14  * distributed under the License is distributed on an "AS IS" BASIS,
  15  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  16  * See the License for the specific language governing permissions and
  17  * limitations under the License.
  18  */
  19
  20 import java.io.IOException;
  21 import java.util.ArrayList;
  22 import java.util.Comparator;
  23 import java.util.Iterator;
  24 import java.util.List;
  25
  26 import org.apache.lucene.analysis.WhitespaceAnalyzer;
  27 import org.apache.lucene.document.Document;
  28 import org.apache.lucene.document.Field;
  29 import org.apache.lucene.index.FieldInfo.IndexOptions;
  30 import org.apache.lucene.index.IndexReader;
  31 import org.apache.lucene.index.IndexWriter;
  32 import org.apache.lucene.index.IndexWriterConfig;
  33 import org.apache.lucene.index.TieredMergePolicy;
  34 import org.apache.lucene.index.Term;
  35 import org.apache.lucene.index.IndexWriterConfig.OpenMode;
  36 import org.apache.lucene.search.BooleanClause;
  37 import org.apache.lucene.search.BooleanQuery;
  38 import org.apache.lucene.search.IndexSearcher;
  39 import org.apache.lucene.search.Query;
  40 import org.apache.lucene.search.ScoreDoc;
  41 import org.apache.lucene.search.TermQuery;
  42 import org.apache.lucene.store.AlreadyClosedException;
  43 import org.apache.lucene.store.Directory;
  44 import org.apache.lucene.util.ReaderUtil;
  45 import org.apache.lucene.util.Version;
  46
  47 /**
  48  * <p>
  49  *   Spell Checker class  (Main class) <br/>
  50  *  (initially inspired by the David Spencer code).
  51  * </p>
  52  *
  53  * <p>Example Usage:
  54  *
  55  * <pre>
  56  *  SpellChecker spellchecker = new SpellChecker(spellIndexDirectory);
  57  *  // To index a field of a user index:
  58  *  spellchecker.indexDictionary(new LuceneDictionary(my_lucene_reader, a_field));
  59  *  // To index a file containing words:
  60  *  spellchecker.indexDictionary(new PlainTextDictionary(new File("myfile.txt")));
  61  *  String[] suggestions = spellchecker.suggestSimilar("misspelt", 5);
  62  * </pre>
  63  *
  64  *
  65  * @version 1.0
  66  */
  67 public class SpellChecker implements java.io.Closeable {
  68
  69   /**
  70    * The default minimum score to use, if not specified by calling {@link #setAccuracy(float)} .
  71    */
  72   public static final float DEFAULT_ACCURACY = 0.5f;
  73
  74   /**
  75    * Field name for each word in the ngram index.
  76    */
  77   public static final String F_WORD = "word";
  78
  79   private static final Term F_WORD_TERM = new Term(F_WORD);
  80
  81   /**
  82    * the spell index
  83    */
  84   // don't modify the directory directly - see #swapSearcher()
  85   // TODO: why is this package private?
  86   Directory spellIndex;
  87   /**
  88    * Boost value for start and end grams
  89    */
  90   private float bStart = 2.0f;
  91
  92   private float bEnd = 1.0f;
  93   // don't use this searcher directly - see #swapSearcher()
  94
  95   private IndexSearcher searcher;
  96   /*
  97    * this locks all modifications to the current searcher.
  98    */
  99
 100   private final Object searcherLock = new Object();
 101   /*
 102    * this lock synchronizes all possible modifications to the
 103    * current index directory. It should not be possible to try modifying
 104    * the same index concurrently. Note: Do not acquire the searcher lock
 105    * before acquiring this lock!
 106    */
 107   private final Object modifyCurrentIndexLock = new Object();
 108
 109   private volatile boolean closed = false;
 110   // minimum score for hits generated by the spell checker query
 111
 112   private float accuracy = DEFAULT_ACCURACY;
 113
 114   private StringDistance sd;
 115   private Comparator<SuggestWord> comparator;
 116
 117   /**
 118    * Use the given directory as a spell checker index. The directory
 119    * is created if it doesn't exist yet.
 120    * @param spellIndex the spell index directory
 121    * @param sd the {@link StringDistance} measurement to use
 122    * @throws IOException if Spellchecker can not open the directory
 123    */
 124   public SpellChecker(Directory spellIndex, StringDistance sd) throws IOException {
 125     this(spellIndex, sd, SuggestWordQueue.DEFAULT_COMPARATOR);
 126   }
 127   /**
 128    * Use the given directory as a spell checker index with a
 129    * {@link LevensteinDistance} as the default {@link StringDistance}. The
 130    * directory is created if it doesn't exist yet.
 131    *
 132    * @param spellIndex
 133    *          the spell index directory
 134    * @throws IOException
 135    *           if spellchecker can not open the directory
 136    */
 137   public SpellChecker(Directory spellIndex) throws IOException {
 138     this(spellIndex, new LevensteinDistance());
 139   }
 140
 141   /**
 142    * Use the given directory as a spell checker index with the given {@link org.apache.lucene.search.spell.StringDistance} measure
 143    * and the given {@link java.util.Comparator} for sorting the results.
 144    * @param spellIndex The spelling index
 145    * @param sd The distance
 146    * @param comparator The comparator
 147    * @throws IOException if there is a problem opening the index
 148    */
 149   public SpellChecker(Directory spellIndex, StringDistance sd, Comparator<SuggestWord> comparator) throws IOException {
 150     setSpellIndex(spellIndex);
 151     setStringDistance(sd);
 152     this.comparator = comparator;
 153   }
 154
 155   /**
 156    * Use a different index as the spell checker index or re-open
 157    * the existing index if <code>spellIndex</code> is the same value
 158    * as given in the constructor.
 159    * @param spellIndexDir the spell directory to use
 160    * @throws AlreadyClosedException if the Spellchecker is already closed
 161    * @throws  IOException if spellchecker can not open the directory
 162    */
 163   // TODO: we should make this final as it is called in the constructor
 164   public void setSpellIndex(Directory spellIndexDir) throws IOException {
 165     // this could be the same directory as the current spellIndex
 166     // modifications to the directory should be synchronized
 167     synchronized (modifyCurrentIndexLock) {
 168       ensureOpen();
 169       if (!IndexReader.indexExists(spellIndexDir)) {
 170           IndexWriter writer = new IndexWriter(spellIndexDir,
 171             new IndexWriterConfig(Version.LUCENE_CURRENT,
 172                 new WhitespaceAnalyzer(Version.LUCENE_CURRENT)));
 173           writer.close();
 174       }
 175       swapSearcher(spellIndexDir);
 176     }
 177   }
 178
 179   /**
 180    * Sets the {@link java.util.Comparator} for the {@link SuggestWordQueue}.
 181    * @param comparator the comparator
 182    */
 183   public void setComparator(Comparator<SuggestWord> comparator) {
 184     this.comparator = comparator;
 185   }
 186
 187   public Comparator<SuggestWord> getComparator() {
 188     return comparator;
 189   }
 190
 191   /**
 192    * Sets the {@link StringDistance} implementation for this
 193    * {@link SpellChecker} instance.
 194    *
 195    * @param sd the {@link StringDistance} implementation for this
 196    * {@link SpellChecker} instance
 197    */
 198   public void setStringDistance(StringDistance sd) {
 199     this.sd = sd;
 200   }
 201   /**
 202    * Returns the {@link StringDistance} instance used by this
 203    * {@link SpellChecker} instance.
 204    *
 205    * @return the {@link StringDistance} instance used by this
 206    *         {@link SpellChecker} instance.
 207    */
 208   public StringDistance getStringDistance() {
 209     return sd;
 210   }
 211
 212   /**
 213    * Sets the accuracy 0 &lt; minScore &lt; 1; default {@link #DEFAULT_ACCURACY}
 214    * @param acc The new accuracy
 215    */
 216   public void setAccuracy(float acc) {
 217     this.accuracy = acc;
 218   }
 219
 220   /**
 221    * The accuracy (minimum score) to be used, unless overridden in {@link #suggestSimilar(String, int, org.apache.lucene.index.IndexReader, String, boolean, float)}, to
 222    * decide whether a suggestion is included or not.
 223    * @return The current accuracy setting
 224    */
 225   public float getAccuracy() {
 226     return accuracy;
 227   }
 228
 229   /**
 230    * Suggest similar words.
 231    *
 232    * <p>As the Lucene similarity that is used to fetch the most relevant n-grammed terms
 233    * is not the same as the edit distance strategy used to calculate the best
 234    * matching spell-checked word from the hits that Lucene found, one usually has
 235    * to retrieve a couple of numSug's in order to get the true best match.
 236    *
 237    * <p>I.e. if numSug == 1, don't count on that suggestion being the best one.
 238    * Thus, you should set this value to <b>at least</b> 5 for a good suggestion.
 239    *
 240    * @param word the word you want a spell check done on
 241    * @param numSug the number of suggested words
 242    * @throws IOException if the underlying index throws an {@link IOException}
 243    * @throws AlreadyClosedException if the Spellchecker is already closed
 244    * @return String[]
 245    *
 246    * @see #suggestSimilar(String, int, org.apache.lucene.index.IndexReader, String, boolean, float)
 247    */
 248   public String[] suggestSimilar(String word, int numSug) throws IOException {
 249     return this.suggestSimilar(word, numSug, null, null, false);
 250   }
 251
 252   /**
 253    * Suggest similar words.
 254    *
 255    * <p>As the Lucene similarity that is used to fetch the most relevant n-grammed terms
 256    * is not the same as the edit distance strategy used to calculate the best
 257    * matching spell-checked word from the hits that Lucene found, one usually has
 258    * to retrieve a couple of numSug's in order to get the true best match.
 259    *
 260    * <p>I.e. if numSug == 1, don't count on that suggestion being the best one.
 261    * Thus, you should set this value to <b>at least</b> 5 for a good suggestion.
 262    *
 263    * @param word the word you want a spell check done on
 264    * @param numSug the number of suggested words
 265    * @param accuracy The minimum score a suggestion must have in order to qualify for inclusion in the results
 266    * @throws IOException if the underlying index throws an {@link IOException}
 267    * @throws AlreadyClosedException if the Spellchecker is already closed
 268    * @return String[]
 269    *
 270    * @see #suggestSimilar(String, int, org.apache.lucene.index.IndexReader, String, boolean, float)
 271    */
 272   public String[] suggestSimilar(String word, int numSug, float accuracy) throws IOException {
 273     return this.suggestSimilar(word, numSug, null, null, false, accuracy);
 274   }
 275
 276   /**
 277    * Suggest similar words (optionally restricted to a field of an index).
 278    *
 279    * <p>As the Lucene similarity that is used to fetch the most relevant n-grammed terms
 280    * is not the same as the edit distance strategy used to calculate the best
 281    * matching spell-checked word from the hits that Lucene found, one usually has
 282    * to retrieve a couple of numSug's in order to get the true best match.
 283    *
 284    * <p>I.e. if numSug == 1, don't count on that suggestion being the best one.
 285    * Thus, you should set this value to <b>at least</b> 5 for a good suggestion.
 286    *
 287    * <p>Uses the {@link #getAccuracy()} value passed into the constructor as the accuracy.
 288    *
 289    * @param word the word you want a spell check done on
 290    * @param numSug the number of suggested words
 291    * @param ir the indexReader of the user index (can be null see field param)
 292    * @param field the field of the user index: if field is not null, the suggested
 293    * words are restricted to the words present in this field.
 294    * @param morePopular return only the suggest words that are as frequent or more frequent than the searched word
 295    * (only if restricted mode = (indexReader!=null and field!=null)
 296    * @throws IOException if the underlying index throws an {@link IOException}
 297    * @throws AlreadyClosedException if the Spellchecker is already closed
 298    * @return String[] the sorted list of the suggest words with these 2 criteria:
 299    * first criteria: the edit distance, second criteria (only if restricted mode): the popularity
 300    * of the suggest words in the field of the user index
 301    *
 302    * @see #suggestSimilar(String, int, org.apache.lucene.index.IndexReader, String, boolean, float)
 303    */
 304   public String[] suggestSimilar(String word, int numSug, IndexReader ir,
 305       String field, boolean morePopular) throws IOException {
 306     return suggestSimilar(word, numSug, ir, field, morePopular, accuracy);
 307   }
 308
 309
 310   /**
 311    * Suggest similar words (optionally restricted to a field of an index).
 312    *
 313    * <p>As the Lucene similarity that is used to fetch the most relevant n-grammed terms
 314    * is not the same as the edit distance strategy used to calculate the best
 315    * matching spell-checked word from the hits that Lucene found, one usually has
 316    * to retrieve a couple of numSug's in order to get the true best match.
 317    *
 318    * <p>I.e. if numSug == 1, don't count on that suggestion being the best one.
 319    * Thus, you should set this value to <b>at least</b> 5 for a good suggestion.
 320    *
 321    * @param word the word you want a spell check done on
 322    * @param numSug the number of suggested words
 323    * @param ir the indexReader of the user index (can be null see field param)
 324    * @param field the field of the user index: if field is not null, the suggested
 325    * words are restricted to the words present in this field.
 326    * @param morePopular return only the suggest words that are as frequent or more frequent than the searched word
 327    * (only if restricted mode = (indexReader!=null and field!=null)
 328    * @param accuracy The minimum score a suggestion must have in order to qualify for inclusion in the results
 329    * @throws IOException if the underlying index throws an {@link IOException}
 330    * @throws AlreadyClosedException if the Spellchecker is already closed
 331    * @return String[] the sorted list of the suggest words with these 2 criteria:
 332    * first criteria: the edit distance, second criteria (only if restricted mode): the popularity
 333    * of the suggest words in the field of the user index
 334    */
 335   public String[] suggestSimilar(String word, int numSug, IndexReader ir,
 336       String field, boolean morePopular, float accuracy) throws IOException {
 337     // obtainSearcher calls ensureOpen
 338     final IndexSearcher indexSearcher = obtainSearcher();
 339     try{
 340
 341       final int lengthWord = word.length();
 342
 343       final int freq = (ir != null && field != null) ? ir.docFreq(new Term(field, word)) : 0;
 344       final int goalFreq = (morePopular && ir != null && field != null) ? freq : 0;
 345       // if the word exists in the real index and we don't care for word frequency, return the word itself
 346       if (!morePopular && freq > 0) {
 347         return new String[] { word };
 348       }
 349
 350       BooleanQuery query = new BooleanQuery();
 351       String[] grams;
 352       String key;
 353
 354       for (int ng = getMin(lengthWord); ng <= getMax(lengthWord); ng++) {
 355
 356         key = "gram" + ng; // form key
 357
 358         grams = formGrams(word, ng); // form word into ngrams (allow dups too)
 359
 360         if (grams.length == 0) {
 361           continue; // hmm
 362         }
 363
 364         if (bStart > 0) { // should we boost prefixes?
 365           add(query, "start" + ng, grams[0], bStart); // matches start of word
 366
 367         }
 368         if (bEnd > 0) { // should we boost suffixes
 369           add(query, "end" + ng, grams[grams.length - 1], bEnd); // matches end of word
 370
 371         }
 372         for (int i = 0; i < grams.length; i++) {
 373           add(query, key, grams[i]);
 374         }
 375       }
 376
 377       int maxHits = 10 * numSug;
 378
 379   //    System.out.println("Q: " + query);
 380       ScoreDoc[] hits = indexSearcher.search(query, null, maxHits).scoreDocs;
 381   //    System.out.println("HITS: " + hits.length());
 382       SuggestWordQueue sugQueue = new SuggestWordQueue(numSug, comparator);
 383
 384       // go thru more than 'maxr' matches in case the distance filter triggers
 385       int stop = Math.min(hits.length, maxHits);
 386       SuggestWord sugWord = new SuggestWord();
 387       for (int i = 0; i < stop; i++) {
 388
 389         sugWord.string = indexSearcher.doc(hits[i].doc).get(F_WORD); // get orig word
 390
 391         // don't suggest a word for itself, that would be silly
 392         if (sugWord.string.equals(word)) {
 393           continue;
 394         }
 395
 396         // edit distance
 397         sugWord.score = sd.getDistance(word,sugWord.string);
 398         if (sugWord.score < accuracy) {
 399           continue;
 400         }
 401
 402         if (ir != null && field != null) { // use the user index
 403           sugWord.freq = ir.docFreq(new Term(field, sugWord.string)); // freq in the index
 404           // don't suggest a word that is not present in the field
 405           if ((morePopular && goalFreq > sugWord.freq) || sugWord.freq < 1) {
 406             continue;
 407           }
 408         }
 409         sugQueue.insertWithOverflow(sugWord);
 410         if (sugQueue.size() == numSug) {
 411           // if queue full, maintain the minScore score
 412           accuracy = sugQueue.top().score;
 413         }
 414         sugWord = new SuggestWord();
 415       }
 416
 417       // convert to array string
 418       String[] list = new String[sugQueue.size()];
 419       for (int i = sugQueue.size() - 1; i >= 0; i--) {
 420         list[i] = sugQueue.pop().string;
 421       }
 422
 423       return list;
 424     } finally {
 425       releaseSearcher(indexSearcher);
 426     }
 427   }
 428   /**
 429    * Add a clause to a boolean query.
 430    */
 431   private static void add(BooleanQuery q, String name, String value, float boost) {
 432     Query tq = new TermQuery(new Term(name, value));
 433     tq.setBoost(boost);
 434     q.add(new BooleanClause(tq, BooleanClause.Occur.SHOULD));
 435   }
 436
 437   /**
 438    * Add a clause to a boolean query.
 439    */
 440   private static void add(BooleanQuery q, String name, String value) {
 441     q.add(new BooleanClause(new TermQuery(new Term(name, value)), BooleanClause.Occur.SHOULD));
 442   }
 443
 444   /**
 445    * Form all ngrams for a given word.
 446    * @param text the word to parse
 447    * @param ng the ngram length e.g. 3
 448    * @return an array of all ngrams in the word and note that duplicates are not removed
 449    */
 450   private static String[] formGrams(String text, int ng) {
 451     int len = text.length();
 452     String[] res = new String[len - ng + 1];
 453     for (int i = 0; i < len - ng + 1; i++) {
 454       res[i] = text.substring(i, i + ng);
 455     }
 456     return res;
 457   }
 458
 459   /**
 460    * Removes all terms from the spell check index.
 461    * @throws IOException
 462    * @throws AlreadyClosedException if the Spellchecker is already closed
 463    */
 464   public void clearIndex() throws IOException {
 465     synchronized (modifyCurrentIndexLock) {
 466       ensureOpen();
 467       final Directory dir = this.spellIndex;
 468       final IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(
 469           Version.LUCENE_CURRENT,
 470           new WhitespaceAnalyzer(Version.LUCENE_CURRENT))
 471           .setOpenMode(OpenMode.CREATE));
 472       writer.close();
 473       swapSearcher(dir);
 474     }
 475   }
 476
 477   /**
 478    * Check whether the word exists in the index.
 479    * @param word
 480    * @throws IOException
 481    * @throws AlreadyClosedException if the Spellchecker is already closed
 482    * @return true if the word exists in the index
 483    */
 484   public boolean exist(String word) throws IOException {
 485     // obtainSearcher calls ensureOpen
 486     final IndexSearcher indexSearcher = obtainSearcher();
 487     try{
 488       return indexSearcher.docFreq(F_WORD_TERM.createTerm(word)) > 0;
 489     } finally {
 490       releaseSearcher(indexSearcher);
 491     }
 492   }
 493
 494   /**
 495    * Indexes the data from the given {@link Dictionary}.
 496    * @param dict Dictionary to index
 497    * @param mergeFactor mergeFactor to use when indexing
 498    * @param ramMB the max amount or memory in MB to use
 499    * @param optimize whether or not the spellcheck index should be optimized
 500    * @throws AlreadyClosedException if the Spellchecker is already closed
 501    * @throws IOException
 502    */
 503   public final void indexDictionary(Dictionary dict, int mergeFactor, int ramMB, boolean optimize) throws IOException {
 504     synchronized (modifyCurrentIndexLock) {
 505       ensureOpen();
 506       final Directory dir = this.spellIndex;
 507       final IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(Version.LUCENE_CURRENT, new WhitespaceAnalyzer(Version.LUCENE_CURRENT)).setRAMBufferSizeMB(ramMB));
 508       ((TieredMergePolicy) writer.getConfig().getMergePolicy()).setMaxMergeAtOnce(mergeFactor);
 509       IndexSearcher indexSearcher = obtainSearcher();
 510       final List<IndexReader> readers = new ArrayList<IndexReader>();
 511
 512       if (searcher.maxDoc() > 0) {
 513         ReaderUtil.gatherSubReaders(readers, searcher.getIndexReader());
 514       }
 515
 516       boolean isEmpty = readers.isEmpty();
 517
 518       try {
 519         Iterator<String> iter = dict.getWordsIterator();
 520
 521         terms: while (iter.hasNext()) {
 522           String word = iter.next();
 523
 524           int len = word.length();
 525           if (len < 3) {
 526             continue; // too short we bail but "too long" is fine...
 527           }
 528
 529           if (!isEmpty) {
 530             // we have a non-empty index, check if the term exists
 531             Term term = F_WORD_TERM.createTerm(word);
 532             for (IndexReader ir : readers) {
 533               if (ir.docFreq(term) > 0) {
 534                 continue terms;
 535               }
 536             }
 537           }
 538
 539           // ok index the word
 540           Document doc = createDocument(word, getMin(len), getMax(len));
 541           writer.addDocument(doc);
 542         }
 543       } finally {
 544         releaseSearcher(indexSearcher);
 545       }
 546       // close writer
 547       if (optimize)
 548         writer.optimize();
 549       writer.close();
 550       // also re-open the spell index to see our own changes when the next suggestion
 551       // is fetched:
 552       swapSearcher(dir);
 553     }
 554   }
 555
 556   /**
 557    * Indexes the data from the given {@link Dictionary}.
 558    * @param dict the dictionary to index
 559    * @param mergeFactor mergeFactor to use when indexing
 560    * @param ramMB the max amount or memory in MB to use
 561    * @throws IOException
 562    */
 563   public final void indexDictionary(Dictionary dict, int mergeFactor, int ramMB) throws IOException {
 564     indexDictionary(dict, mergeFactor, ramMB, true);
 565   }
 566
 567   /**
 568    * Indexes the data from the given {@link Dictionary}.
 569    * @param dict the dictionary to index
 570    * @throws IOException
 571    */
 572   public final void indexDictionary(Dictionary dict) throws IOException {
 573     indexDictionary(dict, 300, (int)IndexWriterConfig.DEFAULT_RAM_BUFFER_SIZE_MB);
 574   }
 575
 576   private static int getMin(int l) {
 577     if (l > 5) {
 578       return 3;
 579     }
 580     if (l == 5) {
 581       return 2;
 582     }
 583     return 1;
 584   }
 585
 586   private static int getMax(int l) {
 587     if (l > 5) {
 588       return 4;
 589     }
 590     if (l == 5) {
 591       return 3;
 592     }
 593     return 2;
 594   }
 595
 596   private static Document createDocument(String text, int ng1, int ng2) {
 597     Document doc = new Document();
 598     // the word field is never queried on... its indexed so it can be quickly
 599     // checked for rebuild (and stored for retrieval). Doesn't need norms or TF/pos
 600     Field f = new Field(F_WORD, text, Field.Store.YES, Field.Index.NOT_ANALYZED);
 601     f.setIndexOptions(IndexOptions.DOCS_ONLY);
 602     f.setOmitNorms(true);
 603     doc.add(f); // orig term
 604     addGram(text, doc, ng1, ng2);
 605     return doc;
 606   }
 607
 608   private static void addGram(String text, Document doc, int ng1, int ng2) {
 609     int len = text.length();
 610     for (int ng = ng1; ng <= ng2; ng++) {
 611       String key = "gram" + ng;
 612       String end = null;
 613       for (int i = 0; i < len - ng + 1; i++) {
 614         String gram = text.substring(i, i + ng);
 615         Field ngramField = new Field(key, gram, Field.Store.NO, Field.Index.NOT_ANALYZED);
 616         // spellchecker does not use positional queries, but we want freqs
 617         // for scoring these multivalued n-gram fields.
 618         ngramField.setIndexOptions(IndexOptions.DOCS_AND_FREQS);
 619         doc.add(ngramField);
 620         if (i == 0) {
 621           // only one term possible in the startXXField, TF/pos and norms aren't needed.
 622           Field startField = new Field("start" + ng, gram, Field.Store.NO, Field.Index.NOT_ANALYZED);
 623           startField.setIndexOptions(IndexOptions.DOCS_ONLY);
 624           startField.setOmitNorms(true);
 625           doc.add(startField);
 626         }
 627         end = gram;
 628       }
 629       if (end != null) { // may not be present if len==ng1
 630         // only one term possible in the endXXField, TF/pos and norms aren't needed.
 631         Field endField = new Field("end" + ng, end, Field.Store.NO, Field.Index.NOT_ANALYZED);
 632         endField.setIndexOptions(IndexOptions.DOCS_ONLY);
 633         endField.setOmitNorms(true);
 634         doc.add(endField);
 635       }
 636     }
 637   }
 638
 639   private IndexSearcher obtainSearcher() {
 640     synchronized (searcherLock) {
 641       ensureOpen();
 642       searcher.getIndexReader().incRef();
 643       return searcher;
 644     }
 645   }
 646
 647   private void releaseSearcher(final IndexSearcher aSearcher) throws IOException{
 648       // don't check if open - always decRef
 649       // don't decrement the private searcher - could have been swapped
 650       aSearcher.getIndexReader().decRef();
 651   }
 652
 653   private void ensureOpen() {
 654     if (closed) {
 655       throw new AlreadyClosedException("Spellchecker has been closed");
 656     }
 657   }
 658
 659   /**
 660    * Close the IndexSearcher used by this SpellChecker
 661    * @throws IOException if the close operation causes an {@link IOException}
 662    * @throws AlreadyClosedException if the {@link SpellChecker} is already closed
 663    */
 664   public void close() throws IOException {
 665     synchronized (searcherLock) {
 666       ensureOpen();
 667       closed = true;
 668       if (searcher != null) {
 669         searcher.close();
 670       }
 671       searcher = null;
 672     }
 673   }
 674
 675   private void swapSearcher(final Directory dir) throws IOException {
 676     /*
 677      * opening a searcher is possibly very expensive.
 678      * We rather close it again if the Spellchecker was closed during
 679      * this operation than block access to the current searcher while opening.
 680      */
 681     final IndexSearcher indexSearcher = createSearcher(dir);
 682     synchronized (searcherLock) {
 683       if(closed){
 684         indexSearcher.close();
 685         throw new AlreadyClosedException("Spellchecker has been closed");
 686       }
 687       if (searcher != null) {
 688         searcher.close();
 689       }
 690       // set the spellindex in the sync block - ensure consistency.
 691       searcher = indexSearcher;
 692       this.spellIndex = dir;
 693     }
 694   }
 695
 696   /**
 697    * Creates a new read-only IndexSearcher
 698    * @param dir the directory used to open the searcher
 699    * @return a new read-only IndexSearcher
 700    * @throws IOException f there is a low-level IO error
 701    */
 702   // for testing purposes
 703   IndexSearcher createSearcher(final Directory dir) throws IOException{
 704     return new IndexSearcher(dir, true);
 705   }
 706
 707   /**
 708    * Returns <code>true</code> if and only if the {@link SpellChecker} is
 709    * closed, otherwise <code>false</code>.
 710    *
 711    * @return <code>true</code> if and only if the {@link SpellChecker} is
 712    *         closed, otherwise <code>false</code>.
 713    */
 714   boolean isClosed(){
 715     return closed;
 716   }
 717
 718 }