lucene-java-3.4.0/lucene/src/java/org/apache/lucene/analysis/WhitespaceTokenizer.java

   1 package org.apache.lucene.analysis;
   2
   3 /**
   4  * Licensed to the Apache Software Foundation (ASF) under one or more
   5  * contributor license agreements.  See the NOTICE file distributed with
   6  * this work for additional information regarding copyright ownership.
   7  * The ASF licenses this file to You under the Apache License, Version 2.0
   8  * (the "License"); you may not use this file except in compliance with
   9  * the License.  You may obtain a copy of the License at
  10  *
  11  *     http://www.apache.org/licenses/LICENSE-2.0
  12  *
  13  * Unless required by applicable law or agreed to in writing, software
  14  * distributed under the License is distributed on an "AS IS" BASIS,
  15  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  16  * See the License for the specific language governing permissions and
  17  * limitations under the License.
  18  */
  19
  20 import java.io.Reader;
  21
  22 import org.apache.lucene.util.AttributeSource;
  23 import org.apache.lucene.util.Version;
  24
  25 /**
  26  * A WhitespaceTokenizer is a tokenizer that divides text at whitespace.
  27  * Adjacent sequences of non-Whitespace characters form tokens. <a
  28  * name="version"/>
  29  * <p>
  30  * You must specify the required {@link Version} compatibility when creating
  31  * {@link WhitespaceTokenizer}:
  32  * <ul>
  33  * <li>As of 3.1, {@link CharTokenizer} uses an int based API to normalize and
  34  * detect token characters. See {@link CharTokenizer#isTokenChar(int)} and
  35  * {@link CharTokenizer#normalize(int)} for details.</li>
  36  * </ul>
  37  */
  38 public final class WhitespaceTokenizer extends CharTokenizer {
  39
  40   /**
  41    * Construct a new WhitespaceTokenizer.
  42    *
  43    * @param matchVersion
  44    *          Lucene version to match; see <a href="#version">above</a>
  45    * @param in the input to split up into tokens
  46    */
  47   public WhitespaceTokenizer(Version matchVersion, Reader in) {
  48     super(matchVersion, in);
  49   }
  50
  51   /**
  52    * Construct a new WhitespaceTokenizer using a given {@link AttributeSource}.
  53    *
  54    * @param matchVersion
  55    *          Lucene version to match; see <a href="#version">above</a>
  56    * @param source
  57    *          the attribute source to use for this {@link Tokenizer}
  58    * @param in
  59    *          the input to split up into tokens
  60    */
  61   public WhitespaceTokenizer(Version matchVersion, AttributeSource source, Reader in) {
  62     super(matchVersion, source, in);
  63   }
  64
  65   /**
  66    * Construct a new WhitespaceTokenizer using a given
  67    * {@link org.apache.lucene.util.AttributeSource.AttributeFactory}.
  68    *
  69    * @param matchVersion
  70    *          Lucene version to match; see
  71    *          <a href="#version">above</a>
  72    * @param factory
  73    *          the attribute factory to use for this {@link Tokenizer}
  74    * @param in
  75    *          the input to split up into tokens
  76    */
  77   public WhitespaceTokenizer(Version matchVersion, AttributeFactory factory, Reader in) {
  78     super(matchVersion, factory, in);
  79   }
  80
  81   /**
  82    * Construct a new WhitespaceTokenizer without an explicit {@link Version}.
  83    * @param in the input to split up into tokens
  84    * @deprecated use {@link #WhitespaceTokenizer(Version, Reader)} instead. This will
  85    *             be removed in Lucene 4.0.
  86    */
  87   @Deprecated
  88   public WhitespaceTokenizer(Reader in) {
  89     super(in);
  90   }
  91
  92   /**
  93    * Construct a new WhitespaceTokenizer using a given {@link AttributeSource}.
  94    * @param source the attribute source to use for this {@link Tokenizer}
  95    * @param in the input to split up into tokens
  96    * @deprecated use {@link #WhitespaceTokenizer(Version, AttributeSource, Reader)} instead. This will be removed in Lucene 4.0.
  97    */
  98   @Deprecated
  99   public WhitespaceTokenizer(AttributeSource source, Reader in) {
 100     super(source, in);
 101   }
 102
 103   /**
 104    * Construct a new WhitespaceTokenizer using a given attribute factory.
 105    * @param factory the {@link org.apache.lucene.util.AttributeSource.AttributeFactory} to use
 106    * @param in the input to split up into tokens
 107    * @deprecated use {@link #WhitespaceTokenizer(Version, AttributeSource.AttributeFactory, Reader)}
 108    *             instead. This will be removed in Lucene 4.0.
 109    */
 110   @Deprecated
 111   public WhitespaceTokenizer(AttributeFactory factory, Reader in) {
 112     super(factory, in);
 113   }
 114
 115   /** Token predicate: returns true for any code point {@code c} that is not
 116    * whitespace according to {@link Character#isWhitespace(int)}. */
 117   @Override
 118   protected boolean isTokenChar(int c) {
 119     return !Character.isWhitespace(c);
 120   }
 121 }