--- /dev/null
+package org.apache.lucene.analysis.th;
+
+/**
+ * Copyright 2006 The Apache Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.Reader;
+
+import org.apache.lucene.analysis.ReusableAnalyzerBase;
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.StopAnalyzer;
+import org.apache.lucene.analysis.StopFilter;
+import org.apache.lucene.analysis.LowerCaseFilter;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.standard.StandardAnalyzer;
+import org.apache.lucene.analysis.standard.StandardFilter;
+import org.apache.lucene.analysis.standard.StandardTokenizer;
+import org.apache.lucene.util.Version;
+
+/**
+ * {@link Analyzer} for Thai language. It uses {@link java.text.BreakIterator} to break words.
+ * @version 0.2
+ *
+ * <p><b>NOTE</b>: This class uses the same {@link Version}
+ * dependent settings as {@link StandardAnalyzer}.</p>
+ */
+public final class ThaiAnalyzer extends ReusableAnalyzerBase {
+ private final Version matchVersion;
+
+ public ThaiAnalyzer(Version matchVersion) {
+ this.matchVersion = matchVersion;
+ }
+
+ /**
+ * Creates
+ * {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
+ * used to tokenize all the text in the provided {@link Reader}.
+ *
+ * @return {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
+ * built from a {@link StandardTokenizer} filtered with
+ * {@link StandardFilter}, {@link LowerCaseFilter}, {@link ThaiWordFilter}, and
+ * {@link StopFilter}
+ */
+ @Override
+ protected TokenStreamComponents createComponents(String fieldName,
+ Reader reader) {
+ final Tokenizer source = new StandardTokenizer(matchVersion, reader);
+ TokenStream result = new StandardFilter(matchVersion, source);
+ if (matchVersion.onOrAfter(Version.LUCENE_31))
+ result = new LowerCaseFilter(matchVersion, result);
+ result = new ThaiWordFilter(matchVersion, result);
+ return new TokenStreamComponents(source, new StopFilter(matchVersion,
+ result, StopAnalyzer.ENGLISH_STOP_WORDS_SET));
+ }
+}