pylucene 3.5.0-3
[pylucene.git] / lucene-java-3.5.0 / lucene / contrib / analyzers / smartcn / src / resources / org / apache / lucene / analysis / cn / smart / stopwords.txt
diff --git a/lucene-java-3.5.0/lucene/contrib/analyzers/smartcn/src/resources/org/apache/lucene/analysis/cn/smart/stopwords.txt b/lucene-java-3.5.0/lucene/contrib/analyzers/smartcn/src/resources/org/apache/lucene/analysis/cn/smart/stopwords.txt
new file mode 100644 (file)
index 0000000..fb0d71a
--- /dev/null
@@ -0,0 +1,59 @@
+////////// Punctuation tokens to remove ////////////////
+,
+.
+`
+-
+_
+=
+?
+'
+|
+"
+(
+)
+{
+}
+[
+]
+<
+>
+*
+#
+&
+^
+$
+@
+!
+~
+:
+;
++
+/
+\
+《
+》
+—
+-
+,
+。
+、
+:
+;
+!
+?
+“
+”
+)
+(
+【
+】
+[
+]
+●
+// The line below contains an IDEOGRAPHIC SPACE character (U+3000), which is used as a space in Chinese.
+　
+
+//////////////// English Stop Words ////////////////
+
+//////////////// Chinese Stop Words ////////////////