X-Git-Url: https://git.mdrn.pl/pylucene.git/blobdiff_plain/a2e61f0c04805cfcb8706176758d1283c7e3a55c..aaeed5504b982cf3545252ab528713250aa33eed:/lucene-java-3.5.0/lucene/contrib/analyzers/smartcn/src/resources/org/apache/lucene/analysis/cn/smart/stopwords.txt?ds=inline diff --git a/lucene-java-3.5.0/lucene/contrib/analyzers/smartcn/src/resources/org/apache/lucene/analysis/cn/smart/stopwords.txt b/lucene-java-3.5.0/lucene/contrib/analyzers/smartcn/src/resources/org/apache/lucene/analysis/cn/smart/stopwords.txt new file mode 100644 index 0000000..fb0d71a --- /dev/null +++ b/lucene-java-3.5.0/lucene/contrib/analyzers/smartcn/src/resources/org/apache/lucene/analysis/cn/smart/stopwords.txt @@ -0,0 +1,59 @@ +////////// Punctuation tokens to remove //////////////// +, +. +` +- +_ += +? +' +| +" +( +) +{ +} +[ +] +< +> +* +# +& +^ +$ +@ +! +~ +: +; ++ +/ +\ +《 +》 +— +- +, +。 +、 +: +; +! +· +? +“ +” +) +( +【 +】 +[ +] +● +// the line below contains an IDEOGRAPHIC SPACE character (Used as a space in Chinese) +  + +//////////////// English Stop Words //////////////// + +//////////////// Chinese Stop Words ////////////////