X-Git-Url: https://git.mdrn.pl/pylucene.git/blobdiff_plain/a2e61f0c04805cfcb8706176758d1283c7e3a55c..aaeed5504b982cf3545252ab528713250aa33eed:/lucene-java-3.4.0/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/de/data.txt?ds=sidebyside diff --git a/lucene-java-3.4.0/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/de/data.txt b/lucene-java-3.4.0/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/de/data.txt deleted file mode 100644 index 5b8ce5f..0000000 --- a/lucene-java-3.4.0/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/de/data.txt +++ /dev/null @@ -1,48 +0,0 @@ -# German special characters are replaced: -häufig haufig - -# here the stemmer works okay, it maps related words to the same stem: -abschließen abschliess -abschließender abschliess -abschließendes abschliess -abschließenden abschliess - -Tisch tisch -Tische tisch -Tischen tisch - -Haus hau -Hauses hau -Häuser hau -Häusern hau -# here's a case where overstemming occurs, i.e. a word is -# mapped to the same stem as unrelated words: -hauen hau - -# here's a case where understemming occurs, i.e. two related words -# are not mapped to the same stem. This is the case with basically -# all irregular forms: -Drama drama -Dramen dram - -# replace "ß" with 'ss': -Ausmaß ausmass - -# fake words to test if suffixes are cut off: -xxxxxe xxxxx -xxxxxs xxxxx -xxxxxn xxxxx -xxxxxt xxxxx -xxxxxem xxxxx -xxxxxer xxxxx -xxxxxnd xxxxx -# the suffixes are also removed when combined: -xxxxxetende xxxxx - -# words that are shorter than four charcters are not changed: -xxe xxe -# -em and -er are not removed from words shorter than five characters: -xxem xxem -xxer xxer -# -nd is not removed from words shorter than six characters: -xxxnd xxxnd