1 # Licensed to the Apache Software Foundation (ASF) under one or more
2 # contributor license agreements. See the NOTICE file distributed with
3 # this work for additional information regarding copyright ownership.
4 # The ASF licenses this file to You under the Apache License, Version 2.0
5 # (the "License"); you may not use this file except in compliance with
6 # the License. You may obtain a copy of the License at
8 # http://www.apache.org/licenses/LICENSE-2.0
10 # Unless required by applicable law or agreed to in writing, software
11 # distributed under the License is distributed on an "AS IS" BASIS,
12 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 # See the License for the specific language governing permissions and
14 # limitations under the License.
# Content-source settings shared by every run in this file.
# Documents come from the LineDocSource feed class and are read as UTF-8.
16 content.source=org.apache.lucene.benchmark.byTask.feeds.LineDocSource
17 content.source.encoding=UTF-8
# Body-field tokenization is switched on.
19 doc.body.tokenized=true
# Initial input file (the French word list); each run below sets docs.file
# again through SetProp before it reads.
20 docs.file=work/top100k-out/top.fr.wikipedia.words.txt
# NOTE(review): with forever=false the source presumably stops at end of input
# instead of wrapping around, which is what lets the ": *" loops below
# terminate - confirm against the benchmark documentation.
21 content.source.forever=false
# KeywordAnalyzer runs: the same three steps are repeated for the French (fr),
# German (de), Ukrainian (uk) and English (en) word lists:
#   1. NewAnalyzer(KeywordAnalyzer) installs KeywordAnalyzer as the analyzer,
#   2. SetProp points docs.file at that language's word list,
#   3. a named sequence runs ReadTokens, then ResetInputs, and is repeated
#      10 times (": 10").
# NOTE(review): per the byTask algorithm syntax, ": *" should repeat ReadTokens
# until the input is exhausted, the closing ">" and the leading "-" should
# suppress per-task statistics, and ResetInputs should rewind the input for
# the next repetition - confirm against the benchmark package documentation.
25 -NewAnalyzer(KeywordAnalyzer)
26 -SetProp(docs.file,work/top100k-out/top.fr.wikipedia.words.txt)
28 { "FrenchKeyword" { ReadTokens > : * ResetInputs } : 10
# German word list.
30 -NewAnalyzer(KeywordAnalyzer)
31 -SetProp(docs.file,work/top100k-out/top.de.wikipedia.words.txt)
33 { "GermanKeyword" { ReadTokens > : * ResetInputs } : 10
# Ukrainian word list.
35 -NewAnalyzer(KeywordAnalyzer)
36 -SetProp(docs.file,work/top100k-out/top.uk.wikipedia.words.txt)
38 { "UkrainianKeyword" { ReadTokens > : * ResetInputs } : 10
# English word list.
40 -NewAnalyzer(KeywordAnalyzer)
41 -SetProp(docs.file,work/top100k-out/top.en.wikipedia.words.txt)
43 { "EnglishKeyword" { ReadTokens > : * ResetInputs } : 10
# "JDK" runs: for each of the four word lists, SetProp selects the input file
# and the named sequence runs ReadTokens, then ResetInputs, repeated 10 times.
# NOTE(review): no analyzer or locale setup is visible before these runs in
# this view; as shown, they would reuse whichever analyzer was installed last.
# Presumably a locale/collation-analyzer task precedes each SetProp - confirm
# against the complete file.
47 -SetProp(docs.file,work/top100k-out/top.fr.wikipedia.words.txt)
49 { "FrenchJDK" { ReadTokens > : * ResetInputs } : 10
# German word list.
53 -SetProp(docs.file,work/top100k-out/top.de.wikipedia.words.txt)
55 { "GermanJDK" { ReadTokens > : * ResetInputs } : 10
# Ukrainian word list.
59 -SetProp(docs.file,work/top100k-out/top.uk.wikipedia.words.txt)
61 { "UkrainianJDK" { ReadTokens > : * ResetInputs } : 10
# English word list.
65 -SetProp(docs.file,work/top100k-out/top.en.wikipedia.words.txt)
67 { "EnglishJDK" { ReadTokens > : * ResetInputs } : 10
# ICU runs: for each of the four word lists, NewCollationAnalyzer(impl:icu)
# requests the ICU collation analyzer implementation, SetProp selects the
# input file, and the named sequence runs ReadTokens, then ResetInputs,
# repeated 10 times.
# NOTE(review): the locale used by each collation analyzer is not set in any
# line visible here - presumably a locale task precedes each
# NewCollationAnalyzer; confirm against the complete file.
70 -NewCollationAnalyzer(impl:icu)
71 -SetProp(docs.file,work/top100k-out/top.fr.wikipedia.words.txt)
73 { "FrenchICU" { ReadTokens > : * ResetInputs } : 10
# German word list.
76 -NewCollationAnalyzer(impl:icu)
77 -SetProp(docs.file,work/top100k-out/top.de.wikipedia.words.txt)
79 { "GermanICU" { ReadTokens > : * ResetInputs } : 10
# Ukrainian word list.
82 -NewCollationAnalyzer(impl:icu)
83 -SetProp(docs.file,work/top100k-out/top.uk.wikipedia.words.txt)
85 { "UkrainianICU" { ReadTokens > : * ResetInputs } : 10
# English word list.
88 -NewCollationAnalyzer(impl:icu)
89 -SetProp(docs.file,work/top100k-out/top.en.wikipedia.words.txt)
91 { "EnglishICU" { ReadTokens > : * ResetInputs } : 10