1 # ====================================================================
2 # Licensed under the Apache License, Version 2.0 (the "License");
3 # you may not use this file except in compliance with the License.
4 # You may obtain a copy of the License at
6 # http://www.apache.org/licenses/LICENSE-2.0
8 # Unless required by applicable law or agreed to in writing, software
9 # distributed under the License is distributed on an "AS IS" BASIS,
10 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 # See the License for the specific language governing permissions and
12 # limitations under the License.
13 # ====================================================================
16 WhitespaceAnalyzer, Document, Field, IndexReader, IndexWriter
# Digit-to-letters table: each entry is one digit character ("2".."9")
# followed by the lowercase letters grouped under that digit.
# NOTE(review): presumably the source data for the keyMap lookup used when
# encoding words; the code that builds keyMap is not visible here - confirm.
21 keys = [ "2abc", "3def",
22 "4ghi", "5jkl", "6mno",
23 "7pqrs", "8tuv", "9wxyz"]
# Body of main (header not visible in this excerpt): reads every document of
# an existing index, and for each one that has a non-empty "word" field adds
# a document with "word", "t9" and "length" fields to a second index.
#
# Usage message naming the two expected arguments.
# NOTE(review): the argument check that guards this print is not visible.
30 print "Usage: T9er <WordNet index dir> <t9 index>"
# Open the source index for reading.
43 reader = IndexReader.open(indexDir)
# NOTE(review): despite the name, numDocs holds maxDoc(); in Lucene that
# value presumably also counts deleted document slots - confirm.
45 numDocs = reader.maxDoc()
46 print "Processing", numDocs, "words"
# NOTE(review): the third argument (True) is presumably the "create" flag,
# i.e. start a fresh index at t9dir - confirm against the IndexWriter API.
48 writer = IndexWriter(t9dir, WhitespaceAnalyzer(), True)
# Walk every document id from 0 to maxDoc() - 1.
# NOTE(review): `id` shadows the Python builtin of the same name, and
# reader.document(id) may fail for deleted ids if the index has deletions -
# verify.
50 for id in xrange(reader.maxDoc()):
51 origDoc = reader.document(id)
52 word = origDoc.get("word")
# Guard for documents with a missing or empty "word" value.
# NOTE(review): the body of this `if` (presumably a skip) and the statement
# that creates newDoc are not visible in this excerpt.
53 if word is None or len(word) == 0:
# "word": the original word, stored and indexed without tokenization.
57 newDoc.add(Field("word", word,
58 Field.Store.YES, Field.Index.UN_TOKENIZED))
# "t9": the result of cls.t9(word), stored and indexed without tokenization.
59 newDoc.add(Field("t9", cls.t9(word),
60 Field.Store.YES, Field.Index.UN_TOKENIZED))
# "length": the word length as a string, indexed but not stored.
61 newDoc.add(Field("length", str(len(word)),
62 Field.Store.NO, Field.Index.UN_TOKENIZED))
# NOTE(review): no close() call on writer or reader is visible in this
# excerpt - confirm both are released on the lines that follow.
63 writer.addDocument(newDoc)
# Map each character of word through cls.keyMap and concatenate the results
# into a single string.
# NOTE(review): keyMap is defined outside this excerpt; a character absent
# from it would presumably raise (KeyError if keyMap is a dict) - confirm
# that callers only pass words made of mapped characters.
74 return ''.join([cls.keyMap[c] for c in word])
# Rebind main as a class method (the pre-decorator spelling of @classmethod),
# so it receives the class as its first argument.
76 main = classmethod(main)