1 # ====================================================================
2 # Licensed under the Apache License, Version 2.0 (the "License");
3 # you may not use this file except in compliance with the License.
4 # You may obtain a copy of the License at
6 # http://www.apache.org/licenses/LICENSE-2.0
8 # Unless required by applicable law or agreed to in writing, software
9 # distributed under the License is distributed on an "AS IS" BASIS,
10 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 # See the License for the specific language governing permissions and
12 # limitations under the License.
13 # ====================================================================
15 # Author: Erik Hatcher
17 # to query the index generated with manindex.py
18 # python mansearch.py <query>
19 # by default, the index is stored in 'pages', which can be overridden with
20 # the MANDEX environment variable
21 # ====================================================================
26 from string import Template
27 from datetime import datetime
28 from getopt import getopt, GetoptError
31 Document, IndexSearcher, SimpleFSDirectory, File, QueryParser, \
32 StandardAnalyzer, initVM, Version
34 if __name__ == '__main__':
38 print sys.argv[0], "[--format=<format string>] [--index=<index dir>] [--stats] <query...>"
39 print "default index is found from MANDEX environment variable"
42 options, args = getopt(sys.argv[1:], '', ['format=', 'index=', 'stats'])
49 indexDir = os.environ.get('MANDEX') or 'pages'
60 class CustomTemplate(Template):
63 template = CustomTemplate(format)
65 fsDir = SimpleFSDirectory(File(indexDir))
66 searcher = IndexSearcher(fsDir, True)
68 analyzer = StandardAnalyzer(Version.LUCENE_CURRENT)
69 parser = QueryParser(Version.LUCENE_CURRENT, "keywords", analyzer)
70 parser.setDefaultOperator(QueryParser.Operator.AND)
71 query = parser.parse(' '.join(args))
72 start = datetime.now()
73 scoreDocs = searcher.search(query, 50).scoreDocs
74 duration = datetime.now() - start
76 print >>sys.stderr, "Found %d document(s) (in %s) that matched query '%s':" %(len(scoreDocs), duration, query)
78 for scoreDoc in scoreDocs:
79 doc = searcher.doc(scoreDoc.doc)
80 table = dict((field.name(), field.stringValue())
81 for field in doc.getFields())
82 print template.substitute(table)