1 # ====================================================================
2 # Licensed under the Apache License, Version 2.0 (the "License");
3 # you may not use this file except in compliance with the License.
4 # You may obtain a copy of the License at
6 # http://www.apache.org/licenses/LICENSE-2.0
8 # Unless required by applicable law or agreed to in writing, software
9 # distributed under the License is distributed on an "AS IS" BASIS,
10 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 # See the License for the specific language governing permissions and
12 # limitations under the License.
13 # ====================================================================
15 from lucene import Document, IndexSearcher, PythonCollector, FieldCache
18 # A Collector extension
class BookLinkCollector(PythonCollector):
    """Collector that records a url -> title mapping for every hit.

    For each reader handed to ``setNextReader`` the string arrays of the
    "url" and "title2" fields are fetched through ``FieldCache``; every
    collected document then contributes one ``url: title`` entry to an
    internal dictionary and is echoed to standard output with its score.
    """

    def __init__(self, searcher):
        """Create an empty collector.

        searcher -- stored on the instance as ``self.searcher``; it is not
                    used by any other method of this class.
        """
        super(BookLinkCollector, self).__init__()

        self.searcher = searcher
        # collect() stores into this mapping, so it must exist before the
        # first hit is reported.
        self.documents = {}

    def acceptsDocsOutOfOrder(self):
        """Report whether hits may be delivered in any document order."""
        # NOTE(review): the body of this method was missing from the source
        # as received. True is used because collect() treats every hit
        # independently of the ones before it -- confirm against the
        # original file.
        return True

    def setNextReader(self, reader, docBase):
        """Load the "url" and "title2" field arrays for ``reader``.

        reader  -- reader whose field values are looked up by collect()
        docBase -- remembered as ``self.docBase``; not used elsewhere in
                   this class
        """
        self.docBase = docBase
        self.urls = FieldCache.DEFAULT.getStrings(reader, "url")
        self.titles = FieldCache.DEFAULT.getStrings(reader, "title2")

    def collect(self, docID, score):
        """Record the url/title pair of one hit and print title and score.

        docID -- index into the arrays loaded by setNextReader()
                 (presumably relative to the current reader -- confirm)
        score -- score of the hit; only used in the printed line
        """
        url = self.urls[docID]
        title = self.titles[docID]
        # A url that was already seen is overwritten with the latest title.
        self.documents[url] = title

        # Parenthesised single-argument form prints the same text under the
        # Python 2 print statement and the Python 3 print function.
        print("%s: %s" % (title, score))

    # NOTE(review): the ``def`` line of this accessor was missing from the
    # source as received; the name ``getLinks`` is a reconstruction --
    # confirm against the callers.
    def getLinks(self):
        """Return a shallow copy of the collected url -> title mapping."""
        # A copy keeps callers from mutating the collector's own state.
        return self.documents.copy()