1 # ====================================================================
2 # Licensed under the Apache License, Version 2.0 (the "License");
3 # you may not use this file except in compliance with the License.
4 # You may obtain a copy of the License at
6 # http://www.apache.org/licenses/LICENSE-2.0
8 # Unless required by applicable law or agreed to in writing, software
9 # distributed under the License is distributed on an "AS IS" BASIS,
10 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 # See the License for the specific language governing permissions and
12 # limitations under the License.
13 # ====================================================================
18 Document, IndexReader, Term, BooleanQuery, IndexSearcher, TermQuery, \
19 SimpleFSDirectory, File, System, BooleanClause
22 class BooksLikeThis(object):
26 indexDir = System.getProperty("index.dir")
27 directory = SimpleFSDirectory(File(indexDir))
29 reader = IndexReader.open(directory, True)
30 blt = BooksLikeThis(reader)
32 for id in xrange(reader.maxDoc()):
33 if reader.isDeleted(id):
35 doc = reader.document(id)
37 print doc.get("title").encode('utf-8')
39 docs = blt.docsLike(id, doc, 10)
41 print " None like this"
44 print " ->", doc.get("title").encode('utf-8')
46 def __init__(self, reader):
49 self.searcher = IndexSearcher(reader)
51 def docsLike(self, id, doc, max):
53 authors = doc.getValues("author")
54 authorQuery = BooleanQuery()
55 for author in authors:
56 authorQuery.add(TermQuery(Term("author", author)),
57 BooleanClause.Occur.SHOULD)
58 authorQuery.setBoost(2.0)
60 vector = self.reader.getTermFreqVector(id, "subject")
62 subjectQuery = BooleanQuery()
63 for term in vector.getTerms():
64 tq = TermQuery(Term("subject", term))
65 subjectQuery.add(tq, BooleanClause.Occur.SHOULD)
67 likeThisQuery = BooleanQuery()
68 likeThisQuery.add(authorQuery, BooleanClause.Occur.SHOULD)
69 likeThisQuery.add(subjectQuery, BooleanClause.Occur.SHOULD)
72 likeThisQuery.add(TermQuery(Term("isbn", doc.get("isbn"))),
73 BooleanClause.Occur.MUST_NOT)
75 print " Query:", likeThisQuery.toString("contents")
76 scoreDocs = self.searcher.search(likeThisQuery, 50).scoreDocs
79 for scoreDoc in scoreDocs:
80 doc = self.searcher.doc(scoreDoc.doc)
88 main = classmethod(main)