2 from django.conf import settings
3 from lucene import SimpleFSDirectory, IndexWriter, File, Field, NumericField, PolishAnalyzer, \
4 Version, Document, JavaError, IndexSearcher, QueryParser, Term
7 from librarian import dcparser
8 from catalogue.models import Book
# IndexStore: base class that holds the on-disk Lucene directory; both Index
# and Search in this file derive from it and read `self.store`.
# NOTE(review): this listing looks like a lossy extract. The leading numbers
# appear to be the original file's line numbers, indentation is gone, and the
# gaps in the numbering (12-13, 15, 17, 21-24) mean some statements are not
# visible here. Comments below describe only what is visible.
11 class IndexStore(object):
# Opens a SimpleFSDirectory on the path given by settings.SEARCH_INDEX and keeps
# it as self.store.
# NOTE(review): the enclosing `def __init__` header is not visible; whether the
# constructor also calls make_index_dir() cannot be told from here - confirm.
14 self.store = SimpleFSDirectory(File(settings.SEARCH_INDEX))
# make_index_dir: creates the settings.SEARCH_INDEX directory (including any
# missing parents) with os.makedirs.
# NOTE(review): `os` and `errno` are used below but are not imported in the
# visible import header - confirm the imports exist in the full file.
16 def make_index_dir(self):
# NOTE(review): the `try:` that pairs with the `except` below is not visible.
18 os.makedirs(settings.SEARCH_INDEX)
19 except OSError as exc:
# Special-cases "directory already exists". The body of this branch and any
# `else` are not visible; presumably EEXIST is ignored and other errors are
# re-raised - TODO confirm against the full file.
20 if exc.errno == errno.EEXIST:
# Index: write side of the search index. Opens a Lucene IndexWriter on the
# directory provided by IndexStore and adds one Lucene Document per book.
# NOTE(review): this listing looks like a lossy extract. The leading numbers
# appear to be the original file's line numbers, indentation is gone, and gaps
# in the numbering (26, 28-29, 31, 33, 36-41, 44-45, 47-48, 51-52, 56, 61-63,
# 70-71, 77, 79-83, 85+) mean several statements are not visible here.
# Comments below describe only what is visible and hedge the rest.
25 class Index(IndexStore):
# Delegates to the base-class constructor, which sets self.store.
# NOTE(review): the `def __init__` header and any initialisation of
# self.index are not visible - confirm.
27 IndexStore.__init__(self)
# open: create the IndexWriter and store it in self.index.
#   analyzer - Lucene analyzer to index with; defaults to None.
# Raises Exception("Index is already opened"); the guarding condition is not
# visible (presumably "self.index is already set" - TODO confirm).
30 def open(self, analyzer=None):
# Presumably the fallback used when no analyzer was passed (guard not visible).
32 analyzer = PolishAnalyzer(Version.LUCENE_34)
34 raise Exception("Index is already opened")
# MaxFieldLength.LIMITED is the writer's field-length setting chosen here.
35 self.index = IndexWriter(self.store, analyzer, IndexWriter.MaxFieldLength.LIMITED)
# index_book: build a Lucene Document from a book and add it to the open index.
#   book      - object providing .xml_file, .id and .slug.
#   overwrite - defaults to True; presumably guards the delete below (the `if`
#               is not visible - TODO confirm).
# Raises Exception when Lucene rejects a text field (JavaError is wrapped).
42 def index_book(self, book, overwrite=True):
# Parse the book's XML metadata.
43 book_info = dcparser.parse(book.xml_file)
# Remove previously indexed documents whose "id" term equals str(book.id).
# NOTE(review): "id" is indexed below as a NumericField, while this Term is a
# plain string. If numeric fields are stored in an encoded term form, this
# delete may match nothing and re-indexing would leave duplicates - verify.
46 self.index.deleteDocuments(Term("id", str(book.id)))
# NOTE(review): creation of `doc` is not visible (presumably `Document()`).
# "id": numeric field, stored (Field.Store.YES), third argument True.
49 doc.add(NumericField("id", Field.Store.YES, True).setIntValue(book.id))
# "slug": not stored, analyzed without norms.
50 doc.add(Field("slug", book.slug, Field.Store.NO, Field.Index.ANALYZED_NO_NORMS))
# Walk every metadata field declared on dcparser.BookInfo and index the ones
# present on this book, choosing the Lucene field form from the validator.
53 for field in dcparser.BookInfo.FIELDS:
54 if hasattr(book_info, field.name):
# Empty/falsy values are special-cased; the branch body is not visible
# (presumably `continue` - TODO confirm).
55 if not getattr(book_info, field.name):
57 # since no type information is available, we use validator
58 type_indicator = field.validator
# Text fields: indexed as analyzed, not stored.
59 if type_indicator == dcparser.as_unicode:
60 s = getattr(book_info, field.name)
# NOTE(review): original lines 61-63 are not visible; at least a `try:` must
# precede the next statement, and `s` may be transformed there - confirm.
64 doc.add(Field(field.name, s, Field.Store.NO, Field.Index.ANALYZED))
65 except JavaError as je:
# Re-raise as a plain Exception carrying the field name, value and Java error.
66 raise Exception("failed to add field: %s = '%s', %s(%s)" % (field.name, s, je.message, je.args))
# Person fields: the value is either a single dcparser.Person or an iterable.
67 elif type_indicator == dcparser.as_person:
68 p = getattr(book_info, field.name)
# Single-person branch; its body is not visible.
69 if isinstance(p, dcparser.Person):
# Otherwise (presumably the `else` branch): join str() of each element with ", ".
72 persons = ', '.join(map(str, p))
73 doc.add(Field(field.name, persons, Field.Store.NO, Field.Index.ANALYZED))
# Date fields: indexed verbatim (not analyzed) as an 8-digit YYYYMMDD string.
74 elif type_indicator == dcparser.as_date:
75 dt = getattr(book_info, field.name)
76 doc.add(Field(field.name, "%04d%02d%02d" % (dt.year, dt.month, dt.day), Field.Store.NO, Field.Index.NOT_ANALYZED))
# Hand the finished document to the IndexWriter.
78 self.index.addDocument(doc)
# Context-manager exit hook. The body, and the matching __enter__/close
# methods, are not visible in this extract.
# NOTE(review): the parameter name `type` shadows the builtin; it is kept
# because renaming would change the code.
84 def __exit__(self, type, value, tb):
class Search(IndexStore):
    """Read-only query interface over the Lucene index of books.

    Uses the directory opened by ``IndexStore`` (``self.store``), the same
    ``PolishAnalyzer`` version as the indexer, and a ``QueryParser`` bound to
    a default field.
    """

    def __init__(self, default_field="description"):
        """Open the index for searching.

        Args:
            default_field: index field searched when a query term names no
                field explicitly.
        """
        IndexStore.__init__(self)
        self.analyzer = PolishAnalyzer(Version.LUCENE_34)
        # Second argument is passed as True, as in the original call.
        self.searcher = IndexSearcher(self.store, True)
        self.parser = QueryParser(Version.LUCENE_34, default_field, self.analyzer)

    def query(self, query):
        """Parse a query string into a Lucene query object."""
        return self.parser.parse(query)

    def search(self, query, max_results=50):
        """Run ``query`` and return ``(books, total_hits)``.

        Args:
            query: query string, parsed with :meth:`query`.
            max_results: maximum number of hits to fetch from Lucene.

        Returns:
            A tuple ``(books, total_hits)``: ``books`` is a list of ``Book``
            objects in hit order (at most ``max_results``), ``total_hits`` is
            the total number of matches reported by Lucene.

        One ``Book.objects.get`` lookup is issued per hit, using the "id"
        value stored in the index document.
        """
        tops = self.searcher.search(self.query(query), max_results)
        # Accumulator for the resolved Book objects; it must exist before the
        # loop appends to it.
        bks = []
        for found in tops.scoreDocs:
            doc = self.searcher.doc(found.doc)
            bks.append(Book.objects.get(id=doc.get("id")))
        return (bks, tops.totalHits)