2 from django.conf import settings
 
   3 from lucene import SimpleFSDirectory, IndexWriter, File, Field, NumericField, PolishAnalyzer, \
 
   4     Version, Document, JavaError, IndexSearcher, QueryParser, Term
 
   7 from librarian import dcparser
 
   8 from catalogue.models import Book
 
  11 class IndexStore(object):
     # Common base for the index writer/reader classes in this file: keeps a
     # Lucene SimpleFSDirectory rooted at settings.SEARCH_INDEX in self.store.
     # NOTE(review): this listing has gaps -- the embedded numbers jump 11 -> 14
     # and 16 -> 18, so the `def` that owns the assignment below and the `try:`
     # paired with the `except` further down are not shown. Confirm against the
     # full file before editing.
 
  14         self.store = SimpleFSDirectory(File(settings.SEARCH_INDEX))
 
  16     def make_index_dir(self):
         # Creates the settings.SEARCH_INDEX directory (including missing
         # parents) with os.makedirs.
         # NOTE(review): no `import os` / `import errno` is visible among the
         # import lines shown -- confirm they exist in the full file.
 
  18             os.makedirs(settings.SEARCH_INDEX)
 
  19         except OSError as exc:
 
  20             if exc.errno == errno.EEXIST:
                 # EEXIST is singled out here; the body of this branch (and any
                 # else/re-raise) is not visible -- presumably "already exists"
                 # is tolerated and other errors propagate. TODO confirm.
 
  25 class Index(IndexStore):
     # Writing side of the search index: opens a Lucene IndexWriter on
     # self.store and adds one Document per book.
     # NOTE(review): many lines are missing from this listing (embedded numbers
     # skip 26, 28-29, 31, 33, 36-41, 44-45, 47-48, ...), so guards, `try:`
     # lines and whole methods are not shown. Comments below describe only the
     # visible statements.
 
  27         IndexStore.__init__(self)
 
  30     def open(self, analyzer=None):
         # Opens the IndexWriter and stores it in self.index.
         # analyzer -- Lucene analyzer to index with; the visible fallback is
         #     PolishAnalyzer(Version.LUCENE_34) (the guard selecting it is not
         #     shown -- presumably "when analyzer is None").
         # Raises Exception("Index is already opened"); the condition is not
         # shown -- presumably when self.index is already set. TODO confirm.
 
  32             analyzer = PolishAnalyzer(Version.LUCENE_34)
 
  34             raise Exception("Index is already opened")
 
  35         self.index = IndexWriter(self.store, analyzer, IndexWriter.MaxFieldLength.LIMITED)
 
  42     def index_book(self, book, overwrite=True):
         # Builds a Lucene document for `book` and adds it to the open index.
         # book      -- object providing .id, .slug and .xml_file.
         # overwrite -- not referenced on any visible line; presumably guards
         #     the deleteDocuments call below. TODO confirm.
 
  43         book_info = dcparser.parse(book.xml_file)
 
         # NOTE(review): the delete looks the book up by the plain string term
         # str(book.id), while "id" is indexed below as a NumericField. Lucene
         # encodes numeric terms differently from plain strings, so this delete
         # may never match an existing document -- verify.
  46             self.index.deleteDocuments(Term("id", str(book.id)))
 
         # "id" is stored (Field.Store.YES) so a search hit can be mapped back
         # to the book; `doc` itself is created on a line not shown here.
  49         doc.add(NumericField("id", Field.Store.YES, True).setIntValue(book.id))
 
         # "slug" is indexed (analyzed, without norms) but not stored.
  50         doc.add(Field("slug", book.slug, Field.Store.NO, Field.Index.ANALYZED_NO_NORMS))
 
         # Index every metadata field declared in dcparser.BookInfo.FIELDS that
         # the parsed book_info actually carries.
  53         for field in dcparser.BookInfo.FIELDS:
 
  54             if hasattr(book_info, field.name):
 
                 # Falsy values get special handling on a line not shown
                 # (presumably skipped with `continue`).
  55                 if not getattr(book_info, field.name):
 
  57                 # since no type information is available, we use validator
 
  58                 type_indicator = field.validator
 
                 # Text fields: indexed analyzed, not stored.
  59                 if type_indicator == dcparser.as_unicode:
 
  60                     s = getattr(book_info, field.name)
 
  64                         doc.add(Field(field.name, s, Field.Store.NO, Field.Index.ANALYZED))
 
                     # A Java-side failure is re-raised as a plain Exception
                     # naming the offending field and value.
  65                     except JavaError as je:
 
  66                         raise Exception("failed to add field: %s = '%s', %s(%s)" % (field.name, s, je.message, je.args))
 
                 # Person fields: indexed as one analyzed string.
  67                 elif type_indicator == dcparser.as_person:
 
  68                     p = getattr(book_info, field.name)
 
                     # The branch layout between the isinstance test and the
                     # join below is not visible (embedded lines 70-71 are
                     # missing); the join produces a ", "-separated string.
  69                     if isinstance(p, dcparser.Person):
 
  72                         persons = ', '.join(map(str, p))
 
  73                     doc.add(Field(field.name, persons, Field.Store.NO, Field.Index.ANALYZED))
 
                 # Date fields: indexed verbatim (not analyzed) as zero-padded
                 # YYYYMMDD text.
  74                 elif type_indicator == dcparser.as_date:
 
  75                     dt = getattr(book_info, field.name)
 
  76                     doc.add(Field(field.name, "%04d%02d%02d" % (dt.year, dt.month, dt.day), Field.Store.NO, Field.Index.NOT_ANALYZED))
 
  78         self.index.addDocument(doc)
 
  84     def __exit__(self, type, value, tb):
         # Context-manager exit hook; its body (and the matching __enter__) is
         # not visible in this listing.
 
  88 class Search(IndexStore):
 
  89     def __init__(self, default_field="description"):
 
  90         IndexStore.__init__(self)
 
  91         self.analyzer = PolishAnalyzer(Version.LUCENE_34)
 
  92         self.searcher = IndexSearcher(self.store, True)
 
  93         self.parser = QueryParser(Version.LUCENE_34, default_field, self.analyzer)
 
  95     def query(self, query):
 
  96         return self.parser.parse(query)
 
  98     def search(self, query, max_results=50):
 
  99         """Returns (books, total_hits)
 
 102         tops = self.searcher.search(self.query(query), max_results)
 
 104         for found in tops.scoreDocs:
 
 105             doc = self.searcher.doc(found.doc)
 
 106             bks.append(Book.objects.get(id=doc.get("id")))
 
 107         return (bks, tops.totalHits)