Merge branch 'pretty' of github.com:fnp/wolnelektury into pretty

[wolnelektury.git] / apps / search / index.py
diff --git a/apps/search/index.py b/apps/search/index.py

index 7ab3de9..307376d 100644 (file)
--- a/apps/search/index.py
+++ b/apps/search/index.py
@@ -25,6 +25,7 @@ import re
  import errno
  from librarian import dcparser
  from librarian.parser import WLDocument
+from lxml import etree
  import catalogue.models
  from multiprocessing.pool import ThreadPool
  from threading import current_thread
@@ -139,7 +140,6 @@ class Snippets(object):
          self.file.write(txt)
          pos = (self.position, l)
          self.position += l
-        print "SSSS %s - %s" % (pos, txt)
          return pos
  
      def get(self, pos):
@@ -213,7 +213,7 @@ class Index(BaseIndex):
  
          for tag in catalogue.models.Tag.objects.all():
              doc = Document()
-            doc.add(NumericField("tag_id", Field.Store.YES, True).setIntValue(tag.id))
+            doc.add(NumericField("tag_id", Field.Store.YES, True).setIntValue(int(tag.id)))
              doc.add(Field("tag_name", tag.name, Field.Store.NO, Field.Index.ANALYZED))
              doc.add(Field("tag_name_pl", tag.name, Field.Store.NO, Field.Index.ANALYZED))
              doc.add(Field("tag_category", tag.category, Field.Store.NO, Field.Index.NOT_ANALYZED))
@@ -224,9 +224,9 @@ class Index(BaseIndex):
          Create a lucene document referring book id.
          """
          doc = Document()
-        doc.add(NumericField("book_id", Field.Store.YES, True).setIntValue(book.id))
+        doc.add(NumericField("book_id", Field.Store.YES, True).setIntValue(int(book.id)))
          if book.parent is not None:
-            doc.add(NumericField("parent_id", Field.Store.YES, True).setIntValue(book.parent.id))
+            doc.add(NumericField("parent_id", Field.Store.YES, True).setIntValue(int(book.parent.id)))
          return doc
  
      def remove_book(self, book):
@@ -402,6 +402,8 @@ class Index(BaseIndex):
  
                  if header.tag in self.skip_header_tags:
                      continue
+                if header.tag is etree.Comment:
+                    continue
  
                  # section content
                  content = []
@@ -454,7 +456,7 @@ class Index(BaseIndex):
  
                          # in the end, add a section text.
                  doc = add_part(snippets, header_index=position, header_type=header.tag,
-                               content=fix_format(u' '.join(filter(lambda s: s is not None, frag['content']))))
+                               content=fix_format(u' '.join(filter(lambda s: s is not None, content))))
  
                  self.index.addDocument(doc)