Remove duplicates in hints generated by pdcounter
[wolnelektury.git] / apps / search / index.py
index 6883978..a0bf715 100644 (file)
@@ -18,7 +18,7 @@ from lucene import SimpleFSDirectory, NIOFSDirectory, IndexWriter, IndexReader,
     # KeywordAnalyzer
 
 # Initialize jvm
-JVM = initVM(CLASSPATH)
+JVM = initVM(CLASSPATH, maxheap=settings.JVM_MAXHEAP)
 
 import sys
 import os
@@ -346,7 +346,8 @@ class Index(BaseIndex):
         book_doc = self.create_book_doc(book)
         meta_fields = self.extract_metadata(book, book_info, dc_only=['source_name', 'authors', 'title'])
         # let's not index it - it's only used for extracting publish date
-        del meta_fields['source_name']
+        if 'source_name' in meta_fields:
+            del meta_fields['source_name']
         
         for f in meta_fields.values():
             if isinstance(f, list) or isinstance(f, tuple):
@@ -1066,7 +1067,7 @@ class Search(IndexStore):
         return toks
 
     @staticmethod
-    def fuzziness(self, fuzzy):
+    def fuzziness(fuzzy):
         """Helper method to sanitize fuzziness"""
         if not fuzzy:
             return None
@@ -1104,7 +1105,7 @@ class Search(IndexStore):
         return phrase
 
     @staticmethod
-    def make_term_query(self, tokens, field='content', modal=BooleanClause.Occur.SHOULD, fuzzy=False):
+    def make_term_query(tokens, field='content', modal=BooleanClause.Occur.SHOULD, fuzzy=False):
         """
         Returns term queries joined by boolean query.
         modal - applies to boolean query
@@ -1408,8 +1409,9 @@ class Search(IndexStore):
                 else:
                     tag = catalogue.models.Tag.objects.get(id=doc.get("tag_id"))
                     # don't add the pdcounter tag if same tag already exists
-                if not (is_pdcounter and filter(lambda t: tag.slug == t.slug, tags)):
-                    tags.append(tag)
+
+                tags.append(tag)
+
             except catalogue.models.Tag.DoesNotExist: pass
             except PDCounterAuthor.DoesNotExist: pass
             except PDCounterBook.DoesNotExist: pass