X-Git-Url: https://git.mdrn.pl/wolnelektury.git/blobdiff_plain/80ab7c8128a710975c5d705278f01d74de557ee7..6347e58f5d14e5e43b8d0536ecdb5d69510d8ef1:/apps/search/index.py diff --git a/apps/search/index.py b/apps/search/index.py index 6883978af..a0bf71588 100644 --- a/apps/search/index.py +++ b/apps/search/index.py @@ -18,7 +18,7 @@ from lucene import SimpleFSDirectory, NIOFSDirectory, IndexWriter, IndexReader, # KeywordAnalyzer # Initialize jvm -JVM = initVM(CLASSPATH) +JVM = initVM(CLASSPATH, maxheap=settings.JVM_MAXHEAP) import sys import os @@ -346,7 +346,8 @@ class Index(BaseIndex): book_doc = self.create_book_doc(book) meta_fields = self.extract_metadata(book, book_info, dc_only=['source_name', 'authors', 'title']) # let's not index it - it's only used for extracting publish date - del meta_fields['source_name'] + if 'source_name' in meta_fields: + del meta_fields['source_name'] for f in meta_fields.values(): if isinstance(f, list) or isinstance(f, tuple): @@ -1066,7 +1067,7 @@ class Search(IndexStore): return toks @staticmethod - def fuzziness(self, fuzzy): + def fuzziness(fuzzy): """Helper method to sanitize fuzziness""" if not fuzzy: return None @@ -1104,7 +1105,7 @@ class Search(IndexStore): return phrase @staticmethod - def make_term_query(self, tokens, field='content', modal=BooleanClause.Occur.SHOULD, fuzzy=False): + def make_term_query(tokens, field='content', modal=BooleanClause.Occur.SHOULD, fuzzy=False): """ Returns term queries joined by boolean query. modal - applies to boolean query @@ -1408,8 +1409,9 @@ class Search(IndexStore): else: tag = catalogue.models.Tag.objects.get(id=doc.get("tag_id")) # don't add the pdcounter tag if same tag already exists - if not (is_pdcounter and filter(lambda t: tag.slug == t.slug, tags)): - tags.append(tag) + + tags.append(tag) + except catalogue.models.Tag.DoesNotExist: pass except PDCounterAuthor.DoesNotExist: pass except PDCounterBook.DoesNotExist: pass