X-Git-Url: https://git.mdrn.pl/wolnelektury.git/blobdiff_plain/967eed676fc83d15b26149047f353ac61faa8217..0b83514c0335addda3b76710aec04df93f85479d:/src/search/index.py diff --git a/src/search/index.py b/src/search/index.py index f943a4d1d..9f87b9974 100644 --- a/src/search/index.py +++ b/src/search/index.py @@ -1,29 +1,29 @@ -# -*- coding: utf-8 -*- # This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later. # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information. # -from django.conf import settings - +from functools import reduce, total_ordering +from itertools import chain +import logging +import operator import os import re +from django.conf import settings from librarian import dcparser from librarian.parser import WLDocument from lxml import etree +import scorched import catalogue.models import picture.models from pdcounter.models import Author as PDCounterAuthor, BookStub as PDCounterBook -from itertools import chain -import scorched -from . import custom -import operator -import logging from wolnelektury.utils import makedirs +from . import custom log = logging.getLogger('search') + if os.path.isfile(settings.SOLR_STOPWORDS): stopwords = set( - line.decode('utf-8').strip() + line.strip() for line in open(settings.SOLR_STOPWORDS) if not line.startswith('#')) else: stopwords = set() @@ -557,6 +557,7 @@ class Index(SolrIndex): self.index.add(doc) +@total_ordering class SearchResult(object): def __init__(self, doc, how_found=None, query_terms=None): self.boost = 1.0 @@ -633,7 +634,10 @@ class SearchResult(object): def get_book(self): if self._book is not None: return self._book - self._book = catalogue.models.Book.objects.get(id=self.book_id) + try: + self._book = catalogue.models.Book.objects.get(id=self.book_id) + except catalogue.models.Book.DoesNotExist: + self._book = None return self._book book = property(get_book) @@ -653,12 +657,12 @@ class SearchResult(object): # to sections and fragments frags = filter(lambda r: r[self.FRAGMENT] is not None, self._hits) - sect = filter(lambda r: r[self.FRAGMENT] is None, self._hits) + sect = [hit for hit in self._hits if hit[self.FRAGMENT] is None] # sections not covered by fragments - sect = filter(lambda s: 0 == len(filter( + sect = filter(lambda s: 0 == len(list(filter( lambda f: f[self.POSITION][self.POSITION_INDEX] <= s[self.POSITION][self.POSITION_INDEX] < - f[self.POSITION][self.POSITION_INDEX] + f[self.POSITION][self.POSITION_SPAN], frags)), sect) + f[self.POSITION][self.POSITION_INDEX] + f[self.POSITION][self.POSITION_SPAN], frags))), sect) def remove_duplicates(lst, keyfn, compare): els = {} @@ -691,7 +695,7 @@ class SearchResult(object): m.update(s[self.OTHER]) sections[si] = m - hits = sections.values() + hits = list(sections.values()) for f in frags: try: @@ -712,12 +716,12 @@ class SearchResult(object): break def theme_by_name(n): - th = filter(lambda t: t.name == n, themes) + th = list(filter(lambda t: t.name == n, themes)) if th: return th[0] else: return None - themes_hit = filter(lambda a: a is not None, map(theme_by_name, themes_hit)) + themes_hit = list(filter(lambda a: a is not None, map(theme_by_name, themes_hit))) m = {'score': f[self.SCORE], 'fragment': frag, @@ -745,13 +749,17 @@ class SearchResult(object): books[r.book_id] = r return books.values() - def __cmp__(self, other): - c = cmp(self.score, other.score) - if c == 0: - # this is inverted, because earlier date is better - return cmp(other.published_date, self.published_date) - else: - return c + def get_sort_key(self): + return (-self.score, + self.published_date, + self.book.sort_key_author if self.book else '', + self.book.sort_key if self.book else '') + + def __lt__(self, other): + return self.get_sort_key() > other.get_sort_key() + + def __eq__(self, other): + return self.get_sort_key() == other.get_sort_key() def __len__(self): return len(self.hits) @@ -766,6 +774,7 @@ class SearchResult(object): return None +@total_ordering class PictureResult(object): def __init__(self, doc, how_found=None, query_terms=None): self.boost = 1.0 @@ -866,8 +875,11 @@ class PictureResult(object): books[r.picture_id] = r return books.values() - def __cmp__(self, other): - return cmp(self.score, other.score) + def __lt__(self, other): + return self.score < other.score + + def __eq__(self, other): + return self.score == other.score class Search(SolrIndex): @@ -975,8 +987,8 @@ class Search(SolrIndex): finally: snippets.close() - # remove verse end markers.. - snips = map(lambda s: s and s.replace("/\n", "\n"), snips) + # remove verse end markers.. + snips = [s.replace("/\n", "\n") if s else s for s in snips] searchresult.snippets = snips