From 154870f0416b6b387637d6035c96321410512e95 Mon Sep 17 00:00:00 2001 From: Radek Czajka Date: Thu, 14 Mar 2019 15:17:21 +0100 Subject: [PATCH] More Py3 compatibility fixes. --- README.md | 7 +- src/lesmianator/models.py | 2 +- src/search/custom.py | 111 +++++++++---------------- src/search/index.py | 53 ++++++------ src/search/templatetags/search_tags.py | 12 ++- 5 files changed, 76 insertions(+), 109 deletions(-) diff --git a/README.md b/README.md index 4127035eb..45c7e4e23 100644 --- a/README.md +++ b/README.md @@ -23,20 +23,19 @@ License Dependencies ============ - * Python 2.7 + * Python 3.4+ * All packages listed in requirements.txt * Sass>=3.2 How to deploy (development version) ============= -1. Checkout the source code from Github and enter the directory +1. Checkout the source code from Git and enter the directory 2. Install libraries (we recommend using pip): pip install -r requirements/requirements.txt - git submodule update --init -3. Setup your local configuration based on settings.py. You need to generate a new SECRET_KEY, database stuff and domain related stuff. +3. Setup your local configuration in src/wolnelektury/localsettings.py. You need to generate a new SECRET_KEY, database stuff and domain related stuff. 4. Populate database: ./manage.py migrate diff --git a/src/lesmianator/models.py b/src/lesmianator/models.py index 0595810cc..0a4c0591b 100644 --- a/src/lesmianator/models.py +++ b/src/lesmianator/models.py @@ -31,7 +31,7 @@ class Poem(models.Model): view_count = models.IntegerField(_('view count'), default=1) try: - f = open(settings.LESMIANATOR_PICKLE) + f = open(settings.LESMIANATOR_PICKLE, 'rb') global_dictionary = pickle.load(f) f.close() except (IOError, AttributeError, PickleError): diff --git a/src/search/custom.py b/src/search/custom.py index a94468769..8f7b893b5 100644 --- a/src/search/custom.py +++ b/src/search/custom.py @@ -1,45 +1,12 @@ -# -*- coding: utf-8 -*- # This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later. # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information. # -from scorched import connection -from lxml import etree +import re from urllib.parse import urlencode import warnings -from scorched import search -import copy from httplib2 import socket -import re - - -class TermVectorOptions(search.Options): - def __init__(self, schema, original=None): - self.schema = schema - if original is None: - self.fields = set() - self.positions = False - else: - self.fields = copy.copy(original.fields) - self.positions = copy.copy(original.positions) - - def update(self, positions=False, fields=None): - if fields is None: - fields = [] - if isinstance(fields, (str, bytes)): - fields = [fields] - self.schema.check_fields(fields, {"stored": True}) - self.fields.update(fields) - self.positions = positions - - def options(self): - opts = {} - if self.positions or self.fields: - opts['tv'] = 'true' - if self.positions: - opts['tv.positions'] = 'true' - if self.fields: - opts['tv.fl'] = ','.join(sorted(self.fields)) - return opts +from lxml import etree +from scorched import connection, exc, search class CustomSolrConnection(connection.SolrConnection): @@ -61,47 +28,42 @@ class CustomSolrConnection(connection.SolrConnection): ) else: kwargs = dict(method="GET") - r, c = self.request(url, **kwargs) - if r.status != 200: - raise connection.SolrError(r, c) - return c - - -# monkey patching scorched SolrSearch -search.SolrSearch.option_modules += ('term_vectorer',) - - -def __term_vector(self, positions=False, fields=None): - newself = self.clone() - newself.term_vectorer.update(positions, fields) - return newself -setattr(search.SolrSearch, 'term_vector', __term_vector) - - -def __patched__init_common_modules(self): - __original__init_common_modules(self) - self.term_vectorer = TermVectorOptions(self.schema) -__original__init_common_modules = search.SolrSearch._init_common_modules -setattr(search.SolrSearch, '_init_common_modules', __patched__init_common_modules) + response = self.request(url=url, **kwargs) + if response.status_code != 200: + raise exc.SolrError(response) + return response.content class CustomSolrInterface(connection.SolrInterface): # just copied from parent and SolrConnection -> CustomSolrConnection - def __init__(self, url, schemadoc=None, http_connection=None, mode='', retry_timeout=-1, - max_length_get_url=connection.MAX_LENGTH_GET_URL): - self.conn = CustomSolrConnection(url, http_connection, retry_timeout, max_length_get_url) - self.schemadoc = schemadoc - if 'w' not in mode: - self.writeable = False - elif 'r' not in mode: - self.readable = False - try: - self.init_schema() - except socket.error as e: - raise socket.error("Cannot connect to Solr server, and search indexing is enabled (%s)" % str(e)) + def __init__(self, url, http_connection=None, mode='', + retry_timeout=-1, max_length_get_url=connection.MAX_LENGTH_GET_URL, + search_timeout=()): + """ + :param url: url to Solr + :type url: str + :param http_connection: optional -- already existing connection + :type http_connection: requests connection + :param mode: optional -- mode (readable, writable) Solr + :type mode: str + :param retry_timeout: optional -- timeout until retry + :type retry_timeout: int + :param max_length_get_url: optional -- max length until switch to post + :type max_length_get_url: int + :param search_timeout: (optional) How long to wait for the server to + send data before giving up, as a float, or a + (connect timeout, read timeout) tuple. + :type search_timeout: float or tuple + """ + + self.conn = CustomSolrConnection( + url, http_connection, mode, retry_timeout, max_length_get_url) + self.schema = self.init_schema() + self._datefields = self._extract_datefields(self.schema) + def _analyze(self, **kwargs): - if not self.readable: + if not self.conn.readable: raise TypeError("This Solr instance is only for writing") args = { 'analysis_showmatch': True @@ -115,7 +77,10 @@ class CustomSolrInterface(connection.SolrInterface): if 'query' in kwargs: args['q'] = kwargs['q'] - params = map(lambda k, v: (k.replace('_', '.'), v), connection.params_from_dict(**args)) + params = [ + (k.replace('_', '.'), v) + for (k, v) in search.params_from_dict(**args) + ] content = self.conn.analyze(params) doc = etree.fromstring(content) @@ -178,7 +143,7 @@ class CustomSolrInterface(connection.SolrInterface): new_matches.append(m) snip = text[start:end] - new_matches.sort(lambda a, b: cmp(b[0], a[0])) + new_matches.sort(key=lambda a: -a[0]) for (s, e) in new_matches: off = -start diff --git a/src/search/index.py b/src/search/index.py index f943a4d1d..a712b0702 100644 --- a/src/search/index.py +++ b/src/search/index.py @@ -1,29 +1,29 @@ -# -*- coding: utf-8 -*- # This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later. # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information. # -from django.conf import settings - +from functools import total_ordering +from itertools import chain +import logging +import operator import os import re +from django.conf import settings from librarian import dcparser from librarian.parser import WLDocument from lxml import etree +import scorched import catalogue.models import picture.models from pdcounter.models import Author as PDCounterAuthor, BookStub as PDCounterBook -from itertools import chain -import scorched -from . import custom -import operator -import logging from wolnelektury.utils import makedirs +from . import custom log = logging.getLogger('search') + if os.path.isfile(settings.SOLR_STOPWORDS): stopwords = set( - line.decode('utf-8').strip() + line.strip() for line in open(settings.SOLR_STOPWORDS) if not line.startswith('#')) else: stopwords = set() @@ -557,6 +557,7 @@ class Index(SolrIndex): self.index.add(doc) +@total_ordering class SearchResult(object): def __init__(self, doc, how_found=None, query_terms=None): self.boost = 1.0 @@ -653,12 +654,12 @@ class SearchResult(object): # to sections and fragments frags = filter(lambda r: r[self.FRAGMENT] is not None, self._hits) - sect = filter(lambda r: r[self.FRAGMENT] is None, self._hits) + sect = [hit for hit in self._hits if hit[self.FRAGMENT] is None] # sections not covered by fragments - sect = filter(lambda s: 0 == len(filter( + sect = filter(lambda s: 0 == len(list(filter( lambda f: f[self.POSITION][self.POSITION_INDEX] <= s[self.POSITION][self.POSITION_INDEX] < - f[self.POSITION][self.POSITION_INDEX] + f[self.POSITION][self.POSITION_SPAN], frags)), sect) + f[self.POSITION][self.POSITION_INDEX] + f[self.POSITION][self.POSITION_SPAN], frags))), sect) def remove_duplicates(lst, keyfn, compare): els = {} @@ -691,7 +692,7 @@ class SearchResult(object): m.update(s[self.OTHER]) sections[si] = m - hits = sections.values() + hits = list(sections.values()) for f in frags: try: @@ -745,13 +746,13 @@ class SearchResult(object): books[r.book_id] = r return books.values() - def __cmp__(self, other): - c = cmp(self.score, other.score) - if c == 0: - # this is inverted, because earlier date is better - return cmp(other.published_date, self.published_date) - else: - return c + def __lt__(self, other): + return (-self.score, self.published_date, self.book.sort_key_author, self.book.sort_key) > \ + (-other.score, other.published_date, other.book.sort_key_author, other.book.sort_key) + + def __eq__(self, other): + return (self.score, self.published_date, self.book.sort_key_author, self.book.sort_key) == \ + (other.score, other.published_date, other.book.sort_key_author, other.book.sort_key) def __len__(self): return len(self.hits) @@ -766,6 +767,7 @@ class SearchResult(object): return None +@total_ordering class PictureResult(object): def __init__(self, doc, how_found=None, query_terms=None): self.boost = 1.0 @@ -866,8 +868,11 @@ class PictureResult(object): books[r.picture_id] = r return books.values() - def __cmp__(self, other): - return cmp(self.score, other.score) + def __lt__(self, other): + return self.score < other.score + + def __eq__(self, other): + return self.score == other.score class Search(SolrIndex): @@ -975,8 +980,8 @@ class Search(SolrIndex): finally: snippets.close() - # remove verse end markers.. - snips = map(lambda s: s and s.replace("/\n", "\n"), snips) + # remove verse end markers.. + snips = [s.replace("/\n", "\n") if s else s for s in snips] searchresult.snippets = snips diff --git a/src/search/templatetags/search_tags.py b/src/search/templatetags/search_tags.py index 0975c2a28..05c504ce9 100644 --- a/src/search/templatetags/search_tags.py +++ b/src/search/templatetags/search_tags.py @@ -1,9 +1,8 @@ -# -*- coding: utf-8 -*- # This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later. # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information. # -from django import template import re +from django import template register = template.Library() @@ -12,10 +11,9 @@ register = template.Library() def book_searched(context, result): # We don't need hits which lead to sections but do not have # snippets. - hits = filter(lambda idx, h: - result.snippets[idx] is not None or ('fragment' in h and h['themes_hit']), - enumerate(result.hits)) - # print "[tmpl: from %d hits selected %d]" % (len(result.hits), len(hits)) + hits = [(idx, h) + for (idx, h) in enumerate(result.hits) + if result.snippets[idx] is not None or ('fragment' in h and h['themes_hit'])] for (idx, hit) in hits: # currently we generate one snipper per hit though. @@ -34,5 +32,5 @@ def book_searched(context, result): return { 'request': context['request'], 'book': result.book, - 'hits': zip(*hits)[1] if hits else [] + 'hits': list(zip(*hits))[1] if hits else [] } -- 2.20.1