Dependencies
============
- * Python 2.7
+ * Python 3.4+
* All packages listed in requirements.txt
* Sass>=3.2
How to deploy (development version)
===================================
-1. Checkout the source code from Github and enter the directory
+1. Check out the source code from Git and enter the directory
2. Install libraries (we recommend using pip):
pip install -r requirements/requirements.txt
- git submodule update --init
-3. Setup your local configuration based on settings.py. You need to generate a new SECRET_KEY, database stuff and domain related stuff.
+3. Set up your local configuration in src/wolnelektury/localsettings.py. Generate a new SECRET_KEY (see the sketch after this list) and configure the database and domain settings.
4. Populate database:
./manage.py migrate
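
A quick way to generate the SECRET_KEY for step 3 (a sketch assuming Django 1.10 or newer, which ships django.core.management.utils.get_random_secret_key; older versions need a hand-rolled equivalent):

    python -c "from django.core.management.utils import get_random_secret_key; print(get_random_secret_key())"

Paste the printed value into src/wolnelektury/localsettings.py as SECRET_KEY.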
view_count = models.IntegerField(_('view count'), default=1)
try:
- f = open(settings.LESMIANATOR_PICKLE)
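+ # pickle data is bytes, so under Python 3 the file must be opened in binary mode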
+ f = open(settings.LESMIANATOR_PICKLE, 'rb')
global_dictionary = pickle.load(f)
f.close()
except (IOError, AttributeError, PickleError):
-# -*- coding: utf-8 -*-
# This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later.
# Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
#
-from scorched import connection
-from lxml import etree
+import re
from urllib.parse import urlencode
import warnings
-from scorched import search
-import copy
from httplib2 import socket
-import re
-
-
-class TermVectorOptions(search.Options):
- def __init__(self, schema, original=None):
- self.schema = schema
- if original is None:
- self.fields = set()
- self.positions = False
- else:
- self.fields = copy.copy(original.fields)
- self.positions = copy.copy(original.positions)
-
- def update(self, positions=False, fields=None):
- if fields is None:
- fields = []
- if isinstance(fields, (str, bytes)):
- fields = [fields]
- self.schema.check_fields(fields, {"stored": True})
- self.fields.update(fields)
- self.positions = positions
-
- def options(self):
- opts = {}
- if self.positions or self.fields:
- opts['tv'] = 'true'
- if self.positions:
- opts['tv.positions'] = 'true'
- if self.fields:
- opts['tv.fl'] = ','.join(sorted(self.fields))
- return opts
+from lxml import etree
+from scorched import connection, exc, search
class CustomSolrConnection(connection.SolrConnection):
)
else:
kwargs = dict(method="GET")
- r, c = self.request(url, **kwargs)
- if r.status != 200:
- raise connection.SolrError(r, c)
- return c
-
-
-# monkey patching scorched SolrSearch
-search.SolrSearch.option_modules += ('term_vectorer',)
-
-
-def __term_vector(self, positions=False, fields=None):
- newself = self.clone()
- newself.term_vectorer.update(positions, fields)
- return newself
-setattr(search.SolrSearch, 'term_vector', __term_vector)
-
-
-def __patched__init_common_modules(self):
- __original__init_common_modules(self)
- self.term_vectorer = TermVectorOptions(self.schema)
-__original__init_common_modules = search.SolrSearch._init_common_modules
-setattr(search.SolrSearch, '_init_common_modules', __patched__init_common_modules)
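+ # request() now returns a single response object exposing status_code and
+ # content (the old httplib2-style API returned a (response, content) pair)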
+ response = self.request(url=url, **kwargs)
+ if response.status_code != 200:
+ raise exc.SolrError(response)
+ return response.content
class CustomSolrInterface(connection.SolrInterface):
# copied from the parent class, with SolrConnection replaced by CustomSolrConnection
- def __init__(self, url, schemadoc=None, http_connection=None, mode='', retry_timeout=-1,
- max_length_get_url=connection.MAX_LENGTH_GET_URL):
- self.conn = CustomSolrConnection(url, http_connection, retry_timeout, max_length_get_url)
- self.schemadoc = schemadoc
- if 'w' not in mode:
- self.writeable = False
- elif 'r' not in mode:
- self.readable = False
- try:
- self.init_schema()
- except socket.error as e:
- raise socket.error("Cannot connect to Solr server, and search indexing is enabled (%s)" % str(e))
+ def __init__(self, url, http_connection=None, mode='',
+ retry_timeout=-1, max_length_get_url=connection.MAX_LENGTH_GET_URL,
+ search_timeout=()):
+ """
+ :param url: url to Solr
+ :type url: str
+ :param http_connection: optional -- already existing connection
+ :type http_connection: requests connection
+ :param mode: optional -- read/write mode of the Solr connection
+ :type mode: str
+ :param retry_timeout: optional -- timeout until retry
+ :type retry_timeout: int
+ :param max_length_get_url: optional -- maximum URL length before switching to a POST request
+ :type max_length_get_url: int
+ :param search_timeout: (optional) How long to wait for the server to
+ send data before giving up, as a float, or a
+ (connect timeout, read timeout) tuple.
+ :type search_timeout: float or tuple
+ """
+
+ self.conn = CustomSolrConnection(
+ url, http_connection, mode, retry_timeout, max_length_get_url)
+ self.schema = self.init_schema()
+ self._datefields = self._extract_datefields(self.schema)
+
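
A usage sketch for the rewritten interface (illustrative only: the Solr URL, core name, query field and mode value are assumptions, not part of this patch):

    interface = CustomSolrInterface('http://localhost:8983/solr/wl', mode='rw')
    results = interface.query(title='Lalka').execute()
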
def _analyze(self, **kwargs):
- if not self.readable:
+ if not self.conn.readable:
raise TypeError("This Solr instance is only for writing")
args = {
'analysis_showmatch': True
if 'query' in kwargs:
args['q'] = kwargs['q']
- params = map(lambda k, v: (k.replace('_', '.'), v), connection.params_from_dict(**args))
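+ # Solr expects dotted parameter names, so translate scorched's underscore-style keys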
+ params = [
+ (k.replace('_', '.'), v)
+ for (k, v) in search.params_from_dict(**args)
+ ]
content = self.conn.analyze(params)
doc = etree.fromstring(content)
new_matches.append(m)
snip = text[start:end]
- new_matches.sort(lambda a, b: cmp(b[0], a[0]))
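+ # Python 3 dropped cmp-style sorts: order by match start offset, descending, via a key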
+ new_matches.sort(key=lambda a: -a[0])
for (s, e) in new_matches:
off = -start
-# -*- coding: utf-8 -*-
# This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later.
# Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
#
-from django.conf import settings
-
+from functools import total_ordering
+from itertools import chain
+import logging
+import operator
import os
import re
+from django.conf import settings
from librarian import dcparser
from librarian.parser import WLDocument
from lxml import etree
+import scorched
import catalogue.models
import picture.models
from pdcounter.models import Author as PDCounterAuthor, BookStub as PDCounterBook
-from itertools import chain
-import scorched
-from . import custom
-import operator
-import logging
from wolnelektury.utils import makedirs
+from . import custom
log = logging.getLogger('search')
+
if os.path.isfile(settings.SOLR_STOPWORDS):
stopwords = set(
- line.decode('utf-8').strip()
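+ # text-mode open() already yields str in Python 3, so decode() is unnecessary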
+ line.strip()
for line in open(settings.SOLR_STOPWORDS) if not line.startswith('#'))
else:
stopwords = set()
self.index.add(doc)
+@total_ordering
class SearchResult(object):
def __init__(self, doc, how_found=None, query_terms=None):
self.boost = 1.0
# to sections and fragments
- frags = filter(lambda r: r[self.FRAGMENT] is not None, self._hits)
+ # materialize the list: Python 3's filter() is a one-shot iterator, and
+ # frags is scanned once per section below and iterated again afterwards
+ frags = [hit for hit in self._hits if hit[self.FRAGMENT] is not None]
- sect = filter(lambda r: r[self.FRAGMENT] is None, self._hits)
+ sect = [hit for hit in self._hits if hit[self.FRAGMENT] is None]
# sections not covered by fragments
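+ # (a section is covered when its index falls inside a fragment's [index, index + span) range)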
- sect = filter(lambda s: 0 == len(filter(
- lambda f: f[self.POSITION][self.POSITION_INDEX] <= s[self.POSITION][self.POSITION_INDEX] <
- f[self.POSITION][self.POSITION_INDEX] + f[self.POSITION][self.POSITION_SPAN], frags)), sect)
+ sect = [s for s in sect if not any(
+     f[self.POSITION][self.POSITION_INDEX] <= s[self.POSITION][self.POSITION_INDEX] <
+     f[self.POSITION][self.POSITION_INDEX] + f[self.POSITION][self.POSITION_SPAN]
+     for f in frags)]
def remove_duplicates(lst, keyfn, compare):
els = {}
m.update(s[self.OTHER])
sections[si] = m
- hits = sections.values()
+ hits = list(sections.values())
for f in frags:
try:
books[r.book_id] = r
return books.values()
- def __cmp__(self, other):
- c = cmp(self.score, other.score)
- if c == 0:
- # this is inverted, because earlier date is better
- return cmp(other.published_date, self.published_date)
- else:
- return c
+ def __lt__(self, other):
+ return (-self.score, self.published_date, self.book.sort_key_author, self.book.sort_key) > \
+ (-other.score, other.published_date, other.book.sort_key_author, other.book.sort_key)
+
+ def __eq__(self, other):
+ return (self.score, self.published_date, self.book.sort_key_author, self.book.sort_key) == \
+ (other.score, other.published_date, other.book.sort_key_author, other.book.sort_key)
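
For reference, functools.total_ordering derives the remaining comparison operators from __eq__ plus the single ordering method defined above, replacing the Python 2 __cmp__ protocol removed here. A self-contained sketch of the pattern (standalone illustration, not project code):

    from functools import total_ordering

    @total_ordering
    class Ranked:
        def __init__(self, score):
            self.score = score

        def __eq__(self, other):
            return self.score == other.score

        def __lt__(self, other):
            return self.score < other.score

    assert Ranked(1) < Ranked(2)    # defined explicitly
    assert Ranked(2) >= Ranked(1)   # derived by total_ordering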
def __len__(self):
return len(self.hits)
return None
+@total_ordering
class PictureResult(object):
def __init__(self, doc, how_found=None, query_terms=None):
self.boost = 1.0
books[r.picture_id] = r
return books.values()
- def __cmp__(self, other):
- return cmp(self.score, other.score)
+ def __lt__(self, other):
+ return self.score < other.score
+
+ def __eq__(self, other):
+ return self.score == other.score
class Search(SolrIndex):
finally:
snippets.close()
- # remove verse end markers..
- snips = map(lambda s: s and s.replace("/\n", "\n"), snips)
+ # remove verse end markers
+ snips = [s.replace("/\n", "\n") if s else s for s in snips]
searchresult.snippets = snips
-# -*- coding: utf-8 -*-
# This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later.
# Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
#
-from django import template
import re
+from django import template
register = template.Library()
def book_searched(context, result):
# We don't need hits which lead to sections but do not have
# snippets.
- hits = filter(lambda idx, h:
- result.snippets[idx] is not None or ('fragment' in h and h['themes_hit']),
- enumerate(result.hits))
- # print "[tmpl: from %d hits selected %d]" % (len(result.hits), len(hits))
+ hits = [(idx, h)
+ for (idx, h) in enumerate(result.hits)
+ if result.snippets[idx] is not None or ('fragment' in h and h['themes_hit'])]
for (idx, hit) in hits:
# currently we generate one snippet per hit, though.
return {
'request': context['request'],
'book': result.book,
- 'hits': zip(*hits)[1] if hits else []
+ 'hits': [h for (idx, h) in hits]
}