From: Radek Czajka
Date: Thu, 29 Mar 2012 09:59:12 +0000 (+0200)
Subject: Merge branch 'custompdf'
X-Git-Url: https://git.mdrn.pl/wolnelektury.git/commitdiff_plain/5eeb9dace6068f83e2b70b5222cfab0c0a5e71eb?hp=655700e4cde69f00f698a06ca18991eef784ccda

Merge branch 'custompdf'

Conflicts:
	wolnelektury/settings/__init__.py
---
diff --git a/apps/api/handlers.py b/apps/api/handlers.py
index 98a57426c..260384d69 100644
--- a/apps/api/handlers.py
+++ b/apps/api/handlers.py
@@ -83,7 +83,7 @@ class BookMediaHandler(BaseHandler):
     """ Responsible for representing media in Books. """

     model = BookMedia
-    fields = ['name', 'type', 'url']
+    fields = ['name', 'type', 'url', 'artist', 'director']

     @classmethod
     def url(cls, media):
@@ -91,6 +91,15 @@ class BookMediaHandler(BaseHandler):
         return MEDIA_BASE + media.file.url

+    @classmethod
+    def artist(cls, media):
+        return media.get_extra_info_value().get('artist_name', '')
+
+    @classmethod
+    def director(cls, media):
+        return media.get_extra_info_value().get('director_name', '')
+
+
 class BookDetails(object):
     """Custom fields used for representing Books."""

@@ -134,7 +143,8 @@ class BookDetailHandler(BaseHandler, BookDetails):
     """
     allowed_methods = ['GET']
     fields = ['title', 'parent', 'children'] + Book.formats + [
-        'media', 'url', 'cover'] + book_tag_categories
+        'media', 'url', 'cover'] + [
+        category_plural[c] for c in book_tag_categories]

     @piwik_track
     def read(self, request, book):
@@ -155,7 +165,8 @@ class AnonymousBooksHandler(AnonymousBaseHandler, BookDetails):
     fields = ['author', 'href', 'title', 'url', 'cover']

     @piwik_track
-    def read(self, request, tags, top_level=False):
+    def read(self, request, tags, top_level=False,
+             audiobooks=False, daisy=False):
         """ Lists all books with given tags.

         :param tags: filtering tags; should be a path of categories
@@ -175,17 +186,22 @@ class AnonymousBooksHandler(AnonymousBaseHandler, BookDetails):
                 return books if books else rc.NOT_FOUND
             else:
                 books = Book.tagged.with_all(tags)
-        elif top_level:
-            books = Book.objects.filter(parent=None)
         else:
             books = Book.objects.all()
+
+        if top_level:
+            books = books.filter(parent=None)
+        if audiobooks:
+            books = books.filter(media__type='mp3')
+        if daisy:
+            books = books.filter(media__type='daisy')

         if books.exists():
             return books
         else:
             return rc.NOT_FOUND

-    def create(self, request, tags, top_level=False):
+    def create(self, request, *args, **kwargs):
         return rc.FORBIDDEN

@@ -195,7 +211,7 @@ class BooksHandler(BookDetailHandler):
     fields = ['author', 'href', 'title', 'url']
     anonymous = AnonymousBooksHandler

-    def create(self, request, tags, top_level=False):
+    def create(self, request, *args, **kwargs):
         if not request.user.has_perm('catalogue.add_book'):
             return rc.FORBIDDEN

diff --git a/apps/api/templates/api/main.html b/apps/api/templates/api/main.html
index 8b6805945..5dd8229f8 100755
--- a/apps/api/templates/api/main.html
+++ b/apps/api/templates/api/main.html
@@ -37,14 +37,24 @@ The URLs in WolneLektury.pl API are:
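
In the handlers.py hunks above, the new 'artist' and 'director' entries in fields are backed by same-named classmethods, which django-piston calls with each BookMedia instance; a missing key in the media's extra_info serializes as an empty string instead of raising. A standalone sketch of that pattern (the stub object and sample data are invented for illustration):

    class MediaStub(object):
        "Stand-in for a BookMedia row with a JSON extra_info payload."
        def __init__(self, extra):
            self.extra = extra
        def get_extra_info_value(self):
            return self.extra

    class MediaFieldsSketch(object):
        fields = ['artist', 'director']

        @classmethod
        def artist(cls, media):
            # .get() with a default keeps the API response total:
            # media without an artist serializes as '' instead of erroring
            return media.get_extra_info_value().get('artist_name', '')

        @classmethod
        def director(cls, media):
            return media.get_extra_info_value().get('director_name', '')

    print MediaFieldsSketch.artist(MediaStub({'artist_name': 'Jan Nowak'}))  # Jan Nowak
    print MediaFieldsSketch.director(MediaStub({}))                          # ''
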

diff --git a/apps/api/urls.py b/apps/api/urls.py
index 7b96337e8..a22f3b772 100644
--- a/apps/api/urls.py
+++ b/apps/api/urls.py
@@ -54,8 +54,14 @@ urlpatterns = patterns(
         fragment_resource, name="api_fragment"),

     # books by tags
-    url(r'^(?P<tags>(?:(?:[a-z0-9-]+/){2}){0,6})books/$', book_list_resource),
-    url(r'^(?P<tags>(?:(?:[a-z0-9-]+/){2}){0,6})parent_books/$', book_list_resource, {"top_level": True}),
+    url(r'^(?P<tags>(?:(?:[a-z0-9-]+/){2}){0,6})books/$',
+        book_list_resource, name='api_book_list'),
+    url(r'^(?P<tags>(?:(?:[a-z0-9-]+/){2}){0,6})parent_books/$',
+        book_list_resource, {"top_level": True}, name='api_parent_book_list'),
+    url(r'^(?P<tags>(?:(?:[a-z0-9-]+/){2}){0,6})audiobooks/$',
+        book_list_resource, {"audiobooks": True}, name='api_audiobook_list'),
+    url(r'^(?P<tags>(?:(?:[a-z0-9-]+/){2}){0,6})daisy/$',
+        book_list_resource, {"daisy": True}, name='api_daisy_list'),

     url(r'^pictures/$', picture_resource),

@@ -64,5 +70,5 @@ urlpatterns = patterns(
     url(r'^(?P<tags>(?:(?:[a-z0-9-]+/){2}){1,6})fragments/$', fragment_list_resource),

     # tags by category
-    url(r'^(?P<category>[a-z0-9-]+)/$', tag_list_resource),
+    url(r'^(?P<category>[a-z0-9-]+)/$', tag_list_resource, name='api_tag_list'),
 )
diff --git a/apps/catalogue/models.py b/apps/catalogue/models.py
index 8b8aa0ac6..ba1a5d203 100644
--- a/apps/catalogue/models.py
+++ b/apps/catalogue/models.py
@@ -1007,9 +1007,11 @@ post_save.connect(_post_save_handler)
 @django.dispatch.receiver(post_delete, sender=Book)
 def _remove_book_from_index_handler(sender, instance, **kwargs):
     """ remove the book from search index, when it is deleted."""
+    search.JVM.attachCurrentThread()
     idx = search.Index()
     idx.open(timeout=10000)  # 10 seconds timeout.
     try:
         idx.remove_book(instance)
+        idx.index_tags()
     finally:
         idx.close()
diff --git a/apps/catalogue/utils.py b/apps/catalogue/utils.py
index 9de4eaa2d..29f40d16b 100644
--- a/apps/catalogue/utils.py
+++ b/apps/catalogue/utils.py
@@ -74,7 +74,7 @@ class LockFile(object):
         try:
             unlink(self.lockname)
         except OSError as oe:
-            if oe.errno != oe.EEXIST:
+            if oe.errno != EEXIST:
                 raise oe
         self.lock.close()
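
The catalogue/utils.py hunk fixes an AttributeError: OSError instances carry errno but no EEXIST attribute, so the old comparison itself raised whenever unlink() failed. The corrected pattern in isolation, assuming the module-level import that the new line implies:

    from errno import EEXIST
    from os import unlink

    def remove_lock(lockname):
        try:
            unlink(lockname)
        except OSError as oe:
            # compare against the errno module's constant, not oe.EEXIST
            if oe.errno != EEXIST:
                raise
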
diff --git a/apps/modeltranslation/management/commands/translation2po.py b/apps/modeltranslation/management/commands/translation2po.py
new file mode 100644
index 000000000..9cb34deca
--- /dev/null
+++ b/apps/modeltranslation/management/commands/translation2po.py
@@ -0,0 +1,114 @@
+
+import os
+import sys
+import time
+from optparse import make_option
+from django.conf import settings
+from django.core.management.base import BaseCommand
+from django.core.management.color import color_style
+
+import polib
+import modeltranslation.models
+from modeltranslation.translator import translator, NotRegistered
+
+
+def metadata(language=''):
+    "get metadata for PO, given language code"
+    t = time.strftime('%Y-%m-%d %H:%M%z')
+
+    return {
+        'Project-Id-Version': '1.0',
+        'Report-Msgid-Bugs-To': 'marcin.koziej@nowoczesnapolska.org.pl',
+        'POT-Creation-Date': '%s' % t,
+        'PO-Revision-Date': '%s' % t,
+        'Last-Translator': 'you ',
+        'Language-Team': '%s' % dict(settings.LANGUAGES).get(language, language),
+        'MIME-Version': '1.0',
+        'Content-Type': 'text/plain; charset=utf-8',
+        'Content-Transfer-Encoding': '8bit',
+    }
+
+
+def lang(field_name):
+    "Get the language code from localized field name"
+    return field_name.split('_')[-1]
+
+
+def make_po(language=''):
+    "Create new POFile object for language code"
+    po = polib.POFile()
+    po.metadata = metadata(language)
+    return po
+
+
+class Command(BaseCommand):
+    option_list = BaseCommand.option_list + (
+        make_option('-d', '--directory', help='Specify which directory should hold generated PO files', dest='directory'),
+        make_option('-l', '--load', help='load locales back to source', action='store_true', dest='load', default=False),
+    )
+    help = 'Export models from app to po files'
+    args = 'app'
+
+    def get_models(self, app):
+        r = []
+        for mdname in dir(app.models):
+            if mdname[0] == '_':
+                continue
+            md = getattr(app.models, mdname)
+            try:
+                opts = translator.get_options_for_model(md)
+                r.append((md, opts))
+            except NotRegistered:
+                continue
+        return r
+
+    def handle(self, appname, **options):
+        app = __import__(appname)
+        if options['load']:
+            objects = {}
+            modmod = {}
+            for md, opts in self.get_models(app):
+                if not md.__name__ in objects:
+                    objects[md.__name__] = {}
+                    modmod['model'] = md
+
+            for lng in zip(*settings.LANGUAGES)[0]:
+                pofile = os.path.join(options['directory'], lng, appname + '.po')
+                po = polib.pofile(pofile)
+                for entry in po:
+                    loc, pk = entry.occurrences[0]
+                    _appname, modelname, fieldname = loc.split('/')
+                    try:
+                        obj = objects[modelname][pk]
+                    except KeyError:
+                        obj = modmod['model'].objects.get(pk=pk)
+                        objects[modelname][pk] = obj
+                    setattr(obj, fieldname, entry.msgstr)
+
+            for mod, objcs in objects.items():
+                for o in objcs.values():
+                    o.save()
+
+        else:
+            pofiles = {}
+            for md, opts in self.get_models(app):
+                for obj in md.objects.all().order_by('pk'):
+                    for fld in opts.fields:
+                        for locfld in opts.localized_fieldnames[fld]:
+                            cur_lang = lang(locfld)
+                            try:
+                                po = pofiles[cur_lang]
+                            except:
+                                po = make_po(cur_lang)
+                                pofiles[cur_lang] = po
+
+                            entry = polib.POEntry(
+                                msgid=getattr(obj, '%s_%s' % (fld, settings.LANGUAGE_CODE)),
+                                msgstr=getattr(obj, locfld),
+                                occurrences=[('%s/%s/%s' % (appname, md.__name__, locfld), obj.id)])
+                            po.append(entry)
+
+            directory = options['directory']
+            for lng, po in pofiles.items():
+                try: os.makedirs(os.path.join(directory, lng))
+                except OSError: pass
+                po.save(os.path.join(directory, lng, '%s.po' % appname))
diff --git a/apps/opds/views.py b/apps/opds/views.py
index 96a4cae53..e01c17283 100644
--- a/apps/opds/views.py
+++ b/apps/opds/views.py
@@ -384,7 +384,7 @@ class SearchFeed(AcquisitionFeed):

         if author:
             print "narrow to author %s" % author
-            hint.tags(srch.search_tags(author, filter=srch.term_filter(Term('tag_category', 'author'))))
+            hint.tags(srch.search_tags(author, filt=srch.term_filter(Term('tag_category', 'author'))))

         if translator:
             print "filter by translator %s" % translator
@@ -401,7 +401,7 @@ class SearchFeed(AcquisitionFeed):
         if title:
             print "hint by book title %s" % title
             q = srch.make_phrase(srch.get_tokens(title, field='title'), field='title')
-            hint.books(*srch.search_books(q, filter=flt))
+            hint.books(*srch.search_books(q, filt=flt))

         toks = srch.get_tokens(query)
         print "tokens: %s" % toks
@@ -428,7 +428,7 @@ class SearchFeed(AcquisitionFeed):
                     srch.make_phrase(srch.get_tokens(q, field=fld), field=fld)))

         flt = srch.chain_filters(filters)
-        books = srch.search_books(TermQuery(Term('is_book', 'true')), filter=flt)
+        books = srch.search_books(TermQuery(Term('is_book', 'true')), filt=flt)
         return books

     def get_link(self, query):
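
The filter= to filt= rename in opds/views.py tracks the signature change in apps/search further down: a parameter named filter shadowed the Python builtin of the same name inside methods that also call it. A toy illustration of the clash and the fix (the functions and data are invented):

    def search_books_shadowed(query, filter=None):
        hits = [query.upper(), None]
        return filter(None, hits)   # filter is None here: TypeError when called

    def search_books(query, filt=None):
        hits = [query.upper(), None]   # stand-in for Lucene results
        return filter(None, hits)      # the builtin filter() is reachable again

    print search_books('lalka')  # -> ['LALKA']
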
diff --git a/apps/pdcounter/models.py b/apps/pdcounter/models.py
index f4832931f..af88bdb09 100644
--- a/apps/pdcounter/models.py
+++ b/apps/pdcounter/models.py
@@ -6,7 +6,8 @@ from django.db import models
 from django.db.models import permalink
 from django.utils.translation import ugettext as _
 from datetime import datetime
-
+from django.db.models.signals import post_save, post_delete
+import search

 class Author(models.Model):
     name = models.CharField(_('name'), max_length=50, db_index=True)
@@ -85,3 +86,19 @@ class BookStub(models.Model):

     def pretty_title(self, html_links=False):
         return ', '.join((self.author, self.title))
+
+
+def update_index(sender, instance, **kwargs):
+    print "update pd index %s [update %s]" % (instance, 'created' in kwargs)
+    search.JVM.attachCurrentThread()
+    idx = search.Index()
+    idx.open()
+    try:
+        idx.index_tags(instance, remove_only=not 'created' in kwargs)
+    finally:
+        idx.close()
+
+post_delete.connect(update_index, Author)
+post_delete.connect(update_index, BookStub)
+post_save.connect(update_index, Author)
+post_save.connect(update_index, BookStub)
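
update_index above leans on a Django signal detail: post_save always delivers a created keyword argument, post_delete never does, so 'created' in kwargs separates the two, and a delete turns into remove_only=True. A sketch of the same receiver shape (reindex is a hypothetical stand-in for the Index calls; Author is the model from the diff):

    from django.db.models.signals import post_save, post_delete

    def update_index(sender, instance, **kwargs):
        # post_save sends created=True/False; post_delete sends neither,
        # so its absence means we only have to remove the stale entry
        remove_only = 'created' not in kwargs
        reindex(instance, remove_only=remove_only)  # hypothetical helper

    # connect()'s second positional argument is the sender to filter on
    post_save.connect(update_index, Author)
    post_delete.connect(update_index, Author)
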
""" if not 'b' in mode: mode += 'b' - self.file = open(os.path.join(settings.SEARCH_INDEX, self.SNIPPET_DIR, str(self.book_id)), mode) + + if 'w' in mode: + if os.path.exists(self.path): + self.revision = 1 + while True: + if not os.path.exists(self.path): + break + self.revision += 1 + + self.file = open(self.path, mode) self.position = 0 return self @@ -156,6 +179,17 @@ class Snippets(object): """Close snippet file""" self.file.close() + def remove(self): + self.revision = None + try: + os.unlink(self.path) + self.revision = 0 + while True: + self.revision += 1 + os.unlink(self.path) + except OSError: + pass + class BaseIndex(IndexStore): """ @@ -185,11 +219,15 @@ class BaseIndex(IndexStore): try: self.index.optimize() except JavaError, je: - print "Error during optimize phase, check index: %s" % je + log.error("Error during optimize phase, check index: %s" % je) self.index.close() self.index = None + index_changed.send_robust(self) + + super(BaseIndex, self).close() + def __enter__(self): self.open() return self @@ -198,6 +236,9 @@ class BaseIndex(IndexStore): self.close() +index_changed = Signal() + + class Index(BaseIndex): """ Class indexing books. @@ -205,40 +246,66 @@ class Index(BaseIndex): def __init__(self, analyzer=None): super(Index, self).__init__(analyzer) - def index_tags(self): + def index_tags(self, *tags, **kw): """ Re-index global tag list. Removes all tags from index, then index them again. Indexed fields include: id, name (with and without polish stems), category """ - q = NumericRangeQuery.newIntRange("tag_id", 0, Integer.MAX_VALUE, True, True) - self.index.deleteDocuments(q) + remove_only = kw.get('remove_only', False) + # first, remove tags from index. + if tags: + q = BooleanQuery() + for tag in tags: + b_id_cat = BooleanQuery() + + q_id = NumericRangeQuery.newIntRange("tag_id", tag.id, tag.id, True, True) + b_id_cat.add(q_id, BooleanClause.Occur.MUST) - for tag in catalogue.models.Tag.objects.exclude(category='set'): - doc = Document() - doc.add(NumericField("tag_id", Field.Store.YES, True).setIntValue(int(tag.id))) - doc.add(Field("tag_name", tag.name, Field.Store.NO, Field.Index.ANALYZED)) - doc.add(Field("tag_name_pl", tag.name, Field.Store.NO, Field.Index.ANALYZED)) - doc.add(Field("tag_category", tag.category, Field.Store.NO, Field.Index.NOT_ANALYZED)) - self.index.addDocument(doc) - - for pdtag in PDCounterAuthor.objects.all(): - doc = Document() - doc.add(NumericField("tag_id", Field.Store.YES, True).setIntValue(int(pdtag.id))) - doc.add(Field("tag_name", pdtag.name, Field.Store.NO, Field.Index.ANALYZED)) - doc.add(Field("tag_name_pl", pdtag.name, Field.Store.NO, Field.Index.ANALYZED)) - doc.add(Field("tag_category", 'pd_author', Field.Store.YES, Field.Index.NOT_ANALYZED)) - doc.add(Field("is_pdcounter", 'true', Field.Store.YES, Field.Index.NOT_ANALYZED)) - self.index.addDocument(doc) - - for pdtag in PDCounterBook.objects.all(): - doc = Document() - doc.add(NumericField("tag_id", Field.Store.YES, True).setIntValue(int(pdtag.id))) - doc.add(Field("tag_name", pdtag.title, Field.Store.NO, Field.Index.ANALYZED)) - doc.add(Field("tag_name_pl", pdtag.title, Field.Store.NO, Field.Index.ANALYZED)) - doc.add(Field("tag_category", 'pd_book', Field.Store.YES, Field.Index.NOT_ANALYZED)) - doc.add(Field("is_pdcounter", 'true', Field.Store.YES, Field.Index.NOT_ANALYZED)) - self.index.addDocument(doc) + if isinstance(tag, PDCounterAuthor): + q_cat = TermQuery(Term('tag_category', 'pd_author')) + elif isinstance(tag, PDCounterBook): + q_cat = 
@@ -205,40 +246,66 @@ class Index(BaseIndex):
     def __init__(self, analyzer=None):
         super(Index, self).__init__(analyzer)

-    def index_tags(self):
+    def index_tags(self, *tags, **kw):
         """
         Re-index global tag list.
         Removes all tags from index, then index them again.
         Indexed fields include: id, name (with and without polish stems), category
         """
-        q = NumericRangeQuery.newIntRange("tag_id", 0, Integer.MAX_VALUE, True, True)
-        self.index.deleteDocuments(q)
+        remove_only = kw.get('remove_only', False)
+        # first, remove tags from index.
+        if tags:
+            q = BooleanQuery()
+            for tag in tags:
+                b_id_cat = BooleanQuery()
+
+                q_id = NumericRangeQuery.newIntRange("tag_id", tag.id, tag.id, True, True)
+                b_id_cat.add(q_id, BooleanClause.Occur.MUST)

-        for tag in catalogue.models.Tag.objects.exclude(category='set'):
-            doc = Document()
-            doc.add(NumericField("tag_id", Field.Store.YES, True).setIntValue(int(tag.id)))
-            doc.add(Field("tag_name", tag.name, Field.Store.NO, Field.Index.ANALYZED))
-            doc.add(Field("tag_name_pl", tag.name, Field.Store.NO, Field.Index.ANALYZED))
-            doc.add(Field("tag_category", tag.category, Field.Store.NO, Field.Index.NOT_ANALYZED))
-            self.index.addDocument(doc)
-
-        for pdtag in PDCounterAuthor.objects.all():
-            doc = Document()
-            doc.add(NumericField("tag_id", Field.Store.YES, True).setIntValue(int(pdtag.id)))
-            doc.add(Field("tag_name", pdtag.name, Field.Store.NO, Field.Index.ANALYZED))
-            doc.add(Field("tag_name_pl", pdtag.name, Field.Store.NO, Field.Index.ANALYZED))
-            doc.add(Field("tag_category", 'pd_author', Field.Store.YES, Field.Index.NOT_ANALYZED))
-            doc.add(Field("is_pdcounter", 'true', Field.Store.YES, Field.Index.NOT_ANALYZED))
-            self.index.addDocument(doc)
-
-        for pdtag in PDCounterBook.objects.all():
-            doc = Document()
-            doc.add(NumericField("tag_id", Field.Store.YES, True).setIntValue(int(pdtag.id)))
-            doc.add(Field("tag_name", pdtag.title, Field.Store.NO, Field.Index.ANALYZED))
-            doc.add(Field("tag_name_pl", pdtag.title, Field.Store.NO, Field.Index.ANALYZED))
-            doc.add(Field("tag_category", 'pd_book', Field.Store.YES, Field.Index.NOT_ANALYZED))
-            doc.add(Field("is_pdcounter", 'true', Field.Store.YES, Field.Index.NOT_ANALYZED))
-            self.index.addDocument(doc)
+                if isinstance(tag, PDCounterAuthor):
+                    q_cat = TermQuery(Term('tag_category', 'pd_author'))
+                elif isinstance(tag, PDCounterBook):
+                    q_cat = TermQuery(Term('tag_category', 'pd_book'))
+                else:
+                    q_cat = TermQuery(Term('tag_category', tag.category))
+                b_id_cat.add(q_cat, BooleanClause.Occur.MUST)
+
+                q.add(b_id_cat, BooleanClause.Occur.SHOULD)
+        else:  # all
+            q = NumericRangeQuery.newIntRange("tag_id", 0, Integer.MAX_VALUE, True, True)
+        self.index.deleteDocuments(q)
+
+        if not remove_only:
+            # then add them [all or just one passed]
+            if not tags:
+                tags = chain(catalogue.models.Tag.objects.exclude(category='set'), \
+                    PDCounterAuthor.objects.all(), \
+                    PDCounterBook.objects.all())
+
+            for tag in tags:
+                if isinstance(tag, PDCounterAuthor):
+                    doc = Document()
+                    doc.add(NumericField("tag_id", Field.Store.YES, True).setIntValue(int(tag.id)))
+                    doc.add(Field("tag_name", tag.name, Field.Store.NO, Field.Index.ANALYZED))
+                    doc.add(Field("tag_name_pl", tag.name, Field.Store.NO, Field.Index.ANALYZED))
+                    doc.add(Field("tag_category", 'pd_author', Field.Store.YES, Field.Index.NOT_ANALYZED))
+                    doc.add(Field("is_pdcounter", 'true', Field.Store.YES, Field.Index.NOT_ANALYZED))
+                    self.index.addDocument(doc)
+                elif isinstance(tag, PDCounterBook):
+                    doc = Document()
+                    doc.add(NumericField("tag_id", Field.Store.YES, True).setIntValue(int(tag.id)))
+                    doc.add(Field("tag_name", tag.title, Field.Store.NO, Field.Index.ANALYZED))
+                    doc.add(Field("tag_name_pl", tag.title, Field.Store.NO, Field.Index.ANALYZED))
+                    doc.add(Field("tag_category", 'pd_book', Field.Store.YES, Field.Index.NOT_ANALYZED))
+                    doc.add(Field("is_pdcounter", 'true', Field.Store.YES, Field.Index.NOT_ANALYZED))
+                    self.index.addDocument(doc)
+                else:
+                    doc = Document()
+                    doc.add(NumericField("tag_id", Field.Store.YES, True).setIntValue(int(tag.id)))
+                    doc.add(Field("tag_name", tag.name, Field.Store.NO, Field.Index.ANALYZED))
+                    doc.add(Field("tag_name_pl", tag.name, Field.Store.NO, Field.Index.ANALYZED))
+                    doc.add(Field("tag_category", tag.category, Field.Store.NO, Field.Index.NOT_ANALYZED))
+                    self.index.addDocument(doc)

     def create_book_doc(self, book):
         """
         Creates a lucene document referring to book.
@@ -250,12 +317,16 @@ class Index(BaseIndex):
             doc.add(NumericField("parent_id", Field.Store.YES, True).setIntValue(int(book.parent.id)))
         return doc

-    def remove_book(self, book):
+    def remove_book(self, book, remove_snippets=True):
         """Removes a book from search index.
         book - Book instance."""
         q = NumericRangeQuery.newIntRange("book_id", book.id, book.id, True, True)
         self.index.deleteDocuments(q)

+        if remove_snippets:
+            snippets = Snippets(book.id)
+            snippets.remove()
+
     def index_book(self, book, book_info=None, overwrite=True):
         """
         Indexes the book.
@@ -263,7 +334,9 @@ class Index(BaseIndex):
         and calls self.index_content() to index the contents of the book.
         """
         if overwrite:
-            self.remove_book(book)
+            # we don't remove snippets, since they might be still needed by
+            # threads using not reopened index
+            self.remove_book(book, remove_snippets=False)

         book_doc = self.create_book_doc(book)
         meta_fields = self.extract_metadata(book, book_info)
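
The overwrite path above deliberately splits the two cleanups: deleting the Lucene documents is safe immediately, but the snippet file must outlive the old reader, since searchers that have not reopened yet still hold byte offsets into it. A condensed sketch of the intended ordering (idx stands for an open Index instance from this module):

    def reindex_book(idx, book):
        # index_book(overwrite=True) internally does:
        #   remove_book(book, remove_snippets=False)  # old offsets stay valid
        #   ... then writes new documents plus a new snippet revision file
        idx.index_book(book)
        # closing fires index_changed; once searchers reopen, nothing
        # references the previous snippet revision any more
        idx.close()
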
""" if overwrite: - self.remove_book(book) + # we don't remove snippets, since they might be still needed by + # threads using not reopened index + self.remove_book(book, remove_snippets=False) book_doc = self.create_book_doc(book) meta_fields = self.extract_metadata(book, book_info) @@ -426,6 +499,8 @@ class Index(BaseIndex): snip_pos = snippets.add(fields["content"]) doc.add(NumericField("snippets_position", Field.Store.YES, True).setIntValue(snip_pos[0])) doc.add(NumericField("snippets_length", Field.Store.YES, True).setIntValue(snip_pos[1])) + if snippets.revision: + doc.add(NumericField("snippets_revision", Field.Store.YES, True).setIntValue(snippets.revision)) if 'fragment_anchor' in fields: doc.add(Field("fragment_anchor", fields['fragment_anchor'], @@ -547,7 +622,7 @@ def log_exception_wrapper(f): try: f(*a) except Exception, e: - print("Error in indexing thread: %s" % e) + log.error("Error in indexing thread: %s" % e) traceback.print_exc() raise e return _wrap @@ -567,7 +642,6 @@ class ReusableIndex(Index): if ReusableIndex.index: self.index = ReusableIndex.index else: - print("opening index") Index.open(self, analyzer, **kw) ReusableIndex.index = self.index atexit.register(ReusableIndex.close_reusable) @@ -579,11 +653,12 @@ class ReusableIndex(Index): @staticmethod def close_reusable(): if ReusableIndex.index: - print("closing index") ReusableIndex.index.optimize() ReusableIndex.index.close() ReusableIndex.index = None + index_changed.send_robust(None) + def close(self): if ReusableIndex.index: ReusableIndex.index.commit() @@ -687,6 +762,8 @@ class SearchResult(object): return self def get_book(self): + if hasattr(self, '_book'): + return self._book return catalogue.models.Book.objects.get(id=self.book_id) book = property(get_book) @@ -705,7 +782,10 @@ class SearchResult(object): # to sections and fragments frags = filter(lambda r: r[FRAGMENT] is not None, self._hits) + sect = filter(lambda r: r[FRAGMENT] is None, self._hits) + + # sections not covered by fragments sect = filter(lambda s: 0 == len(filter( lambda f: s[POSITION][POSITION_INDEX] >= f[POSITION][POSITION_INDEX] and s[POSITION][POSITION_INDEX] < f[POSITION][POSITION_INDEX] + f[POSITION][POSITION_SPAN], @@ -713,15 +793,20 @@ class SearchResult(object): hits = [] - # remove duplicate fragments - fragments = {} - for f in frags: - fid = f[FRAGMENT] - if fid in fragments: - if fragments[fid][SCORE] >= f[SCORE]: - continue - fragments[fid] = f - frags = fragments.values() + def remove_duplicates(lst, keyfn, compare): + els = {} + for e in lst: + eif = keyfn(e) + if eif in els: + if compare(els[eif], e) >= 1: + continue + els[eif] = e + return els.values() + + # remove fragments with duplicated fid's and duplicated snippets + frags = remove_duplicates(frags, lambda f: f[FRAGMENT], lambda a, b: cmp(a[SCORE], b[SCORE])) + frags = remove_duplicates(frags, lambda f: f[OTHER]['snippets'] and f[OTHER]['snippets'][0] or f[FRAGMENT], + lambda a, b: cmp(a[SCORE], b[SCORE])) # remove duplicate sections sections = {} @@ -786,7 +871,6 @@ class SearchResult(object): for r in rl: if r.book_id in books: books[r.book_id].merge(r) - #print(u"already have one with score %f, and this one has score %f" % (books[book.id][0], found.score)) else: books[r.book_id] = r return books.values() @@ -903,12 +987,31 @@ class Search(IndexStore): IndexStore.__init__(self) self.analyzer = WLAnalyzer() # PolishAnalyzer(Version.LUCENE_34) # self.analyzer = WLAnalyzer() - self.searcher = IndexSearcher(self.store, True) + reader = IndexReader.open(self.store, 
@@ -903,12 +987,31 @@ class Search(IndexStore):
         IndexStore.__init__(self)
         self.analyzer = WLAnalyzer()  # PolishAnalyzer(Version.LUCENE_34)
         # self.analyzer = WLAnalyzer()
-        self.searcher = IndexSearcher(self.store, True)
+        reader = IndexReader.open(self.store, True)
+        self.searcher = IndexSearcher(reader)
         self.parser = QueryParser(Version.LUCENE_34, default_field,
                                   self.analyzer)

         self.parent_filter = TermsFilter()
         self.parent_filter.addTerm(Term("is_book", "true"))
+        index_changed.connect(self.reopen)
+
+    def close(self):
+        reader = self.searcher.getIndexReader()
+        self.searcher.close()
+        reader.close()
+        super(Search, self).close()
+        index_changed.disconnect(self.reopen)
+
+    def reopen(self, **unused):
+        reader = self.searcher.getIndexReader()
+        rdr = reader.reopen()
+        if not rdr.equals(reader):
+            log.debug('Reopening index')
+            oldsearch = self.searcher
+            self.searcher = IndexSearcher(rdr)
+            oldsearch.close()
+            reader.close()

     def query(self, query):
         """Parse query in default Lucene Syntax. (for humans)
@@ -973,7 +1076,6 @@ class Search(IndexStore):
         fuzzterms = []

         while True:
-            # print("fuzz %s" % unicode(fuzzterm.term()).encode('utf-8'))
             ft = fuzzterm.term()
             if ft:
                 fuzzterms.append(ft)
@@ -1144,7 +1246,6 @@ class Search(IndexStore):
             topDocs = self.searcher.search(q, only_in, max_results)
             for found in topDocs.scoreDocs:
                 books.append(SearchResult(self, found, how_found='search_everywhere_themesXcontent', searched=searched))
-                print "* %s theme x content: %s" % (searched, books[-1]._hits)

         # query themes/content x author/title/tags
         q = BooleanQuery()
@@ -1163,7 +1264,6 @@ class Search(IndexStore):
         topDocs = self.searcher.search(q, only_in, max_results)
         for found in topDocs.scoreDocs:
             books.append(SearchResult(self, found, how_found='search_everywhere', searched=searched))
-            print "* %s scatter search: %s" % (searched, books[-1]._hits)

         return books

@@ -1222,9 +1322,19 @@ class Search(IndexStore):
         length = stored.get('snippets_length')
         if position is None or length is None:
             return None
+        revision = stored.get('snippets_revision')
+        if revision: revision = int(revision)
+
         # locate content.
         book_id = int(stored.get('book_id'))
-        snippets = Snippets(book_id).open()
+        snippets = Snippets(book_id, revision=revision)
+
+        try:
+            snippets.open()
+        except IOError, e:
+            log.error("Cannot open snippet file for book id = %d [rev=%d], %s" % (book_id, revision, e))
+            return []
+
         try:
             try:
                 text = snippets.get((int(position),
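
Search now subscribes to index_changed and swaps in a fresh searcher only when the reader actually changed; Lucene's IndexReader.reopen() hands back an equal reader when nothing was committed, hence the equals() guard before closing the old pair. The swap order matters: publish the new searcher first, then release the old one. A runnable sketch with stand-in objects (no PyLucene required):

    class StubReader(object):
        "Stand-in for a Lucene IndexReader."
        def __init__(self, version):
            self.version = version
        def reopen(self):
            # unchanged index: reopen() returns the very same reader
            return self if self.version == CURRENT[0] else StubReader(CURRENT[0])
        def close(self):
            pass

    CURRENT = [2]   # pretend the on-disk index advanced to version 2

    def reopen_if_changed(box):
        reader = box['reader']
        rdr = reader.reopen()
        if rdr is not reader:       # plays the role of equals() in the diff
            box['reader'] = rdr     # publish the new reader first
            reader.close()          # then release the old one

    box = {'reader': StubReader(1)}
    reopen_if_changed(box)
    print box['reader'].version  # -> 2
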
@@ -1261,45 +1371,52 @@ class Search(IndexStore):
         if terms:
             return JArray('object')(terms, Term)

-    def search_tags(self, query, filters=None, max_results=40, pdcounter=False):
+    def search_tags(self, query, filt=None, max_results=40, pdcounter=False):
         """
         Search for Tag objects using query.
         """
         if not pdcounter:
-            filters = self.chain_filters([filter, self.term_filter(Term('is_pdcounter', 'true'), inverse=True)])
-        tops = self.searcher.search(query, filters, max_results)
+            filters = self.chain_filters([filt, self.term_filter(Term('is_pdcounter', 'true'), inverse=True)])
+        tops = self.searcher.search(query, filt, max_results)

         tags = []
         for found in tops.scoreDocs:
             doc = self.searcher.doc(found.doc)
             is_pdcounter = doc.get('is_pdcounter')
             category = doc.get('tag_category')
-            if is_pdcounter == 'true':
-                if category == 'pd_author':
-                    tag = PDCounterAuthor.objects.get(id=doc.get('tag_id'))
-                elif category == 'pd_book':
-                    tag = PDCounterBook.objects.get(id=doc.get('tag_id'))
-                    tag.category = 'pd_book'  # make it look more lik a tag.
+            try:
+                if is_pdcounter == 'true':
+                    if category == 'pd_author':
+                        tag = PDCounterAuthor.objects.get(id=doc.get('tag_id'))
+                    elif category == 'pd_book':
+                        tag = PDCounterBook.objects.get(id=doc.get('tag_id'))
+                        tag.category = 'pd_book'  # make it look more lik a tag.
+                    else:
+                        print "Warning. cannot get pdcounter tag_id=%d from db; cat=%s" % (int(doc.get('tag_id')), category)
                 else:
-                    print "Warning. cannot get pdcounter tag_id=%d from db; cat=%s" % (int(doc.get('tag_id')), category)
-            else:
-                tag = catalogue.models.Tag.objects.get(id=doc.get("tag_id"))
-            # don't add the pdcounter tag if same tag already exists
-            if not (is_pdcounter and filter(lambda t: tag.slug == t.slug, tags)):
-                tags.append(tag)
-                # print "%s (%d) -> %f" % (tag, tag.id, found.score)
-        print 'returning %s' % tags
+                    tag = catalogue.models.Tag.objects.get(id=doc.get("tag_id"))
+                # don't add the pdcounter tag if same tag already exists
+                if not (is_pdcounter and filter(lambda t: tag.slug == t.slug, tags)):
+                    tags.append(tag)
+            except catalogue.models.Tag.DoesNotExist: pass
+            except PDCounterAuthor.DoesNotExist: pass
+            except PDCounterBook.DoesNotExist: pass
+
+        log.debug('search_tags: %s' % tags)
+
         return tags

-    def search_books(self, query, filter=None, max_results=10):
+    def search_books(self, query, filt=None, max_results=10):
         """
         Searches for Book objects using query
         """
         bks = []
-        tops = self.searcher.search(query, filter, max_results)
+        tops = self.searcher.search(query, filt, max_results)
         for found in tops.scoreDocs:
             doc = self.searcher.doc(found.doc)
-            bks.append(catalogue.models.Book.objects.get(id=doc.get("book_id")))
+            try:
+                bks.append(catalogue.models.Book.objects.get(id=doc.get("book_id")))
+            except catalogue.models.Book.DoesNotExist: pass
         return bks

     def make_prefix_phrase(self, toks, field):
diff --git a/apps/search/views.py b/apps/search/views.py
index 2945c42ad..dcd80b29f 100644
--- a/apps/search/views.py
+++ b/apps/search/views.py
@@ -54,6 +54,7 @@ def did_you_mean(query, tokens):

 JVM.attachCurrentThread()
 search = Search()

+
 def hint(request):
     prefix = request.GET.get('term', '')
     if len(prefix) < 2:
@@ -77,7 +78,7 @@ def hint(request):

     def category_name(c):
         if c.startswith('pd_'):
-            c=c[len('pd_'):]
+            c = c[len('pd_'):]
         return _(c)

     return JSONResponse(
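
search_tags and search_books now wrap every database lookup in a DoesNotExist guard: the Lucene index can briefly point at rows that were just deleted, and one orphaned hit should not take down the whole result list. The shape of the guard, with a plain dict standing in for the ORM:

    def resolve_hits(hit_ids, table):
        objects = []
        for pk in hit_ids:
            try:
                objects.append(table[pk])   # Model.objects.get(...) in the real code
            except KeyError:                # stands in for Model.DoesNotExist
                pass                        # index is ahead of the db; skip the hit
        return objects

    books = {1: 'Lalka', 3: 'Quo vadis'}
    print resolve_hits([1, 2, 3], books)    # -> ['Lalka', 'Quo vadis']
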
@@ -101,109 +102,117 @@ def main(request):
     query = None
     fuzzy = False #0.8

-    if 'q' in request.GET:
-        # tags = request.GET.get('tags', '')
-        query = request.GET['q']
-        # book_id = request.GET.get('book', None)
-        # book = None
-        # if book_id is not None:
-        #     book = get_object_or_404(Book, id=book_id)
-
-        # hint = search.hint()
-        # try:
-        #     tag_list = Tag.get_tag_list(tags)
-        # except:
-        #     tag_list = []
-
-        if len(query) < 2:
-            return render_to_response('catalogue/search_too_short.html', {'prefix': query},
-                                      context_instance=RequestContext(request))
-
-        # hint.tags(tag_list)
-        # if book:
-        #     hint.books(book)
-        tags = search.hint_tags(query, pdcounter=True, prefix=False, fuzzy=fuzzy)
-        tags = split_tags(tags)
-
-        toks = StringReader(query)
-        tokens_cache = {}
-
-        author_results = search.search_phrase(toks, 'authors', fuzzy=fuzzy, tokens_cache=tokens_cache)
-        title_results = search.search_phrase(toks, 'title', fuzzy=fuzzy, tokens_cache=tokens_cache)
-
-        # Boost main author/title results with mixed search, and save some of its results for end of list.
-        # boost author, title results
-        author_title_mixed = search.search_some(toks, ['authors', 'title', 'tags'], fuzzy=fuzzy, tokens_cache=tokens_cache)
-        author_title_rest = []
-        for b in author_title_mixed:
-            bks = filter(lambda ba: ba.book_id == b.book_id, author_results + title_results)
-            for b2 in bks:
-                b2.boost *= 1.1
-            if bks is []:
-                author_title_rest.append(b)
-
-        # Do a phrase search but a term search as well - this can give us better snippets then search_everywhere,
-        # Because the query is using only one field.
-        text_phrase = SearchResult.aggregate(
-            search.search_phrase(toks, 'content', fuzzy=fuzzy, tokens_cache=tokens_cache, snippets=True, book=False, slop=4),
-            search.search_some(toks, ['content'], tokens_cache=tokens_cache, snippets=True, book=False))
-
-        everywhere = search.search_everywhere(toks, fuzzy=fuzzy, tokens_cache=tokens_cache)
-
-        def already_found(results):
-            def f(e):
-                for r in results:
-                    if e.book_id == r.book_id:
-                        e.boost = 0.9
-                        results.append(e)
-                        return True
-                return False
-            return f
-        f = already_found(author_results + title_results + text_phrase)
-        everywhere = filter(lambda x: not f(x), everywhere)
-
-        author_results = SearchResult.aggregate(author_results)
-        title_results = SearchResult.aggregate(title_results)
-
-        everywhere = SearchResult.aggregate(everywhere, author_title_rest)
-
-        for res in [author_results, title_results, text_phrase, everywhere]:
-            res.sort(reverse=True)
-            for r in res:
-                for h in r.hits:
-                    h['snippets'] = map(lambda s:
-                                        re.subn(r"(^[ \t\n]+|[ \t\n]+$)", u"",
-                                                re.subn(r"[ \t\n]*\n[ \t\n]*", u"\n", s)[0])[0], h['snippets'])
-
-        suggestion = did_you_mean(query, search.get_tokens(toks, field="SIMPLE"))
-        print "dym? %s" % repr(suggestion).encode('utf-8')
-
-        results = author_results + title_results + text_phrase + everywhere
-        results.sort(reverse=True)
-
-        if len(results) == 1:
-            fragment_hits = filter(lambda h: 'fragment' in h, results[0].hits)
-            if len(fragment_hits) == 1:
-                #anchor = fragment_hits[0]['fragment']
-                #frag = Fragment.objects.get(anchor=anchor)
-                return HttpResponseRedirect(fragment_hits[0]['fragment'].get_absolute_url())
-            return HttpResponseRedirect(results[0].book.get_absolute_url())
-        elif len(results) == 0:
-            form = PublishingSuggestForm(initial={"books": query + ", "})
-            return render_to_response('catalogue/search_no_hits.html',
-                                      {'tags': tags,
-                                       'prefix': query,
-                                       "form": form,
-                                       'did_you_mean': suggestion},
-                                      context_instance=RequestContext(request))
-
-        print "TAGS: %s" % tags
-        return render_to_response('catalogue/search_multiple_hits.html',
+    query = request.GET.get('q','')
+    # book_id = request.GET.get('book', None)
+    # book = None
+    # if book_id is not None:
+    #     book = get_object_or_404(Book, id=book_id)
+
+    # hint = search.hint()
+    # try:
+    #     tag_list = Tag.get_tag_list(tags)
+    # except:
+    #     tag_list = []
+
+    if len(query) < 2:
+        return render_to_response('catalogue/search_too_short.html', {'prefix': query},
+                                  context_instance=RequestContext(request))
+
+    # hint.tags(tag_list)
+    # if book:
+    #     hint.books(book)
+    tags = search.hint_tags(query, pdcounter=True, prefix=False, fuzzy=fuzzy)
+    tags = split_tags(tags)
+
+    toks = StringReader(query)
+    tokens_cache = {}
+
+    author_results = search.search_phrase(toks, 'authors', fuzzy=fuzzy, tokens_cache=tokens_cache)
+    title_results = search.search_phrase(toks, 'title', fuzzy=fuzzy, tokens_cache=tokens_cache)
+
+    # Boost main author/title results with mixed search, and save some of its results for end of list.
+    # boost author, title results
+    author_title_mixed = search.search_some(toks, ['authors', 'title', 'tags'], fuzzy=fuzzy, tokens_cache=tokens_cache)
+    author_title_rest = []
+    for b in author_title_mixed:
+        bks = filter(lambda ba: ba.book_id == b.book_id, author_results + title_results)
+        for b2 in bks:
+            b2.boost *= 1.1
+        if bks is []:
+            author_title_rest.append(b)
+
+    # Do a phrase search but a term search as well - this can give us better snippets then search_everywhere,
+    # Because the query is using only one field.
+    text_phrase = SearchResult.aggregate(
+        search.search_phrase(toks, 'content', fuzzy=fuzzy, tokens_cache=tokens_cache, snippets=True, book=False, slop=4),
+        search.search_some(toks, ['content'], tokens_cache=tokens_cache, snippets=True, book=False))
+
+    everywhere = search.search_everywhere(toks, fuzzy=fuzzy, tokens_cache=tokens_cache)
+
+    def already_found(results):
+        def f(e):
+            for r in results:
+                if e.book_id == r.book_id:
+                    e.boost = 0.9
+                    results.append(e)
+                    return True
+            return False
+        return f
+    f = already_found(author_results + title_results + text_phrase)
+    everywhere = filter(lambda x: not f(x), everywhere)
+
+    author_results = SearchResult.aggregate(author_results)
+    title_results = SearchResult.aggregate(title_results)
+
+    everywhere = SearchResult.aggregate(everywhere, author_title_rest)
+
+    for res in [author_results, title_results, text_phrase, everywhere]:
+        res.sort(reverse=True)
+        for r in res:
+            for h in r.hits:
+                h['snippets'] = map(lambda s:
+                                    re.subn(r"(^[ \t\n]+|[ \t\n]+$)", u"",
+                                            re.subn(r"[ \t\n]*\n[ \t\n]*", u"\n", s)[0])[0], h['snippets'])
+
+    suggestion = did_you_mean(query, search.get_tokens(toks, field="SIMPLE"))
+
+    def ensure_exists(r):
+        try:
+            return r.book
+        except Book.DoesNotExist:
+            return False
+
+    author_results = filter(ensure_exists, author_results)
+    title_results = filter(ensure_exists, title_results)
+    text_phrase = filter(ensure_exists, text_phrase)
+    everywhere = filter(ensure_exists, everywhere)
+
+    results = author_results + title_results + text_phrase + everywhere
+    # ensure books do exists & sort them
+    results.sort(reverse=True)
+
+    if len(results) == 1:
+        fragment_hits = filter(lambda h: 'fragment' in h, results[0].hits)
+        if len(fragment_hits) == 1:
+            #anchor = fragment_hits[0]['fragment']
+            #frag = Fragment.objects.get(anchor=anchor)
+            return HttpResponseRedirect(fragment_hits[0]['fragment'].get_absolute_url())
+        return HttpResponseRedirect(results[0].book.get_absolute_url())
+    elif len(results) == 0:
+        form = PublishingSuggestForm(initial={"books": query + ", "})
+        return render_to_response('catalogue/search_no_hits.html',
                                   {'tags': tags,
                                    'prefix': query,
-                                   'results': { 'author': author_results,
-                                                'title': title_results,
-                                                'content': text_phrase,
-                                                'other': everywhere},
+                                   "form": form,
                                    'did_you_mean': suggestion},
                                   context_instance=RequestContext(request))
+
+    return render_to_response('catalogue/search_multiple_hits.html',
+                              {'tags': tags,
+                               'prefix': query,
+                               'results': { 'author': author_results,
+                                            'title': title_results,
+                                            'content': text_phrase,
+                                            'other': everywhere},
+                               'did_you_mean': suggestion},
+                              context_instance=RequestContext(request))
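
In the rewritten main() above, ensure_exists doubles as a filter() predicate: it returns the hit's book (truthy) when the row still exists and False otherwise, so vanished books drop out of all four result lists before sorting. The idea in isolation (Hit and LookupError stand in for the real result and exception types):

    class Hit(object):
        def __init__(self, book):
            self._book = book
        @property
        def book(self):
            if self._book is None:
                raise LookupError       # stands in for Book.DoesNotExist
            return self._book

    def ensure_exists(r):
        try:
            return r.book               # truthy: keep the hit
        except LookupError:
            return False                # falsy: filter() drops it

    hits = [Hit('Dziady'), Hit(None), Hit('Kordian')]
    print [h.book for h in filter(ensure_exists, hits)]  # -> ['Dziady', 'Kordian']
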
diff --git a/apps/suggest/forms.py b/apps/suggest/forms.py
index a81579b7a..1a5554336 100644
--- a/apps/suggest/forms.py
+++ b/apps/suggest/forms.py
@@ -42,14 +42,15 @@ Kontakt: %(contact)s
 }, fail_silently=True)

         if email_re.match(contact):
-            send_mail(u'[WolneLektury] ' + _(u'Thank you for your suggestion.'),
-                      _(u"""\
+            send_mail(u'[WolneLektury] ' +
+                    ugettext(u'Thank you for your suggestion.'),
+                    ugettext(u"""\
 Thank you for your comment on WolneLektury.pl.
 The suggestion has been referred to the project coordinator.""") +
 u"""

 -- 
-""" + _(u'''Message sent automatically. Please do not reply.'''),
+""" + ugettext(u'''Message sent automatically. Please do not reply.'''),
                 'no-reply@wolnelektury.pl', [contact], fail_silently=True)

@@ -60,7 +61,7 @@ class PublishingSuggestForm(forms.Form):

     def clean(self, *args, **kwargs):
         if not self.cleaned_data['books'] and not self.cleaned_data['audiobooks']:
-            msg = _(u"One of these fields is required.")
+            msg = ugettext(u"One of these fields is required.")
             self._errors["books"] = self.error_class([msg])
             self._errors["audiobooks"] = self.error_class([msg])
         return super(PublishingSuggestForm, self).clean(*args, **kwargs)
diff --git a/apps/wolnelektury_core/management/__init__.py b/apps/wolnelektury_core/management/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/apps/wolnelektury_core/management/commands/__init__.py b/apps/wolnelektury_core/management/commands/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/apps/wolnelektury_core/management/commands/localepack.py b/apps/wolnelektury_core/management/commands/localepack.py
new file mode 100644
index 000000000..6133762f3
--- /dev/null
+++ b/apps/wolnelektury_core/management/commands/localepack.py
@@ -0,0 +1,148 @@
+
+from optparse import make_option
+from django.conf import settings
+from django.core.management.base import BaseCommand
+from django.core.management.color import color_style
+from django.core.management import call_command
+
+import os
+import shutil
+import tempfile
+
+import allauth
+
+ROOT = os.path.dirname(settings.PROJECT_DIR)
+
+
+def is_our_app(mod):
+    return mod.__path__[0].startswith(ROOT)
+
+
+class Locale(object):
+    def save(self, output_directory, languages):
+        pass
+
+    def generate(self, languages):
+        pass
+
+
+class AppLocale(Locale):
+    def __init__(self, appmod):
+        self.app = appmod
+        if not os.path.exists(os.path.join(self.path, 'locale')):
+            raise LookupError('No locale for app %s' % appmod)
+
+    @property
+    def path(self):
+        return self.app.__path__[0]
+
+    @property
+    def name(self):
+        return self.app.__name__
+
+    def save(self, output_directory, languages):
+        for lc in languages:
+            lc = lc[0]
+            if os.path.exists(os.path.join(self.path, 'locale', lc)):
+                shutil.copy2(os.path.join(self.path, 'locale', lc, 'LC_MESSAGES', 'django.po'),
+                             os.path.join(output_directory, lc, self.name + '.po'))
+
+    def load(self, input_directory, languages):
+        for lc in zip(*languages)[0]:
+            shutil.copy2(os.path.join(input_directory, lc, self.name + '.po'),
+                         os.path.join(self.path, 'locale', lc, 'LC_MESSAGES', 'django.po'))
+
+    def generate(self, languages):
+        os.chdir(self.path)
+        print "in %s" % os.getcwd()
+        try:
+            call_command('makemessages', all=True)
+        except:
+            pass
+
+
+class ModelTranslation(Locale):
+    def __init__(self, appname):
+        self.appname = appname
+
+    def save(self, output_directory, languages):
+        call_command('translation2po', self.appname, directory=output_directory)
+
+    def load(self, input_directory, languages):
+        call_command('translation2po', self.appname, directory=input_directory, load=True)
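
ModelTranslation above delegates both directions to the translation2po command introduced earlier in this commit: one call dumps localized model fields to per-language PO files, the other loads edited PO files back into the database. Equivalent calls from Python (the directory path is illustrative; 'infopages' is the app this commit registers below):

    from django.core.management import call_command

    # dump 'infopages' model translations into ./locale-pack/<lang>/infopages.po
    call_command('translation2po', 'infopages', directory='./locale-pack')

    # after editing the PO files, write the msgstr values back to the models
    call_command('translation2po', 'infopages', directory='./locale-pack', load=True)
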
+
+
+class CustomLocale(Locale):
+    def __init__(self, app_dir,
+                 config=os.path.join(ROOT, "babel.cfg"),
+                 out_file=os.path.join(ROOT, 'wolnelektury/locale-contrib/django.pot'),
+                 name=None):
+        self.app_dir = app_dir
+        self.config = config
+        self.out_file = out_file
+        self.name = name
+
+    def generate(self, languages):
+        os.system('pybabel extract -F "%s" -o "%s" "%s"' % (self.config, self.out_file, self.app_dir))
+        os.system('pybabel update -D django -i %s -d %s' % (self.out_file, os.path.dirname(self.out_file)))
+
+    def po_file(self, language):
+        d = os.path.dirname(self.out_file)
+        n = os.path.basename(self.out_file).split('.')[0]
+        return os.path.join(d, language, 'LC_MESSAGES', n + '.po')
+
+    def save(self, output_directory, languages):
+        for lc in zip(*languages)[0]:
+            if os.path.exists(self.po_file(lc)):
+                shutil.copy2(self.po_file(lc),
+                             os.path.join(output_directory, lc, self.name + '.po'))
+
+    def load(self, input_directory, languages):
+        for lc in zip(*languages)[0]:
+            shutil.copy2(os.path.join(input_directory, lc, self.name + '.po'),
+                         self.po_file(lc))
+        os.system('pybabel compile -D django -d %s' % os.dirname(self.out_file))
+
+
+SOURCES = []
+
+for appn in settings.INSTALLED_APPS:
+    app = __import__(appn)
+    if is_our_app(app):
+        try:
+            SOURCES.append(AppLocale(app))
+        except LookupError, e:
+            print "no locales in %s" % app
+
+SOURCES.append(ModelTranslation('infopages'))
+SOURCES.append(CustomLocale(os.path.dirname(allauth.__file__), name='contrib'))
+
+
+class Command(BaseCommand):
+    option_list = BaseCommand.option_list + (
+        make_option('-l', '--load', help='load locales back to source', action='store_true', dest='load', default=False),
+        make_option('-o', '--outfile', help='Resulting zip file', dest='outfile', default='./wl-locale.zip'),
+    )
+    help = 'Make a locale pack'
+    args = ''
+
+    def handle(self, *a, **options):
+        tmp_dir = tempfile.mkdtemp('-wl-locale')
+        out_dir = os.path.join(tmp_dir, 'wl-locale')
+        os.mkdir(out_dir)
+
+        try:
+            for lang in settings.LANGUAGES:
+                os.mkdir(os.path.join(out_dir, lang[0]))
+
+            for src in SOURCES:
+                src.generate(settings.LANGUAGES)
+                src.save(out_dir, settings.LANGUAGES)
+                # src.save(settings.LANGUAGES)
+
+            packname = options.get('outfile')
+            packname_b = os.path.basename(packname).split('.')[0]
+            fmt = '.'.join(os.path.basename(packname).split('.')[1:])
+            shutil.make_archive(packname_b, fmt, root_dir=os.path.dirname(out_dir), base_dir=os.path.basename(out_dir))
+        finally:
+            shutil.rmtree(tmp_dir, ignore_errors=True)
diff --git a/requirements-dev.txt b/requirements-dev.txt
index aec394f4b..59084374e 100755
--- a/requirements-dev.txt
+++ b/requirements-dev.txt
@@ -1 +1,3 @@
 django-debug-toolbar
+polib
+BabelDjango
diff --git a/wolnelektury/settings/auth.py b/wolnelektury/settings/auth.py
index 136db060d..21210c074 100644
--- a/wolnelektury/settings/auth.py
+++ b/wolnelektury/settings/auth.py
@@ -6,3 +6,5 @@ EMAIL_CONFIRMATION_DAYS = 2
 LOGIN_URL = '/uzytkownik/login/'

 LOGIN_REDIRECT_URL = '/'
+
+SOCIALACCOUNT_AUTO_SIGNUP = False
\ No newline at end of file
diff --git a/wolnelektury/settings/cache.py b/wolnelektury/settings/cache.py
index cc9024063..61d3bb5c0 100644
--- a/wolnelektury/settings/cache.py
+++ b/wolnelektury/settings/cache.py
@@ -23,4 +23,3 @@ CACHES = {
     },
 }
 CACHE_MIDDLEWARE_ANONYMOUS_ONLY=True
-SEARCH_INDEX = path.join(PROJECT_DIR, '../search_index/')
diff --git a/wolnelektury/settings/static.py b/wolnelektury/settings/static.py
index 797433dcd..188400dc9 100644
--- a/wolnelektury/settings/static.py
+++ b/wolnelektury/settings/static.py
@@ -5,6 +5,7 @@ from settings.paths import PROJECT_DIR
 # Example: "/home/media/media.lawrence.com/"
 MEDIA_ROOT = path.join(PROJECT_DIR, '../media/')
 STATIC_ROOT = path.join(PROJECT_DIR, 'static/')
+SEARCH_INDEX = path.join(PROJECT_DIR, '../search_index/')

 # URL that handles the media served from MEDIA_ROOT. Make sure to use a
 # trailing slash if there is a path component (optional in other cases).
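
Taken together, the packing side of the new localization workflow is a single management command: every registered source regenerates its messages, saves its PO files into a temporary tree, and the tree is zipped up for translators. A usage sketch (the archive name is the command's declared default):

    from django.core.management import call_command

    # collect app locales, 'infopages' model translations and the bundled
    # allauth catalogue into one archive
    call_command('localepack', outfile='./wl-locale.zip')

The complementary -l/--load flag is declared on the command for the return trip, mirroring translation2po's load option.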