X-Git-Url: https://git.mdrn.pl/wolnelektury.git/blobdiff_plain/ae60b2a3949e96357477cc04f90fd0873cee8a92..91eb62bdcc6b891f12d20b1d8d5d70cbd7b12325:/src/search/management/commands/snippets.py?ds=sidebyside diff --git a/src/search/management/commands/snippets.py b/src/search/management/commands/snippets.py old mode 100755 new mode 100644 index a758317bb..62512c94b --- a/src/search/management/commands/snippets.py +++ b/src/search/management/commands/snippets.py @@ -1,63 +1,23 @@ -# -*- coding: utf-8 -*- # This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later. # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information. # -from django.core.management.base import BaseCommand - from glob import glob -from optparse import make_option from os import path -from sys import stdout from django.conf import settings +from django.core.management.base import BaseCommand class Command(BaseCommand): - help = 'Reindex everything.' - args = '' - - option_list = BaseCommand.option_list + ( - make_option('-C', '--check-just-read', action='store_true', dest='check', default=False, - help='Check snippets utf-8'), - make_option('-c', '--check', action='store_true', dest='check2', default=False, - help='Check snippets utf-8 by walking through index'), - ) + help = 'Check snippets.' def handle(self, *args, **opts): - from search.index import Search, Snippets - - if opts['check']: - sfn = glob(settings.SEARCH_INDEX+'snippets/*') - print sfn - for fn in sfn: - print fn - bkid = int(path.basename(fn)) - with open(fn) as f: - cont = f.read() - try: - uc = cont.decode('utf-8') - except UnicodeDecodeError, ude: - print "error in snippets %d" % bkid - if opts['check2']: - s = Search() - reader = s.searcher.getIndexReader() - numdocs = reader.numDocs() - for did in range(numdocs): - doc = reader.document(did) - if doc and doc.get('book_id'): - bkid = int(doc.get('book_id')) - # import pdb; pdb.set_trace() - stdout.write("\r%d / %d" % (did, numdocs)) - stdout.flush() - ss = doc.get('snippet_position') - sl = doc.get('snippet_length') - if ss and sl: - # WTF (nie było zaimportowane) - snips = Snippets(bkid) - try: - txt = snips.get((ss, sl)) - assert len(txt) == sl - except UnicodeDecodeError, ude: - stdout.write("\nerror in snippets %d\n" % bkid) - raise ude - - stdout.write("\ndone.\n") + sfn = glob(settings.SEARCH_INDEX+'snippets/*') + for fn in sfn: + print(fn) + bkid = path.basename(fn) + with open(fn) as f: + cont = f.read() + try: + cont.decode('utf-8') + except UnicodeDecodeError: + print("error in snippets %s" % bkid)