# -*- coding: utf-8 -*-
# This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later.
# Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
#
from django.core.management.base import BaseCommand

from glob import glob
from optparse import make_option
from os import path
from sys import stdout
from django.conf import settings


class Command(BaseCommand):
    """Check that stored search snippets can be decoded as UTF-8.

    Two independent checks are available, selected by command-line flags:

    * ``-C`` / ``--check-just-read`` reads every file under the
      ``snippets`` directory of ``settings.SEARCH_INDEX`` and tries to
      decode its whole content.
    * ``-c`` / ``--check`` walks the documents of the search index and
      fetches the snippet each document points at.
    """
    help = 'Check that search snippets are valid utf-8.'
    args = ''

    option_list = BaseCommand.option_list + (
        make_option('-C', '--check-just-read', action='store_true',
                    dest='check', default=False,
                    help='Check snippets utf-8'),
        make_option('-c', '--check', action='store_true',
                    dest='check2', default=False,
                    help='Check snippets utf-8 by walking through index'),
    )

    def handle(self, *args, **opts):
        """Run the checks selected by ``opts['check']`` / ``opts['check2']``.

        Both checks may be requested in one invocation; the file check
        runs first.
        """
        if opts['check']:
            self._check_snippet_files()
        if opts['check2']:
            self._check_index_snippets()

    def _check_snippet_files(self):
        """Decode every snippet file and report the ones that fail."""
        # path.join works whether or not SEARCH_INDEX ends with a separator.
        snippet_files = glob(path.join(settings.SEARCH_INDEX, 'snippets', '*'))
        print(snippet_files)
        for file_name in snippet_files:
            print(file_name)
            # Snippet files are named after the numeric id of their book.
            book_id = int(path.basename(file_name))
            # Read raw bytes so that decoding is done explicitly below.
            with open(file_name, 'rb') as snippet_file:
                content = snippet_file.read()
            try:
                content.decode('utf-8')
            except UnicodeDecodeError:
                print("error in snippets %d" % book_id)

    def _check_index_snippets(self):
        """Fetch the snippet of every indexed document that has a book id.

        Raises:
            UnicodeDecodeError: re-raised, after reporting the book id,
                when fetching a snippet fails to decode.
            AssertionError: when the fetched text length differs from the
                length stored in the index.
        """
        # Imported inside the command rather than at module level, as the
        # original code did.
        from search.index import Search, Snippets

        search = Search()
        reader = search.searcher.getIndexReader()
        doc_count = reader.numDocs()
        for doc_id in range(doc_count):
            doc = reader.document(doc_id)
            if not doc or not doc.get('book_id'):
                continue
            book_id = int(doc.get('book_id'))
            # "\r" rewrites the same terminal line to show progress.
            stdout.write("\r%d / %d" % (doc_id, doc_count))
            stdout.flush()
            position = doc.get('snippet_position')
            length = doc.get('snippet_length')
            if not (position and length):
                continue
            # Stored index field values are read back as strings, so they
            # are converted before being used as offsets and compared with
            # len().
            position = int(position)
            length = int(length)
            # WTF (this was not imported before)
            # NOTE(review): Snippets may need to be opened before get() is
            # usable -- confirm against search.index.
            snippets = Snippets(book_id)
            try:
                text = snippets.get((position, length))
                # NOTE(review): presumably the stored length is measured in
                # the same units as len(text) -- confirm.
                assert len(text) == length
            except UnicodeDecodeError:
                stdout.write("\nerror in snippets %d\n" % book_id)
                # Bare raise keeps the original traceback.
                raise

        stdout.write("\ndone.\n")