1 # -*- coding: utf-8 -*-
2 # This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later.
3 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
5 from django.core.management.base import BaseCommand
8 from optparse import make_option
10 from sys import stdout
11 from django.conf import settings
class Command(BaseCommand):
    """Check that stored search snippets decode as UTF-8.

    Two independent checks can be selected on the command line:

    * ``-C`` / ``--check-just-read`` reads every file matching
      ``<settings.SEARCH_INDEX>/snippets/*`` and decodes its whole content.
    * ``-c`` / ``--check`` walks the documents of the search index and, for
      every document that carries a ``book_id`` plus a snippet position and
      length, fetches that snippet through ``Snippets``.

    Every ``UnicodeDecodeError`` is reported on standard output and the run
    continues with the next file or document.
    """

    help = 'Check search snippets for UTF-8 decoding errors.'

    option_list = BaseCommand.option_list + (
        make_option('-C', '--check-just-read', action='store_true', dest='check', default=False,
            help='Check snippets utf-8'),
        make_option('-c', '--check', action='store_true', dest='check2', default=False,
            help='Check snippets utf-8 by walking through index'),
    )

    def handle(self, *args, **opts):
        """Run the checks selected by the ``check`` / ``check2`` options.

        ``opts`` is the parsed option dictionary; positional ``args`` are
        ignored.  Nothing is returned.
        """
        if opts.get('check'):
            self._check_snippet_files()
        if opts.get('check2'):
            self._check_index_snippets()

    def _check_snippet_files(self):
        """Decode every snippet file as UTF-8 and report the ones that fail."""
        # Function-scope imports, like the search imports in this command, so
        # this block does not rely on module-level names it cannot see.
        from glob import glob
        from os import path

        # path.join copes with SEARCH_INDEX both with and without a trailing
        # separator; plain string concatenation silently built a wrong
        # pattern in the latter case.
        pattern = path.join(settings.SEARCH_INDEX, 'snippets', '*')
        for fn in glob(pattern):
            # Keep the name as text: converting it with int() aborted the
            # whole run on any file whose name is not purely numeric.
            bkid = path.basename(fn)
            # Read raw bytes so that decoding is an explicit, checkable step.
            with open(fn, 'rb') as snippet_file:
                cont = snippet_file.read()
            try:
                cont.decode('utf-8')
            except UnicodeDecodeError:
                stdout.write("error in snippets %s\n" % bkid)

    def _check_index_snippets(self):
        """Fetch the snippet of every indexed document and report bad ones."""
        from search.index import Search, Snippets

        # NOTE(review): the construction of the searcher is not visible in
        # the reviewed excerpt; a default-constructed Search is assumed.
        s = Search()
        reader = s.searcher.getIndexReader()
        numdocs = reader.numDocs()
        # NOTE(review): ids are taken from range(numDocs()); confirm this
        # still covers every live document if the index contains deletions.
        for did in range(numdocs):
            doc = reader.document(did)
            if not (doc and doc.get('book_id')):
                continue
            bkid = int(doc.get('book_id'))

            # "\r" rewrites the same terminal line; flush so the progress
            # counter is visible while the walk is still running.
            stdout.write("\r%d / %d" % (did, numdocs))
            stdout.flush()

            ss = doc.get('snippet_position')
            sl = doc.get('snippet_length')
            # NOTE(review): documents without both fields are skipped; this
            # guard is an assumption, confirm against the original.
            if not (ss and sl):
                continue

            # Snippets used to be referenced here without being imported.
            snips = Snippets(bkid)
            try:
                # NOTE(review): position and length are passed exactly as
                # read from the document; confirm Snippets.get() accepts
                # them in this form and needs no explicit open()/close().
                snips.get((ss, sl))
            except UnicodeDecodeError:
                stdout.write("\nerror in snippets %d\n" % bkid)

        # NOTE(review): the final message is emitted after the index walk
        # only; confirm it was not meant to follow the file check as well.
        stdout.write("\ndone.\n")