1 # -*- coding: utf-8 -*-
2 # This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later.
3 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
5 from django.core.management.base import BaseCommand
8 from optparse import make_option
10 from sys import stdout
11 from django.conf import settings
class Command(BaseCommand):
    """Check that stored search snippets can be decoded as UTF-8.

    Two independent checks can be requested from the command line:

    * ``-C`` / ``--check-just-read`` reads every file matching
      ``<settings.SEARCH_INDEX>/snippets/*`` and decodes its whole content.
    * ``-c`` / ``--check`` walks the documents of the search index and
      fetches the snippet each document refers to.

    Every snippet that fails to decode is reported on standard output.
    """

    help = 'Check that search snippets are valid UTF-8.'

    option_list = BaseCommand.option_list + (
        make_option('-C', '--check-just-read', action='store_true',
                    dest='check', default=False,
                    help='Check snippets utf-8'),
        make_option('-c', '--check', action='store_true',
                    dest='check2', default=False,
                    help='Check snippets utf-8 by walking through index'),
    )

    def handle(self, *args, **opts):
        """Run the checks selected by the ``check`` / ``check2`` options.

        ``opts`` is the parsed option dictionary; its keys are the ``dest``
        names declared in ``option_list``.
        """
        # Kept from the original code although nothing here uses the name.
        # NOTE(review): it may only be needed for import ordering -- confirm
        # before removing.
        from catalogue.models import Book  # noqa

        # NOTE(review): the dispatch on opts['check'] / opts['check2'] is
        # inferred from the `dest` values above -- confirm.
        if opts.get('check'):
            self._check_snippet_files()
        if opts.get('check2'):
            self._check_indexed_snippets()

    def _check_snippet_files(self):
        """Decode every snippet file on disk and report those that fail."""
        # Imported locally so the helper does not depend on the module header.
        from glob import glob
        from os import path

        # path.join works whether or not SEARCH_INDEX ends with a separator;
        # plain string concatenation silently produced a wrong pattern
        # when it did not.
        pattern = path.join(settings.SEARCH_INDEX, 'snippets', '*')
        for fn in glob(pattern):
            # The file name is parsed as an integer id (presumably the id of
            # the book the snippets belong to).
            bkid = int(path.basename(fn))
            # Read raw bytes so that the decode below is the actual check
            # and the file handle is always closed.
            with open(fn, 'rb') as snippet_file:
                cont = snippet_file.read()
            try:
                cont.decode('utf-8')
            except UnicodeDecodeError:
                stdout.write("error in snippets %d\n" % bkid)

    def _check_indexed_snippets(self):
        """Fetch the snippet of every indexed document and report failures."""
        from search.index import Search
        # NOTE(review): `Snippets` was used without a visible import;
        # assumed to live next to `Search` -- confirm.
        from search.index import Snippets

        # NOTE(review): `Search()` taking no arguments is assumed -- confirm.
        reader = Search().searcher.getIndexReader()
        # NOTE(review): if the index contains deleted documents, ids may run
        # past numDocs() -- confirm that iterating up to numDocs() is enough.
        numdocs = reader.numDocs()
        for did in range(numdocs):
            doc = reader.document(did)
            if not doc or not doc.get('book_id'):
                continue
            bkid = int(doc.get('book_id'))

            # "\r" redraws the progress counter in place; flush because the
            # line is never terminated and would otherwise stay buffered.
            stdout.write("\r%d / %d" % (did, numdocs))
            stdout.flush()

            ss = doc.get('snippet_position')
            sl = doc.get('snippet_length')
            if ss is None or sl is None:
                # Document carries no snippet reference; nothing to verify.
                continue

            snips = Snippets(bkid)
            try:
                snips.get((ss, sl))
            except UnicodeDecodeError:
                stdout.write("\nerror in snippets %d\n" % bkid)

        # The leading newline terminates the in-place progress line.
        stdout.write("\ndone.\n")