basic integrity check,
[wolnelektury.git] / apps / search / management / commands / snippets.py
1 from django.core.management.base import BaseCommand
2
3 from glob import glob
4 from optparse import make_option
5 from os import path
6 from sys import stdout
7 from django.conf import settings
8
class Command(BaseCommand):
    """Management command that sanity-checks stored search snippets.

    Two independent checks can be requested on the command line:

    * ``-C`` / ``--check-just-read``: read every file matching
      ``<SEARCH_INDEX>/snippets/*`` and report those whose content is not
      valid UTF-8.
    * ``-c`` / ``--check``: walk the documents of the search index and fetch
      the snippet that each document points at.
    """
    help = 'Check that stored search snippets are valid UTF-8.'
    args = ''

    option_list = BaseCommand.option_list + (
        make_option('-C', '--check-just-read', action='store_true', dest='check', default=False,
            help='Check snippets utf-8'),
        make_option('-c', '--check', action='store_true', dest='check2', default=False,
            help='Check snippets utf-8 by walking through index'),
        )

    def handle(self, *args, **opts):
        """Run the checks selected by the ``check`` / ``check2`` options.

        Both checks run (files first, then index) when both flags are given.
        """
        # Imported here, as in the original, rather than at module level.
        # ``Book`` is not used below; the import is kept in case loading the
        # models has side effects -- NOTE(review): confirm and drop if not.
        from catalogue.models import Book
        import search

        if opts['check']:
            self._check_snippet_files()
        if opts['check2']:
            self._check_index_snippets(search)

    def _check_snippet_files(self):
        """Print every snippet file name and flag files that are not UTF-8.

        Raises:
            ValueError: if a file name under ``snippets/`` is not an integer
                (the name is used as the book id in the error message).
        """
        # path.join works whether or not SEARCH_INDEX ends with a separator;
        # plain string concatenation silently required the trailing slash.
        snippet_files = glob(path.join(settings.SEARCH_INDEX, 'snippets', '*'))
        print(snippet_files)
        for fn in snippet_files:
            print(fn)
            bkid = int(path.basename(fn))
            # Read raw bytes so that the decode below is the only place where
            # an encoding is applied.
            with open(fn, 'rb') as f:
                cont = f.read()
            try:
                cont.decode('utf-8')
            except UnicodeDecodeError:
                print("error in snippets %d" % bkid)

    def _check_index_snippets(self, search):
        """Fetch the snippet referenced by each indexed document.

        Args:
            search: the already imported ``search`` package.

        Raises:
            UnicodeDecodeError: re-raised (after naming the book on stdout)
                when fetching a snippet fails to decode.
            AssertionError: when the fetched text's length differs from the
                ``snippet_length`` stored in the index.
        """
        # The original referenced a bare ``Snippets`` that was never imported,
        # so this branch always failed with NameError.
        # NOTE(review): the class is assumed to be exposed by the ``search``
        # package or its ``index`` module -- confirm the real location.
        snippets_cls = getattr(search, 'Snippets', None)
        if snippets_cls is None:
            from search.index import Snippets as snippets_cls

        reader = search.Search().searcher.getIndexReader()
        # NOTE(review): document ids are taken from range(numDocs()); if the
        # index contains deleted documents this may not cover every id --
        # confirm against the index reader's maxDoc().
        numdocs = reader.numDocs()
        for did in range(numdocs):
            doc = reader.document(did)
            if not doc:
                continue
            raw_book_id = doc.get('book_id')
            if not raw_book_id:
                continue
            bkid = int(raw_book_id)

            stdout.write("\r%d / %d" % (did, numdocs))
            stdout.flush()

            ss = doc.get('snippet_position')
            sl = doc.get('snippet_length')
            if not (ss and sl):
                continue
            # Stored field values need converting, just like ``book_id``
            # above; comparing len(txt) with the raw value could never match.
            position, length = int(ss), int(sl)

            # NOTE(review): the object is used exactly as before (construct,
            # then get()); whether it must be opened/closed explicitly is not
            # visible here -- confirm.
            snips = snippets_cls(bkid)
            try:
                txt = snips.get((position, length))
            except UnicodeDecodeError:
                stdout.write("\nerror in snippets %d\n" % bkid)
                raise  # bare raise keeps the original traceback

            # Explicit raise instead of ``assert`` so the check survives -O.
            # NOTE(review): assumes snippet_length is counted in the same
            # units as len() of the returned text -- confirm.
            if len(txt) != length:
                raise AssertionError(
                    "snippet length mismatch for book %d: got %d, expected %d"
                    % (bkid, len(txt), length))

        stdout.write("\ndone.\n")
60