-# -*- coding: utf-8 -*-
# This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later.
# Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
#
-from django.core.management.base import BaseCommand
-
from glob import glob
-from optparse import make_option
from os import path
-from sys import stdout
from django.conf import settings
+from django.core.management.base import BaseCommand
-class Command(BaseCommand):
- help = 'Reindex everything.'
- args = ''
-
- option_list = BaseCommand.option_list + (
- make_option('-C', '--check-just-read', action='store_true', dest='check', default=False,
- help='Check snippets utf-8'),
- make_option('-c', '--check', action='store_true', dest='check2', default=False,
- help='Check snippets utf-8 by walking through index'),
- )
+class Command(BaseCommand):
+    """Management command reporting snippet files that are not valid UTF-8."""
+
+    help = 'Check snippets.'
+
     def handle(self, *args, **opts):
- from catalogue.models import Book
- from search.index import Search
-
- if opts['check']:
- sfn = glob(settings.SEARCH_INDEX+'snippets/*')
- print sfn
- for fn in sfn:
- print fn
- bkid = int(path.basename(fn))
- with open(fn) as f:
- cont = f.read()
- try:
- uc = cont.decode('utf-8')
- except UnicodeDecodeError, ude:
- print "error in snippets %d" % bkid
- if opts['check2']:
- s = Search()
- reader = s.searcher.getIndexReader()
- numdocs = reader.numDocs()
- for did in range(numdocs):
- doc = reader.document(did)
- if doc and doc.get('book_id'):
- bkid = int(doc.get('book_id'))
- #import pdb; pdb.set_trace()
- stdout.write("\r%d / %d" % (did, numdocs))
- stdout.flush()
- ss = doc.get('snippet_position')
- sl = doc.get('snippet_length')
- if ss and sl:
- snips = Snippets(bkid)
- try:
- txt = snips.get((ss,sl))
- assert len(txt) == sl
- except UnicodeDecodeError, ude:
- stdout.write("\nerror in snippets %d\n" % bkid)
- raise ude
-
- stdout.write("\ndone.\n")
-
+        """Print every snippet file path and flag those that fail UTF-8 decoding.
+
+        Scans the ``snippets`` directory under ``settings.SEARCH_INDEX``.
+        Each file path is printed; when the file's bytes cannot be decoded
+        as UTF-8, an extra ``error in snippets <name>`` line is printed,
+        where ``<name>`` is the file's base name (presumably the book id --
+        the previous version parsed it with ``int()``).
+
+        ``args`` and ``opts`` are accepted for the ``BaseCommand`` interface
+        but are not used.
+        """
+        # Join path components instead of concatenating strings, so the
+        # pattern is correct whether or not SEARCH_INDEX ends with a separator.
+        pattern = path.join(settings.SEARCH_INDEX, 'snippets', '*')
+        for fn in glob(pattern):
+            print(fn)
+            bkid = path.basename(fn)
+            # Read raw bytes: in text mode on Python 3, read() returns str,
+            # which has no .decode(), and any decoding error would surface
+            # inside read() rather than in the try block below.
+            with open(fn, 'rb') as f:
+                cont = f.read()
+            try:
+                cont.decode('utf-8')
+            except UnicodeDecodeError:
+                print("error in snippets %s" % bkid)