Trivial updates.
[wolnelektury.git] / src / search / management / commands / snippets.py
index 40310ed..62512c9 100755 (executable)
@@ -1,64 +1,23 @@
-# -*- coding: utf-8 -*-
 # This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later.
 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
 #
-from django.core.management.base import BaseCommand
-
 from glob import glob
-from optparse import make_option
 from os import path
-from sys import stdout
 from django.conf import settings
+from django.core.management.base import BaseCommand
 
-class Command(BaseCommand):
-    help = 'Reindex everything.'
-    args = ''
-
-    option_list = BaseCommand.option_list + (
-        make_option('-C', '--check-just-read', action='store_true', dest='check', default=False,
-            help='Check snippets utf-8'),
-        make_option('-c', '--check', action='store_true', dest='check2', default=False,
-            help='Check snippets utf-8 by walking through index'),
-        )
 
+class Command(BaseCommand):
+    help = 'Check snippets.'
 
     def handle(self, *args, **opts):
-        from catalogue.models import Book
-        from search.index import Search
-
-        if opts['check']:
-            sfn = glob(settings.SEARCH_INDEX+'snippets/*')
-            print sfn
-            for fn in sfn:
-                print fn
-                bkid = int(path.basename(fn))
-                with open(fn) as f:
-                    cont = f.read()
-                    try:
-                        uc = cont.decode('utf-8')
-                    except UnicodeDecodeError, ude:
-                        print "error in snippets %d" % bkid
-        if opts['check2']:
-            s = Search()
-            reader = s.searcher.getIndexReader()
-            numdocs = reader.numDocs()
-            for did in range(numdocs):
-                doc = reader.document(did)
-                if doc and doc.get('book_id'):
-                    bkid = int(doc.get('book_id'))
-                    #import pdb; pdb.set_trace()
-                    stdout.write("\r%d / %d" % (did, numdocs))
-                    stdout.flush()
-                    ss  = doc.get('snippet_position')
-                    sl  = doc.get('snippet_length')
-                    if ss and sl:
-                        snips = Snippets(bkid)
-                        try:
-                            txt = snips.get((ss,sl))
-                            assert len(txt) == sl
-                        except UnicodeDecodeError, ude:
-                            stdout.write("\nerror in snippets %d\n" % bkid)
-                            raise ude
-
-            stdout.write("\ndone.\n")
-
+        sfn = glob(settings.SEARCH_INDEX+'snippets/*')  # every file in the snippets directory
+        for fn in sfn:
+            print(fn)
+            bkid = path.basename(fn)  # the bare file name identifies the file in the report
+            with open(fn, 'rb') as f:  # binary mode: read() must return bytes for decode() to exist
+                cont = f.read()
+                try:
+                    cont.decode('utf-8')  # result discarded; only UTF-8 validity is checked
+                except UnicodeDecodeError:
+                    print("error in snippets %s" % bkid)