# -*- coding: utf-8 -*-
# This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later.
# Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
#
from django.core.management.base import BaseCommand

from glob import glob
from optparse import make_option
from os import path
from sys import stdout
from django.conf import settings


class Command(BaseCommand):
    """Management command that checks stored search snippets for valid UTF-8.

    Two independent checks are available and may be combined:

    * ``-C`` / ``--check-just-read`` reads every file matching
      ``<settings.SEARCH_INDEX>/snippets/*`` and tries to decode its whole
      content as UTF-8.
    * ``-c`` / ``--check`` walks the documents of the search index and, for
      every document carrying a ``book_id`` together with
      ``snippet_position`` and ``snippet_length``, fetches that snippet
      through ``Snippets`` and reports decoding failures.
    """
    help = 'Check that search snippets are valid UTF-8.'
    args = ''

    option_list = BaseCommand.option_list + (
        make_option('-C', '--check-just-read', action='store_true', dest='check', default=False,
                    help='Check snippets utf-8'),
        make_option('-c', '--check', action='store_true', dest='check2', default=False,
                    help='Check snippets utf-8 by walking through index'),
        )

    def handle(self, *args, **opts):
        """Run the checks selected by the ``check`` / ``check2`` options."""
        if opts['check']:
            self._check_snippet_files()
        if opts['check2']:
            self._check_index_snippets()

    def _check_snippet_files(self):
        """Decode every snippet file as UTF-8 and report the ones that fail.

        Progress (the list of files, then each file name) and errors are
        written to standard output. A broken file does not stop the scan.
        """
        # path.join copes with SEARCH_INDEX given with or without a
        # trailing separator, unlike plain string concatenation.
        filenames = glob(path.join(settings.SEARCH_INDEX, 'snippets', '*'))
        stdout.write("%s\n" % (filenames,))
        for filename in filenames:
            stdout.write("%s\n" % filename)
            # Read raw bytes: the point is to validate the encoding ourselves.
            with open(filename, 'rb') as snippet_file:
                content = snippet_file.read()
            try:
                content.decode('utf-8')
            except UnicodeDecodeError:
                # Report by file name so that a non-numeric name cannot
                # crash the check before the file is even examined.
                stdout.write("error in snippets %s\n" % path.basename(filename))

    def _check_index_snippets(self):
        """Walk the index and fetch each referenced snippet via ``Snippets``.

        Raises:
            UnicodeDecodeError: re-raised (after reporting the book id) when
                fetching a snippet fails to decode.
            AssertionError: when the fetched snippet's length differs from
                the ``snippet_length`` stored in the index.
        """
        # Imported lazily, as in the original command, so the module can be
        # loaded without pulling in the search backend.
        from search.index import Search, Snippets

        search = Search()
        reader = search.searcher.getIndexReader()
        numdocs = reader.numDocs()
        # NOTE(review): document numbers are iterated up to numDocs(); if the
        # index contains deleted documents the highest numbers may be
        # skipped -- confirm against the Lucene version in use.
        for doc_id in range(numdocs):
            doc = reader.document(doc_id)
            if not doc or not doc.get('book_id'):
                continue
            book_id = int(doc.get('book_id'))
            stdout.write("\r%d / %d" % (doc_id, numdocs))
            stdout.flush()

            position = doc.get('snippet_position')
            length = doc.get('snippet_length')
            if not (position and length):
                continue
            # Convert the same way book_id is converted above, so the values
            # can be used as an offset/size and compared with len().
            position = int(position)
            length = int(length)

            snippets = Snippets(book_id)
            try:
                text = snippets.get((position, length))
            except UnicodeDecodeError:
                stdout.write("\nerror in snippets %d\n" % book_id)
                # Bare raise keeps the original traceback.
                raise
            # NOTE(review): presumably len(text) is expected to equal the
            # stored snippet_length -- confirm the unit (bytes vs characters)
            # against Snippets.get.
            if len(text) != length:
                raise AssertionError(
                    "snippet length mismatch for book %d: got %d, expected %d"
                    % (book_id, len(text), length))

        stdout.write("\ndone.\n")
