1 # -*- coding: utf-8 -*-
2 # This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later.
3 # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
6 from cPickle import dump
7 from optparse import make_option
9 from django.core.management.base import BaseCommand
10 from django.core.management.color import color_style
11 from django.conf import settings
13 from catalogue.models import Book, Tag
# Extract the text from a text file: ``re_text(data)`` searches ``data`` for
# a run of three or more newlines, lazily captures everything after it, and
# stops before the optional blank lines preceding a ``-----`` separator line.
# The match's group(1) holds the captured text; ``None`` means no match.
re_text = re.compile(
    r'\n{3,}(.*?)\n*-----\n',
    re.DOTALL,  # "." must also match newlines so the capture can span lines
).search
class Command(BaseCommand):
    """Management command that prepares the pickled data for Lesmianator.

    For every selected book that has a TXT file, the text matched by
    ``re_text`` is lower-cased and folded into a nested dict of letter
    counts, which is then pickled to ``settings.LESMIANATOR_PICKLE``.
    """
    option_list = BaseCommand.option_list + (
        make_option('-t', '--tags', dest='tags', metavar='SLUG,...',
            help='Use only books tagged with this tags'),
        make_option('-i', '--include', dest='include', metavar='SLUG,...',
            help='Include specific books by slug'),
        make_option('-e', '--exclude', dest='exclude', metavar='SLUG,...',
            help='Exclude specific books by slug'),
    )
    help = 'Prepare data for Lesmianator.'

    def handle(self, *args, **options):
        """Collect letter statistics from the selected books and pickle them.

        Options read: ``verbosity`` plus the comma-separated slug lists
        ``tags``, ``include`` and ``exclude``.  Problems (missing setting,
        no usable books, unwritable output file) are printed with the
        command's colour style and end the command early.
        """
        self.style = color_style()
        verbose = int(options.get('verbosity'))

        try:
            path = settings.LESMIANATOR_PICKLE
        except AttributeError:
            print(self.style.ERROR('LESMIANATOR_PICKLE not set in the settings.'))
            return

        books = self._select_books(
            options.get('tags'), options.get('include'), options.get('exclude'))

        lesmianator = {}
        processed = skipped = 0
        # NOTE(review): the verbosity thresholds (>= 1, >= 2) are not visible
        # in the reviewed extract -- confirm against the original file.
        for book in books:
            if verbose >= 2:
                print('Parsing %s' % book.slug)
            if not book.txt_file:
                if verbose >= 1:
                    print(self.style.NOTICE('%s has no TXT file' % book.slug))
                skipped += 1
                continue

            # Read inside ``with`` so the handle is closed even when the
            # book is skipped below.
            with open(book.txt_file.path) as txt:
                m = re_text(txt.read())
            if not m:
                print(self.style.ERROR("Unknown text format: %s" % book.slug))
                skipped += 1
                continue

            processed += 1
            text = unicode(m.group(1), 'utf-8').lower()
            self._count_letters(lesmianator, text)

        if not processed:
            if skipped:
                print(self.style.ERROR("No books with TXT files found"))
            else:
                print(self.style.ERROR("No books found"))
            return

        # Dump exactly once, into a handle that is closed afterwards.
        try:
            with open(path, 'w') as out:
                dump(lesmianator, out)
        except (IOError, OSError):
            # The message used to contain "$s", which made the ``%``
            # operator raise TypeError instead of reporting the failure.
            print(self.style.ERROR("Couldn't write to %s" % path))
            return

        if verbose >= 1:
            print("%d processed, %d skipped" % (processed, skipped))
            print("Results dumped to %s" % path)

    @staticmethod
    def _select_books(tags, include, exclude):
        """Return the set of books matching the comma-separated slug filters."""
        fields = ('slug', 'txt_file')
        books = []
        if include:
            books += list(Book.objects.filter(slug__in=include.split(',')).only(*fields))
        # NOTE(review): falling back to all books only when neither --tags
        # nor --include is given is inferred from the ``+=`` / ``=`` pattern
        # of the original assignments -- confirm.
        if tags:
            books += list(Book.tagged.with_all(Tag.objects.filter(slug__in=tags.split(','))).only(*fields))
        elif not include:
            books = list(Book.objects.all().only(*fields))
        if exclude:
            # Split once instead of once per book.
            excluded = set(exclude.split(','))
            books = [book for book in books if book.slug not in excluded]
        # A book matched by both --include and --tags is processed once.
        return set(books)

    @staticmethod
    def _count_letters(table, text):
        """Add the letter counts of ``text`` to ``table`` in place.

        ``table`` maps a context string (the up-to-three characters that
        precede a letter; empty at the start of a text) to a dict of
        ``{letter: number of occurrences after that context}``.
        """
        last_word = ''
        for letter in text:
            followers = table.setdefault(last_word, {})
            followers[letter] = followers.get(letter, 0) + 1
            # Slide the context window: keep the last two characters and
            # append the current one.
            last_word = last_word[-2:] + letter