1 # This file is part of Wolne Lektury, licensed under GNU Affero GPLv3 or later.
2 # Copyright © Fundacja Wolne Lektury. See NOTICE for more information.
5 from pickle import dump
7 from django.core.management.base import BaseCommand
8 from django.core.management.color import color_style
9 from django.conf import settings
11 from catalogue.models import Book, Tag
# Bound ``search`` of the pattern that isolates the main text of a TXT file.
# The captured group starts right after a run of three or more newlines and
# ends (lazily) before any trailing newlines followed by a "-----" line.
# DOTALL lets "." match newlines, so the capture may span many lines.
re_text = re.compile(
    r'\n{3,}(.*?)\n*-----\n',
    flags=re.DOTALL,
).search
class Command(BaseCommand):
    """Build the character-frequency table used by Leśmianator and pickle it.

    For every selected book that has a TXT file, the main text is extracted
    with ``re_text``, lower-cased, and scanned character by character. The
    resulting mapping has the shape
    ``{context: {next_character: occurrences}}`` where ``context`` is the
    string of up to three characters that preceded ``next_character``.
    The mapping is written with ``pickle.dump`` to
    ``settings.LESMIANATOR_PICKLE``.
    """

    help = 'Prepare data for Leśmianator.'

    def add_arguments(self, parser):
        """Register the optional book-selection options.

        Each option takes a single comma-separated string of slugs.
        """
        parser.add_argument(
            '-t', '--tags', dest='tags', metavar='SLUG,...',
            help='Use only books tagged with this tags')
        parser.add_argument(
            '-i', '--include', dest='include', metavar='SLUG,...',
            help='Include specific books by slug')
        parser.add_argument(
            '-e', '--exclude', dest='exclude', metavar='SLUG,...',
            help='Exclude specific books by slug')

    def handle(self, *args, **options):
        """Collect the statistics from the selected books and dump them.

        Reads the ``verbosity``, ``tags``, ``include`` and ``exclude``
        options. Problems (missing setting, no usable books, unwritable
        output path) are reported on standard output and end the command
        early; nothing is raised for them.
        """
        self.style = color_style()
        verbose = int(options.get('verbosity'))
        tags = options.get('tags')
        include = options.get('include')
        exclude = options.get('exclude')

        try:
            path = settings.LESMIANATOR_PICKLE
        except AttributeError:
            print(self.style.ERROR('LESMIANATOR_PICKLE not set in the settings.'))
            return

        # NOTE(review): the branching between --include, --tags and "all
        # books" is assumed from the option help texts — confirm.
        books = []
        if include:
            books += list(Book.objects.filter(slug__in=include.split(',')).only('slug', 'txt_file'))
        if tags:
            books += list(Book.tagged.with_all(Tag.objects.filter(slug__in=tags.split(','))).only('slug', 'txt_file'))
        elif not include:
            books = list(Book.objects.all().only('slug', 'txt_file'))

        if exclude:
            # Split once instead of once per book; a set gives O(1) lookups.
            excluded_slugs = set(exclude.split(','))
            books = [book for book in books if book.slug not in excluded_slugs]

        # A book matched by both --include and --tags must be counted once.
        books = set(books)

        lesmianator = {}
        processed = skipped = 0
        for book in books:
            # NOTE(review): verbosity thresholds (>= 2 for progress, >= 1 for
            # notices and the summary) are assumed — confirm.
            if verbose >= 2:
                print('Parsing', book.slug)
            if not book.txt_file:
                if verbose >= 1:
                    print(self.style.NOTICE('%s has no TXT file' % book.slug))
                skipped += 1
                continue

            # The context manager closes the file on every path, including
            # the "unknown format" skip below.
            with open(book.txt_file.path) as f:
                m = re_text(f.read())
            if not m:
                print(self.style.ERROR("Unknown text format: %s" % book.slug))
                skipped += 1
                continue

            processed += 1
            # Context restarts empty for every book, so statistics never
            # bridge the boundary between two texts.
            last_word = ''
            text = m.group(1).lower()
            for letter in text:
                mydict = lesmianator.setdefault(last_word, {})
                mydict.setdefault(letter, 0)
                mydict[letter] += 1
                # Keep at most three characters of context.
                last_word = last_word[-2:] + letter

        if not processed:
            if skipped:
                print(self.style.ERROR("No books with TXT files found"))
            else:
                print(self.style.ERROR("No books found"))
            return

        try:
            with open(path, 'wb') as pickle_file:
                dump(lesmianator, pickle_file)
        except IOError:
            # Was "$s", which made the "%" formatting itself raise TypeError.
            print(self.style.ERROR("Couldn't write to %s" % path))
            return

        if verbose >= 1:
            print("%d processed, %d skipped" % (processed, skipped))
            print("Results dumped to %s" % path)