X-Git-Url: https://git.mdrn.pl/wolnelektury.git/blobdiff_plain/d91551345b68c2bc7d96f2098691fab28276d6b8..f82c33291d3bf97d7447aba236940bdfe560f703:/apps/lesmianator/management/commands/lesmianator.py?ds=sidebyside diff --git a/apps/lesmianator/management/commands/lesmianator.py b/apps/lesmianator/management/commands/lesmianator.py index c0219214d..b2341ab31 100644 --- a/apps/lesmianator/management/commands/lesmianator.py +++ b/apps/lesmianator/management/commands/lesmianator.py @@ -3,8 +3,7 @@ # Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information. # import re -import sys -from cPickle import load, dump +from cPickle import dump from optparse import make_option from django.core.management.base import BaseCommand @@ -14,7 +13,7 @@ from django.conf import settings from catalogue.models import Book, Tag # extract text from text file -re_text = re_text = re.compile(r'\n{3,}(.*?)\n*-----\n', re.S).search +re_text = re.compile(r'\n{3,}(.*?)\n*-----\n', re.S).search class Command(BaseCommand): @@ -66,21 +65,22 @@ class Command(BaseCommand): print self.style.NOTICE('%s has no TXT file' % book.slug) skipped += 1 continue - with open(book.txt_file.path) as f: - m = re_text(f.read()) - if not m: - print self.style.ERROR("Unknown text format: %s" % book.slug) - skipped += 1 - continue - - processed += 1 - last_word = '' - text = unicode(m.group(1), 'utf-8').lower() - for letter in text: - mydict = lesmianator.setdefault(last_word, {}) - myval = mydict.setdefault(letter, 0) - mydict[letter] += 1 - last_word = last_word[-2:] + letter + f = open(book.txt_file.path) + m = re_text(f.read()) + if not m: + print self.style.ERROR("Unknown text format: %s" % book.slug) + skipped += 1 + continue + + processed += 1 + last_word = '' + text = unicode(m.group(1), 'utf-8').lower() + for letter in text: + mydict = lesmianator.setdefault(last_word, {}) + mydict.setdefault(letter, 0) + mydict[letter] += 1 + last_word = last_word[-2:] + letter + f.close() if not processed: if skipped: @@ -98,4 +98,4 @@ class Command(BaseCommand): dump(lesmianator, open(path, 'w')) if verbose >= 1: print "%d processed, %d skipped" % (processed, skipped) - print "Results dumped to %s" % path + print "Results dumped to %s" % path