fnp
/
wolnelektury.git
/ blobdiff
commit
grep
author
committer
pickaxe
?
search:
re
summary
|
shortlog
|
log
|
commit
|
commitdiff
|
tree
raw
|
inline
| side by side
turniej elektrybałtów!
[wolnelektury.git]
/
apps
/
lesmianator
/
management
/
commands
/
lesmianator.py
diff --git
a/apps/lesmianator/management/commands/lesmianator.py
b/apps/lesmianator/management/commands/lesmianator.py
index
36d7144
..
c021921
100644
(file)
--- a/
apps/lesmianator/management/commands/lesmianator.py
+++ b/
apps/lesmianator/management/commands/lesmianator.py
@@
-2,6
+2,7
@@
# This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later.
# Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
#
# This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later.
# Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
#
+import re
import sys
from cPickle import load, dump
from optparse import make_option
import sys
from cPickle import load, dump
from optparse import make_option
@@
-12,6
+13,9
@@
from django.conf import settings
from catalogue.models import Book, Tag
from catalogue.models import Book, Tag
+# extract text from text file
+re_text = re_text = re.compile(r'\n{3,}(.*?)\n*-----\n', re.S).search
+
class Command(BaseCommand):
option_list = BaseCommand.option_list + (
class Command(BaseCommand):
option_list = BaseCommand.option_list + (
@@
-62,13
+66,17
@@
class Command(BaseCommand):
print self.style.NOTICE('%s has no TXT file' % book.slug)
skipped += 1
continue
print self.style.NOTICE('%s has no TXT file' % book.slug)
skipped += 1
continue
- processed += 1
- last_word = ''
- for number, line in enumerate(book.txt_file):
- if number < 17:
+ with open(book.txt_file.path) as f:
+ m = re_text(f.read())
+ if not m:
+ print self.style.ERROR("Unknown text format: %s" % book.slug)
+ skipped += 1
continue
continue
- line = unicode(line, 'utf-8').lower()
- for letter in line:
+
+ processed += 1
+ last_word = ''
+ text = unicode(m.group(1), 'utf-8').lower()
+ for letter in text:
mydict = lesmianator.setdefault(last_word, {})
myval = mydict.setdefault(letter, 0)
mydict[letter] += 1
mydict = lesmianator.setdefault(last_word, {})
myval = mydict.setdefault(letter, 0)
mydict[letter] += 1
@@
-84,10
+92,10
@@
class Command(BaseCommand):
try:
dump(lesmianator, open(path, 'w'))
except:
try:
dump(lesmianator, open(path, 'w'))
except:
- print self.style.ERROR("Counldn't write to $s" % path)
+ print self.style.ERROR("Couldn't write to $s" % path)
return
dump(lesmianator, open(path, 'w'))
if verbose >= 1:
print "%d processed, %d skipped" % (processed, skipped)
return
dump(lesmianator, open(path, 'w'))
if verbose >= 1:
print "%d processed, %d skipped" % (processed, skipped)
- print "Results dumped do %s" % path
+ print "Results dumped to %s" % path