From bdabaaf2da173e1778667f69f125925dcb9149df Mon Sep 17 00:00:00 2001 From: Radek Czajka Date: Mon, 6 Jun 2011 12:25:15 +0200 Subject: [PATCH] some chunk management automation --- apps/dvcs/models.py | 2 +- apps/wiki/constants.py | 14 +- apps/wiki/forms.py | 9 ++ apps/wiki/templates/wiki/book_detail.html | 61 ++++++-- apps/wiki/views.py | 79 +++++++++- apps/wiki/xml_tools.py | 183 ++++++++++++++++++---- redakcja/static/css/filelist.css | 21 ++- 7 files changed, 300 insertions(+), 69 deletions(-) diff --git a/apps/dvcs/models.py b/apps/dvcs/models.py index 5ce00c0f..dddbf3ad 100644 --- a/apps/dvcs/models.py +++ b/apps/dvcs/models.py @@ -240,7 +240,7 @@ class Document(models.Model): return rev if rev is not None else -1 def at_revision(self, rev): - if rev: + if rev is not None: return self.change_set.get(revision=rev) else: return self.head diff --git a/apps/wiki/constants.py b/apps/wiki/constants.py index fe0a4462..d75d6b4b 100644 --- a/apps/wiki/constants.py +++ b/apps/wiki/constants.py @@ -1,5 +1,13 @@ # -*- coding: utf-8 -*- -import re -RE_TRIM_BEGIN = re.compile("^$", re.M) -RE_TRIM_END = re.compile("^$", re.M) +TRIM_BEGIN = " TRIM_BEGIN " +TRIM_END = " TRIM_END " + +MASTERS = ['powiesc', + 'opowiadanie', + 'liryka_l', + 'liryka_lp', + 'dramat_wierszowany_l', + 'dramat_wierszowany_lp', + 'dramat_wspolczesny', + ] diff --git a/apps/wiki/forms.py b/apps/wiki/forms.py index 7cabe325..fb0f958a 100644 --- a/apps/wiki/forms.py +++ b/apps/wiki/forms.py @@ -8,6 +8,7 @@ from wiki.models import Book, Chunk from django.utils.translation import ugettext_lazy as _ from dvcs.models import Tag +from wiki.constants import MASTERS class DocumentTagForm(forms.Form): """ @@ -189,3 +190,11 @@ class BookForm(forms.ModelForm): class Meta: model = Book + + +class ChooseMasterForm(forms.Form): + """ + Form used for fixing the chunks in a book. + """ + + master = forms.ChoiceField(choices=((m, m) for m in MASTERS)) diff --git a/apps/wiki/templates/wiki/book_detail.html b/apps/wiki/templates/wiki/book_detail.html index d72befac..26f52145 100755 --- a/apps/wiki/templates/wiki/book_detail.html +++ b/apps/wiki/templates/wiki/book_detail.html @@ -8,27 +8,56 @@ {% for c in chunks %} - + - + - + {% endfor %} + {% if need_fixing %} + + {% endif %}
{{ c.chunk.comment }}{% if c.chunk.publishable %}P{% endif %}{% for fix in c.fix %} + + {% ifequal fix "wl" %}</>{% endifequal %} + + {% ifequal fix "bad-master" %}master{% endifequal %} + + {% ifequal fix "trim-begin" %}{% endifequal %} + + {% ifequal fix "trim-end" %}{% endifequal %} + + {% endfor %} + + {% ifequal c.grade "plain" %} + {% trans "unstructured text" %} + {% endifequal %} + + {% ifequal c.grade "xml" %} + {% trans "unknown XML" %} + {% endifequal %} + + {% ifequal c.grade "wl-broken" %} + {% trans "broken document" %} + {% endifequal %} + + [{% trans "edit" %}]{% if c.bad_master %}{{ c.bad_master }}{% endif %}{% if c.chunk.publishable %}P{% endif %} [+]
+
+ {% if choose_master %} + {{ form.master }} + {% endif %} + +
+

{% trans "Append to other book" %}

diff --git a/apps/wiki/views.py b/apps/wiki/views.py index 146db697..3739c90d 100644 --- a/apps/wiki/views.py +++ b/apps/wiki/views.py @@ -26,7 +26,7 @@ from django.middleware.gzip import GZipMiddleware import librarian.html import librarian.text -from wiki.xml_tools import GradedText +from wiki import xml_tools # # Quick hack around caching problems, TODO: use ETags @@ -399,31 +399,94 @@ def history(request, slug, chunk=None): def book(request, slug): book = get_object_or_404(Book, slug=slug) + # TODO: most of this should go somewhere else + # do we need some automation? - some_wl = False first_master = None chunks = [] + need_fixing = False + choose_master = False - for chunk in book: - graded = GradedText(chunk.materialize()) + length = len(book) + for i, chunk in enumerate(book): chunk_dict = { "chunk": chunk, - "graded": graded, + "fix": [], + "grade": "" } + graded = xml_tools.GradedText(chunk.materialize()) if graded.is_wl(): - some_wl = True master = graded.master() if first_master is None: first_master = master elif master != first_master: - chunk_dict['bad_master'] = master + chunk_dict['fix'].append('bad-master') + + if i > 0 and not graded.has_trim_begin(): + chunk_dict['fix'].append('trim-begin') + if i < length - 1 and not graded.has_trim_end(): + chunk_dict['fix'].append('trim-end') + + if chunk_dict['fix']: + chunk_dict['grade'] = 'wl-fix' + else: + chunk_dict['grade'] = 'wl' + + elif graded.is_broken_wl(): + chunk_dict['grade'] = 'wl-broken' + elif graded.is_xml(): + chunk_dict['grade'] = 'xml' + else: + chunk_dict['grade'] = 'plain' + chunk_dict['fix'].append('wl') + choose_master = True + + if chunk_dict['fix']: + need_fixing = True chunks.append(chunk_dict) + if first_master or not need_fixing: + choose_master = False + + if request.method == "POST": + form = forms.ChooseMasterForm(request.POST) + if not choose_master or form.is_valid(): + if choose_master: + first_master = form.cleaned_data['master'] + + # do the actual fixing + for c in chunks: + if not c['fix']: + continue + + text = c['chunk'].materialize() + for fix in c['fix']: + if fix == 'bad-master': + text = xml_tools.change_master(text, first_master) + elif fix == 'trim-begin': + text = xml_tools.add_trim_begin(text) + elif fix == 'trim-end': + text = xml_tools.add_trim_end(text) + elif fix == 'wl': + text = xml_tools.basic_structure(text, first_master) + author = request.user if request.user.is_authenticated() else None + description = "auto-fix: " + ", ".join(c['fix']) + c['chunk'].commit(text=text, author=author, + description=description) + + return http.HttpResponseRedirect(book.get_absolute_url()) + elif choose_master: + form = forms.ChooseMasterForm() + else: + form = None + return direct_to_template(request, "wiki/book_detail.html", extra_context={ "book": book, "chunks": chunks, - "some_wl": some_wl, + "need_fixing": need_fixing, + "choose_master": choose_master, "first_master": first_master, + "form": form, }) diff --git a/apps/wiki/xml_tools.py b/apps/wiki/xml_tools.py index a4de433c..6dc50893 100755 --- a/apps/wiki/xml_tools.py +++ b/apps/wiki/xml_tools.py @@ -1,59 +1,120 @@ +from functools import wraps import re from lxml import etree +from wiki.constants import TRIM_BEGIN, TRIM_END, MASTERS + +RE_TRIM_BEGIN = re.compile("^$" % TRIM_BEGIN, re.M) +RE_TRIM_END = re.compile("^$" % TRIM_END, re.M) + + +class ParseError(BaseException): + pass + + +def obj_memoized(f): + """ + A decorator that caches return value of object methods. + The cache is kept with the object, in a _obj_memoized property. + """ + @wraps(f) + def wrapper(self, *args, **kwargs): + if not hasattr(self, '_obj_memoized'): + self._obj_memoized = {} + key = (f.__name__,) + args + tuple(sorted(kwargs.iteritems())) + try: + return self._obj_memoized[key] + except TypeError: + return f(self, *args, **kwargs) + except KeyError: + self._obj_memoized[key] = f(self, *args, **kwargs) + return self._obj_memoized[key] + return wrapper -from wiki.constants import RE_TRIM_BEGIN, RE_TRIM_END class GradedText(object): - _is_xml = None _edoc = None - _is_wl = None - _master = None ROOT = 'utwor' - MASTERS = ['powiesc', - 'opowiadanie', - 'liryka_l', - 'liryka_lp', - 'dramat_wierszowany_l', - 'dramat_wierszowany_lp', - 'dramat_wspolczesny', - ] RDF = '{http://www.w3.org/1999/02/22-rdf-syntax-ns#}RDF' def __init__(self, text): self._text = text + @obj_memoized def is_xml(self): - if self._is_xml is None: - try: - self._edoc = etree.fromstring(self._text) - except etree.XMLSyntaxError: - self._is_xml = False - else: - self._is_xml = True - del self._text - return self._is_xml + """ + Determines if it's a well-formed XML. + >>> GradedText("").is_xml() + True + >>> GradedText("").is_xml() + False + """ + try: + self._edoc = etree.fromstring(self._text) + except etree.XMLSyntaxError: + return False + return True + + @obj_memoized def is_wl(self): - if self._is_wl is None: - if self.is_xml(): - e = self._edoc - self._is_wl = e.tag == self.ROOT and ( - len(e) == 1 and e[0].tag in self.MASTERS or - len(e) == 2 and e[0].tag == self.RDF - and e[1].tag in self.MASTERS) - if self._is_wl: - self._master = e[-1].tag - del self._edoc - else: - self._is_wl = False - return self._is_wl + """ + Determines if it's an XML with a and a master tag. + + >>> GradedText("").is_wl() + True + >>> GradedText("").is_wl() + False + """ + if self.is_xml(): + e = self._edoc + # FIXME: there could be comments + ret = e.tag == self.ROOT and ( + len(e) == 1 and e[0].tag in MASTERS or + len(e) == 2 and e[0].tag == self.RDF + and e[1].tag in MASTERS) + if ret: + self._master = e[-1].tag + del self._edoc + return ret + else: + return False + + @obj_memoized + def is_broken_wl(self): + """ + Determines if it at least looks like broken WL file + and not just some untagged text. + + >>> GradedText("<").is_broken_wl() + True + >>> GradedText("some text").is_broken_wl() + False + """ + if self.is_wl(): + return True + text = self._text.strip() + return text.startswith('') and text.endswith('') def master(self): + """ + Gets the master tag. + + >>> GradedText("").master() + 'powiesc' + """ assert self.is_wl() return self._master + @obj_memoized + def has_trim_begin(self): + return RE_TRIM_BEGIN.search(self._text) + + @obj_memoized + def has_trim_end(self): + return RE_TRIM_END.search(self._text) + def _trim(text, trim_begin=True, trim_end=True): """ @@ -87,3 +148,57 @@ def compile_text(parts): # only trim beginning if it's not still the first non-empty texts.append(_trim(text, trim_begin=trim_begin, trim_end=False)) return "".join(texts) + + +def change_master(text, master): + """ + Changes the master tag in a WL document. + """ + e = etree.fromstring(text) + e[-1].tag = master + return etree.tostring(e, encoding="utf-8") + + +def basic_structure(text, master): + e = etree.fromstring(''' + + + +''' % (TRIM_BEGIN, TRIM_END)) + e[0].tag = master + e[0][0].tail = "\n"*3 + text + "\n"*3 + return etree.tostring(e, encoding="utf-8") + + +def add_trim_begin(text): + trim_tag = etree.Comment(TRIM_BEGIN) + e = etree.fromstring(text) + for master in e[::-1]: + if master.tag in MASTERS: + break + if master.tag not in MASTERS: + raise ParseError('No master tag found!') + + master.insert(0, trim_tag) + trim_tag.tail = '\n\n\n' + (master.text or '') + master.text = '\n' + return etree.tostring(e, encoding="utf-8") + + +def add_trim_end(text): + trim_tag = etree.Comment(TRIM_END) + e = etree.fromstring(text) + for master in e[::-1]: + if master.tag in MASTERS: + break + if master.tag not in MASTERS: + raise ParseError('No master tag found!') + + master.append(trim_tag) + trim_tag.tail = '\n' + prev = trim_tag.getprevious() + if prev is not None: + prev.tail = (prev.tail or '') + '\n\n\n' + else: + master.text = (master.text or '') + '\n\n\n' + return etree.tostring(e, encoding="utf-8") diff --git a/redakcja/static/css/filelist.css b/redakcja/static/css/filelist.css index c2f59f3d..91323c0f 100644 --- a/redakcja/static/css/filelist.css +++ b/redakcja/static/css/filelist.css @@ -100,12 +100,19 @@ td { } -.chunk-wl { - background-color: #afa; +.fix { + border: 1px solid gray; + font-size: 0.7em; + padding: 3px; } -.chunk-plain { - background-color: #aaa; -} -.chunk-xml { - background-color: #faa; + +.fix-info { + font-size: 0.7em; + font-style: italic; } + +.chunk-plain a {color: gray;} +.chunk-xml a {color: gray; font-style: italic;} +.chunk-wl-broken a {color: red;} +.chunk-wl a {color: green;} +.chunk-wl-fix a {color: black;} -- 2.20.1