return rev if rev is not None else -1
def at_revision(self, rev):
- if rev:
+ if rev is not None:
return self.change_set.get(revision=rev)
else:
return self.head
# -*- coding: utf-8 -*-
-import re
-RE_TRIM_BEGIN = re.compile("^<!-- TRIM_BEGIN -->$", re.M)
-RE_TRIM_END = re.compile("^<!-- TRIM_END -->$", re.M)
+TRIM_BEGIN = " TRIM_BEGIN "
+TRIM_END = " TRIM_END "
+
+MASTERS = ['powiesc',
+ 'opowiadanie',
+ 'liryka_l',
+ 'liryka_lp',
+ 'dramat_wierszowany_l',
+ 'dramat_wierszowany_lp',
+ 'dramat_wspolczesny',
+ ]
from django.utils.translation import ugettext_lazy as _
from dvcs.models import Tag
+from wiki.constants import MASTERS
class DocumentTagForm(forms.Form):
"""
class Meta:
model = Book
+
+
+class ChooseMasterForm(forms.Form):
+    """
+    Lets the user pick the master tag to use when applying chunk fixes.
+    """
+
+    # Each master tag name serves as both the stored value and the label.
+    master = forms.ChoiceField(choices=zip(MASTERS, MASTERS))
<table>
{% for c in chunks %}
- <tr class="chunk-row
- {% if c.graded.is_wl %}
- chunk-wl
- {% if c.graded.bad_master %}
- chunk-bad-master
- {% endif %}
- {% else %}
- {% if c.graded.is_xml %}
- chunk-xml
- {% else %}
- chunk-plain
- {% endif %}
- {% endif %}
- ">
+ <tr class="chunk-{{ c.grade }}">
<td><a target="_blank" href="{{ c.chunk.get_absolute_url }}">{{ c.chunk.comment }}</a></td>
- <td>{% if c.chunk.publishable %}P{% endif %}</td>
+ <td>{% for fix in c.fix %}
+
+ {% ifequal fix "wl" %}<span class="fix"
+ title="{% trans "add basic document structure" %}"
+ >&lt;/&gt;</span>{% endifequal %}
+
+ {% ifequal fix "bad-master" %}<span class="fix"
+ title='{% trans "change master tag to" %} "{{ first_master }}"'
+ >master</span>{% endifequal %}
+
+ {% ifequal fix "trim-begin" %}<span class="fix"
+ title="{% trans "add begin trimming tag" %}"
+ >✁</span>{% endifequal %}
+
+ {% ifequal fix "trim-end" %}<span class="fix"
+ title="{% trans "add end trimming tag" %}"
+ >✃</span>{% endifequal %}
+
+ {% endfor %}
+
+ {% ifequal c.grade "plain" %}
+ <span class="fix-info">{% trans "unstructured text" %}</span>
+ {% endifequal %}
+
+ {% ifequal c.grade "xml" %}
+ <span class="fix-info">{% trans "unknown XML" %}</span>
+ {% endifequal %}
+
+ {% ifequal c.grade "wl-broken" %}
+ <span class="fix-info">{% trans "broken document" %}</span>
+ {% endifequal %}
+
+ </td>
<td><a href="{% url wiki_chunk_edit book.slug c.chunk.slug%}">[{% trans "edit" %}]</a></td>
- <td>{% if c.bad_master %}{{ c.bad_master }}{% endif %}</td>
+ <td>{% if c.chunk.publishable %}P{% endif %}</td>
<td><a href="{% url wiki_chunk_add book.slug c.chunk.slug %}">[+]</a></td>
</tr>
{% endfor %}
+ {% if need_fixing %}
+ <tr><td></td><td>
+ <form method="POST" action="">
+ {% if choose_master %}
+ {{ form.master }}
+ {% endif %}
+ <button type="submit">{% trans "Apply fixes" %}</button>
+ </form>
+ </td></tr>
+ {% endif %}
</table>
<p><a href="{% url wiki_book_append book.slug %}">{% trans "Append to other book" %}</a></p>
import librarian.html
import librarian.text
-from wiki.xml_tools import GradedText
+from wiki import xml_tools
#
# Quick hack around caching problems, TODO: use ETags
def book(request, slug):
book = get_object_or_404(Book, slug=slug)
+ # TODO: most of this should go somewhere else
+
# do we need some automation?
- some_wl = False
first_master = None
chunks = []
+ need_fixing = False
+ choose_master = False
- for chunk in book:
- graded = GradedText(chunk.materialize())
+ length = len(book)
+ for i, chunk in enumerate(book):
chunk_dict = {
"chunk": chunk,
- "graded": graded,
+ "fix": [],
+ "grade": ""
}
+ graded = xml_tools.GradedText(chunk.materialize())
if graded.is_wl():
- some_wl = True
master = graded.master()
if first_master is None:
first_master = master
elif master != first_master:
- chunk_dict['bad_master'] = master
+ chunk_dict['fix'].append('bad-master')
+
+ if i > 0 and not graded.has_trim_begin():
+ chunk_dict['fix'].append('trim-begin')
+ if i < length - 1 and not graded.has_trim_end():
+ chunk_dict['fix'].append('trim-end')
+
+ if chunk_dict['fix']:
+ chunk_dict['grade'] = 'wl-fix'
+ else:
+ chunk_dict['grade'] = 'wl'
+
+ elif graded.is_broken_wl():
+ chunk_dict['grade'] = 'wl-broken'
+ elif graded.is_xml():
+ chunk_dict['grade'] = 'xml'
+ else:
+ chunk_dict['grade'] = 'plain'
+ chunk_dict['fix'].append('wl')
+ choose_master = True
+
+ if chunk_dict['fix']:
+ need_fixing = True
chunks.append(chunk_dict)
+ if first_master or not need_fixing:
+ choose_master = False
+
+ if request.method == "POST":
+ form = forms.ChooseMasterForm(request.POST)
+ if not choose_master or form.is_valid():
+ if choose_master:
+ first_master = form.cleaned_data['master']
+
+ # do the actual fixing
+ for c in chunks:
+ if not c['fix']:
+ continue
+
+ text = c['chunk'].materialize()
+ for fix in c['fix']:
+ if fix == 'bad-master':
+ text = xml_tools.change_master(text, first_master)
+ elif fix == 'trim-begin':
+ text = xml_tools.add_trim_begin(text)
+ elif fix == 'trim-end':
+ text = xml_tools.add_trim_end(text)
+ elif fix == 'wl':
+ text = xml_tools.basic_structure(text, first_master)
+ author = request.user if request.user.is_authenticated() else None
+ description = "auto-fix: " + ", ".join(c['fix'])
+ c['chunk'].commit(text=text, author=author,
+ description=description)
+
+ return http.HttpResponseRedirect(book.get_absolute_url())
+ elif choose_master:
+ form = forms.ChooseMasterForm()
+ else:
+ form = None
+
return direct_to_template(request, "wiki/book_detail.html", extra_context={
"book": book,
"chunks": chunks,
- "some_wl": some_wl,
+ "need_fixing": need_fixing,
+ "choose_master": choose_master,
"first_master": first_master,
+ "form": form,
})
+from functools import wraps
import re
from lxml import etree
+from wiki.constants import TRIM_BEGIN, TRIM_END, MASTERS
+
+RE_TRIM_BEGIN = re.compile("^<!--%s-->$" % TRIM_BEGIN, re.M)
+RE_TRIM_END = re.compile("^<!--%s-->$" % TRIM_END, re.M)
+
+
+class ParseError(Exception):
+    """
+    Raised when a document lacks the structure needed to apply a fix
+    (e.g. no master tag was found).
+
+    Derives from Exception rather than BaseException so that ordinary
+    ``except Exception`` handlers can catch it.
+    """
+
+
+def obj_memoized(f):
+ """
+ A decorator that caches the return value of object methods.
+ The cache is kept with the object, in a _obj_memoized attribute,
+ and is keyed by method name and call arguments.
+ """
+ @wraps(f)
+ def wrapper(self, *args, **kwargs):
+ # Create the per-object cache lazily on first use.
+ if not hasattr(self, '_obj_memoized'):
+ self._obj_memoized = {}
+ # Key: method name, positional args, then keyword items in sorted order.
+ key = (f.__name__,) + args + tuple(sorted(kwargs.iteritems()))
+ try:
+ return self._obj_memoized[key]
+ except TypeError:
+ # Unhashable arguments cannot form a dict key: call through uncached.
+ return f(self, *args, **kwargs)
+ except KeyError:
+ # First call with these arguments: compute, store, then return.
+ self._obj_memoized[key] = f(self, *args, **kwargs)
+ return self._obj_memoized[key]
+ return wrapper
-from wiki.constants import RE_TRIM_BEGIN, RE_TRIM_END
class GradedText(object):
- _is_xml = None
_edoc = None
- _is_wl = None
- _master = None
ROOT = 'utwor'
- MASTERS = ['powiesc',
- 'opowiadanie',
- 'liryka_l',
- 'liryka_lp',
- 'dramat_wierszowany_l',
- 'dramat_wierszowany_lp',
- 'dramat_wspolczesny',
- ]
RDF = '{http://www.w3.org/1999/02/22-rdf-syntax-ns#}RDF'
def __init__(self, text):
self._text = text
+ @obj_memoized
def is_xml(self):
- if self._is_xml is None:
- try:
- self._edoc = etree.fromstring(self._text)
- except etree.XMLSyntaxError:
- self._is_xml = False
- else:
- self._is_xml = True
- del self._text
- return self._is_xml
+ """
+ Determines if it's a well-formed XML.
+ >>> GradedText("<a/>").is_xml()
+ True
+ >>> GradedText("<a>").is_xml()
+ False
+ """
+ try:
+ self._edoc = etree.fromstring(self._text)
+ except etree.XMLSyntaxError:
+ return False
+ return True
+
+ @obj_memoized
def is_wl(self):
- if self._is_wl is None:
- if self.is_xml():
- e = self._edoc
- self._is_wl = e.tag == self.ROOT and (
- len(e) == 1 and e[0].tag in self.MASTERS or
- len(e) == 2 and e[0].tag == self.RDF
- and e[1].tag in self.MASTERS)
- if self._is_wl:
- self._master = e[-1].tag
- del self._edoc
- else:
- self._is_wl = False
- return self._is_wl
+ """
+ Determines if it's an XML with a <utwor> and a master tag.
+
+ >>> GradedText("<utwor><powiesc></powiesc></utwor>").is_wl()
+ True
+ >>> GradedText("<a></a>").is_wl()
+ False
+ """
+ if self.is_xml():
+ e = self._edoc
+ # FIXME: there could be comments
+ ret = e.tag == self.ROOT and (
+ len(e) == 1 and e[0].tag in MASTERS or
+ len(e) == 2 and e[0].tag == self.RDF
+ and e[1].tag in MASTERS)
+ if ret:
+ self._master = e[-1].tag
+ del self._edoc
+ return ret
+ else:
+ return False
+
+ @obj_memoized
+ def is_broken_wl(self):
+ """
+ Determines if it at least looks like a broken WL file
+ and not just some untagged text.
+
+ >>> GradedText("<utwor><</utwor>").is_broken_wl()
+ True
+ >>> GradedText("some text").is_broken_wl()
+ False
+ """
+ # A document that passes is_wl() is accepted by this check as well.
+ if self.is_wl():
+ return True
+ # Otherwise only inspect the outermost tags of the stripped text;
+ # whatever lies between them is not examined.
+ text = self._text.strip()
+ return text.startswith('<utwor>') and text.endswith('</utwor>')
def master(self):
+ """
+ Gets the master tag.
+
+ >>> GradedText("<utwor><powiesc></powiesc></utwor>").master()
+ 'powiesc'
+ """
assert self.is_wl()
return self._master
+ @obj_memoized
+ def has_trim_begin(self):
+ """
+ Searches the text for the TRIM_BEGIN comment on a line of its own.
+ Returns the regex match object, or None when it is not found.
+ """
+ return RE_TRIM_BEGIN.search(self._text)
+
+ @obj_memoized
+ def has_trim_end(self):
+ """
+ Searches the text for the TRIM_END comment on a line of its own.
+ Returns the regex match object, or None when it is not found.
+ """
+ return RE_TRIM_END.search(self._text)
+
def _trim(text, trim_begin=True, trim_end=True):
"""
# only trim beginning if it's not still the first non-empty
texts.append(_trim(text, trim_begin=trim_begin, trim_end=False))
return "".join(texts)
+
+
+def change_master(text, master):
+ """
+ Changes the master tag in a WL document.
+
+ Parses text as XML, renames the last child of the root element
+ to master and returns the document serialized as UTF-8.
+ Assumes the last child of the root is the master element, which
+ holds for documents accepted by GradedText.is_wl().
+ """
+ e = etree.fromstring(text)
+ e[-1].tag = master
+ return etree.tostring(e, encoding="utf-8")
+
+
+def basic_structure(text, master):
+ """
+ Wraps text in a minimal WL document.
+
+ Builds a <utwor> root with a single child element renamed to master,
+ places text between the TRIM_BEGIN and TRIM_END comments inside it
+ and returns the document serialized as UTF-8.
+ """
+ e = etree.fromstring('''<utwor>
+<master>
+<!--%s--><!--%s-->
+</master>
+</utwor>''' % (TRIM_BEGIN, TRIM_END))
+ # Rename the placeholder element to the requested master tag.
+ e[0].tag = master
+ # The tail of the first comment is what sits between the two trim comments.
+ e[0][0].tail = "\n"*3 + text + "\n"*3
+ return etree.tostring(e, encoding="utf-8")
+
+
+def add_trim_begin(text):
+    """
+    Inserts a TRIM_BEGIN comment at the start of the master element.
+
+    Parses text as XML, picks the last child of the root whose tag is
+    listed in MASTERS and puts the comment in front of its content.
+    Returns the document serialized as UTF-8.
+    Raises ParseError when the root has no master child.
+    """
+    trim_tag = etree.Comment(TRIM_BEGIN)
+    e = etree.fromstring(text)
+    # for/else also covers a root without any children, where the loop
+    # variable would otherwise be left unbound.
+    for master in e[::-1]:
+        if master.tag in MASTERS:
+            break
+    else:
+        raise ParseError('No master tag found!')
+
+    master.insert(0, trim_tag)
+    # The text that used to open the master element now follows the comment.
+    trim_tag.tail = '\n\n\n' + (master.text or '')
+    master.text = '\n'
+    return etree.tostring(e, encoding="utf-8")
+
+
+def add_trim_end(text):
+    """
+    Appends a TRIM_END comment at the end of the master element.
+
+    Parses text as XML, picks the last child of the root whose tag is
+    listed in MASTERS and adds the comment after its content.
+    Returns the document serialized as UTF-8.
+    Raises ParseError when the root has no master child.
+    """
+    trim_tag = etree.Comment(TRIM_END)
+    e = etree.fromstring(text)
+    # for/else also covers a root without any children, where the loop
+    # variable would otherwise be left unbound.
+    for master in e[::-1]:
+        if master.tag in MASTERS:
+            break
+    else:
+        raise ParseError('No master tag found!')
+
+    master.append(trim_tag)
+    trim_tag.tail = '\n'
+    # Separate the comment from whatever precedes it: the previous
+    # sibling's tail, or the master's own text when there is no sibling.
+    prev = trim_tag.getprevious()
+    if prev is not None:
+        prev.tail = (prev.tail or '') + '\n\n\n'
+    else:
+        master.text = (master.text or '') + '\n\n\n'
+    return etree.tostring(e, encoding="utf-8")
}
-.chunk-wl {
- background-color: #afa;
+.fix {
+ border: 1px solid gray;
+ font-size: 0.7em;
+ padding: 3px;
}
-.chunk-plain {
- background-color: #aaa;
-}
-.chunk-xml {
- background-color: #faa;
+
+.fix-info {
+ font-size: 0.7em;
+ font-style: italic;
}
+
+.chunk-plain a {color: gray;}
+.chunk-xml a {color: gray; font-style: italic;}
+.chunk-wl-broken a {color: red;}
+.chunk-wl a {color: green;}
+.chunk-wl-fix a {color: black;}