+from functools import wraps
import re
from lxml import etree
+from wiki.constants import TRIM_BEGIN, TRIM_END, MASTERS
+
# The trim markers are inserted into documents as literal XML comment text,
# so they are escaped before being embedded in a pattern. With re.M each
# marker only matches when the comment occupies a whole line by itself.
RE_TRIM_BEGIN = re.compile("^<!--%s-->$" % re.escape(TRIM_BEGIN), re.M)
RE_TRIM_END = re.compile("^<!--%s-->$" % re.escape(TRIM_END), re.M)
+
+
class ParseError(Exception):
    """
    Raised when a document does not have the expected WL structure.

    Derives from Exception (not BaseException) so that ordinary
    ``except Exception`` handlers see it like any other application error.
    """
    pass
+
+
def obj_memoized(f):
    """
    A decorator that caches return values of object methods.

    The cache is kept with the object, in a ``_obj_memoized`` dict, and is
    keyed by the method name, the positional arguments and the sorted
    keyword arguments. Calls whose arguments are unhashable are executed
    every time and never cached.
    """
    @wraps(f)
    def wrapper(self, *args, **kwargs):
        if not hasattr(self, '_obj_memoized'):
            self._obj_memoized = {}
        # Positional and keyword arguments get separate slots in the key,
        # so f(('a', 1)) can never be confused with f(a=1).
        # items() (rather than iteritems()) works on Python 2 and 3 alike.
        key = (f.__name__, args, tuple(sorted(kwargs.items())))
        try:
            return self._obj_memoized[key]
        except TypeError:
            # Unhashable argument: the key cannot be stored, just compute.
            return f(self, *args, **kwargs)
        except KeyError:
            # First call with these arguments: compute and remember.
            self._obj_memoized[key] = f(self, *args, **kwargs)
            return self._obj_memoized[key]
    return wrapper
-from wiki.constants import RE_TRIM_BEGIN, RE_TRIM_END
class GradedText(object):
    """
    Wraps a piece of text and grades how closely it resembles a WL document.

    The grading methods are decorated with ``obj_memoized``, so each result
    is computed once per instance.
    """
    _edoc = None
    ROOT = 'utwor'
    RDF = '{http://www.w3.org/1999/02/22-rdf-syntax-ns#}RDF'

    def __init__(self, text):
        self._text = text

    @obj_memoized
    def is_xml(self):
        """
        Tells whether the text parses as well-formed XML.

        On success the parsed root element is kept in ``self._edoc``.

        >>> GradedText("<a/>").is_xml()
        True
        >>> GradedText("<a>").is_xml()
        False
        """
        try:
            parsed = etree.fromstring(self._text)
        except etree.XMLSyntaxError:
            return False
        else:
            self._edoc = parsed
            return True

    @obj_memoized
    def is_wl(self):
        """
        Tells whether the text is XML with a <utwor> root holding a master
        tag, optionally preceded by an RDF element.

        When it is, the master tag name is remembered in ``self._master``.
        The parsed tree is dropped afterwards either way.

        >>> GradedText("<utwor><powiesc></powiesc></utwor>").is_wl()
        True
        >>> GradedText("<a></a>").is_wl()
        False
        """
        if not self.is_xml():
            return False

        root = self._edoc
        # FIXME: there could be comments
        if root.tag != self.ROOT:
            verdict = False
        elif len(root) == 1:
            verdict = root[0].tag in MASTERS
        elif len(root) == 2:
            verdict = root[0].tag == self.RDF and root[1].tag in MASTERS
        else:
            verdict = False

        if verdict:
            self._master = root[-1].tag
        del self._edoc
        return verdict

    @obj_memoized
    def is_broken_wl(self):
        """
        Tells whether the text at least looks like a (possibly broken) WL
        file, as opposed to plain untagged text.

        >>> GradedText("<utwor><</utwor>").is_broken_wl()
        True
        >>> GradedText("some text").is_broken_wl()
        False
        """
        if not self.is_wl():
            stripped = self._text.strip()
            return (stripped.startswith('<utwor>')
                    and stripped.endswith('</utwor>'))
        return True

    def master(self):
        """
        Returns the name of the master tag; the text must be a WL document.

        >>> GradedText("<utwor><powiesc></powiesc></utwor>").master()
        'powiesc'
        """
        assert self.is_wl()
        return self._master

    @obj_memoized
    def has_trim_begin(self):
        """
        Searches the text for the trim-begin marker.

        Returns the regex match object when found, None otherwise.
        """
        found = RE_TRIM_BEGIN.search(self._text)
        return found

    @obj_memoized
    def has_trim_end(self):
        """
        Searches the text for the trim-end marker.

        Returns the regex match object when found, None otherwise.
        """
        found = RE_TRIM_END.search(self._text)
        return found
+
def _trim(text, trim_begin=True, trim_end=True):
"""
# only trim beginning if it's not still the first non-empty
texts.append(_trim(text, trim_begin=trim_begin, trim_end=False))
return "".join(texts)
+
+
def change_master(text, master):
    """
    Renames the last top-level child of a document to ``master``.

    ``text`` is parsed as XML, the last child of the root element gets the
    new tag name, and the document is returned serialized as UTF-8.
    """
    root = etree.fromstring(text)
    last_child = root[-1]
    last_child.tag = master
    serialized = etree.tostring(root, encoding="utf-8")
    return serialized
+
+
def basic_structure(text, master):
    """
    Wraps plain ``text`` in a minimal <utwor> document.

    The text is placed inside an element named ``master``, between the
    trim-begin and trim-end comments, padded with three newlines on each
    side. Returns the document serialized as UTF-8.
    """
    skeleton = '''<utwor>
<master>
<!--%s--><!--%s-->
</master>
</utwor>''' % (TRIM_BEGIN, TRIM_END)
    root = etree.fromstring(skeleton)

    body = root[0]
    body.tag = master

    # The text lives in the tail of the first comment, i.e. between the
    # two trim markers.
    padding = "\n" * 3
    begin_marker = body[0]
    begin_marker.tail = padding + text + padding
    return etree.tostring(root, encoding="utf-8")
+
+
def add_trim_begin(text):
    """
    Inserts the trim-begin comment at the start of the master element.

    The last top-level child whose tag is listed in MASTERS is taken as the
    master element. Returns the document serialized as UTF-8.

    Raises ParseError when the root has no such child, including the case
    of a root element with no children at all.
    """
    trim_tag = etree.Comment(TRIM_BEGIN)
    e = etree.fromstring(text)
    for master in e[::-1]:
        if master.tag in MASTERS:
            break
    else:
        # Reached when nothing matched or when there was nothing to iterate
        # over; in the latter case `master` would not even be bound.
        raise ParseError('No master tag found!')

    master.insert(0, trim_tag)
    # Move the element's leading text after the marker, separated by blank
    # lines, and leave a single newline before the marker.
    trim_tag.tail = '\n\n\n' + (master.text or '')
    master.text = '\n'
    return etree.tostring(e, encoding="utf-8")
+
+
def add_trim_end(text):
    """
    Appends the trim-end comment at the end of the master element.

    The last top-level child whose tag is listed in MASTERS is taken as the
    master element. Returns the document serialized as UTF-8.

    Raises ParseError when the root has no such child, including the case
    of a root element with no children at all.
    """
    trim_tag = etree.Comment(TRIM_END)
    e = etree.fromstring(text)
    for master in e[::-1]:
        if master.tag in MASTERS:
            break
    else:
        # Reached when nothing matched or when there was nothing to iterate
        # over; in the latter case `master` would not even be bound.
        raise ParseError('No master tag found!')

    master.append(trim_tag)
    trim_tag.tail = '\n'
    # Put blank lines in front of the marker: after the preceding sibling
    # if there is one, otherwise after the element's own text.
    prev = trim_tag.getprevious()
    if prev is not None:
        prev.tail = (prev.tail or '') + '\n\n\n'
    else:
        master.text = (master.text or '') + '\n\n\n'
    return etree.tostring(e, encoding="utf-8")