1 from functools import wraps
5 from catalogue.constants import TRIM_BEGIN, TRIM_END, MASTERS
# Trim markers are XML comments that occupy a whole line; re.M lets ^ and $
# match at line boundaries inside a larger document.
# The marker text is literal (the same constants are used verbatim as XML
# comment content elsewhere in this module), so it is escaped before being
# interpolated into the pattern.
RE_TRIM_BEGIN = re.compile("^<!--%s-->$" % re.escape(TRIM_BEGIN), re.M)
RE_TRIM_END = re.compile("^<!--%s-->$" % re.escape(TRIM_END), re.M)
class ParseError(Exception):
    """
    Raised when a document does not have the expected structure,
    e.g. when no master tag can be found in it.

    Derives from Exception (not BaseException directly) so that ordinary
    ``except Exception`` handlers see it.
    """
17 A decorator that caches return value of object methods.
18 The cache is kept with the object, in a _obj_memoized property.
def wrapper(self, *args, **kwargs):
    # The cache lives on the instance itself and is created lazily on the
    # first memoized call.
    if not hasattr(self, '_obj_memoized'):
        self._obj_memoized = {}
    # Key on the wrapped function's name plus its arguments; keyword
    # arguments are sorted so their order does not matter.
    # dict.items() works on both Python 2 and 3 (iteritems() is 2-only).
    key = (f.__name__,) + args + tuple(sorted(kwargs.items()))
    try:
        return self._obj_memoized[key]
    except TypeError:
        # An unhashable argument makes the key unusable: skip the cache
        # and just compute the value.
        return f(self, *args, **kwargs)
    except KeyError:
        # First call with these arguments: compute, store, return.
        self._obj_memoized[key] = f(self, *args, **kwargs)
        return self._obj_memoized[key]
35 class GradedText(object):
39 RDF = '{http://www.w3.org/1999/02/22-rdf-syntax-ns#}RDF'
41 def __init__(self, text):
47 Determines if it's a well-formed XML.
49 >>> GradedText("<a/>").is_xml()
51 >>> GradedText("<a>").is_xml()
55 self._edoc = etree.fromstring(self._text)
56 except etree.XMLSyntaxError:
63 Determines if it's an XML with a <utwor> and a master tag.
65 >>> GradedText("<utwor><powiesc></powiesc></utwor>").is_wl()
67 >>> GradedText("<a></a>").is_wl()
72 # FIXME: there could be comments
73 ret = e.tag == self.ROOT and (
74 len(e) == 1 and e[0].tag in MASTERS or
75 len(e) == 2 and e[0].tag == self.RDF
76 and e[1].tag in MASTERS)
78 self._master = e[-1].tag
85 def is_broken_wl(self):
87 Determines if it at least looks like broken WL file
88 and not just some untagged text.
90 >>> GradedText("<utwor><</utwor>").is_broken_wl()
92 >>> GradedText("some text").is_broken_wl()
97 text = self._text.strip()
98 return text.startswith('<utwor>') and text.endswith('</utwor>')
104 >>> GradedText("<utwor><powiesc></powiesc></utwor>").master()
def has_trim_begin(self):
    """
    Look for a begin-of-trim marker line in the text.

    Returns the match object produced by RE_TRIM_BEGIN.search() (truthy)
    when a marker is present, None otherwise.
    """
    found = RE_TRIM_BEGIN.search(self._text)
    return found
def has_trim_end(self):
    """
    Look for an end-of-trim marker line in the text.

    Returns the match object produced by RE_TRIM_END.search() (truthy)
    when a marker is present, None otherwise.
    """
    found = RE_TRIM_END.search(self._text)
    return found
def _trim(text, trim_begin=True, trim_end=True):
    """
    Cut off everything before RE_TRIM_BEGIN and after RE_TRIM_END, so
    that eg. one big XML file can be compiled from many small XML files.

    text       -- the document text to trim
    trim_begin -- if true, drop everything up to and including the first
                  begin marker line
    trim_end   -- if true, drop everything from the first end marker line
                  onwards

    A marker that does not occur leaves the text untouched on that side.
    Returns the trimmed text.
    """
    if trim_begin:
        # split() yields [text] when there is no marker, so [-1] is safe
        text = RE_TRIM_BEGIN.split(text, maxsplit=1)[-1]
    if trim_end:
        text = RE_TRIM_END.split(text, maxsplit=1)[0]
    return text
def compile_text(parts):
    """
    Compiles full text from an iterable of parts,
    trimming where applicable.

    Empty parts are skipped.  The first non-empty part keeps its
    beginning, the last non-empty part keeps its end, and every boundary
    between two non-empty parts is trimmed on both sides.

    Returns the concatenated text ("" when there is no non-empty part).
    """
    texts = []
    trim_begin = False
    # `text' is the most recent non-empty part, held back until we know
    # whether anything non-empty follows it
    text = ''
    for next_text in parts:
        if not next_text:
            continue
        if text:
            # trim the end, because there's more non-empty text
            # don't trim beginning, if `text' is the first non-empty part
            texts.append(_trim(text, trim_begin=trim_begin))
            trim_begin = True
        text = next_text
    # don't trim the end, because there's no more text coming after `text'
    # only trim beginning if it's not still the first non-empty
    texts.append(_trim(text, trim_begin=trim_begin, trim_end=False))
    return "".join(texts)
def change_master(text, master):
    """
    Changes the master tag in a WL document.

    text   -- the serialized XML document
    master -- the new tag name for the master element

    The master element is taken to be the last child of the root.
    Returns the document re-serialized by etree.tostring() with
    UTF-8 encoding.
    """
    e = etree.fromstring(text)
    e[-1].tag = master
    return etree.tostring(e, encoding="utf-8")
162 def basic_structure(text, master):
163 e = etree.fromstring('''<utwor>
167 </utwor>''' % (TRIM_BEGIN, TRIM_END))
169 e[0][0].tail = "\n"*3 + text + "\n"*3
170 return etree.tostring(e, encoding="utf-8")
def add_trim_begin(text):
    """
    Inserts a TRIM_BEGIN comment as the first node of the master element.

    text -- the serialized XML document

    Returns the document re-serialized by etree.tostring() with
    UTF-8 encoding.
    Raises ParseError if the root has no child with a tag from MASTERS
    (including the case of a root with no children at all).
    """
    trim_tag = etree.Comment(TRIM_BEGIN)
    e = etree.fromstring(text)
    # look for the master element among the root's children, last first
    for master in e[::-1]:
        if master.tag in MASTERS:
            break
    else:
        # also reached when the loop body never ran (childless root)
        raise ParseError('No master tag found!')

    master.insert(0, trim_tag)
    # whatever text opened the master element now follows the marker
    trim_tag.tail = '\n\n\n' + (master.text or '')
    # NOTE(review): the moved text must not remain in front of the marker;
    # the single-newline replacement is assumed -- confirm against callers
    master.text = '\n'
    return etree.tostring(e, encoding="utf-8")
def add_trim_end(text):
    """
    Appends a TRIM_END comment as the last node of the master element.

    text -- the serialized XML document

    Returns the document re-serialized by etree.tostring() with
    UTF-8 encoding.
    Raises ParseError if the root has no child with a tag from MASTERS
    (including the case of a root with no children at all).
    """
    trim_tag = etree.Comment(TRIM_END)
    e = etree.fromstring(text)
    # look for the master element among the root's children, last first
    for master in e[::-1]:
        if master.tag in MASTERS:
            break
    else:
        # also reached when the loop body never ran (childless root)
        raise ParseError('No master tag found!')

    master.append(trim_tag)

    # put blank lines in front of the marker: they belong to the tail of
    # the preceding sibling, or to the master's own text if there is none
    prev = trim_tag.getprevious()
    if prev is not None:
        prev.tail = (prev.tail or '') + '\n\n\n'
    else:
        master.text = (master.text or '') + '\n\n\n'
    return etree.tostring(e, encoding="utf-8")