# Recovered from a git blobdiff of lib/markupstring.py (wolnelektury.git).
#
# Code taken from ActiveState Python recipes:
# http://code.activestate.com/recipes/389023/
#
# Changed by Marek Stepniowski to handle unicode characters
import xml.sax
import xml.sax.saxutils

try:
    _text_type = unicode  # Python 2 text type
except NameError:
    _text_type = str  # Python 3: ``str`` is already the text type


class simpleHandler(xml.sax.ContentHandler):
    """A simple handler that provides us with indices of marked up content.

    While a document is parsed the handler accumulates the plain text in
    ``content`` and, for every closed element, appends a tuple
    ``((start, end), name, attrs)`` to ``elements``, where ``start`` and
    ``end`` are offsets into ``content``.  Elements named ``foobar`` (the
    synthetic outer wrapper) are never recorded.
    """

    # Name of the outer wrapper element, which is ignored on purpose.
    WRAPPER = 'foobar'

    def __init__(self):
        xml.sax.ContentHandler.__init__(self)
        # Closed elements, in the order their end tags were seen.
        self.elements = []
        # Elements whose start tag was seen but whose end tag was not yet.
        self.open_elements = []
        # Plain text collected so far (markup removed, entities decoded).
        self.content = u""

    def startElement(self, name, attrs):
        """Remember where the element *name* starts in the text."""
        if name == self.WRAPPER:
            return
        self.open_elements.append({
            'name': name,
            'attrs': attrs.copy(),
            'start': len(self.content),
        })

    def endElement(self, name):
        """Record the span of the innermost open element called *name*."""
        if name == self.WRAPPER:
            return
        # Search from the innermost element outwards: with nested elements
        # of the same name, the end tag belongs to the most recent one.
        for i in range(len(self.open_elements) - 1, -1, -1):
            opened = self.open_elements[i]
            if opened['name'] == name:
                span = (opened['start'], len(self.content))
                self.elements.append((span, opened['name'], opened['attrs']))
                del self.open_elements[i]
                return

    def characters(self, chunk):
        """Append a chunk of character data to the collected text."""
        self.content += chunk


class MarkupString(_text_type):
    """A simple class for dealing with marked up strings.

    The string value is the marked up text itself; ``raw`` holds the text
    with the markup removed.  Indexing, slicing and ``len()`` operate on
    ``raw`` positions, and an index or slice returns a new, valid
    ``MarkupString`` in which the markup covering the selected text is
    preserved.  Elements that mark up no text inside the selection are
    dropped.
    """

    def __init__(self, string):
        """Parse *string*, an XML fragment (text and/or elements).

        Raises ``xml.sax.SAXParseException`` if the fragment is not
        well-formed.
        """
        super(MarkupString, self).__init__()
        self.handler = simpleHandler()
        # The fragment may be bare text or several sibling elements, so it
        # is wrapped in a single root element that the handler ignores.
        wrapped = u"<foobar>%s</foobar>" % string
        xml.sax.parseString(wrapped.encode('utf-8'), self.handler)
        self.raw = self.handler.content

    def __getitem__(self, n):
        """Return the marked up character or slice selected by *n*.

        *n* is an integer (negative values count from the end) or a slice
        without a step.  Raises ``IndexError`` for an integer outside the
        text and ``TypeError`` for a slice with a step other than 1.
        """
        length = len(self.raw)
        if isinstance(n, slice):
            if n.step not in (None, 1):
                raise TypeError("MarkupString slices do not support a step")
            start, stop, _ = n.indices(length)
            # An inverted range selects nothing.
            stop = max(start, stop)
        else:
            start = n + length if n < 0 else n
            if not 0 <= start < length:
                raise IndexError("MarkupString index out of range")
            stop = start + 1
        return self._render(start, stop)

    def __getslice__(self, s, e):
        """Return ``self[s:e]``.

        Python 2 calls this for plain slice syntax; it is kept so that
        direct callers keep working.  ``None`` for *e* means "to the end".
        """
        return self.__getitem__(slice(s, e))

    def _render(self, start, stop):
        """Build the markup for ``raw[start:stop]``.

        Expects ``0 <= start <= stop <= len(raw)``.
        """
        opening = {}  # position -> start tags to emit before that character
        closing = {}  # position -> end tags to emit before that character
        for (el_start, el_end), name, attrs in self.handler.elements:
            # Clip the element to the requested window.
            first = max(el_start, start)
            last = min(el_end, stop)
            if first >= last:
                # Outside the window, or marks up no text inside it.
                continue
            tag = [u"<%s" % name]
            for key, value in attrs.items():
                tag.append(
                    u" %s=%s" % (key, xml.sax.saxutils.quoteattr(value)))
            tag.append(u">")
            opening.setdefault(first, []).append(u"".join(tag))
            closing.setdefault(last, []).append(u"</%s>" % name)

        escape = xml.sax.saxutils.escape
        parts = []
        for pos in range(start, stop):
            parts.extend(closing.get(pos, ()))
            # Elements are recorded innermost first; start tags must be
            # written outermost first, hence the reversal.
            parts.extend(reversed(opening.get(pos, ())))
            # ``raw`` holds decoded text, so it is escaped again to keep the
            # result parseable.
            parts.append(escape(self.raw[pos]))
        parts.extend(closing.get(stop, ()))
        return MarkupString(u"".join(parts))

    def __len__(self):
        """Return the length of the text without markup."""
        return len(self.raw)