# Code taken from ActiveState Python recipes:
# http://code.activestate.com/recipes/389023/
#
# Changed by Marek Stepniowski <marek@stepniowski.com> to handle unicode characters
import xml.sax


class simpleHandler(xml.sax.ContentHandler):
    """SAX handler that records character data and where each element marks it up.

    After a parse:

    * ``content`` is the concatenation of all character data seen.
    * ``elements`` holds one ``((start, end), name, attrs)`` tuple per closed
      element, in the order the elements were closed; ``start`` and ``end``
      are offsets into ``content``.
    * ``open_elements`` holds a dict (``name``, ``attrs``, ``start``) for every
      element that has been opened but not yet closed.

    Elements named ``foobar`` are ignored: callers wrap their input in such an
    element to give the parser a single root.
    """

    def __init__(self):
        # Explicit base call rather than super(): it works whether or not the
        # base class is a new-style class.
        xml.sax.ContentHandler.__init__(self)
        self.elements = []       # closed elements: ((start, end), name, attrs)
        self.open_elements = []  # stack of elements still waiting for their end tag
        self.content = ""

    def startElement(self, name, attrs):
        """Push a record for *name*, remembering where its content begins."""
        if name == 'foobar':
            return  # the outer wrapper is required by the parser but not reported
        self.open_elements.append({
            'name': name,
            'attrs': attrs.copy(),
            'start': len(self.content),
        })

    def endElement(self, name):
        """Close the most recently opened *name* element and record its span."""
        if name == 'foobar':
            return  # the outer wrapper is required by the parser but not reported
        # Search from the top of the stack: an end tag always belongs to the
        # innermost open element of that name.  Scanning from the bottom would
        # pair the end tag of a nested <b> with the start of the outer <b>.
        for i in range(len(self.open_elements) - 1, -1, -1):
            e = self.open_elements[i]
            if e['name'] == name:
                span = (e['start'], len(self.content))
                self.elements.append((span, e['name'], e['attrs']))
                del self.open_elements[i]
                return

    def characters(self, chunk):
        """Append a chunk of character data to ``content``."""
        self.content += chunk


class MarkupString(type(u"")):
    """A text string with inline XML markup, indexed by its visible characters.

    The string value is the marked-up source; ``raw`` is the text with all
    tags removed.  ``len()``, indexing and slicing operate on ``raw``
    positions, and indexing/slicing return a new ``MarkupString`` in which
    every element overlapping the requested range is re-opened and re-closed
    so the result is well-formed on its own.  Elements that would mark up no
    text in the result are dropped.

    ``type(u"")`` is the text type of the running interpreter (``unicode``
    where that name exists, ``str`` otherwise).

    Attributes:
        handler: the ``simpleHandler`` used to parse the markup.
        raw: the character data of the string, without tags.
    """

    def __init__(self, string):
        """Parse *string* (an XML fragment) and record its markup.

        The text value itself is set by the inherited ``__new__``; nothing is
        forwarded to the base ``__init__``.

        Raises:
            xml.sax.SAXParseException: if *string* is not well-formed once
                wrapped in a single root element.
        """
        self.handler = simpleHandler()
        # The fragment may have several top-level nodes, so give the parser a
        # single root; the handler ignores this wrapper element.
        xml.sax.parseString((u"<foobar>%s</foobar>" % string).encode('utf-8'),
                            self.handler)
        self.raw = self.handler.content

    def __getitem__(self, n):
        """Return the marked-up character at index *n*, or the slice *n*.

        Raises:
            IndexError: if an integer *n* is outside the raw text.
            TypeError: if *n* is a slice with a step other than 1.
        """
        size = len(self.raw)
        if isinstance(n, slice):
            if n.step not in (None, 1):
                raise TypeError("MarkupString slices do not support a step")
            start, stop, _ = n.indices(size)
            return self.__getslice__(start, stop)
        index = n + size if n < 0 else n
        if not 0 <= index < size:
            # Required by the sequence protocol; returning an empty string
            # here would let index-based iteration run forever.
            raise IndexError("MarkupString index out of range")
        return self.__getslice__(index, index + 1)

    def __getslice__(self, s, e):
        """Return raw positions ``s`` (inclusive) to ``e`` (exclusive) with markup.

        ``None`` means "from the beginning" for *s* and "to the end" for *e*.
        Both bounds are clamped to ``[0, len(self.raw)]``; negative values are
        not counted from the end here (``__getitem__`` resolves those before
        delegating).  An end of ``0`` yields an empty result.
        """
        # Imported here so the module does not rely on xml.sax.saxutils having
        # been loaded as a side effect of creating a parser.
        from xml.sax.saxutils import escape, quoteattr

        size = len(self.raw)
        s = 0 if s is None else min(max(s, 0), size)
        e = size if e is None else min(max(e, 0), size)
        if s >= e:
            return MarkupString(u"")

        starts = {}  # raw position -> start tags to emit before that character
        ends = {}    # raw position -> end tags to emit before that character
        for (start, end), name, attrs in self.handler.elements:
            # Clip the element to the slice; skip it if nothing is left.
            spos = max(start, s)
            epos = min(end, e)
            if epos <= spos:
                continue
            attr_text = u"".join(u" %s=%s" % (k, quoteattr(v))
                                 for k, v in attrs.items())
            starts.setdefault(spos, []).append(u"<%s%s>" % (name, attr_text))
            ends.setdefault(epos, []).append(u"</%s>" % name)

        pieces = []
        for pos in range(s, e):
            # Close before opening so adjacent siblings do not overlap.
            pieces.extend(ends.get(pos, ()))
            # Elements are recorded innermost-first (closing order), so the
            # start tags are emitted reversed: <i><b>...</b></i>.
            pieces.extend(reversed(starts.get(pos, ())))
            # raw holds decoded text; re-escape it so the result parses again.
            pieces.append(escape(self.raw[pos]))
        pieces.extend(ends.get(e, ()))
        return MarkupString(u"".join(pieces))

    def __len__(self):
        """Return the number of raw (tag-free) characters."""
        return len(self.raw)
