X-Git-Url: https://git.mdrn.pl/wolnelektury.git/blobdiff_plain/8bd5af0d1af393ad09e7fc162dc6ad82548bfbe1..139af81833f2b6727618d8e2c30cc7157c42e8a1:/lib/markupstring.py diff --git a/lib/markupstring.py b/lib/markupstring.py index f31e148cd..0e273f2a1 100644 --- a/lib/markupstring.py +++ b/lib/markupstring.py @@ -5,50 +5,50 @@ import xml.sax -class simpleHandler (xml.sax.ContentHandler): +class simpleHandler(xml.sax.ContentHandler): """A simple handler that provides us with indices of marked up content.""" - def __init__ (self): + def __init__(self): self.elements = [] #this will contain a list of elements and their start/end indices self.open_elements = [] #this holds info on open elements while we wait for their close self.content = "" - def startElement (self,name,attrs): - if name=='foobar': return # we require an outer wrapper, which we promptly ignore. + def startElement(self, name, attrs): + if name == 'foobar': return # we require an outer wrapper, which we promptly ignore. self.open_elements.append({'name':name, 'attrs':attrs.copy(), 'start':len(self.content), }) - def endElement (self, name): - if name=='foobar': return # we require an outer wrapper, which we promptly ignore. + def endElement(self, name): + if name == 'foobar': return # we require an outer wrapper, which we promptly ignore. for i in range(len(self.open_elements)): e = self.open_elements[i] - if e['name']==name: + if e['name'] == name: # append a (start,end), name, attrs self.elements.append(((e['start'], #start position - len(self.content)),# current (end) position - e['name'],e['attrs']) + len(self.content)), # current (end) position + e['name'], e['attrs']) ) del self.open_elements[i] return - def characters (self, chunk): + def characters(self, chunk): self.content += chunk -class MarkupString (unicode): +class MarkupString(unicode): """A simple class for dealing with marked up strings. 
When we are sliced, we return valid marked up strings, preserving markup.""" - def __init__ (self, string): - unicode.__init__(self, string) + def __init__(self, string): + unicode.__init__(self) self.handler = simpleHandler() xml.sax.parseString((u"%s" % string).encode('utf-8'), self.handler) self.raw = self.handler.content - def __getitem__ (self, n): - return self.__getslice__(n,n+1) + def __getitem__(self, n): + return self.__getslice__(n, n + 1) - def __getslice__ (self, s, e): + def __getslice__(self, s, e): # only include relevant elements if not e or e > len(self.raw): e = len(self.raw) elements = filter(lambda tp: (tp[0][1] >= s and # end after the start... @@ -64,21 +64,21 @@ class MarkupString (unicode): name = el[1] attrs = el[2] # write our start tag - stag = "<%s"%name - for k,v in attrs.items(): stag += " %s=%s"%(k,xml.sax.saxutils.quoteattr(v)) + stag = "<%s" % name + for k, v in attrs.items(): stag += " %s=%s" % (k, xml.sax.saxutils.quoteattr(v)) stag += ">" - etag = "</%s>"%name # simple end tag + etag = "</%s>" % name # simple end tag spos = pos[0] epos = pos[1] - if spos < s: spos=s - if epos > e: epos=e + if spos < s: spos = s + if epos > e: epos = e if epos != spos: # we don't care about tags that don't markup any text - if not starts.has_key(spos): starts[spos]=[] + if not starts.has_key(spos): starts[spos] = [] starts[spos].append(stag) - if not ends.has_key(epos): ends[epos]=[] + if not ends.has_key(epos): ends[epos] = [] ends[epos].append(etag) outbuf = "" # our actual output string - for pos in range(s,e): # we move through positions + for pos in range(s, e): # we move through positions char = self.raw[pos] if ends.has_key(pos): # if there are endtags to insert... for et in ends[pos]: outbuf += et @@ -89,6 +89,9 @@ class MarkupString (unicode): for st in mystarts: outbuf += st outbuf += char if ends.has_key(e): - for et in ends[e]: outbuf+= et + for et in ends[e]: outbuf += et return MarkupString(outbuf) + def __len__(self): + return len(self.raw) +