1 # ====================================================================
2 # Licensed under the Apache License, Version 2.0 (the "License");
3 # you may not use this file except in compliance with the License.
4 # You may obtain a copy of the License at
6 # http://www.apache.org/licenses/LICENSE-2.0
8 # Unless required by applicable law or agreed to in writing, software
9 # distributed under the License is distributed on an "AS IS" BASIS,
10 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 # See the License for the specific language governing permissions and
12 # limitations under the License.
13 # ====================================================================
15 from StringIO import StringIO
16 from HTMLParser import HTMLParser
class InputStreamReader(object):
    """Present a byte stream as a stream of decoded text.

    ``inputStream`` is any object with ``read(length)`` and ``close()``
    methods whose ``read`` returns encoded bytes.  ``encoding`` names the
    codec used to decode them; a false value (``None``, ``''``) selects
    UTF-8.
    """

    def __init__(self, inputStream, encoding):
        """Remember the stream and prepare a decoder for ``encoding``."""
        import codecs

        super(InputStreamReader, self).__init__()
        self.inputStream = inputStream
        self.encoding = encoding or 'utf-8'
        # An incremental decoder holds back the leading bytes of a character
        # that was cut in half by a fixed-size read, instead of raising
        # UnicodeDecodeError on an otherwise valid stream.
        self._decoder = codecs.getincrementaldecoder(self.encoding)()

    def _read(self, length):
        """Return up to ``length`` raw bytes from the underlying stream."""
        return self.inputStream.read(length)

    def read(self, length=-1):
        """Read up to ``length`` bytes and return them decoded as text.

        A negative (or ``None``) ``length`` reads the rest of the stream.
        An empty string is returned only at end of stream: when a chunk
        contains nothing but the start of a multi-byte character, more
        bytes are fetched until at least one character can be produced.

        Raises ``UnicodeDecodeError`` if the bytes are not valid in the
        configured encoding, including a stream that ends mid-character.
        """
        read_all = length is None or length < 0
        while True:
            raw = self._read(length)
            at_end = read_all or not raw
            text = self._decoder.decode(raw, final=at_end)
            if text or at_end:
                return text

    def close(self):
        """Close the underlying stream."""
        self.inputStream.close()
43 class HTMLReader(object):
def __init__(self, reader):
    """Wrap ``reader`` and build the parser that extracts text from its HTML.

    ``reader`` is kept on ``self.reader``.  ``self.parser`` is an
    ``HTMLParser`` subclass instance that collects every piece of character
    data it is fed and hands it back through ``_read(length)``.
    """
    self.reader = reader

    class htmlParser(HTMLParser):
        """HTML parser that keeps only the text between tags."""

        def __init__(self):
            # Explicit base call, exactly as the original code spelled it.
            HTMLParser.__init__(self)

            # Text seen so far; the buffer's cursor always rests at the end
            # between calls so that writes append.
            self.buffer = StringIO()
            # Offset of the first character not yet returned by _read().
            self.position = 0

        def handle_data(self, data):
            """Append a run of character data to the pending text."""
            self.buffer.write(data)

        def _read(self, length):
            """Return pending text, at most ``length`` characters of it.

            A ``length`` that is ``None`` or not positive returns all the
            pending text.  Returns ``''`` when nothing is pending.
            """
            buffer = self.buffer
            size = buffer.tell() - self.position
            if size <= 0:
                return ''

            buffer.seek(self.position)
            if length is not None and 0 < length < size:
                data = buffer.read(length)
                self.position += len(data)
                # Put the cursor back at the end, otherwise the next
                # handle_data() would overwrite text not yet returned.
                buffer.seek(0, 2)
            else:
                data = buffer.read(size)
                # Everything was consumed: start over with an empty buffer
                # so it does not grow for the lifetime of the parser.
                self.position = 0
                buffer.seek(0)
                buffer.truncate()

            return data

    self.parser = htmlParser()
86 def read(self, length=-1):
89 data = self.reader.read(length)
91 self.parser.feed(data)
92 data = self.parser._read(length)