1 # Copyright 2006 Joe Wreschnig <piman@sacredchao.net>
3 # This program is free software; you can redistribute it and/or modify
4 # it under the terms of the GNU General Public License version 2 as
5 # published by the Free Software Foundation.
7 # $Id: ogg.py 4275 2008-06-01 06:32:37Z piman $
9 """Read and write Ogg bitstreams and pages.
11 This module reads and writes a subset of the Ogg bitstream format
12 version 0. It does *not* read or write Ogg Vorbis files! For that,
13 you should use mutagen.oggvorbis.
15 This implementation is based on the RFC 3533 standard found at
16 http://www.xiph.org/ogg/doc/rfc3533.txt.
23 from cStringIO import StringIO
25 from mutagen import FileType
26 from mutagen._util import cdata, insert_bytes, delete_bytes
29 """Ogg stream parsing errors."""
32 class OggPage(object):
33 """A single Ogg page (not necessarily a single encoded packet).
35 A page is a header of 26 bytes, followed by the length of the
36 data, followed by the data.
38 The constructor is given a file-like object pointing to the start
39 of an Ogg page. After the constructor is finished it is pointing
40 to the start of the next page.
43 version -- stream structure version (currently always 0)
44 position -- absolute stream position (default -1)
45 serial -- logical stream serial number (default 0)
46 sequence -- page sequence number within logical stream (default 0)
47 offset -- offset this page was read from (default None)
48 complete -- if the last packet on this page is complete (default True)
49 packets -- list of raw packet data (default [])
51 Note that if 'complete' is false, the next page's 'continued'
52 property must be true (so set both when constructing pages).
54 If a file-like object is supplied to the constructor, the above
55 attributes will be filled in based on it.
66 def __init__(self, fileobj=None):
72 self.offset = fileobj.tell()
74 header = fileobj.read(27)
79 (oggs, self.version, self.__type_flags, self.position,
80 self.serial, self.sequence, crc, segments) = struct.unpack(
83 raise error("unable to read full header; got %r" % header)
86 raise error("read %r, expected %r, at 0x%x" % (
87 oggs, "OggS", fileobj.tell() - 27))
90 raise error("version %r unsupported" % self.version)
94 lacing_bytes = fileobj.read(segments)
95 if len(lacing_bytes) != segments:
96 raise error("unable to read %r lacing bytes" % segments)
97 for c in map(ord, lacing_bytes):
100 lacings.append(total)
103 lacings.append(total)
104 self.complete = False
106 self.packets = map(fileobj.read, lacings)
107 if map(len, self.packets) != lacings:
108 raise error("unable to read full data")
110 def __eq__(self, other):
111 """Two Ogg pages are the same if they write the same data."""
113 return (self.write() == other.write())
114 except AttributeError:
118 attrs = ['version', 'position', 'serial', 'sequence', 'offset',
119 'complete', 'continued', 'first', 'last']
120 values = ["%s=%r" % (attr, getattr(self, attr)) for attr in attrs]
121 return "<%s %s, %d bytes in %d packets>" % (
122 type(self).__name__, " ".join(values), sum(map(len, self.packets)),
126 """Return a string encoding of the page header and data.
128 A ValueError is raised if the data is too big to fit in a
133 struct.pack("<4sBBqIIi", "OggS", self.version, self.__type_flags,
134 self.position, self.serial, self.sequence, 0)
138 for datum in self.packets:
139 quot, rem = divmod(len(datum), 255)
140 lacing_data.append("\xff" * quot + chr(rem))
141 lacing_data = "".join(lacing_data)
142 if not self.complete and lacing_data.endswith("\x00"):
143 lacing_data = lacing_data[:-1]
144 data.append(chr(len(lacing_data)))
145 data.append(lacing_data)
146 data.extend(self.packets)
149 # Python's CRC is swapped relative to Ogg's needs.
150 crc = ~zlib.crc32(data.translate(cdata.bitswap), -1)
151 # Although we're using to_int_be, this actually makes the CRC
152 # a proper le integer, since Python's CRC is byteswapped.
153 crc = cdata.to_int_be(crc).translate(cdata.bitswap)
154 data = data[:22] + crc + data[26:]
158 size = 27 # Initial header size
159 for datum in self.packets:
160 quot, rem = divmod(len(datum), 255)
162 if not self.complete and rem == 0:
163 # Packet contains a multiple of 255 bytes and is not
164 # terminated, so we don't have a \x00 at the end.
166 size += sum(map(len, self.packets))
169 size = property(__size, doc="Total frame size.")
171 def __set_flag(self, bit, val):
173 if val: self.__type_flags |= mask
174 else: self.__type_flags &= ~mask
176 continued = property(
177 lambda self: cdata.test_bit(self.__type_flags, 0),
178 lambda self, v: self.__set_flag(0, v),
179 doc="The first packet is continued from the previous page.")
182 lambda self: cdata.test_bit(self.__type_flags, 1),
183 lambda self, v: self.__set_flag(1, v),
184 doc="This is the first page of a logical bitstream.")
187 lambda self: cdata.test_bit(self.__type_flags, 2),
188 lambda self, v: self.__set_flag(2, v),
189 doc="This is the last page of a logical bitstream.")
191 def renumber(klass, fileobj, serial, start):
192 """Renumber pages belonging to a specified logical stream.
194 fileobj must be opened with mode r+b or w+b.
196 Starting at page number 'start', renumber all pages belonging
197 to logical stream 'serial'. Other pages will be ignored.
199 fileobj must point to the start of a valid Ogg page; any pages
200 occurring after it that are part of the specified logical stream
201 will be renumbered. No adjustment will be made to the data in
202 the pages nor the granule position; only the page number, and
205 If an error occurs (e.g. non-Ogg data is found), fileobj will
206 be left pointing to the place in the stream where the error occurred,
207 but the invalid data will be left intact (since this function
208 does not change the total file size).
213 try: page = OggPage(fileobj)
217 if page.serial != serial:
218 # Wrong stream, skip this page.
220 # Changing the number can't change the page size,
221 # so seeking back based on the current size is safe.
222 fileobj.seek(-page.size, 1)
223 page.sequence = number
224 fileobj.write(page.write())
225 fileobj.seek(page.offset + page.size, 0)
227 renumber = classmethod(renumber)
229 def to_packets(klass, pages, strict=False):
230 """Construct a list of packet data from a list of Ogg pages.
232 If strict is true, the first page must start a new packet,
233 and the last page must end the last packet.
236 serial = pages[0].serial
237 sequence = pages[0].sequence
241 if pages[0].continued:
242 raise ValueError("first packet is continued")
243 if not pages[-1].complete:
244 raise ValueError("last packet does not complete")
245 elif pages and pages[0].continued:
249 if serial != page.serial:
250 raise ValueError("invalid serial number in %r" % page)
251 elif sequence != page.sequence:
252 raise ValueError("bad sequence number in %r" % page)
255 if page.continued: packets[-1] += page.packets[0]
256 else: packets.append(page.packets[0])
257 packets.extend(page.packets[1:])
260 to_packets = classmethod(to_packets)
262 def from_packets(klass, packets, sequence=0,
263 default_size=4096, wiggle_room=2048):
264 """Construct a list of Ogg pages from a list of packet data.
266 The algorithm will generate pages of approximately
267 default_size in size (rounded down to the nearest multiple of
268 255). However, it will also allow pages to increase to
269 approximately default_size + wiggle_room if allowing the
270 wiggle room would finish a packet (only one packet will be
271 finished in this way per page; if the next packet would fit
272 into the wiggle room, it still starts on a new page).
274 This method reduces packet fragmentation when packet sizes are
275 slightly larger than the default page size, while still
276 ensuring most pages are of the average size.
278 Pages are numbered starting at 'sequence'; other information is
282 chunk_size = (default_size // 255) * 255
287 page.sequence = sequence
289 for packet in packets:
290 page.packets.append("")
292 data, packet = packet[:chunk_size], packet[chunk_size:]
293 if page.size < default_size and len(page.packets) < 255:
294 page.packets[-1] += data
296 # If we've put any packet data into this page yet,
297 # we need to mark it incomplete. However, we can
298 # also have just started this packet on an already
299 # full page, in which case, just start the new
300 # page with this packet.
302 page.complete = False
303 if len(page.packets) == 1:
309 page.continued = not pages[-1].complete
310 page.sequence = pages[-1].sequence + 1
311 page.packets.append(data)
313 if len(packet) < wiggle_room:
314 page.packets[-1] += packet
321 from_packets = classmethod(from_packets)
323 def replace(klass, fileobj, old_pages, new_pages):
324 """Replace old_pages with new_pages within fileobj.
326 old_pages must have come from reading fileobj originally.
327 new_pages are assumed to have the 'same' data as old_pages,
328 and so the serial and sequence numbers will be copied, as will
329 the flags for the first and last pages.
331 fileobj will be resized and pages renumbered as necessary. As
332 such, it must be opened r+b or w+b.
335 # Number the new pages starting from the first old page.
336 first = old_pages[0].sequence
337 for page, seq in zip(new_pages, range(first, first + len(new_pages))):
339 page.serial = old_pages[0].serial
341 new_pages[0].first = old_pages[0].first
342 new_pages[0].last = old_pages[0].last
343 new_pages[0].continued = old_pages[0].continued
345 new_pages[-1].first = old_pages[-1].first
346 new_pages[-1].last = old_pages[-1].last
347 new_pages[-1].complete = old_pages[-1].complete
348 if not new_pages[-1].complete and len(new_pages[-1].packets) == 1:
349 new_pages[-1].position = -1L
351 new_data = "".join(map(klass.write, new_pages))
353 # Make room in the file for the new data.
354 delta = len(new_data)
355 fileobj.seek(old_pages[0].offset, 0)
356 insert_bytes(fileobj, delta, old_pages[0].offset)
357 fileobj.seek(old_pages[0].offset, 0)
358 fileobj.write(new_data)
359 new_data_end = old_pages[0].offset + delta
361 # Go through the old pages and delete them. Since we shifted
362 # the data down the file, we need to adjust their offsets. We
363 # also need to go backwards, so we don't adjust the deltas of
366 for old_page in old_pages:
367 adj_offset = old_page.offset + delta
368 delete_bytes(fileobj, old_page.size, adj_offset)
370 # Finally, if there's any discrepancy in length, we need to
371 # renumber the pages for the logical stream.
372 if len(old_pages) != len(new_pages):
373 fileobj.seek(new_data_end, 0)
374 serial = new_pages[-1].serial
375 sequence = new_pages[-1].sequence + 1
376 klass.renumber(fileobj, serial, sequence)
377 replace = classmethod(replace)
379 def find_last(klass, fileobj, serial):
380 """Find the last page of the stream 'serial'.
382 If the file is not multiplexed this function is fast. If it is,
383 it must read the whole stream.
385 This finds the last page in the actual file object, or the last
386 page in the stream (with eos set), whichever comes first.
389 # For non-muxed streams, look at the last page.
390 try: fileobj.seek(-256*256, 2)
392 # The file is less than 64k in length.
394 data = fileobj.read()
395 try: index = data.rindex("OggS")
397 raise error("unable to find final Ogg header")
398 stringobj = StringIO(data[index:])
401 page = OggPage(stringobj)
405 if page.serial == serial:
406 if page.last: return page
407 else: best_page = page
408 else: best_page = None
410 # The stream is muxed, so use the slow way.
413 page = OggPage(fileobj)
415 page = OggPage(fileobj)
416 while page.serial != serial:
417 page = OggPage(fileobj)
424 find_last = classmethod(find_last)
426 class OggFileType(FileType):
427 """A generic Ogg file."""
432 _mimes = ["application/ogg", "application/x-ogg"]
434 def load(self, filename):
435 """Load file information from a filename."""
437 self.filename = filename
438 fileobj = file(filename, "rb")
441 self.info = self._Info(fileobj)
442 self.tags = self._Tags(fileobj, self.info)
445 # The streaminfo gave us real length information,
446 # don't waste time scanning the Ogg.
449 last_page = OggPage.find_last(fileobj, self.info.serial)
450 samples = last_page.position
452 denom = self.info.sample_rate
453 except AttributeError:
454 denom = self.info.fps
455 self.info.length = samples / float(denom)
458 raise self._Error, e, sys.exc_info()[2]
460 raise self._Error, "no appropriate stream found"
464 def delete(self, filename=None):
465 """Remove tags from a file.
467 If no filename is given, the one most recently loaded is used.
470 filename = self.filename
473 fileobj = file(filename, "rb+")
475 try: self.tags._inject(fileobj)
477 raise self._Error, e, sys.exc_info()[2]
479 raise self._Error, "no appropriate stream found"
483 def save(self, filename=None):
484 """Save a tag to a file.
486 If no filename is given, the one most recently loaded is used.
489 filename = self.filename
490 fileobj = file(filename, "rb+")
492 try: self.tags._inject(fileobj)
494 raise self._Error, e, sys.exc_info()[2]
496 raise self._Error, "no appropriate stream found"