1 # Copyright 2006 Joe Wreschnig <piman@sacredchao.net>
3 # This program is free software; you can redistribute it and/or modify
4 # it under the terms of the GNU General Public License version 2 as
5 # published by the Free Software Foundation.
7 # $Id: m4a.py 4275 2008-06-01 06:32:37Z piman $
9 """Read and write MPEG-4 audio files with iTunes metadata.
11 This module will read MPEG-4 audio information and metadata,
12 as found in Apple's M4A (aka MP4, M4B, M4P) files.
14 There is no official specification for this format. The source code
15 for TagLib, FAAD, and various MPEG specifications at
16 http://developer.apple.com/documentation/QuickTime/QTFF/,
17 http://www.geocities.com/xhelmboyx/quicktime/formats/mp4-layout.txt,
18 and http://wiki.multimedia.cx/index.php?title=Apple_QuickTime were all
21 This module does not support 64 bit atom sizes, and so will not
22 work on metadata over 4GB.
28 from cStringIO import StringIO
30 from mutagen import FileType, Metadata
31 from mutagen._constants import GENRES
32 from mutagen._util import cdata, insert_bytes, delete_bytes, DictProxy
class error(IOError):
    """Base class for the errors raised by this module."""


class M4AMetadataError(error):
    """Raised for problems with the iTunes metadata of a file."""


class M4AStreamInfoError(error):
    """Raised for problems with the audio stream information of a file."""


class M4AMetadataValueError(ValueError, M4AMetadataError):
    """Raised when a tag value cannot be rendered into an atom."""
41 "mutagen.m4a is deprecated; use mutagen.mp4 instead.", DeprecationWarning)
# Container atoms this module descends into while parsing. The list is
# deliberately limited to what the code here needs to peek inside; it is
# not an exhaustive list of container atoms.
_CONTAINERS = [
    "moov", "udta", "trak", "mdia", "meta", "ilst", "stbl", "minf", "stsd",
]
# Number of bytes skipped after a container's header before its first
# child atom is read.
_SKIP_SIZE = {"meta": 4}
49 __all__ = ['M4A', 'Open', 'delete', 'M4ACover']
55 format -- format of the image (either FORMAT_JPEG or FORMAT_PNG)
60 def __new__(cls, data, format=None):
61 self = str.__new__(cls, data)
62 if format is None: format= M4ACover.FORMAT_JPEG
67 """An individual atom.
70 children -- list of child atoms (or None for non-container atoms)
71 length -- length of this atom, including length and name
72 name -- four byte name of the atom, as a str
73 offset -- location in the constructor-given fileobj of this atom
75 This structure should only be used internally by Mutagen.
80 def __init__(self, fileobj):
81 self.offset = fileobj.tell()
82 self.length, self.name = struct.unpack(">I4s", fileobj.read(8))
84 raise error("64 bit atom sizes are not supported")
88 if self.name in _CONTAINERS:
90 fileobj.seek(_SKIP_SIZE.get(self.name, 0), 1)
91 while fileobj.tell() < self.offset + self.length:
92 self.children.append(Atom(fileobj))
94 fileobj.seek(self.offset + self.length, 0)
96 def render(name, data):
97 """Render raw atom data."""
98 # this raises OverflowError if Py_ssize_t can't handle the atom data
100 if size <= 0xFFFFFFFF:
101 return struct.pack(">I4s", size, name) + data
103 return struct.pack(">I4sQ", 1, name, size + 8) + data
104 render = staticmethod(render)
106 def __getitem__(self, remaining):
107 """Look up a child atom, potentially recursively.
109 e.g. atom['udta', 'meta'] => <Atom name='meta' ...>
113 elif self.children is None:
114 raise KeyError("%r is not a container" % self.name)
115 for child in self.children:
116 if child.name == remaining[0]:
117 return child[remaining[1:]]
119 raise KeyError, "%r not found" % remaining[0]
122 klass = self.__class__.__name__
123 if self.children is None:
124 return "<%s name=%r length=%r offset=%r>" % (
125 klass, self.name, self.length, self.offset)
127 children = "\n".join([" " + line for child in self.children
128 for line in repr(child).splitlines()])
129 return "<%s name=%r length=%r offset=%r\n%s>" % (
130 klass, self.name, self.length, self.offset, children)
133 """Root atoms in a given file.
136 atoms -- a list of top-level atoms as Atom objects
138 This structure should only be used internally by Mutagen.
140 def __init__(self, fileobj):
145 while fileobj.tell() < end:
146 self.atoms.append(Atom(fileobj))
148 def path(self, *names):
149 """Look up and return the complete path of an atom.
151 For example, atoms.path('moov', 'udta', 'meta') will return a
152 list of three atoms, corresponding to the moov, udta, and meta
157 path.append(path[-1][name,])
160 def __getitem__(self, names):
161 """Look up a child atom.
163 'names' may be a list of atoms (['moov', 'udta']) or a string
164 specifying the complete path ('moov.udta').
166 if isinstance(names, basestring):
167 names = names.split(".")
168 for child in self.atoms:
169 if child.name == names[0]:
170 return child[names[1:]]
172 raise KeyError, "%s not found" % names[0]
175 return "\n".join([repr(child) for child in self.atoms])
177 class M4ATags(DictProxy, Metadata):
178 """Dictionary containing Apple iTunes metadata list key/values.
180 Keys are four byte identifiers, except for freeform ('----')
181 keys. Values are usually unicode strings, but some atoms have a
184 trkn, disk -- tuple of 16 bit ints (current, total)
186 covr -- list of M4ACover objects (which are tagged strs)
187 gnre -- not supported. Use '\\xa9gen' instead.
189 The freeform '----' frames use a key in the format '----:mean:name'
190 where 'mean' is usually 'com.apple.iTunes' and 'name' is a unique
191 identifier for this frame. The value is a str, but is probably
192 text that can be decoded as UTF-8.
194 M4A tag data cannot exist outside of the structure of an M4A file,
195 so this class should not be manually instantiated.
197 Unknown non-text tags are removed.
def load(self, atoms, fileobj):
    """Populate the tag dictionary from the file's 'ilst' atom.

    atoms -- parsed atom tree, looked up with the dotted path
             'moov.udta.meta.ilst'
    fileobj -- open file object that the atom offsets refer to

    Raises M4AMetadataError if the 'ilst' atom cannot be found.
    """
    try:
        ilst = atoms["moov.udta.meta.ilst"]
    except KeyError as key:
        raise M4AMetadataError(key)
    # Atoms without a dedicated entry in the dispatch table are read as
    # text; the fallback entry is the same for every atom, so build it once.
    fallback = (M4ATags.__parse_text,)
    for atom in ilst.children:
        # Skip the 8 byte header (size and name) and read only the body.
        fileobj.seek(atom.offset + 8)
        data = fileobj.read(atom.length - 8)
        parse = self.__atoms.get(atom.name, fallback)[0]
        parse(self, atom, data)
210 def __key_sort((key1, v1), (key2, v2)):
211 # iTunes always writes the tags in order of "relevance", try
212 # to copy it as closely as possible.
213 order = ["\xa9nam", "\xa9ART", "\xa9wrt", "\xa9alb",
214 "\xa9gen", "gnre", "trkn", "disk",
215 "\xa9day", "cpil", "tmpo", "\xa9too",
216 "----", "covr", "\xa9lyr"]
217 order = dict(zip(order, range(len(order))))
219 # If there's no key-based way to distinguish, order by length.
220 # If there's still no way, go by string comparison on the
221 # values, so we at least have something deterministic.
222 return (cmp(order.get(key1[:4], last), order.get(key2[:4], last)) or
223 cmp(len(v1), len(v2)) or cmp(v1, v2))
224 __key_sort = staticmethod(__key_sort)
226 def save(self, filename):
227 """Save the metadata to the given filename."""
230 items.sort(self.__key_sort)
231 for key, value in items:
232 render = self.__atoms.get(
233 key[:4], (None, M4ATags.__render_text))[1]
234 values.append(render(self, key, value))
235 data = Atom.render("ilst", "".join(values))
237 # Find the old atoms.
238 fileobj = file(filename, "rb+")
240 atoms = Atoms(fileobj)
244 if moov != atoms.atoms[-1]:
245 # "Free" the old moov block. Something in the mdat
246 # block is not happy when its offset changes and it
247 # won't play back. So, rather than try to figure that
248 # out, just move the moov atom to the end of the file.
249 offset = self.__move_moov(fileobj, moov)
254 path = atoms.path("moov", "udta", "meta", "ilst")
256 self.__save_new(fileobj, atoms, data, offset)
258 self.__save_existing(fileobj, atoms, path, data, offset)
262 def __move_moov(self, fileobj, moov):
263 fileobj.seek(moov.offset)
264 data = fileobj.read(moov.length)
265 fileobj.seek(moov.offset)
266 free = Atom.render("free", "\x00" * (moov.length - 8))
269 # Figure out how far we have to shift all our successive
270 # seek calls, relative to what the atoms say.
271 old_end = fileobj.tell()
273 return old_end - moov.offset
275 def __save_new(self, fileobj, atoms, ilst, offset):
276 hdlr = Atom.render("hdlr", "\x00" * 8 + "mdirappl" + "\x00" * 9)
277 meta = Atom.render("meta", "\x00\x00\x00\x00" + hdlr + ilst)
278 moov, udta = atoms.path("moov", "udta")
279 insert_bytes(fileobj, len(meta), udta.offset + offset + 8)
280 fileobj.seek(udta.offset + offset + 8)
282 self.__update_parents(fileobj, [moov, udta], len(meta), offset)
284 def __save_existing(self, fileobj, atoms, path, data, offset):
285 # Replace the old ilst atom.
287 delta = len(data) - ilst.length
288 fileobj.seek(ilst.offset + offset)
290 insert_bytes(fileobj, delta, ilst.offset + offset)
292 delete_bytes(fileobj, -delta, ilst.offset + offset)
293 fileobj.seek(ilst.offset + offset)
295 self.__update_parents(fileobj, path, delta, offset)
297 def __update_parents(self, fileobj, path, delta, offset):
298 # Update all parent atoms with the new size.
300 fileobj.seek(atom.offset + offset)
301 size = cdata.uint_be(fileobj.read(4)) + delta
302 fileobj.seek(atom.offset + offset)
303 fileobj.write(cdata.to_uint_be(size))
def __render_data(self, key, flags, data):
    """Wrap a payload in a 'data' atom nested inside an atom named key.

    The payload is prefixed with two big-endian 32 bit words: the given
    flags followed by a zero word.
    """
    header = struct.pack(">2I", flags, 0)
    inner = Atom.render("data", header + data)
    return Atom.render(key, inner)
309 def __parse_freeform(self, atom, data):
311 fileobj = StringIO(data)
312 mean_length = cdata.uint_be(fileobj.read(4))
313 # skip over 8 bytes of atom name, flags
314 mean = fileobj.read(mean_length - 4)[8:]
315 name_length = cdata.uint_be(fileobj.read(4))
316 name = fileobj.read(name_length - 4)[8:]
317 value_length = cdata.uint_be(fileobj.read(4))
318 # Name, flags, and reserved bytes
319 value = fileobj.read(value_length - 4)[12:]
321 # Some ---- atoms have no data atom, I have no clue why
322 # they actually end up in the file.
325 self["%s:%s:%s" % (atom.name, mean, name)] = value
def __render_freeform(self, key, value):
    """Render a freeform tag as a '----' atom.

    key -- tag key in the form '----:mean:name'
    value -- raw str stored in the 'data' child

    The rendered atom holds three children, in order: 'mean', 'name'
    and 'data'. Each child starts with its 32 bit big-endian size and
    its four byte name; 'mean' and 'name' are followed by one zero
    word, 'data' by the flag word 0x1 and one zero word.

    Raises M4AMetadataValueError (a ValueError) if the key does not
    contain both a 'mean' and a 'name' part.
    """
    try:
        dummy, mean, name = key.split(":", 2)
    except ValueError:
        raise M4AMetadataValueError("invalid freeform key %r" % (key,))
    # The size fields count the child header as well as the payload.
    mean = struct.pack(">I4sI", len(mean) + 12, "mean", 0) + mean
    name = struct.pack(">I4sI", len(name) + 12, "name", 0) + name
    value = struct.pack(">I4s2I", len(value) + 16, "data", 0x1, 0) + value
    return Atom.render("----", mean + name + value)
def __parse_pair(self, atom, data):
    """Store the two big-endian 16 bit ints at data[18:22] as a tuple."""
    pair = struct.unpack(">2H", data[18:22])
    self[atom.name] = pair
336 def __render_pair(self, key, value):
338 if 0 <= track < 1 << 16 and 0 <= total < 1 << 16:
339 data = struct.pack(">4H", 0, track, total, 0)
340 return self.__render_data(key, 0, data)
342 raise M4AMetadataValueError("invalid numeric pair %r" % (value,))
344 def __render_pair_no_trailing(self, key, value):
346 if 0 <= track < 1 << 16 and 0 <= total < 1 << 16:
347 data = struct.pack(">3H", 0, track, total)
348 return self.__render_data(key, 0, data)
350 raise M4AMetadataValueError("invalid numeric pair %r" % (value,))
def __parse_genre(self, atom, data):
    """Translate a numeric 'gnre' atom into a freeform '\\xa9gen' tag.

    The number stored at data[16:18] is used as a 1-based index into
    GENRES. Nothing is stored when a '\\xa9gen' tag is already present
    or when the number does not correspond to an entry in GENRES.
    """
    genre = cdata.short_be(data[16:18])
    if "\xa9gen" in self:
        return
    # Check the range explicitly: for zero or a negative number,
    # GENRES[genre - 1] does not raise IndexError but silently wraps
    # around to an entry counted from the end of the list.
    if 1 <= genre <= len(GENRES):
        self["\xa9gen"] = GENRES[genre - 1]
def __parse_tempo(self, atom, data):
    """Store the value at data[16:18], decoded with cdata.short_be."""
    raw = data[16:18]
    self[atom.name] = cdata.short_be(raw)
361 def __render_tempo(self, key, value):
362 if 0 <= value < 1 << 16:
363 return self.__render_data(key, 0x15, cdata.to_ushort_be(value))
365 raise M4AMetadataValueError("invalid short integer %r" % value)
def __parse_compilation(self, atom, data):
    """Store True if the byte at offset 16 is non-zero, otherwise False.

    Data too short to contain that byte is stored as False.
    """
    flag_byte = data[16:17]
    try:
        self[atom.name] = bool(ord(flag_byte))
    except TypeError:
        # ord() rejects the empty string produced by too-short data.
        self[atom.name] = False
def __render_compilation(self, key, value):
    """Render the truth value of value as a single byte, flagged 0x15."""
    flag_byte = chr(bool(value))
    return self.__render_data(key, 0x15, flag_byte)
374 def __parse_cover(self, atom, data):
375 length, name, format = struct.unpack(">I4sI", data[:12])
377 raise M4AMetadataError(
378 "unexpected atom %r inside 'covr'" % name)
379 if format not in (M4ACover.FORMAT_JPEG, M4ACover.FORMAT_PNG):
380 format = M4ACover.FORMAT_JPEG
381 self[atom.name]= M4ACover(data[16:length], format)
def __render_cover(self, key, value):
    """Render cover art as a 'data' atom nested in an atom named key.

    The image format is taken from value.format; values without that
    attribute are written as M4ACover.FORMAT_JPEG.
    """
    image_format = getattr(value, "format", M4ACover.FORMAT_JPEG)
    payload = struct.pack(">2I", image_format, 0) + value
    return Atom.render(key, Atom.render("data", payload))
388 def __parse_text(self, atom, data):
389 flags = cdata.uint_be(data[8:12])
391 self[atom.name] = data[16:].decode('utf-8', 'replace')
def __render_text(self, key, value):
    """Render value as UTF-8 encoded text, flagged 0x1."""
    encoded = value.encode('utf-8')
    return self.__render_data(key, 0x1, encoded)
395 def delete(self, filename):
400 "----": (__parse_freeform, __render_freeform),
401 "trkn": (__parse_pair, __render_pair),
402 "disk": (__parse_pair, __render_pair_no_trailing),
403 "gnre": (__parse_genre, None),
404 "tmpo": (__parse_tempo, __render_tempo),
405 "cpil": (__parse_compilation, __render_compilation),
406 "covr": (__parse_cover, __render_cover),
411 for key, value in self.iteritems():
412 key = key.decode('latin1')
413 try: values.append("%s=%s" % (key, value))
414 except UnicodeDecodeError:
415 values.append("%s=[%d bytes of data]" % (key, len(value)))
416 return "\n".join(values)
418 class M4AInfo(object):
419 """MPEG-4 stream information.
422 bitrate -- bitrate in bits per second, as an int
423 length -- file length in seconds, as a float
428 def __init__(self, atoms, fileobj):
429 hdlr = atoms["moov.trak.mdia.hdlr"]
430 fileobj.seek(hdlr.offset)
431 if "soun" not in fileobj.read(hdlr.length):
432 raise M4AStreamInfoError("track has no audio data")
434 mdhd = atoms["moov.trak.mdia.mdhd"]
435 fileobj.seek(mdhd.offset)
436 data = fileobj.read(mdhd.length)
437 if ord(data[8]) == 0:
443 end = offset + struct.calcsize(format)
444 unit, length = struct.unpack(format, data[offset:end])
445 self.length = float(length) / unit
448 atom = atoms["moov.trak.mdia.minf.stbl.stsd"]
449 fileobj.seek(atom.offset)
450 data = fileobj.read(atom.length)
451 self.bitrate = cdata.uint_be(data[-17:-13])
452 except (ValueError, KeyError):
453 # Bitrate values are optional.
457 return "MPEG-4 audio, %.2f seconds, %d bps" % (
458 self.length, self.bitrate)
461 """An MPEG-4 audio file, probably containing AAC.
463 If more than one track is present in the file, the first is used.
464 Only audio ('soun') tracks will be read.
467 _mimes = ["audio/mp4", "audio/x-m4a", "audio/mpeg4", "audio/aac"]
469 def load(self, filename):
470 self.filename = filename
471 fileobj = file(filename, "rb")
473 atoms = Atoms(fileobj)
474 try: self.info = M4AInfo(atoms, fileobj)
475 except StandardError, err:
476 raise M4AStreamInfoError, err, sys.exc_info()[2]
477 try: self.tags = M4ATags(atoms, fileobj)
478 except M4AMetadataError:
480 except StandardError, err:
481 raise M4AMetadataError, err, sys.exc_info()[2]
486 self.tags = M4ATags()
def score(filename, fileobj, header):
    """Count how many of the markers 'ftyp' and 'mp4' occur in header."""
    return sum(marker in header for marker in ("ftyp", "mp4"))
score = staticmethod(score)
def delete(filename):
    """Remove the tags from the file named by filename."""
    audio = M4A(filename)
    audio.delete()