3 # Copyright 2005 Joe Wreschnig <piman@sacredchao.net>
5 # This program is free software; you can redistribute it and/or modify
6 # it under the terms of the GNU General Public License version 2 as
7 # published by the Free Software Foundation.
9 # $Id: apev2.py 4275 2008-06-01 06:32:37Z piman $
11 """APEv2 reading and writing.
13 The APEv2 format is most commonly used with Musepack files, but is
14 also the format of choice for WavPack and other formats. Some MP3s
15 also have APEv2 tags, but this can cause problems with many MP3
18 APEv2 tags, like Vorbis comments, are freeform key=value pairs. APEv2
19 keys can be any ASCII string with characters from 0x20 to 0x7E,
20 between 2 and 255 characters long. Keys are case-sensitive, but
21 readers are recommended to be case insensitive, and it is forbidden to
22 have multiple keys which differ only in case. Keys are usually stored
23 title-cased (e.g. 'Artist' rather than 'artist').
25 APEv2 values are slightly more structured than Vorbis comments; values
26 are flagged as one of text, binary, or an external reference (usually
29 Based off the format specification found at
30 http://wiki.hydrogenaudio.org/index.php?title=APEv2_specification.
33 __all__ = ["APEv2", "APEv2File", "Open", "delete"]
36 from cStringIO import StringIO
def is_valid_apev2_key(key):
    """Return True if key may be used as an APEv2 item key.

    A key qualifies when it is 2 to 255 characters long, every character
    lies between ' ' and '~' inclusive, and the key is not one of
    "OggS", "TAG", "ID3" or "MP+".
    """
    if not 2 <= len(key) <= 255:
        return False
    # The smallest and largest characters bound every character in the key.
    if min(key) < ' ' or max(key) > '~':
        return False
    return key not in ["OggS", "TAG", "ID3", "MP+"]
# There are three different kinds of APE tag values:
#   0: Item contains text information coded in UTF-8
#   1: Item contains binary information
#   2: Item is a locator of external stored information [e.g. URL]
TEXT, BINARY, EXTERNAL = range(3)

# Flag bit 30. A plain int literal is used on purpose: Python promotes
# ints to longs automatically, so the "L" suffix adds nothing and is a
# syntax error on Python 3.
HAS_NO_FOOTER = 1 << 30
class error(IOError):
    """Base class of the APEv2 exceptions defined below; an IOError."""


class APENoHeaderError(error, ValueError):
    """Raised when no APE tag can be found."""


class APEUnsupportedVersionError(error, ValueError):
    """APE error for a tag version that is not supported (per its name)."""


class APEBadItemError(error, ValueError):
    """Raised for a malformed tag item, e.g. an invalid value type."""
58 from mutagen import Metadata, FileType
59 from mutagen._util import DictMixin, cdata, utf8, delete_bytes
# Locates an APEv2 tag inside an open file object and records the byte
# offsets of its parts in the attributes below.
61 class _APEv2Data(object):
62 # Store offsets of the important parts of the file.
63 start = header = data = footer = end = None
64 # Footer or header; seek here and read 32 to get version/size/items/flags
74 # The tag is at the start rather than the end. A tag at both
75 # the start and end of the file (i.e. the tag is the whole file)
76 # is not considered to be at the start.
79 def __init__(self, fileobj):
# Find the tag, derive the remaining offsets, then read the raw item
# bytes into self.tag. Returns early when neither a header nor a footer
# was located.
80 self.__find_metadata(fileobj)
# Picks whichever of header/footer is set; this relies on Python 2
# ordering, where None compares lower than any integer.
81 self.metadata = max(self.header, self.footer)
82 if self.metadata is None: return
83 self.__fill_missing(fileobj)
84 self.__fix_brokenness(fileobj)
85 if self.data is not None:
86 fileobj.seek(self.data)
87 self.tag = fileobj.read(self.size)
89 def __find_metadata(self, fileobj):
# In the seeks below, whence 2 means "relative to the end of the file"
# and whence 1 means "relative to the current position".
90 # Try to find a header or footer.
92 # Check for a simple footer.
93 try: fileobj.seek(-32, 2)
97 if fileobj.read(8) == "APETAGEX":
99 self.footer = self.metadata = fileobj.tell()
102 # Check for an APEv2 tag followed by an ID3v1 tag at the end.
104 fileobj.seek(-128, 2)
105 if fileobj.read(3) == "TAG":
107 fileobj.seek(-35, 1) # "TAG" + header length
108 if fileobj.read(8) == "APETAGEX":
110 self.footer = fileobj.tell()
113 # ID3v1 tag at the end, maybe preceded by Lyrics3v2.
114 # (http://www.id3.org/lyrics3200.html)
115 # (header length - "APETAGEX") - "LYRICS200"
117 if fileobj.read(9) == 'LYRICS200':
118 fileobj.seek(-15, 1) # "LYRICS200" + size tag
# The 6 bytes read here are parsed as a decimal number and used as the
# distance to step back over in the seek that follows.
119 try: offset = int(fileobj.read(6))
123 fileobj.seek(-32 - offset - 6, 1)
124 if fileobj.read(8) == "APETAGEX":
126 self.footer = fileobj.tell()
132 # Check for a tag at the start.
134 if fileobj.read(8) == "APETAGEX":
135 self.is_at_start = True
138 def __fill_missing(self, fileobj):
# Skip the 8-byte "APETAGEX" marker at self.metadata, then read four
# 4-byte fields: the version (kept as raw bytes) followed by the size,
# item count and flags (decoded with cdata.uint_le -- presumably
# little-endian unsigned; confirm in mutagen._util).
139 fileobj.seek(self.metadata + 8)
140 self.version = fileobj.read(4)
141 self.size = cdata.uint_le(fileobj.read(4))
142 self.items = cdata.uint_le(fileobj.read(4))
143 self.flags = cdata.uint_le(fileobj.read(4))
145 if self.header is not None:
# The item data starts right after the 32-byte header.
146 self.data = self.header + 32
147 # If we're reading the header, the size is the header
148 # offset + the size, which includes the footer.
149 self.end = self.data + self.size
150 fileobj.seek(self.end - 32, 0)
151 if fileobj.read(8) == "APETAGEX":
152 self.footer = self.end - 32
153 elif self.footer is not None:
# Working backwards from the footer: the tag ends 32 bytes after the
# footer offset and the data starts self.size bytes before that end.
154 self.end = self.footer + 32
155 self.data = self.end - self.size
156 if self.flags & HAS_HEADER:
157 self.header = self.data - 32
159 self.header = self.data
160 else: raise APENoHeaderError("No APE tag found")
162 def __fix_brokenness(self, fileobj):
163 # Fix broken tags written with PyMusepack.
164 if self.header is not None: start = self.header
165 else: start = self.data
169 # Clean up broken writing from pre-Mutagen PyMusepack.
170 # It didn't remove the first 24 bytes of header.
171 try: fileobj.seek(-24, 1)
175 if fileobj.read(8) == "APETAGEX":
177 start = fileobj.tell()
# Dict-like container for the items of an APEv2 tag. Keys are validated
# with is_valid_apev2_key and matched case-insensitively (they are
# lowercased before every lookup, store and delete).
181 class APEv2(DictMixin, Metadata):
182 """A file with an APEv2 tag.
184 ID3v1 tags are silently ignored and overwritten.
189 def __init__(self, *args, **kwargs):
192 super(APEv2, self).__init__(*args, **kwargs)
193 # Internally all names are stored as lowercase, but the case
194 # they were set with is remembered and used when saving. This
195 # is roughly in line with the standard, which says that keys
196 # are case-sensitive but two keys differing only in case are
197 # not allowed, and recommends case-insensitive
201 """Return tag key=value pairs in a human-readable format."""
204 return "\n".join(["%s=%s" % (k, v.pprint()) for k, v in items])
206 def load(self, filename):
207 """Load tags from a filename."""
208 self.filename = filename
# file() is the Python 2 builtin for opening files.
209 fileobj = file(filename, "rb")
211 data = _APEv2Data(fileobj)
# Forget previously remembered key spellings before re-parsing.
216 self.__casemap.clear()
217 self.__parse_tag(data.tag, data.items)
219 raise APENoHeaderError("No APE tag found")
221 def __parse_tag(self, tag, count):
# Read `count` items from the raw tag bytes. Each item starts with a
# 4-byte value size and a 4-byte flags word, followed by a
# NUL-terminated key and then `size` bytes of value; every parsed item
# is stored through self[key].
222 fileobj = StringIO(tag)
224 for i in range(count):
225 size = cdata.uint_le(fileobj.read(4))
226 flags = cdata.uint_le(fileobj.read(4))
228 # Bits 1 and 2 hold the value type, 0-3
229 # Bit 0 is read/write flag, ignored
230 kind = (flags & 6) >> 1
232 raise APEBadItemError("value type must be 0, 1, or 2")
# Read one byte at a time until the NUL that ends the key, or until
# the data runs out (read() then returns an empty string).
233 key = value = fileobj.read(1)
234 while key[-1:] != '\x00' and value:
235 value = fileobj.read(1)
237 if key[-1:] == "\x00":
239 value = fileobj.read(size)
240 self[key] = APEValue(value, kind)
242 def __getitem__(self, key):
# Validate first, then look the value up under the lowercased key.
243 if not is_valid_apev2_key(key):
244 raise KeyError("%r is not a valid APEv2 key" % key)
245 return self.__dict[key.lower()]
247 def __delitem__(self, key):
# Same validation and lowercasing as __getitem__.
248 if not is_valid_apev2_key(key):
249 raise KeyError("%r is not a valid APEv2 key" % key)
250 del(self.__dict[key.lower()])
252 def __setitem__(self, key, value):
253 """'Magic' value setter.
255 This function tries to guess at what kind of value you want to
256 store. If you pass in a valid UTF-8 or Unicode string, it
257 treats it as a text value. If you pass in a list, it treats it
258 as a list of string/Unicode values. If you pass in a string
259 that is not valid UTF-8, it assumes it is a binary value.
261 If you need to force a specific type of value (e.g. binary
262 data that also happens to be valid UTF-8, or an external
263 reference), use the APEValue factory and set the value to the
265 from mutagen.apev2 import APEValue, EXTERNAL
266 tag['Website'] = APEValue('http://example.org', EXTERNAL)
269 if not is_valid_apev2_key(key):
270 raise KeyError("%r is not a valid APEv2 key" % key)
272 if not isinstance(value, _APEValue):
273 # let's guess at the content if we're not already a value...
274 if isinstance(value, unicode):
275 # unicode? we've got to be text.
276 value = APEValue(utf8(value), TEXT)
277 elif isinstance(value, list):
# A list becomes one text value with its entries joined by NUL bytes.
279 value = APEValue("\0".join(map(utf8, value)), TEXT)
# The decode is only a validity probe; its result is not used.
281 try: dummy = value.decode("utf-8")
283 # invalid UTF8 text, probably binary
284 value = APEValue(value, BINARY)
286 # valid UTF8, probably text
287 value = APEValue(value, TEXT)
# Remember the caller's spelling of the key, but index the value under
# the lowercased key.
288 self.__casemap[key.lower()] = key
289 self.__dict[key.lower()] = value
# Report each stored key in the spelling it was set with, falling back
# to the stored (lowercase) key when no spelling was recorded.
292 return [self.__casemap.get(key, key) for key in self.__dict.keys()]
294 def save(self, filename=None):
295 """Save changes to a file.
297 If no filename is given, the one most recently loaded is used.
299 Tags are always written at the end of the file, and include
300 a header and a footer.
303 filename = filename or self.filename
305 fileobj = file(filename, "r+b")
307 fileobj = file(filename, "w+b")
308 data = _APEv2Data(fileobj)
311 delete_bytes(fileobj, data.end - data.start, data.start)
312 elif data.start is not None:
313 fileobj.seek(data.start)
314 # Delete an ID3v1 tag if present, too.
318 # "APE tags items should be sorted ascending by size... This is
319 # not a MUST, but STRONGLY recommended. Actually the items should
320 # be sorted by importance/byte, but this is not feasible."
321 tags = [v._internal(k) for k, v in self.items()]
# cmp-style comparator (Python 2 only): shortest packed item first.
322 tags.sort(lambda a, b: cmp(len(a), len(b)))
326 header = "APETAGEX%s%s" %(
327 # version, tag size, item count, flags
328 struct.pack("<4I", 2000, len(tags) + 32, num_tags,
329 HAS_HEADER | IS_HEADER),
331 fileobj.write(header)
335 footer = "APETAGEX%s%s" %(
336 # version, tag size, item count, flags
337 struct.pack("<4I", 2000, len(tags) + 32, num_tags,
340 fileobj.write(footer)
343 def delete(self, filename=None):
344 """Remove tags from a file."""
345 filename = filename or self.filename
346 fileobj = file(filename, "r+b")
348 data = _APEv2Data(fileobj)
# Only touch the file when a tag was actually located. delete_bytes
# presumably removes (data.end - data.start) bytes starting at
# data.start -- confirm against mutagen._util.
349 if data.start is not None and data.size is not None:
350 delete_bytes(fileobj, data.end - data.start, data.start)
def delete(filename):
    """Remove the APEv2 tag from the named file.

    A file without an APE tag is left alone: the APENoHeaderError raised
    in that case is swallowed.
    """
    try:
        tagged = APEv2(filename)
        tagged.delete()
    except APENoHeaderError:
        pass
def APEValue(value, kind):
    """Build the APEv2 value object that matches `kind`.

    Use this when the value's type has to be given explicitly; binary
    and text data are otherwise detected by APEv2.__setitem__.

    Returns an APETextValue, APEBinaryValue or APEExtValue for TEXT,
    BINARY or EXTERNAL respectively, and raises ValueError for any
    other kind.
    """
    if kind == TEXT:
        return APETextValue(value, kind)
    if kind == BINARY:
        return APEBinaryValue(value, kind)
    if kind == EXTERNAL:
        return APEExtValue(value, kind)
    raise ValueError("kind must be TEXT, BINARY, or EXTERNAL")
# Common base of the APEv2 value classes; instances are built from a
# raw value and a kind.
373 class _APEValue(object):
374 def __init__(self, value, kind):
# Length of the raw stored value.
379 return len(self.value)
383 # Packed format for an item:
# Serialise this value as a tag item for `key`. The visible part packs
# the value length and (kind << 1) as two little-endian unsigned 32-bit
# integers.
389 def _internal(self, key):
391 struct.pack("<2I", len(self.value), self.kind << 1),
# Debug representation: class name, raw value and numeric kind.
395 return "%s(%r, %d)" % (type(self).__name__, self.value, self.kind)
397 class APETextValue(_APEValue):
398 """An APEv2 text value.
400 Text values are Unicode/UTF-8 strings. They can be accessed like
401 strings (with a null separating the values), or arrays of strings."""
403 def __unicode__(self):
# Decode the byte-string form of the value as UTF-8.
404 return unicode(str(self), "utf-8")
407 """Iterate over the strings of the value (not the characters)"""
408 return iter(unicode(self).split("\0"))
410 def __getitem__(self, index):
# Index into the list of NUL-separated strings, not into characters.
411 return unicode(self).split("\0")[index]
# One more string than there are NUL separators.
414 return self.value.count("\0") + 1
416 def __cmp__(self, other):
# Python 2 comparison hook: compare by the decoded text.
417 return cmp(unicode(self), other)
419 def __setitem__(self, index, value):
# NOTE(review): the replacement is UTF-8 encoded here and the joined
# result is encoded again on the next line -- confirm this behaves as
# intended for non-ASCII text.
421 values[index] = value.encode("utf-8")
422 self.value = "\0".join(values).encode("utf-8")
# Human-readable form: the individual strings joined with " / ".
425 return " / ".join(self)
class APEBinaryValue(_APEValue):
    """An APEv2 binary value."""

    def pprint(self):
        """Describe the value by its length, e.g. "[12 bytes]"."""
        size = len(self)
        return "[%d bytes]" % size
class APEExtValue(_APEValue):
    """An APEv2 external value.

    External values are usually URI or IRI strings.
    """

    def pprint(self):
        """Return the value as text, prefixed with "[External] "."""
        text = unicode(self)
        return "[External] %s" % text
# FileType for files whose only recognised feature is an APEv2 tag.
439 class APEv2File(FileType):
443 def __init__(self, fileobj): pass
444 pprint = staticmethod(lambda: "Unknown format with APEv2 tag.")
446 def load(self, filename):
# Record the filename, build self.info from the opened file, and load
# the APEv2 tag; self.tags is None when APEv2() raises `error`.
447 self.filename = filename
448 self.info = self._Info(file(filename, "rb"))
449 try: self.tags = APEv2(filename)
450 except error: self.tags = None
453 if self.tags is None:
456 raise ValueError("%r already has tags: %r" % (self, self.tags))
458 def score(filename, fileobj, header):
# The score is 1 when "APETAGEX" occurs in the bytes read after seeking
# to 160 bytes before the end of the file, reduced by 1 when `header`
# starts with "ID3".
459 try: fileobj.seek(-160, 2)
462 footer = fileobj.read()
# NOTE(review): the lowercased filename is not used by the return below.
463 filename = filename.lower()
464 return (("APETAGEX" in footer) - header.startswith("ID3"))
465 score = staticmethod(score)