lib/mutagen/apev2.py

   1 # An APEv2 tag reader
   2 #
   3 # Copyright 2005 Joe Wreschnig <piman@sacredchao.net>
   4 #
   5 # This program is free software; you can redistribute it and/or modify
   6 # it under the terms of the GNU General Public License version 2 as
   7 # published by the Free Software Foundation.
   8 #
   9 # $Id: apev2.py 4275 2008-06-01 06:32:37Z piman $
  10
  11 """APEv2 reading and writing.
  12
  13 The APEv2 format is most commonly used with Musepack files, but is
  14 also the format of choice for WavPack and other formats. Some MP3s
  15 also have APEv2 tags, but this can cause problems with many MP3
  16 decoders and taggers.
  17
  18 APEv2 tags, like Vorbis comments, are freeform key=value pairs. APEv2
  19 keys can be any ASCII string with characters from 0x20 to 0x7E,
  20 between 2 and 255 characters long.  Keys are case-sensitive, but
readers are recommended to be case insensitive, and it is forbidden to
have multiple keys which differ only in case.  Keys are usually stored
  23 title-cased (e.g. 'Artist' rather than 'artist').
  24
  25 APEv2 values are slightly more structured than Vorbis comments; values
  26 are flagged as one of text, binary, or an external reference (usually
  27 a URI).
  28
  29 Based off the format specification found at
  30 http://wiki.hydrogenaudio.org/index.php?title=APEv2_specification.
  31 """
  32
  33 __all__ = ["APEv2", "APEv2File", "Open", "delete"]
  34
  35 import struct
  36 from cStringIO import StringIO
  37
  38 def is_valid_apev2_key(key):
  39     return (2 <= len(key) <= 255 and min(key) >= ' ' and max(key) <= '~' and
  40             key not in ["OggS", "TAG", "ID3", "MP+"])
  41
# There are three different kinds of APE tag values.
# "0: Item contains text information coded in UTF-8
#  1: Item contains binary information
#  2: Item is a locator of external stored information [e.g. URL]
#  3: reserved"
# The kind is carried in bits 1-2 of each item's flags word (see
# APEv2.__parse_tag and _APEValue._internal).
TEXT, BINARY, EXTERNAL = range(3)

# Bits of the 32-bit flags word stored in a tag header/footer block.
# (Python 2 long literals.)
# HAS_HEADER: tested by _APEv2Data to decide whether a 32-byte header
# precedes the item data; set in both blocks written by APEv2.save.
HAS_HEADER = 1L << 31
# HAS_NO_FOOTER: not referenced elsewhere in this module.
HAS_NO_FOOTER = 1L << 30
# IS_HEADER: set only in the header block written by APEv2.save.
IS_HEADER  = 1L << 29
  52
  53 class error(IOError): pass
  54 class APENoHeaderError(error, ValueError): pass
  55 class APEUnsupportedVersionError(error, ValueError): pass
  56 class APEBadItemError(error, ValueError): pass
  57
  58 from mutagen import Metadata, FileType
  59 from mutagen._util import DictMixin, cdata, utf8, delete_bytes
  60
class _APEv2Data(object):
    """Locate an APEv2 tag in an open file and record where it lives.

    Constructing an instance scans fileobj for the "APETAGEX" marker
    (at the end of the file, before a trailing ID3v1 tag, before a
    Lyrics3v2 block followed by ID3v1, or at the very start) and fills
    in the offsets and the decoded fields below.  If no marker is
    found, every offset stays None and tag stays None.
    """

    # Store offsets of the important parts of the file.
    start = header = data = footer = end = None
    # Footer or header; seek here and read 32 to get version/size/items/flags
    metadata = None
    # Actual tag data
    tag = None

    # Fields read from the 32-byte metadata block: version is the raw
    # 4 bytes, the others are decoded with cdata.uint_le.
    version = None
    size = None
    items = None
    flags = 0

    # The tag is at the start rather than the end. A tag at both
    # the start and end of the file (i.e. the tag is the whole file)
    # is not considered to be at the start.
    is_at_start = False

    def __init__(self, fileobj):
        """Scan fileobj (a seekable binary file object) for an APEv2 tag."""
        self.__find_metadata(fileobj)
        # Relies on Python 2 ordering, where None compares lower than
        # any integer: the result is None only if both are None.
        self.metadata = max(self.header, self.footer)
        if self.metadata is None: return
        self.__fill_missing(fileobj)
        self.__fix_brokenness(fileobj)
        if self.data is not None:
            fileobj.seek(self.data)
            # Reads self.size bytes starting right after the header
            # position computed in __fill_missing.
            self.tag = fileobj.read(self.size)

    def __find_metadata(self, fileobj):
        """Set self.footer or self.header if an "APETAGEX" marker is found."""
        # Try to find a header or footer.

        # Check for a simple footer.
        try: fileobj.seek(-32, 2)
        except IOError:
            # Seeking 32 bytes back from the end failed; give up.
            fileobj.seek(0, 2)
            return
        if fileobj.read(8) == "APETAGEX":
            fileobj.seek(-8, 1)
            self.footer = self.metadata = fileobj.tell()
            return

        # Check for an APEv2 tag followed by an ID3v1 tag at the end.
        try:
            fileobj.seek(-128, 2)
            if fileobj.read(3) == "TAG":

                fileobj.seek(-35, 1) # "TAG" + header length
                if fileobj.read(8) == "APETAGEX":
                    fileobj.seek(-8, 1)
                    self.footer = fileobj.tell()
                    return

                # ID3v1 tag at the end, maybe preceded by Lyrics3v2.
                # (http://www.id3.org/lyrics3200.html)
                # (header length - "APETAGEX") - "LYRICS200"
                fileobj.seek(15, 1)
                if fileobj.read(9) == 'LYRICS200':
                    fileobj.seek(-15, 1) # "LYRICS200" + size tag
                    # The 6 bytes before "LYRICS200" are parsed as a
                    # decimal size; a non-numeric field is treated
                    # like a failed seek and handled below.
                    try: offset = int(fileobj.read(6))
                    except ValueError:
                        raise IOError

                    # Step back over the size field just read, the
                    # Lyrics3v2 block, and a 32-byte APE footer.
                    fileobj.seek(-32 - offset - 6, 1)
                    if fileobj.read(8) == "APETAGEX":
                        fileobj.seek(-8, 1)
                        self.footer = fileobj.tell()
                        return

        except IOError:
            # Any failed seek above (e.g. file too short) just means
            # this layout is not present; fall through.
            pass

        # Check for a tag at the start.
        fileobj.seek(0, 0)
        if fileobj.read(8) == "APETAGEX":
            self.is_at_start = True
            self.header = 0

    def __fill_missing(self, fileobj):
        """Read the metadata block and derive the remaining offsets.

        Raises APENoHeaderError if neither header nor footer is set.
        """
        # Skip the 8-byte "APETAGEX" marker.
        fileobj.seek(self.metadata + 8)
        self.version = fileobj.read(4)
        self.size = cdata.uint_le(fileobj.read(4))
        self.items = cdata.uint_le(fileobj.read(4))
        self.flags = cdata.uint_le(fileobj.read(4))

        if self.header is not None:
            self.data = self.header + 32
            # If we're reading the header, the size is the header
            # offset + the size, which includes the footer.
            self.end = self.data + self.size
            fileobj.seek(self.end - 32, 0)
            if fileobj.read(8) == "APETAGEX":
                self.footer = self.end - 32
        elif self.footer is not None:
            self.end = self.footer + 32
            self.data = self.end - self.size
            if self.flags & HAS_HEADER:
                self.header = self.data - 32
            else:
                # No separate header block: header and data coincide.
                self.header = self.data
        else: raise APENoHeaderError("No APE tag found")

    def __fix_brokenness(self, fileobj):
        """Set self.start, extending it back over leftover partial headers."""
        # Fix broken tags written with PyMusepack.
        if self.header is not None: start = self.header
        else: start = self.data
        fileobj.seek(start)

        while start > 0:
            # Clean up broken writing from pre-Mutagen PyMusepack.
            # It didn't remove the first 24 bytes of header.
            try: fileobj.seek(-24, 1)
            except IOError:
                break
            else:
                if fileobj.read(8) == "APETAGEX":
                    # Another marker 24 bytes earlier: move start back
                    # to it and look again.
                    fileobj.seek(-8, 1)
                    start = fileobj.tell()
                else: break
        self.start = start
 180
 181 class APEv2(DictMixin, Metadata):
 182     """A file with an APEv2 tag.
 183
 184     ID3v1 tags are silently ignored and overwritten.
 185     """
 186
 187     filename = None
 188
 189     def __init__(self, *args, **kwargs):
 190         self.__casemap = {}
 191         self.__dict = {}
 192         super(APEv2, self).__init__(*args, **kwargs)
 193         # Internally all names are stored as lowercase, but the case
 194         # they were set with is remembered and used when saving.  This
 195         # is roughly in line with the standard, which says that keys
 196         # are case-sensitive but two keys differing only in case are
 197         # not allowed, and recommends case-insensitive
 198         # implementations.
 199
 200     def pprint(self):
 201         """Return tag key=value pairs in a human-readable format."""
 202         items = self.items()
 203         items.sort()
 204         return "\n".join(["%s=%s" % (k, v.pprint()) for k, v in items])
 205
 206     def load(self, filename):
 207         """Load tags from a filename."""
 208         self.filename = filename
 209         fileobj = file(filename, "rb")
 210         try:
 211             data = _APEv2Data(fileobj)
 212         finally:
 213             fileobj.close()
 214         if data.tag:
 215             self.clear()
 216             self.__casemap.clear()
 217             self.__parse_tag(data.tag, data.items)
 218         else:
 219             raise APENoHeaderError("No APE tag found")
 220
 221     def __parse_tag(self, tag, count):
 222         fileobj = StringIO(tag)
 223
 224         for i in range(count):
 225             size = cdata.uint_le(fileobj.read(4))
 226             flags = cdata.uint_le(fileobj.read(4))
 227
 228             # Bits 1 and 2 bits are flags, 0-3
 229             # Bit 0 is read/write flag, ignored
 230             kind = (flags & 6) >> 1
 231             if kind == 3:
 232                 raise APEBadItemError("value type must be 0, 1, or 2")
 233             key = value = fileobj.read(1)
 234             while key[-1:] != '\x00' and value:
 235                 value = fileobj.read(1)
 236                 key += value
 237             if key[-1:] == "\x00":
 238                 key = key[:-1]
 239             value = fileobj.read(size)
 240             self[key] = APEValue(value, kind)
 241
 242     def __getitem__(self, key):
 243         if not is_valid_apev2_key(key):
 244             raise KeyError("%r is not a valid APEv2 key" % key)
 245         return self.__dict[key.lower()]
 246
 247     def __delitem__(self, key):
 248         if not is_valid_apev2_key(key):
 249             raise KeyError("%r is not a valid APEv2 key" % key)
 250         del(self.__dict[key.lower()])
 251
 252     def __setitem__(self, key, value):
 253         """'Magic' value setter.
 254
 255         This function tries to guess at what kind of value you want to
 256         store. If you pass in a valid UTF-8 or Unicode string, it
 257         treats it as a text value. If you pass in a list, it treats it
 258         as a list of string/Unicode values.  If you pass in a string
 259         that is not valid UTF-8, it assumes it is a binary value.
 260
 261         If you need to force a specific type of value (e.g. binary
 262         data that also happens to be valid UTF-8, or an external
 263         reference), use the APEValue factory and set the value to the
 264         result of that:
 265             from mutagen.apev2 import APEValue, EXTERNAL
 266             tag['Website'] = APEValue('http://example.org', EXTERNAL)
 267         """
 268
 269         if not is_valid_apev2_key(key):
 270             raise KeyError("%r is not a valid APEv2 key" % key)
 271
 272         if not isinstance(value, _APEValue):
 273             # let's guess at the content if we're not already a value...
 274             if isinstance(value, unicode):
 275                 # unicode? we've got to be text.
 276                 value = APEValue(utf8(value), TEXT)
 277             elif isinstance(value, list):
 278                 # list? text.
 279                 value = APEValue("\0".join(map(utf8, value)), TEXT)
 280             else:
 281                 try: dummy = value.decode("utf-8")
 282                 except UnicodeError:
 283                     # invalid UTF8 text, probably binary
 284                     value = APEValue(value, BINARY)
 285                 else:
 286                     # valid UTF8, probably text
 287                     value = APEValue(value, TEXT)
 288         self.__casemap[key.lower()] = key
 289         self.__dict[key.lower()] = value
 290
 291     def keys(self):
 292         return [self.__casemap.get(key, key) for key in self.__dict.keys()]
 293
 294     def save(self, filename=None):
 295         """Save changes to a file.
 296
 297         If no filename is given, the one most recently loaded is used.
 298
 299         Tags are always written at the end of the file, and include
 300         a header and a footer.
 301         """
 302
 303         filename = filename or self.filename
 304         try:
 305             fileobj = file(filename, "r+b")
 306         except IOError:
 307             fileobj = file(filename, "w+b")
 308         data = _APEv2Data(fileobj)
 309
 310         if data.is_at_start:
 311             delete_bytes(fileobj, data.end - data.start, data.start)
 312         elif data.start is not None:
 313             fileobj.seek(data.start)
 314             # Delete an ID3v1 tag if present, too.
 315             fileobj.truncate()
 316         fileobj.seek(0, 2)
 317
 318         # "APE tags items should be sorted ascending by size... This is
 319         # not a MUST, but STRONGLY recommended. Actually the items should
 320         # be sorted by importance/byte, but this is not feasible."
 321         tags = [v._internal(k) for k, v in self.items()]
 322         tags.sort(lambda a, b: cmp(len(a), len(b)))
 323         num_tags = len(tags)
 324         tags = "".join(tags)
 325
 326         header = "APETAGEX%s%s" %(
 327             # version, tag size, item count, flags
 328             struct.pack("<4I", 2000, len(tags) + 32, num_tags,
 329                         HAS_HEADER | IS_HEADER),
 330             "\0" * 8)
 331         fileobj.write(header)
 332
 333         fileobj.write(tags)
 334
 335         footer = "APETAGEX%s%s" %(
 336             # version, tag size, item count, flags
 337             struct.pack("<4I", 2000, len(tags) + 32, num_tags,
 338                         HAS_HEADER),
 339             "\0" * 8)
 340         fileobj.write(footer)
 341         fileobj.close()
 342
 343     def delete(self, filename=None):
 344         """Remove tags from a file."""
 345         filename = filename or self.filename
 346         fileobj = file(filename, "r+b")
 347         try:
 348             data = _APEv2Data(fileobj)
 349             if data.start is not None and data.size is not None:
 350                 delete_bytes(fileobj, data.end - data.start, data.start)
 351         finally:
 352             fileobj.close()
 353         self.clear()
 354
# Alias for the tag class; exported through __all__ as "Open".
Open = APEv2
 356
 357 def delete(filename):
 358     """Remove tags from a file."""
 359     try: APEv2(filename).delete()
 360     except APENoHeaderError: pass
 361
 362 def APEValue(value, kind):
 363     """APEv2 tag value factory.
 364
 365     Use this if you need to specify the value's type manually.  Binary
 366     and text data are automatically detected by APEv2.__setitem__.
 367     """
 368     if kind == TEXT: return APETextValue(value, kind)
 369     elif kind == BINARY: return APEBinaryValue(value, kind)
 370     elif kind == EXTERNAL: return APEExtValue(value, kind)
 371     else: raise ValueError("kind must be TEXT, BINARY, or EXTERNAL")
 372
 373 class _APEValue(object):
 374     def __init__(self, value, kind):
 375         self.kind = kind
 376         self.value = value
 377
 378     def __len__(self):
 379         return len(self.value)
 380     def __str__(self):
 381         return self.value
 382
 383     # Packed format for an item:
 384     # 4B: Value length
 385     # 4B: Value type
 386     # Key name
 387     # 1B: Null
 388     # Key value
 389     def _internal(self, key):
 390         return "%s%s\0%s" %(
 391             struct.pack("<2I", len(self.value), self.kind << 1),
 392             key, self.value)
 393
 394     def __repr__(self):
 395         return "%s(%r, %d)" % (type(self).__name__, self.value, self.kind)
 396
 397 class APETextValue(_APEValue):
 398     """An APEv2 text value.
 399
 400     Text values are Unicode/UTF-8 strings. They can be accessed like
 401     strings (with a null seperating the values), or arrays of strings."""
 402
 403     def __unicode__(self):
 404         return unicode(str(self), "utf-8")
 405
 406     def __iter__(self):
 407         """Iterate over the strings of the value (not the characters)"""
 408         return iter(unicode(self).split("\0"))
 409
 410     def __getitem__(self, index):
 411         return unicode(self).split("\0")[index]
 412
 413     def __len__(self):
 414         return self.value.count("\0") + 1
 415
 416     def __cmp__(self, other):
 417         return cmp(unicode(self), other)
 418
 419     def __setitem__(self, index, value):
 420         values = list(self)
 421         values[index] = value.encode("utf-8")
 422         self.value = "\0".join(values).encode("utf-8")
 423
 424     def pprint(self):
 425         return " / ".join(self)
 426
 427 class APEBinaryValue(_APEValue):
 428     """An APEv2 binary value."""
 429
 430     def pprint(self): return "[%d bytes]" % len(self)
 431
 432 class APEExtValue(_APEValue):
 433     """An APEv2 external value.
 434
 435     External values are usually URI or IRI strings.
 436     """
 437     def pprint(self): return "[External] %s" % unicode(self)
 438
 439 class APEv2File(FileType):
 440     class _Info(object):
 441         length = 0
 442         bitrate = 0
 443         def __init__(self, fileobj): pass
 444         pprint = staticmethod(lambda: "Unknown format with APEv2 tag.")
 445
 446     def load(self, filename):
 447         self.filename = filename
 448         self.info = self._Info(file(filename, "rb"))
 449         try: self.tags = APEv2(filename)
 450         except error: self.tags = None
 451
 452     def add_tags(self):
 453         if self.tags is None:
 454             self.tags = APEv2()
 455         else:
 456             raise ValueError("%r already has tags: %r" % (self, self.tags))
 457
 458     def score(filename, fileobj, header):
 459         try: fileobj.seek(-160, 2)
 460         except IOError:
 461             fileobj.seek(0)
 462         footer = fileobj.read()
 463         filename = filename.lower()
 464         return (("APETAGEX" in footer) - header.startswith("ID3"))
 465     score = staticmethod(score)