1 # id3 support for mutagen
2 # Copyright (C) 2005 Michael Urman
4 # This program is free software; you can redistribute it and/or modify
5 # it under the terms of version 2 of the GNU General Public License as
6 # published by the Free Software Foundation.
8 # $Id: id3.py 4275 2008-06-01 06:32:37Z piman $
10 """ID3v2 reading and writing.
12 This is based off of the following references:
13 http://www.id3.org/id3v2.4.0-structure.txt
14 http://www.id3.org/id3v2.4.0-frames.txt
15 http://www.id3.org/id3v2.3.0.html
16 http://www.id3.org/id3v2-00.txt
17 http://www.id3.org/id3v1.html
19 Its largest deviation from the above (versions 2.3 and 2.2) is that it
20 will not interpret the / characters as a separator, and will almost
21 always accept null separators to generate multi-valued text frames.
23 Because ID3 frame structure differs between frame types, each frame is
24 implemented as a different class (e.g. TIT2 as mutagen.id3.TIT2). Each
25 frame's documentation contains a list of its attributes.
27 Since this file's documentation is a little unwieldy, you are probably
28 interested in the 'ID3' class to start with.
31 __all__ = ['ID3', 'ID3FileType', 'Frames', 'Open', 'delete']
33 import struct; from struct import unpack, pack
34 from zlib import error as zlibError
35 from warnings import warn
38 from mutagen._util import insert_bytes, delete_bytes, DictProxy
class error(Exception):
    """Base class for every exception defined by this module."""


class ID3NoHeaderError(error, ValueError):
    """The file is too small for, or does not start with, an ID3 tag."""


class ID3BadUnsynchData(error, ValueError):
    """Frame data could not be decoded from its unsynchronised form."""


class ID3BadCompressedData(error, ValueError):
    """Frame data flagged as compressed could not be decompressed."""


class ID3TagError(error, ValueError):
    """An ID3 tag error that is also a ValueError."""


class ID3UnsupportedVersionError(error, NotImplementedError):
    """The tag uses an ID3v2 major version other than 2, 3 or 4."""


class ID3EncryptionUnsupportedError(error, NotImplementedError):
    """A frame is flagged as encrypted, which is not supported."""


class ID3JunkFrameError(error, ValueError):
    """A frame's data is unusable (e.g. too short or undecodable)."""


class ID3Warning(error, UserWarning):
    """Warning category used with warnings.warn by this module."""
def is_valid_frame_id(frame_id):
    """Return whether frame_id is shaped like a frame ID.

    The ID must consist solely of alphanumeric characters and must
    satisfy str.isupper(), i.e. contain at least one cased character
    and no lowercase ones.
    """
    if not frame_id.isalnum():
        return False
    return frame_id.isupper()
54 class ID3(DictProxy, mutagen.Metadata):
55 """A file with an ID3v2 tag.
58 version -- ID3 tag version as a tuple
59 unknown_frames -- raw frame data of any unknown frames found
60 size -- the total size of the ID3 tag, including the header
def __init__(self, *args, **kwargs):
    """Create the tag and forward every argument to the parent classes.

    unknown_frames is set up before the parent initialiser runs because
    load() appends to it -- presumably the parent __init__ can trigger a
    load; TODO confirm against mutagen.Metadata.
    """
    self.unknown_frames = list()
    parent = super(ID3, self)
    parent.__init__(*args, **kwargs)
76 def __fullread(self, size):
79 raise ValueError('Requested bytes (%s) less than zero' % size)
80 if size > self.__filesize:
81 raise EOFError('Requested %#x of %#x (%s)' %
82 (long(size), long(self.__filesize), self.filename))
83 except AttributeError: pass
84 data = self.__fileobj.read(size)
85 if len(data) != size: raise EOFError
86 self.__readbytes += size
89 def load(self, filename, known_frames=None, translate=True):
90 """Load tags from a filename.
93 filename -- filename to load tag data from
94 known_frames -- dict mapping frame IDs to Frame objects
95 translate -- Update all tags to ID3v2.4 internally. Mutagen is
96 only capable of writing ID3v2.4 tags, so if you
97 intend to save, this must be true.
99 Example of loading a custom frame:
100 my_frames = dict(mutagen.id3.Frames)
101 class XMYF(Frame): ...
102 my_frames["XMYF"] = XMYF
103 mutagen.id3.ID3(filename, known_frames=my_frames)
106 from os.path import getsize
107 self.filename = filename
108 self.__known_frames = known_frames
109 self.__fileobj = file(filename, 'rb')
110 self.__filesize = getsize(filename)
116 raise ID3NoHeaderError("%s: too small (%d bytes)" %(
117 filename, self.__filesize))
118 except (ID3NoHeaderError, ID3UnsupportedVersionError), err:
121 stack = sys.exc_info()[2]
122 try: self.__fileobj.seek(-128, 2)
123 except EnvironmentError: raise err, None, stack
125 frames = ParseID3v1(self.__fileobj.read(128))
126 if frames is not None:
127 self.version = (1, 1)
128 map(self.add, frames.values())
129 else: raise err, None, stack
131 frames = self.__known_frames
133 if (2,3,0) <= self.version: frames = Frames
134 elif (2,2,0) <= self.version: frames = Frames_2_2
135 data = self.__fullread(self.size - 10)
136 for frame in self.__read_frames(data, frames=frames):
137 if isinstance(frame, Frame): self.add(frame)
138 else: self.unknown_frames.append(frame)
140 self.__fileobj.close()
146 def getall(self, key):
147 """Return all frames with a given name (the list may be empty).
149 This is best explained by examples:
150 id3.getall('TIT2') == [id3['TIT2']]
151 id3.getall('TTTT') == []
152 id3.getall('TXXX') == [TXXX(desc='woo', text='bar'),
153 TXXX(desc='baz', text='quuuux'), ...]
155 Since this is based on the frame's HashKey, which is
156 colon-separated, you can use it to do things like
157 getall('COMM:MusicMatch') or getall('TXXX:QuodLibet:').
159 if key in self: return [self[key]]
162 return [v for s,v in self.items() if s.startswith(key)]
164 def delall(self, key):
165 """Delete all tags of a given kind; see getall."""
166 if key in self: del(self[key])
169 for k in filter(lambda s: s.startswith(key), self.keys()):
172 def setall(self, key, values):
173 """Delete frames of the given type and add frames in 'values'."""
176 self[tag.HashKey] = tag
179 """Return tags in a human-readable format.
181 "Human-readable" is used loosely here. The format is intended
182 to mirror that used for Vorbis or APEv2 output, e.g.
184 However, ID3 frames can have multiple keys:
185 POPM=user@example.org=3 128/255
187 return "\n".join(map(Frame.pprint, self.values()))
def loaded_frame(self, tag):
    """Deprecated; use the add method.

    Stores tag under its HashKey. A frame whose class name is three
    characters long (an ID3v2.2 frame) is first converted by passing it
    to its class's immediate base class.
    """
    frame_cls = type(tag)
    if len(frame_cls.__name__) == 3:
        # turn 2.2 into 2.3/2.4 tags
        tag = frame_cls.__base__(tag)
    self[tag.HashKey] = tag
# NOTE: aliasing add to loaded_frame (or the reverse), or making
# loaded_frame call add, breaks applications that expect to be able
# to override loaded_frame (e.g. Quod Libet). add therefore delegates
# to loaded_frame instead.
def add(self, frame):
    """Add a frame to the tag."""
    outcome = self.loaded_frame(frame)
    return outcome
202 def __load_header(self):
204 data = self.__fullread(10)
205 id3, vmaj, vrev, flags, size = unpack('>3sBBB4s', data)
207 self.size = BitPaddedInt(size) + 10
208 self.version = (2, vmaj, vrev)
211 raise ID3NoHeaderError("'%s' doesn't start with an ID3 tag" % fn)
212 if vmaj not in [2, 3, 4]:
213 raise ID3UnsupportedVersionError("'%s' ID3v2.%d not supported"
217 if (2,4,0) <= self.version and (flags & 0x0f):
218 raise ValueError("'%s' has invalid flags %#02x" % (fn, flags))
219 elif (2,3,0) <= self.version and (flags & 0x1f):
220 raise ValueError("'%s' has invalid flags %#02x" % (fn, flags))
223 if self.version >= (2,4,0):
224 # "Where the 'Extended header size' is the size of the whole
225 # extended header, stored as a 32 bit synchsafe integer."
226 self.__extsize = BitPaddedInt(self.__fullread(4)) - 4
228 # "Where the 'Extended header size', currently 6 or 10 bytes,
230 self.__extsize = unpack('>L', self.__fullread(4))[0]
231 self.__extdata = self.__fullread(self.__extsize)
233 def __determine_bpi(self, data, frames):
234 if self.version < (2,4,0): return int
235 # have to special case whether to use bitpaddedints here
236 # spec says to use them, but iTunes has it wrong
238 # count number of tags found as BitPaddedInt and how far past
241 while o < len(data)-10:
242 name, size, flags = unpack('>4sLH', data[o:o+10])
243 size = BitPaddedInt(size)
245 if name in frames: asbpi += 1
246 bpioff = o - len(data)
248 # count number of tags found as int and how far past
251 while o < len(data)-10:
252 name, size, flags = unpack('>4sLH', data[o:o+10])
254 if name in frames: asint += 1
255 intoff = o - len(data)
257 # if more tags as int, or equal and bpi is past and int is not
258 if asint > asbpi or (asint == asbpi and (bpioff >= 1 and intoff <= 1)):
262 def __read_frames(self, data, frames):
263 if self.version < (2,4,0) and self.f_unsynch:
264 try: data = unsynch.decode(data)
265 except ValueError: pass
267 if (2,3,0) <= self.version:
268 bpi = self.__determine_bpi(data, frames)
271 try: name, size, flags = unpack('>4sLH', header)
272 except struct.error: return # not enough header
273 if name.strip('\x00') == '': return
275 framedata = data[10:10+size]
276 data = data[10+size:]
277 if size == 0: continue # drop empty frames
278 try: tag = frames[name]
280 if is_valid_frame_id(name): yield header + framedata
282 try: yield self.__load_framedata(tag, flags, framedata)
283 except NotImplementedError: yield header + framedata
284 except ID3JunkFrameError: pass
286 elif (2,2,0) <= self.version:
289 try: name, size = unpack('>3s3s', header)
290 except struct.error: return # not enough header
291 size, = struct.unpack('>L', '\x00'+size)
292 if name.strip('\x00') == '': return
293 framedata = data[6:6+size]
295 if size == 0: continue # drop empty frames
296 try: tag = frames[name]
298 if is_valid_frame_id(name): yield header + framedata
300 try: yield self.__load_framedata(tag, 0, framedata)
301 except NotImplementedError: yield header + framedata
302 except ID3JunkFrameError: pass
def __load_framedata(self, tag, flags, framedata):
    """Build a frame of class tag from raw frame data.

    Delegates to tag.fromData, handing it this ID3 object, the frame
    flags and the frame payload, and returns whatever it produces.
    """
    build = tag.fromData
    return build(self, flags, framedata)
# Read-only booleans, one per bit of the tag header flags byte.
def f_unsynch(self):
    # bit 0x80
    return bool(self.__flags & 0x80)
f_unsynch = property(f_unsynch)

def f_extended(self):
    # bit 0x40
    return bool(self.__flags & 0x40)
f_extended = property(f_extended)

def f_experimental(self):
    # bit 0x20
    return bool(self.__flags & 0x20)
f_experimental = property(f_experimental)

def f_footer(self):
    # bit 0x10
    return bool(self.__flags & 0x10)
f_footer = property(f_footer)

#f_crc = property(lambda s: bool(s.__extflags & 0x8000))
314 def save(self, filename=None, v1=1):
315 """Save changes to a file.
317 If no filename is given, the one most recently loaded is used.
320 v1 -- if 0, ID3v1 tags will be removed
321 if 1, ID3v1 tags will be updated but not added
322 if 2, ID3v1 tags will be created and/or updated
324 The lack of a way to update only an ID3v1 tag is intentional.
327 # Sort frames by 'importance'
328 order = ["TIT2", "TPE1", "TRCK", "TALB", "TPOS", "TDRC", "TCON"]
329 order = dict(zip(order, range(len(order))))
331 frames = self.items()
332 frames.sort(lambda a, b: cmp(order.get(a[0][:4], last),
333 order.get(b[0][:4], last)))
335 framedata = [self.__save_frame(frame) for (key, frame) in frames]
336 framedata.extend([data for data in self.unknown_frames
340 self.delete(filename)
341 except EnvironmentError, err:
342 from errno import ENOENT
343 if err.errno != ENOENT: raise
346 framedata = ''.join(framedata)
347 framesize = len(framedata)
349 if filename is None: filename = self.filename
350 try: f = open(filename, 'rb+')
352 from errno import ENOENT
353 if err.errno != ENOENT: raise
354 f = open(filename, 'ab') # create, then reopen
355 f = open(filename, 'rb+')
358 try: id3, vmaj, vrev, flags, insize = unpack('>3sBBB4s', idata)
359 except struct.error: id3, insize = '', 0
360 insize = BitPaddedInt(insize)
361 if id3 != 'ID3': insize = -10
363 if insize >= framesize: outsize = insize
364 else: outsize = (framesize + 1023) & ~0x3FF
365 framedata += '\x00' * (outsize - framesize)
367 framesize = BitPaddedInt.to_str(outsize, width=4)
369 header = pack('>3sBBB4s', 'ID3', 4, 0, flags, framesize)
370 data = header + framedata
372 if (insize < outsize):
373 insert_bytes(f, outsize-insize, insize+10)
380 from errno import EINVAL
381 if err.errno != EINVAL: raise
382 f.seek(0, 2) # ensure read won't get "TAG"
384 if f.read(3) == "TAG":
386 if v1 > 0: f.write(MakeID3v1(self))
390 f.write(MakeID3v1(self))
395 def delete(self, filename=None, delete_v1=True, delete_v2=True):
396 """Remove tags from a file.
398 If no filename is given, the one most recently loaded is used.
401 delete_v1 -- delete any ID3v1 tag
402 delete_v2 -- delete any ID3v2 tag
405 filename = self.filename
406 delete(filename, delete_v1, delete_v2)
409 def __save_frame(self, frame):
411 if self.PEDANTIC and isinstance(frame, TextFrame):
412 if len(str(frame)) == 0: return ''
413 framedata = frame._writeData()
414 usize = len(framedata)
416 framedata = BitPaddedInt.to_str(usize) + framedata.encode('zlib')
417 flags |= Frame.FLAG24_COMPRESS | Frame.FLAG24_DATALEN
418 datasize = BitPaddedInt.to_str(len(framedata), width=4)
419 header = pack('>4s4sH', type(frame).__name__, datasize, flags)
420 return header + framedata
422 def update_to_v24(self):
423 """Convert older tags into an ID3v2.4 tag.
425 This updates old ID3v2 frames to ID3v2.4 ones (e.g. TYER to
426 TDRC). If you intend to save tags, you must call this function
427 at some point; it is called by default when loading the tag.
430 if self.version < (2,3,0): del self.unknown_frames[:]
433 # TDAT, TYER, and TIME have been turned into TDRC.
435 if str(self.get("TYER", "")).strip("\x00"):
436 date = str(self.pop("TYER"))
437 if str(self.get("TDAT", "")).strip("\x00"):
438 dat = str(self.pop("TDAT"))
439 date = "%s-%s-%s" % (date, dat[2:], dat[:2])
440 if str(self.get("TIME", "")).strip("\x00"):
441 time = str(self.pop("TIME"))
442 date += "T%s:%s:00" % (time[:2], time[2:])
443 if "TDRC" not in self:
444 self.add(TDRC(encoding=0, text=date))
445 except UnicodeDecodeError:
446 # Old ID3 tags have *lots* of Unicode problems, so if TYER
447 # is bad, just chuck the frames.
450 # TORY can be the first part of a TDOR.
453 if "TDOR" not in self:
455 self.add(TDOR(encoding=0, text=str(f)))
456 except UnicodeDecodeError:
462 if "TIPL" not in self:
463 self.add(TIPL(encoding=f.encoding, people=f.people))
466 # Get rid of "(xx)Foobr" format.
467 self["TCON"].genres = self["TCON"].genres
469 if self.version < (2, 3):
470 # ID3v2.2 PIC frames are slightly different.
471 pics = self.getall("APIC")
472 mimes = { "PNG": "image/png", "JPG": "image/jpeg" }
476 encoding=pic.encoding, mime=mimes.get(pic.mime, pic.mime),
477 type=pic.type, desc=pic.desc, data=pic.data)
480 # ID3v2.2 LNK frames are just way too different to upgrade.
483 # These can't be trivially translated to any ID3v2.4 tags, or
484 # should have been removed already.
485 for key in ["RVAD", "EQUA", "TRDA", "TSIZ", "TDAT", "TIME", "CRM"]:
486 if key in self: del(self[key])
488 def delete(filename, delete_v1=True, delete_v2=True):
489 """Remove tags from a file.
492 delete_v1 -- delete any ID3v1 tag
493 delete_v2 -- delete any ID3v2 tag
496 f = open(filename, 'rb+')
503 if f.read(3) == "TAG":
507 # technically an insize=0 tag is invalid, but we delete it anyway
508 # (primarily because we used to write it)
512 try: id3, vmaj, vrev, flags, insize = unpack('>3sBBB4s', idata)
513 except struct.error: id3, insize = '', -1
514 insize = BitPaddedInt(insize)
515 if id3 == 'ID3' and insize >= 0:
516 delete_bytes(f, insize + 10, 0)
518 class BitPaddedInt(int):
519 def __new__(cls, value, bits=7, bigendian=True):
520 "Strips 8-bits bits out of every byte"
522 if isinstance(value, (int, long)):
525 bytes.append(value & ((1<<bits)-1))
527 if isinstance(value, str):
528 bytes = [ord(byte) & mask for byte in value]
529 if bigendian: bytes.reverse()
531 for shift, byte in zip(range(0, len(bytes)*bits, bits), bytes):
532 numeric_value += byte << shift
533 if isinstance(numeric_value, long):
534 self = long.__new__(BitPaddedLong, numeric_value)
536 self = int.__new__(BitPaddedInt, numeric_value)
538 self.bigendian = bigendian
541 def as_str(value, bits=7, bigendian=True, width=4):
542 bits = getattr(value, 'bits', bits)
543 bigendian = getattr(value, 'bigendian', bigendian)
548 bytes.append(value & mask)
549 value = value >> bits
550 # PCNT and POPM use growing integers of at least 4 bytes as counters.
551 if width == -1: width = max(4, len(bytes))
552 if len(bytes) > width:
553 raise ValueError, 'Value too wide (%d bytes)' % len(bytes)
554 else: bytes.extend([0] * (width-len(bytes)))
555 if bigendian: bytes.reverse()
556 return ''.join(map(chr, bytes))
557 to_str = staticmethod(as_str)
class BitPaddedLong(long):
    """long-based counterpart of BitPaddedInt.

    BitPaddedInt.__new__ creates an instance of this class when the
    decoded numeric value is a long.
    """

    def as_str(value, bits=7, bigendian=True, width=4):
        """Serialise value by delegating to BitPaddedInt.to_str."""
        return BitPaddedInt.to_str(
            value, bits=bits, bigendian=bigendian, width=width)

    # Kept as a separate name so as_str stays usable as a method.
    to_str = staticmethod(as_str)
564 class unsynch(object):
568 append = output.append
574 if val >= '\xE0': raise ValueError('invalid sync-safe string')
575 elif val != '\x00': append(val)
577 if not safe: raise ValueError('string ended unsafe')
578 return ''.join(output)
579 decode = staticmethod(decode)
584 append = output.append
588 if val == '\xFF': safe = False
589 elif val == '\x00' or val >= '\xE0':
596 if not safe: append('\x00')
597 return ''.join(output)
598 encode = staticmethod(encode)
def __init__(self, name):
    """Remember the name of the frame attribute this spec describes."""
    self.name = name
def __hash__(self):
    """Refuse hashing: always raises TypeError."""
    message = "Spec objects are unhashable"
    raise TypeError(message)
class ByteSpec(Spec):
    """Spec for a field stored in a single byte."""

    def read(self, frame, data):
        """Return (ordinal of the first byte, the rest of data)."""
        head, rest = data[0], data[1:]
        return ord(head), rest

    def write(self, frame, value):
        """Return value as a one-character string, via chr()."""
        encoded = chr(value)
        return encoded

    def validate(self, frame, value):
        """Accept any value unchanged."""
        return value
609 class IntegerSpec(Spec):
610 def read(self, frame, data):
611 return int(BitPaddedInt(data, bits=8)), ''
612 def write(self, frame, value):
613 return BitPaddedInt.to_str(value, bits=8, width=-1)
614 def validate(self, frame, value):
617 class SizedIntegerSpec(Spec):
618 def __init__(self, name, size):
619 self.name, self.__sz = name, size
620 def read(self, frame, data):
621 return int(BitPaddedInt(data[:self.__sz], bits=8)), data[self.__sz:]
622 def write(self, frame, value):
623 return BitPaddedInt.to_str(value, bits=8, width=self.__sz)
624 def validate(self, frame, value):
class EncodingSpec(ByteSpec):
    """Spec for the leading text-encoding byte of a frame."""

    def read(self, frame, data):
        """Read one byte and interpret it as the encoding.

        A byte value below 16 is returned as the encoding together with
        the remaining data. Any other byte is put back in front of the
        remaining data and encoding 0 is returned instead.
        """
        enc, data = super(EncodingSpec, self).read(frame, data)
        if enc < 16:
            return enc, data
        return 0, chr(enc) + data

    def validate(self, frame, value):
        """Return value when it is None or lies within 0..3.

        Raises ValueError for anything else.
        """
        if value is None:
            return None
        if 0 <= value <= 3:
            return value
        # Wrap the operand in a 1-tuple: with a bare tuple-valued
        # argument the % operator would unpack it and raise TypeError
        # instead of letting the intended ValueError through.
        raise ValueError('Invalid Encoding: %r' % (value,))
638 class StringSpec(Spec):
639 def __init__(self, name, length):
640 super(StringSpec, self).__init__(name)
642 def read(s, frame, data): return data[:s.len], data[s.len:]
643 def write(s, frame, value):
644 if value is None: return '\x00' * s.len
645 else: return (str(value) + '\x00' * s.len)[:s.len]
def validate(s, frame, value):
    """Check a fixed-length string value.

    None is passed through. Otherwise value must be a string whose
    length equals s.len; it is returned unchanged, and anything else
    raises ValueError.
    """
    if value is None:
        return None
    well_formed = isinstance(value, basestring) and len(value) == s.len
    if not well_formed:
        raise ValueError(
            'Invalid StringSpec[%d] data: %r' % (s.len, value))
    return value
class BinaryDataSpec(Spec):
    """Spec that takes all remaining frame data as one opaque string."""

    def read(self, frame, data):
        """Consume everything: return (data, '')."""
        leftover = ''
        return data, leftover

    def write(self, frame, value):
        """Return str(value)."""
        return str(value)

    def validate(self, frame, value):
        """Coerce value with str()."""
        return str(value)
656 class EncodedTextSpec(Spec):
657 # Okay, seriously. This is private and defined explicitly and
658 # completely by the ID3 specification. You can't just add
659 # encodings here however you want.
660 _encodings = ( ('latin1', '\x00'), ('utf16', '\x00\x00'),
661 ('utf_16_be', '\x00\x00'), ('utf8', '\x00') )
663 def read(self, frame, data):
664 enc, term = self._encodings[frame.encoding]
668 data, ret = data.split(term, 1)
673 offset = data.index(term, offset+1)
674 if offset & 1: continue
675 data, ret = data[0:offset], data[offset+2:]; break
676 except ValueError: pass
678 if len(data) < len(term): return u'', ret
679 return data.decode(enc), ret
681 def write(self, frame, value):
682 enc, term = self._encodings[frame.encoding]
683 return value.encode(enc) + term
685 def validate(self, frame, value): return unicode(value)
687 class MultiSpec(Spec):
688 def __init__(self, name, *specs, **kw):
689 super(MultiSpec, self).__init__(name)
691 self.sep = kw.get('sep')
693 def read(self, frame, data):
697 for spec in self.specs:
698 value, data = spec.read(frame, data)
700 if len(self.specs) != 1: values.append(record)
701 else: values.append(record[0])
704 def write(self, frame, value):
706 if len(self.specs) == 1:
708 data.append(self.specs[0].write(frame, v))
711 for v, s in zip(record, self.specs):
712 data.append(s.write(frame, v))
715 def validate(self, frame, value):
716 if value is None: return []
717 if self.sep and isinstance(value, basestring):
718 value = value.split(self.sep)
719 if isinstance(value, list):
720 if len(self.specs) == 1:
721 return [self.specs[0].validate(frame, v) for v in value]
724 [s.validate(frame, v) for (v,s) in zip(val, self.specs)]
726 raise ValueError, 'Invalid MultiSpec data: %r' % value
class EncodedNumericTextSpec(EncodedTextSpec):
    """EncodedTextSpec subclass that adds no behaviour of its own.

    Used as the text spec of NumericTextFrame.
    """
class EncodedNumericPartTextSpec(EncodedTextSpec):
    """EncodedTextSpec subclass that adds no behaviour of its own.

    Used as the text spec of NumericPartTextFrame.
    """
731 class Latin1TextSpec(EncodedTextSpec):
732 def read(self, frame, data):
733 if '\x00' in data: data, ret = data.split('\x00',1)
735 return data.decode('latin1'), ret
737 def write(self, data, value):
738 return value.encode('latin1') + '\x00'
740 def validate(self, frame, value): return unicode(value)
742 class ID3TimeStamp(object):
743 """A time stamp in ID3v2 format.
745 This is a restricted form of the ISO 8601 standard; time stamps
748 Or some partial form (YYYY-MM-DD HH, YYYY, etc.).
750 The 'text' attribute contains the raw text data of the time stamp.
754 def __init__(self, text):
755 if isinstance(text, ID3TimeStamp): text = text.text
758 __formats = ['%04d'] + ['%02d'] * 5
759 __seps = ['-', '-', ' ', ':', ':', 'x']
761 parts = [self.year, self.month, self.day,
762 self.hour, self.minute, self.second]
764 for i, part in enumerate(iter(iter(parts).next, None)):
765 pieces.append(self.__formats[i]%part + self.__seps[i])
766 return u''.join(pieces)[:-1]
768 def set_text(self, text, splitre=re.compile('[-T:/.]|\s+')):
769 year, month, day, hour, minute, second = \
770 splitre.split(text + ':::::')[:6]
771 for a in 'year month day hour minute second'.split():
772 try: v = int(locals()[a])
773 except ValueError: v = None
776 text = property(get_text, set_text, doc="ID3v2.4 date and time.")
778 def __str__(self): return self.text
779 def __repr__(self): return repr(self.text)
780 def __cmp__(self, other): return cmp(self.text, other.text)
781 def encode(self, *args): return self.text.encode(*args)
class TimeStampSpec(EncodedTextSpec):
    """Encoded text spec whose values are ID3TimeStamp objects."""

    def read(self, frame, data):
        """Read encoded text and wrap it in an ID3TimeStamp.

        Returns (timestamp, remaining data).
        """
        value, data = super(TimeStampSpec, self).read(frame, data)
        return self.validate(frame, value), data

    def write(self, frame, data):
        """Encode a timestamp's text with spaces replaced by 'T'."""
        stamp_text = data.text.replace(' ', 'T')
        return super(TimeStampSpec, self).write(frame, stamp_text)

    def validate(self, frame, value):
        """Convert value to an ID3TimeStamp.

        Raises ValueError when ID3TimeStamp rejects value with a
        TypeError.
        """
        try:
            return ID3TimeStamp(value)
        except TypeError:
            # Wrap the operand in a 1-tuple so a tuple-valued argument
            # is reported via ValueError rather than breaking the %
            # formatting with a second TypeError.
            raise ValueError("Invalid ID3TimeStamp: %r" % (value,))
class ChannelSpec(ByteSpec):
    """Byte spec that carries named channel constants, numbered 0 to 8."""

    OTHER = 0
    MASTER = 1
    FRONTRIGHT = 2
    FRONTLEFT = 3
    BACKRIGHT = 4
    BACKLEFT = 5
    FRONTCENTRE = 6
    BACKCENTRE = 7
    SUBWOOFER = 8
class VolumeAdjustmentSpec(Spec):
    """Spec for a volume adjustment stored as a signed 16-bit integer.

    The stored big-endian integer is the adjustment multiplied by 512.
    """

    def read(self, frame, data):
        """Return (adjustment as a float, data after the two bytes)."""
        raw, rest = data[0:2], data[2:]
        (fixed,) = unpack('>h', raw)
        return fixed / 512.0, rest

    def write(self, frame, value):
        """Pack value * 512, rounded to the nearest integer."""
        scaled = int(round(value * 512))
        return pack('>h', scaled)

    def validate(self, frame, value):
        """Accept any value unchanged."""
        return value
810 class VolumePeakSpec(Spec):
811 def read(self, frame, data):
812 # http://bugs.xmms.org/attachment.cgi?id=113&action=view
815 bytes = min(4, (bits + 7) >> 3)
816 # not enough frame data
817 if bytes + 1 > len(data): raise ID3JunkFrameError
818 shift = ((8 - (bits & 7)) & 7) + (4 - bytes) * 8
819 for i in range(1, bytes+1):
823 return (float(peak) / (2**31-1)), data[1+bytes:]
def write(self, frame, value):
    """Serialise a peak value as a bit-count byte plus 16 bits of data.

    The result is the byte 0x10 followed by value * 32768, rounded and
    packed as an unsigned big-endian 16-bit integer.
    """
    # always write as 16 bits for sanity.
    scaled = int(round(value * 32768))
    return pack('>BH', 0x10, scaled)
def validate(self, frame, value):
    """Accept any value unchanged."""
    return value
831 class SynchronizedTextSpec(EncodedTextSpec):
832 def read(self, frame, data):
834 encoding, term = self._encodings[frame.encoding]
837 value_idx = data.index(term)
838 value = data[:value_idx].decode(encoding)
839 time, = struct.unpack(">I", data[value_idx+l:value_idx+l+4])
840 texts.append((value, time))
841 data = data[value_idx+l+4:]
844 def write(self, frame, value):
846 encoding, term = self._encodings[frame.encoding]
847 for text, time in frame.text:
848 text = text.encode(encoding) + term
849 data.append(text + struct.pack(">I", time))
852 def validate(self, frame, value):
855 class KeyEventSpec(Spec):
856 def read(self, frame, data):
858 while len(data) >= 5:
859 events.append(struct.unpack(">bI", data[:5]))
def write(self, frame, value):
    """Serialise a sequence of two-item events.

    Each event is packed big-endian as a signed byte followed by an
    unsigned 32-bit integer; the packed events are concatenated.
    """
    packed = []
    for event in value:
        packed.append(struct.pack(">bI", *event))
    return "".join(packed)
866 def validate(self, frame, value):
869 class VolumeAdjustmentsSpec(Spec):
870 # Not to be confused with VolumeAdjustmentSpec.
871 def read(self, frame, data):
873 while len(data) >= 4:
874 freq, adj = struct.unpack(">Hh", data[:4])
878 adjustments[freq] = adj
879 adjustments = adjustments.items()
881 return adjustments, data
883 def write(self, frame, value):
885 return "".join([struct.pack(">Hh", int(freq * 2), int(adj * 512))
886 for (freq, adj) in value])
888 def validate(self, frame, value):
891 class ASPIIndexSpec(Spec):
892 def read(self, frame, data):
900 warn("invalid bit count in ASPI (%d)" % frame.b, ID3Warning)
903 indexes = data[:frame.N * size]
904 data = data[frame.N * size:]
905 return list(struct.unpack(">" + format * frame.N, indexes)), data
def write(self, frame, values):
    """Pack the index values using the width given by frame.b.

    frame.b selects unsigned 16-bit (16) or unsigned 8-bit (8) entries
    and frame.N gives the number of entries; the output is big-endian.
    Raises ValueError for any other frame.b.
    """
    bits = frame.b
    if bits == 16:
        code = "H"
    elif bits == 8:
        code = "B"
    else:
        raise ValueError("frame.b must be 8 or 16")
    layout = ">" + code * frame.N
    return struct.pack(layout, *values)
913 def validate(self, frame, values):
917 """Fundamental unit of ID3 data.
919 ID3 tags are split into frames. Each frame has a potentially
920 different structure, and so this base class is not very featureful.
923 FLAG23_ALTERTAG = 0x8000
924 FLAG23_ALTERFILE = 0x4000
925 FLAG23_READONLY = 0x2000
926 FLAG23_COMPRESS = 0x0080
927 FLAG23_ENCRYPT = 0x0040
928 FLAG23_GROUP = 0x0020
930 FLAG24_ALTERTAG = 0x4000
931 FLAG24_ALTERFILE = 0x2000
932 FLAG24_READONLY = 0x1000
933 FLAG24_GROUPID = 0x0040
934 FLAG24_COMPRESS = 0x0008
935 FLAG24_ENCRYPT = 0x0004
936 FLAG24_UNSYNCH = 0x0002
937 FLAG24_DATALEN = 0x0001
940 def __init__(self, *args, **kwargs):
941 if len(args)==1 and len(kwargs)==0 and isinstance(args[0], type(self)):
943 for checker in self._framespec:
944 val = checker.validate(self, getattr(other, checker.name))
945 setattr(self, checker.name, val)
947 for checker, val in zip(self._framespec, args):
948 setattr(self, checker.name, checker.validate(self, val))
949 for checker in self._framespec[len(args):]:
950 validated = checker.validate(
951 self, kwargs.get(checker.name, None))
952 setattr(self, checker.name, validated)
956 doc="an internal key used to ensure frame uniqueness in a tag")
958 lambda s: type(s).__name__,
959 doc="ID3v2 three or four character frame ID")
962 """Python representation of a frame.
964 The string returned is a valid Python expression to construct
965 a copy of this frame.
968 for attr in self._framespec:
969 kw.append('%s=%r' % (attr.name, getattr(self, attr.name)))
970 return '%s(%s)' % (type(self).__name__, ', '.join(kw))
972 def _readData(self, data):
974 for reader in self._framespec:
976 try: value, data = reader.read(self, data)
977 except UnicodeDecodeError:
978 raise ID3JunkFrameError
979 else: raise ID3JunkFrameError
980 setattr(self, reader.name, value)
981 if data.strip('\x00'):
982 warn('Leftover data: %s: %r (from %r)' % (
983 type(self).__name__, data, odata),
986 def _writeData(self):
988 for writer in self._framespec:
989 data.append(writer.write(self, getattr(self, writer.name)))
993 """Return a human-readable representation of the frame."""
994 return "%s=%s" % (type(self).__name__, self._pprint())
997 return "[unrepresentable data]"
999 def fromData(cls, id3, tflags, data):
1000 """Construct this ID3 frame from raw string data."""
1002 if (2,4,0) <= id3.version:
1003 if tflags & (Frame.FLAG24_COMPRESS | Frame.FLAG24_DATALEN):
1004 # The data length int is syncsafe in 2.4 (but not 2.3).
1005 # However, we don't actually need the data length int,
1006 # except to work around a QL 0.12 bug, and in that case
1007 # all we need are the raw bytes.
1008 datalen_bytes = data[:4]
1010 if tflags & Frame.FLAG24_UNSYNCH or id3.f_unsynch:
1011 try: data = unsynch.decode(data)
1012 except ValueError, err:
1014 raise ID3BadUnsynchData, '%s: %r' % (err, data)
1015 if tflags & Frame.FLAG24_ENCRYPT:
1016 raise ID3EncryptionUnsupportedError
1017 if tflags & Frame.FLAG24_COMPRESS:
1018 try: data = data.decode('zlib')
1019 except zlibError, err:
1020 # the initial mutagen that went out with QL 0.12 did not
1021 # write the 4 bytes of uncompressed size. Compensate.
1022 data = datalen_bytes + data
1023 try: data = data.decode('zlib')
1024 except zlibError, err:
1026 raise ID3BadCompressedData, '%s: %r' % (err, data)
1028 elif (2,3,0) <= id3.version:
1029 if tflags & Frame.FLAG23_COMPRESS:
1030 usize, = unpack('>L', data[:4])
1032 if tflags & Frame.FLAG23_ENCRYPT:
1033 raise ID3EncryptionUnsupportedError
1034 if tflags & Frame.FLAG23_COMPRESS:
1035 try: data = data.decode('zlib')
1036 except zlibError, err:
1038 raise ID3BadCompressedData, '%s: %r' % (err, data)
1041 frame._rawdata = data
1042 frame._flags = tflags
1043 frame._readData(data)
1045 fromData = classmethod(fromData)
1048 raise TypeError("Frame objects are unhashable")
1050 class FrameOpt(Frame):
1051 """A frame with optional parts.
1053 Some ID3 frames have optional data; this class extends Frame to
1054 provide support for those parts.
1058 def __init__(self, *args, **kwargs):
1059 super(FrameOpt, self).__init__(*args, **kwargs)
1060 for spec in self._optionalspec:
1061 if spec.name in kwargs:
1062 validated = spec.validate(self, kwargs[spec.name])
1063 setattr(self, spec.name, validated)
1066 def _readData(self, data):
1068 for reader in self._framespec:
1069 if len(data): value, data = reader.read(self, data)
1070 else: raise ID3JunkFrameError
1071 setattr(self, reader.name, value)
1073 for reader in self._optionalspec:
1074 if len(data): value, data = reader.read(self, data)
1076 setattr(self, reader.name, value)
1077 if data.strip('\x00'):
1078 warn('Leftover data: %s: %r (from %r)' % (
1079 type(self).__name__, data, odata),
1082 def _writeData(self):
1084 for writer in self._framespec:
1085 data.append(writer.write(self, getattr(self, writer.name)))
1086 for writer in self._optionalspec:
1087 try: data.append(writer.write(self, getattr(self, writer.name)))
1088 except AttributeError: break
1089 return ''.join(data)
1093 for attr in self._framespec:
1094 kw.append('%s=%r' % (attr.name, getattr(self, attr.name)))
1095 for attr in self._optionalspec:
1096 if hasattr(self, attr.name):
1097 kw.append('%s=%r' % (attr.name, getattr(self, attr.name)))
1098 return '%s(%s)' % (type(self).__name__, ', '.join(kw))
1101 class TextFrame(Frame):
1104 Text frames support casts to unicode or str objects, as well as
1105 list-like indexing, extend, and append.
1107 Iterating over a TextFrame iterates over its strings, not its
1110 Text frames have a 'text' attribute which is the list of strings,
1111 and an 'encoding' attribute; 0 for ISO-8859 1, 1 UTF-16, 2 for
1112 UTF-16BE, and 3 for UTF-8. If you don't want to worry about
1113 encodings, just set it to 3.
1116 _framespec = [ EncodingSpec('encoding'),
1117 MultiSpec('text', EncodedTextSpec('text'), sep=u'\u0000') ]
1118 def __str__(self): return self.__unicode__().encode('utf-8')
1119 def __unicode__(self): return u'\u0000'.join(self.text)
def __eq__(self, other):
    """Compare this frame with a str, a unicode object or anything else.

    A str is compared with str(self), a unicode object with the text
    values joined by u'\\u0000', and any other object with the text
    list itself.
    """
    if isinstance(other, str):
        mine = str(self)
    elif isinstance(other, unicode):
        mine = u'\u0000'.join(self.text)
    else:
        mine = self.text
    return mine == other
1125 def __getitem__(self, item): return self.text[item]
1126 def __iter__(self): return iter(self.text)
def append(self, value):
    """Append value to the frame's text list."""
    strings = self.text
    return strings.append(value)
def extend(self, value):
    """Extend the frame's text list with the items of value."""
    strings = self.text
    return strings.extend(value)
1129 def _pprint(self): return " / ".join(self.text)
1131 class NumericTextFrame(TextFrame):
1132 """Numerical text strings.
1134 The numeric value of these frames can be gotten with unary plus, e.g.
1135 frame = TLEN('12345')
1139 _framespec = [ EncodingSpec('encoding'),
1140 MultiSpec('text', EncodedNumericTextSpec('text'), sep=u'\u0000') ]
1143 """Return the numerical value of the string."""
1144 return int(self.text[0])
class NumericPartTextFrame(TextFrame):
    """Multivalue numerical text strings.

    These strings indicate 'part (e.g. track) X of Y', and unary plus
    returns the first value:
        frame = TRCK('4/15')
        track = +frame # track == 4
    """

    _framespec = [
        EncodingSpec('encoding'),
        MultiSpec('text', EncodedNumericPartTextSpec('text'), sep=u'\u0000'),
    ]

    def __pos__(self):
        """Return the number before the first '/' of the first value."""
        first = self.text[0]
        return int(first.split("/")[0])
class TimeStampTextFrame(TextFrame):
    """A list of time stamps.

    The 'text' attribute in this frame is a list of ID3TimeStamp
    objects, not a list of strings.
    """

    _framespec = [
        EncodingSpec('encoding'),
        MultiSpec('text', TimeStampSpec('stamp'), sep=u','),
    ]

    def __str__(self):
        return self.__unicode__().encode('utf-8')

    def __unicode__(self):
        # Each list entry exposes its textual form as '.text'; the
        # string form of the frame joins those with commas.
        return ','.join([stamp.text for stamp in self.text])

    def _pprint(self):
        return " / ".join([stamp.text for stamp in self.text])
class UrlFrame(Frame):
    """A frame containing a URL string.

    The ID3 specification is silent about IRIs and normalized URL
    forms. Mutagen assumes all URLs in files are encoded as Latin 1,
    but string conversion of this frame returns a UTF-8 representation
    for compatibility with other string conversions.

    The only sane way to handle URLs in MP3s is to restrict them to
    ASCII.
    """

    _framespec = [Latin1TextSpec('url')]

    def __str__(self):
        return self.url.encode('utf-8')

    def __unicode__(self):
        return self.url

    def __eq__(self, other):
        # A URL frame compares equal to whatever its URL compares equal to.
        return self.url == other

    def _pprint(self):
        return self.url
class UrlFrameU(UrlFrame):
    # The hash key combines the frame ID with the URL itself.
    HashKey = property(
        lambda frame: '%s:%s' % (frame.FrameID, frame.url))
class TALB(TextFrame):
    "Album"
class TBPM(NumericTextFrame):
    "Beats per minute"
class TCOM(TextFrame):
    "Composer"
class TCON(TextFrame):
    """Content type (Genre)

    ID3 has several ways genres can be represented; for convenience,
    use the 'genres' property rather than the 'text' attribute.
    """

    from mutagen._constants import GENRES

    def __get_genres(self):
        """Translate every entry of 'text' into a list of genre names."""
        import re
        # Zero or more "(id)" references, optionally followed by a name.
        genre_re = re.compile(r"((?:\((?P<id>[0-9]+|RX|CR)\))*)(?P<str>.+)?")
        genres = []
        for value in self.text:
            if value.isdigit():
                # A bare number is an index into the genre table.
                try: genres.append(self.GENRES[int(value)])
                except IndexError: genres.append(u"Unknown")
            elif value == "CR": genres.append(u"Cover")
            elif value == "RX": genres.append(u"Remix")
            elif value:
                newgenres = []
                genreid, dummy, genrename = genre_re.match(value).groups()

                if genreid:
                    # "(1)(2)" -> ["1", "2"]
                    for gid in genreid[1:-1].split(")("):
                        if gid.isdigit() and int(gid) < len(self.GENRES):
                            gid = unicode(self.GENRES[int(gid)])
                            newgenres.append(gid)
                        elif gid == "CR": newgenres.append(u"Cover")
                        elif gid == "RX": newgenres.append(u"Remix")
                        else: newgenres.append(u"Unknown")

                if genrename:
                    # "Unescaping" the first parenthesis
                    if genrename.startswith("(("): genrename = genrename[1:]
                    if genrename not in newgenres: newgenres.append(genrename)

                genres.extend(newgenres)

        return genres

    def __set_genres(self, genres):
        """Store one genre, or a list of genres, as the frame's text."""
        if isinstance(genres, basestring): genres = [genres]
        self.text = map(self.__decode, genres)

    def __decode(self, value):
        """Decode byte strings using the codec of the frame's encoding."""
        if isinstance(value, str):
            enc = EncodedTextSpec._encodings[self.encoding][0]
            return value.decode(enc)
        else: return value

    genres = property(__get_genres, __set_genres, None,
                      "A list of genres parsed from the raw text data.")

    def _pprint(self):
        return " / ".join(self.genres)
class TCOP(TextFrame):
    "Copyright (c)"
class TCMP(NumericTextFrame):
    "iTunes Compilation Flag"
class TDAT(TextFrame):
    "Date of recording (DDMM)"
class TDEN(TimeStampTextFrame):
    "Encoding Time"
class TDOR(TimeStampTextFrame):
    "Original Release Time"
class TDLY(NumericTextFrame):
    "Audio Delay (ms)"
class TDRC(TimeStampTextFrame):
    "Recording Time"
class TDRL(TimeStampTextFrame):
    "Release Time"
class TDTG(TimeStampTextFrame):
    "Tagging Time"
class TENC(TextFrame):
    "Encoder"
class TEXT(TextFrame):
    "Lyricist"
class TFLT(TextFrame):
    "File type"
class TIME(TextFrame):
    "Time of recording (HHMM)"
class TIT1(TextFrame):
    "Content group description"
class TIT2(TextFrame):
    "Title"
class TIT3(TextFrame):
    "Subtitle/Description refinement"
class TKEY(TextFrame):
    "Starting Key"
class TLAN(TextFrame):
    "Audio Languages"
class TLEN(NumericTextFrame):
    "Audio Length (ms)"
class TMED(TextFrame):
    "Source Media Type"
class TMOO(TextFrame):
    "Mood"
class TOAL(TextFrame):
    "Original Album"
class TOFN(TextFrame):
    "Original Filename"
class TOLY(TextFrame):
    "Original Lyricist"
class TOPE(TextFrame):
    "Original Artist/Performer"
class TORY(NumericTextFrame):
    "Original Release Year"
class TOWN(TextFrame):
    "Owner/Licensee"
class TPE1(TextFrame):
    "Lead Artist/Performer/Soloist/Group"
class TPE2(TextFrame):
    "Band/Orchestra/Accompaniment"
class TPE3(TextFrame):
    "Conductor"
class TPE4(TextFrame):
    "Interpreter/Remixer/Modifier"
class TPOS(NumericPartTextFrame):
    "Part of set"
class TPRO(TextFrame):
    "Produced (P)"
class TPUB(TextFrame):
    "Publisher"
class TRCK(NumericPartTextFrame):
    "Track Number"
class TRDA(TextFrame):
    "Recording Dates"
class TRSN(TextFrame):
    "Internet Radio Station Name"
class TRSO(TextFrame):
    "Internet Radio Station Owner"
class TSIZ(NumericTextFrame):
    "Size of audio data (bytes)"
class TSOA(TextFrame):
    "Album Sort Order key"
class TSOP(TextFrame):
    "Perfomer Sort Order key"
class TSOT(TextFrame):
    "Title Sort Order key"
class TSRC(TextFrame):
    "International Standard Recording Code (ISRC)"
class TSSE(TextFrame):
    "Encoder settings"
class TSST(TextFrame):
    "Set Subtitle"
class TYER(NumericTextFrame):
    "Year of recording"
class TXXX(TextFrame):
    """User-defined text data.

    TXXX frames have a 'desc' attribute which is set to any Unicode
    value (though the encoding of the text and the description must be
    the same). Many taggers use this frame to store freeform keys.
    """

    _framespec = [
        EncodingSpec('encoding'),
        EncodedTextSpec('desc'),
        MultiSpec('text', EncodedTextSpec('text'), sep=u'\u0000'),
    ]

    # The hash key combines the frame ID with the description.
    HashKey = property(
        lambda frame: '%s:%s' % (frame.FrameID, frame.desc))

    def _pprint(self):
        joined = " / ".join(self.text)
        return "%s=%s" % (self.desc, joined)
class WCOM(UrlFrameU):
    "Commercial Information"
class WCOP(UrlFrame):
    "Copyright Information"
class WOAF(UrlFrame):
    "Official File Information"
class WOAR(UrlFrameU):
    "Official Artist/Performer Information"
class WOAS(UrlFrame):
    "Official Source Information"
class WORS(UrlFrame):
    "Official Internet Radio Information"
class WPAY(UrlFrame):
    "Payment Information"
class WPUB(UrlFrame):
    "Official Publisher Information"
class WXXX(UrlFrame):
    """User-defined URL data.

    Like TXXX, this has a freeform description associated with it.
    """

    _framespec = [
        EncodingSpec('encoding'),
        EncodedTextSpec('desc'),
        Latin1TextSpec('url'),
    ]

    # The hash key combines the frame ID with the description.
    HashKey = property(
        lambda frame: '%s:%s' % (frame.FrameID, frame.desc))
class PairedTextFrame(Frame):
    """Paired text strings.

    Some ID3 frames pair text strings, to associate names with a more
    specific involvement in the song. The 'people' attribute of these
    frames contains a list of pairs:
        [['trumpet', 'Miles Davis'], ['bass', 'Paul Chambers']]

    Like text frames, these frames also have an encoding attribute.
    """

    _framespec = [
        EncodingSpec('encoding'),
        MultiSpec('people',
                  EncodedTextSpec('involvement'),
                  EncodedTextSpec('person')),
    ]

    def __eq__(self, other):
        # Equality is decided by the list of pairs alone.
        return self.people == other
class TIPL(PairedTextFrame):
    "Involved People List"
class TMCL(PairedTextFrame):
    "Musicians Credits List"
class IPLS(TIPL):
    "Involved People List"
1354 """Binary dump of CD's TOC.
1356 The 'data' attribute contains the raw byte string.
1358 _framespec = [ BinaryDataSpec('data') ]
1359 def __eq__(self, other): return self.data == other
1362 """Event timing codes."""
1363 _framespec = [ ByteSpec("format"), KeyEventSpec("events") ]
1364 def __eq__(self, other): return self.events == other
1367 """MPEG location lookup table.
1369 This frame's attributes may be changed in the future based on
1370 feedback from real-world use.
1372 _framespec = [ SizedIntegerSpec('frames', 2),
1373 SizedIntegerSpec('bytes', 3),
1374 SizedIntegerSpec('milliseconds', 3),
1375 ByteSpec('bits_for_bytes'),
1376 ByteSpec('bits_for_milliseconds'),
1377 BinaryDataSpec('data') ]
1378 def __eq__(self, other): return self.data == other
1381 """Synchronised tempo codes.
1383 This frame's attributes may be changed in the future based on
1384 feedback from real-world use.
1386 _framespec = [ ByteSpec("format"), BinaryDataSpec("data") ]
1387 def __eq__(self, other): return self.data == other
1390 """Unsynchronised lyrics/text transcription.
1392 Lyrics have a three letter ISO language code ('lang'), a
1393 description ('desc'), and a block of plain text ('text').
1396 _framespec = [ EncodingSpec('encoding'), StringSpec('lang', 3),
1397 EncodedTextSpec('desc'), EncodedTextSpec('text') ]
1398 HashKey = property(lambda s: '%s:%s:%r' % (s.FrameID, s.desc, s.lang))
1400 def __str__(self): return self.text.encode('utf-8')
1401 def __unicode__(self): return self.text
1402 def __eq__(self, other): return self.text == other
1405 """Synchronised lyrics/text."""
1407 _framespec = [ EncodingSpec('encoding'), StringSpec('lang', 3),
1408 ByteSpec('format'), ByteSpec('type'), EncodedTextSpec('desc'),
1409 SynchronizedTextSpec('text') ]
1410 HashKey = property(lambda s: '%s:%s:%r' % (s.FrameID, s.desc, s.lang))
1412 def __eq__(self, other):
1413 return str(self) == other
1416 return "".join([text for (text, time) in self.text]).encode('utf-8')
class COMM(TextFrame):
    """User comment.

    User comment frames have a description, like TXXX, and also a three
    letter ISO language code in the 'lang' attribute.
    """

    _framespec = [
        EncodingSpec('encoding'),
        StringSpec('lang', 3),
        EncodedTextSpec('desc'),
        MultiSpec('text', EncodedTextSpec('text'), sep=u'\u0000'),
    ]

    # The hash key combines the frame ID, the description and the
    # repr() of the language code.
    HashKey = property(
        lambda frame: '%s:%s:%r' % (frame.FrameID, frame.desc, frame.lang))

    def _pprint(self):
        joined = " / ".join(self.text)
        return "%s=%r=%s" % (self.desc, self.lang, joined)
1432 """Relative volume adjustment (2).
1434 This frame is used to implemented volume scaling, and in
1435 particular, normalization using ReplayGain.
1438 desc -- description or context of this adjustment
1439 channel -- audio channel to adjust (master is 1)
1440 gain -- a + or - dB gain relative to some reference level
1441 peak -- peak of the audio as a floating point number, [0, 1]
1443 When storing ReplayGain tags, use descriptions of 'album' and
1444 'track' on channel 1.
1447 _framespec = [ Latin1TextSpec('desc'), ChannelSpec('channel'),
1448 VolumeAdjustmentSpec('gain'), VolumePeakSpec('peak') ]
1449 _channels = ["Other", "Master volume", "Front right", "Front left",
1450 "Back right", "Back left", "Front centre", "Back centre",
1452 HashKey = property(lambda s: '%s:%s' % (s.FrameID, s.desc))
1454 def __eq__(self, other):
1455 return ((str(self) == other) or
1456 (self.desc == other.desc and
1457 self.channel == other.channel and
1458 self.gain == other.gain and
1459 self.peak == other.peak))
1462 return "%s: %+0.4f dB/%0.4f" % (
1463 self._channels[self.channel], self.gain, self.peak)
1466 """Equalisation (2).
1469 method -- interpolation method (0 = band, 1 = linear)
1470 desc -- identifying description
1471 adjustments -- list of (frequency, vol_adjustment) pairs
1473 _framespec = [ ByteSpec("method"), Latin1TextSpec("desc"),
1474 VolumeAdjustmentsSpec("adjustments") ]
1475 def __eq__(self, other): return self.adjustments == other
1476 HashKey = property(lambda s: '%s:%s' % (s.FrameID, s.desc))
1478 # class RVAD: unsupported
1479 # class EQUA: unsupported
1483 _framespec = [ SizedIntegerSpec('left', 2), SizedIntegerSpec('right', 2),
1484 ByteSpec('bounce_left'), ByteSpec('bounce_right'),
1485 ByteSpec('feedback_ltl'), ByteSpec('feedback_ltr'),
1486 ByteSpec('feedback_rtr'), ByteSpec('feedback_rtl'),
1487 ByteSpec('premix_ltr'), ByteSpec('premix_rtl') ]
1489 def __eq__(self, other): return (self.left, self.right) == other
1492 """Attached (or linked) Picture.
1495 encoding -- text encoding for the description
1496 mime -- a MIME type (e.g. image/jpeg) or '-->' if the data is a URI
1497 type -- the source of the image (3 is the album front cover)
1498 desc -- a text description of the image
1499 data -- raw image data, as a byte string
1501 Mutagen will automatically compress large images when saving tags.
1503 _framespec = [ EncodingSpec('encoding'), Latin1TextSpec('mime'),
1504 ByteSpec('type'), EncodedTextSpec('desc'), BinaryDataSpec('data') ]
1505 def __eq__(self, other): return self.data == other
1506 HashKey = property(lambda s: '%s:%s' % (s.FrameID, s.desc))
1508 return "%s (%s, %d bytes)" % (
1509 self.desc, self.mime, len(self.data))
1514 The 'count' attribute contains the (recorded) number of times this
1515 file has been played.
1517 This frame is basically obsoleted by POPM.
1519 _framespec = [ IntegerSpec('count') ]
1521 def __eq__(self, other): return self.count == other
1522 def __pos__(self): return self.count
1523 def _pprint(self): return unicode(self.count)
1528 This frame keys a rating (out of 255) and a play count to an email
1532 email -- email this POPM frame is for
1533 rating -- rating from 0 to 255
1534 count -- number of times the files has been played
1536 _framespec = [ Latin1TextSpec('email'), ByteSpec('rating'),
1537 IntegerSpec('count') ]
1538 HashKey = property(lambda s: '%s:%s' % (s.FrameID, s.email))
1540 def __eq__(self, other): return self.rating == other
1541 def __pos__(self): return self.rating
1542 def _pprint(self): return "%s=%s %s/255" % (
1543 self.email, self.count, self.rating)
1546 """General Encapsulated Object.
1548 A blob of binary data, that is not a picture (those go in APIC).
1551 encoding -- encoding of the description
1552 mime -- MIME type of the data or '-->' if the data is a URI
1553 filename -- suggested filename if extracted
1554 desc -- text description of the data
1555 data -- raw data, as a byte string
1557 _framespec = [ EncodingSpec('encoding'), Latin1TextSpec('mime'),
1558 EncodedTextSpec('filename'), EncodedTextSpec('desc'),
1559 BinaryDataSpec('data') ]
1560 HashKey = property(lambda s: '%s:%s' % (s.FrameID, s.desc))
1562 def __eq__(self, other): return self.data == other
class RBUF(FrameOpt):
    """Recommended buffer size.

    Attributes:
    size -- recommended buffer size in bytes
    info -- if ID3 tags may be elsewhere in the file (optional)
    offset -- the location of the next ID3 tag, if any

    Mutagen will not find the next tag itself.
    """

    _framespec = [SizedIntegerSpec('size', 3)]
    _optionalspec = [
        ByteSpec('info'),
        SizedIntegerSpec('offset', 4),
    ]

    def __eq__(self, other):
        # Only the buffer size takes part in comparisons.
        return self.size == other

    def __pos__(self):
        return self.size
class AENC(FrameOpt):
    """Audio encryption.

    Attributes:
    owner -- key identifying this encryption type
    preview_start -- unencrypted data block offset
    preview_length -- number of unencrypted blocks
    data -- data required for decryption (optional)

    Mutagen cannot decrypt files.
    """

    _framespec = [
        Latin1TextSpec('owner'),
        SizedIntegerSpec('preview_start', 2),
        SizedIntegerSpec('preview_length', 2),
    ]
    _optionalspec = [BinaryDataSpec('data')]

    # The hash key combines the frame ID with the owner.
    HashKey = property(
        lambda frame: '%s:%s' % (frame.FrameID, frame.owner))

    def __str__(self):
        return self.owner.encode('utf-8')

    def __unicode__(self):
        return self.owner

    def __eq__(self, other):
        return self.owner == other
class LINK(FrameOpt):
    """Linked information.

    Attributes:
    frameid -- the ID of the linked frame
    url -- the location of the linked frame
    data -- further ID information for the frame
    """

    _framespec = [
        StringSpec('frameid', 4),
        Latin1TextSpec('url'),
    ]
    _optionalspec = [BinaryDataSpec('data')]

    def __HashKey(self):
        # 'data' comes from the optional spec, so the attribute may be
        # absent; fall back to a key without it in that case.
        try:
            return "%s:%s:%s:%r" % (
                self.FrameID, self.frameid, self.url, self.data)
        except AttributeError:
            return "%s:%s:%s" % (self.FrameID, self.frameid, self.url)
    HashKey = property(__HashKey)

    def __eq__(self, other):
        # Compare as a 3-tuple when 'data' is present, else as a 2-tuple.
        try:
            return (self.frameid, self.url, self.data) == other
        except AttributeError:
            return (self.frameid, self.url) == other
1624 """Position synchronisation frame
1627 format -- format of the position attribute (frames or milliseconds)
1628 position -- current position of the file
1630 _framespec = [ ByteSpec('format'), IntegerSpec('position') ]
1632 def __pos__(self): return self.position
1633 def __eq__(self, other): return self.position == other
1636 """Unique file identifier.
1639 owner -- format/type of identifier
1643 _framespec = [ Latin1TextSpec('owner'), BinaryDataSpec('data') ]
1644 HashKey = property(lambda s: '%s:%s' % (s.FrameID, s.owner))
1646 if isinstance(o, UFI): return s.owner == o.owner and s.data == o.data
1647 else: return s.data == o
1649 isascii = ord(max(self.data)) < 128
1650 if isascii: return "%s=%s" % (self.owner, self.data)
1651 else: return "%s (%d bytes)" % (self.owner, len(self.data))
1657 encoding -- text encoding
1658 lang -- ISO three letter language code
1659 text -- licensing terms for the audio
1661 _framespec = [ EncodingSpec('encoding'), StringSpec('lang', 3),
1662 EncodedTextSpec('text') ]
1663 HashKey = property(lambda s: '%s:%r' % (s.FrameID, s.lang))
1665 def __str__(self): return self.text.encode('utf-8')
1666 def __unicode__(self): return self.text
1667 def __eq__(self, other): return self.text == other
1668 def _pprint(self): return "%r=%s" % (self.lang, self.text)
1671 """Ownership frame."""
1672 _framespec = [ EncodingSpec('encoding'), Latin1TextSpec('price'),
1673 StringSpec('date', 8), EncodedTextSpec('seller') ]
1675 def __str__(self): return self.seller.encode('utf-8')
1676 def __unicode__(self): return self.seller
1677 def __eq__(self, other): return self.seller == other
class COMR(FrameOpt):
    """Commercial frame."""

    _framespec = [
        EncodingSpec('encoding'),
        Latin1TextSpec('price'),
        StringSpec('valid_until', 8),
        Latin1TextSpec('contact'),
        ByteSpec('format'),
        EncodedTextSpec('seller'),
        EncodedTextSpec('desc'),
    ]
    _optionalspec = [
        Latin1TextSpec('mime'),
        BinaryDataSpec('logo'),
    ]

    # Both the hash key and equality are based on the output of
    # _writeData() rather than on individual attributes.
    HashKey = property(
        lambda frame: '%s:%s' % (frame.FrameID, frame._writeData()))

    def __eq__(self, other):
        return self._writeData() == other._writeData()
1690 """Encryption method registration.
1692 The standard does not allow multiple ENCR frames with the same owner
1693 or the same method. Mutagen only verifies that the owner is unique.
1695 _framespec = [ Latin1TextSpec('owner'), ByteSpec('method'),
1696 BinaryDataSpec('data') ]
1697 HashKey = property(lambda s: "%s:%s" % (s.FrameID, s.owner))
1698 def __str__(self): return self.data
1699 def __eq__(self, other): return self.data == other
class GRID(FrameOpt):
    """Group identification registration."""

    _framespec = [
        Latin1TextSpec('owner'),
        ByteSpec('group'),
    ]
    _optionalspec = [BinaryDataSpec('data')]

    # The hash key combines the frame ID with the group value.
    HashKey = property(
        lambda frame: '%s:%s' % (frame.FrameID, frame.group))

    def __pos__(self):
        return self.group

    def __str__(self):
        return self.owner.encode('utf-8')

    def __unicode__(self):
        return self.owner

    def __eq__(self, other):
        # Matches on either the owner or the group value.
        return self.owner == other or self.group == other
1713 """Private frame."""
1714 _framespec = [ Latin1TextSpec('owner'), BinaryDataSpec('data') ]
1715 HashKey = property(lambda s: '%s:%s:%s' % (
1716 s.FrameID, s.owner, s.data.decode('latin1')))
1717 def __str__(self): return self.data
1718 def __eq__(self, other): return self.data == other
1720 isascii = ord(max(self.data)) < 128
1721 if isascii: return "%s=%s" % (self.owner, self.data)
1722 else: return "%s (%d bytes)" % (self.owner, len(self.data))
1725 """Signature frame."""
1726 _framespec = [ ByteSpec('group'), BinaryDataSpec('sig') ]
1727 HashKey = property(lambda s: '%s:%c:%s' % (s.FrameID, s.group, s.sig))
1728 def __str__(self): return self.sig
1729 def __eq__(self, other): return self.sig == other
1734 Mutagen does not find tags at seek offsets.
1736 _framespec = [ IntegerSpec('offset') ]
1737 def __pos__(self): return self.offset
1738 def __eq__(self, other): return self.offset == other
1741 """Audio seek point index.
1743 Attributes: S, L, N, b, and Fi. For the meaning of these, see
1744 the ID3v2.4 specification. Fi is a list of integers.
1746 _framespec = [ SizedIntegerSpec("S", 4), SizedIntegerSpec("L", 4),
1747 SizedIntegerSpec("N", 2), ByteSpec("b"),
1748 ASPIIndexSpec("Fi") ]
1749 def __eq__(self, other): return self.Fi == other
# Collect every Frame subclass defined so far whose name is exactly four
# characters long. The loop names stay one letter long on purpose: a
# Python 2 list comprehension leaks them into the module namespace.
Frames = dict([
    (k, v) for (k, v) in globals().items()
    if len(k) == 4 and isinstance(v, type) and issubclass(v, Frame)
])
"""All supported ID3v2 frames, keyed by frame name."""
class UFI(UFID):
    "Unique File Identifier"

class TT1(TIT1):
    "Content group description"
class TT2(TIT2):
    "Title"
class TT3(TIT3):
    "Subtitle/Description refinement"
class TP1(TPE1):
    "Lead Artist/Performer/Soloist/Group"
class TP2(TPE2):
    "Band/Orchestra/Accompaniment"
class TP3(TPE3):
    "Conductor"
class TP4(TPE4):
    "Interpreter/Remixer/Modifier"
class TCM(TCOM):
    "Composer"
class TXT(TEXT):
    "Lyricist"
class TLA(TLAN):
    "Audio Language(s)"
class TCO(TCON):
    "Content Type (Genre)"
class TAL(TALB):
    "Album"
class TPA(TPOS):
    "Part of set"
class TRK(TRCK):
    "Track Number"
class TRC(TSRC):
    "International Standard Recording Code (ISRC)"
class TYE(TYER):
    "Year of recording"
class TDA(TDAT):
    "Date of recording (DDMM)"
class TIM(TIME):
    "Time of recording (HHMM)"
class TRD(TRDA):
    "Recording Dates"
class TMT(TMED):
    "Source Media Type"
class TFT(TFLT):
    "File Type"
class TBP(TBPM):
    "Beats per minute"
class TCP(TCMP):
    "iTunes Compilation Flag"
class TCR(TCOP):
    "Copyright (C)"
class TPB(TPUB):
    "Publisher"
class TEN(TENC):
    "Encoder"
class TSS(TSSE):
    "Encoder settings"
class TOF(TOFN):
    "Original Filename"
class TLE(TLEN):
    "Audio Length (ms)"
class TSI(TSIZ):
    "Audio Data size (bytes)"
class TDY(TDLY):
    "Audio Delay (ms)"
class TKE(TKEY):
    "Starting Key"
class TOT(TOAL):
    "Original Album"
class TOA(TOPE):
    "Original Artist/Perfomer"
class TOL(TOLY):
    "Original Lyricist"
class TOR(TORY):
    "Original Release Year"

class TXX(TXXX):
    "User-defined Text"

class WAF(WOAF):
    "Official File Information"
class WAR(WOAR):
    "Official Artist/Performer Information"
class WAS(WOAS):
    "Official Source Information"
class WCM(WCOM):
    "Commercial Information"
class WCP(WCOP):
    "Copyright Information"
class WPB(WPUB):
    "Official Publisher Information"

class WXX(WXXX):
    "User-defined URL"

class IPL(IPLS):
    "Involved people list"
class MCI(MCDI):
    "Binary dump of CD's TOC"
class ETC(ETCO):
    "Event timing codes"
class MLL(MLLT):
    "MPEG location lookup table"
class STC(SYTC):
    "Synced tempo codes"
class ULT(USLT):
    "Unsychronised lyrics/text transcription"
class SLT(SYLT):
    "Synchronised lyrics/text"
class COM(COMM):
    "Comment"

class REV(RVRB):
    "Reverb"
1819 """Attached Picture.
1821 The 'mime' attribute of an ID3v2.2 attached picture must be either
1824 _framespec = [ EncodingSpec('encoding'), StringSpec('mime', 3),
1825 ByteSpec('type'), EncodedTextSpec('desc'), BinaryDataSpec('data') ]
class GEO(GEOB):
    "General Encapsulated Object"
class CNT(PCNT):
    "Play counter"
class POP(POPM):
    "Popularimeter"
class BUF(RBUF):
    "Recommended buffer size"
1832 """Encrypted meta frame"""
1833 _framespec = [ Latin1TextSpec('owner'), Latin1TextSpec('desc'),
1834 BinaryDataSpec('data') ]
1835 def __eq__(self, other): return self.data == other
class CRA(AENC):
    "Audio encryption"
1840 """Linked information"""
1841 _framespec = [ StringSpec('frameid', 3), Latin1TextSpec('url') ]
1842 _optionalspec = [ BinaryDataSpec('data') ]
# Collect every Frame subclass defined so far whose name is exactly three
# characters long. The loop names stay one letter long on purpose: a
# Python 2 list comprehension leaks them into the module namespace.
Frames_2_2 = dict([
    (k, v) for (k, v) in globals().items()
    if len(k) == 3 and isinstance(v, type) and issubclass(v, Frame)
])
1847 # support open(filename) as interface
def ParseID3v1(string):
    """Parse an ID3v1 tag, returning a dict of ID3v2.4 frames.

    'string' is the raw tag as a byte string. The result maps frame IDs
    ("TIT2", "TPE1", "TALB", "TDRC", "COMM", "TRCK", "TCON") to frame
    objects; empty fields produce no entry. None is returned when the
    data cannot be unpacked as an ID3v1 tag or does not start with "TAG".
    """
    from struct import error as StructError

    try:
        tag, title, artist, album, year, comment, track, genre = unpack(
            "3s30s30s30s4s29sBB", string)
    except StructError:
        return None

    if tag != "TAG":
        return None

    def fix(string):
        # Keep what precedes the first null byte, trim surrounding
        # whitespace and decode the rest as Latin-1.
        return string.split("\x00")[0].strip().decode('latin1')
    title, artist, album, year, comment = map(
        fix, [title, artist, album, year, comment])

    frames = {}
    if title: frames["TIT2"] = TIT2(encoding=0, text=title)
    if artist: frames["TPE1"] = TPE1(encoding=0, text=[artist])
    if album: frames["TALB"] = TALB(encoding=0, text=album)
    if year: frames["TDRC"] = TDRC(encoding=0, text=year)
    if comment: frames["COMM"] = COMM(
        encoding=0, lang="eng", desc="ID3v1 Comment", text=comment)
    # Don't read a track number if it looks like the comment was
    # padded with spaces instead of nulls (thanks, WinAmp).
    if track and (track != 32 or string[-3] == '\x00'):
        frames["TRCK"] = TRCK(encoding=0, text=str(track))
    # A genre byte of 255 is treated as "no genre".
    if genre != 255: frames["TCON"] = TCON(encoding=0, text=str(genre))
    return frames
1880 """Return an ID3v1.1 tag string from a dict of ID3v2.4 frames."""
1884 for v2id, name in {"TIT2": "title", "TPE1": "artist",
1885 "TALB": "album"}.items():
1887 text = id3[v2id].text[0].encode('latin1', 'replace')[:30]
1889 v1[name] = text + ("\x00" * (30 - len(text)))
1892 cmnt = id3["COMM"].text[0].encode('latin1', 'replace')[:28]
1894 v1["comment"] = cmnt + ("\x00" * (29 - len(cmnt)))
1897 try: v1["track"] = chr(+id3["TRCK"])
1898 except ValueError: v1["track"] = "\x00"
1899 else: v1["track"] = "\x00"
1902 try: genre = id3["TCON"].genres[0]
1903 except IndexError: pass
1905 if genre in TCON.GENRES:
1906 v1["genre"] = chr(TCON.GENRES.index(genre))
1907 if "genre" not in v1: v1["genre"] = "\xff"
1909 if "TDRC" in id3: v1["year"] = str(id3["TDRC"])[:4]
1910 else: v1["year"] = "\x00\x00\x00\x00"
1912 return ("TAG%(title)s%(artist)s%(album)s%(year)s%(comment)s"
1913 "%(track)s%(genre)s") % v1
1915 class ID3FileType(mutagen.FileType):
1916 """An unknown type of file with ID3 tags."""
1918 class _Info(object):
1920 def __init__(self, fileobj, offset): pass
1921 pprint = staticmethod(lambda: "Unknown format with ID3 tag")
1923 def score(filename, fileobj, header):
1924 return header.startswith("ID3")
1925 score = staticmethod(score)
1927 def add_tags(self, ID3=ID3):
1928 """Add an empty ID3 tag to the file.
1930 A custom tag reader may be used in instead of the default
1931 mutagen.id3.ID3 object, e.g. an EasyID3 reader.
1933 if self.tags is None:
1936 raise error("an ID3 tag already exists")
1938 def load(self, filename, ID3=ID3, **kwargs):
1939 """Load stream and tag information from a file.
1941 A custom tag reader may be used in instead of the default
1942 mutagen.id3.ID3 object, e.g. an EasyID3 reader.
1944 self.filename = filename
1945 try: self.tags = ID3(filename, **kwargs)
1946 except error: self.tags = None
1947 if self.tags is not None:
1948 try: offset = self.tags.size
1949 except AttributeError: offset = None
1952 fileobj = file(filename, "rb")
1953 self.info = self._Info(fileobj, offset)