lib/mutagen/mp3.py

   1 # MP3 stream header information support for Mutagen.
   2 # Copyright 2006 Joe Wreschnig
   3 #
   4 # This program is free software; you can redistribute it and/or modify
   5 # it under the terms of version 2 of the GNU General Public License as
   6 # published by the Free Software Foundation.
   7
   8 """MPEG audio stream information and tags."""
   9
  10 import os
  11 import struct
  12
  13 from mutagen.id3 import ID3FileType, BitPaddedInt, delete
  14
  15 class error(RuntimeError): pass
  16 class HeaderNotFoundError(error, IOError): pass
  17 class InvalidMPEGHeader(error, IOError): pass
  18
  19 # Mode values.
  20 STEREO, JOINTSTEREO, DUALCHANNEL, MONO = range(4)
  21
  22 class MPEGInfo(object):
  23     """MPEG audio stream information
  24
  25     Parse information about an MPEG audio file. This also reads the
  26     Xing VBR header format.
  27
  28     This code was implemented based on the format documentation at
  29     http://www.dv.co.yu/mpgscript/mpeghdr.htm.
  30
  31     Useful attributes:
  32     length -- audio length, in seconds
  33     bitrate -- audio bitrate, in bits per second
  34     sketchy -- if true, the file may not be valid MPEG audio
  35
  36     Useless attributes:
  37     version -- MPEG version (1, 2, 2.5)
  38     layer -- 1, 2, or 3
  39     mode -- One of STEREO, JOINTSTEREO, DUALCHANNEL, or MONO (0-3)
  40     protected -- whether or not the file is "protected"
  41     padding -- whether or not audio frames are padded
  42     sample_rate -- audio sample rate, in Hz
  43     """
  44
  45     # Map (version, layer) tuples to bitrates.
  46     __BITRATE = {
  47         (1, 1): range(0, 480, 32),
  48         (1, 2): [0, 32, 48, 56, 64, 80, 96, 112,128,160,192,224,256,320,384],
  49         (1, 3): [0, 32, 40, 48, 56, 64, 80, 96, 112,128,160,192,224,256,320],
  50         (2, 1): [0, 32, 48, 56, 64, 80, 96, 112,128,144,160,176,192,224,256],
  51         (2, 2): [0,  8, 16, 24, 32, 40, 48,  56, 64, 80, 96,112,128,144,160],
  52         }
  53
  54     __BITRATE[(2, 3)] = __BITRATE[(2, 2)]
  55     for i in range(1, 4): __BITRATE[(2.5, i)] = __BITRATE[(2, i)]
  56
  57     # Map version to sample rates.
  58     __RATES = {
  59         1: [44100, 48000, 32000],
  60         2: [22050, 24000, 16000],
  61         2.5: [11025, 12000, 8000]
  62         }
  63
  64     sketchy = False
  65
  66     def __init__(self, fileobj, offset=None):
  67         """Parse MPEG stream information from a file-like object.
  68
  69         If an offset argument is given, it is used to start looking
  70         for stream information and Xing headers; otherwise, ID3v2 tags
  71         will be skipped automatically. A correct offset can make
  72         loading files significantly faster.
  73         """
  74
  75         try: size = os.path.getsize(fileobj.name)
  76         except (IOError, OSError, AttributeError):
  77             fileobj.seek(0, 2)
  78             size = fileobj.tell()
  79
  80         # If we don't get an offset, try to skip an ID3v2 tag.
  81         if offset is None:
  82             fileobj.seek(0, 0)
  83             idata = fileobj.read(10)
  84             try: id3, insize = struct.unpack('>3sxxx4s', idata)
  85             except struct.error: id3, insize = '', 0
  86             insize = BitPaddedInt(insize)
  87             if id3 == 'ID3' and insize > 0:
  88                 offset = insize
  89             else: offset = 0
  90
  91         # Try to find two valid headers (meaning, very likely MPEG data)
  92         # at the given offset, 30% through the file, 60% through the file,
  93         # and 90% through the file.
  94         for i in [offset, 0.3 * size, 0.6 * size, 0.9 * size]:
  95             try: self.__try(fileobj, int(i), size - offset)
  96             except error, e: pass
  97             else: break
  98         # If we can't find any two consecutive frames, try to find just
  99         # one frame back at the original offset given.
 100         else:
 101             self.__try(fileobj, offset, size - offset, False)
 102             self.sketchy = True
 103
 104     def __try(self, fileobj, offset, real_size, check_second=True):
 105         # This is going to be one really long function; bear with it,
 106         # because there's not really a sane point to cut it up.
 107         fileobj.seek(offset, 0)
 108
 109         # We "know" we have an MPEG file if we find two frames that look like
 110         # valid MPEG data. If we can't find them in 32k of reads, something
 111         # is horribly wrong (the longest frame can only be about 4k). This
 112         # is assuming the offset didn't lie.
 113         data = fileobj.read(32768)
 114
 115         frame_1 = data.find("\xff")
 116         while 0 <= frame_1 <= len(data) - 4:
 117             frame_data = struct.unpack(">I", data[frame_1:frame_1 + 4])[0]
 118             if (frame_data >> 16) & 0xE0 != 0xE0:
 119                 frame_1 = data.find("\xff", frame_1 + 2)
 120             else:
 121                 version = (frame_data >> 19) & 0x3
 122                 layer = (frame_data >> 17) & 0x3
 123                 protection = (frame_data >> 16) & 0x1
 124                 bitrate = (frame_data >> 12) & 0xF
 125                 sample_rate = (frame_data >> 10) & 0x3
 126                 padding = (frame_data >> 9) & 0x1
 127                 private = (frame_data >> 8) & 0x1
 128                 self.mode = (frame_data >> 6) & 0x3
 129                 mode_extension = (frame_data >> 4) & 0x3
 130                 copyright = (frame_data >> 3) & 0x1
 131                 original = (frame_data >> 2) & 0x1
 132                 emphasis = (frame_data >> 0) & 0x3
 133                 if (version == 1 or layer == 0 or sample_rate == 0x3 or
 134                     bitrate == 0 or bitrate == 0xF):
 135                     frame_1 = data.find("\xff", frame_1 + 2)
 136                 else: break
 137         else:
 138             raise HeaderNotFoundError("can't sync to an MPEG frame")
 139
 140         # There is a serious problem here, which is that many flags
 141         # in an MPEG header are backwards.
 142         self.version = [2.5, None, 2, 1][version]
 143         self.layer = 4 - layer
 144         self.protected = not protection
 145         self.padding = bool(padding)
 146
 147         self.bitrate = self.__BITRATE[(self.version, self.layer)][bitrate]
 148         self.bitrate *= 1000
 149         self.sample_rate = self.__RATES[self.version][sample_rate]
 150
 151         if self.layer == 1:
 152             frame_length = (12 * self.bitrate / self.sample_rate + padding) * 4
 153             frame_size = 384
 154         else:
 155             frame_length = 144 * self.bitrate / self.sample_rate + padding
 156             frame_size = 1152
 157
 158         if check_second:
 159             possible = frame_1 + frame_length
 160             if possible > len(data) + 4:
 161                 raise HeaderNotFoundError("can't sync to second MPEG frame")
 162             frame_data = struct.unpack(">H", data[possible:possible + 2])[0]
 163             if frame_data & 0xFFE0 != 0xFFE0:
 164                 raise HeaderNotFoundError("can't sync to second MPEG frame")
 165
 166         frame_count = real_size / float(frame_length)
 167         samples = frame_size * frame_count
 168         self.length = samples / self.sample_rate
 169
 170         # Try to find/parse the Xing header, which trumps the above length
 171         # and bitrate calculation.
 172         fileobj.seek(offset, 0)
 173         data = fileobj.read(32768)
 174         try:
 175             xing = data[:-4].index("Xing")
 176         except ValueError:
 177             # Try to find/parse the VBRI header, which trumps the above length
 178             # calculation.
 179             try:
 180                 vbri = data[:-24].index("VBRI")
 181             except ValueError: pass
 182             else:
 183                 # If a VBRI header was found, this is definitely MPEG audio.
 184                 self.sketchy = False
 185                 vbri_version = struct.unpack('>H', data[vbri + 4:vbri + 6])[0]
 186                 if vbri_version == 1:
 187                     frame_count = struct.unpack('>I', data[vbri + 14:vbri + 18])[0]
 188                     samples = frame_size * frame_count
 189                     self.length = (samples / self.sample_rate) or self.length
 190         else:
 191             # If a Xing header was found, this is definitely MPEG audio.
 192             self.sketchy = False
 193             flags = struct.unpack('>I', data[xing + 4:xing + 8])[0]
 194             if flags & 0x1:
 195                 frame_count = struct.unpack('>I', data[xing + 8:xing + 12])[0]
 196                 samples = frame_size * frame_count
 197                 self.length = (samples / self.sample_rate) or self.length
 198             if flags & 0x2:
 199                 bytes = struct.unpack('>I', data[xing + 12:xing + 16])[0]
 200                 self.bitrate = int((bytes * 8) // self.length)
 201
 202     def pprint(self):
 203         s = "MPEG %s layer %d, %d bps, %s Hz, %.2f seconds" % (
 204             self.version, self.layer, self.bitrate, self.sample_rate,
 205             self.length)
 206         if self.sketchy: s += " (sketchy)"
 207         return s
 208
 209 class MP3(ID3FileType):
 210     """An MPEG audio (usually MPEG-1 Layer 3) file."""
 211
 212     _Info = MPEGInfo
 213     _mimes = ["audio/mp3", "audio/x-mp3", "audio/mpeg", "audio/mpg",
 214               "audio/x-mpeg"]
 215
 216     def score(filename, fileobj, header):
 217         filename = filename.lower()
 218         return (header.startswith("ID3") * 2 + filename.endswith(".mp3") +
 219                 filename.endswith(".mp2") + filename.endswith(".mpg") +
 220                 filename.endswith(".mpeg"))
 221     score = staticmethod(score)
 222
# Module-level alias: ``Open`` is another name for the MP3 class.
Open = MP3