1 # FLAC comment support for Mutagen
2 # Copyright 2005 Joe Wreschnig
4 # This program is free software; you can redistribute it and/or modify
5 # it under the terms of version 2 of the GNU General Public License as
6 # published by the Free Software Foundation.
8 """Read and write FLAC Vorbis comments and stream information.
10 Read more about FLAC at http://flac.sourceforge.net.
12 FLAC supports arbitrary metadata blocks. The two most interesting ones
13 are the FLAC stream information block, and the Vorbis comment block;
14 these are also the only ones Mutagen can currently read.
16 This module does not handle Ogg FLAC files.
18 Based off documentation available at
19 http://flac.sourceforge.net/format.html
22 __all__ = ["FLAC", "Open", "delete"]
25 from cStringIO import StringIO
26 from _vorbis import VCommentDict
27 from mutagen import FileType
28 from mutagen._util import insert_bytes
29 from mutagen.id3 import BitPaddedInt
class error(IOError):
    """Base class for the errors raised by this module."""
    pass


class FLACNoHeaderError(error):
    """Raised when a file does not carry the expected FLAC headers."""
    pass


class FLACVorbisError(ValueError, error):
    """Raised for problems with the Vorbis comment block of a FLAC file."""
    pass
def to_int_be(string):
    """Convert an arbitrarily-long string to an integer using big-endian
    byte order.

    string -- a sequence of bytes; its elements may be one-character
              strings or integers in the range 0-255 (as produced by
              iterating a bytearray).

    An empty sequence yields 0.
    """
    value = 0
    for byte in string:
        # Elements are one-character strings for a plain str, but
        # already integers for byte-oriented sequences.
        if not isinstance(byte, int):
            byte = ord(byte)
        # Multiplication always promotes to an arbitrary-precision
        # integer, no matter how long the input is.
        value = value * 256 + byte
    return value
class MetadataBlock(object):
    """A generic block of FLAC metadata.

    This class is used as an ancestor for more specific blocks, and
    also as a container for data blobs of unknown blocks.

    Attributes:
    data -- raw binary data for this block
    """

    def __init__(self, data):
        """Parse the given data string or file-like as a metadata block.
        The metadata header should not be included.

        Passing None skips parsing and leaves the block unloaded.
        Raises TypeError if data is neither a string nor has a read
        method.
        """
        if data is not None:
            if isinstance(data, str):
                data = StringIO(data)
            elif not hasattr(data, 'read'):
                raise TypeError(
                    "StreamInfo requires string data or a file-like")
            self.load(data)

    def load(self, data):
        """Keep everything readable from the file-like as raw data."""
        self.data = data.read()

    def write(self):
        """Return the raw data of this block."""
        return self.data

    def writeblocks(blocks):
        """Render metadata blocks as a byte string.

        Every block is preceded by a 4 byte header: one byte holding
        the block code, with the high bit set on the final block, and
        the block size as a 3 byte big-endian integer.

        Raises error if a block is too large for the size field.
        """
        data = []
        codes = [[block.code, block.write()] for block in blocks]
        codes[-1][0] |= 128
        for code, datum in codes:
            byte = chr(code)
            # The size field is 24 bits wide, so 2**24 - 1 is the
            # largest value it can hold; a size of exactly 2**24 would
            # be truncated to zero by the slice below.
            if len(datum) >= 2**24:
                raise error("block is too long to write")
            length = struct.pack(">I", len(datum))[-3:]
            data.append(byte + length + datum)
        return "".join(data)
    writeblocks = staticmethod(writeblocks)

    def group_padding(blocks):
        """Consolidate FLAC padding metadata blocks.

        All Padding blocks are removed from the list, and a single
        Padding block is appended in their place.

        The overall size of the rendered blocks does not change, so
        this adds several bytes of padding for each merged block."""
        paddings = [block for block in blocks if isinstance(block, Padding)]
        for old in paddings:
            blocks.remove(old)
        padding = Padding()
        # total padding size is the sum of padding sizes plus 4 bytes
        # per removed header. With no padding to merge nothing was
        # removed, so the new block must not end up with a negative size.
        size = sum([old.length for old in paddings])
        padding.length = size + 4 * max(len(paddings) - 1, 0)
        blocks.append(padding)
    group_padding = staticmethod(group_padding)
class StreamInfo(MetadataBlock):
    """FLAC stream information.

    This contains information about the audio data in the FLAC file.
    Unlike most stream information objects in Mutagen, changes to this
    one will be written to the file when it is saved. Unless you are
    actually changing the audio stream itself, don't change any
    attributes of this block.

    Attributes:
    min_blocksize -- minimum audio block size
    max_blocksize -- maximum audio block size
    sample_rate -- audio sample rate in Hz
    channels -- audio channels (1 for mono, 2 for stereo)
    bits_per_sample -- bits per sample
    total_samples -- total samples in file
    length -- audio length in seconds
    """

    code = 0

    def __eq__(self, other):
        """Compare block sizes, sample rate, channels, bits per sample
        and total samples; objects lacking those attributes are unequal."""
        try: return (self.min_blocksize == other.min_blocksize and
                     self.max_blocksize == other.max_blocksize and
                     self.sample_rate == other.sample_rate and
                     self.channels == other.channels and
                     self.bits_per_sample == other.bits_per_sample and
                     self.total_samples == other.total_samples)
        except (AttributeError, TypeError): return False

    def load(self, data):
        """Parse the stream information from a file-like object."""
        self.min_blocksize = int(to_int_be(data.read(2)))
        self.max_blocksize = int(to_int_be(data.read(2)))
        self.min_framesize = int(to_int_be(data.read(3)))
        self.max_framesize = int(to_int_be(data.read(3)))
        # first 16 bits of sample rate
        sample_first = to_int_be(data.read(2))
        # last 4 bits of sample rate, 3 of channels, first 1 of bits/sample
        sample_channels_bps = to_int_be(data.read(1))
        # last 4 of bits/sample, 36 of total samples
        bps_total = to_int_be(data.read(5))

        sample_tail = sample_channels_bps >> 4
        self.sample_rate = int((sample_first << 4) + sample_tail)
        # channels and bits/sample are stored minus one
        self.channels = int(((sample_channels_bps >> 1) & 7) + 1)
        bps_tail = bps_total >> 36
        bps_head = (sample_channels_bps & 1) << 4
        self.bits_per_sample = int(bps_head + bps_tail + 1)
        self.total_samples = bps_total & (2 ** 36 - 1)
        self.length = self.total_samples / float(self.sample_rate)

        self.md5_signature = to_int_be(data.read(16))

    def write(self):
        """Render the stream information as a byte string, using the
        same layout that load reads."""
        low32 = 2 ** 32 - 1
        f = StringIO()
        f.write(struct.pack(">I", self.min_blocksize)[-2:])
        f.write(struct.pack(">I", self.max_blocksize)[-2:])
        f.write(struct.pack(">I", self.min_framesize)[-3:])
        f.write(struct.pack(">I", self.max_framesize)[-3:])

        # first 16 bits of sample rate
        f.write(struct.pack(">I", self.sample_rate >> 4)[-2:])
        # 4 bits sample, 3 channel, 1 bps
        byte = (self.sample_rate & 0xF) << 4
        # The channel field is 3 bits wide (load masks it with 7), so
        # all three bits must be kept when writing it back.
        byte += ((self.channels - 1) & 7) << 1
        byte += ((self.bits_per_sample - 1) >> 4) & 1
        f.write(chr(byte))
        # 4 bits of bps, 4 of sample count
        byte = ((self.bits_per_sample - 1) & 0xF) << 4
        byte += (self.total_samples >> 32) & 0xF
        f.write(chr(byte))
        # last 32 of sample count
        f.write(struct.pack(">I", self.total_samples & low32))
        # MD5 signature, as four 32 bit words
        sig = self.md5_signature
        f.write(struct.pack(
            ">4I", (sig >> 96) & low32, (sig >> 64) & low32,
            (sig >> 32) & low32, sig & low32))
        return f.getvalue()

    def pprint(self):
        """Return a one-line description of the stream."""
        return "FLAC, %.2f seconds, %d Hz" % (self.length, self.sample_rate)
class SeekPoint(tuple):
    """A single seek point in a FLAC file.

    Placeholder seek points have first_sample of 0xFFFFFFFFFFFFFFFFL,
    and byte_offset and num_samples undefined. Seek points must be
    sorted in ascending order by first_sample number. Seek points must
    be unique by first_sample number, except for placeholder
    points. Placeholder points must occur last in the table and there
    may be any number of them.

    Attributes:
    first_sample -- sample number of first sample in the target frame
    byte_offset -- offset from first frame to target frame
    num_samples -- number of samples in target frame
    """

    def __new__(cls, first_sample, byte_offset, num_samples):
        """Build the point as a (first_sample, byte_offset, num_samples)
        tuple."""
        # super() takes the defining class first and the class being
        # instantiated second; the other way round fails for subclasses.
        return super(SeekPoint, cls).__new__(
            cls, (first_sample, byte_offset, num_samples))
    first_sample = property(lambda self: self[0])
    byte_offset = property(lambda self: self[1])
    num_samples = property(lambda self: self[2])
class SeekTable(MetadataBlock):
    """Read and write FLAC seek tables.

    Attributes:
    seekpoints -- list of SeekPoint objects
    """

    __SEEKPOINT_FORMAT = '>QQH'
    __SEEKPOINT_SIZE = struct.calcsize(__SEEKPOINT_FORMAT)

    code = 3

    def __init__(self, data):
        """Start with an empty table, then parse data if any is given."""
        self.seekpoints = []
        super(SeekTable, self).__init__(data)

    def __eq__(self, other):
        """Two tables are equal when their seek point lists are equal."""
        try:
            return (self.seekpoints == other.seekpoints)
        except (AttributeError, TypeError):
            return False

    def load(self, data):
        """Read seek points from a file-like until a short read."""
        self.seekpoints = []
        record_size = self.__SEEKPOINT_SIZE
        record_format = self.__SEEKPOINT_FORMAT
        while True:
            record = data.read(record_size)
            # Anything shorter than a full record ends the table.
            if len(record) != record_size:
                break
            fields = struct.unpack(record_format, record)
            self.seekpoints.append(SeekPoint(*fields))

    def write(self):
        """Render every seek point, in order, as one byte string."""
        record_format = self.__SEEKPOINT_FORMAT
        records = []
        for point in self.seekpoints:
            records.append(struct.pack(
                record_format, point.first_sample, point.byte_offset,
                point.num_samples))
        return "".join(records)

    def __repr__(self):
        return "<%s seekpoints=%r>" % (type(self).__name__, self.seekpoints)
class VCFLACDict(VCommentDict):
    """Read and write FLAC Vorbis comments.

    FLACs don't use the framing bit at the end of the comment block.
    So this extends VCommentDict to not use the framing bit.
    """

    code = 4

    def load(self, data, errors='replace', framing=False):
        """Load comments through VCommentDict, with framing off by
        default."""
        parent = super(VCFLACDict, self)
        parent.load(data, errors=errors, framing=framing)

    def write(self, framing=False):
        """Render comments through VCommentDict, with framing off by
        default."""
        parent = super(VCFLACDict, self)
        return parent.write(framing=framing)
class CueSheetTrackIndex(tuple):
    """Index for a track in a cuesheet.

    For CD-DA, an index_number of 0 corresponds to the track
    pre-gap. The first index in a track must have a number of 0 or 1,
    and subsequently, index_numbers must increase by 1. Index_numbers
    must be unique within a track. And index_offset must be evenly
    divisible by 588 samples.

    Attributes:
    index_number -- index point number
    index_offset -- offset in samples from track start
    """

    def __new__(cls, index_number, index_offset):
        """Build the index as an (index_number, index_offset) tuple."""
        # super() takes the defining class first and the class being
        # instantiated second; the other way round fails for subclasses.
        return super(CueSheetTrackIndex, cls).__new__(
            cls, (index_number, index_offset))
    index_number = property(lambda self: self[0])
    index_offset = property(lambda self: self[1])
class CueSheetTrack(object):
    """A track in a cuesheet.

    For CD-DA, track_numbers must be 1-99, or 170 for the
    lead-out. Track_numbers must be unique within a cue sheet. There
    must be at least one index in every track except the lead-out track
    which must have none.

    Attributes:
    track_number -- track number
    start_offset -- track offset in samples from start of FLAC stream
    isrc -- ISRC code
    type -- 0 for audio, 1 for digital data
    pre_emphasis -- true if the track is recorded with pre-emphasis
    indexes -- list of CueSheetTrackIndex objects
    """

    def __init__(self, track_number, start_offset, isrc='', type_=0,
                 pre_emphasis=False):
        """Create a track with an empty list of indexes."""
        self.track_number = track_number
        self.start_offset = start_offset
        self.isrc = isrc
        self.type = type_
        self.pre_emphasis = pre_emphasis
        self.indexes = []

    def __eq__(self, other):
        """Compare all track attributes; objects lacking them are
        unequal."""
        compared = ("track_number", "start_offset", "isrc", "type",
                    "pre_emphasis", "indexes")
        try:
            for attribute in compared:
                if not getattr(self, attribute) == getattr(other, attribute):
                    return False
        except (AttributeError, TypeError):
            return False
        return True

    def __repr__(self):
        template = ("<%s number=%r, offset=%d, isrc=%r, type=%r, "
                    "pre_emphasis=%r, indexes=%r)>")
        values = (type(self).__name__, self.track_number, self.start_offset,
                  self.isrc, self.type, self.pre_emphasis, self.indexes)
        return template % values
314 class CueSheet(MetadataBlock):
315 """Read and write FLAC embedded cue sheets.
317 Number of tracks should be from 1 to 100. There should always be
318 exactly one lead-out track and that track must be the last track
322 media_catalog_number -- media catalog number in ASCII
323 lead_in_samples -- number of lead-in samples
324 compact_disc -- true if the cuesheet corresponds to a compact disc
325 tracks -- list of CueSheetTrack objects
326 lead_out -- lead-out as CueSheetTrack or None if lead-out was not found
329 __CUESHEET_FORMAT = '>128sQB258xB'
330 __CUESHEET_SIZE = struct.calcsize(__CUESHEET_FORMAT)
331 __CUESHEET_TRACK_FORMAT = '>QB12sB13xB'
332 __CUESHEET_TRACK_SIZE = struct.calcsize(__CUESHEET_TRACK_FORMAT)
333 __CUESHEET_TRACKINDEX_FORMAT = '>QB3x'
334 __CUESHEET_TRACKINDEX_SIZE = struct.calcsize(__CUESHEET_TRACKINDEX_FORMAT)
338 media_catalog_number = ''
339 lead_in_samples = 88200
342 def __init__(self, data):
344 super(CueSheet, self).__init__(data)
346 def __eq__(self, other):
348 return (self.media_catalog_number == other.media_catalog_number and
349 self.lead_in_samples == other.lead_in_samples and
350 self.compact_disc == other.compact_disc and
351 self.tracks == other.tracks)
352 except (AttributeError, TypeError): return False
354 def load(self, data):
355 header = data.read(self.__CUESHEET_SIZE)
356 media_catalog_number, lead_in_samples, flags, num_tracks = \
357 struct.unpack(self.__CUESHEET_FORMAT, header)
358 self.media_catalog_number = media_catalog_number.rstrip('\0')
359 self.lead_in_samples = lead_in_samples
360 self.compact_disc = bool(flags & 0x80)
362 for i in range(num_tracks):
363 track = data.read(self.__CUESHEET_TRACK_SIZE)
364 start_offset, track_number, isrc_padded, flags, num_indexes = \
365 struct.unpack(self.__CUESHEET_TRACK_FORMAT, track)
366 isrc = isrc_padded.rstrip('\0')
367 type_ = (flags & 0x80) >> 7
368 pre_emphasis = bool(flags & 0x40)
370 track_number, start_offset, isrc, type_, pre_emphasis)
371 for j in range(num_indexes):
372 index = data.read(self.__CUESHEET_TRACKINDEX_SIZE)
373 index_offset, index_number = struct.unpack(
374 self.__CUESHEET_TRACKINDEX_FORMAT, index)
376 CueSheetTrackIndex(index_number, index_offset))
377 self.tracks.append(val)
382 if self.compact_disc: flags |= 0x80
383 packed = struct.pack(
384 self.__CUESHEET_FORMAT, self.media_catalog_number,
385 self.lead_in_samples, flags, len(self.tracks))
387 for track in self.tracks:
389 track_flags |= (track.type & 1) << 7
390 if track.pre_emphasis: track_flags |= 0x40
391 track_packed = struct.pack(
392 self.__CUESHEET_TRACK_FORMAT, track.start_offset,
393 track.track_number, track.isrc, track_flags,
395 f.write(track_packed)
396 for index in track.indexes:
397 index_packed = struct.pack(
398 self.__CUESHEET_TRACKINDEX_FORMAT,
399 index.index_offset, index.index_number)
400 f.write(index_packed)
404 return ("<%s media_catalog_number=%r, lead_in=%r, compact_disc=%r, "
406 type(self).__name__, self.media_catalog_number,
407 self.lead_in_samples, self.compact_disc, self.tracks)
409 class Picture(MetadataBlock):
410 """Read and write FLAC embed pictures.
413 type -- picture type (same as types for ID3 APIC frames)
414 mime -- MIME type of the picture
415 desc -- picture's description
416 width -- width in pixels
417 height -- height in pixels
418 depth -- color depth in bits-per-pixel
419 colors -- number of colors for indexed palettes (like GIF),
426 def __init__(self, data=None):
435 super(Picture, self).__init__(data)
437 def __eq__(self, other):
438 try: return (self.type == other.type and
439 self.mime == other.mime and
440 self.desc == other.desc and
441 self.width == other.width and
442 self.height == other.height and
443 self.depth == other.depth and
444 self.colors == other.colors and
445 self.data == other.data)
446 except (AttributeError, TypeError): return False
448 def load(self, data):
449 self.type, length = struct.unpack('>2I', data.read(8))
450 self.mime = data.read(length).decode('UTF-8', 'replace')
451 length, = struct.unpack('>I', data.read(4))
452 self.desc = data.read(length).decode('UTF-8', 'replace')
453 (self.width, self.height, self.depth,
454 self.colors, length) = struct.unpack('>5I', data.read(20))
455 self.data = data.read(length)
459 mime = self.mime.encode('UTF-8')
460 f.write(struct.pack('>2I', self.type, len(mime)))
462 desc = self.desc.encode('UTF-8')
463 f.write(struct.pack('>I', len(desc)))
465 f.write(struct.pack('>5I', self.width, self.height, self.depth,
466 self.colors, len(self.data)))
471 return "<%s '%s' (%d bytes)>" % (type(self).__name__, self.mime,
class Padding(MetadataBlock):
    """Empty padding space for metadata blocks.

    To avoid rewriting the entire FLAC file when editing comments,
    metadata is often padded. Padding should occur at the end, and no
    more than one padding block should be in any FLAC file. Mutagen
    handles this with MetadataBlock.group_padding.
    """

    code = 1

    def __init__(self, data=""):
        """Create padding as long as the given data (empty by default)."""
        super(Padding, self).__init__(data)

    def load(self, data):
        """Record only how many bytes the file-like holds."""
        self.length = len(data.read())

    def write(self):
        """Return self.length zero bytes, or raise error if that many
        cannot be produced."""
        try:
            zeros = "\x00" * self.length
        # On some 64 bit platforms this won't generate a MemoryError
        # or OverflowError since you might have enough RAM, but it
        # still generates a ValueError. On other 64 bit platforms,
        # this will still succeed for extremely large values.
        # Those should never happen in the real world, and if they
        # do, writeblocks will catch it.
        except (OverflowError, ValueError, MemoryError):
            raise error("cannot write %d bytes" % self.length)
        else:
            return zeros

    def __eq__(self, other):
        """Padding blocks are equal when their lengths are equal."""
        if not isinstance(other, Padding):
            return False
        return self.length == other.length

    def __repr__(self):
        return "<%s (%d bytes)>" % (type(self).__name__, self.length)
502 class FLAC(FileType):
503 """A FLAC audio file.
506 info -- stream information (length, bitrate, sample rate)
507 tags -- metadata tags, if any
508 cuesheet -- CueSheet object, if any
509 seektable -- SeekTable object, if any
510 pictures -- list of embedded pictures
513 _mimes = ["audio/x-flac", "application/x-flac"]
515 METADATA_BLOCKS = [StreamInfo, Padding, None, SeekTable, VCFLACDict,
517 """Known metadata block types, indexed by ID."""
519 def score(filename, fileobj, header):
520 return header.startswith("fLaC")
521 score = staticmethod(score)
523 def __read_metadata_block(self, file):
524 byte = ord(file.read(1))
525 size = to_int_be(file.read(3))
527 data = file.read(size)
528 if len(data) != size:
530 "file said %d bytes, read %d bytes" % (size, len(data)))
531 block = self.METADATA_BLOCKS[byte & 0x7F](data)
532 except (IndexError, TypeError):
533 block = MetadataBlock(data)
534 block.code = byte & 0x7F
535 self.metadata_blocks.append(block)
537 self.metadata_blocks.append(block)
538 if block.code == VCFLACDict.code:
539 if self.tags is None: self.tags = block
540 else: raise FLACVorbisError("> 1 Vorbis comment block found")
541 elif block.code == CueSheet.code:
542 if self.cuesheet is None: self.cuesheet = block
543 else: raise error("> 1 CueSheet block found")
544 elif block.code == SeekTable.code:
545 if self.seektable is None: self.seektable = block
546 else: raise error("> 1 SeekTable block found")
547 return (byte >> 7) ^ 1
550 """Add a Vorbis comment block to the file."""
551 if self.tags is None:
552 self.tags = VCFLACDict()
553 self.metadata_blocks.append(self.tags)
554 else: raise FLACVorbisError("a Vorbis comment already exists")
555 add_vorbiscomment = add_tags
557 def delete(self, filename=None):
558 """Remove Vorbis comments from a file.
560 If no filename is given, the one most recently loaded is used.
562 if filename is None: filename = self.filename
563 for s in list(self.metadata_blocks):
564 if isinstance(s, VCFLACDict):
565 self.metadata_blocks.remove(s)
570 vc = property(lambda s: s.tags, doc="Alias for tags; don't use this.")
572 def load(self, filename):
573 """Load file information from a filename."""
575 self.metadata_blocks = []
578 self.seektable = None
579 self.filename = filename
580 fileobj = file(filename, "rb")
582 self.__check_header(fileobj)
583 while self.__read_metadata_block(fileobj): pass
587 try: self.metadata_blocks[0].length
588 except (AttributeError, IndexError):
589 raise FLACNoHeaderError("Stream info block not found")
591 info = property(lambda s: s.metadata_blocks[0])
593 def add_picture(self, picture):
594 """Add a new picture to the file."""
595 self.metadata_blocks.append(picture)
597 def clear_pictures(self):
598 """Delete all pictures from the file."""
599 self.metadata_blocks = filter(lambda b: b.code != Picture.code,
600 self.metadata_blocks)
602 def __get_pictures(self):
603 return filter(lambda b: b.code == Picture.code, self.metadata_blocks)
604 pictures = property(__get_pictures, doc="List of embedded pictures")
606 def save(self, filename=None, deleteid3=False):
607 """Save metadata blocks to a file.
609 If no filename is given, the one most recently loaded is used.
612 if filename is None: filename = self.filename
613 f = open(filename, 'rb+')
615 # Ensure we've got padding at the end, and only at the end.
616 # If adding makes it too large, we'll scale it down later.
617 self.metadata_blocks.append(Padding('\x00' * 1020))
618 MetadataBlock.group_padding(self.metadata_blocks)
620 header = self.__check_header(f)
621 available = self.__find_audio_offset(f) - header # "fLaC" and maybe ID3
622 data = MetadataBlock.writeblocks(self.metadata_blocks)
625 if deleteid3 and header > 4:
626 available += header - 4
629 if len(data) > available:
630 # If we have too much data, see if we can reduce padding.
631 padding = self.metadata_blocks[-1]
632 newlength = padding.length - (len(data) - available)
634 padding.length = newlength
635 data = MetadataBlock.writeblocks(self.metadata_blocks)
636 assert len(data) == available
638 elif len(data) < available:
639 # If we have too little data, increase padding.
640 self.metadata_blocks[-1].length += (available - len(data))
641 data = MetadataBlock.writeblocks(self.metadata_blocks)
642 assert len(data) == available
644 if len(data) != available:
645 # We couldn't reduce the padding enough.
646 diff = (len(data) - available)
647 insert_bytes(f, diff, header)
650 f.write("fLaC" + data)
657 if f.read(3) == "TAG":
661 def __find_audio_offset(self, fileobj):
663 while not (byte >> 7) & 1:
664 byte = ord(fileobj.read(1))
665 size = to_int_be(fileobj.read(3))
667 return fileobj.tell()
669 def __check_header(self, fileobj):
671 header = fileobj.read(4)
674 if header[:3] == "ID3":
675 size = 14 + BitPaddedInt(fileobj.read(6)[2:])
676 fileobj.seek(size - 4)
677 if fileobj.read(4) != "fLaC": size = None
679 raise FLACNoHeaderError(
680 "%r is not a valid FLAC file" % fileobj.name)
def delete(filename):
    """Remove tags from a file.

    filename -- path of the FLAC file, opened through the FLAC class
    """
    flac_file = FLAC(filename)
    flac_file.delete()