Melodia/Melodia/resources/audiotools/cue.py

#!/usr/bin/python

#Audio Tools, a module and set of tools for manipulating audio data
#Copyright (C) 2008-2011  Brian Langenberger

#This program is free software; you can redistribute it and/or modify
#it under the terms of the GNU General Public License as published by
#the Free Software Foundation; either version 2 of the License, or
#(at your option) any later version.

#This program is distributed in the hope that it will be useful,
#but WITHOUT ANY WARRANTY; without even the implied warranty of
#MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#GNU General Public License for more details.

#You should have received a copy of the GNU General Public License
#along with this program; if not, write to the Free Software
#Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA

"""The cuesheet handling module."""

import re
from audiotools import SheetException, parse_timestamp, build_timestamp
import gettext

gettext.install("audiotools", unicode=True)

###################
#Cue Sheet Parsing
###################

#This method of cuesheet reading involves a tokenizer and parser,
#analagous to lexx/yacc.
#It might be easier to use a line-by-line ad-hoc method for parsing,
#but this brute-force approach should be a bit more thorough.

SPACE = 0x0
TAG = 0x1
NUMBER = 0x2
EOL = 0x4
STRING = 0x8
ISRC = 0x10
TIMESTAMP = 0x20


class CueException(SheetException):
    """Raised by cuesheet parsing errors."""

    pass


def tokens(cuedata):
    """Yields (text, token, line) tuples from cuedata stream.

    text is a plain string.
    token is an integer such as TAG or NUMBER.
    line is a line number integer."""

    full_length = len(cuedata)
    cuedata = cuedata.lstrip('efbbbf'.decode('hex'))
    line_number = 1

    #This isn't completely accurate since the whitespace requirements
    #between tokens aren't enforced.
    TOKENS = [(re.compile("^(%s)" % (s)), element) for (s, element) in
              [(r'[A-Z]{2}[A-Za-z0-9]{3}[0-9]{7}', ISRC),
               (r'[0-9]{1,2}:[0-9]{1,2}:[0-9]{1,2}', TIMESTAMP),
               (r'[0-9]+', NUMBER),
               (r'[\r\n]+', EOL),
               (r'".+?"', STRING),
               (r'\S+', STRING),
               (r'[ ]+', SPACE)]]

    TAGMATCH = re.compile(r'^[A-Z]+$')

    while (True):
        for (token, element) in TOKENS:
            t = token.search(cuedata)
            if (t is not None):
                cuedata = cuedata[len(t.group()):]
                if (element == SPACE):
                    break
                elif (element == NUMBER):
                    yield (int(t.group()), element, line_number)
                elif (element == EOL):
                    line_number += 1
                    yield (t.group(), element, line_number)
                elif (element == STRING):
                    if (TAGMATCH.match(t.group())):
                        yield (t.group(), TAG, line_number)
                    else:
                        yield (t.group().strip('"'), element, line_number)
                elif (element == TIMESTAMP):
                    (m, s, f) = map(int, t.group().split(":"))
                    yield (((m * 60 * 75) + (s * 75) + f),
                           element, line_number)
                else:
                    yield (t.group(), element, line_number)
                break
        else:
            break

    if (len(cuedata) > 0):
        raise CueException(_(u"Invalid token at char %d") % \
                                 (full_length - len(cuedata)))


def get_value(tokens, accept, error):
    """Retrieves a specific token from the stream of tokens.

    tokens - the token iterator
    accept - an "or"ed list of all the tokens we'll accept
    error - the string to prepend to the error message

    Returns the gotten value which matches one of the accepted tokens
    or raises ValueError if the token matches none of them.
    """

    (token, element, line_number) = tokens.next()
    if ((element & accept) != 0):
        return token
    else:
        raise CueException(_(u"%(error)s at line %(line)d") % \
                                 {"error": error,
                                  "line": line_number})


def parse(tokens):
    """Returns a Cuesheet object from the token iterator stream.

    Raises CueException if a parsing error occurs.
    """

    def skip_to_eol(tokens):
        (token, element, line_number) = tokens.next()
        while (element != EOL):
            (token, element, line_number) = tokens.next()

    cuesheet = Cuesheet()
    track = None

    try:
        while (True):
            (token, element, line_number) = tokens.next()
            if (element == TAG):

                #ignore comment lines
                if (token == "REM"):
                    skip_to_eol(tokens)

                #we're moving to a new track
                elif (token == 'TRACK'):
                    if (track is not None):
                        cuesheet.tracks[track.number] = track

                    track = Track(get_value(tokens, NUMBER,
                                            _(u"Invalid track number")),
                                  get_value(tokens, TAG | STRING,
                                            _(u"Invalid track type")))

                    get_value(tokens, EOL, "Excess data")

                #if we haven't started on track data yet,
                #add attributes to the main cue sheet
                elif (track is None):
                    if (token in ('CATALOG', 'CDTEXTFILE',
                                  'PERFORMER', 'SONGWRITER',
                                  'TITLE')):
                        cuesheet.attribs[token] = get_value(
                            tokens,
                            STRING | TAG | NUMBER | ISRC,
                            _(u"Missing value"))

                        get_value(tokens, EOL, _(u"Excess data"))

                    elif (token == 'FILE'):
                        filename = get_value(tokens, STRING,
                                             _(u"Missing filename"))
                        filetype = get_value(tokens, STRING | TAG,
                                             _(u"Missing file type"))

                        cuesheet.attribs[token] = (filename, filetype)

                        get_value(tokens, EOL, _(u"Excess data"))

                    else:
                        raise CueException(
                            _(u"Invalid tag %(tag)s at line %(line)d") % \
                                  {"tag": token,
                                   "line": line_number})
                #otherwise, we're adding data to the current track
                else:
                    if (token in ('ISRC', 'PERFORMER',
                                  'SONGWRITER', 'TITLE')):
                        track.attribs[token] = get_value(
                            tokens,
                            STRING | TAG | NUMBER | ISRC,
                            "Missing value")

                        get_value(tokens, EOL, _(u"Invalid data"))

                    elif (token == 'FLAGS'):
                        flags = []
                        s = get_value(tokens, STRING | TAG | EOL,
                                      _(u"Invalid flag"))
                        while (('\n' not in s) and ('\r' not in s)):
                            flags.append(s)
                            s = get_value(tokens, STRING | TAG | EOL,
                                          _(u"Invalid flag"))
                        track.attribs[token] = ",".join(flags)

                    elif (token in ('POSTGAP', 'PREGAP')):
                        track.attribs[token] = get_value(
                            tokens, TIMESTAMP,
                            _(u"Invalid timestamp"))
                        get_value(tokens, EOL, _(u"Excess data"))

                    elif (token == 'INDEX'):
                        index_number = get_value(tokens, NUMBER,
                                                 _(u"Invalid index number"))
                        index_timestamp = get_value(tokens, TIMESTAMP,
                                                    _(u"Invalid timestamp"))
                        track.indexes[index_number] = index_timestamp

                        get_value(tokens, EOL, _(u"Excess data"))

                    elif (token in ('FILE',)):
                        skip_to_eol(tokens)

                    else:
                        raise CueException(
                            _(u"Invalid tag %(tag)s at line %(line)d") % \
                                  {"tag": token,
                                   "line": line_number})

            else:
                raise CueException(_(u"Missing tag at line %d") % (
                        line_number))
    except StopIteration:
        if (track is not None):
            cuesheet.tracks[track.number] = track
        return cuesheet


def __attrib_str__(attrib):
    if (isinstance(attrib, tuple)):
        return " ".join([__attrib_str__(a) for a in attrib])
    elif (re.match(r'^[A-Z]+$', attrib) is not None):
        return attrib
    else:
        return "\"%s\"" % (attrib)


class Cuesheet:
    """An object representing a cuesheet file."""

    def __init__(self):
        self.attribs = {}
        self.tracks = {}

    def __repr__(self):
        return "Cuesheet(attribs=%s,tracks=%s)" % \
            (repr(self.attribs), repr(self.tracks))

    def __str__(self):
        return "\r\n".join(["%s %s" % (key, __attrib_str__(value))
                            for key, value in self.attribs.items()] + \
                           [str(track) for track in
                            sorted(self.tracks.values())])

    def catalog(self):
        """Returns the cuesheet's CATALOG number as a plain string, or None.

        If present, this value is typically a CD's UPC code."""

        if ('CATALOG' in self.attribs):
            return str(self.attribs['CATALOG'])
        else:
            return None

    def single_file_type(self):
        """Returns True if this cuesheet is formatted for a single file."""

        previous = -1
        for t in self.indexes():
            for index in t:
                if (index <= previous):
                    return False
                else:
                    previous = index
        else:
            return True

    def indexes(self):
        """Yields a set of index lists, one for each track in the file."""

        for key in sorted(self.tracks.keys()):
            yield tuple(
                [self.tracks[key].indexes[k]
                 for k in sorted(self.tracks[key].indexes.keys())])

    def pcm_lengths(self, total_length):
        """Yields a list of PCM lengths for all audio tracks within the file.

        total_length is the length of the entire file in PCM frames."""

        previous = None

        for key in sorted(self.tracks.keys()):
            current = self.tracks[key].indexes
            if (previous is None):
                previous = current
            else:
                track_length = (current[max(current.keys())] -
                                previous[max(previous.keys())]) * (44100 / 75)
                total_length -= track_length
                yield track_length
                previous = current

        yield total_length

    def ISRCs(self):
        """Returns a track_number->ISRC dict of all non-empty tracks."""

        return dict([(track.number, track.ISRC()) for track in
                     self.tracks.values() if track.ISRC() is not None])

    @classmethod
    def file(cls, sheet, filename):
        """Constructs a new cuesheet string from a compatible object.

        sheet must have catalog(), indexes() and ISRCs() methods.
        filename is a string to the filename the cuesheet is created for.
        Although we don't care whether the filename points to a real file,
        other tools sometimes do.
        """

        import cStringIO

        catalog = sheet.catalog()        # a catalog string, or None
        indexes = list(sheet.indexes())  # a list of index tuples
        ISRCs = sheet.ISRCs()            # a track_number->ISRC dict

        data = cStringIO.StringIO()

        if (catalog is not None):
            data.write("CATALOG %s\r\n" % (catalog))
        data.write("FILE \"%s\" WAVE\r\n" % (filename))

        for (i, current) in enumerate(indexes):
            tracknum = i + 1

            data.write("  TRACK %2.2d AUDIO\r\n" % (tracknum))

            if (tracknum in ISRCs.keys()):
                data.write("    ISRC %s\r\n" % (ISRCs[tracknum]))

            for (j, index) in enumerate(current):
                data.write("    INDEX %2.2d %s\r\n" % (j,
                                                       build_timestamp(index)))

        return data.getvalue()


class Track:
    """A track inside a Cuesheet object."""

    def __init__(self, number, type):
        """number is the track's number on disc, type is a string."""

        self.number = number
        self.type = type
        self.attribs = {}
        self.indexes = {}

    def __cmp__(self, t):
        return cmp(self.number, t.number)

    def __repr__(self):
        return "Track(%s,%s,attribs=%s,indexes=%s)" % \
            (repr(self.number), repr(self.type),
             repr(self.attribs), repr(self.indexes))

    def __str__(self):
        return ("  TRACK %2.2d %s\r\n" % (self.number, self.type)) + \
            "\r\n".join(["    %s %s" % (key, __attrib_str__(value))
                         for key, value in self.attribs.items()] + \
                        ["    INDEX %2.2d %2.2d:%2.2d:%2.2d" % \
                             (k, v / 75 / 60, v / 75 % 60, v % 75)
                         for (k, v) in sorted(self.indexes.items())])

    def ISRC(self):
        """Returns the track's ISRC value, or None."""

        if ('ISRC' in self.attribs.keys()):
            return str(self.attribs['ISRC'])
        else:
            return None


def read_cuesheet(filename):
    """Returns a Cuesheet from a cuesheet filename on disk.

    Raises CueException if some error occurs reading or parsing the file.
    """

    try:
        f = open(filename, 'r')
    except IOError, msg:
        raise CueException(unicode(_(u"Unable to read cuesheet")))
    try:
        sheet = parse(tokens(f.read()))
        if (not sheet.single_file_type()):
            raise CueException(_(u"Cuesheet not formatted for disc images"))
        else:
            return sheet
    finally:
        f.close()