419 lines
14 KiB
Python
419 lines
14 KiB
Python
#!/usr/bin/python
|
|
|
|
#Audio Tools, a module and set of tools for manipulating audio data
|
|
#Copyright (C) 2008-2011 Brian Langenberger
|
|
|
|
#This program is free software; you can redistribute it and/or modify
|
|
#it under the terms of the GNU General Public License as published by
|
|
#the Free Software Foundation; either version 2 of the License, or
|
|
#(at your option) any later version.
|
|
|
|
#This program is distributed in the hope that it will be useful,
|
|
#but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
#MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
#GNU General Public License for more details.
|
|
|
|
#You should have received a copy of the GNU General Public License
|
|
#along with this program; if not, write to the Free Software
|
|
#Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
|
|
|
"""The cuesheet handling module."""
|
|
|
|
import re
|
|
from audiotools import SheetException, parse_timestamp, build_timestamp
|
|
import gettext
|
|
|
|
gettext.install("audiotools", unicode=True)
|
|
|
|
###################
|
|
#Cue Sheet Parsing
|
|
###################
|
|
|
|
#This method of cuesheet reading involves a tokenizer and parser,
|
|
#analogous to lex/yacc.
|
|
#It might be easier to use a line-by-line ad-hoc method for parsing,
|
|
#but this brute-force approach should be a bit more thorough.
|
|
|
|
#Token types yielded by tokens() and tested by get_value().
#Every type other than SPACE is a distinct bit, so several acceptable
#types can be "or"ed together into a single mask for get_value().
#SPACE is 0 and can never match such a mask;
#tokens() consumes whitespace itself and never yields it.
SPACE = 0x0
#a word made only of uppercase letters, such as TRACK or INDEX
TAG = 0x1
#a run of digits; tokens() yields its value as an int
NUMBER = 0x2
#a run of carriage return and/or line feed characters
EOL = 0x4
#quoted text, or any other run of non-whitespace characters
STRING = 0x8
#2 uppercase letters, 3 alphanumerics, then 7 digits
ISRC = 0x10
#three colon-separated numbers; tokens() yields them combined into
#one int, counting 75 units per second
TIMESTAMP = 0x20
|
|
|
|
|
|
class CueException(SheetException):
    """The error raised when a cuesheet cannot be read or parsed."""
|
|
|
|
|
|
def tokens(cuedata):
    """Yields (text, token, line) tuples from cuedata stream.

    cuedata is the entire cuesheet as a plain string.
    A single leading UTF-8 byte order mark is ignored.

    text is the token's value:
    an int for NUMBER tokens,
    an int for TIMESTAMP tokens (its three fields combined
    at 75 units per second),
    a plain string with surrounding double quotes removed
    for STRING tokens, and the matched text for everything else.
    token is an integer such as TAG or NUMBER.
    line is the number of the line the token was found on, starting at 1.

    Spaces and tabs between tokens are consumed without being yielded.

    Raises CueException if some part of cuedata matches no token.
    """

    full_length = len(cuedata)

    #remove the byte order mark as a prefix only;
    #lstrip() treats its argument as a set of characters
    #and could also eat legitimate leading data
    bom = '\xef\xbb\xbf'
    if (cuedata.startswith(bom)):
        cuedata = cuedata[len(bom):]

    #offset of cuedata[0] within the data the caller handed in,
    #so that error positions refer to the original input
    base = full_length - len(cuedata)

    #This isn't completely accurate since the whitespace requirements
    #between tokens aren't enforced.
    #Patterns are tried in order and the first match wins,
    #so the more specific ones must come first.
    TOKENS = [(re.compile(pattern), element) for (pattern, element) in
              [(r'[A-Z]{2}[A-Za-z0-9]{3}[0-9]{7}', ISRC),
               (r'[0-9]{1,2}:[0-9]{1,2}:[0-9]{1,2}', TIMESTAMP),
               (r'[0-9]+', NUMBER),
               (r'[\r\n]+', EOL),
               (r'".+?"', STRING),
               (r'\S+', STRING),
               (r'[ \t]+', SPACE)]]

    TAGMATCH = re.compile(r'^[A-Z]+$')
    LINEBREAK = re.compile(r'\r\n|\r|\n')

    position = 0
    line_number = 1

    #matching at an offset instead of re-slicing cuedata
    #keeps tokenizing linear in the size of the input
    while (position < len(cuedata)):
        for (pattern, element) in TOKENS:
            t = pattern.match(cuedata, position)
            if (t is not None):
                break
        else:
            raise CueException(_(u"Invalid token at char %d") %
                               (base + position))

        text = t.group()
        position = t.end()

        if (element == SPACE):
            continue
        elif (element == NUMBER):
            yield (int(text), element, line_number)
        elif (element == EOL):
            #the EOL belongs to the line it terminates;
            #one match may span several line breaks (blank lines),
            #each of which advances the line count
            yield (text, element, line_number)
            line_number += len(LINEBREAK.findall(text))
        elif (element == STRING):
            if (TAGMATCH.match(text)):
                yield (text, TAG, line_number)
            else:
                yield (text.strip('"'), element, line_number)
        elif (element == TIMESTAMP):
            (m, s, f) = [int(field) for field in text.split(":")]
            yield (((m * 60 * 75) + (s * 75) + f),
                   element, line_number)
        else:
            yield (text, element, line_number)
|
|
|
|
|
|
def get_value(tokens, accept, error):
    """Retrieves a specific token from the stream of tokens.

    tokens - the token iterator, yielding (text, token, line) tuples
    accept - an "or"ed list of all the tokens we'll accept
    error - the string to prepend to the error message

    Consumes exactly one token from the iterator.
    Returns the gotten value if it matches one of the accepted tokens
    or raises CueException if the token matches none of them.
    StopIteration from an exhausted iterator is passed through
    to the caller.
    """

    #next() rather than .next() works on any iterator
    (token, element, line_number) = next(tokens)
    if ((element & accept) == 0):
        raise CueException(_(u"%(error)s at line %(line)d") %
                           {"error": error,
                            "line": line_number})
    return token
|
|
|
|
|
|
def parse(tokens):
    """Returns a Cuesheet object from the token iterator stream.

    tokens is an iterator of (text, token, line) tuples
    such as the one produced by tokens().

    CATALOG, CDTEXTFILE, PERFORMER, SONGWRITER, TITLE and FILE lines
    found before the first TRACK are stored in the Cuesheet's attribs.
    Each TRACK line starts a new Track which collects the
    ISRC, PERFORMER, SONGWRITER, TITLE, FLAGS, PREGAP, POSTGAP
    and INDEX lines that follow it.
    REM lines, FILE lines after the first TRACK
    and empty lines are skipped.
    Parsing stops when the token stream runs out.

    Raises CueException if a parsing error occurs.
    """

    def skip_to_eol(tokens):
        """Discards tokens up to and including the next EOL."""

        (token, element, line_number) = next(tokens)
        while (element != EOL):
            (token, element, line_number) = next(tokens)

    cuesheet = Cuesheet()
    track = None

    try:
        while (True):
            (token, element, line_number) = next(tokens)

            #an EOL where a statement should begin is just an empty
            #(or whitespace-only) line, not an error
            if (element == EOL):
                continue

            if (element != TAG):
                raise CueException(_(u"Missing tag at line %d") % (
                        line_number))

            #ignore comment lines
            if (token == "REM"):
                skip_to_eol(tokens)

            #we're moving to a new track
            elif (token == 'TRACK'):
                if (track is not None):
                    cuesheet.tracks[track.number] = track

                track_number = get_value(tokens, NUMBER,
                                         _(u"Invalid track number"))
                track_type = get_value(tokens, TAG | STRING,
                                       _(u"Invalid track type"))
                track = Track(track_number, track_type)

                get_value(tokens, EOL, _(u"Excess data"))

            #if we haven't started on track data yet,
            #add attributes to the main cue sheet
            elif (track is None):
                if (token in ('CATALOG', 'CDTEXTFILE',
                              'PERFORMER', 'SONGWRITER',
                              'TITLE')):
                    cuesheet.attribs[token] = get_value(
                        tokens,
                        STRING | TAG | NUMBER | ISRC,
                        _(u"Missing value"))

                    get_value(tokens, EOL, _(u"Excess data"))

                elif (token == 'FILE'):
                    filename = get_value(tokens, STRING,
                                         _(u"Missing filename"))
                    filetype = get_value(tokens, STRING | TAG,
                                         _(u"Missing file type"))

                    cuesheet.attribs[token] = (filename, filetype)

                    get_value(tokens, EOL, _(u"Excess data"))

                else:
                    raise CueException(
                        _(u"Invalid tag %(tag)s at line %(line)d") %
                        {"tag": token,
                         "line": line_number})

            #otherwise, we're adding data to the current track
            else:
                if (token in ('ISRC', 'PERFORMER',
                              'SONGWRITER', 'TITLE')):
                    track.attribs[token] = get_value(
                        tokens,
                        STRING | TAG | NUMBER | ISRC,
                        _(u"Missing value"))

                    get_value(tokens, EOL, _(u"Invalid data"))

                elif (token == 'FLAGS'):
                    #gather every flag up to the end of the line;
                    #the EOL token's text is what ends the loop
                    flags = []
                    s = get_value(tokens, STRING | TAG | EOL,
                                  _(u"Invalid flag"))
                    while (('\n' not in s) and ('\r' not in s)):
                        flags.append(s)
                        s = get_value(tokens, STRING | TAG | EOL,
                                      _(u"Invalid flag"))
                    track.attribs[token] = ",".join(flags)

                elif (token in ('POSTGAP', 'PREGAP')):
                    track.attribs[token] = get_value(
                        tokens, TIMESTAMP,
                        _(u"Invalid timestamp"))
                    get_value(tokens, EOL, _(u"Excess data"))

                elif (token == 'INDEX'):
                    index_number = get_value(tokens, NUMBER,
                                             _(u"Invalid index number"))
                    index_timestamp = get_value(tokens, TIMESTAMP,
                                                _(u"Invalid timestamp"))
                    track.indexes[index_number] = index_timestamp

                    get_value(tokens, EOL, _(u"Excess data"))

                elif (token in ('FILE',)):
                    skip_to_eol(tokens)

                else:
                    raise CueException(
                        _(u"Invalid tag %(tag)s at line %(line)d") %
                        {"tag": token,
                         "line": line_number})
    except StopIteration:
        #running out of tokens is the normal way to finish,
        #even part-way through a line lacking its final EOL;
        #the track being built still needs to be stored
        if (track is not None):
            cuesheet.tracks[track.number] = track
        return cuesheet
|
|
|
|
|
|
def __attrib_str__(attrib):
    """Returns an attribute value as text for a cuesheet line.

    A tuple has each of its items converted and joined by spaces.
    A string of only uppercase letters is returned as-is.
    A value which isn't a string, such as the int that parse()
    stores for an all-digit value, is returned as its plain str().
    Anything else is returned wrapped in double quotes.
    """

    if (isinstance(attrib, tuple)):
        return " ".join([__attrib_str__(a) for a in attrib])

    try:
        bare = re.match(r'^[A-Z]+$', attrib) is not None
    except TypeError:
        #re.match() only accepts strings; numeric values
        #are written out as bare digits instead of crashing
        return str(attrib)

    if (bare):
        return attrib
    else:
        return "\"%s\"" % (attrib)
|
|
|
|
|
|
class Cuesheet:
    """An object representing a cuesheet file.

    attribs is a dict of sheet-level tag name -> value
    tracks is a dict of track number -> track object
    """

    def __init__(self):
        self.attribs = {}
        self.tracks = {}

    def __repr__(self):
        return "Cuesheet(attribs=%s,tracks=%s)" % \
            (repr(self.attribs), repr(self.tracks))

    def __str__(self):
        """Returns the sheet-level attributes followed by each track,
        in track number order, as CRLF-separated text."""

        lines = ["%s %s" % (key, __attrib_str__(value))
                 for (key, value) in self.attribs.items()]
        #sort on the track number explicitly instead of
        #depending on the track objects being comparable
        lines.extend([str(track) for track in
                      sorted(self.tracks.values(),
                             key=lambda track: track.number)])
        return "\r\n".join(lines)

    def catalog(self):
        """Returns the cuesheet's CATALOG number as a plain string, or None.

        If present, this value is typically a CD's UPC code."""

        #NOTE(review): tokens() turns an all-digit value into an int
        #before parse() stores it, so a catalog number's leading zeros
        #appear to be lost by the time it gets here - confirm
        if ('CATALOG' in self.attribs):
            return str(self.attribs['CATALOG'])
        return None

    def single_file_type(self):
        """Returns True if this cuesheet is formatted for a single file.

        That is the case when the index points of all tracks,
        taken in order, are strictly increasing."""

        previous = -1
        for track_indexes in self.indexes():
            for index in track_indexes:
                if (index <= previous):
                    return False
                previous = index
        return True

    def indexes(self):
        """Yields a tuple of index points, one for each track in the file.

        Tracks are taken in track number order and
        each tuple's values are in index number order."""

        for key in sorted(self.tracks.keys()):
            track_indexes = self.tracks[key].indexes
            yield tuple([track_indexes[k]
                         for k in sorted(track_indexes.keys())])

    def pcm_lengths(self, total_length, sample_rate=44100):
        """Yields a list of PCM lengths for all audio tracks within the file.

        total_length is the length of the entire file in PCM frames.
        sample_rate is the number of PCM frames per second;
        index points are converted at 75 units per second.

        Each track but the last runs from its highest-numbered index
        to the highest-numbered index of the following track.
        The last length yielded is whatever remains of total_length."""

        previous = None

        for key in sorted(self.tracks.keys()):
            current = self.tracks[key].indexes
            if (previous is not None):
                #multiply before dividing so that rates which aren't
                #a multiple of 75 don't lose precision;
                #for 44100 this equals the old "* (44100 / 75)"
                track_length = ((current[max(current.keys())] -
                                 previous[max(previous.keys())]) *
                                sample_rate) // 75
                total_length -= track_length
                yield track_length
            previous = current

        yield total_length

    def ISRCs(self):
        """Returns a track_number->ISRC dict of all non-empty tracks."""

        isrcs = {}
        for track in self.tracks.values():
            isrc = track.ISRC()
            if (isrc is not None):
                isrcs[track.number] = isrc
        return isrcs

    @classmethod
    def file(cls, sheet, filename):
        """Constructs a new cuesheet string from a compatible object.

        sheet must have catalog(), indexes() and ISRCs() methods.
        filename is a string to the filename the cuesheet is created for.
        Although we don't care whether the filename points to a real file,
        other tools sometimes do.
        """

        catalog = sheet.catalog()        # a catalog string, or None
        indexes = list(sheet.indexes())  # a list of index tuples
        ISRCs = sheet.ISRCs()            # a track_number->ISRC dict

        #collected as a list of lines and joined once at the end
        #NOTE(review): the leading whitespace inside these literals
        #is kept exactly as found - confirm the intended indentation
        data = []

        if (catalog is not None):
            data.append("CATALOG %s\r\n" % (catalog))
        data.append("FILE \"%s\" WAVE\r\n" % (filename))

        for (i, current) in enumerate(indexes):
            tracknum = i + 1

            data.append(" TRACK %2.2d AUDIO\r\n" % (tracknum))

            if (tracknum in ISRCs):
                data.append(" ISRC %s\r\n" % (ISRCs[tracknum]))

            #NOTE(review): index numbers are assigned from 0 by position,
            #so a track with a single index point is written as INDEX 00
            #- confirm that is what sheet.indexes() callers expect
            for (j, index) in enumerate(current):
                data.append(" INDEX %2.2d %s\r\n" %
                            (j, build_timestamp(index)))

        return "".join(data)
|
|
|
|
|
|
class Track:
    """A track inside a Cuesheet object.

    number is the track's number
    type is the track's type string
    attribs is a dict of tag name -> value
    indexes is a dict of index number -> index point,
    where index points count 75 units per second
    """

    def __init__(self, number, type):
        """number is the track's number on disc, type is a string."""

        self.number = number
        self.type = type
        self.attribs = {}
        self.indexes = {}

    #tracks are ordered and compared by their track number alone

    def __cmp__(self, t):
        #same result as the cmp() builtin, without depending on it
        return (self.number > t.number) - (self.number < t.number)

    def __lt__(self, t):
        #lets sorted() order tracks where __cmp__ isn't consulted
        return self.number < t.number

    def __eq__(self, t):
        return self.number == t.number

    def __repr__(self):
        return "Track(%s,%s,attribs=%s,indexes=%s)" % \
            (repr(self.number), repr(self.type),
             repr(self.attribs), repr(self.indexes))

    def __str__(self):
        """Returns the TRACK line followed by the track's attributes
        and its INDEX lines in index number order, CRLF-separated."""

        #NOTE(review): the leading whitespace inside these literals
        #is kept exactly as found - confirm the intended indentation
        lines = [" %s %s" % (key, __attrib_str__(value))
                 for (key, value) in self.attribs.items()]

        for (k, v) in sorted(self.indexes.items()):
            #divmod keeps this integer arithmetic explicit;
            #minutes are deliberately not wrapped at 60
            (seconds, frames) = divmod(v, 75)
            (minutes, seconds) = divmod(seconds, 60)
            lines.append(" INDEX %2.2d %2.2d:%2.2d:%2.2d" %
                         (k, minutes, seconds, frames))

        return (" TRACK %2.2d %s\r\n" % (self.number, self.type)) + \
            "\r\n".join(lines)

    def ISRC(self):
        """Returns the track's ISRC value, or None."""

        if ('ISRC' in self.attribs):
            return str(self.attribs['ISRC'])
        return None
|
|
|
|
|
|
def read_cuesheet(filename):
    """Returns a Cuesheet from a cuesheet filename on disk.

    The file is tokenized and parsed, then checked with
    single_file_type() to ensure it describes a single disc image.

    Raises CueException if some error occurs reading or parsing the file.
    """

    #both opening and reading can fail with IOError;
    #either way the caller is promised a CueException
    try:
        f = open(filename, 'r')
        try:
            cuedata = f.read()
        finally:
            f.close()
    except IOError:
        raise CueException(_(u"Unable to read cuesheet"))

    sheet = parse(tokens(cuedata))
    if (not sheet.single_file_type()):
        raise CueException(_(u"Cuesheet not formatted for disc images"))
    return sheet
|