diff --git a/.gitignore b/.gitignore index 848661a..69c5fd0 100644 --- a/.gitignore +++ b/.gitignore @@ -2,3 +2,4 @@ *.pyc *.sqlite secret_key.py +Melodia/cache diff --git a/Melodia/melodia_settings.py b/Melodia/melodia_settings.py index dd8f7ad..8a5d38e 100644 --- a/Melodia/melodia_settings.py +++ b/Melodia/melodia_settings.py @@ -19,4 +19,4 @@ SUPPORTED_AUDIO_EXTENSIONS = [ filetype[0] for filetype in SUPPORTED_AUDIO_FILET import django.db.models.fields HASH_FUNCTION = hash -HASH_RESULT_DB_TYPE = django.db.models.fields.IntegerField() +HASH_RESULT_DB_TYPE = django.db.models.fields.IntegerField(default = -1) diff --git a/archiver/archive.py b/archiver/archive.py index 0bda85e..23ece56 100644 --- a/archiver/archive.py +++ b/archiver/archive.py @@ -11,17 +11,10 @@ describes a group of songs. In this way, you back up archives of music - you don't back up the songs in a playlist. Additionally, you may want to re-organize your music to use a cleaner directory structure - a playlist doesn't care about this. -Note that archives are different from collections: - -Archives are physical organizations of songs. These are used in the backend. - -Collections are logical organizations of songs. These are intended to be used - on the frontend. - The difference is intended to separate the difference between logical and physical - operations. For example, you don't need to re-organize the directory structure of - a collection of songs. However, you may want to prevent kids from accessing explicit - songs even if they are part of the same archive folder as clean songs. """ class Archive (models.Model): + import datetime """ The archive model itself, and all functions used to interact with it. @@ -33,21 +26,21 @@ class Archive (models.Model): music files under there, and takes control of them from there. """ - name = models.CharField(max_length = 64) + name = models.CharField(max_length = 64) #Note that we're not using FilePathField since this is actually a folder - root_folder = models.CharField(max_length = 255) + root_folder = models.CharField(max_length = 255) #And a reference to the songs in this archive songs = models.ManyToManyField(Song) #Backup settings - backup_location = models.CharField(max_length = 255) - backup_frequency = models.IntegerField() - last_backup = models.DateTimeField() + backup_location = models.CharField(max_length = 255, default = "/dev/null") + backup_frequency = models.IntegerField(default = 604800) #1 week in seconds + last_backup = models.DateTimeField(default = datetime.datetime.now()) #Note that this by default will be the time the archive was instantiated - def _scan_filesystem(self, progress_callback = lambda x: None): - "Scan the archive's root filesystem and add any new songs" + def _scan_filesystem(self): + "Scan the archive's root filesystem and add any new songs without adding metadata, delete songs that exist no more" #This method is implemented since the other scan methods all need to use the same code #DRY FTW import re, os @@ -58,6 +51,7 @@ class Archive (models.Model): _regex = '|'.join(( '.*' + ext + '$' for ext in SUPPORTED_AUDIO_EXTENSIONS)) regex = re.compile(_regex, re.IGNORECASE) + #Add new songs for dirname, dirnames, filenames in os.walk(self.root_folder): #For each filename for filename in filenames: @@ -69,77 +63,29 @@ class Archive (models.Model): except ObjectDoesNotExist, e: #Song needs to be added to database - full_url = os.path.join(dirname, filename) new_song = Song(url = full_url) - new_song.populate_metadata() new_song.save() self.songs.add(new_song) - def quick_scan(self): - "Scan this archive's root folder and make sure that all songs are in the database." - - from os.path import isfile - - #Validate existing database results + #Remove songs in the database if they exist no longer for song in self.songs.all(): - if not isfile(song.url): - song.delete() - - #Scan the root folder, and find if we need to add any new songs - self._scan_filesystem() - - def scan(self): - "Scan this archive's root folder and make sure any local metadata are correct." - #Overload the regular hash function with whatever Melodia as a whole is using - from Melodia.melodia_settings import HASH_FUNCTION as hash - import os.path - - for song in self.songs.all(): - - if not os.path.isfile(song.song_url): + if not os.path.isfile(song.url): song.delete() continue - #The song exists, check that the hash is the same - db_hash = song.file_hash - - f = open(song_url) - file_hash = hash(f.read()) - if file_hash != db_hash: - #Something about the song has changed, rescan the metadata - song.populate_metadata() - - #Make sure to add any new songs as well - self._scan_filesystem() - - - def deep_scan(self): - "Scan this archive's root folder and make sure that all songs are in the database, and use EchoNest to update metadata as necessary" - - #Overload the regular hash function with whatever Melodia as a whole is using - from Melodia.melodia_settings import HASH_FUNCTION as hash - import os.path + def _update_song_metadata(self, use_echonest = False, progress_callback = lambda x, y: None): + "Scan every song in this archive (database only) and make sure all songs are correct" + #This method operates only on the songs that are in the database - if you need to make + #sure that new songs are added, use the _scan_filesystem() method in addition + total_songs = self.songs.count() + current_song = 0 for song in self.songs.all(): - - if not os.path.isfile(song.song_url): - song.delete() - continue - - #The song exists, check that the hash is the same - db_hash = song.file_hash - - f = open(song_url) - file_hash = hash(f.read()) - - if file_hash != db_hash: - #Something about the song has changed, rescan the metadata - song.populate_metadata(use_echonest = True) - - #Make sure to add any new songs as well - self._scan_filesystem() + current_song += 1 + song.populate_metadata(use_echonest = use_echonest) + progress_callback(current_song, total_songs) def _needs_backup(self): "Check if the current archive is due for a backup" @@ -154,6 +100,28 @@ class Archive (models.Model): else: return False + def quick_scan(self): + "Scan this archive's root folder and make sure that all songs are in the database." + #This is a quick scan - only validate whether or not songs should exist in the database + + self._scan_filesystem() + + def scan(self): + "Scan this archive's root folder and make sure any local metadata are correct." + #This is a longer scan - validate whether songs should exist, and use local data to update + #the database + + self._scan_filesystem() + self._update_song_metadata() + + def deep_scan(self): + "Scan this archive's root folder and make sure that all songs are in the database, and use EchoNest to update metadata as necessary" + #This is a very long scan - validate whether songs should exist, and use Echonest to make sure + #that metadata is as accurate as possible. + self._scan_filesystem() + self._update_song_metadata(use_echonest = True) + + def run_backup(self, force_backup = False): "Backup the current archive" if force_backup or self._needs_backup(): diff --git a/archiver/song.py b/archiver/song.py index 5b5c394..9a43c89 100644 --- a/archiver/song.py +++ b/archiver/song.py @@ -1,6 +1,7 @@ from django.db import models from Melodia import melodia_settings +import datetime """ The Song model Each instance of a Song represents a single music file. @@ -8,6 +9,17 @@ This database model is used for storing the metadata information about a song, and helps in doing sorting etc. """ +_default_title = "" +_default_artist = "" +_default_album = "" +_default_release_date = datetime.datetime.now #Function will be called per new song, rather than once at loading the file +_default_genre = "" +_default_bpm = -1 + +_default_bit_rate = -1 +_default_duration = -1 +_default_echonest_song_id = "" + class Song (models.Model): """ @@ -16,24 +28,36 @@ class Song (models.Model): """ #Standard user-populated metadata - title = models.CharField(max_length = 64) - artist = models.CharField(max_length = 64) - album = models.CharField(max_length = 64) - release_date = models.DateField() - genre = models.CharField(max_length = 64) - bpm = models.IntegerField() + title = models.CharField(max_length = 64, default = _default_title) + artist = models.CharField(max_length = 64, default = _default_artist) + album = models.CharField(max_length = 64, default = _default_album) + release_date = models.DateField(default = _default_release_date) + genre = models.CharField(max_length = 64, default = _default_genre) + bpm = models.IntegerField(default = _default_bpm) #File metadata - bit_rate = models.IntegerField() - duration = models.IntegerField() - echonest_song_id = models.CharField(max_length = 64) + bit_rate = models.IntegerField(default = _default_bit_rate) + duration = models.IntegerField(default = _default_bit_rate) + echonest_song_id = models.CharField(max_length = 64, default = _default_echonest_song_id) url = models.CharField(max_length = 64) file_hash = melodia_settings.HASH_RESULT_DB_TYPE - def populate_metadata(self, use_echonest = False, use_musicbrainz = False): - "Populate the metadata of this song" - import datetime + def populate_metadata(self, use_echonest = False): + "Populate the metadata of this song (only if file hash has changed)" + #Overload the hash function with whatever Melodia as a whole is using + from Melodia.melodia_settings import HASH_FUNCTION as hash + #Check if there's a hash entry - if there is, the song may not have changed, + #and we can go ahead and return + if self.file_hash != None: + song_file = open(self.url, 'rb') + current_file_hash = hash(song_file.read()) + + if current_file_hash == self.file_hash: + #The song data hasn't changed at all, we don't need to do anything + return + + #If we've gotten to here, we do actually need to fully update the metadata if use_echonest: #Code to grab metadata from echonest here pass @@ -49,38 +73,31 @@ class Song (models.Model): track = audiotools.open(self.url) track_metadata = track.get_metadata() - self.title = track_metadata.track_name or '' - self.artist = track_metadata.artist_name or '' - self.album = track_metadata.album_name or '' + self.title = track_metadata.track_name or _default_title + self.artist = track_metadata.artist_name or _default_artist + self.album = track_metadata.album_name or _default_album self.release_date = datetime.date(int(track_metadata.year or 1), 1, 1) - self.bpm = -1 + self.bpm = _default_bpm - self.bit_rate = track.bits_per_sample() or '' - self.duration = int(track.seconds_length()) or '' - self.echonest_song_id = '' + self.bit_rate = track.bits_per_sample() or _default_bit_rate + self.duration = int(track.seconds_length()) or _default_duration + self.echonest_song_id = _default_echonest_song_id except audiotools.UnsupportedFile, e: - #Couldn't grab the local data - #doesn't support the file, or because reading from it caused an error - self.title = "" - self.artist = "" - self.album = "" - self.release_date = datetime.datetime.now() - self.bpm = -1 + #Couldn't grab the local data - fill in the remaining data for this record, preserving + #anything that already exists. + self.title = self.title or _default_title + self.artist = self.artist or _default_artist + self.album = self.album or _default_album + self.release_date = self.release_date or _default_release_date() - self.bit_rate = -1 - self.duration = -1 - self.echonest_song_id = '' - - #Hash check is run regardless of what metadata method is used - if self.file_hash == None: - #Only get the hash if we really must, it's an expensive operation... - from Melodia.melodia_settings import HASH_FUNCTION as hash - f = open(self.url, 'rb') - self.file_hash = hash(f.read()) + self.bpm = self.bpm or _default_bpm + self.bit_rate = self.bit_rate or _default_bitrate + self.duration = self.bit_rate or _default_duration + self.echonest_song_id = self.echonest_song_id or _default_echonest_song_id def convert(self, output_location, output_format, progress_func = lambda x, y: None): - "Convert a song to a new format, optionally specifying what format to convert to." + "Convert a song to a new format." #Note that output_format over-rides the format guessed by output_location from Melodia.resources import add_resource_dir diff --git a/archiver/tests.py b/archiver/tests.py index 70e110d..1c5bd36 100644 --- a/archiver/tests.py +++ b/archiver/tests.py @@ -20,7 +20,7 @@ class FilesystemScanTest(TestCase): #We must save the archive before we can start adding songs to it new_archive.save() - new_archive._scan_filesystem() + new_archive.quick_scan() class ScanTest(TestCase): def test_archive_scan(self):