From 43bd55d1f5fa00513673dceca2ce990d41ab3403 Mon Sep 17 00:00:00 2001 From: Bradlee Speice Date: Thu, 10 Jan 2013 10:43:18 -0500 Subject: [PATCH] Optimize scanning further, make song URLs PK The Primary Key is added because these URLs must be unique anyway - a single filename can't point to two different files. Additionally, this ensures that the database doesn't have multiple copies of a song. --- archiver/archive.py | 27 ++++++++------------------- archiver/song.py | 2 +- 2 files changed, 9 insertions(+), 20 deletions(-) diff --git a/archiver/archive.py b/archiver/archive.py index b1590f3..1eececd 100644 --- a/archiver/archive.py +++ b/archiver/archive.py @@ -51,30 +51,19 @@ class Archive (models.Model): _regex = '|'.join(( '.*' + ext + '$' for ext in SUPPORTED_AUDIO_EXTENSIONS)) regex = re.compile(_regex, re.IGNORECASE) - #Remove songs in the database if they exist no longer - # -Do this first since we don't need to re-check songs that were just added - for song in self.songs.all(): - if not os.path.isfile(song.url): - song.delete() - continue + #It's hackish, but far fewer transactions to delete everything first, and add it all back. + #If we get interrupted, just re-run it. 
+ self.songs.all().delete() #Add new songs for dirname, dirnames, filenames in os.walk(self.root_folder): #For each filename that is supported for filename in itertools.ifilter(lambda filename: re.match(regex, filename), filenames): - #Make sure that `filename` is in the database - try: - rel_url = os.path.join(dirname, filename) - full_url = os.path.abspath(rel_url) - self.songs.get(url = full_url) - - except ObjectDoesNotExist, e: - #Song needs to be added to database - rel_url = os.path.join(dirname, filename) - full_url = os.path.abspath(rel_url) - new_song = Song(url = full_url) - new_song.save() - self.songs.add(new_song) + rel_url = os.path.join(dirname, filename) + full_url = os.path.abspath(rel_url) + new_song = Song(url = full_url) + new_song.save() + self.songs.add(new_song) def _update_song_metadata(self, use_echonest = False, progress_callback = lambda x, y: None): """Scan every song in this archive (database only) and make sure all songs are correct diff --git a/archiver/song.py b/archiver/song.py index e277a65..e65107c 100644 --- a/archiver/song.py +++ b/archiver/song.py @@ -47,7 +47,7 @@ class Song (models.Model): bit_rate = models.IntegerField(default = _default_bit_rate) duration = models.IntegerField(default = _default_bit_rate) echonest_song_id = models.CharField(max_length = 64, default = _default_echonest_song_id) - url = models.CharField(max_length = 64) + url = models.CharField(max_length = 255, primary_key = True) file_hash = melodia_settings.HASH_RESULT_DB_TYPE def populate_metadata(self, use_echonest = False):