From a515a4b8d30367b4854c8d46a5958c99d5bd5f52 Mon Sep 17 00:00:00 2001 From: Bradlee Speice Date: Thu, 9 May 2013 22:29:12 -0400 Subject: [PATCH] Add documentation for the Archive and Feed models --- archiver/__init__.py | 6 +- archiver/models/__init__.py | 7 + archiver/models/archive.py | 343 ++++++++++++++++++++--------------- archiver/models/feed.py | 156 +++++++++------- archiver/models/listfield.py | 56 ------ archiver/models/playlist.py | 29 +-- 6 files changed, 316 insertions(+), 281 deletions(-) delete mode 100644 archiver/models/listfield.py diff --git a/archiver/__init__.py b/archiver/__init__.py index 469ab41..81493b4 100644 --- a/archiver/__init__.py +++ b/archiver/__init__.py @@ -1,5 +1,7 @@ ''' -.. py:currentmodule:: +.. moduleauthor:: Bradlee Speice - :synopsis: Archiving back-end for the Melodia system. +The ``archiver`` application is responsible for all of the backend operations in Melodia. Its purpose is to provide an easy, Pythonic API to any other applications that want to use it. Some features include: + * Multiple archive location support + * Automatic backup of archives using rsync ''' diff --git a/archiver/models/__init__.py b/archiver/models/__init__.py index 3547801..9abfc4a 100644 --- a/archiver/models/__init__.py +++ b/archiver/models/__init__.py @@ -1,3 +1,10 @@ +''' +.. currentmodule:: archiver.models + +I'm trying to link to :class:`~archiver.models.archive.Archive`! + +''' + # Create your models here. from archive import Archive from song import Song diff --git a/archiver/models/archive.py b/archiver/models/archive.py index ffe20c3..fb635fb 100644 --- a/archiver/models/archive.py +++ b/archiver/models/archive.py @@ -1,188 +1,235 @@ -from django.db import models +""" +.. module:: archiver.models.archive +This is the Archive model for the backend of Melodia. It's functionality is to +provide a grouping of songs based on where they are located in the filesystem. 
+It controls the high-level functionality of managing multiple archives +of music - basically, multiple filesystem folders holding your music. """ -This is the archive model for the archiving backend of Melodia. -It's purpose is to control the high-level functionality of managing -multiple archives of music. It is different from a playlist both conceptually -and practically - an archive describes a group of files, while a playlist -describes a group of songs. -In this way, you back up archives of music - you don't back up the songs in a -playlist. Additionally, you may want to re-organize your music to use a -cleaner directory structure - a playlist doesn't care about this. -""" + +from django.db import models +from django.core.exceptions import ObjectDoesNotExist + +import datetime +import re, os +from itertools import ifilter + +from Melodia.melodia_settings import SUPPORTED_AUDIO_EXTENSIONS +from Melodia.melodia_settings import HASH_FUNCTION as hash class Archive (models.Model): - class Meta: - app_label = 'archiver' + """ + .. data:: name - """ - The archive model itself, and all functions used to interact with it. - The archive is built up from a grouping of songs, and the functions - that are used to interact with many songs at a single time. The archive - for example allows you to re-organize a specific set of music files into - a cleaner directory structure. - The archive is given a folder to use as its root directory - it finds all - music files under there, and takes control of them from there. - """ + String human-readable name of this archive -- ex. ``Steve's Music`` - import datetime + .. data:: root_folder - name = models.CharField(max_length = 64) + String containing the root folder of this archive. Should not be + modified once the archive has been created. - #Note that we're not using FilePathField since this is actually a folder - root_folder = models.CharField(max_length = 255) + .. 
data:: backup_location - #We've removed the reference to "songs" - instead define it as a ForeignKey, - #and do lookups via song_set + String for the rsync-readable location that this archive should + be backed up to. Can be modified if you need to change the location. - #Backup settings - backup_location = models.CharField(max_length = 255, default = None, null = True) - backup_frequency = models.IntegerField(default = 604800) #1 week in seconds - last_backup = models.DateTimeField(default = datetime.datetime.now) #Note that this by default will be the time the archive was instantiated + .. data:: backup_frequency - def _scan_filesystem(self): - "Scan the archive's root filesystem and add any new songs without adding metadata, delete songs that exist no more" - #This method is implemented since the other scan methods all need to use the same code - #DRY FTW - import re, os, itertools - from django.core.exceptions import ObjectDoesNotExist - from Melodia.melodia_settings import SUPPORTED_AUDIO_EXTENSIONS - from Melodia.melodia_settings import HASH_FUNCTION as hash + Integer time in minutes that should be between backups of this archive. + This should not be blank, if you want to disable backups, set the + location to being blank. - _regex = '|'.join(( '.*' + ext + '$' for ext in SUPPORTED_AUDIO_EXTENSIONS)) - regex = re.compile(_regex, re.IGNORECASE) + .. data:: last_backup - #It's hackish, but far fewer transactions to delete everything first, and add it all back. - #If we get interrupted, just re-run it. - song_set.all().delete() + DateTime object that records when the last **successful** backup was run. + Don't touch this. 
+ """ - #Add new songs - for dirname, dirnames, filenames in os.walk(self.root_folder): - #For each filename that is supported - for filename in itertools.ifilter(lambda filename: re.match(regex, filename), filenames): - rel_url = os.path.join(dirname, filename) - full_url = os.path.abspath(rel_url) - new_song = Song(url = full_url) - new_song.save() - song_set.add(new_song) + name = models.CharField(max_length = 64) - def _update_song_metadata(self, use_echonest = False, progress_callback = lambda x, y: None): - """Scan every song in this archive (database only) and make sure all songs are correct - The progress_callback function is called with the current song being operated on first, and the total songs second.""" - #This method operates only on the songs that are in the database - if you need to make - #sure that new songs are added, use the _scan_filesystem() method in addition - total_songs = song_set.count() + #Note that we're not using FilePathField since this is actually a folder + root_folder = models.CharField(max_length = 512) - for index, song in enumerate(song_set.all()): - song.populate_metadata(use_echonest = use_echonest) - song.save() - progress_callback(index + 1, total_songs) + #We've removed the reference to "songs" - instead define it as a ForeignKey, + #and do lookups via song_set - def _needs_backup(self): - "Check if the current archive is due for a backup" - import datetime + #Backup settings + backup_location = models.CharField(max_length = 255, default = None, null = True) + backup_frequency = models.IntegerField(default = 10800) #1 week in minutes + last_backup = models.DateTimeField(default = datetime.datetime.now) #Note that this by default will be the time the archive was instantiated - prev_backup_time = self.last_backup - current_time = datetime.datetime.now() + class Meta: + app_label = 'archiver' - delta = current_time - prev_backup_time - if delta > datetime.timedelta(seconds = self.backup_frequency): - return True - else: - 
return False + def _scan_filesystem(self): + """ + Scan the archive's root filesystem and add any new songs without adding + metadata, delete songs that exist no more. + .. todo:: + + This should be fixed so that we don't drop all songs and re-add + them. That's just terrible design. + """ + #This method is implemented since the other scan methods all need to + #use the same code. DRY FTW + + _supported_extns_regex = '|'.join(( '.*' + ext + '$' for ext + in SUPPORTED_AUDIO_EXTENSIONS)) + regex = re.compile(_supported_extns_regex, re.IGNORECASE) - def quick_scan(self): - "Scan this archive's root folder and make sure that all songs are in the database." - #This is a quick scan - only validate whether or not songs should exist in the database + #It's hackish, but far fewer transactions to delete everything first, + #and add it all back. If we get interrupted, just re-run it. + song_set.all().delete() - self._scan_filesystem() + #For each filename that is supported + for filename in ifilter(lambda filename: re.match(regex, filename), filenames): + rel_url = os.path.join(dirname, filename) + full_url = os.path.abspath(rel_url) + new_song = Song(url = full_url) + new_song.save() + song_set.add(new_song) - def scan(self): - "Scan this archive's root folder and make sure any local metadata are correct." - #This is a longer scan - validate whether songs should exist, and use local data to update - #the database + def _update_song_metadata(self, progress_callback = lambda x, y: None): + """ + Scan every song in this archive (database only) and make sure all + songs are correct. The progress_callback function is called with the + current song being operated on first, and the total songs second. - self._scan_filesystem() - self._update_song_metadata() + :param progress_callback: Function called to give progress. First + argument is an integer for the song currently in progress, second + argument is the total number of songs to be operated on.
+ """ + total_songs = song_set.count() - def deep_scan(self): - "Scan this archive's root folder and make sure that all songs are in the database, and use EchoNest to update metadata as necessary" - #This is a very long scan - validate whether songs should exist, and use Echonest to make sure - #that metadata is as accurate as possible. - self._scan_filesystem() - self._update_song_metadata(use_echonest = True) + for index, song in enumerate(song_set.all()): + song.populate_metadata() + song.save() + progress_callback(index + 1, total_songs) - - def run_backup(self, force_backup = False): - "Backup the current archive" - if force_backup or self._needs_backup(): - import subprocess - subprocess.call(['rsync', '-av', self.root_folder, self.backup_location]) + def _needs_backup(self): + "Check if the current archive is due for a backup" + import datetime - def reorganize(self, format_string, progress_function = lambda w, x, y, z: None, dry_run = False): - """Reorganize a music archive using a specified format string. - Recognized escape characters: - %a - Artist Name %A - Album Name - %d - Disc Number %e - Number of discs - %f - Current Filename (with extension) %g - Current Filename (no extension) - %n - Track Number %o - Number of tracks on disc - %y - Album year + prev_backup_time = self.last_backup + current_time = datetime.datetime.now() - Note that all organization takes place relative to the archive's root folder. - The progress_function is called with the current song number as its first argument, total songs as its second, - current song URL as the third argument, and new URL as the fourth. - """ - import os, shutil, errno + delta = current_time - prev_backup_time + if delta > datetime.timedelta(seconds = self.backup_frequency): + return True + else: + return False - total_songs = song_set.count() + def quick_scan(self): + """ + Scan this archive's root folder, add or remove songs from the DB + as necessary. 
+ """ + self._scan_filesystem() - for index, song in enumerate(song_set.all()): - _current_filename = os.path.basename(song.url) - _current_filename_no_extension = os.path.splitext(_current_filename)[0] + def scan(self): + """ + Like :func:`quick_scan` but makes sure all metadata is current. + """ + #This is a longer scan - validate whether songs should exist, and use local data to update + #the database - _release_year = song.release_date.year + self._scan_filesystem() + self._update_song_metadata() - new_location = format_string.replace("%a", song.artist)\ - .replace("%A", song.album)\ - .replace("%d", str(song.disc_number))\ - .replace("%e", str(song.disc_total))\ - .replace("%f", _current_filename)\ - .replace("%g", _current_filename_no_extension)\ - .replace("%n", str(song.track_number))\ - .replace("%o", str(song.track_total))\ - .replace("%y", str(_release_year)) + def run_backup(self, force_backup = False): + """ + Backup the current archive - new_url = os.path.join(self.root_folder, new_location) + :param force_backup: Boolean value, if `True` will ensure backup runs. + """ + if force_backup or self._needs_backup(): + import subprocess + subprocess.call(['rsync', '-av', self.root_folder, self.backup_location]) - progress_function(index + 1, total_songs, song.url, new_url) + def reorganize(self, format_string, + progress_function = lambda w, x, y, z: None, + dry_run = False): + """ + Reorganize a music archive using a given `format_string`. Recognized + escape characters are below: - if not dry_run: - new_folder = os.path.dirname(new_url) - try: - #`mkdir -p` functionality - if not os.path.isdir(new_folder): - os.makedirs(new_folder) + .. table:: - #Safely copy the file - don't 'move' it, but do a full 'copy' 'rm' - #This way if the process is ever interrupted, we have an unaltered copy - #of the file. 
- shutil.copyfile(song.url, new_url) - shutil.copystat(song.url, new_url) + ========== ============== + Character: Replaced with: + ========== ============== + %a Artist Name + %A Album Name + %d Disc Number + %e Number of discs + %f Current Filename (with extension) + %g Current Filename (no extension) + %n Track Number + %o Number of tracks on disc + %y Album year + ========== ============== - #Notify the database about the new URL - old_url = song.url - song.url = new_url - song.save() + All re-organization takes place relative to the archive's + :data:`root_folder`. - #Actually remove the file since all references to the original location have been removed - os.remove(old_url) + :param format_string: String describing how each song should be re-organized + :param progress_function: Optional function to get current progress - see notes below. + :param dry_run: Boolean, if `True` will do everything except move files - except OSError as exc: - if exc.errno == errno.EEXIST and os.path.isdir(new_folder): - #This is safe to skip - makedirs() is complaining about a folder already existing - pass - else: raise + The progress_function is called with the current song number as its first argument, total songs as its second, + current song URL as the third argument, and new URL as the fourth. 
+ """ + import os, shutil, errno - except IOError as exc: - #shutil error - likely that folders weren't specified correctly + total_songs = song_set.count() + + for index, song in enumerate(song_set.all()): + _current_filename = os.path.basename(song.url) + _current_filename_no_extension = os.path.splitext(_current_filename)[0] + + _release_year = song.release_date.year + + new_location = format_string.replace("%a", song.artist)\ + .replace("%A", song.album)\ + .replace("%d", str(song.disc_number))\ + .replace("%e", str(song.disc_total))\ + .replace("%f", _current_filename)\ + .replace("%g", _current_filename_no_extension)\ + .replace("%n", str(song.track_number))\ + .replace("%o", str(song.track_total))\ + .replace("%y", str(_release_year)) + + new_url = os.path.join(self.root_folder, new_location) + + progress_function(index + 1, total_songs, song.url, new_url) + + if not dry_run: + new_folder = os.path.dirname(new_url) + try: + #`mkdir -p` functionality + if not os.path.isdir(new_folder): + os.makedirs(new_folder) + + #Safely copy the file - don't 'move' it, but do a full 'copy' 'rm' + #This way if the process is ever interrupted, we have an unaltered copy + #of the file. 
+ shutil.copyfile(song.url, new_url) + shutil.copystat(song.url, new_url) + + #Notify the database about the new URL + old_url = song.url + song.url = new_url + song.save() + + #Actually remove the file since all references to the original location have been removed + os.remove(old_url) + + except OSError as exc: + if exc.errno == errno.EEXIST and os.path.isdir(new_folder): + #This is safe to skip - makedirs() is complaining about a folder already existing + pass + else: raise + + except IOError as exc: + #shutil error - likely that folders weren't specified correctly raise raise diff --git a/archiver/models/feed.py b/archiver/models/feed.py index 82a8114..6842588 100644 --- a/archiver/models/feed.py +++ b/archiver/models/feed.py @@ -1,85 +1,117 @@ +""" +The Feed model describes a podcast of anything that can be parsed by :mod:`feedparser`. +Most of the heavy lifting is done via :mod:`feedparser`, we just download the +podcast files. +""" + from django.db import models import datetime, time import feedparser from archive import Archive - -""" -The "Feed" model describes a podcast feed using any of RSS, Atom, etc. -Backend handling is processed by 'feedparser', we just download all the podcast files, -control how many are stored, etc. The feed is intended to belong to an archive - -this way the feed is backed up automatically (and we don't have the podcast spewing -files everywhere). -It is important to note - the "max_episodes" field regulates how many episodes are -stored and backed up. A value < 1 indicates storing all episodes. -""" - +# What mime types should be downloaded from the podcast XML _audio_type_mime_types = [ - u'audio/mpeg' - ] + u'audio/mpeg' + ] _audio_type_mime_types_string = "\n".join(_audio_type_mime_types) class Feed(models.Model): - class Meta: - app_label = 'archiver' + """ + .. data:: url + + String representation of The URL from which the podcast + file should be downloaded. 
- url = models.URLField() - name = models.CharField(max_length = 64) - max_episodes = models.IntegerField(default = 0) # Default store everything - current_episodes = models.IntegerField(default = 0) - last_episode = models.DateTimeField(default = datetime.datetime(1970, 1, 1)) - parent_archive = models.ForeignKey(Archive) + .. data:: name + + Human-readable string for this podcast. This is set by the user, not by + the XML podcast name. Is the name for the folder in which this podcast + is stored. - def _get_episode_time(episode): - """ - Get a datetime.datetime object of a podcast episode's published time. - Expects a specific element from feed_object.entries. - """ - t = time.mktime(episode.published_parsed) - return datetime.datetime.fromtimestamp(t) + .. data:: max_episodes - def _calculate_new_episodes(feed_object): - """ - Calculate how many new episodes there are of a podcast (and consequently - how many we need to remove). - """ - num_episodes = 0 + Integer for how many episodes should be stored at a time. A value of ``0`` + (or ``< 0``) indicates that all episodes should be stored. A positive + value controls how many episodes are stored at a time. - #feed_object.entries starts at the most recent - for episode in feed_object.entries: - if _get_episode_time(episode) > last_episode: - num_episodes += 1 + .. data:: current_episodes - #Don't set ourselves up to download any more than max_episodes - if num_episodes > max_episodes and max_episodes > 0: - return num_episodes + Integer for how many episodes are currently stored locally. This will + be deprecated, as it can be calculated. - return num_episodes + .. data:: last_episode + + DateTime object for the date of the most recent file downloaded. This + should not be modified by anything outside this model. - def _download_podcast(feed_object, num_episodes = -1): - """ - Update this podcast with episodes from the server copy.
The feed_object is a reference to a - feedparser object so we don't have to redownload a feed multiple times. - """ + .. data:: parent_archive - num_episodes = _calculate_new_episodes() + Reference to the :class:`Archive` this podcast belongs to. Informs the + feed where it should store its files at. + """ - #feedparser-specific way of building the list - new_episodes = feed_object.entries[:num_episodes] + url = models.URLField() + name = models.CharField(max_length = 64) + max_episodes = models.IntegerField(default = 0) # Default store everything + current_episodes = models.IntegerField(default = 0) + last_episode = models.DateTimeField(default = datetime.datetime(1970, 1, 1)) + parent_archive = models.ForeignKey(Archive) - for episode in new_episodes: - episode_audio_links = [link for link in episodes['links'] - if link['type'] in _audio_type_mime_types_string] + class Meta: + app_label = 'archiver' - print episode_audio_links + def _get_episode_time(episode): + """ + Get a datetime.datetime object of a podcast episode's published time. + Expects a specific element from feed_object.entries. + """ + t = time.mktime(episode.published_parsed) + return datetime.datetime.fromtimestamp(t) - - def sync_podcast(dry_run = False, forbid_delete = False): - """ - Update the podcast with episodes from the server copy. If dry_run, don't actually download episodes, - but show what changes would have been made (implies forbid_delete). If forbid_delete, download all new - episodes, ignoring the max_episodes count. - """ - pass + def _calculate_new_episodes(feed_object): + """ + Calculate how many new episodes there are of a podcast (and consequently + how many we need to remove). 
+ """ + num_episodes = 0 + + #feed_object.entries starts at the most recent + for episode in feed_object.entries: + if _get_episode_time(episode) > last_episode: + num_episodes += 1 + + #Don't set ourselves up to download any more than max_episodes + if num_episodes > max_episodes and max_episodes > 0: + return num_episodes + + return num_episodes + + def _download_podcast(feed_object, num_episodes = -1): + """ + Update this podcast with episodes from the server copy. The feed_object is a reference to a + feedparser object so we don't have to redownload a feed multiple times. + """ + + num_episodes = _calculate_new_episodes() + + #feedparser-specific way of building the list + new_episodes = feed_object.entries[:num_episodes] + + for episode in new_episodes: + episode_audio_links = [link for link in episodes['links'] + if link['type'] in _audio_type_mime_types_string] + + print episode_audio_links + + + def sync_podcast(dry_run = False, forbid_delete = False): + """ + Update the podcast with episodes from the server copy. + + :param dry_run: Calculate what would have been downloaded or deleted, but do not actually do either. + :param forbid_delete: Run, and only download new episodes. Ignores the :data:`max_episodes` field for this podcast. + + """ + pass diff --git a/archiver/models/listfield.py b/archiver/models/listfield.py deleted file mode 100644 index a064f9c..0000000 --- a/archiver/models/listfield.py +++ /dev/null @@ -1,56 +0,0 @@ -from django.db import models -import re, itertools - -class IntegerListField(models.TextField): - class Meta: - app_label = 'archiver' - - """ - Store a list of integers in a database string. - Format is: - [, , , ... , ] - """ - - description = "Field type for storing lists of integers." 
- - __metaclass__ = models.SubfieldBase - - def __init__(self, *args, **kwargs): - super(IntegerListField, self).__init__(*args, **kwargs) - - - #Convert database to python - def to_python(self, value): - if isinstance(value, list): - return value - - #Process a database string - - #Validation first - if len(value) <= 0: - return [] - - if value[0] != '[' or value[-1] != ']': - raise ValidationError("Invalid input to parse a list of integers!") - - #Note that any non-digit string is a valid separator - _csv_regex = "[0-9]" - csv_regex = re.compile(_csv_regex) - - #Synonymous to: - #string_list = filter(None, csv_regex.findall(value)) - string_list = itertools.ifilter(None, csv_regex.findall(value)) - value_list = [int(i) for i in string_list] - - return value_list - - #Convert python to database - def get_prep_value(self, value): - if not isinstance(value, list): - raise ValidationError("Invalid list given to put in database!") - - separator_string = ", " - - list_elements = separator_string.join(map(str, value)) - - return "[" + list_elements + "]" diff --git a/archiver/models/playlist.py b/archiver/models/playlist.py index 0736133..150f062 100644 --- a/archiver/models/playlist.py +++ b/archiver/models/playlist.py @@ -1,3 +1,19 @@ +""" +.. module:: archiver.models + +Playlist model +Each playlist is a high-level ordering of songs. There really isn't much to a playlist - just its name, and the songs inside it. +However, we need to have a way to guarantee song order, in addition to re-ordering. A ManyToMany field can't do this. +As such, a custom IntegerListField is implemented - it takes a python list of ints, converts it to a text field in the DB, +and then back to a python list. This way, we can guarantee order, and have a song appear multiple times. +The IntegerListField itself uses the ID of each song as the int in a list. For example, a list of: + + [1, 3, 5, 17] + +Means that the playlist is made up of four songs. 
The order of the playlist is the song with index 1, 3, 5, and 17. +Additionally, the ManyToMany field is included to make sure we don't use the global Songs manager - it just seems hackish. +""" + from django.db import models from django.core.exceptions import ObjectDoesNotExist @@ -7,19 +23,6 @@ from listfield import IntegerListField import re from warnings import warn -""" -Playlist model -Each playlist is a high-level ordering of songs. There really isn't much to a playlist - just its name, and the songs inside it. -However, we need to have a way to guarantee song order, in addition to re-ordering. A ManyToMany field can't do this. -As such, a custom IntegerListField is implemented - it takes a python list of ints, converts it to a text field in the DB, -and then back to a python list. This way, we can guarantee order, and have a song appear multiple times. -The IntegerListField itself uses the ID of each song as the int in a list. For example, a list of: - [1, 3, 5, 17] - -Means that the playlist is made up of four songs. The order of the playlist is the song with index 1, 3, 5, and 17. -Additionally, the ManyToMany field is included to make sure we don't use the global Songs manager - it just seems hackish. -""" - class Playlist(models.Model): class Meta: app_label = 'archiver'