Add documentation for the Archive and Feed models

2025-07-05 07:54:44 -04:00 · 2013-05-09 22:29:12 -04:00
parent fb74ceaa1d
commit a515a4b8d3
6 changed files with 316 additions and 281 deletions
--- a/archiver/init.py
+++ b/archiver/init.py
@ -1,5 +1,7 @@
 '''
-.. py:currentmodule:: 
+.. moduleauthor:: Bradlee Speice <bspeice.nc@gmail.com>
-   :synopsis: Archiving back-end for the Melodia system.
+The ``archiver`` application is responsible for all of the backend operations in Melodia. Its purpose is to provide an easy, Pythonic API to any other applications that want to use it. Some features include:
   * Multiple archive location support
   * Automatic backup of archives using rsync
 '''
--- a/archiver/models/init.py
+++ b/archiver/models/init.py
@ -1,3 +1,10 @@
 '''
 .. currentmodule:: archiver.models
 I'm trying to link to :class:`~archiver.models.archive.Archive`!
 '''
 # Create your models here.
 from archive import Archive
 from song import Song
--- a/archiver/models/archive.py
+++ b/archiver/models/archive.py
@ -1,188 +1,235 @@
-from django.db import models
+"""
 .. module:: archiver.models.archive
 This is the Archive model for the backend of Melodia. Its functionality is to
 provide a grouping of songs based on where they are located in the filesystem.
 It controls the high-level functionality of managing multiple archives
 of music - basically, multiple filesystem folders holding your music.
 """
-This is the archive model for the archiving backend of Melodia.
+
-It's purpose is to control the high-level functionality of managing
+from django.db import models
-multiple archives of music. It is different from a playlist both conceptually
+from django.core.exceptions import ObjectDoesNotExist
-and practically - an archive describes a group of files, while a playlist
+
-describes a group of songs.
+import datetime
-In this way, you back up archives of music - you don't back up the songs in a
+import re, os
-playlist. Additionally, you may want to re-organize your music to use a
+from itertools import ifilter
-cleaner directory structure - a playlist doesn't care about this.
+
-"""
+from Melodia.melodia_settings import SUPPORTED_AUDIO_EXTENSIONS
 from Melodia.melodia_settings import HASH_FUNCTION as hash
 class Archive (models.Model):
-	class Meta:
+    """
-		app_label = 'archiver'
+    .. data:: name
-	"""
+       String human-readable name of this archive -- ex. ``Steve's Music``
 	The archive model itself, and all functions used to interact with it.
 	The archive is built up from a grouping of songs, and the functions
 	that are used to interact with many songs at a single time. The archive
 	for example allows you to re-organize a specific set of music files into
 	a cleaner directory structure.
 	The archive is given a folder to use as its root directory - it finds all
 	music files under there, and takes control of them from there.
 	"""
-	import datetime
+    .. data:: root_folder
-	name        = models.CharField(max_length = 64)
+       String containing the root folder of this archive. Should not be
       modified once the archive has been created.
-	#Note that we're not using FilePathField since this is actually a folder
+    .. data:: backup_location
 	root_folder = models.CharField(max_length = 255)
-	#We've removed the reference to "songs" - instead define it as a ForeignKey,
+       String for the rsync-readable location that this archive should
-	#and do lookups via song_set
+       be backed up to. Can be modified if you need to change the location.
-	#Backup settings
+    .. data:: backup_frequency
 	backup_location  = models.CharField(max_length = 255, default = None, null = True)
 	backup_frequency = models.IntegerField(default = 604800) #1 week in seconds
 	last_backup      = models.DateTimeField(default = datetime.datetime.now) #Note that this by default will be the time the archive was instantiated
-	def _scan_filesystem(self):
+       Integer time in minutes that should be between backups of this archive.
-		"Scan the archive's root filesystem and add any new songs without adding metadata, delete songs that exist no more"
+       This should not be blank; if you want to disable backups, set the
-		#This method is implemented since the other scan methods all need to use the same code
+       location to being blank.
 		#DRY FTW
 		import re, os, itertools
 		from django.core.exceptions import ObjectDoesNotExist
 		from Melodia.melodia_settings import SUPPORTED_AUDIO_EXTENSIONS
 		from Melodia.melodia_settings import HASH_FUNCTION as hash
-		_regex = '|'.join(( '.*' + ext + '$' for ext in SUPPORTED_AUDIO_EXTENSIONS))
+    .. data:: last_backup
 		regex  = re.compile(_regex, re.IGNORECASE)
-		#It's hackish, but far fewer transactions to delete everything first, and add it all back.
+       DateTime object that records when the last **successful** backup was run.
-		#If we get interrupted, just re-run it.
+       Don't touch this.
-		song_set.all().delete()
+    """
-		#Add new songs
+    name = models.CharField(max_length = 64)
 		for dirname, dirnames, filenames in os.walk(self.root_folder):
 			#For each filename that is supported
 			for filename in itertools.ifilter(lambda filename: re.match(regex, filename), filenames):
 				rel_url = os.path.join(dirname, filename)
 				full_url = os.path.abspath(rel_url)
 				new_song = Song(url = full_url)
 				new_song.save()
 				song_set.add(new_song)
-	def _update_song_metadata(self, use_echonest = False, progress_callback = lambda x, y: None):
+    #Note that we're not using FilePathField since this is actually a folder
-		"""Scan every song in this archive (database only) and make sure all songs are correct
+    root_folder = models.CharField(max_length = 512)
 		The progress_callback function is called with the current song being operated on first, and the total songs second."""
 		#This method operates only on the songs that are in the database - if you need to make
 		#sure that new songs are added, use the _scan_filesystem() method in addition
 		total_songs  = song_set.count()
-		for index, song in enumerate(song_set.all()):
+    #We've removed the reference to "songs" - instead define it as a ForeignKey,
-			song.populate_metadata(use_echonest = use_echonest)
+    #and do lookups via song_set
 			song.save()
 			progress_callback(index + 1, total_songs)
-	def _needs_backup(self):
+    #Backup settings
-		"Check if the current archive is due for a backup"
+    backup_location  = models.CharField(max_length = 255, default = None, null = True)
-		import datetime
+    backup_frequency = models.IntegerField(default = 10800) #1 week in minutes
    last_backup      = models.DateTimeField(default = datetime.datetime.now) #Note that this by default will be the time the archive was instantiated
-		prev_backup_time = self.last_backup
+    class Meta:
-		current_time     = datetime.datetime.now()
+        app_label = 'archiver'
-		delta = current_time - prev_backup_time
+    def _scan_filesystem(self):
-		if delta > datetime.timedelta(seconds = self.backup_frequency):
+        """
-			return True
+        Scan the archive's root filesystem and add any new songs without adding
-		else:
+        metadata, delete songs that exist no more.
-			return False
+        .. todo:: 
-	def quick_scan(self):
+           This should be fixed so that we don't drop all songs and re-add
-		"Scan this archive's root folder and make sure that	all songs are in the database."
+           them. That's just terrible design.
-		#This is a quick scan - only validate whether or not songs should exist in the database
+        """
        #This method is implemented since the other scan methods all need to
        #use the same code. DRY FTW
-		self._scan_filesystem()
+        _supported_extns_regex = '|'.join(( '.*' + ext + '$' for ext
                                            in SUPPORTED_AUDIO_EXTENSIONS))
        regex  = re.compile(_supported_extns_regex, re.IGNORECASE)
-	def scan(self):
+        #It's hackish, but far fewer transactions to delete everything first,
-		"Scan this archive's root folder and make sure any local metadata are correct."
+        #and add it all back. If we get interrupted, just re-run it.
-		#This is a longer scan - validate whether songs should exist, and use local data to update
+        song_set.all().delete()
 		#the database
-		self._scan_filesystem()
+        #For each filename that is supported
-		self._update_song_metadata()
+        for filename in ifilter(lambda filename: re.match(regex, filename), filenames):
            rel_url = os.path.join(dirname, filename)
            full_url = os.path.abspath(rel_url)
            new_song = Song(url = full_url)
            new_song.save()
            song_set.add(new_song)
-	def deep_scan(self):
+    def _update_song_metadata(self, progress_callback = lambda x, y: None):
-		"Scan this archive's root folder and make sure that	all songs are in the database, and use EchoNest to update metadata as necessary"
+        """
-		#This is a very long scan - validate whether songs should exist, and use Echonest to make sure
+        Scan every song in this archive (database only) and make sure all
-		#that metadata is as accurate as possible.
+        songs are correct. The progress_callback function is called with the
-		self._scan_filesystem()
+        current song being operated on first, and the total songs second.
 		self._update_song_metadata(use_echonest = True)
        :param progress_callback: Function called to give progress. First
        argument is an integer for the song currently in progress, second
        argument is the total number of songs to be operated on.
        """
        total_songs  = song_set.count()
-	def run_backup(self, force_backup = False):
+        for index, song in enumerate(song_set.all()):
-		"Backup the current archive"
+            song.populate_metadata()
-		if force_backup or self._needs_backup():
+            song.save()
-			import subprocess
+            progress_callback(index + 1, total_songs)
 			subprocess.call(['rsync', '-av', self.root_folder, self.backup_location])
-	def reorganize(self, format_string, progress_function = lambda w, x, y, z: None, dry_run = False):
+    def _needs_backup(self):
-		"""Reorganize a music archive using a specified format string.
+        "Check if the current archive is due for a backup"
-		Recognized escape characters:
+        import datetime
 		%a - Artist Name                       %A - Album Name
 		%d - Disc Number                       %e - Number of discs
 		%f - Current Filename (with extension) %g - Current Filename (no extension)
 		%n - Track Number                      %o - Number of tracks on disc
 		%y - Album year
-		Note that all organization takes place relative to the archive's root folder.
+        prev_backup_time = self.last_backup
-		The progress_function is called with the current song number as its first argument, total songs as its second,
+        current_time     = datetime.datetime.now()
 		current song URL as the third argument, and new URL as the fourth.
 		"""
 		import os, shutil, errno
-		total_songs = song_set.count()
+        delta = current_time - prev_backup_time
        if delta > datetime.timedelta(seconds = self.backup_frequency):
            return True
        else:
            return False
-		for index, song in enumerate(song_set.all()):
+    def quick_scan(self):
-			_current_filename              = os.path.basename(song.url)
+        """
-			_current_filename_no_extension = os.path.splitext(_current_filename)[0]
+        Scan this archive's root folder, add or remove songs from the DB
        as necessary.
        """
        self._scan_filesystem()
-			_release_year = song.release_date.year
+    def scan(self):
        """
        Like :func:`quick_scan` but makes sure all metadata is current.
        """
        #This is a longer scan - validate whether songs should exist, and use local data to update
        #the database
-			new_location = format_string.replace("%a", song.artist)\
+        self._scan_filesystem()
-										.replace("%A", song.album)\
+        self._update_song_metadata()
 										.replace("%d", str(song.disc_number))\
 										.replace("%e", str(song.disc_total))\
 										.replace("%f", _current_filename)\
 										.replace("%g", _current_filename_no_extension)\
 										.replace("%n", str(song.track_number))\
 										.replace("%o", str(song.track_total))\
 										.replace("%y", str(_release_year))
-			new_url = os.path.join(self.root_folder, new_location)
+    def run_backup(self, force_backup = False):
        """
        Backup the current archive
-			progress_function(index + 1, total_songs, song.url, new_url)
+        :param force_backup: Boolean value, if `True` will ensure backup runs.
        """
        if force_backup or self._needs_backup():
            import subprocess
            subprocess.call(['rsync', '-av', self.root_folder, self.backup_location])
-			if not dry_run:
+    def reorganize(self, format_string,
-				new_folder = os.path.dirname(new_url)
+                    progress_function = lambda w, x, y, z: None,
-				try:
+                    dry_run = False):
-					#`mkdir -p` functionality
+        """
-					if not os.path.isdir(new_folder):
+        Reorganize a music archive using a given `format_string`. Recognized
-						os.makedirs(new_folder)
+        escape characters are below:
-					#Safely copy the file - don't 'move' it, but do a full 'copy' 'rm'
+        .. table::
 					#This way if the process is ever interrupted, we have an unaltered copy
 					#of the file.
 					shutil.copyfile(song.url, new_url)
 					shutil.copystat(song.url, new_url)
-					#Notify the database about the new URL
+           ==========   ==============
-					old_url  = song.url
+           Character:   Replaced with:
-					song.url = new_url
+           ==========   ==============
-					song.save()
+           %a           Artist Name
           %A           Album Name
           %d           Disc Number
           %e           Number of discs
           %f           Current Filename (with extension)
           %g           Current Filename (no extension)
           %n           Track Number
           %o           Number of tracks on disc
           %y           Album year
           ==========   ==============
-					#Actually remove the file since all references to the original location have been removed
+        All re-organization takes place relative to the archive's
-					os.remove(old_url)
+        :data:`root_folder`.
-				except OSError as exc:
+        :param format_string: String describing how each song should be re-organized
-					if exc.errno == errno.EEXIST and os.path.isdir(new_folder):
+        :param progress_function: Optional function to get current progress - see notes below.
-						#This is safe to skip - makedirs() is complaining about a folder already existing
+        :param dry_run: Boolean, if `True` will do everything except move files
 						pass
 					else: raise
-				except IOError as exc:
+        The progress_function is called with the current song number as its first argument, total songs as its second,
-					#shutil error - likely that folders weren't specified correctly
+        current song URL as the third argument, and new URL as the fourth.
        """
        import os, shutil, errno
        total_songs = song_set.count()
        for index, song in enumerate(song_set.all()):
            _current_filename              = os.path.basename(song.url)
            _current_filename_no_extension = os.path.splitext(_current_filename)[0]
            _release_year = song.release_date.year
            new_location = format_string.replace("%a", song.artist)\
                                        .replace("%A", song.album)\
                                        .replace("%d", str(song.disc_number))\
                                        .replace("%e", str(song.disc_total))\
                                        .replace("%f", _current_filename)\
                                        .replace("%g", _current_filename_no_extension)\
                                        .replace("%n", str(song.track_number))\
                                        .replace("%o", str(song.track_total))\
                                        .replace("%y", str(_release_year))
            new_url = os.path.join(self.root_folder, new_location)
            progress_function(index + 1, total_songs, song.url, new_url)
            if not dry_run:
                new_folder = os.path.dirname(new_url)
                try:
                    #`mkdir -p` functionality
                    if not os.path.isdir(new_folder):
                        os.makedirs(new_folder)
                    #Safely copy the file - don't 'move' it, but do a full 'copy' 'rm'
                    #This way if the process is ever interrupted, we have an unaltered copy
                    #of the file.
                    shutil.copyfile(song.url, new_url)
                    shutil.copystat(song.url, new_url)
                    #Notify the database about the new URL
                    old_url  = song.url
                    song.url = new_url
                    song.save()
                    #Actually remove the file since all references to the original location have been removed
                    os.remove(old_url)
                except OSError as exc:
                    if exc.errno == errno.EEXIST and os.path.isdir(new_folder):
                        #This is safe to skip - makedirs() is complaining about a folder already existing
                        pass
                    else: raise
                except IOError as exc:
                    #shutil error - likely that folders weren't specified correctly raise
 					raise
--- a/archiver/models/feed.py
+++ b/archiver/models/feed.py
@ -1,85 +1,117 @@
 """
 The Feed model describes a podcast of anything that can be parsed by :mod:`feedparser`.
 Most of the heavy lifting is done via :mod:`feedparser`, we just download the
 podcast files.
 """
 from django.db import models
 import datetime, time
 import feedparser
 from archive import Archive
-
+# What mime types should be downloaded from the podcast XML
 """
 The "Feed" model describes a podcast feed using any of RSS, Atom, etc.
 Backend handling is processed by 'feedparser', we just download all the podcast files,
 control how many are stored, etc. The feed is intended to belong to an archive - 
 this way the feed is backed up automatically (and we don't have the podcast spewing
 files everywhere).
 It is important to note - the "max_episodes" field regulates how many episodes are
 stored and backed up. A value < 1 indicates storing all episodes.
 """
 _audio_type_mime_types = [
-		u'audio/mpeg'
+        u'audio/mpeg'
-		]
+        ]
 _audio_type_mime_types_string = "\n".join(_audio_type_mime_types)
 class Feed(models.Model):
-	class Meta:
+    """
-		app_label = 'archiver'
+    .. data:: url
-	url = models.URLField()
+       String representation of the URL from which the podcast
-	name = models.CharField(max_length = 64)
+       file should be downloaded.
 	max_episodes = models.IntegerField(default = 0) # Default store everything
 	current_episodes = models.IntegerField(default = 0)
 	last_episode = models.DateTimeField(default = datetime.datetime(1970, 1, 1))
 	parent_archive = models.ForeignKey(Archive)
-	def _get_episode_time(episode):
+    .. data:: name
 		"""
 		Get a datetime.datetime object of a podcast episode's published time.
 		Expects a specific element from feed_object.entries.
 		"""
 		t = time.mktime(episode.published_parsed)
 		return datetime.datetime.fromtimestamp(t)
-	def _calculate_new_episodes(feed_object):
+       Human-readable string for this podcast. This is set by the user, not by
-		"""
+       the XML podcast name. Is the name for the folder in which this podcast
-		Calculate how many new episodes there are of a podcast (and consequently
+       is stored.
 		how many we need to remove).
 		"""
 		num_episodes = 0
-		#feed_object.entries starts at the most recent
+    .. data:: max_episodes
 		for episode in feed_object.entries:
 			if _get_episode_time(episode) > last_episode:
 				num_episodes += 1
-			#Don't set ourselves up to download any more than max_episodes
+       Integer for how many episodes should be stored at a time. A value of ``0``
-			if num_episodes > max_episodes and max_episodes > 0:
+       (or ``< 0``) indicates that all episodes should be stored. A positive
-				return num_episodes
+       value controls how many episodes are stored at a time.
-		return num_episodes
+    .. data:: current_episodes
-	def _download_podcast(feed_object, num_episodes = -1):
+       Integer for how many episodes are currently stored locally. This will
-		"""
+       be deprecated, as it can be calculated.
 		Update this podcast with episodes from the server copy. The feed_object is a reference to a
 		feedparser object so we don't have to redownload a feed multiple times.
 		"""
-		num_episodes = _calculate_new_episodes()
+    .. data:: last_episode
-		#feedparser-specific way of building the list
+       DateTime object for the date of the most recent file downloaded. This
-		new_episodes = feed_object.entries[:num_episodes]
+       should not be modified by anything outside this model.
-		for episode in new_episodes:
+    .. data:: parent_archive
 			episode_audio_links = [link for link in episodes['links']
 											if link['type'] in _audio_type_mime_types_string]
-			print episode_audio_links
+       Reference to the :class:`Archive` this podcast belongs to. Informs the
       feed where it should store its files.
    """
    url = models.URLField()
    name = models.CharField(max_length = 64)
    max_episodes = models.IntegerField(default = 0) # Default store everything
    current_episodes = models.IntegerField(default = 0)
    last_episode = models.DateTimeField(default = datetime.datetime(1970, 1, 1))
    parent_archive = models.ForeignKey(Archive)
    class Meta:
        app_label = 'archiver'
    @staticmethod
    def _get_episode_time(episode):
        """
        Get a datetime.datetime object of a podcast episode's published time.
        Expects a specific element from feed_object.entries.

        :param episode: Entry whose ``published_parsed`` attribute is handed
           to :func:`time.mktime`.
        :returns: :class:`datetime.datetime` built from that timestamp.
        """
        #Static on purpose: no instance state is read, and without the
        #decorator an instance call would bind the Feed itself to ``episode``.
        published_timestamp = time.mktime(episode.published_parsed)
        return datetime.datetime.fromtimestamp(published_timestamp)

    def _calculate_new_episodes(self, feed_object):
        """
        Calculate how many new episodes there are of a podcast (and consequently
        how many we need to remove).

        :param feed_object: Object exposing an ``entries`` sequence of episodes.
        :returns: Integer count of entries published after ``last_episode``.
           When ``max_episodes`` is positive the count never exceeds it.
        """
        num_episodes = 0
        #feed_object.entries starts at the most recent
        for episode in feed_object.entries:
            if self._get_episode_time(episode) > self.last_episode:
                num_episodes += 1
            #Don't set ourselves up to download any more than max_episodes.
            #Stop as soon as the cap is reached (not one past it).
            if 0 < self.max_episodes <= num_episodes:
                return num_episodes
        return num_episodes

    def _download_podcast(self, feed_object, num_episodes = -1):
        """
        Update this podcast with episodes from the server copy. The feed_object is a reference to a
        feedparser object so we don't have to redownload a feed multiple times.

        :param feed_object: Object exposing an ``entries`` sequence of episodes.
        :param num_episodes: How many entries from the front of ``entries`` to
           process. A negative value (the default) means the count is worked
           out with :func:`_calculate_new_episodes`.
        """
        if num_episodes < 0:
            num_episodes = self._calculate_new_episodes(feed_object)
        #feedparser-specific way of building the list
        new_episodes = feed_object.entries[:num_episodes]
        for episode in new_episodes:
            #Compare against the list of mime types, not the joined string,
            #so that partial strings such as 'audio' do not count as a match.
            episode_audio_links = [link for link in episode['links']
                                   if link.get('type') in _audio_type_mime_types]
            #Nothing is downloaded yet - the matching links are only reported.
            print(episode_audio_links)
-	def sync_podcast(dry_run = False, forbid_delete = False):
+    def sync_podcast(dry_run = False, forbid_delete = False):
-		"""
+        """
-		Update the podcast with episodes from the server copy. If dry_run, don't actually download episodes,
+        Update the podcast with episodes from the server copy.
-		but show what changes would have been made (implies forbid_delete). If forbid_delete, download all new
+
-		episodes, ignoring the max_episodes count.
+        :param dry_run: Calculate what would have been downloaded or deleted, but do not actually do either.
-		"""
+        :param forbid_delete: Run, and only download new episodes. Ignores the :data:`max_episodes` field for this podcast.
-		pass
+
        """
        pass
--- a/archiver/models/listfield.py
+++ b/archiver/models/listfield.py
@ -1,56 +0,0 @@
 from django.db import models
 import re, itertools
 class IntegerListField(models.TextField):
 	"""
 	Store a list of integers in a database string.
 	Format is:
 	[<int_1>, <int_2>, <int_3>, ... , <int_n>]
 	"""
 	class Meta:
 		app_label = 'archiver'
 	description = "Field type for storing lists of integers."
 	__metaclass__ = models.SubfieldBase
 	def __init__(self, *args, **kwargs):
 		super(IntegerListField, self).__init__(*args, **kwargs)
 	#Convert database to python
 	def to_python(self, value):
 		"""
 		Turn the stored string back into a python list of ints.

 		:param value: Either a list (returned untouched) or the bracketed
 		   string produced by :func:`get_prep_value`.
 		:returns: List of ints, empty when nothing is stored.
 		:raises ValidationError: If the string is not wrapped in ``[`` and ``]``.
 		"""
 		#Imported here because the module itself never imports it
 		from django.core.exceptions import ValidationError
 		if isinstance(value, list):
 			return value
 		#Process a database string
 		#Validation first - None and the empty string both mean "no entries"
 		if not value:
 			return []
 		if value[0] != '[' or value[-1] != ']':
 			raise ValidationError("Invalid input to parse a list of integers!")
 		#Note that any non-digit string is a valid separator.
 		#Match whole runs of digits, otherwise 17 would come back as 1 and 7.
 		csv_regex = re.compile("[0-9]+")
 		return [int(digits) for digits in csv_regex.findall(value)]
 	#Convert python to database
 	def get_prep_value(self, value):
 		"""
 		Turn a python list into the bracketed, comma separated string form.

 		:param value: List whose elements are converted with ``str``.
 		:returns: String such as ``[1, 3, 5, 17]``.
 		:raises ValidationError: If ``value`` is not a list.
 		"""
 		#Imported here because the module itself never imports it
 		from django.core.exceptions import ValidationError
 		if not isinstance(value, list):
 			raise ValidationError("Invalid list given to put in database!")
 		separator_string = ", "
 		list_elements = separator_string.join(map(str, value))
 		return "[" + list_elements + "]"
--- a/archiver/models/playlist.py
+++ b/archiver/models/playlist.py
@ -1,3 +1,19 @@
 """
 .. module:: archiver.models
 Playlist model
 Each playlist is a high-level ordering of songs. There really isn't much to a playlist - just its name, and the songs inside it.
 However, we need to have a way to guarantee song order, in addition to re-ordering. A ManyToMany field can't do this.
 As such, a custom IntegerListField is implemented - it takes a python list of ints, converts it to a text field in the DB,
 and then back to a python list. This way, we can guarantee order, and have a song appear multiple times.
 The IntegerListField itself uses the ID of each song as the int in a list. For example, a list of:
   [1, 3, 5, 17]
 Means that the playlist is made up of four songs. The order of the playlist is the song with index 1, 3, 5, and 17.
 Additionally, the ManyToMany field is included to make sure we don't use the global Songs manager - it just seems hackish.
 """
 from django.db import models
 from django.core.exceptions import ObjectDoesNotExist
@ -7,19 +23,6 @@ from listfield import IntegerListField
 import re
 from warnings import warn
 """
 Playlist model
 Each playlist is a high-level ordering of songs. There really isn't much to a playlist - just its name, and the songs inside it.
 However, we need to have a way to guarantee song order, in addition to re-ordering. A ManyToMany field can't do this.
 As such, a custom IntegerListField is implemented - it takes a python list of ints, converts it to a text field in the DB,
 and then back to a python list. This way, we can guarantee order, and have a song appear multiple times.
 The IntegerListField itself uses the ID of each song as the int in a list. For example, a list of:
 	[1, 3, 5, 17]
 Means that the playlist is made up of four songs. The order of the playlist is the song with index 1, 3, 5, and 17.
 Additionally, the ManyToMany field is included to make sure we don't use the global Songs manager - it just seems hackish.
 """
 class Playlist(models.Model):
 	class Meta:
 		app_label = 'archiver'