Add documentation for the Archive and Feed models

master
Bradlee Speice 2013-05-09 22:29:12 -04:00
parent fb74ceaa1d
commit a515a4b8d3
6 changed files with 316 additions and 281 deletions

View File

@ -1,5 +1,7 @@
''' '''
.. py:currentmodule:: .. moduleauthor:: Bradlee Speice <bspeice.nc@gmail.com>
:synopsis: Archiving back-end for the Melodia system. The ``archiver`` application is responsible for all of the backend operations in Melodia. Its purpose is to provide an easy, Pythonic API to any other applications that want to use it. Some features include:
* Multiple archive location support
* Automatic backup of archives using rsync
''' '''

View File

@ -1,3 +1,10 @@
'''
.. currentmodule:: archiver.models
I'm trying to link to :class:`~archiver.models.archive.Archive`!
'''
# Create your models here. # Create your models here.
from archive import Archive from archive import Archive
from song import Song from song import Song

View File

@ -1,188 +1,235 @@
from django.db import models """
.. module:: archiver.models.archive
This is the Archive model for the backend of Melodia. Its functionality is to
provide a grouping of songs based on where they are located in the filesystem.
It controls the high-level functionality of managing multiple archives
of music - basically, multiple filesystem folders holding your music.
""" """
This is the archive model for the archiving backend of Melodia.
Its purpose is to control the high-level functionality of managing from django.db import models
multiple archives of music. It is different from a playlist both conceptually from django.core.exceptions import ObjectDoesNotExist
and practically - an archive describes a group of files, while a playlist
describes a group of songs. import datetime
In this way, you back up archives of music - you don't back up the songs in a import re, os
playlist. Additionally, you may want to re-organize your music to use a from itertools import ifilter
cleaner directory structure - a playlist doesn't care about this.
""" from Melodia.melodia_settings import SUPPORTED_AUDIO_EXTENSIONS
from Melodia.melodia_settings import HASH_FUNCTION as hash
class Archive (models.Model): class Archive (models.Model):
class Meta: """
app_label = 'archiver' .. data:: name
""" String human-readable name of this archive -- ex. ``Steve's Music``
The archive model itself, and all functions used to interact with it.
The archive is built up from a grouping of songs, and the functions
that are used to interact with many songs at a single time. The archive
for example allows you to re-organize a specific set of music files into
a cleaner directory structure.
The archive is given a folder to use as its root directory - it finds all
music files under there, and takes control of them from there.
"""
import datetime .. data:: root_folder
name = models.CharField(max_length = 64) String containing the root folder of this archive. Should not be
modified once the archive has been created.
#Note that we're not using FilePathField since this is actually a folder .. data:: backup_location
root_folder = models.CharField(max_length = 255)
#We've removed the reference to "songs" - instead define it as a ForeignKey, String for the rsync-readable location that this archive should
#and do lookups via song_set be backed up to. Can be modified if you need to change the location.
#Backup settings .. data:: backup_frequency
backup_location = models.CharField(max_length = 255, default = None, null = True)
backup_frequency = models.IntegerField(default = 604800) #1 week in seconds
last_backup = models.DateTimeField(default = datetime.datetime.now) #Note that this by default will be the time the archive was instantiated
def _scan_filesystem(self): Integer time in minutes that should be between backups of this archive.
"Scan the archive's root filesystem and add any new songs without adding metadata, delete songs that exist no more" This should not be blank, if you want to disable backups, set the
#This method is implemented since the other scan methods all need to use the same code location to being blank.
#DRY FTW
import re, os, itertools
from django.core.exceptions import ObjectDoesNotExist
from Melodia.melodia_settings import SUPPORTED_AUDIO_EXTENSIONS
from Melodia.melodia_settings import HASH_FUNCTION as hash
_regex = '|'.join(( '.*' + ext + '$' for ext in SUPPORTED_AUDIO_EXTENSIONS)) .. data:: last_backup
regex = re.compile(_regex, re.IGNORECASE)
#It's hackish, but far fewer transactions to delete everything first, and add it all back. DateTime object that records when the last **successful** backup was run.
#If we get interrupted, just re-run it. Don't touch this.
song_set.all().delete() """
#Add new songs name = models.CharField(max_length = 64)
for dirname, dirnames, filenames in os.walk(self.root_folder):
#For each filename that is supported
for filename in itertools.ifilter(lambda filename: re.match(regex, filename), filenames):
rel_url = os.path.join(dirname, filename)
full_url = os.path.abspath(rel_url)
new_song = Song(url = full_url)
new_song.save()
song_set.add(new_song)
def _update_song_metadata(self, use_echonest = False, progress_callback = lambda x, y: None): #Note that we're not using FilePathField since this is actually a folder
"""Scan every song in this archive (database only) and make sure all songs are correct root_folder = models.CharField(max_length = 512)
The progress_callback function is called with the current song being operated on first, and the total songs second."""
#This method operates only on the songs that are in the database - if you need to make
#sure that new songs are added, use the _scan_filesystem() method in addition
total_songs = song_set.count()
for index, song in enumerate(song_set.all()): #We've removed the reference to "songs" - instead define it as a ForeignKey,
song.populate_metadata(use_echonest = use_echonest) #and do lookups via song_set
song.save()
progress_callback(index + 1, total_songs)
def _needs_backup(self): #Backup settings
"Check if the current archive is due for a backup" backup_location = models.CharField(max_length = 255, default = None, null = True)
import datetime backup_frequency = models.IntegerField(default = 10800) #7.5 days in minutes (NOTE: one week would be 10080)
last_backup = models.DateTimeField(default = datetime.datetime.now) #Note that this by default will be the time the archive was instantiated
prev_backup_time = self.last_backup class Meta:
current_time = datetime.datetime.now() app_label = 'archiver'
delta = current_time - prev_backup_time def _scan_filesystem(self):
if delta > datetime.timedelta(seconds = self.backup_frequency): """
return True Scan the archive's root filesystem and add any new songs without adding
else: metadata, delete songs that exist no more.
return False .. todo::
This should be fixed so that we don't drop all songs and re-add
them. That's just terrible design.
"""
#This method is implemented since the other scan methods all need to
#use the same code. DRY FTW
_supported_extns_regex = '|'.join(( '.*' + ext + '$' for ext
in SUPPORTED_AUDIO_EXTENSIONS))
regex = re.compile(_supported_extns_regex, re.IGNORECASE)
def quick_scan(self): #It's hackish, but far fewer transactions to delete everything first,
"Scan this archive's root folder and make sure that all songs are in the database." #and add it all back. If we get interrupted, just re-run it.
#This is a quick scan - only validate whether or not songs should exist in the database song_set.all().delete()
self._scan_filesystem() #For each filename that is supported
for filename in ifilter(lambda filename: re.match(regex, filename), filenames):
rel_url = os.path.join(dirname, filename)
full_url = os.path.abspath(rel_url)
new_song = Song(url = full_url)
new_song.save()
song_set.add(new_song)
def scan(self): def _update_song_metadata(self, progress_callback = lambda x, y: None):
"Scan this archive's root folder and make sure any local metadata are correct." """
#This is a longer scan - validate whether songs should exist, and use local data to update Scan every song in this archive (database only) and make sure all
#the database songs are correct. The progress_callback function is called with the
current song being operated on first, and the total songs second.
self._scan_filesystem() :param progress_callback: Function called to give progress. First
self._update_song_metadata() argument is an integer for the song currently in progress, second
argument is the total number of songs to be operated on.
"""
total_songs = song_set.count()
def deep_scan(self): for index, song in enumerate(song_set.all()):
"Scan this archive's root folder and make sure that all songs are in the database, and use EchoNest to update metadata as necessary" song.populate_metadata()
#This is a very long scan - validate whether songs should exist, and use Echonest to make sure song.save()
#that metadata is as accurate as possible. progress_callback(index + 1, total_songs)
self._scan_filesystem()
self._update_song_metadata(use_echonest = True)
def _needs_backup(self):
def run_backup(self, force_backup = False): "Check if the current archive is due for a backup"
"Backup the current archive" import datetime
if force_backup or self._needs_backup():
import subprocess
subprocess.call(['rsync', '-av', self.root_folder, self.backup_location])
def reorganize(self, format_string, progress_function = lambda w, x, y, z: None, dry_run = False): prev_backup_time = self.last_backup
"""Reorganize a music archive using a specified format string. current_time = datetime.datetime.now()
Recognized escape characters:
%a - Artist Name %A - Album Name
%d - Disc Number %e - Number of discs
%f - Current Filename (with extension) %g - Current Filename (no extension)
%n - Track Number %o - Number of tracks on disc
%y - Album year
Note that all organization takes place relative to the archive's root folder. delta = current_time - prev_backup_time
The progress_function is called with the current song number as its first argument, total songs as its second, if delta > datetime.timedelta(seconds = self.backup_frequency):
current song URL as the third argument, and new URL as the fourth. return True
""" else:
import os, shutil, errno return False
total_songs = song_set.count() def quick_scan(self):
"""
Scan this archive's root folder, add or remove songs from the DB
as necessary.
"""
self._scan_filesystem()
for index, song in enumerate(song_set.all()): def scan(self):
_current_filename = os.path.basename(song.url) """
_current_filename_no_extension = os.path.splitext(_current_filename)[0] Like :func:`quick_scan` but makes sure all metadata is current.
"""
#This is a longer scan - validate whether songs should exist, and use local data to update
#the database
_release_year = song.release_date.year self._scan_filesystem()
self._update_song_metadata()
new_location = format_string.replace("%a", song.artist)\ def run_backup(self, force_backup = False):
.replace("%A", song.album)\ """
.replace("%d", str(song.disc_number))\ Backup the current archive
.replace("%e", str(song.disc_total))\
.replace("%f", _current_filename)\
.replace("%g", _current_filename_no_extension)\
.replace("%n", str(song.track_number))\
.replace("%o", str(song.track_total))\
.replace("%y", str(_release_year))
new_url = os.path.join(self.root_folder, new_location) :param force_backup: Boolean value, if `True` will ensure backup runs.
"""
if force_backup or self._needs_backup():
import subprocess
subprocess.call(['rsync', '-av', self.root_folder, self.backup_location])
progress_function(index + 1, total_songs, song.url, new_url) def reorganize(self, format_string,
progress_function = lambda w, x, y, z: None,
dry_run = False):
"""
Reorganize a music archive using a given `format_string`. Recognized
escape characters are below:
if not dry_run: .. table::
new_folder = os.path.dirname(new_url)
try:
#`mkdir -p` functionality
if not os.path.isdir(new_folder):
os.makedirs(new_folder)
#Safely copy the file - don't 'move' it, but do a full 'copy' 'rm' ========== ==============
#This way if the process is ever interrupted, we have an unaltered copy Character: Replaced with:
#of the file. ========== ==============
shutil.copyfile(song.url, new_url) %a Artist Name
shutil.copystat(song.url, new_url) %A Album Name
%d Disc Number
%e Number of discs
%f Current Filename (with extension)
%g Current Filename (no extension)
%n Track Number
%o Number of tracks on disc
%y Album year
========== ==============
#Notify the database about the new URL All re-organization takes place relative to the archive's
old_url = song.url :data:`root_folder`.
song.url = new_url
song.save()
#Actually remove the file since all references to the original location have been removed :param format_string: String describing how each song should be re-organized
os.remove(old_url) :param progress_function: Optional function to get current progress - see notes below.
:param dry_run: Boolean, if `True` will do everything except move files
except OSError as exc: The progress_function is called with the current song number as its first argument, total songs as its second,
if exc.errno == errno.EEXIST and os.path.isdir(new_folder): current song URL as the third argument, and new URL as the fourth.
#This is safe to skip - makedirs() is complaining about a folder already existing """
pass import os, shutil, errno
else: raise
except IOError as exc: total_songs = song_set.count()
#shutil error - likely that folders weren't specified correctly
for index, song in enumerate(song_set.all()):
_current_filename = os.path.basename(song.url)
_current_filename_no_extension = os.path.splitext(_current_filename)[0]
_release_year = song.release_date.year
new_location = format_string.replace("%a", song.artist)\
.replace("%A", song.album)\
.replace("%d", str(song.disc_number))\
.replace("%e", str(song.disc_total))\
.replace("%f", _current_filename)\
.replace("%g", _current_filename_no_extension)\
.replace("%n", str(song.track_number))\
.replace("%o", str(song.track_total))\
.replace("%y", str(_release_year))
new_url = os.path.join(self.root_folder, new_location)
progress_function(index + 1, total_songs, song.url, new_url)
if not dry_run:
new_folder = os.path.dirname(new_url)
try:
#`mkdir -p` functionality
if not os.path.isdir(new_folder):
os.makedirs(new_folder)
#Safely copy the file - don't 'move' it, but do a full 'copy' 'rm'
#This way if the process is ever interrupted, we have an unaltered copy
#of the file.
shutil.copyfile(song.url, new_url)
shutil.copystat(song.url, new_url)
#Notify the database about the new URL
old_url = song.url
song.url = new_url
song.save()
#Actually remove the file since all references to the original location have been removed
os.remove(old_url)
except OSError as exc:
if exc.errno == errno.EEXIST and os.path.isdir(new_folder):
#This is safe to skip - makedirs() is complaining about a folder already existing
pass
else: raise
except IOError as exc:
#shutil error - likely that folders weren't specified correctly raise
raise raise

View File

@ -1,85 +1,117 @@
"""
The Feed model describes a podcast of anything that can be parsed by :mod:`feedparser`.
Most of the heavy lifting is done via :mod:`feedparser`, we just download the
podcast files.
"""
from django.db import models from django.db import models
import datetime, time import datetime, time
import feedparser import feedparser
from archive import Archive from archive import Archive
# What mime types should be downloaded from the podcast XML
"""
The "Feed" model describes a podcast feed using any of RSS, Atom, etc.
Backend handling is processed by 'feedparser', we just download all the podcast files,
control how many are stored, etc. The feed is intended to belong to an archive -
this way the feed is backed up automatically (and we don't have the podcast spewing
files everywhere).
It is important to note - the "max_episodes" field regulates how many episodes are
stored and backed up. A value < 1 indicates storing all episodes.
"""
_audio_type_mime_types = [ _audio_type_mime_types = [
u'audio/mpeg' u'audio/mpeg'
] ]
_audio_type_mime_types_string = "\n".join(_audio_type_mime_types) _audio_type_mime_types_string = "\n".join(_audio_type_mime_types)
class Feed(models.Model): class Feed(models.Model):
class Meta: """
app_label = 'archiver' .. data:: url
String representation of the URL from which the podcast
file should be downloaded.
url = models.URLField() .. data:: name
name = models.CharField(max_length = 64)
max_episodes = models.IntegerField(default = 0) # Default store everything Human-readable string for this podcast. This is set by the user, not by
current_episodes = models.IntegerField(default = 0) the XML podcast name. Is the name for the folder in which this podcast
last_episode = models.DateTimeField(default = datetime.datetime(1970, 1, 1)) is stored.
parent_archive = models.ForeignKey(Archive)
def _get_episode_time(episode): .. data:: max_episodes
"""
Get a datetime.datetime object of a podcast episode's published time.
Expects a specific element from feed_object.entries.
"""
t = time.mktime(episode.published_parsed)
return datetime.datetime.fromtimestamp(t)
def _calculate_new_episodes(feed_object): Integer for how many episodes should be stored at a time. A value of ``0``
""" (or ``< 0``) indicates that all episodes should be stored. A positive
Calculate how many new episodes there are of a podcast (and consequently value controls how many episodes are stored at a time.
how many we need to remove).
"""
num_episodes = 0
#feed_object.entries starts at the most recent .. data:: current_episodes
for episode in feed_object.entries:
if _get_episode_time(episode) > last_episode:
num_episodes += 1
#Don't set ourselves up to download any more than max_episodes Integer for how many episodes are currently stored locally. This will
if num_episodes > max_episodes and max_episodes > 0: be deprecated, as it can be calculated.
return num_episodes
return num_episodes .. data:: last_episode
DateTime object for the date of the most recent file downloaded. This
should not be modified by anything outside this model.
def _download_podcast(feed_object, num_episodes = -1): .. data:: parent_archive
"""
Update this podcast with episodes from the server copy. The feed_object is a reference to a
feedparser object so we don't have to redownload a feed multiple times.
"""
num_episodes = _calculate_new_episodes() Reference to the :class:`Archive` this podcast belongs to. Informs the
feed where it should store its files at.
"""
#feedparser-specific way of building the list url = models.URLField()
new_episodes = feed_object.entries[:num_episodes] name = models.CharField(max_length = 64)
max_episodes = models.IntegerField(default = 0) # Default store everything
current_episodes = models.IntegerField(default = 0)
last_episode = models.DateTimeField(default = datetime.datetime(1970, 1, 1))
parent_archive = models.ForeignKey(Archive)
for episode in new_episodes: class Meta:
episode_audio_links = [link for link in episodes['links'] app_label = 'archiver'
if link['type'] in _audio_type_mime_types_string]
print episode_audio_links def _get_episode_time(episode):
"""
Get a datetime.datetime object of a podcast episode's published time.
Expects a specific element from feed_object.entries.
"""
t = time.mktime(episode.published_parsed)
return datetime.datetime.fromtimestamp(t)
def _calculate_new_episodes(feed_object):
def sync_podcast(dry_run = False, forbid_delete = False): """
""" Calculate how many new episodes there are of a podcast (and consequently
Update the podcast with episodes from the server copy. If dry_run, don't actually download episodes, how many we need to remove).
but show what changes would have been made (implies forbid_delete). If forbid_delete, download all new """
episodes, ignoring the max_episodes count. num_episodes = 0
"""
pass #feed_object.entries starts at the most recent
for episode in feed_object.entries:
if _get_episode_time(episode) > last_episode:
num_episodes += 1
#Don't set ourselves up to download any more than max_episodes
if num_episodes > max_episodes and max_episodes > 0:
return num_episodes
return num_episodes
def _download_podcast(feed_object, num_episodes = -1):
    """
    Collect the audio links of the newest episodes in a parsed feed.

    Despite the name, nothing is downloaded yet: the audio links of each
    selected episode are only printed.

    :param feed_object: Parsed feed exposing an ``entries`` sequence, newest
        entry first. Each entry must provide a ``'links'`` list of dict-like
        objects.
    :param num_episodes: How many of the newest entries to process. A
        negative value (the default) means "work it out" via
        ``_calculate_new_episodes``.
    """
    #Only compute the count when the caller did not supply one; previously
    #the argument was unconditionally overwritten.
    if num_episodes < 0:
        #NOTE(review): the helper takes the feed object as its argument. None
        #of the sibling functions accept ``self``, so confirm this name is
        #actually reachable from this scope.
        num_episodes = _calculate_new_episodes(feed_object)

    #feedparser-specific way of building the list
    new_episodes = feed_object.entries[:num_episodes]

    for episode in new_episodes:
        #Match against the list of MIME types, not the newline-joined string:
        #a substring test would also accept fragments such as 'audio'.
        episode_audio_links = [link for link in episode['links']
                               if link.get('type') in _audio_type_mime_types]
        print(episode_audio_links)
def sync_podcast(dry_run = False, forbid_delete = False):
    """
    Update the podcast with episodes from the server copy.

    Not implemented yet: the body is a placeholder and neither argument is
    used. The parameters below describe the intended behaviour.

    :param dry_run: Calculate what would have been downloaded or deleted, but do not actually do either.
    :param forbid_delete: Run, and only download new episodes. Ignores the :data:`max_episodes` field for this podcast.
    """
    pass

View File

@ -1,56 +0,0 @@
from django.db import models
import re, itertools
class IntegerListField(models.TextField):
    """
    Store a list of integers in a database string.

    Format is:
    [<int_1>, <int_2>, <int_3>, ... , <int_n>]
    """

    class Meta:
        app_label = 'archiver'

    description = "Field type for storing lists of integers."
    __metaclass__ = models.SubfieldBase

    #Whole (optionally signed) integers. Matching single digits would split
    #"[10, 2]" into [1, 0, 2] and break the round trip with get_prep_value.
    _INTEGER_REGEX = re.compile(r"-?[0-9]+")

    def __init__(self, *args, **kwargs):
        super(IntegerListField, self).__init__(*args, **kwargs)

    #Convert database to python
    def to_python(self, value):
        """
        Convert a stored string such as ``"[1, 3, 17]"`` into a list of ints.

        :param value: Either an already-converted list (returned unchanged),
            an empty value (``None`` or ``""``), or the bracketed database
            string.
        :returns: List of integers, empty if nothing is stored.
        :raises ValidationError: If the string is not wrapped in ``[`` ``]``.
        """
        from django.core.exceptions import ValidationError

        if isinstance(value, list):
            return value

        #Nothing stored (None or empty string) means an empty list
        if not value:
            return []

        if value[0] != '[' or value[-1] != ']':
            raise ValidationError("Invalid input to parse a list of integers!")

        #Anything between the integers is treated as a separator
        return [int(token) for token in self._INTEGER_REGEX.findall(value)]

    #Convert python to database
    def get_prep_value(self, value):
        """
        Serialise a list of integers into the bracketed database string.

        :param value: Python list of integers.
        :returns: String of the form ``"[1, 3, 17]"``.
        :raises ValidationError: If ``value`` is not a list.
        """
        from django.core.exceptions import ValidationError

        if not isinstance(value, list):
            raise ValidationError("Invalid list given to put in database!")

        separator_string = ", "
        list_elements = separator_string.join(map(str, value))

        return "[" + list_elements + "]"

View File

@ -1,3 +1,19 @@
"""
.. module:: archiver.models
Playlist model
Each playlist is a high-level ordering of songs. There really isn't much to a playlist - just its name, and the songs inside it.
However, we need to have a way to guarantee song order, in addition to re-ordering. A ManyToMany field can't do this.
As such, a custom IntegerListField is implemented - it takes a python list of ints, converts it to a text field in the DB,
and then back to a python list. This way, we can guarantee order, and have a song appear multiple times.
The IntegerListField itself uses the ID of each song as the int in a list. For example, a list of:
[1, 3, 5, 17]
Means that the playlist is made up of four songs. The order of the playlist is the song with index 1, 3, 5, and 17.
Additionally, the ManyToMany field is included to make sure we don't use the global Songs manager - it just seems hackish.
"""
from django.db import models from django.db import models
from django.core.exceptions import ObjectDoesNotExist from django.core.exceptions import ObjectDoesNotExist
@ -7,19 +23,6 @@ from listfield import IntegerListField
import re import re
from warnings import warn from warnings import warn
"""
Playlist model
Each playlist is a high-level ordering of songs. There really isn't much to a playlist - just its name, and the songs inside it.
However, we need to have a way to guarantee song order, in addition to re-ordering. A ManyToMany field can't do this.
As such, a custom IntegerListField is implemented - it takes a python list of ints, converts it to a text field in the DB,
and then back to a python list. This way, we can guarantee order, and have a song appear multiple times.
The IntegerListField itself uses the ID of each song as the int in a list. For example, a list of:
[1, 3, 5, 17]
Means that the playlist is made up of four songs. The order of the playlist is the song with index 1, 3, 5, and 17.
Additionally, the ManyToMany field is included to make sure we don't use the global Songs manager - it just seems hackish.
"""
class Playlist(models.Model): class Playlist(models.Model):
class Meta: class Meta:
app_label = 'archiver' app_label = 'archiver'