Add documentation for the Archive and Feed models

master
Bradlee Speice 2013-05-09 22:29:12 -04:00
parent fb74ceaa1d
commit a515a4b8d3
6 changed files with 316 additions and 281 deletions

View File

@ -1,5 +1,7 @@
'''
.. py:currentmodule::
.. moduleauthor:: Bradlee Speice <bspeice.nc@gmail.com>
:synopsis: Archiving back-end for the Melodia system.
The ``archiver`` application is responsible for all of the backend operations in Melodia. Its purpose is to provide an easy, Pythonic API to any other applications that want to use it. Some features include:
* Multiple archive location support
* Automatic backup of archives using rsync
'''

View File

@ -1,3 +1,10 @@
'''
.. currentmodule:: archiver.models
I'm trying to link to :class:`~archiver.models.archive.Archive`!
'''
# Create your models here.
from archive import Archive
from song import Song

View File

@ -1,188 +1,235 @@
from django.db import models
"""
.. module:: archiver.models.archive
This is the Archive model for the backend of Melodia. Its functionality is to
provide a grouping of songs based on where they are located in the filesystem.
It controls the high-level functionality of managing multiple archives
of music - basically, multiple filesystem folders holding your music.
"""
This is the archive model for the archiving backend of Melodia.
Its purpose is to control the high-level functionality of managing
multiple archives of music. It is different from a playlist both conceptually
and practically - an archive describes a group of files, while a playlist
describes a group of songs.
In this way, you back up archives of music - you don't back up the songs in a
playlist. Additionally, you may want to re-organize your music to use a
cleaner directory structure - a playlist doesn't care about this.
"""
from django.db import models
from django.core.exceptions import ObjectDoesNotExist
import datetime
import re, os
from itertools import ifilter
from Melodia.melodia_settings import SUPPORTED_AUDIO_EXTENSIONS
from Melodia.melodia_settings import HASH_FUNCTION as hash
class Archive (models.Model):
class Meta:
app_label = 'archiver'
"""
.. data:: name
"""
The archive model itself, and all functions used to interact with it.
The archive is built up from a grouping of songs, and the functions
that are used to interact with many songs at a single time. The archive
for example allows you to re-organize a specific set of music files into
a cleaner directory structure.
The archive is given a folder to use as its root directory - it finds all
music files under there, and takes control of them from there.
"""
String human-readable name of this archive -- ex. ``Steve's Music``
import datetime
.. data:: root_folder
name = models.CharField(max_length = 64)
String containing the root folder of this archive. Should not be
modified once the archive has been created.
#Note that we're not using FilePathField since this is actually a folder
root_folder = models.CharField(max_length = 255)
.. data:: backup_location
#We've removed the reference to "songs" - instead define it as a ForeignKey,
#and do lookups via song_set
String for the rsync-readable location that this archive should
be backed up to. Can be modified if you need to change the location.
#Backup settings
backup_location = models.CharField(max_length = 255, default = None, null = True)
backup_frequency = models.IntegerField(default = 604800) #1 week in seconds
last_backup = models.DateTimeField(default = datetime.datetime.now) #Note that this by default will be the time the archive was instantiated
.. data:: backup_frequency
def _scan_filesystem(self):
"Scan the archive's root filesystem and add any new songs without adding metadata, delete songs that exist no more"
#This method is implemented since the other scan methods all need to use the same code
#DRY FTW
import re, os, itertools
from django.core.exceptions import ObjectDoesNotExist
from Melodia.melodia_settings import SUPPORTED_AUDIO_EXTENSIONS
from Melodia.melodia_settings import HASH_FUNCTION as hash
Integer time in minutes that should be between backups of this archive.
This should not be blank, if you want to disable backups, set the
location to being blank.
_regex = '|'.join(( '.*' + ext + '$' for ext in SUPPORTED_AUDIO_EXTENSIONS))
regex = re.compile(_regex, re.IGNORECASE)
.. data:: last_backup
#It's hackish, but far fewer transactions to delete everything first, and add it all back.
#If we get interrupted, just re-run it.
song_set.all().delete()
DateTime object that records when the last **successful** backup was run.
Don't touch this.
"""
#Add new songs
for dirname, dirnames, filenames in os.walk(self.root_folder):
#For each filename that is supported
for filename in itertools.ifilter(lambda filename: re.match(regex, filename), filenames):
rel_url = os.path.join(dirname, filename)
full_url = os.path.abspath(rel_url)
new_song = Song(url = full_url)
new_song.save()
song_set.add(new_song)
name = models.CharField(max_length = 64)
def _update_song_metadata(self, use_echonest = False, progress_callback = lambda x, y: None):
"""Scan every song in this archive (database only) and make sure all songs are correct
The progress_callback function is called with the current song being operated on first, and the total songs second."""
#This method operates only on the songs that are in the database - if you need to make
#sure that new songs are added, use the _scan_filesystem() method in addition
total_songs = song_set.count()
#Note that we're not using FilePathField since this is actually a folder
root_folder = models.CharField(max_length = 512)
for index, song in enumerate(song_set.all()):
song.populate_metadata(use_echonest = use_echonest)
song.save()
progress_callback(index + 1, total_songs)
#We've removed the reference to "songs" - instead define it as a ForeignKey,
#and do lookups via song_set
def _needs_backup(self):
"Check if the current archive is due for a backup"
import datetime
#Backup settings
#rsync-readable destination; left as None when no backup location is configured
backup_location = models.CharField(max_length = 255, default = None, null = True)
#Minutes between backups; the default is one week (7 * 24 * 60 = 10080)
backup_frequency = models.IntegerField(default = 10080)
#Note that this by default will be the time the archive was instantiated
last_backup = models.DateTimeField(default = datetime.datetime.now)
prev_backup_time = self.last_backup
current_time = datetime.datetime.now()
class Meta:
app_label = 'archiver'
delta = current_time - prev_backup_time
if delta > datetime.timedelta(seconds = self.backup_frequency):
return True
else:
return False
def _scan_filesystem(self):
"""
Scan the archive's root filesystem and add any new songs without adding
metadata, delete songs that exist no more.
.. todo::
This should be fixed so that we don't drop all songs and re-add
them. That's just terrible design.
"""
#This method is implemented since the other scan methods all need to
#use the same code. DRY FTW
_supported_extns_regex = '|'.join(( '.*' + ext + '$' for ext
in SUPPORTED_AUDIO_EXTENSIONS))
regex = re.compile(_supported_extns_regex, re.IGNORECASE)
def quick_scan(self):
"Scan this archive's root folder and make sure that all songs are in the database."
#This is a quick scan - only validate whether or not songs should exist in the database
#It's hackish, but far fewer transactions to delete everything first,
#and add it all back. If we get interrupted, just re-run it.
song_set.all().delete()
self._scan_filesystem()
#For each filename that is supported
for filename in ifilter(lambda filename: re.match(regex, filename), filenames):
rel_url = os.path.join(dirname, filename)
full_url = os.path.abspath(rel_url)
new_song = Song(url = full_url)
new_song.save()
song_set.add(new_song)
def scan(self):
"Scan this archive's root folder and make sure any local metadata are correct."
#This is a longer scan - validate whether songs should exist, and use local data to update
#the database
def _update_song_metadata(self, progress_callback = lambda x, y: None):
"""
Scan every song in this archive (database only) and make sure all
songs are correct. The progress_callback function is called with the
current song being operated on first, and the total songs second.
self._scan_filesystem()
self._update_song_metadata()
:param progress_callback: Function called to give progress. First
argument is an integer for the song currently in progress, second
argument is the total number of songs to be operated on.
"""
total_songs = song_set.count()
def deep_scan(self):
    """
    Run the most thorough scan available for this archive.

    The song list is first re-synchronised with the files under the
    archive's root folder, then the metadata of every song is refreshed
    with EchoNest lookups switched on. This is a very long scan.
    """
    #Step 1: validate which songs should exist in the database
    self._scan_filesystem()

    #Step 2: refresh metadata, using Echonest to make it as accurate as possible
    metadata_options = {'use_echonest': True}
    self._update_song_metadata(**metadata_options)
for index, song in enumerate(song_set.all()):
song.populate_metadata()
song.save()
progress_callback(index + 1, total_songs)
def run_backup(self, force_backup = False):
    """
    Back up this archive to its configured location using rsync.

    Nothing is done when no ``backup_location`` is configured, since a
    blank location is how backups are disabled. After rsync exits
    successfully, ``last_backup`` is set to the current time and the
    archive is saved, so the next backup is scheduled from this run.

    :param force_backup: Boolean value, if `True` the backup runs even when
        the archive is not yet due for one.
    """
    import datetime
    import subprocess

    #A missing location means backups are disabled; handing None to rsync
    #would only raise a TypeError inside subprocess.
    if not self.backup_location:
        return

    if not (force_backup or self._needs_backup()):
        return

    exit_code = subprocess.call(['rsync', '-av', self.root_folder, self.backup_location])

    #Only a clean rsync exit counts as a successful backup
    if exit_code == 0:
        self.last_backup = datetime.datetime.now()
        self.save()
def _needs_backup(self):
"Check if the current archive is due for a backup"
import datetime
def reorganize(self, format_string, progress_function = lambda w, x, y, z: None, dry_run = False):
"""Reorganize a music archive using a specified format string.
Recognized escape characters:
%a - Artist Name %A - Album Name
%d - Disc Number %e - Number of discs
%f - Current Filename (with extension) %g - Current Filename (no extension)
%n - Track Number %o - Number of tracks on disc
%y - Album year
prev_backup_time = self.last_backup
current_time = datetime.datetime.now()
Note that all organization takes place relative to the archive's root folder.
The progress_function is called with the current song number as its first argument, total songs as its second,
current song URL as the third argument, and new URL as the fourth.
"""
import os, shutil, errno
delta = current_time - prev_backup_time
if delta > datetime.timedelta(seconds = self.backup_frequency):
return True
else:
return False
total_songs = song_set.count()
def quick_scan(self):
    """
    Bring the database in line with the archive's root folder.

    Songs are added to or removed from the DB as necessary; this scan
    does nothing beyond the filesystem pass.
    """
    #Everything is delegated to the shared filesystem scanner
    self._scan_filesystem()
for index, song in enumerate(song_set.all()):
_current_filename = os.path.basename(song.url)
_current_filename_no_extension = os.path.splitext(_current_filename)[0]
def scan(self):
"""
Like :func:`quick_scan` but makes sure all metadata is current.
"""
#This is a longer scan - validate whether songs should exist, and use local data to update
#the database
_release_year = song.release_date.year
self._scan_filesystem()
self._update_song_metadata()
new_location = format_string.replace("%a", song.artist)\
.replace("%A", song.album)\
.replace("%d", str(song.disc_number))\
.replace("%e", str(song.disc_total))\
.replace("%f", _current_filename)\
.replace("%g", _current_filename_no_extension)\
.replace("%n", str(song.track_number))\
.replace("%o", str(song.track_total))\
.replace("%y", str(_release_year))
def run_backup(self, force_backup = False):
"""
Backup the current archive
new_url = os.path.join(self.root_folder, new_location)
:param force_backup: Boolean value, if `True` will ensure backup runs.
"""
if force_backup or self._needs_backup():
import subprocess
subprocess.call(['rsync', '-av', self.root_folder, self.backup_location])
progress_function(index + 1, total_songs, song.url, new_url)
def reorganize(self, format_string,
progress_function = lambda w, x, y, z: None,
dry_run = False):
"""
Reorganize a music archive using a given `format_string`. Recognized
escape characters are below:
if not dry_run:
new_folder = os.path.dirname(new_url)
try:
#`mkdir -p` functionality
if not os.path.isdir(new_folder):
os.makedirs(new_folder)
.. table::
#Safely copy the file - don't 'move' it, but do a full 'copy' 'rm'
#This way if the process is ever interrupted, we have an unaltered copy
#of the file.
shutil.copyfile(song.url, new_url)
shutil.copystat(song.url, new_url)
========== ==============
Character: Replaced with:
========== ==============
%a Artist Name
%A Album Name
%d Disc Number
%e Number of discs
%f Current Filename (with extension)
%g Current Filename (no extension)
%n Track Number
%o Number of tracks on disc
%y Album year
========== ==============
#Notify the database about the new URL
old_url = song.url
song.url = new_url
song.save()
All re-organization takes place relative to the archive's
:data:`root_folder`.
#Actually remove the file since all references to the original location have been removed
os.remove(old_url)
:param format_string: String describing how each song should be re-organized
:param progress_function: Optional function to get current progress - see notes below.
:param dry_run: Boolean, if `True` will do everything except move files
except OSError as exc:
if exc.errno == errno.EEXIST and os.path.isdir(new_folder):
#This is safe to skip - makedirs() is complaining about a folder already existing
pass
else: raise
The progress_function is called with the current song number as its first argument, total songs as its second,
current song URL as the third argument, and new URL as the fourth.
"""
import os, shutil, errno
except IOError as exc:
#shutil error - likely that folders weren't specified correctly
total_songs = song_set.count()
for index, song in enumerate(song_set.all()):
_current_filename = os.path.basename(song.url)
_current_filename_no_extension = os.path.splitext(_current_filename)[0]
_release_year = song.release_date.year
new_location = format_string.replace("%a", song.artist)\
.replace("%A", song.album)\
.replace("%d", str(song.disc_number))\
.replace("%e", str(song.disc_total))\
.replace("%f", _current_filename)\
.replace("%g", _current_filename_no_extension)\
.replace("%n", str(song.track_number))\
.replace("%o", str(song.track_total))\
.replace("%y", str(_release_year))
new_url = os.path.join(self.root_folder, new_location)
progress_function(index + 1, total_songs, song.url, new_url)
if not dry_run:
new_folder = os.path.dirname(new_url)
try:
#`mkdir -p` functionality
if not os.path.isdir(new_folder):
os.makedirs(new_folder)
#Safely copy the file - don't 'move' it, but do a full 'copy' 'rm'
#This way if the process is ever interrupted, we have an unaltered copy
#of the file.
shutil.copyfile(song.url, new_url)
shutil.copystat(song.url, new_url)
#Notify the database about the new URL
old_url = song.url
song.url = new_url
song.save()
#Actually remove the file since all references to the original location have been removed
os.remove(old_url)
except OSError as exc:
if exc.errno == errno.EEXIST and os.path.isdir(new_folder):
#This is safe to skip - makedirs() is complaining about a folder already existing
pass
else: raise
except IOError as exc:
#shutil error - likely that folders weren't specified correctly
raise

View File

@ -1,85 +1,117 @@
"""
The Feed model describes a podcast of anything that can be parsed by :mod:`feedparser`.
Most of the heavy lifting is done via :mod:`feedparser`, we just download the
podcast files.
"""
from django.db import models
import datetime, time
import feedparser
from archive import Archive
"""
The "Feed" model describes a podcast feed using any of RSS, Atom, etc.
Backend handling is processed by 'feedparser', we just download all the podcast files,
control how many are stored, etc. The feed is intended to belong to an archive -
this way the feed is backed up automatically (and we don't have the podcast spewing
files everywhere).
It is important to note - the "max_episodes" field regulates how many episodes are
stored and backed up. A value < 1 indicates storing all episodes.
"""
# What mime types should be downloaded from the podcast XML
_audio_type_mime_types = [
u'audio/mpeg'
]
u'audio/mpeg'
]
_audio_type_mime_types_string = "\n".join(_audio_type_mime_types)
class Feed(models.Model):
class Meta:
app_label = 'archiver'
"""
.. data:: url
String representation of the URL from which the podcast
file should be downloaded.
url = models.URLField()
name = models.CharField(max_length = 64)
max_episodes = models.IntegerField(default = 0) # Default store everything
current_episodes = models.IntegerField(default = 0)
last_episode = models.DateTimeField(default = datetime.datetime(1970, 1, 1))
parent_archive = models.ForeignKey(Archive)
.. data:: name
Human-readable string for this podcast. This is set by the user, not by
the XML podcast name. Is the name for the folder in which this podcast
is stored.
def _get_episode_time(episode):
    """
    Get a datetime.datetime object of a podcast episode's published time.

    The time tuple is treated as UTC and converted without consulting the
    local timezone, so the result is a naive datetime holding the same
    wall-clock values as the tuple, wherever and whenever the code runs.

    :param episode: A specific element from ``feed_object.entries``; only
        its ``published_parsed`` time tuple is read.
    :returns: Naive :class:`datetime.datetime` for the published time.
    """
    #NOTE(review): declared without `self` - confirm whether this is meant
    #to be a module-level helper or a static method of the Feed model.
    import calendar

    #time.mktime() would read the tuple as *local* time and apply DST rules,
    #shifting the result by an hour for part of the year. calendar.timegm()
    #is its UTC counterpart (feedparser documents published_parsed as UTC).
    seconds_since_epoch = calendar.timegm(episode.published_parsed)
    return datetime.datetime(1970, 1, 1) + datetime.timedelta(seconds = seconds_since_epoch)
.. data:: max_episodes
def _calculate_new_episodes(feed_object):
"""
Calculate how many new episodes there are of a podcast (and consequently
how many we need to remove).
"""
num_episodes = 0
Integer for how many episodes should be stored at a time. A value of ``0``
(or ``< 0``) indicates that all episodes should be stored. A positive
value controls how many episodes are stored at a time.
#feed_object.entries starts at the most recent
for episode in feed_object.entries:
if _get_episode_time(episode) > last_episode:
num_episodes += 1
.. data:: current_episodes
#Don't set ourselves up to download any more than max_episodes
if num_episodes > max_episodes and max_episodes > 0:
return num_episodes
Integer for how many episodes are currently stored locally. This will
be deprecated, as it can be calculated.
return num_episodes
.. data:: last_episode
DateTime object for the date of the most recent file downloaded. This
should not be modified by anything outside this model.
def _download_podcast(feed_object, num_episodes = -1):
"""
Update this podcast with episodes from the server copy. The feed_object is a reference to a
feedparser object so we don't have to redownload a feed multiple times.
"""
.. data:: parent_archive
num_episodes = _calculate_new_episodes()
Reference to the :class:`Archive` this podcast belongs to. Informs the
feed where it should store its files at.
"""
#feedparser-specific way of building the list
new_episodes = feed_object.entries[:num_episodes]
url = models.URLField()
name = models.CharField(max_length = 64)
max_episodes = models.IntegerField(default = 0) # Default store everything
current_episodes = models.IntegerField(default = 0)
last_episode = models.DateTimeField(default = datetime.datetime(1970, 1, 1))
parent_archive = models.ForeignKey(Archive)
for episode in new_episodes:
episode_audio_links = [link for link in episodes['links']
if link['type'] in _audio_type_mime_types_string]
class Meta:
app_label = 'archiver'
print episode_audio_links
def _get_episode_time(episode):
    """
    Get a datetime.datetime object of a podcast episode's published time.

    The time tuple is treated as UTC and converted without consulting the
    local timezone, so the result is a naive datetime holding the same
    wall-clock values as the tuple, wherever and whenever the code runs.

    :param episode: A specific element from ``feed_object.entries``; only
        its ``published_parsed`` time tuple is read.
    :returns: Naive :class:`datetime.datetime` for the published time.
    """
    #NOTE(review): declared without `self` - confirm whether this is meant
    #to be a module-level helper or a static method of the Feed model.
    import calendar

    #time.mktime() would read the tuple as *local* time and apply DST rules,
    #shifting the result by an hour for part of the year. calendar.timegm()
    #is its UTC counterpart (feedparser documents published_parsed as UTC).
    seconds_since_epoch = calendar.timegm(episode.published_parsed)
    return datetime.datetime(1970, 1, 1) + datetime.timedelta(seconds = seconds_since_epoch)
def sync_podcast(dry_run = False, forbid_delete = False):
    """
    Synchronise the stored episodes with the podcast's server copy.

    Not implemented yet: the body is a placeholder and always returns
    ``None``. The parameters describe the intended behaviour.

    :param dry_run: Don't actually download episodes, but show what changes
        would have been made (implies ``forbid_delete``).
    :param forbid_delete: Download all new episodes, ignoring the
        ``max_episodes`` count.
    """
    return None
def _calculate_new_episodes(self, feed_object):
    """
    Calculate how many new episodes there are of a podcast (and consequently
    how many we need to remove).

    An episode counts as new when it was published after
    :data:`last_episode`. When :data:`max_episodes` is positive the count is
    capped at that value, so we never plan to download more episodes than
    may be stored.

    :param feed_object: Parsed feed from :mod:`feedparser`; only its
        ``entries`` list is read.
    :returns: Integer number of episodes that should be downloaded.
    """
    import calendar

    num_episodes = 0

    #feed_object.entries starts at the most recent
    for episode in feed_object.entries:
        #NOTE(review): computed inline because `_get_episode_time` is declared
        #without `self` and so cannot be called on an instance - confirm and
        #share one helper once that is settled.
        #published_parsed is read as a UTC time tuple, hence calendar.timegm().
        published = datetime.datetime(1970, 1, 1) + datetime.timedelta(
            seconds = calendar.timegm(episode.published_parsed))
        if published > self.last_episode:
            num_episodes += 1

    #Don't set ourselves up to download any more than max_episodes
    if 0 < self.max_episodes < num_episodes:
        return self.max_episodes

    return num_episodes

def _download_podcast(self, feed_object, num_episodes = -1):
    """
    Update this podcast with episodes from the server copy. The feed_object is a reference to a
    feedparser object so we don't have to redownload a feed multiple times.

    .. todo::
        Nothing is downloaded yet - the audio links of each episode are
        only printed.

    :param feed_object: Parsed feed from :mod:`feedparser`.
    :param num_episodes: How many of the most recent entries to process. A
        negative value (the default) means the number is worked out with
        :func:`_calculate_new_episodes`.
    """
    #Only calculate the count when the caller did not supply one
    if num_episodes < 0:
        num_episodes = self._calculate_new_episodes(feed_object)

    #feedparser-specific way of building the list
    new_episodes = feed_object.entries[:num_episodes]

    for episode in new_episodes:
        #Test membership in the list of mime types: a substring test against
        #the newline-joined string would also accept fragments like 'audio'.
        episode_audio_links = [link for link in episode['links']
                               if link.get('type') in _audio_type_mime_types]

        print(episode_audio_links)
def sync_podcast(dry_run = False, forbid_delete = False):
    """
    Synchronise the stored episodes with the podcast's server copy.

    Not implemented yet: the body is a placeholder and always returns
    ``None``. The parameters describe the intended behaviour.

    :param dry_run: Calculate what would have been downloaded or deleted,
        but do not actually do either.
    :param forbid_delete: Run, and only download new episodes. Ignores the
        :data:`max_episodes` field for this podcast.
    """
    return None

View File

@ -1,56 +0,0 @@
from django.db import models
import re, itertools
class IntegerListField(models.TextField):
    """
    Store a list of integers in a database string.

    Format is::

        [<int_1>, <int_2>, <int_3>, ... , <int_n>]
    """

    class Meta:
        #NOTE(review): kept from the original; presumably copied from a model
        #template - confirm whether a field needs an app_label at all.
        app_label = 'archiver'

    description = "Field type for storing lists of integers."

    #Makes Django call to_python() whenever the attribute is assigned
    __metaclass__ = models.SubfieldBase

    def __init__(self, *args, **kwargs):
        super(IntegerListField, self).__init__(*args, **kwargs)

    def to_python(self, value):
        """
        Convert a database value to a python list of integers.

        :param value: Either an already-converted list (returned untouched)
            or the bracketed string stored in the database. ``None`` and the
            empty string both give an empty list.
        :returns: List of integers in stored order.
        :raises ValidationError: If the string is not wrapped in ``[`` ``]``.
        """
        from django.core.exceptions import ValidationError

        if isinstance(value, list):
            return value

        #Process a database string
        #Validation first
        if not value:
            return []

        if value[0] != '[' or value[-1] != ']':
            raise ValidationError("Invalid input to parse a list of integers!")

        #Match whole runs of digits (with an optional sign) so that multi-digit
        #values such as 17 stay one integer instead of becoming 1 and 7.
        #Anything between the numbers is treated as a separator.
        return [int(token) for token in re.findall(r'-?[0-9]+', value)]

    def get_prep_value(self, value):
        """
        Convert a python list of integers to its database string.

        :param value: List of integers to store.
        :returns: String in the ``[1, 2, 3]`` format read by :func:`to_python`.
        :raises ValidationError: If `value` is not a list.
        """
        from django.core.exceptions import ValidationError

        if not isinstance(value, list):
            raise ValidationError("Invalid list given to put in database!")

        separator_string = ", "
        list_elements = separator_string.join(map(str, value))

        return "[" + list_elements + "]"

View File

@ -1,3 +1,19 @@
"""
.. module:: archiver.models
Playlist model
Each playlist is a high-level ordering of songs. There really isn't much to a playlist - just its name, and the songs inside it.
However, we need to have a way to guarantee song order, in addition to re-ordering. A ManyToMany field can't do this.
As such, a custom IntegerListField is implemented - it takes a python list of ints, converts it to a text field in the DB,
and then back to a python list. This way, we can guarantee order, and have a song appear multiple times.
The IntegerListField itself uses the ID of each song as the int in a list. For example, a list of:
[1, 3, 5, 17]
Means that the playlist is made up of four songs. The order of the playlist is the song with index 1, 3, 5, and 17.
Additionally, the ManyToMany field is included to make sure we don't use the global Songs manager - it just seems hackish.
"""
from django.db import models
from django.core.exceptions import ObjectDoesNotExist
@ -7,19 +23,6 @@ from listfield import IntegerListField
import re
from warnings import warn
"""
Playlist model
Each playlist is a high-level ordering of songs. There really isn't much to a playlist - just its name, and the songs inside it.
However, we need to have a way to guarantee song order, in addition to re-ordering. A ManyToMany field can't do this.
As such, a custom IntegerListField is implemented - it takes a python list of ints, converts it to a text field in the DB,
and then back to a python list. This way, we can guarantee order, and have a song appear multiple times.
The IntegerListField itself uses the ID of each song as the int in a list. For example, a list of:
[1, 3, 5, 17]
Means that the playlist is made up of four songs. The order of the playlist is the song with index 1, 3, 5, and 17.
Additionally, the ManyToMany field is included to make sure we don't use the global Songs manager - it just seems hackish.
"""
class Playlist(models.Model):
class Meta:
app_label = 'archiver'