1
0
mirror of https://github.com/bspeice/Melodia synced 2025-05-13 13:11:29 -04:00

Add documentation for the Archive and Feed models

This commit is contained in:
Bradlee Speice 2013-05-09 22:29:12 -04:00
parent fb74ceaa1d
commit a515a4b8d3
6 changed files with 316 additions and 281 deletions

View File

@ -1,5 +1,7 @@
''' '''
.. py:currentmodule:: .. moduleauthor:: Bradlee Speice <bspeice.nc@gmail.com>
:synopsis: Archiving back-end for the Melodia system. The ``archiver`` application is responsible for all of the backend operations in Melodia. Its purpose is to provide an easy, Pythonic API to any other applications that want to use it. Some features include:
* Multiple archive location support
* Automatic backup of archives using rsync
''' '''

View File

@ -1,3 +1,10 @@
'''
.. currentmodule:: archiver.models
I'm trying to link to :class:`~archiver.models.archive.Archive`!
'''
# Create your models here. # Create your models here.
from archive import Archive from archive import Archive
from song import Song from song import Song

View File

@ -1,188 +1,235 @@
from django.db import models """
.. module:: archiver.models.archive
This is the Archive model for the backend of Melodia. Its functionality is to
provide a grouping of songs based on where they are located in the filesystem.
It controls the high-level functionality of managing multiple archives
of music - basically, multiple filesystem folders holding your music.
""" """
This is the archive model for the archiving backend of Melodia.
It's purpose is to control the high-level functionality of managing from django.db import models
multiple archives of music. It is different from a playlist both conceptually from django.core.exceptions import ObjectDoesNotExist
and practically - an archive describes a group of files, while a playlist
describes a group of songs. import datetime
In this way, you back up archives of music - you don't back up the songs in a import re, os
playlist. Additionally, you may want to re-organize your music to use a from itertools import ifilter
cleaner directory structure - a playlist doesn't care about this.
""" from Melodia.melodia_settings import SUPPORTED_AUDIO_EXTENSIONS
from Melodia.melodia_settings import HASH_FUNCTION as hash
class Archive (models.Model):
    """
    A group of songs defined by the filesystem folder that holds them.

    .. data:: name

       String human-readable name of this archive -- ex. ``Steve's Music``

    .. data:: root_folder

       String containing the root folder of this archive. Should not be
       modified once the archive has been created.

    .. data:: backup_location

       String for the rsync-readable location that this archive should
       be backed up to. Can be modified if you need to change the location.

    .. data:: backup_frequency

       Integer time in minutes that should be between backups of this archive.
       This should not be blank; if you want to disable backups, set the
       location to being blank.

    .. data:: last_backup

       DateTime object that records when the last **successful** backup was run.
       Don't touch this.
    """

    name = models.CharField(max_length = 64)

    #Note that we're not using FilePathField since this is actually a folder
    root_folder = models.CharField(max_length = 512)

    #We've removed the reference to "songs" - instead define it as a ForeignKey,
    #and do lookups via song_set

    #Backup settings
    backup_location = models.CharField(max_length = 255, default = None, null = True)
    #In minutes. NOTE(review): 10800 minutes is 7.5 days - exactly one week
    #would be 10080. The stored default is left untouched here.
    backup_frequency = models.IntegerField(default = 10800)
    #Note that this by default will be the time the archive was instantiated
    last_backup = models.DateTimeField(default = datetime.datetime.now)

    class Meta:
        app_label = 'archiver'

    def _scan_filesystem(self):
        """
        Scan the archive's root filesystem and add any new songs without adding
        metadata, delete songs that exist no more.

        .. todo::

           This should be fixed so that we don't drop all songs and re-add
           them. That's just terrible design.
        """
        #This method is implemented since the other scan methods all need to
        #use the same code. DRY FTW

        #NOTE(review): each extension is used as a regex fragment, so a
        #leading '.' matches any character - confirm against
        #SUPPORTED_AUDIO_EXTENSIONS.
        extension_patterns = ('.*' + ext + '$' for ext in SUPPORTED_AUDIO_EXTENSIONS)
        regex = re.compile('|'.join(extension_patterns), re.IGNORECASE)

        #It's hackish, but far fewer transactions to delete everything first,
        #and add it all back. If we get interrupted, just re-run it.
        self.song_set.all().delete()

        #Add new songs
        for dirname, dirnames, filenames in os.walk(self.root_folder):
            #For each filename that is supported
            for filename in filenames:
                if not regex.match(filename):
                    continue
                full_url = os.path.abspath(os.path.join(dirname, filename))
                #create() builds the song, saves it, and links it to this
                #archive in a single step.
                self.song_set.create(url = full_url)

    def _update_song_metadata(self, progress_callback = lambda x, y: None):
        """
        Scan every song in this archive (database only) and make sure all
        songs are correct. Songs that exist on disk but not in the database
        are not picked up here; run :func:`_scan_filesystem` first for that.

        :param progress_callback: Function called to give progress. First
            argument is an integer for the song currently in progress, second
            argument is the total number of songs to be operated on.
        """
        total_songs = self.song_set.count()

        for index, song in enumerate(self.song_set.all()):
            song.populate_metadata()
            song.save()
            progress_callback(index + 1, total_songs)

    def _needs_backup(self):
        """
        Check if the current archive is due for a backup.

        :returns: `True` when more than :data:`backup_frequency` minutes have
            passed since :data:`last_backup`.
        """
        elapsed = datetime.datetime.now() - self.last_backup
        #backup_frequency is documented (and defaulted) in minutes
        return elapsed > datetime.timedelta(minutes = self.backup_frequency)

    def quick_scan(self):
        """
        Scan this archive's root folder, add or remove songs from the DB
        as necessary.
        """
        self._scan_filesystem()

    def scan(self):
        """
        Like :func:`quick_scan` but makes sure all metadata is current.
        """
        #This is a longer scan - validate whether songs should exist, and use
        #local data to update the database
        self._scan_filesystem()
        self._update_song_metadata()

    def run_backup(self, force_backup = False):
        """
        Backup the current archive using ``rsync``.

        Nothing happens when :data:`backup_location` is blank, since a blank
        location means backups are disabled. :data:`last_backup` is only
        updated when rsync exits successfully.

        :param force_backup: Boolean value, if `True` will ensure backup runs
            even when the archive is not yet due for one.
        """
        import subprocess

        #A blank location disables backups - there is nowhere to copy to,
        #even when the backup is forced.
        if not self.backup_location:
            return
        if not (force_backup or self._needs_backup()):
            return

        exit_code = subprocess.call(['rsync', '-av', self.root_folder, self.backup_location])
        if exit_code == 0:
            #Only a successful run counts; a failed one stays due.
            self.last_backup = datetime.datetime.now()
            self.save()

    def reorganize(self, format_string,
            progress_function = lambda w, x, y, z: None,
            dry_run = False):
        """
        Reorganize a music archive using a given `format_string`. Recognized
        escape characters are below:

        .. table::

           ========== ==============
           Character: Replaced with:
           ========== ==============
           %a         Artist Name
           %A         Album Name
           %d         Disc Number
           %e         Number of discs
           %f         Current Filename (with extension)
           %g         Current Filename (no extension)
           %n         Track Number
           %o         Number of tracks on disc
           %y         Album year
           ========== ==============

        All re-organization takes place relative to the archive's
        :data:`root_folder`.

        :param format_string: String describing how each song should be re-organized
        :param progress_function: Optional function to get current progress - see notes below.
        :param dry_run: Boolean, if `True` will do everything except move files

        The progress_function is called with the current song number as its
        first argument, total songs as its second, current song URL as the
        third argument, and new URL as the fourth.
        """
        import errno, shutil

        total_songs = self.song_set.count()

        for index, song in enumerate(self.song_set.all()):
            current_filename = os.path.basename(song.url)
            replacements = {
                "%a": song.artist,
                "%A": song.album,
                "%d": str(song.disc_number),
                "%e": str(song.disc_total),
                "%f": current_filename,
                "%g": os.path.splitext(current_filename)[0],
                "%n": str(song.track_number),
                "%o": str(song.track_total),
                "%y": str(song.release_date.year),
            }
            #Substitute in a single pass, so that an escape sequence which
            #happens to appear inside a substituted value (an artist called
            #"100%A", say) is not expanded a second time.
            new_location = re.sub("%[aAdefgnoy]",
                    lambda match: replacements[match.group(0)],
                    format_string)

            new_url = os.path.join(self.root_folder, new_location)

            progress_function(index + 1, total_songs, song.url, new_url)

            if dry_run:
                continue

            old_url = song.url
            #A song that is already where it belongs must be left alone -
            #copying a file onto itself is an error.
            if os.path.abspath(new_url) == os.path.abspath(old_url):
                continue

            #`mkdir -p` functionality
            new_folder = os.path.dirname(new_url)
            if not os.path.isdir(new_folder):
                try:
                    os.makedirs(new_folder)
                except OSError as exc:
                    #Safe to ignore only when the folder appeared in the
                    #meantime; the copy below must still happen.
                    if exc.errno != errno.EEXIST or not os.path.isdir(new_folder):
                        raise

            #Safely copy the file - don't 'move' it, but do a full 'copy' 'rm'
            #This way if the process is ever interrupted, we have an unaltered
            #copy of the file.
            shutil.copyfile(old_url, new_url)
            shutil.copystat(old_url, new_url)

            #Notify the database about the new URL
            song.url = new_url
            song.save()

            #Actually remove the file since all references to the original
            #location have been removed
            os.remove(old_url)

View File

@ -1,85 +1,117 @@
"""
The Feed model describes a podcast of anything that can be parsed by :mod:`feedparser`.
Most of the heavy lifting is done via :mod:`feedparser`, we just download the
podcast files.
"""
from django.db import models from django.db import models
import datetime, time import datetime, time
import feedparser import feedparser
from archive import Archive from archive import Archive
# What mime types should be downloaded from the podcast XML
"""
The "Feed" model describes a podcast feed using any of RSS, Atom, etc.
Backend handling is processed by 'feedparser', we just download all the podcast files,
control how many are stored, etc. The feed is intended to belong to an archive -
this way the feed is backed up automatically (and we don't have the podcast spewing
files everywhere).
It is important to note - the "max_episodes" field regulates how many episodes are
stored and backed up. A value < 1 indicates storing all episodes.
"""
_audio_type_mime_types = [ _audio_type_mime_types = [
u'audio/mpeg' u'audio/mpeg'
] ]
_audio_type_mime_types_string = "\n".join(_audio_type_mime_types) _audio_type_mime_types_string = "\n".join(_audio_type_mime_types)
class Feed(models.Model):
    """
    A podcast feed whose episodes are stored inside an :class:`Archive`.

    .. data:: url

       String representation of the URL from which the podcast
       file should be downloaded.

    .. data:: name

       Human-readable string for this podcast. This is set by the user, not by
       the XML podcast name. Is the name for the folder in which this podcast
       is stored.

    .. data:: max_episodes

       Integer for how many episodes should be stored at a time. A value of ``0``
       (or ``< 0``) indicates that all episodes should be stored. A positive
       value controls how many episodes are stored at a time.

    .. data:: current_episodes

       Integer for how many episodes are currently stored locally. This will
       be deprecated, as it can be calculated.

    .. data:: last_episode

       DateTime object for the date of the most recent file downloaded. This
       should not be modified by anything outside this model.

    .. data:: parent_archive

       Reference to the :class:`Archive` this podcast belongs to. Informs the
       feed where it should store its files at.
    """

    url = models.URLField()
    name = models.CharField(max_length = 64)
    max_episodes = models.IntegerField(default = 0) # Default store everything
    current_episodes = models.IntegerField(default = 0)
    last_episode = models.DateTimeField(default = datetime.datetime(1970, 1, 1))
    parent_archive = models.ForeignKey(Archive)

    class Meta:
        app_label = 'archiver'

    @staticmethod
    def _get_episode_time(episode):
        """
        Get a datetime.datetime object of a podcast episode's published time.

        :param episode: A single element from ``feed_object.entries``; its
            ``published_parsed`` attribute is read.
        """
        # NOTE(review): mktime() treats the time tuple as local time - confirm
        # which timezone feedparser uses for published_parsed.
        t = time.mktime(episode.published_parsed)
        return datetime.datetime.fromtimestamp(t)

    def _calculate_new_episodes(self, feed_object):
        """
        Calculate how many new episodes there are of a podcast (and consequently
        how many we need to remove).

        :param feed_object: Parsed feed whose ``entries`` are inspected.
        :returns: Number of entries published after :data:`last_episode`,
            capped at :data:`max_episodes` when that is positive.
        """
        new_count = sum(1 for episode in feed_object.entries
                if self._get_episode_time(episode) > self.last_episode)

        #Don't set ourselves up to download any more than max_episodes
        if 0 < self.max_episodes < new_count:
            return self.max_episodes

        return new_count

    def _download_podcast(self, feed_object, num_episodes = -1):
        """
        Update this podcast with episodes from the server copy. The feed_object
        is a reference to a feedparser object so we don't have to redownload a
        feed multiple times.

        Nothing is downloaded yet: the audio links of each selected episode
        are only printed.

        :param feed_object: Parsed feed to take episodes from.
        :param num_episodes: How many of the first entries to process. A
            negative value (the default) means "work it out" via
            :func:`_calculate_new_episodes`.
        """
        if num_episodes < 0:
            num_episodes = self._calculate_new_episodes(feed_object)

        #feedparser-specific way of building the list
        new_episodes = feed_object.entries[:num_episodes]

        for episode in new_episodes:
            #Match whole mime types; a substring test against the joined
            #string would also accept fragments such as 'audio'.
            episode_audio_links = [link for link in episode['links']
                    if link.get('type') in _audio_type_mime_types]

            print(episode_audio_links)

    def sync_podcast(self, dry_run = False, forbid_delete = False):
        """
        Update the podcast with episodes from the server copy.

        Not implemented yet - this method currently does nothing.

        :param dry_run: Calculate what would have been downloaded or deleted, but do not actually do either.
        :param forbid_delete: Run, and only download new episodes. Ignores the :data:`max_episodes` field for this podcast.
        """
        pass

View File

@ -1,56 +0,0 @@
from django.db import models
import re, itertools
class IntegerListField(models.TextField):
    """
    Store a list of integers in a database string.

    Format is:
    [<int_1>, <int_2>, <int_3>, ... , <int_n>]
    """
    class Meta:
        app_label = 'archiver'

    description = "Field type for storing lists of integers."
    __metaclass__ = models.SubfieldBase

    #One match per whole integer. Matching single digits would split
    #"[10, 23]" into [1, 0, 2, 3]; the optional sign keeps negative values
    #written by get_prep_value() intact on the way back.
    _INTEGER_REGEX = re.compile(r"-?[0-9]+")

    def __init__(self, *args, **kwargs):
        super(IntegerListField, self).__init__(*args, **kwargs)

    #Convert database to python
    def to_python(self, value):
        """
        Turn the stored string back into a python list of ints.

        :param value: Either a list (returned unchanged) or the database
            string, e.g. ``"[1, 3, 5, 17]"``. ``None`` and the empty string
            both give an empty list.
        :raises ValidationError: if the string is not wrapped in ``[`` ``]``.
        """
        from django.core.exceptions import ValidationError

        if isinstance(value, list):
            return value

        #Process a database string
        #Validation first
        if not value:
            return []
        if value[0] != '[' or value[-1] != ']':
            raise ValidationError("Invalid input to parse a list of integers!")

        #Note that any non-digit string is a valid separator
        return [int(token) for token in self._INTEGER_REGEX.findall(value)]

    #Convert python to database
    def get_prep_value(self, value):
        """
        Serialize a python list of ints into the database string.

        :param value: List of integers.
        :raises ValidationError: if `value` is not a list.
        """
        from django.core.exceptions import ValidationError

        if not isinstance(value, list):
            raise ValidationError("Invalid list given to put in database!")

        return "[" + ", ".join(map(str, value)) + "]"

View File

@ -1,3 +1,19 @@
"""
.. module:: archiver.models.playlist
Playlist model
Each playlist is a high-level ordering of songs. There really isn't much to a playlist - just its name, and the songs inside it.
However, we need to have a way to guarantee song order, in addition to re-ordering. A ManyToMany field can't do this.
As such, a custom IntegerListField is implemented - it takes a python list of ints, converts it to a text field in the DB,
and then back to a python list. This way, we can guarantee order, and have a song appear multiple times.
The IntegerListField itself uses the ID of each song as the int in a list. For example, a list of:
[1, 3, 5, 17]
Means that the playlist is made up of four songs. The order of the playlist is the song with index 1, 3, 5, and 17.
Additionally, the ManyToMany field is included to make sure we don't use the global Songs manager - it just seems hackish.
"""
from django.db import models from django.db import models
from django.core.exceptions import ObjectDoesNotExist from django.core.exceptions import ObjectDoesNotExist
@ -7,19 +23,6 @@ from listfield import IntegerListField
import re import re
from warnings import warn from warnings import warn
"""
Playlist model
Each playlist is a high-level ordering of songs. There really isn't much to a playlist - just its name, and the songs inside it.
However, we need to have a way to guarantee song order, in addition to re-ordering. A ManyToMany field can't do this.
As such, a custom IntegerListField is implemented - it takes a python list of ints, converts it to a text field in the DB,
and then back to a python list. This way, we can guarantee order, and have a song appear multiple times.
The IntegerListField itself uses the ID of each song as the int in a list. For example, a list of:
[1, 3, 5, 17]
Means that the playlist is made up of four songs. The order of the playlist is the song with index 1, 3, 5, and 17.
Additionally, the ManyToMany field is included to make sure we don't use the global Songs manager - it just seems hackish.
"""
class Playlist(models.Model): class Playlist(models.Model):
class Meta: class Meta:
app_label = 'archiver' app_label = 'archiver'