Initial Mongo refactoring work

master
Bradlee Speice 2016-08-12 23:55:12 -04:00
parent 7dc9426eb9
commit 684c68bbe9
3 changed files with 70 additions and 33 deletions

29
metrik/tasks/base.py Normal file
View File

@ -0,0 +1,29 @@
from __future__ import print_function
from luigi import Task
from metrik.targets.mongo_target import MongoTarget
class MongoCreateTask(Task):
    """Base task that retrieves one record and persists it to a Mongo target.

    Subclasses provide two hooks:

    * ``get_collection_name()`` -- name of the collection to write to.
    * ``retrieve_data(**params)`` -- a static method returning the record as
      a mutable mapping; it is called with the task's parameter values as
      keyword arguments.

    ``output()`` and ``run()`` are implemented here in terms of those hooks.
    """

    def __init__(self, *args, **kwargs):
        super(MongoCreateTask, self).__init__(*args, **kwargs)
        # The document id is derived from the stringified task parameters.
        # NOTE(review): on Python 3 the builtin hash() of a str is randomized
        # per process unless PYTHONHASHSEED is pinned, so this id may not be
        # reproducible across runs -- confirm the target interpreter before
        # relying on it as a stable key.
        self.mongo_id = hash(str(self.to_str_params()))

    def get_collection_name(self):
        """Return the collection name; subclasses must override this."""
        raise NotImplementedError('Please set the collection name.')

    def output(self):
        """Return the MongoTarget built from the collection name and id."""
        return MongoTarget(self.get_collection_name(), self.mongo_id)

    def run(self):
        """Fetch the record, stamp it with ``_id`` and persist it."""
        p_names = self.get_params()
        p_values = self.get_param_values(p_names, [], self.param_kwargs)
        # retrieve_data is static, so the parameter values are handed over
        # explicitly as keyword arguments.
        data_dict = self.retrieve_data(**dict(p_values))
        data_dict['_id'] = self.mongo_id
        self.output().persist(data_dict)

    @staticmethod
    def retrieve_data(*args, **kwargs):
        """Return the record to persist; subclasses must override this.

        Declared without ``self`` because it is a static method: ``run()``
        invokes it with keyword arguments only, so a leading ``self``
        parameter would fail with TypeError before this NotImplementedError
        could be raised.
        """
        raise NotImplementedError('Get me some data!')

View File

@ -12,7 +12,7 @@ from io import StringIO
from dateutil.parser import parse
from metrik.targets.mongo_target import MongoTarget
from metrik.tasks.base import MongoCreateTask
LiborRate = namedtuple('LiborRate', [
'publication', 'overnight', 'one_week', 'one_month', 'two_month',
@ -20,18 +20,13 @@ LiborRate = namedtuple('LiborRate', [
])
class LiborRateTask(Task):
class LiborRateTask(MongoCreateTask):
date = DateParameter()
currency = Parameter()
def output(self):
h = hash(str(self.to_str_params()))
return MongoTarget('libor', h)
def run(self):
libor_record = self.retrieve_data(self.date, self.currency)
self.output().persist(libor_record._asdict())
def get_collection_name(self):
return 'libor'
@staticmethod
def retrieve_data(date, currency):
@ -46,7 +41,9 @@ class LiborRateTask(Task):
text = requests.get(url).text
f = StringIO(text)
next(f) # Skip the header
record = {'currency': currency}
# TODO: Messing with globals() is probably a terrible idea, is there
# a better way to write the below code?
for row in csv.DictReader(f, fieldnames=fields):
mapping = {
'Overnight': 'overnight',
@ -58,7 +55,8 @@ class LiborRateTask(Task):
'1 Year': 'one_year'
}
if row['usd_ice_libor']:
record[mapping[row['tenor']]] = float(row['usd_ice_libor'])
globals()[mapping[row['tenor']]] = float(row['usd_ice_libor'])
if row['publication']:
# Weird things happen with the publication field. For whatever reason,
# the *time* is correct, but very often the date gets screwed up.
@ -66,6 +64,19 @@ class LiborRateTask(Task):
# download with `requests`, I see both date (often incorrect) and time.
dt = parse(row['publication'])
dt = dt.replace(year=date.year, month=date.month, day=date.day)
record['publication'] = dt
globals()['publication'] = dt
return LiborRate(**record)
# Because of the shenanigans I did earlier with globals(), ignore
# unresolved references. Probably a better way to do this.
# noinspection PyUnresolvedReferences
return {
'currency': currency,
'publication': publication,
'overnight': overnight,
'one_week': one_week,
'one_month': one_month,
'two_month': two_month,
'three_month': three_month,
'six_month': six_month,
'one_year': one_year
}

View File

@ -8,42 +8,39 @@ from metrik.conf import USER_AGENT
# noinspection PyUnresolvedReferences
class TestICE(TestCase):
def test_correct_libor_Aug8_2016(self):
# Validate with:
# https://www.theice.com/marketdata/reports/icebenchmarkadmin/ICELiborHistoricalRates.shtml?excelExport=&criteria.reportDate=8%2F8%2F16&criteria.currencyCode=USD
aug8_libor = LiborRateTask.retrieve_data(datetime(2016, 8, 8), 'USD')
assert aug8_libor.overnight == .4189
assert aug8_libor.one_week == .4431
assert aug8_libor.one_month == .5119
assert aug8_libor.two_month == .6268
assert aug8_libor.three_month == .8065
assert aug8_libor.six_month == 1.1852
assert aug8_libor.one_year == 1.5081
assert aug8_libor['overnight'] == .4189
assert aug8_libor['one_week'] == .4431
assert aug8_libor['one_month'] == .5119
assert aug8_libor['two_month'] == .6268
assert aug8_libor['three_month'] == .8065
assert aug8_libor['six_month'] == 1.1852
assert aug8_libor['one_year'] == 1.5081
london_tz = pytz.timezone('Europe/London')
actual = london_tz.localize(datetime(2016, 8, 8, 11, 45, 6))
assert aug8_libor.publication == actual
assert aug8_libor['publication'] == actual
def test_correct_libor_Aug9_2010(self):
# Validate with:
# https://www.theice.com/marketdata/reports/icebenchmarkadmin/ICELiborHistoricalRates.shtml?excelExport=&criteria.reportDate=8%2F9%2F10&criteria.currencyCode=USD
aug9_libor = LiborRateTask.retrieve_data(datetime(2010, 8, 9), 'USD')
assert aug9_libor.overnight == .23656
assert aug9_libor.one_week == .27725
assert aug9_libor.one_month == .29
assert aug9_libor.two_month == .3375
assert aug9_libor.three_month == .40438
assert aug9_libor.six_month == .6275
assert aug9_libor.one_year == .995
assert aug9_libor['overnight'] == .23656
assert aug9_libor['one_week'] == .27725
assert aug9_libor['one_month'] == .29
assert aug9_libor['two_month'] == .3375
assert aug9_libor['three_month'] == .40438
assert aug9_libor['six_month'] == .6275
assert aug9_libor['one_year'] == .995
london_tz = pytz.timezone('Europe/London')
actual = london_tz.localize(datetime(2010, 8, 9, 15, 49, 12))
assert aug9_libor.publication == actual
assert aug9_libor['publication'] == actual
def test_correct_date_reasoning(self):
# Make sure I document how to handle datetime issues in the future
@ -59,4 +56,4 @@ class TestICE(TestCase):
# ALWAYS USE timezone.localize()
assert (london_tz.localize(datetime(2016, 8, 8, 15)) ==
ny_tz.localize(datetime(2016, 8, 8, 10)))
ny_tz.localize(datetime(2016, 8, 8, 10)))