From 684c68bbe92aafd04add53d7d0c999d1f353c05c Mon Sep 17 00:00:00 2001 From: Bradlee Speice Date: Fri, 12 Aug 2016 23:55:12 -0400 Subject: [PATCH] Initial Mongo refactoring work --- metrik/tasks/base.py | 29 +++++++++++++++++++++++++++++ metrik/tasks/ice.py | 37 ++++++++++++++++++++++++------------- test/tasks/test_ice.py | 37 +++++++++++++++++-------------------- 3 files changed, 70 insertions(+), 33 deletions(-) create mode 100644 metrik/tasks/base.py diff --git a/metrik/tasks/base.py b/metrik/tasks/base.py new file mode 100644 index 0000000..7bed2cb --- /dev/null +++ b/metrik/tasks/base.py @@ -0,0 +1,29 @@ +from __future__ import print_function +from luigi import Task + +from metrik.targets.mongo_target import MongoTarget + + +class MongoCreateTask(Task): + def __init__(self, *args, **kwargs): + super(MongoCreateTask, self).__init__(*args, **kwargs) + self.mongo_id = hash(str(self.to_str_params())) + + def get_collection_name(self): + raise NotImplementedError('Please set the collection name.') + + def output(self): + return MongoTarget(self.get_collection_name(), self.mongo_id) + + def run(self): + p_names = self.get_params() + p_values = self.get_param_values(p_names, [], self.param_kwargs) + print(p_values) + data_dict = self.retrieve_data(**dict(p_values)) + data_dict['_id'] = self.mongo_id + self.output().persist(data_dict) + + @staticmethod + def retrieve_data(self, *args, **kwargs): + raise NotImplementedError('Get me some data!') + diff --git a/metrik/tasks/ice.py b/metrik/tasks/ice.py index e80f967..cded0b9 100644 --- a/metrik/tasks/ice.py +++ b/metrik/tasks/ice.py @@ -12,7 +12,7 @@ from io import StringIO from dateutil.parser import parse from metrik.targets.mongo_target import MongoTarget - +from metrik.tasks.base import MongoCreateTask LiborRate = namedtuple('LiborRate', [ 'publication', 'overnight', 'one_week', 'one_month', 'two_month', @@ -20,18 +20,13 @@ LiborRate = namedtuple('LiborRate', [ ]) -class LiborRateTask(Task): +class 
LiborRateTask(MongoCreateTask): date = DateParameter() currency = Parameter() - def output(self): - h = hash(str(self.to_str_params())) - return MongoTarget('libor', h) - - def run(self): - libor_record = self.retrieve_data(self.date, self.currency) - self.output().persist(libor_record._asdict()) + def get_collection_name(self): + return 'libor' @staticmethod def retrieve_data(date, currency): @@ -46,7 +41,9 @@ class LiborRateTask(Task): text = requests.get(url).text f = StringIO(text) next(f) # Skip the header - record = {'currency': currency} + + # TODO: Messing with globals() is probably a terrible idea, is there + # a better way to write the below code? for row in csv.DictReader(f, fieldnames=fields): mapping = { 'Overnight': 'overnight', @@ -58,7 +55,8 @@ class LiborRateTask(Task): '1 Year': 'one_year' } if row['usd_ice_libor']: - record[mapping[row['tenor']]] = float(row['usd_ice_libor']) + globals()[mapping[row['tenor']]] = float(row['usd_ice_libor']) + if row['publication']: # Weird things happen with the publication field. For whatever reason, # the *time* is correct, but very often the date gets screwed up. @@ -66,6 +64,19 @@ class LiborRateTask(Task): # download with `requests`, I see both date (often incorrect) and time. dt = parse(row['publication']) dt = dt.replace(year=date.year, month=date.month, day=date.day) - record['publication'] = dt + globals()['publication'] = dt - return LiborRate(**record) + # Because of the shenanigans I did earlier with globals(), ignore + # unresolved references. Probably a better way to do this. 
+ # noinspection PyUnresolvedReferences + return { + 'currency': currency, + 'publication': publication, + 'overnight': overnight, + 'one_week': one_week, + 'one_month': one_month, + 'two_month': two_month, + 'three_month': three_month, + 'six_month': six_month, + 'one_year': one_year + } diff --git a/test/tasks/test_ice.py b/test/tasks/test_ice.py index 35fb683..7fde444 100644 --- a/test/tasks/test_ice.py +++ b/test/tasks/test_ice.py @@ -8,42 +8,39 @@ from metrik.conf import USER_AGENT # noinspection PyUnresolvedReferences class TestICE(TestCase): - def test_correct_libor_Aug8_2016(self): # Validate with: # https://www.theice.com/marketdata/reports/icebenchmarkadmin/ICELiborHistoricalRates.shtml?excelExport=&criteria.reportDate=8%2F8%2F16&criteria.currencyCode=USD aug8_libor = LiborRateTask.retrieve_data(datetime(2016, 8, 8), 'USD') - assert aug8_libor.overnight == .4189 - assert aug8_libor.one_week == .4431 - assert aug8_libor.one_month == .5119 - assert aug8_libor.two_month == .6268 - assert aug8_libor.three_month == .8065 - assert aug8_libor.six_month == 1.1852 - assert aug8_libor.one_year == 1.5081 + assert aug8_libor['overnight'] == .4189 + assert aug8_libor['one_week'] == .4431 + assert aug8_libor['one_month'] == .5119 + assert aug8_libor['two_month'] == .6268 + assert aug8_libor['three_month'] == .8065 + assert aug8_libor['six_month'] == 1.1852 + assert aug8_libor['one_year'] == 1.5081 london_tz = pytz.timezone('Europe/London') actual = london_tz.localize(datetime(2016, 8, 8, 11, 45, 6)) - assert aug8_libor.publication == actual - + assert aug8_libor['publication'] == actual def test_correct_libor_Aug9_2010(self): # Validate with: # https://www.theice.com/marketdata/reports/icebenchmarkadmin/ICELiborHistoricalRates.shtml?excelExport=&criteria.reportDate=8%2F9%2F10&criteria.currencyCode=USD aug9_libor = LiborRateTask.retrieve_data(datetime(2010, 8, 9), 'USD') - assert aug9_libor.overnight == .23656 - assert aug9_libor.one_week == .27725 - assert 
aug9_libor.one_month == .29 - assert aug9_libor.two_month == .3375 - assert aug9_libor.three_month == .40438 - assert aug9_libor.six_month == .6275 - assert aug9_libor.one_year == .995 - + assert aug9_libor['overnight'] == .23656 + assert aug9_libor['one_week'] == .27725 + assert aug9_libor['one_month'] == .29 + assert aug9_libor['two_month'] == .3375 + assert aug9_libor['three_month'] == .40438 + assert aug9_libor['six_month'] == .6275 + assert aug9_libor['one_year'] == .995 london_tz = pytz.timezone('Europe/London') actual = london_tz.localize(datetime(2010, 8, 9, 15, 49, 12)) - assert aug9_libor.publication == actual + assert aug9_libor['publication'] == actual def test_correct_date_reasoning(self): # Make sure I document how to handle datetime issues in the future @@ -59,4 +56,4 @@ class TestICE(TestCase): # ALWAYS USE timezone.localize() assert (london_tz.localize(datetime(2016, 8, 8, 15)) == - ny_tz.localize(datetime(2016, 8, 8, 10))) \ No newline at end of file + ny_tz.localize(datetime(2016, 8, 8, 10)))