Initial Mongo refactoring work

2026-01-07 04:20:14 -05:00 · 2016-08-12 23:55:12 -04:00
parent 7dc9426eb9
commit 684c68bbe9
3 changed files with 70 additions and 33 deletions
--- a/metrik/tasks/base.py
+++ b/metrik/tasks/base.py
@ -0,0 +1,29 @@
+from __future__ import print_function
+from luigi import Task
+
+from metrik.targets.mongo_target import MongoTarget
+
+
+class MongoCreateTask(Task):
+    """Base task that persists the result of ``retrieve_data`` via MongoTarget.
+
+    Subclasses supply ``get_collection_name`` and ``retrieve_data``. This
+    class derives the document ``_id`` from the task's string parameters
+    and writes the retrieved dict through ``MongoTarget.persist``.
+    """
+
+    def __init__(self, *args, **kwargs):
+        super(MongoCreateTask, self).__init__(*args, **kwargs)
+        # NOTE(review): str hashes are randomized per process on interpreters
+        # with hash randomization enabled (the Python 3 default), so this id
+        # may not be stable across runs there -- confirm the target runtime.
+        self.mongo_id = hash(str(self.to_str_params()))
+
+    def get_collection_name(self):
+        """Return the value passed as the first argument to ``MongoTarget``.
+
+        Raises:
+            NotImplementedError: always, until a subclass overrides it.
+        """
+        raise NotImplementedError('Please set the collection name.')
+
+    def output(self):
+        """Return the ``MongoTarget`` built from the collection name and id."""
+        return MongoTarget(self.get_collection_name(), self.mongo_id)
+
+    def run(self):
+        """Fetch the data for this task's parameters and persist it.
+
+        The parameter name/value pairs are passed to ``retrieve_data`` as
+        keyword arguments; the returned dict gets ``_id`` set to
+        ``self.mongo_id`` before being handed to the output target.
+        """
+        p_names = self.get_params()
+        p_values = self.get_param_values(p_names, [], self.param_kwargs)
+        print(p_values)
+        data_dict = self.retrieve_data(**dict(p_values))
+        data_dict['_id'] = self.mongo_id
+        self.output().persist(data_dict)
+
+    @staticmethod
+    def retrieve_data(*args, **kwargs):
+        """Return the dict to persist; subclasses must override this.
+
+        This is a static method, so it takes no ``self``: ``run`` calls it
+        with the task parameters as keyword arguments only.
+
+        Raises:
+            NotImplementedError: always, until a subclass overrides it.
+        """
+        raise NotImplementedError('Get me some data!')
+
--- a/metrik/tasks/ice.py
+++ b/metrik/tasks/ice.py
@ -12,7 +12,7 @@ from io import StringIO
 from dateutil.parser import parse

 from metrik.targets.mongo_target import MongoTarget
-
+from metrik.tasks.base import MongoCreateTask

 LiborRate = namedtuple('LiborRate', [
    'publication', 'overnight', 'one_week', 'one_month', 'two_month',
@ -20,18 +20,13 @@ LiborRate = namedtuple('LiborRate', [
 ])


-class LiborRateTask(Task):
+class LiborRateTask(MongoCreateTask):

    date = DateParameter()
    currency = Parameter()

-    def output(self):
-        h = hash(str(self.to_str_params()))
-        return MongoTarget('libor', h)
-
-    def run(self):
-        libor_record = self.retrieve_data(self.date, self.currency)
-        self.output().persist(libor_record._asdict())
+    def get_collection_name(self):
+        return 'libor'

    @staticmethod
    def retrieve_data(date, currency):
@ -46,7 +41,9 @@ class LiborRateTask(Task):
        text = requests.get(url).text
        f = StringIO(text)
        next(f)  # Skip the header
-        record = {'currency': currency}
+
+        # TODO: Messing with globals() is probably a terrible idea, is there
+        # a better way to write the below code?
        for row in csv.DictReader(f, fieldnames=fields):
            mapping = {
                'Overnight': 'overnight',
@ -58,7 +55,8 @@ class LiborRateTask(Task):
                '1 Year': 'one_year'
            }
            if row['usd_ice_libor']:
-                record[mapping[row['tenor']]] = float(row['usd_ice_libor'])
+                globals()[mapping[row['tenor']]] = float(row['usd_ice_libor'])
+
            if row['publication']:
                # Weird things happen with the publication field. For whatever reason,
                # the *time* is correct, but very often the date gets screwed up.
@ -66,6 +64,19 @@ class LiborRateTask(Task):
                # download with `requests`, I see both date (often incorrect) and time.
                dt = parse(row['publication'])
                dt = dt.replace(year=date.year, month=date.month, day=date.day)
-                record['publication'] = dt
+                globals()['publication'] = dt

-        return LiborRate(**record)
+        # Because of the shenanigans I did earlier with globals(), ignore
+        # unresolved references. Probably a better way to do this.
+        # noinspection PyUnresolvedReferences
+        return {
+            'currency': currency,
+            'publication': publication,
+            'overnight': overnight,
+            'one_week': one_week,
+            'one_month': one_month,
+            'two_month': two_month,
+            'three_month': three_month,
+            'six_month': six_month,
+            'one_year': one_year
+        }
--- a/test/tasks/test_ice.py
+++ b/test/tasks/test_ice.py
@ -8,42 +8,39 @@ from metrik.conf import USER_AGENT

 # noinspection PyUnresolvedReferences
 class TestICE(TestCase):
-
    def test_correct_libor_Aug8_2016(self):
        # Validate with:
        # https://www.theice.com/marketdata/reports/icebenchmarkadmin/ICELiborHistoricalRates.shtml?excelExport=&criteria.reportDate=8%2F8%2F16&criteria.currencyCode=USD
        aug8_libor = LiborRateTask.retrieve_data(datetime(2016, 8, 8), 'USD')

-        assert aug8_libor.overnight == .4189
-        assert aug8_libor.one_week == .4431
-        assert aug8_libor.one_month == .5119
-        assert aug8_libor.two_month == .6268
-        assert aug8_libor.three_month == .8065
-        assert aug8_libor.six_month == 1.1852
-        assert aug8_libor.one_year == 1.5081
+        assert aug8_libor['overnight'] == .4189
+        assert aug8_libor['one_week'] == .4431
+        assert aug8_libor['one_month'] == .5119
+        assert aug8_libor['two_month'] == .6268
+        assert aug8_libor['three_month'] == .8065
+        assert aug8_libor['six_month'] == 1.1852
+        assert aug8_libor['one_year'] == 1.5081

        london_tz = pytz.timezone('Europe/London')
        actual = london_tz.localize(datetime(2016, 8, 8, 11, 45, 6))
-        assert aug8_libor.publication == actual
-
+        assert aug8_libor['publication'] == actual

    def test_correct_libor_Aug9_2010(self):
        # Validate with:
        # https://www.theice.com/marketdata/reports/icebenchmarkadmin/ICELiborHistoricalRates.shtml?excelExport=&criteria.reportDate=8%2F9%2F10&criteria.currencyCode=USD
        aug9_libor = LiborRateTask.retrieve_data(datetime(2010, 8, 9), 'USD')

-        assert aug9_libor.overnight == .23656
-        assert aug9_libor.one_week == .27725
-        assert aug9_libor.one_month == .29
-        assert aug9_libor.two_month == .3375
-        assert aug9_libor.three_month == .40438
-        assert aug9_libor.six_month == .6275
-        assert aug9_libor.one_year == .995
-
+        assert aug9_libor['overnight'] == .23656
+        assert aug9_libor['one_week'] == .27725
+        assert aug9_libor['one_month'] == .29
+        assert aug9_libor['two_month'] == .3375
+        assert aug9_libor['three_month'] == .40438
+        assert aug9_libor['six_month'] == .6275
+        assert aug9_libor['one_year'] == .995

        london_tz = pytz.timezone('Europe/London')
        actual = london_tz.localize(datetime(2010, 8, 9, 15, 49, 12))
-        assert aug9_libor.publication == actual
+        assert aug9_libor['publication'] == actual

    def test_correct_date_reasoning(self):
        # Make sure I document how to handle datetime issues in the future
@ -59,4 +56,4 @@ class TestICE(TestCase):

        # ALWAYS USE timezone.localize()
        assert (london_tz.localize(datetime(2016, 8, 8, 15)) ==
-                ny_tz.localize(datetime(2016, 8, 8, 10)))
+                ny_tz.localize(datetime(2016, 8, 8, 10)))