From 14e0b404dd39773321904d941de0739455a803da Mon Sep 17 00:00:00 2001
From: Bradlee Speice <bradlee.speice@gmail.com>
Date: Fri, 12 Aug 2016 19:35:22 -0400
Subject: [PATCH] Wasn't pandas' fault; dates are weird because the ICE site is
 doing something strange...

---
 metrik/tasks/ice.py    | 79 ++++++++++++++++++++++--------------------
 test/tasks/test_ice.py | 41 ++++++++++++----------
 2 files changed, 64 insertions(+), 56 deletions(-)

diff --git a/metrik/tasks/ice.py b/metrik/tasks/ice.py
index 2393ea0..c3b2369 100644
--- a/metrik/tasks/ice.py
+++ b/metrik/tasks/ice.py
@@ -1,51 +1,56 @@
 from luigi.task import Task
 # noinspection PyUnresolvedReferences
 from six.moves.urllib.parse import quote_plus
-import pandas as pd
 import pytz
+from collections import namedtuple
+import requests
+import datetime
+import csv
+from io import StringIO
 from dateutil.parser import parse
-import logging
 
 
-class USDLibor(Task):
+LiborRate = namedtuple('LiborRate', [
+    'publication', 'overnight', 'one_week', 'one_month', 'two_month',
+    'three_month', 'six_month', 'one_year', 'currency'
+])
+
+
+class LiborRateTask(Task):
 
     @staticmethod
-    def retrieve_data(date):
+    def retrieve_data(date, currency):
         url = ('https://www.theice.com/marketdata/reports/icebenchmarkadmin/'
                'ICELiborHistoricalRates.shtml?excelExport='
-               '&criteria.reportDate={}&criteria.currencyCode=USD').format(
-            quote_plus(date.strftime('%m/%d/%y'))
+               '&criteria.reportDate={}&criteria.currencyCode={}').format(
+            quote_plus(date.strftime('%m/%d/%y')),
+            currency
         )
 
-        def parse_london(dt_str):
-            # I'm getting inconsistent behavior in how Pandas parses the CSV
-            # file for dates and times. On Travis, it doesn't look like the
-            # content is being modified. On my computer, Pandas is spitting
-            # back a localized time. So, after parsing, if we have a timezone-
-            # enabled datetime, switch to Europe/London, and if not, add the
-            # Europe/London info to it
-            london_tz = pytz.timezone('Europe/London')
-            # Note that parse() implicitly adds timezone information because
-            # of how pandas gave us the value
-            dt = parse(dt_str).replace(year=date.year,
-                                       month=date.month,
-                                       day=date.day)
-            try:
-                return dt.astimezone(london_tz)
-            except ValueError:
-                return london_tz.localize(dt)
+        fields = ['tenor', 'publication', 'usd_ice_libor']
+        text = requests.get(url).text
+        f = StringIO(text)
+        next(f)  # Skip the header
+        record = {'currency': currency}
+        for row in csv.DictReader(f, fieldnames=fields):
+            mapping = {
+                'Overnight': 'overnight',
+                '1 Week': 'one_week',
+                '1 Month': 'one_month',
+                '2 Month': 'two_month',
+                '3 Month': 'three_month',
+                '6 Month': 'six_month',
+                '1 Year': 'one_year'
+            }
+            if row['usd_ice_libor']:
+                record[mapping[row['tenor']]] = float(row['usd_ice_libor'])
+            if row['publication']:
+                # The publication field is unreliable: the *time* is correct, but the
+                # date is very often wrong. Downloading the CSV with Firefox shows only
+                # times; downloading with `requests` shows both a date (often incorrect)
+                # and a time, so the date is overridden below with the requested date.
+                dt = parse(row['publication'])
+                dt = dt.replace(year=date.year, month=date.month, day=date.day)
+                record['publication'] = dt
 
-        # Skip 1 row at top for header (header=0),
-        # and read 7 total rows. For whatever reason,
-        # pandas totally ignores both skipfooter and skip_footer.
-        # WTF pandas.
-        df = pd.read_csv(
-            url, names=['Tenor', 'Publication Time', 'USD ICE LIBOR'],
-            header=0, parse_dates=['Publication Time'],
-            nrows=7, date_parser=parse_london,
-        )
-        logging.info('Publication time for USD ICE on {}: {}'.format(
-            date.strftime('%m/%d/%Y'), df['Publication Time'].unique()
-        ))
-
-        return df
+        return LiborRate(**record)
diff --git a/test/tasks/test_ice.py b/test/tasks/test_ice.py
index bc4c1c7..35fb683 100644
--- a/test/tasks/test_ice.py
+++ b/test/tasks/test_ice.py
@@ -2,7 +2,8 @@ from unittest import TestCase
 from datetime import datetime
 import pytz
 
-from metrik.tasks.ice import USDLibor
+from metrik.tasks.ice import LiborRateTask
+from metrik.conf import USER_AGENT
 
 
 # noinspection PyUnresolvedReferences
@@ -11,36 +12,38 @@ class TestICE(TestCase):
     def test_correct_libor_Aug8_2016(self):
         # Validate with:
         # https://www.theice.com/marketdata/reports/icebenchmarkadmin/ICELiborHistoricalRates.shtml?excelExport=&criteria.reportDate=8%2F8%2F16&criteria.currencyCode=USD
-        aug8_libor = USDLibor.retrieve_data(datetime(2016, 8, 8))
+        aug8_libor = LiborRateTask.retrieve_data(datetime(2016, 8, 8), 'USD')
 
-        assert (aug8_libor[aug8_libor['Tenor'] == 'Overnight']['USD ICE LIBOR'] == .4189).all()
-        assert (aug8_libor[aug8_libor['Tenor'] == '1 Week']['USD ICE LIBOR'] == .4431).all()
-        assert (aug8_libor[aug8_libor['Tenor'] == '1 Month']['USD ICE LIBOR'] == .5119).all()
-        assert (aug8_libor[aug8_libor['Tenor'] == '2 Month']['USD ICE LIBOR'] == .6268).all()
-        assert (aug8_libor[aug8_libor['Tenor'] == '3 Month']['USD ICE LIBOR'] == .8065).all()
-        assert (aug8_libor[aug8_libor['Tenor'] == '6 Month']['USD ICE LIBOR'] == 1.1852).all()
-        assert (aug8_libor[aug8_libor['Tenor'] == '1 Year']['USD ICE LIBOR'] == 1.5081).all()
+        assert aug8_libor.overnight == .4189
+        assert aug8_libor.one_week == .4431
+        assert aug8_libor.one_month == .5119
+        assert aug8_libor.two_month == .6268
+        assert aug8_libor.three_month == .8065
+        assert aug8_libor.six_month == 1.1852
+        assert aug8_libor.one_year == 1.5081
 
         london_tz = pytz.timezone('Europe/London')
         actual = london_tz.localize(datetime(2016, 8, 8, 11, 45, 6))
-        assert (aug8_libor['Publication Time'] == actual).all()
+        assert aug8_libor.publication == actual
+
 
     def test_correct_libor_Aug9_2010(self):
         # Validate with:
         # https://www.theice.com/marketdata/reports/icebenchmarkadmin/ICELiborHistoricalRates.shtml?excelExport=&criteria.reportDate=8%2F9%2F10&criteria.currencyCode=USD
-        aug9_libor = USDLibor.retrieve_data(datetime(2010, 8, 9))
+        aug9_libor = LiborRateTask.retrieve_data(datetime(2010, 8, 9), 'USD')
+
+        assert aug9_libor.overnight == .23656
+        assert aug9_libor.one_week == .27725
+        assert aug9_libor.one_month == .29
+        assert aug9_libor.two_month == .3375
+        assert aug9_libor.three_month == .40438
+        assert aug9_libor.six_month == .6275
+        assert aug9_libor.one_year == .995
 
-        assert (aug9_libor[aug9_libor['Tenor'] == 'Overnight']['USD ICE LIBOR'] == .23656).all()
-        assert (aug9_libor[aug9_libor['Tenor'] == '1 Week']['USD ICE LIBOR'] == .27725).all()
-        assert (aug9_libor[aug9_libor['Tenor'] == '1 Month']['USD ICE LIBOR'] == .29).all()
-        assert (aug9_libor[aug9_libor['Tenor'] == '2 Month']['USD ICE LIBOR'] == .3375).all()
-        assert (aug9_libor[aug9_libor['Tenor'] == '3 Month']['USD ICE LIBOR'] == .40438).all()
-        assert (aug9_libor[aug9_libor['Tenor'] == '6 Month']['USD ICE LIBOR'] == .6275).all()
-        assert (aug9_libor[aug9_libor['Tenor'] == '1 Year']['USD ICE LIBOR'] == .995).all()
 
         london_tz = pytz.timezone('Europe/London')
         actual = london_tz.localize(datetime(2010, 8, 9, 15, 49, 12))
-        assert (aug9_libor['Publication Time'] == actual).all()
+        assert aug9_libor.publication == actual
 
     def test_correct_date_reasoning(self):
         # Make sure I document how to handle datetime issues in the future