From 14e0b404dd39773321904d941de0739455a803da Mon Sep 17 00:00:00 2001 From: Bradlee Speice Date: Fri, 12 Aug 2016 19:35:22 -0400 Subject: [PATCH] Wasn't pandas fault, dates are weird ICE site is doing something strange... --- metrik/tasks/ice.py | 79 ++++++++++++++++++++++-------------------- test/tasks/test_ice.py | 41 ++++++++++++---------- 2 files changed, 64 insertions(+), 56 deletions(-) diff --git a/metrik/tasks/ice.py b/metrik/tasks/ice.py index 2393ea0..c3b2369 100644 --- a/metrik/tasks/ice.py +++ b/metrik/tasks/ice.py @@ -1,51 +1,56 @@ from luigi.task import Task # noinspection PyUnresolvedReferences from six.moves.urllib.parse import quote_plus -import pandas as pd import pytz +from collections import namedtuple +import requests +import datetime +import csv +from io import StringIO from dateutil.parser import parse -import logging -class USDLibor(Task): +LiborRate = namedtuple('LiborRate', [ + 'publication', 'overnight', 'one_week', 'one_month', 'two_month', + 'three_month', 'six_month', 'one_year', 'currency' +]) + + +class LiborRateTask(Task): @staticmethod - def retrieve_data(date): + def retrieve_data(date, currency): url = ('https://www.theice.com/marketdata/reports/icebenchmarkadmin/' 'ICELiborHistoricalRates.shtml?excelExport=' - '&criteria.reportDate={}&criteria.currencyCode=USD').format( - quote_plus(date.strftime('%m/%d/%y')) + '&criteria.reportDate={}&criteria.currencyCode={}').format( + quote_plus(date.strftime('%m/%d/%y')), + currency ) - def parse_london(dt_str): - # I'm getting inconsistent behavior in how Pandas parses the CSV - # file for dates and times. On Travis, it doesn't look like the - # content is being modified. On my computer, Pandas is spitting - # back a localized time. So, after parsing, if we have a timezone- - # enabled datetime, switch to Europe/London, and if not, add the - # Europe/London info to it - london_tz = pytz.timezone('Europe/London') - # Note that parse() implicitly adds timezone information because - # of how pandas gave us the value - dt = parse(dt_str).replace(year=date.year, - month=date.month, - day=date.day) - try: - return dt.astimezone(london_tz) - except ValueError: - return london_tz.localize(dt) + fields = ['tenor', 'publication', 'usd_ice_libor'] + text = requests.get(url).text + f = StringIO(text) + next(f) # Skip the header + record = {'currency': currency} + for row in csv.DictReader(f, fieldnames=fields): + mapping = { + 'Overnight': 'overnight', + '1 Week': 'one_week', + '1 Month': 'one_month', + '2 Month': 'two_month', + '3 Month': 'three_month', + '6 Month': 'six_month', + '1 Year': 'one_year' + } + if row['usd_ice_libor']: + record[mapping[row['tenor']]] = float(row['usd_ice_libor']) + if row['publication']: + # Weird things happen with the publication field. For whatever reason, + # the *time* is correct, but very often the date gets screwed up. + # When I download the CSV with Firefox I only see the times - when I + # download with `requests`, I see both date (often incorrect) and time. + dt = parse(row['publication']) + dt = dt.replace(year=date.year, month=date.month, day=date.day) + record['publication'] = dt - # Skip 1 row at top for header (header=0), - # and read 7 total rows. For whatever reason, - # pandas totally ignores both skipfooter and skip_footer. - # WTF pandas. - df = pd.read_csv( - url, names=['Tenor', 'Publication Time', 'USD ICE LIBOR'], - header=0, parse_dates=['Publication Time'], - nrows=7, date_parser=parse_london, - ) - logging.info('Publication time for USD ICE on {}: {}'.format( - date.strftime('%m/%d/%Y'), df['Publication Time'].unique() - )) - - return df + return LiborRate(**record) diff --git a/test/tasks/test_ice.py b/test/tasks/test_ice.py index bc4c1c7..35fb683 100644 --- a/test/tasks/test_ice.py +++ b/test/tasks/test_ice.py @@ -2,7 +2,8 @@ from unittest import TestCase from datetime import datetime import pytz -from metrik.tasks.ice import USDLibor +from metrik.tasks.ice import LiborRateTask +from metrik.conf import USER_AGENT # noinspection PyUnresolvedReferences @@ -11,36 +12,38 @@ class TestICE(TestCase): def test_correct_libor_Aug8_2016(self): # Validate with: # https://www.theice.com/marketdata/reports/icebenchmarkadmin/ICELiborHistoricalRates.shtml?excelExport=&criteria.reportDate=8%2F8%2F16&criteria.currencyCode=USD - aug8_libor = USDLibor.retrieve_data(datetime(2016, 8, 8)) + aug8_libor = LiborRateTask.retrieve_data(datetime(2016, 8, 8), 'USD') - assert (aug8_libor[aug8_libor['Tenor'] == 'Overnight']['USD ICE LIBOR'] == .4189).all() - assert (aug8_libor[aug8_libor['Tenor'] == '1 Week']['USD ICE LIBOR'] == .4431).all() - assert (aug8_libor[aug8_libor['Tenor'] == '1 Month']['USD ICE LIBOR'] == .5119).all() - assert (aug8_libor[aug8_libor['Tenor'] == '2 Month']['USD ICE LIBOR'] == .6268).all() - assert (aug8_libor[aug8_libor['Tenor'] == '3 Month']['USD ICE LIBOR'] == .8065).all() - assert (aug8_libor[aug8_libor['Tenor'] == '6 Month']['USD ICE LIBOR'] == 1.1852).all() - assert (aug8_libor[aug8_libor['Tenor'] == '1 Year']['USD ICE LIBOR'] == 1.5081).all() + assert aug8_libor.overnight == .4189 + assert aug8_libor.one_week == .4431 + assert aug8_libor.one_month == .5119 + assert aug8_libor.two_month == .6268 + assert aug8_libor.three_month == .8065 + assert aug8_libor.six_month == 1.1852 + assert aug8_libor.one_year == 1.5081 london_tz = pytz.timezone('Europe/London') actual = london_tz.localize(datetime(2016, 8, 8, 11, 45, 6)) - assert (aug8_libor['Publication Time'] == actual).all() + assert aug8_libor.publication == actual + def test_correct_libor_Aug9_2010(self): # Validate with: # https://www.theice.com/marketdata/reports/icebenchmarkadmin/ICELiborHistoricalRates.shtml?excelExport=&criteria.reportDate=8%2F9%2F10&criteria.currencyCode=USD - aug9_libor = USDLibor.retrieve_data(datetime(2010, 8, 9)) + aug9_libor = LiborRateTask.retrieve_data(datetime(2010, 8, 9), 'USD') + + assert aug9_libor.overnight == .23656 + assert aug9_libor.one_week == .27725 + assert aug9_libor.one_month == .29 + assert aug9_libor.two_month == .3375 + assert aug9_libor.three_month == .40438 + assert aug9_libor.six_month == .6275 + assert aug9_libor.one_year == .995 - assert (aug9_libor[aug9_libor['Tenor'] == 'Overnight']['USD ICE LIBOR'] == .23656).all() - assert (aug9_libor[aug9_libor['Tenor'] == '1 Week']['USD ICE LIBOR'] == .27725).all() - assert (aug9_libor[aug9_libor['Tenor'] == '1 Month']['USD ICE LIBOR'] == .29).all() - assert (aug9_libor[aug9_libor['Tenor'] == '2 Month']['USD ICE LIBOR'] == .3375).all() - assert (aug9_libor[aug9_libor['Tenor'] == '3 Month']['USD ICE LIBOR'] == .40438).all() - assert (aug9_libor[aug9_libor['Tenor'] == '6 Month']['USD ICE LIBOR'] == .6275).all() - assert (aug9_libor[aug9_libor['Tenor'] == '1 Year']['USD ICE LIBOR'] == .995).all() london_tz = pytz.timezone('Europe/London') actual = london_tz.localize(datetime(2010, 8, 9, 15, 49, 12)) - assert (aug9_libor['Publication Time'] == actual).all() + assert aug9_libor.publication == actual def test_correct_date_reasoning(self): # Make sure I document how to handle datetime issues in the future