1
0
mirror of https://github.com/bspeice/metrik synced 2024-11-23 15:48:10 -05:00

Wasn't pandas' fault; dates are weird

ICE site is doing something strange...
This commit is contained in:
Bradlee Speice 2016-08-12 19:35:22 -04:00
parent 71d6f951ba
commit 14e0b404dd
2 changed files with 64 additions and 56 deletions

View File

@@ -1,51 +1,56 @@
from luigi.task import Task from luigi.task import Task
# noinspection PyUnresolvedReferences # noinspection PyUnresolvedReferences
from six.moves.urllib.parse import quote_plus from six.moves.urllib.parse import quote_plus
import pandas as pd
import pytz import pytz
from collections import namedtuple
import requests
import datetime
import csv
from io import StringIO
from dateutil.parser import parse from dateutil.parser import parse
import logging
class USDLibor(Task): LiborRate = namedtuple('LiborRate', [
'publication', 'overnight', 'one_week', 'one_month', 'two_month',
'three_month', 'six_month', 'one_year', 'currency'
])
class LiborRateTask(Task):
@staticmethod @staticmethod
def retrieve_data(date): def retrieve_data(date, currency):
url = ('https://www.theice.com/marketdata/reports/icebenchmarkadmin/' url = ('https://www.theice.com/marketdata/reports/icebenchmarkadmin/'
'ICELiborHistoricalRates.shtml?excelExport=' 'ICELiborHistoricalRates.shtml?excelExport='
'&criteria.reportDate={}&criteria.currencyCode=USD').format( '&criteria.reportDate={}&criteria.currencyCode={}').format(
quote_plus(date.strftime('%m/%d/%y')) quote_plus(date.strftime('%m/%d/%y')),
currency
) )
def parse_london(dt_str): fields = ['tenor', 'publication', 'usd_ice_libor']
# I'm getting inconsistent behavior in how Pandas parses the CSV text = requests.get(url).text
# file for dates and times. On Travis, it doesn't look like the f = StringIO(text)
# content is being modified. On my computer, Pandas is spitting next(f) # Skip the header
# back a localized time. So, after parsing, if we have a timezone- record = {'currency': currency}
# enabled datetime, switch to Europe/London, and if not, add the for row in csv.DictReader(f, fieldnames=fields):
# Europe/London info to it mapping = {
london_tz = pytz.timezone('Europe/London') 'Overnight': 'overnight',
# Note that parse() implicitly adds timezone information because '1 Week': 'one_week',
# of how pandas gave us the value '1 Month': 'one_month',
dt = parse(dt_str).replace(year=date.year, '2 Month': 'two_month',
month=date.month, '3 Month': 'three_month',
day=date.day) '6 Month': 'six_month',
try: '1 Year': 'one_year'
return dt.astimezone(london_tz) }
except ValueError: if row['usd_ice_libor']:
return london_tz.localize(dt) record[mapping[row['tenor']]] = float(row['usd_ice_libor'])
if row['publication']:
# Weird things happen with the publication field. For whatever reason,
# the *time* is correct, but very often the date gets screwed up.
# When I download the CSV with Firefox I only see the times - when I
# download with `requests`, I see both date (often incorrect) and time.
dt = parse(row['publication'])
dt = dt.replace(year=date.year, month=date.month, day=date.day)
record['publication'] = dt
# Skip 1 row at top for header (header=0), return LiborRate(**record)
# and read 7 total rows. For whatever reason,
# pandas totally ignores both skipfooter and skip_footer.
# WTF pandas.
df = pd.read_csv(
url, names=['Tenor', 'Publication Time', 'USD ICE LIBOR'],
header=0, parse_dates=['Publication Time'],
nrows=7, date_parser=parse_london,
)
logging.info('Publication time for USD ICE on {}: {}'.format(
date.strftime('%m/%d/%Y'), df['Publication Time'].unique()
))
return df

View File

@@ -2,7 +2,8 @@ from unittest import TestCase
from datetime import datetime from datetime import datetime
import pytz import pytz
from metrik.tasks.ice import USDLibor from metrik.tasks.ice import LiborRateTask
from metrik.conf import USER_AGENT
# noinspection PyUnresolvedReferences # noinspection PyUnresolvedReferences
@@ -11,36 +12,38 @@ class TestICE(TestCase):
def test_correct_libor_Aug8_2016(self): def test_correct_libor_Aug8_2016(self):
# Validate with: # Validate with:
# https://www.theice.com/marketdata/reports/icebenchmarkadmin/ICELiborHistoricalRates.shtml?excelExport=&criteria.reportDate=8%2F8%2F16&criteria.currencyCode=USD # https://www.theice.com/marketdata/reports/icebenchmarkadmin/ICELiborHistoricalRates.shtml?excelExport=&criteria.reportDate=8%2F8%2F16&criteria.currencyCode=USD
aug8_libor = USDLibor.retrieve_data(datetime(2016, 8, 8)) aug8_libor = LiborRateTask.retrieve_data(datetime(2016, 8, 8), 'USD')
assert (aug8_libor[aug8_libor['Tenor'] == 'Overnight']['USD ICE LIBOR'] == .4189).all() assert aug8_libor.overnight == .4189
assert (aug8_libor[aug8_libor['Tenor'] == '1 Week']['USD ICE LIBOR'] == .4431).all() assert aug8_libor.one_week == .4431
assert (aug8_libor[aug8_libor['Tenor'] == '1 Month']['USD ICE LIBOR'] == .5119).all() assert aug8_libor.one_month == .5119
assert (aug8_libor[aug8_libor['Tenor'] == '2 Month']['USD ICE LIBOR'] == .6268).all() assert aug8_libor.two_month == .6268
assert (aug8_libor[aug8_libor['Tenor'] == '3 Month']['USD ICE LIBOR'] == .8065).all() assert aug8_libor.three_month == .8065
assert (aug8_libor[aug8_libor['Tenor'] == '6 Month']['USD ICE LIBOR'] == 1.1852).all() assert aug8_libor.six_month == 1.1852
assert (aug8_libor[aug8_libor['Tenor'] == '1 Year']['USD ICE LIBOR'] == 1.5081).all() assert aug8_libor.one_year == 1.5081
london_tz = pytz.timezone('Europe/London') london_tz = pytz.timezone('Europe/London')
actual = london_tz.localize(datetime(2016, 8, 8, 11, 45, 6)) actual = london_tz.localize(datetime(2016, 8, 8, 11, 45, 6))
assert (aug8_libor['Publication Time'] == actual).all() assert aug8_libor.publication == actual
def test_correct_libor_Aug9_2010(self): def test_correct_libor_Aug9_2010(self):
# Validate with: # Validate with:
# https://www.theice.com/marketdata/reports/icebenchmarkadmin/ICELiborHistoricalRates.shtml?excelExport=&criteria.reportDate=8%2F9%2F10&criteria.currencyCode=USD # https://www.theice.com/marketdata/reports/icebenchmarkadmin/ICELiborHistoricalRates.shtml?excelExport=&criteria.reportDate=8%2F9%2F10&criteria.currencyCode=USD
aug9_libor = USDLibor.retrieve_data(datetime(2010, 8, 9)) aug9_libor = LiborRateTask.retrieve_data(datetime(2010, 8, 9), 'USD')
assert aug9_libor.overnight == .23656
assert aug9_libor.one_week == .27725
assert aug9_libor.one_month == .29
assert aug9_libor.two_month == .3375
assert aug9_libor.three_month == .40438
assert aug9_libor.six_month == .6275
assert aug9_libor.one_year == .995
assert (aug9_libor[aug9_libor['Tenor'] == 'Overnight']['USD ICE LIBOR'] == .23656).all()
assert (aug9_libor[aug9_libor['Tenor'] == '1 Week']['USD ICE LIBOR'] == .27725).all()
assert (aug9_libor[aug9_libor['Tenor'] == '1 Month']['USD ICE LIBOR'] == .29).all()
assert (aug9_libor[aug9_libor['Tenor'] == '2 Month']['USD ICE LIBOR'] == .3375).all()
assert (aug9_libor[aug9_libor['Tenor'] == '3 Month']['USD ICE LIBOR'] == .40438).all()
assert (aug9_libor[aug9_libor['Tenor'] == '6 Month']['USD ICE LIBOR'] == .6275).all()
assert (aug9_libor[aug9_libor['Tenor'] == '1 Year']['USD ICE LIBOR'] == .995).all()
london_tz = pytz.timezone('Europe/London') london_tz = pytz.timezone('Europe/London')
actual = london_tz.localize(datetime(2010, 8, 9, 15, 49, 12)) actual = london_tz.localize(datetime(2010, 8, 9, 15, 49, 12))
assert (aug9_libor['Publication Time'] == actual).all() assert aug9_libor.publication == actual
def test_correct_date_reasoning(self): def test_correct_date_reasoning(self):
# Make sure I document how to handle datetime issues in the future # Make sure I document how to handle datetime issues in the future