mirror of
https://github.com/bspeice/metrik
synced 2024-11-23 15:48:10 -05:00
Wasn't pandas' fault; dates are weird
ICE site is doing something strange...
This commit is contained in:
parent
71d6f951ba
commit
14e0b404dd
@ -1,51 +1,56 @@
|
|||||||
from luigi.task import Task
|
from luigi.task import Task
|
||||||
# noinspection PyUnresolvedReferences
|
# noinspection PyUnresolvedReferences
|
||||||
from six.moves.urllib.parse import quote_plus
|
from six.moves.urllib.parse import quote_plus
|
||||||
import pandas as pd
|
|
||||||
import pytz
|
import pytz
|
||||||
|
from collections import namedtuple
|
||||||
|
import requests
|
||||||
|
import datetime
|
||||||
|
import csv
|
||||||
|
from io import StringIO
|
||||||
from dateutil.parser import parse
|
from dateutil.parser import parse
|
||||||
import logging
|
|
||||||
|
|
||||||
|
|
||||||
class USDLibor(Task):
|
LiborRate = namedtuple('LiborRate', [
|
||||||
|
'publication', 'overnight', 'one_week', 'one_month', 'two_month',
|
||||||
|
'three_month', 'six_month', 'one_year', 'currency'
|
||||||
|
])
|
||||||
|
|
||||||
|
|
||||||
|
class LiborRateTask(Task):
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def retrieve_data(date):
|
def retrieve_data(date, currency):
|
||||||
url = ('https://www.theice.com/marketdata/reports/icebenchmarkadmin/'
|
url = ('https://www.theice.com/marketdata/reports/icebenchmarkadmin/'
|
||||||
'ICELiborHistoricalRates.shtml?excelExport='
|
'ICELiborHistoricalRates.shtml?excelExport='
|
||||||
'&criteria.reportDate={}&criteria.currencyCode=USD').format(
|
'&criteria.reportDate={}&criteria.currencyCode={}').format(
|
||||||
quote_plus(date.strftime('%m/%d/%y'))
|
quote_plus(date.strftime('%m/%d/%y')),
|
||||||
|
currency
|
||||||
)
|
)
|
||||||
|
|
||||||
def parse_london(dt_str):
|
fields = ['tenor', 'publication', 'usd_ice_libor']
|
||||||
# I'm getting inconsistent behavior in how Pandas parses the CSV
|
text = requests.get(url).text
|
||||||
# file for dates and times. On Travis, it doesn't look like the
|
f = StringIO(text)
|
||||||
# content is being modified. On my computer, Pandas is spitting
|
next(f) # Skip the header
|
||||||
# back a localized time. So, after parsing, if we have a timezone-
|
record = {'currency': currency}
|
||||||
# enabled datetime, switch to Europe/London, and if not, add the
|
for row in csv.DictReader(f, fieldnames=fields):
|
||||||
# Europe/London info to it
|
mapping = {
|
||||||
london_tz = pytz.timezone('Europe/London')
|
'Overnight': 'overnight',
|
||||||
# Note that parse() implicitly adds timezone information because
|
'1 Week': 'one_week',
|
||||||
# of how pandas gave us the value
|
'1 Month': 'one_month',
|
||||||
dt = parse(dt_str).replace(year=date.year,
|
'2 Month': 'two_month',
|
||||||
month=date.month,
|
'3 Month': 'three_month',
|
||||||
day=date.day)
|
'6 Month': 'six_month',
|
||||||
try:
|
'1 Year': 'one_year'
|
||||||
return dt.astimezone(london_tz)
|
}
|
||||||
except ValueError:
|
if row['usd_ice_libor']:
|
||||||
return london_tz.localize(dt)
|
record[mapping[row['tenor']]] = float(row['usd_ice_libor'])
|
||||||
|
if row['publication']:
|
||||||
|
# Weird things happen with the publication field. For whatever reason,
|
||||||
|
# the *time* is correct, but very often the date gets screwed up.
|
||||||
|
# When I download the CSV with Firefox I only see the times - when I
|
||||||
|
# download with `requests`, I see both date (often incorrect) and time.
|
||||||
|
dt = parse(row['publication'])
|
||||||
|
dt = dt.replace(year=date.year, month=date.month, day=date.day)
|
||||||
|
record['publication'] = dt
|
||||||
|
|
||||||
# Skip 1 row at top for header (header=0),
|
return LiborRate(**record)
|
||||||
# and read 7 total rows. For whatever reason,
|
|
||||||
# pandas totally ignores both skipfooter and skip_footer.
|
|
||||||
# WTF pandas.
|
|
||||||
df = pd.read_csv(
|
|
||||||
url, names=['Tenor', 'Publication Time', 'USD ICE LIBOR'],
|
|
||||||
header=0, parse_dates=['Publication Time'],
|
|
||||||
nrows=7, date_parser=parse_london,
|
|
||||||
)
|
|
||||||
logging.info('Publication time for USD ICE on {}: {}'.format(
|
|
||||||
date.strftime('%m/%d/%Y'), df['Publication Time'].unique()
|
|
||||||
))
|
|
||||||
|
|
||||||
return df
|
|
||||||
|
@ -2,7 +2,8 @@ from unittest import TestCase
|
|||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
import pytz
|
import pytz
|
||||||
|
|
||||||
from metrik.tasks.ice import USDLibor
|
from metrik.tasks.ice import LiborRateTask
|
||||||
|
from metrik.conf import USER_AGENT
|
||||||
|
|
||||||
|
|
||||||
# noinspection PyUnresolvedReferences
|
# noinspection PyUnresolvedReferences
|
||||||
@ -11,36 +12,38 @@ class TestICE(TestCase):
|
|||||||
def test_correct_libor_Aug8_2016(self):
|
def test_correct_libor_Aug8_2016(self):
|
||||||
# Validate with:
|
# Validate with:
|
||||||
# https://www.theice.com/marketdata/reports/icebenchmarkadmin/ICELiborHistoricalRates.shtml?excelExport=&criteria.reportDate=8%2F8%2F16&criteria.currencyCode=USD
|
# https://www.theice.com/marketdata/reports/icebenchmarkadmin/ICELiborHistoricalRates.shtml?excelExport=&criteria.reportDate=8%2F8%2F16&criteria.currencyCode=USD
|
||||||
aug8_libor = USDLibor.retrieve_data(datetime(2016, 8, 8))
|
aug8_libor = LiborRateTask.retrieve_data(datetime(2016, 8, 8), 'USD')
|
||||||
|
|
||||||
assert (aug8_libor[aug8_libor['Tenor'] == 'Overnight']['USD ICE LIBOR'] == .4189).all()
|
assert aug8_libor.overnight == .4189
|
||||||
assert (aug8_libor[aug8_libor['Tenor'] == '1 Week']['USD ICE LIBOR'] == .4431).all()
|
assert aug8_libor.one_week == .4431
|
||||||
assert (aug8_libor[aug8_libor['Tenor'] == '1 Month']['USD ICE LIBOR'] == .5119).all()
|
assert aug8_libor.one_month == .5119
|
||||||
assert (aug8_libor[aug8_libor['Tenor'] == '2 Month']['USD ICE LIBOR'] == .6268).all()
|
assert aug8_libor.two_month == .6268
|
||||||
assert (aug8_libor[aug8_libor['Tenor'] == '3 Month']['USD ICE LIBOR'] == .8065).all()
|
assert aug8_libor.three_month == .8065
|
||||||
assert (aug8_libor[aug8_libor['Tenor'] == '6 Month']['USD ICE LIBOR'] == 1.1852).all()
|
assert aug8_libor.six_month == 1.1852
|
||||||
assert (aug8_libor[aug8_libor['Tenor'] == '1 Year']['USD ICE LIBOR'] == 1.5081).all()
|
assert aug8_libor.one_year == 1.5081
|
||||||
|
|
||||||
london_tz = pytz.timezone('Europe/London')
|
london_tz = pytz.timezone('Europe/London')
|
||||||
actual = london_tz.localize(datetime(2016, 8, 8, 11, 45, 6))
|
actual = london_tz.localize(datetime(2016, 8, 8, 11, 45, 6))
|
||||||
assert (aug8_libor['Publication Time'] == actual).all()
|
assert aug8_libor.publication == actual
|
||||||
|
|
||||||
|
|
||||||
def test_correct_libor_Aug9_2010(self):
|
def test_correct_libor_Aug9_2010(self):
|
||||||
# Validate with:
|
# Validate with:
|
||||||
# https://www.theice.com/marketdata/reports/icebenchmarkadmin/ICELiborHistoricalRates.shtml?excelExport=&criteria.reportDate=8%2F9%2F10&criteria.currencyCode=USD
|
# https://www.theice.com/marketdata/reports/icebenchmarkadmin/ICELiborHistoricalRates.shtml?excelExport=&criteria.reportDate=8%2F9%2F10&criteria.currencyCode=USD
|
||||||
aug9_libor = USDLibor.retrieve_data(datetime(2010, 8, 9))
|
aug9_libor = LiborRateTask.retrieve_data(datetime(2010, 8, 9), 'USD')
|
||||||
|
|
||||||
|
assert aug9_libor.overnight == .23656
|
||||||
|
assert aug9_libor.one_week == .27725
|
||||||
|
assert aug9_libor.one_month == .29
|
||||||
|
assert aug9_libor.two_month == .3375
|
||||||
|
assert aug9_libor.three_month == .40438
|
||||||
|
assert aug9_libor.six_month == .6275
|
||||||
|
assert aug9_libor.one_year == .995
|
||||||
|
|
||||||
assert (aug9_libor[aug9_libor['Tenor'] == 'Overnight']['USD ICE LIBOR'] == .23656).all()
|
|
||||||
assert (aug9_libor[aug9_libor['Tenor'] == '1 Week']['USD ICE LIBOR'] == .27725).all()
|
|
||||||
assert (aug9_libor[aug9_libor['Tenor'] == '1 Month']['USD ICE LIBOR'] == .29).all()
|
|
||||||
assert (aug9_libor[aug9_libor['Tenor'] == '2 Month']['USD ICE LIBOR'] == .3375).all()
|
|
||||||
assert (aug9_libor[aug9_libor['Tenor'] == '3 Month']['USD ICE LIBOR'] == .40438).all()
|
|
||||||
assert (aug9_libor[aug9_libor['Tenor'] == '6 Month']['USD ICE LIBOR'] == .6275).all()
|
|
||||||
assert (aug9_libor[aug9_libor['Tenor'] == '1 Year']['USD ICE LIBOR'] == .995).all()
|
|
||||||
|
|
||||||
london_tz = pytz.timezone('Europe/London')
|
london_tz = pytz.timezone('Europe/London')
|
||||||
actual = london_tz.localize(datetime(2010, 8, 9, 15, 49, 12))
|
actual = london_tz.localize(datetime(2010, 8, 9, 15, 49, 12))
|
||||||
assert (aug9_libor['Publication Time'] == actual).all()
|
assert aug9_libor.publication == actual
|
||||||
|
|
||||||
def test_correct_date_reasoning(self):
|
def test_correct_date_reasoning(self):
|
||||||
# Make sure I document how to handle datetime issues in the future
|
# Make sure I document how to handle datetime issues in the future
|
||||||
|
Loading…
Reference in New Issue
Block a user