Get command-line tools up and running

I now have something that actually gets stuff done.
master
Bradlee Speice 2016-08-22 21:34:54 -04:00
parent 96c503a106
commit f2390ba34f
12 changed files with 173 additions and 31 deletions

View File

@ -54,7 +54,7 @@
<ConfirmationsSetting value="0" id="Add" />
<ConfirmationsSetting value="0" id="Remove" />
</component>
<component name="ProjectRootManager" version="2" languageLevel="JDK_1_3" default="false" assert-keyword="false" jdk-15="false" project-jdk-name="Python 2.7.12 (/usr/bin/python2)" project-jdk-type="Python SDK">
<component name="ProjectRootManager" version="2" languageLevel="JDK_1_3" default="false" assert-keyword="false" jdk-15="false" project-jdk-name="Python 2.7.12 virtualenv at ~/Development/metrik/py2-virt" project-jdk-type="Python SDK">
<output url="file://$PROJECT_DIR$/out" />
</component>
<component name="PythonCompatibilityInspectionAdvertiser">

View File

@ -0,0 +1,2 @@
# Package version metadata; __release__ is an alias of __version__.
# NOTE(review): the version is stored as a float, not a string, so a value
# such as 0.10 would collapse to 0.1 -- consider '0.1' if nothing relies on
# the numeric type.
__version__ = 0.1
__release__ = __version__

View File

@ -1,9 +1,64 @@
from __future__ import print_function
from luigi import build
from metrik.flows.libor_flow import LiborFlow
from datetime import datetime
from argparse import ArgumentParser
from dateutil.parser import parse
from metrik.flows.rates_flow import LiborFlow
# Registry of runnable flows, keyed by the name given to the -f/--flow option.
flows = {
'LiborFlow': LiborFlow
}
def run_flow(flow_class, present):
    """Instantiate ``flow_class`` for ``present`` and pass it to ``build``.

    Args:
        flow_class: The flow class to instantiate.
        present: Value forwarded as the flow's ``present`` keyword argument.
    """
    flow = flow_class(present=present)
    build([flow])
def build_cron_file():
    """Render one cron line per registered flow.

    Each line is the flow's cron expression followed by the command that
    runs that flow (``metrik -f <flow name>``).

    Returns:
        The lines joined with newlines.
    """
    executable = 'metrik'
    flow_option = '-f'
    return '\n'.join(
        ' '.join([
            flow.get_schedule().get_cron_string(),
            executable,
            flow_option,
            name,
        ])
        for name, flow in flows.items()
    )
def list_flows():
    """Return the names of all registered flows, one per line.

    The names are the keys of the module-level ``flows`` registry, sorted
    so the listing is stable between runs.  Previously this was a stub
    returning ``None``, which made ``--list-flows`` print "None".

    Returns:
        A newline-separated string of flow names (empty if none exist).
    """
    return '\n'.join(sorted(flows))
def handle_commandline():
    """Parse the command line and perform the requested action.

    Exactly one action is performed, checked in this order: ``--cron``
    prints the cron file, ``--list-flows`` prints the flow listing and
    ``--flow NAME`` runs the named flow as of ``--date``.  When no action
    is given a short notice is printed instead.

    Raises:
        SystemExit: Raised by argparse for ``--help``, malformed arguments,
            or (via ``parser.error``) a ``--flow`` name that is not present
            in the ``flows`` registry.
    """
    parser = ArgumentParser(description='Capture ALL THE DATA off the Internet.')
    parser.add_argument('-c', '--cron', dest='cron', action='store_true',
                        # The trailing space keeps the adjacent literals from
                        # fusing into "schedulerunning".
                        help='Build the cron file used to schedule '
                             'running all flows')
    parser.add_argument('-d', '--date', dest='present',
                        help='Run a flow as if it was this time '
                             '(default: %(default)s).',
                        default=datetime.now())
    parser.add_argument('-f', '--flow', dest='flow', help='The flow to be run')
    parser.add_argument('-l', '--list-flows', dest='list', action='store_true',
                        help='List all available flows to be run.')
    args = parser.parse_args()

    if args.cron:
        print(build_cron_file())
    elif args.list:
        print(list_flows())
    elif args.flow:
        # Report an unknown flow as a usage error rather than a KeyError
        # traceback.
        if args.flow not in flows:
            parser.error('Unknown flow: {}'.format(args.flow))
        # The default is already a datetime; a value typed on the command
        # line arrives as a string and still has to be parsed.
        present = args.present
        if not isinstance(present, datetime):
            present = parse(present)
        run_flow(flows[args.flow], present)
    else:
        print("No actions specified, exiting.")
# Script entry point: runs only when this module is executed directly.
# NOTE(review): this diff view appears to show both the earlier direct
# LiborFlow build and the handle_commandline() call that replaces it --
# confirm which lines survive in the committed file.
if __name__ == '__main__':
l = LiborFlow(datetime(2016, 5, 9).date())
build([l], local_scheduler=True)
handle_commandline()

50
metrik/flows/base.py Normal file
View File

@ -0,0 +1,50 @@
from luigi.task import WrapperTask
from luigi.parameter import DateMinuteParameter
import pandas as pd
from metrik.trading_days import is_trading_day
class Flow(WrapperTask):
    """Wrapper task whose requirements are gated by a schedule.

    Subclasses provide ``get_schedule`` (when the flow should run) and
    ``_requires`` (which tasks it wraps).
    """

    # The point in time the flow is being run for.
    present = DateMinuteParameter()

    def __init__(self, force=False, *args, **kwargs):
        """Initialise the parent task, then remember the ``force`` flag."""
        super(Flow, self).__init__(*args, **kwargs)
        self.force = force

    @staticmethod
    def get_schedule():
        """Return the schedule of this flow; subclasses must override."""
        raise NotImplementedError('Your flow should know when it should be run.')

    def _requires(self):
        """Return the tasks wrapped by this flow; subclasses must override."""
        raise NotImplementedError('I need to know what tasks should be run!')

    def requires(self):
        """Return ``_requires()`` when forced or when the schedule triggers.

        Returns ``None`` when the flow is neither forced nor triggered for
        ``present``.
        """
        triggered = self.force or self.get_schedule().check_trigger(self.present)
        if not triggered:
            return None
        return self._requires()
class Schedule(object):
    """Base class describing when a flow should run."""

    def get_cron_string(self):
        """Return the cron expression for this schedule; must be overridden."""
        raise NotImplementedError()

    def check_trigger(self, present):
        """Report whether the schedule fires at ``present``.

        The base implementation accepts every value.
        """
        always_fires = True
        return always_fires
class DailyMidnight(Schedule):
    """Schedule whose cron expression fires at 00:00 every day."""

    def get_cron_string(self):
        """Return the cron expression for midnight on every day."""
        # Fields: minute hour day-of-month month day-of-week
        cron_expression = '0 0 * * *'
        return cron_expression
class WeekdayMidnight(Schedule):
    """Schedule whose cron expression fires at 00:00 on days 1-5 of the week."""

    def get_cron_string(self):
        """Return the cron expression for midnight, Monday through Friday."""
        # Fields: minute hour day-of-month month day-of-week
        cron_expression = '0 0 * * 1-5'
        return cron_expression
class MarketClose(Schedule):
    """Schedule firing at 16:05 on weekdays, but only on trading days."""

    def get_cron_string(self):
        """Return the cron expression for 16:05, Monday through Friday."""
        # Fields: minute hour day-of-month month day-of-week
        cron_expression = '5 16 * * 1-5'
        return cron_expression

    def check_trigger(self, present):
        """Return the result of ``is_trading_day`` for ``present``."""
        trading = is_trading_day(present)
        return trading

View File

@ -1,13 +0,0 @@
from luigi import WrapperTask, DateParameter, LocalTarget
from metrik.tasks.ice import LiborRateTask
from metrik.targets.temp_file import TempFileTarget
class LiborFlow(WrapperTask):
    """Wrapper task requiring one ``LiborRateTask`` per tracked currency."""

    # The date the rate tasks are created for.
    date = DateParameter()

    def requires(self):
        """Return a ``LiborRateTask`` for ``self.date`` and each currency."""
        tasks = []
        for currency in ('USD',):
            tasks.append(LiborRateTask(self.date, currency))
        return tasks

View File

@ -0,0 +1,14 @@
from metrik.flows.base import Flow, WeekdayMidnight
from metrik.tasks.ice import LiborRateTask
class LiborFlow(Flow):
    """Flow that requires one ``LiborRateTask`` per tracked currency."""

    @staticmethod
    def get_schedule():
        """Return the ``WeekdayMidnight`` schedule used by this flow."""
        schedule = WeekdayMidnight()
        return schedule

    def _requires(self):
        """Return a ``LiborRateTask`` for ``self.present`` and each currency."""
        tasks = []
        for currency in ('USD',):
            tasks.append(LiborRateTask(self.present, currency))
        return tasks

View File

@ -11,6 +11,7 @@ from luigi.parameter import DateParameter, Parameter
from six.moves.urllib.parse import quote_plus
from metrik.tasks.base import MongoCreateTask
from metrik.trading_days import TradingDay
LiborRate = namedtuple('LiborRate', [
'publication', 'overnight', 'one_week', 'one_month', 'two_month',
@ -27,7 +28,7 @@ class LiborRateTask(MongoCreateTask):
return 'libor'
@staticmethod
def retrieve_data(date, currency):
def retrieve_historical_libor(date, currency):
url = ('https://www.theice.com/marketdata/reports/icebenchmarkadmin/'
'ICELiborHistoricalRates.shtml?excelExport='
'&criteria.reportDate={}&criteria.currencyCode={}').format(
@ -80,3 +81,12 @@ class LiborRateTask(MongoCreateTask):
'six_month': six_month,
'one_year': one_year
}
@staticmethod
def retrieve_data(date, currency):
    """Fetch the LIBOR data for the trading day before ``date``.

    Args:
        date: The date the task is being run for.
        currency: Currency code forwarded to ``retrieve_historical_libor``.

    Returns:
        Whatever ``retrieve_historical_libor`` returns for the prior
        trading day.
    """
    # ICE publish data a day late, so the request is made for the trading
    # day prior to the requested date.
    previous_trading_day = date - TradingDay(1)
    return LiborRateTask.retrieve_historical_libor(previous_trading_day,
                                                   currency)

View File

@ -1,7 +1,8 @@
from pandas.tseries.holiday import AbstractHolidayCalendar, Holiday, \
nearest_workday, USMartinLutherKingJr, USPresidentsDay, GoodFriday,\
nearest_workday, USMartinLutherKingJr, USPresidentsDay, GoodFriday, \
USMemorialDay, USLaborDay, USThanksgivingDay
from pandas.tseries.offsets import CustomBusinessDay
from pandas import date_range
class USTradingCalendar(AbstractHolidayCalendar):
@ -17,5 +18,10 @@ class USTradingCalendar(AbstractHolidayCalendar):
Holiday('Christmas', month=12, day=25, observance=nearest_workday)
]
def TradingDay(n):
    """Return an offset of ``n`` business days on the US trading calendar.

    Args:
        n: Number of trading days the offset spans.

    Returns:
        A pandas ``CustomBusinessDay`` built with ``USTradingCalendar``.
    """
    # The block previously carried the same return statement twice; the
    # second copy was unreachable and has been dropped.
    return CustomBusinessDay(n, calendar=USTradingCalendar())
def is_trading_day(date):
    """Return True when ``date`` is a trading day.

    Builds a range from ``date`` to ``date`` stepped by one ``TradingDay``
    and reports whether that range contains any entry.
    """
    sessions = date_range(date, date, freq=TradingDay(1))
    return len(sessions) > 0

View File

@ -4,4 +4,5 @@ pyquery>=1.2.9
requests>=2.9.1
pymongo>=3.2
python-dateutil>=2.4.2
pandas>=0.17.1
pandas>=0.17.1
argparse>=1.1.0

View File

@ -15,7 +15,8 @@ setup(
'pymongo >= 3.2',
'pytz >= 2016.6.1',
'python-dateutil >= 2.4.2',
'pandas >= 0.17.1'
'pandas >= 0.17.1',
'argparse >= 1.1.0'
],
setup_requires=[
'pytest_runner'
@ -23,5 +24,10 @@ setup(
tests_require=[
'pytest',
'pytest-catchlog'
]
],
entry_points={
'console_scripts': [
'metrik = metrik.batch:handle_commandline'
]
}
)

View File

@ -11,7 +11,8 @@ class TestICE(TestCase):
def test_correct_libor_Aug8_2016(self):
# Validate with:
# https://www.theice.com/marketdata/reports/icebenchmarkadmin/ICELiborHistoricalRates.shtml?excelExport=&criteria.reportDate=8%2F8%2F16&criteria.currencyCode=USD
aug8_libor = LiborRateTask.retrieve_data(datetime(2016, 8, 8), 'USD')
aug8_libor = LiborRateTask.retrieve_historical_libor(
datetime(2016, 8, 8), 'USD')
assert aug8_libor['overnight'] == .4189
assert aug8_libor['one_week'] == .4431
@ -23,13 +24,15 @@ class TestICE(TestCase):
london_tz = pytz.timezone('Europe/London')
actual = london_tz.localize(datetime(2016, 8, 8, 11, 45, 6))
logging.info('Publication date in London time: {}'.format(aug8_libor['publication'].astimezone(london_tz)))
logging.info('Publication date in London time: {}'.format(
aug8_libor['publication'].astimezone(london_tz)))
assert aug8_libor['publication'] == actual
def test_correct_libor_Aug9_2010(self):
# Validate with:
# https://www.theice.com/marketdata/reports/icebenchmarkadmin/ICELiborHistoricalRates.shtml?excelExport=&criteria.reportDate=8%2F9%2F10&criteria.currencyCode=USD
aug9_libor = LiborRateTask.retrieve_data(datetime(2010, 8, 9), 'USD')
aug9_libor = LiborRateTask.retrieve_historical_libor(
datetime(2010, 8, 9), 'USD')
assert aug9_libor['overnight'] == .23656
assert aug9_libor['one_week'] == .27725
@ -41,7 +44,8 @@ class TestICE(TestCase):
london_tz = pytz.timezone('Europe/London')
actual = london_tz.localize(datetime(2010, 8, 9, 15, 49, 12))
logging.info('Publication date in London time: {}'.format(aug9_libor['publication'].astimezone(london_tz)))
logging.info('Publication date in London time: {}'.format(
aug9_libor['publication'].astimezone(london_tz)))
assert aug9_libor['publication'] == actual
def test_correct_date_reasoning(self):

View File

@ -1,11 +1,10 @@
from unittest import TestCase
from datetime import datetime
from metrik.trading_days import TradingDay
from metrik.trading_days import TradingDay, is_trading_day
class TradingDayTest(TestCase):
def test_skip_july4(self):
start = datetime(2016, 7, 1) # Friday
end = start + TradingDay(1)
@ -14,4 +13,12 @@ class TradingDayTest(TestCase):
def test_skip_july4_backwards(self):
end = datetime(2016, 7, 5)
start = end - TradingDay(1)
assert start == datetime(2016, 7, 1)
assert start == datetime(2016, 7, 1)
def test_not_bday(self):
    """July 4th must not count as a trading day for 2000 through 2015."""
    july_fourths = [datetime(year, 7, 4) for year in range(2000, 2016)]
    for holiday in july_fourths:
        assert not is_trading_day(holiday)
def test_is_bday(self):
    """23 August 2016 must be reported as a trading day."""
    ordinary_day = datetime(2016, 8, 23)
    assert is_trading_day(ordinary_day)