diff --git a/.idea/misc.xml b/.idea/misc.xml index a1539c0..e659c40 100644 --- a/.idea/misc.xml +++ b/.idea/misc.xml @@ -54,7 +54,7 @@ - + diff --git a/metrik/__init__.py b/metrik/__init__.py index e69de29..40f30c5 100644 --- a/metrik/__init__.py +++ b/metrik/__init__.py @@ -0,0 +1,2 @@ +__version__ = 0.1 +__release__ = __version__ \ No newline at end of file diff --git a/metrik/batch.py b/metrik/batch.py index e7e4ef4..178dc9a 100644 --- a/metrik/batch.py +++ b/metrik/batch.py @@ -1,9 +1,64 @@ +from __future__ import print_function from luigi import build -from metrik.flows.libor_flow import LiborFlow from datetime import datetime +from argparse import ArgumentParser +from dateutil.parser import parse + +from metrik.flows.rates_flow import LiborFlow + +flows = { + 'LiborFlow': LiborFlow +} + + +def run_flow(flow_class, present): + build([flow_class(present=present)]) + + +def build_cron_file(): + EXEC = 'metrik' + FLOW_FLAG = '-f' + + cron_strings = [] + for flow_name, flow_class in flows.items(): + cron_string = flow_class.get_schedule().get_cron_string() + cron_strings.append( + cron_string + ' ' + EXEC + ' ' + FLOW_FLAG + ' ' + flow_name + ) + + return '\n'.join(cron_strings) + + +def list_flows(): + pass + + +def handle_commandline(): + parser = ArgumentParser(description='Capture ALL THE DATA off the Internet.') + parser.add_argument('-c', '--cron', dest='cron', action='store_true', + help='Build the cron file used to schedule' + 'running all flows') + parser.add_argument('-d', '--date', dest='present', + help='Run a flow as if it was this time ' + '(default: %(default)s).', + default=datetime.now()) + parser.add_argument('-f', '--flow', dest='flow', help='The flow to be run') + parser.add_argument('-l', '--list-flows', dest='list', action='store_true', + help='List all available flows to be run.') + args = parser.parse_args() + + if args.cron: + print(build_cron_file()) + elif args.list: + print(list_flows()) + elif args.flow: + if type(args.present) is datetime: + run_flow(flows[args.flow], args.present) + else: + run_flow(flows[args.flow], parse(args.present)) + else: + print("No actions specified, exiting.") if __name__ == '__main__': - l = LiborFlow(datetime(2016, 5, 9).date()) - - build([l], local_scheduler=True) \ No newline at end of file + handle_commandline() diff --git a/metrik/flows/base.py b/metrik/flows/base.py new file mode 100644 index 0000000..0d42172 --- /dev/null +++ b/metrik/flows/base.py @@ -0,0 +1,50 @@ +from luigi.task import WrapperTask +from luigi.parameter import DateMinuteParameter +import pandas as pd + +from metrik.trading_days import is_trading_day + + +class Flow(WrapperTask): + present = DateMinuteParameter() + + def __init__(self, force=False, *args, **kwargs): + super(Flow, self).__init__(*args, **kwargs) + self.force = force + + @staticmethod + def get_schedule(): + raise NotImplementedError('Your flow should know when it should be run.') + + def _requires(self): + raise NotImplementedError('I need to know what tasks should be run!') + + def requires(self): + if self.force or self.get_schedule().check_trigger(self.present): + return self._requires() + + +class Schedule(object): + def get_cron_string(self): + raise NotImplementedError() + + def check_trigger(self, present): + return True + + +class DailyMidnight(Schedule): + def get_cron_string(self): + return '0 0 * * *' + + +class WeekdayMidnight(Schedule): + def get_cron_string(self): + return '0 0 * * 1-5' + + +class MarketClose(Schedule): + def get_cron_string(self): + return '5 16 * * 1-5' + + def check_trigger(self, present): + return is_trading_day(present) diff --git a/metrik/flows/libor_flow.py b/metrik/flows/libor_flow.py deleted file mode 100644 index 37f843c..0000000 --- a/metrik/flows/libor_flow.py +++ /dev/null @@ -1,13 +0,0 @@ -from luigi import WrapperTask, DateParameter, LocalTarget - -from metrik.tasks.ice import LiborRateTask -from metrik.targets.temp_file import TempFileTarget - - -class LiborFlow(WrapperTask): - date = DateParameter() - - def requires(self): - currencies = ['USD'] - return [LiborRateTask(self.date, currency) - for currency in currencies] diff --git a/metrik/flows/rates_flow.py b/metrik/flows/rates_flow.py new file mode 100644 index 0000000..8bbde71 --- /dev/null +++ b/metrik/flows/rates_flow.py @@ -0,0 +1,14 @@ +from metrik.flows.base import Flow, WeekdayMidnight +from metrik.tasks.ice import LiborRateTask + + +class LiborFlow(Flow): + + @staticmethod + def get_schedule(): + return WeekdayMidnight() + + def _requires(self): + currencies = ['USD'] + return [LiborRateTask(self.present, currency) + for currency in currencies] diff --git a/metrik/tasks/ice.py b/metrik/tasks/ice.py index 7294d75..f2fef0e 100644 --- a/metrik/tasks/ice.py +++ b/metrik/tasks/ice.py @@ -11,6 +11,7 @@ from luigi.parameter import DateParameter, Parameter from six.moves.urllib.parse import quote_plus from metrik.tasks.base import MongoCreateTask +from metrik.trading_days import TradingDay LiborRate = namedtuple('LiborRate', [ 'publication', 'overnight', 'one_week', 'one_month', 'two_month', @@ -27,7 +28,7 @@ class LiborRateTask(MongoCreateTask): return 'libor' @staticmethod - def retrieve_data(date, currency): + def retrieve_historical_libor(date, currency): url = ('https://www.theice.com/marketdata/reports/icebenchmarkadmin/' 'ICELiborHistoricalRates.shtml?excelExport=' '&criteria.reportDate={}&criteria.currencyCode={}').format( @@ -80,3 +81,12 @@ class LiborRateTask(MongoCreateTask): 'six_month': six_month, 'one_year': one_year } + + @staticmethod + def retrieve_data(date, currency): + # ICE publish data a day late, so we actually need to retrieve data + # for the trading day prior to this. + return LiborRateTask.retrieve_historical_libor( + date - TradingDay(1), + currency + ) diff --git a/metrik/trading_days.py b/metrik/trading_days.py index e179789..9193984 100644 --- a/metrik/trading_days.py +++ b/metrik/trading_days.py @@ -1,7 +1,8 @@ from pandas.tseries.holiday import AbstractHolidayCalendar, Holiday, \ - nearest_workday, USMartinLutherKingJr, USPresidentsDay, GoodFriday,\ + nearest_workday, USMartinLutherKingJr, USPresidentsDay, GoodFriday, \ USMemorialDay, USLaborDay, USThanksgivingDay from pandas.tseries.offsets import CustomBusinessDay +from pandas import date_range class USTradingCalendar(AbstractHolidayCalendar): @@ -17,5 +18,10 @@ class USTradingCalendar(AbstractHolidayCalendar): Holiday('Christmas', month=12, day=25, observance=nearest_workday) ] + def TradingDay(n): - return CustomBusinessDay(n, calendar=USTradingCalendar()) \ No newline at end of file + return CustomBusinessDay(n, calendar=USTradingCalendar()) + + +def is_trading_day(date): + return bool(len(date_range(date, date, freq=TradingDay(1)))) diff --git a/requirements.txt b/requirements.txt index 2971921..002610a 100644 --- a/requirements.txt +++ b/requirements.txt @@ -4,4 +4,5 @@ pyquery>=1.2.9 requests>=2.9.1 pymongo>=3.2 python-dateutil>=2.4.2 -pandas>=0.17.1 \ No newline at end of file +pandas>=0.17.1 +argparse>=1.1.0 \ No newline at end of file diff --git a/setup.py b/setup.py index 9c3ede9..55f1b36 100644 --- a/setup.py +++ b/setup.py @@ -15,7 +15,8 @@ setup( 'pymongo >= 3.2', 'pytz >= 2016.6.1', 'python-dateutil >= 2.4.2', - 'pandas >= 0.17.1' + 'pandas >= 0.17.1', + 'argparse >= 1.1.0' ], setup_requires=[ 'pytest_runner' @@ -23,5 +24,10 @@ setup( tests_require=[ 'pytest', 'pytest-catchlog' - ] + ], + entry_points={ + 'console_scripts': [ + 'metrik = metrik.batch:handle_commandline' + ] + } ) \ No newline at end of file diff --git a/test/tasks/test_ice.py b/test/tasks/test_ice.py index 1d44a69..c9f9e5b 100644 --- a/test/tasks/test_ice.py +++ b/test/tasks/test_ice.py @@ -11,7 +11,8 @@ class TestICE(TestCase): def test_correct_libor_Aug8_2016(self): # Validate with: # https://www.theice.com/marketdata/reports/icebenchmarkadmin/ICELiborHistoricalRates.shtml?excelExport=&criteria.reportDate=8%2F8%2F16&criteria.currencyCode=USD - aug8_libor = LiborRateTask.retrieve_data(datetime(2016, 8, 8), 'USD') + aug8_libor = LiborRateTask.retrieve_historical_libor( + datetime(2016, 8, 8), 'USD') assert aug8_libor['overnight'] == .4189 assert aug8_libor['one_week'] == .4431 @@ -23,13 +24,15 @@ class TestICE(TestCase): london_tz = pytz.timezone('Europe/London') actual = london_tz.localize(datetime(2016, 8, 8, 11, 45, 6)) - logging.info('Publication date in London time: {}'.format(aug8_libor['publication'].astimezone(london_tz))) + logging.info('Publication date in London time: {}'.format( + aug8_libor['publication'].astimezone(london_tz))) assert aug8_libor['publication'] == actual def test_correct_libor_Aug9_2010(self): # Validate with: # https://www.theice.com/marketdata/reports/icebenchmarkadmin/ICELiborHistoricalRates.shtml?excelExport=&criteria.reportDate=8%2F9%2F10&criteria.currencyCode=USD - aug9_libor = LiborRateTask.retrieve_data(datetime(2010, 8, 9), 'USD') + aug9_libor = LiborRateTask.retrieve_historical_libor( + datetime(2010, 8, 9), 'USD') assert aug9_libor['overnight'] == .23656 assert aug9_libor['one_week'] == .27725 @@ -41,7 +44,8 @@ class TestICE(TestCase): london_tz = pytz.timezone('Europe/London') actual = london_tz.localize(datetime(2010, 8, 9, 15, 49, 12)) - logging.info('Publication date in London time: {}'.format(aug9_libor['publication'].astimezone(london_tz))) + logging.info('Publication date in London time: {}'.format( + aug9_libor['publication'].astimezone(london_tz))) assert aug9_libor['publication'] == actual def test_correct_date_reasoning(self): diff --git a/test/test_trading_days.py b/test/test_trading_days.py index dfe3d74..157825d 100644 --- a/test/test_trading_days.py +++ b/test/test_trading_days.py @@ -1,11 +1,10 @@ from unittest import TestCase from datetime import datetime -from metrik.trading_days import TradingDay +from metrik.trading_days import TradingDay, is_trading_day class TradingDayTest(TestCase): - def test_skip_july4(self): start = datetime(2016, 7, 1) # Friday end = start + TradingDay(1) @@ -14,4 +13,12 @@ class TradingDayTest(TestCase): def test_skip_july4_backwards(self): end = datetime(2016, 7, 5) start = end - TradingDay(1) - assert start == datetime(2016, 7, 1) \ No newline at end of file + assert start == datetime(2016, 7, 1) + + def test_not_bday(self): + for year in range(2000, 2016): + date = datetime(year, 7, 4) + assert not is_trading_day(date) + + def test_is_bday(self): + assert is_trading_day(datetime(2016, 8, 23))