diff --git a/.idea/misc.xml b/.idea/misc.xml
index a1539c0..e659c40 100644
--- a/.idea/misc.xml
+++ b/.idea/misc.xml
@@ -54,7 +54,7 @@
-
+
diff --git a/metrik/__init__.py b/metrik/__init__.py
index e69de29..40f30c5 100644
--- a/metrik/__init__.py
+++ b/metrik/__init__.py
@@ -0,0 +1,2 @@
+__version__ = 0.1
+__release__ = __version__
\ No newline at end of file
diff --git a/metrik/batch.py b/metrik/batch.py
index e7e4ef4..178dc9a 100644
--- a/metrik/batch.py
+++ b/metrik/batch.py
@@ -1,9 +1,64 @@
+from __future__ import print_function
from luigi import build
-from metrik.flows.libor_flow import LiborFlow
from datetime import datetime
+from argparse import ArgumentParser
+from dateutil.parser import parse
+
+from metrik.flows.rates_flow import LiborFlow
+
+flows = {
+ 'LiborFlow': LiborFlow
+}
+
+
+def run_flow(flow_class, present):
+ build([flow_class(present=present)])
+
+
+def build_cron_file():
+ EXEC = 'metrik'
+ FLOW_FLAG = '-f'
+
+ cron_strings = []
+ for flow_name, flow_class in flows.items():
+ cron_string = flow_class.get_schedule().get_cron_string()
+ cron_strings.append(
+ cron_string + ' ' + EXEC + ' ' + FLOW_FLAG + ' ' + flow_name
+ )
+
+ return '\n'.join(cron_strings)
+
+
+def list_flows():
+ pass
+
+
+def handle_commandline():
+ parser = ArgumentParser(description='Capture ALL THE DATA off the Internet.')
+ parser.add_argument('-c', '--cron', dest='cron', action='store_true',
+ help='Build the cron file used to schedule'
+ 'running all flows')
+ parser.add_argument('-d', '--date', dest='present',
+ help='Run a flow as if it was this time '
+ '(default: %(default)s).',
+ default=datetime.now())
+ parser.add_argument('-f', '--flow', dest='flow', help='The flow to be run')
+ parser.add_argument('-l', '--list-flows', dest='list', action='store_true',
+ help='List all available flows to be run.')
+ args = parser.parse_args()
+
+ if args.cron:
+ print(build_cron_file())
+ elif args.list:
+ print(list_flows())
+ elif args.flow:
+ if type(args.present) is datetime:
+ run_flow(flows[args.flow], args.present)
+ else:
+ run_flow(flows[args.flow], parse(args.present))
+ else:
+ print("No actions specified, exiting.")
if __name__ == '__main__':
- l = LiborFlow(datetime(2016, 5, 9).date())
-
- build([l], local_scheduler=True)
\ No newline at end of file
+ handle_commandline()
diff --git a/metrik/flows/base.py b/metrik/flows/base.py
new file mode 100644
index 0000000..0d42172
--- /dev/null
+++ b/metrik/flows/base.py
@@ -0,0 +1,50 @@
+from luigi.task import WrapperTask
+from luigi.parameter import DateMinuteParameter
+import pandas as pd
+
+from metrik.trading_days import is_trading_day
+
+
+class Flow(WrapperTask):
+ present = DateMinuteParameter()
+
+ def __init__(self, force=False, *args, **kwargs):
+ super(Flow, self).__init__(*args, **kwargs)
+ self.force = force
+
+ @staticmethod
+ def get_schedule():
+ raise NotImplementedError('Your flow should know when it should be run.')
+
+ def _requires(self):
+ raise NotImplementedError('I need to know what tasks should be run!')
+
+ def requires(self):
+ if self.force or self.get_schedule().check_trigger(self.present):
+ return self._requires()
+
+
+class Schedule(object):
+ def get_cron_string(self):
+ raise NotImplementedError()
+
+ def check_trigger(self, present):
+ return True
+
+
+class DailyMidnight(Schedule):
+ def get_cron_string(self):
+ return '0 0 * * *'
+
+
+class WeekdayMidnight(Schedule):
+ def get_cron_string(self):
+ return '0 0 * * 1-5'
+
+
+class MarketClose(Schedule):
+ def get_cron_string(self):
+ return '5 16 * * 1-5'
+
+ def check_trigger(self, present):
+ return is_trading_day(present)
diff --git a/metrik/flows/libor_flow.py b/metrik/flows/libor_flow.py
deleted file mode 100644
index 37f843c..0000000
--- a/metrik/flows/libor_flow.py
+++ /dev/null
@@ -1,13 +0,0 @@
-from luigi import WrapperTask, DateParameter, LocalTarget
-
-from metrik.tasks.ice import LiborRateTask
-from metrik.targets.temp_file import TempFileTarget
-
-
-class LiborFlow(WrapperTask):
- date = DateParameter()
-
- def requires(self):
- currencies = ['USD']
- return [LiborRateTask(self.date, currency)
- for currency in currencies]
diff --git a/metrik/flows/rates_flow.py b/metrik/flows/rates_flow.py
new file mode 100644
index 0000000..8bbde71
--- /dev/null
+++ b/metrik/flows/rates_flow.py
@@ -0,0 +1,14 @@
+from metrik.flows.base import Flow, WeekdayMidnight
+from metrik.tasks.ice import LiborRateTask
+
+
+class LiborFlow(Flow):
+
+ @staticmethod
+ def get_schedule():
+ return WeekdayMidnight()
+
+ def _requires(self):
+ currencies = ['USD']
+ return [LiborRateTask(self.present, currency)
+ for currency in currencies]
diff --git a/metrik/tasks/ice.py b/metrik/tasks/ice.py
index 7294d75..f2fef0e 100644
--- a/metrik/tasks/ice.py
+++ b/metrik/tasks/ice.py
@@ -11,6 +11,7 @@ from luigi.parameter import DateParameter, Parameter
from six.moves.urllib.parse import quote_plus
from metrik.tasks.base import MongoCreateTask
+from metrik.trading_days import TradingDay
LiborRate = namedtuple('LiborRate', [
'publication', 'overnight', 'one_week', 'one_month', 'two_month',
@@ -27,7 +28,7 @@ class LiborRateTask(MongoCreateTask):
return 'libor'
@staticmethod
- def retrieve_data(date, currency):
+ def retrieve_historical_libor(date, currency):
url = ('https://www.theice.com/marketdata/reports/icebenchmarkadmin/'
'ICELiborHistoricalRates.shtml?excelExport='
'&criteria.reportDate={}&criteria.currencyCode={}').format(
@@ -80,3 +81,12 @@ class LiborRateTask(MongoCreateTask):
'six_month': six_month,
'one_year': one_year
}
+
+ @staticmethod
+ def retrieve_data(date, currency):
+ # ICE publish data a day late, so we actually need to retrieve data
+ # for the trading day prior to this.
+ return LiborRateTask.retrieve_historical_libor(
+ date - TradingDay(1),
+ currency
+ )
diff --git a/metrik/trading_days.py b/metrik/trading_days.py
index e179789..9193984 100644
--- a/metrik/trading_days.py
+++ b/metrik/trading_days.py
@@ -1,7 +1,8 @@
from pandas.tseries.holiday import AbstractHolidayCalendar, Holiday, \
- nearest_workday, USMartinLutherKingJr, USPresidentsDay, GoodFriday,\
+ nearest_workday, USMartinLutherKingJr, USPresidentsDay, GoodFriday, \
USMemorialDay, USLaborDay, USThanksgivingDay
from pandas.tseries.offsets import CustomBusinessDay
+from pandas import date_range
class USTradingCalendar(AbstractHolidayCalendar):
@@ -17,5 +18,10 @@ class USTradingCalendar(AbstractHolidayCalendar):
Holiday('Christmas', month=12, day=25, observance=nearest_workday)
]
+
def TradingDay(n):
- return CustomBusinessDay(n, calendar=USTradingCalendar())
\ No newline at end of file
+ return CustomBusinessDay(n, calendar=USTradingCalendar())
+
+
+def is_trading_day(date):
+ return bool(len(date_range(date, date, freq=TradingDay(1))))
diff --git a/requirements.txt b/requirements.txt
index 2971921..002610a 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -4,4 +4,5 @@ pyquery>=1.2.9
requests>=2.9.1
pymongo>=3.2
python-dateutil>=2.4.2
-pandas>=0.17.1
\ No newline at end of file
+pandas>=0.17.1
+argparse>=1.1.0
\ No newline at end of file
diff --git a/setup.py b/setup.py
index 9c3ede9..55f1b36 100644
--- a/setup.py
+++ b/setup.py
@@ -15,7 +15,8 @@ setup(
'pymongo >= 3.2',
'pytz >= 2016.6.1',
'python-dateutil >= 2.4.2',
- 'pandas >= 0.17.1'
+ 'pandas >= 0.17.1',
+ 'argparse >= 1.1.0'
],
setup_requires=[
'pytest_runner'
@@ -23,5 +24,10 @@ setup(
tests_require=[
'pytest',
'pytest-catchlog'
- ]
+ ],
+ entry_points={
+ 'console_scripts': [
+ 'metrik = metrik.batch:handle_commandline'
+ ]
+ }
)
\ No newline at end of file
diff --git a/test/tasks/test_ice.py b/test/tasks/test_ice.py
index 1d44a69..c9f9e5b 100644
--- a/test/tasks/test_ice.py
+++ b/test/tasks/test_ice.py
@@ -11,7 +11,8 @@ class TestICE(TestCase):
def test_correct_libor_Aug8_2016(self):
# Validate with:
# https://www.theice.com/marketdata/reports/icebenchmarkadmin/ICELiborHistoricalRates.shtml?excelExport=&criteria.reportDate=8%2F8%2F16&criteria.currencyCode=USD
- aug8_libor = LiborRateTask.retrieve_data(datetime(2016, 8, 8), 'USD')
+ aug8_libor = LiborRateTask.retrieve_historical_libor(
+ datetime(2016, 8, 8), 'USD')
assert aug8_libor['overnight'] == .4189
assert aug8_libor['one_week'] == .4431
@@ -23,13 +24,15 @@ class TestICE(TestCase):
london_tz = pytz.timezone('Europe/London')
actual = london_tz.localize(datetime(2016, 8, 8, 11, 45, 6))
- logging.info('Publication date in London time: {}'.format(aug8_libor['publication'].astimezone(london_tz)))
+ logging.info('Publication date in London time: {}'.format(
+ aug8_libor['publication'].astimezone(london_tz)))
assert aug8_libor['publication'] == actual
def test_correct_libor_Aug9_2010(self):
# Validate with:
# https://www.theice.com/marketdata/reports/icebenchmarkadmin/ICELiborHistoricalRates.shtml?excelExport=&criteria.reportDate=8%2F9%2F10&criteria.currencyCode=USD
- aug9_libor = LiborRateTask.retrieve_data(datetime(2010, 8, 9), 'USD')
+ aug9_libor = LiborRateTask.retrieve_historical_libor(
+ datetime(2010, 8, 9), 'USD')
assert aug9_libor['overnight'] == .23656
assert aug9_libor['one_week'] == .27725
@@ -41,7 +44,8 @@ class TestICE(TestCase):
london_tz = pytz.timezone('Europe/London')
actual = london_tz.localize(datetime(2010, 8, 9, 15, 49, 12))
- logging.info('Publication date in London time: {}'.format(aug9_libor['publication'].astimezone(london_tz)))
+ logging.info('Publication date in London time: {}'.format(
+ aug9_libor['publication'].astimezone(london_tz)))
assert aug9_libor['publication'] == actual
def test_correct_date_reasoning(self):
diff --git a/test/test_trading_days.py b/test/test_trading_days.py
index dfe3d74..157825d 100644
--- a/test/test_trading_days.py
+++ b/test/test_trading_days.py
@@ -1,11 +1,10 @@
from unittest import TestCase
from datetime import datetime
-from metrik.trading_days import TradingDay
+from metrik.trading_days import TradingDay, is_trading_day
class TradingDayTest(TestCase):
-
def test_skip_july4(self):
start = datetime(2016, 7, 1) # Friday
end = start + TradingDay(1)
@@ -14,4 +13,12 @@ class TradingDayTest(TestCase):
def test_skip_july4_backwards(self):
end = datetime(2016, 7, 5)
start = end - TradingDay(1)
- assert start == datetime(2016, 7, 1)
\ No newline at end of file
+ assert start == datetime(2016, 7, 1)
+
+ def test_not_bday(self):
+ for year in range(2000, 2016):
+ date = datetime(year, 7, 4)
+ assert not is_trading_day(date)
+
+ def test_is_bday(self):
+ assert is_trading_day(datetime(2016, 8, 23))