# Reconstructed from a whitespace-collapsed git patch. The patch also touched
# .idea/misc.xml (one line changed in the hunk at line 54); that hunk's
# content is not recoverable from this view.

# ---------------------------------------------------------------------------
# metrik/tasks/cboe.py
# ---------------------------------------------------------------------------
import requests
import pandas as pd
from six import StringIO

from metrik.tasks.base import MongoNoBackCreateTask

# Location of the symbol directory CSV downloaded by CboeOptionableList.
_SYMBOL_DIRECTORY_URL = ('http://www.cboe.com/publish/scheduledtask/'
                         'mktdata/cboesymboldir2.csv')

# Seconds to wait on the server before giving up. Without an explicit
# timeout, requests may block indefinitely on an unresponsive host.
_REQUEST_TIMEOUT = 30


def _parse_symbol_directory(csv_str):
    """Convert the raw symbol directory text into a list of row dicts.

    Row 0 of the text is skipped and row 1 is used as the column header.
    Because some of the fields include extra commas, only the first
    ``num_cols - 1`` commas of each row are treated as separators; any
    further commas stay inside the last column.

    :param csv_str: decoded CSV text, with CRLF or LF line endings
    :return: one dict per data row, keyed by the header's column names
    :raises IndexError: if the text has fewer than two lines
    """
    old_sep = ','
    new_sep = '|'

    # Normalise line endings first so parsing does not depend on the server
    # sending CRLF.
    csv_rows = csv_str.replace('\r\n', '\n').split('\n')
    csv_header = csv_rows[1]
    num_cols = len(csv_header.split(old_sep))

    # Swap only the structural commas for a separator pandas can split on.
    csv_content = [r.replace(old_sep, new_sep, num_cols - 1)
                   for r in csv_rows[1:]]
    csv_filelike = StringIO('\n'.join(csv_content))

    company_csv = pd.read_csv(csv_filelike, sep=new_sep)
    return company_csv.to_dict(orient='records')


class CboeOptionableList(MongoNoBackCreateTask):
    """Task that downloads the CBOE symbol directory CSV."""

    def get_collection_name(self):
        """Return the collection name used for this task's records."""
        return 'cboe_optionable_list'

    @staticmethod
    def retrieve_data(*args, **kwargs):
        """Download the symbol directory and return its rows.

        Positional and keyword arguments are accepted but ignored.

        :return: ``{'companies': [...]}`` with one dict per CSV data row
        :raises requests.RequestException: on connection failure, timeout,
            or a non-success HTTP status
        :raises UnicodeDecodeError: if the payload is not pure ASCII
        """
        # Explicitly use requests to make mocking easy
        response = requests.get(_SYMBOL_DIRECTORY_URL,
                                timeout=_REQUEST_TIMEOUT)
        # Fail loudly on an HTTP error instead of parsing an error page.
        response.raise_for_status()
        csv_str = response.content.decode('ascii')

        return {'companies': _parse_symbol_directory(csv_str)}


# ---------------------------------------------------------------------------
# test/tasks/test_cboe.py
# ---------------------------------------------------------------------------
from unittest import TestCase

from metrik.tasks.cboe import CboeOptionableList


class CboeTest(TestCase):
    """Checks for CboeOptionableList.

    Nothing is mocked here, so ``retrieve_data`` performs a real download.
    """

    def test_optionable_list(self):
        companies = CboeOptionableList.retrieve_data()['companies']
        # assertGreater is not stripped under ``python -O`` and reports
        # both operands on failure.
        self.assertGreater(len(companies), 2000)
        # TODO: Get lists of companies from ETF holdings and verify that they
        # can be found here as well - this should be a superset