From d48649c5657c23acff09017c82b134d4ca8605a3 Mon Sep 17 00:00:00 2001
From: Bradlee Speice
Date: Thu, 1 Sep 2016 14:22:58 -0400
Subject: [PATCH] Add initial State Street holdings functionality

---
 .idea/inspectionProfiles/Project_Default.xml | 10 ++--
 metrik/tasks/state_street.py                 | 55 ++++++++++++++++++++
 test/tasks/test_state_street.py              | 51 +++++++++++++++++++
 3 files changed, 110 insertions(+), 6 deletions(-)
 create mode 100644 metrik/tasks/state_street.py
 create mode 100644 test/tasks/test_state_street.py

diff --git a/.idea/inspectionProfiles/Project_Default.xml b/.idea/inspectionProfiles/Project_Default.xml
index a446b12..6459c4f 100644
--- a/.idea/inspectionProfiles/Project_Default.xml
+++ b/.idea/inspectionProfiles/Project_Default.xml
@@ -4,13 +4,11 @@
diff --git a/metrik/tasks/state_street.py b/metrik/tasks/state_street.py
new file mode 100644
index 0000000..954ddb6
--- /dev/null
+++ b/metrik/tasks/state_street.py
@@ -0,0 +1,55 @@
+import requests
+from luigi.parameter import Parameter
+import pandas as pd
+
+from metrik.tasks.base import MongoNoBackCreateTask
+
+
+class StateStreetHoldings(MongoNoBackCreateTask):
+    """Holdings of a State Street SPDR fund, read from its published sheet."""
+    ticker = Parameter()  # type: str
+
+    @staticmethod
+    def retrieve_data(ticker, current_datetime, live):
+        """Download and parse the "All Holdings" workbook for `ticker`.
+
+        Returns a dict with one entry per metadata row found above the table
+        (label to value) plus a `holdings` list with one record per holding.
+        `current_datetime` and `live` are accepted but not used here.
+        """
+        # TODO: Actually make this static
+        base_url = 'https://www.spdrs.com/site-content/xls/{fund}_All_Holdings.xls'
+        fund_url = base_url.format(fund=ticker)
+
+        excel_content = pd.read_excel(fund_url, header=None)
+
+        # The actual stuff we care about is arranged in tabular format, thus
+        # we actually want to get the rows where the far-right column is
+        # not null.
+        final_column_index = len(excel_content.columns) - 1
+        # And build a series of True/False for "We do want this row" and
+        # "we do not want this row" respectively
+        do_retain = excel_content[final_column_index].notnull()
+        retain_index = do_retain[do_retain].index
+
+        # Actual content is in rows 2 onwards. Use .loc rather than .ix: .ix
+        # is gone from newer pandas, and since no index_col is passed the row
+        # labels are the default integer range, so the selection is unchanged.
+        holding_df = excel_content.loc[retain_index[1:]]
+        # Headers are in row 1
+        holding_df.columns = excel_content.loc[retain_index[0]]
+
+        # And also get the metadata that are in the rows prior to content
+        metadata = excel_content.loc[0:retain_index[0]-1].dropna(axis=1)
+        metadata_dict = {row[0].strip(':'): row[1]
+                         for i, row in metadata.iterrows()}
+
+        return dict(
+            # 'records' is the full spelling; the short form is rejected
+            holdings=holding_df.to_dict(orient='records'),
+            **metadata_dict
+        )
+
+    def get_collection_name(self):
+        """Return the collection name used for State Street holdings."""
+        return 'state_street_holdings'
diff --git a/test/tasks/test_state_street.py b/test/tasks/test_state_street.py
new file mode 100644
index 0000000..448ea13
--- /dev/null
+++ b/test/tasks/test_state_street.py
@@ -0,0 +1,51 @@
+# coding=utf-8
+from unittest import TestCase
+from datetime import datetime
+
+from metrik.tasks.state_street import StateStreetHoldings
+
+
+class StateStreetHoldingTest(TestCase):
+    """Integration tests: every case downloads the live holdings workbook."""
+
+    def test_spy_holdings(self):
+        holdings_dict = StateStreetHoldings.retrieve_data(
+            'SPY', datetime.now(), True
+        )
+
+        self.assertEqual(holdings_dict['Ticker Symbol'], 'SPY')
+        self.assertEqual(holdings_dict['Fund Name'], u'SPDR® S&P 500® ETF')
+        self.assertGreaterEqual(len(holdings_dict['holdings']), 500)
+        # Long live AAPL
+        self.assertEqual(holdings_dict['holdings'][0]['Identifier'], u'AAPL')
+
+    def test_sdy_holdings(self):
+        holdings_dict = StateStreetHoldings.retrieve_data(
+            'SDY', datetime.now(), True
+        )
+
+        self.assertEqual(holdings_dict['Ticker Symbol'], 'SDY')
+        self.assertEqual(holdings_dict['Fund Name'], u'SPDR® S&P® Dividend ETF')
+        self.assertEqual(holdings_dict['holdings'][0]['Identifier'], 'HCP')
+
+    def test_spyd_holdings(self):
+        holdings_dict = StateStreetHoldings.retrieve_data(
+            'SPYD', datetime.now(), True
+        )
+
+        self.assertEqual(holdings_dict['Ticker Symbol'], 'SPYD')
+        self.assertEqual(holdings_dict['Fund Name'], u'SPDR® S&P® 500 High Dividend ETF')
+
+    def test_r3k_holdings(self):
+        holdings_dict = StateStreetHoldings.retrieve_data(
+            'THRK', datetime.now(), True
+        )
+
+        self.assertEqual(holdings_dict['Ticker Symbol'], 'THRK')
+        self.assertEqual(holdings_dict['Fund Name'], u'SPDR Russell 3000® ETF')
+        # Interesting story: the fund is not required to actually invest in all
+        # 3000 Russell equities, but just seeks to track the index in general.
+        # That's why the test is against 2000, not 3000.
+        # This also means that we can't check lists of say iShares against this
+        # because they're not guaranteed to be consistent.
+        self.assertGreaterEqual(len(holdings_dict['holdings']), 2000)