mirror of
https://github.com/bspeice/metrik
synced 2024-11-23 15:48:10 -05:00
Add NASDAQ Company and ETF lists
This commit is contained in:
parent
c42f5a6980
commit
88c4401960
6
metrik/targets/noop.py
Normal file
6
metrik/targets/noop.py
Normal file
@ -0,0 +1,6 @@
|
|||||||
|
from luigi.target import Target
|
||||||
|
|
||||||
|
|
||||||
|
class NoOpTarget(Target):
    """Target whose existence check always succeeds.

    Handing this target to a task makes the task look as if its output
    is already present.
    """

    def exists(self):
        """Report the target as present, unconditionally."""
        return True
|
@ -1,7 +1,12 @@
|
|||||||
from __future__ import print_function
|
from __future__ import print_function
|
||||||
from luigi import Task
|
|
||||||
|
|
||||||
from metrik.targets.mongo_target import MongoTarget
|
import logging
|
||||||
|
|
||||||
|
from luigi import Task
|
||||||
|
from luigi.parameter import DateMinuteParameter
|
||||||
|
|
||||||
|
from metrik.targets.mongo import MongoTarget
|
||||||
|
from metrik.targets.noop import NoOpTarget
|
||||||
|
|
||||||
|
|
||||||
class MongoCreateTask(Task):
|
class MongoCreateTask(Task):
|
||||||
@ -27,3 +32,31 @@ class MongoCreateTask(Task):
|
|||||||
def retrieve_data(self, *args, **kwargs):
|
def retrieve_data(self, *args, **kwargs):
|
||||||
raise NotImplementedError('Get me some data!')
|
raise NotImplementedError('Get me some data!')
|
||||||
|
|
||||||
|
|
||||||
|
# noinspection PyAbstractClass
|
||||||
|
class MongoNoBackCreateTask(MongoCreateTask):
    """Mongo creation task for data that cannot be back-filled.

    The task only does real work when constructed with ``live=True``.
    When not live, ``output()`` returns a ``NoOpTarget`` and ``run()``
    does nothing.
    """

    # Have one parameter to make sure that the MongoTarget created by `super`
    # doesn't blow up.
    current_datetime = DateMinuteParameter()

    def __init__(self, live=False, *args, **kwargs):
        """Create the task and record whether it is running live.

        :param live: when false (the default), a warning is logged and
            the task becomes a no-op.
        :param args: forwarded unchanged to the parent constructor.
        :param kwargs: forwarded unchanged to the parent constructor.
        """
        super(MongoNoBackCreateTask, self).__init__(*args, **kwargs)
        self.live = live
        if not live:
            child_name = type(self).__name__
            # The placeholder is a *named* field, so the value must be
            # passed by keyword; a positional argument raises KeyError.
            logging.warning('Trying to create {child_name} without running'
                            ' live, errors potentially to ensue.'
                            .format(child_name=child_name))

    def output(self):
        """Return the parent's output target when live, else a NoOpTarget."""
        if self.live:
            return super(MongoNoBackCreateTask, self).output()
        else:
            return NoOpTarget()

    def run(self):
        """Delegate to the parent's ``run`` when live; otherwise do nothing."""
        # It only makes sense to run these tasks live: they can only retrieve
        # data in the moment, and can not go back to back-fill data. This is
        # very unfortunate, but there is plenty of valuable data to be had
        # that we wish to persist for the future.
        if self.live:
            return super(MongoNoBackCreateTask, self).run()
|
||||||
|
37
metrik/tasks/nasdaq.py
Normal file
37
metrik/tasks/nasdaq.py
Normal file
@ -0,0 +1,37 @@
|
|||||||
|
import requests
|
||||||
|
import pandas as pd
|
||||||
|
from six import StringIO
|
||||||
|
|
||||||
|
from metrik.tasks.base import MongoNoBackCreateTask
|
||||||
|
|
||||||
|
|
||||||
|
class NasdaqCompanyList(MongoNoBackCreateTask):
    """Task that downloads the NASDAQ company list as a list of records."""

    def get_collection_name(self):
        """Return the name of the collection this task writes to."""
        return 'nasdaq_company_list'

    @staticmethod
    def retrieve_data(*args, **kwargs):
        """Download the company CSV and return its rows.

        :return: ``{'companies': [...]}`` where each entry is a dict
            holding the Symbol, Name, LastSale, MarketCap, Country,
            IPOyear, Sector and Industry columns of one CSV row.
        """
        # Local import keeps this fix self-contained within the block.
        from io import BytesIO

        # Explicitly use requests to make mocking easy
        csv_bytes = requests.get('http://www.nasdaq.com/screening/'
                                 'companies-by-region.aspx?&render=download') \
            .content
        # `.content` is raw bytes. On Python 3 `six.StringIO` is
        # `io.StringIO`, which rejects bytes with a TypeError, so wrap the
        # payload in a bytes buffer that works on both Python 2 and 3.
        csv_filelike = BytesIO(csv_bytes)
        company_csv = pd.read_csv(csv_filelike)[
            ['Symbol', 'Name', 'LastSale', 'MarketCap', 'Country', 'IPOyear',
             'Sector', 'Industry']
        ]
        return {'companies': company_csv.to_dict(orient='records')}
|
||||||
|
|
||||||
|
|
||||||
|
class NasdaqETFList(MongoNoBackCreateTask):
    """Task that downloads the NASDAQ ETF list as a list of records."""

    def get_collection_name(self):
        """Return the name of the collection this task writes to."""
        return 'nasdaq_etf_list'

    @staticmethod
    def retrieve_data(*args, **kwargs):
        """Download the ETF CSV and return its rows.

        :return: ``{'etfs': [...]}`` where each entry is a dict holding
            the Symbol, Name and LastSale columns of one CSV row.
        """
        # Local import keeps this fix self-contained within the block.
        from io import BytesIO

        csv_bytes = requests.get('http://www.nasdaq.com/investing/etfs/'
                                 'etf-finder-results.aspx?download=Yes') \
            .content
        # `.content` is raw bytes. On Python 3 `six.StringIO` is
        # `io.StringIO`, which rejects bytes with a TypeError, so wrap the
        # payload in a bytes buffer that works on both Python 2 and 3.
        csv_filelike = BytesIO(csv_bytes)
        etf_csv = pd.read_csv(csv_filelike)[['Symbol', 'Name', 'LastSale']]
        return {'etfs': etf_csv.to_dict(orient='records')}
|
@ -2,7 +2,7 @@ from unittest import TestCase
|
|||||||
from pymongo import MongoClient
|
from pymongo import MongoClient
|
||||||
|
|
||||||
from metrik.conf import MONGO_DATABASE, MONGO_PORT, MONGO_HOST
|
from metrik.conf import MONGO_DATABASE, MONGO_PORT, MONGO_HOST
|
||||||
from metrik.targets.mongo_target import MongoTarget
|
from metrik.targets.mongo import MongoTarget
|
||||||
|
|
||||||
|
|
||||||
class MongoTest(TestCase):
|
class MongoTest(TestCase):
|
||||||
|
@ -1,7 +1,7 @@
|
|||||||
from pymongo import MongoClient
|
from pymongo import MongoClient
|
||||||
from random import randint
|
from random import randint
|
||||||
|
|
||||||
from metrik.targets.mongo_target import MongoTarget
|
from metrik.targets.mongo import MongoTarget
|
||||||
from metrik.conf import MONGO_DATABASE, MONGO_HOST, MONGO_PORT
|
from metrik.conf import MONGO_DATABASE, MONGO_HOST, MONGO_PORT
|
||||||
from test.mongo_test import MongoTest
|
from test.mongo_test import MongoTest
|
||||||
|
|
||||||
|
9
test/targets/test_noop.py
Normal file
9
test/targets/test_noop.py
Normal file
@ -0,0 +1,9 @@
|
|||||||
|
from unittest import TestCase
|
||||||
|
|
||||||
|
from metrik.targets.noop import NoOpTarget
|
||||||
|
|
||||||
|
|
||||||
|
class NoOpTest(TestCase):
    """Sanity check for ``NoOpTarget``."""

    def test_sanity(self):
        """A newly constructed target reports that it exists."""
        target = NoOpTarget()
        assert target.exists()
|
17
test/tasks/test_nasdaq.py
Normal file
17
test/tasks/test_nasdaq.py
Normal file
@ -0,0 +1,17 @@
|
|||||||
|
from unittest import TestCase
|
||||||
|
|
||||||
|
from metrik.tasks.nasdaq import NasdaqCompanyList, NasdaqETFList
|
||||||
|
|
||||||
|
|
||||||
|
class NasdaqTest(TestCase):
    """Size checks on the data returned by the NASDAQ list tasks."""

    def test_company_list(self):
        """The company list should contain more than 6000 entries."""
        payload = NasdaqCompanyList.retrieve_data()
        companies = payload['companies']
        assert len(companies) > 6000
        # TODO: Get lists of companies from ETF holdings and verify that they
        # can be found here as well - this should be a superset

    def test_etf_list(self):
        """The ETF list should contain more than 1500 entries."""
        payload = NasdaqETFList.retrieve_data()
        etfs = payload['etfs']
        assert len(etfs) > 1500
|
Loading…
Reference in New Issue
Block a user