mirror of
https://github.com/bspeice/metrik
synced 2024-11-23 07:38:09 -05:00
Add NASDAQ Company and ETF lists
This commit is contained in:
parent
c42f5a6980
commit
88c4401960
6
metrik/targets/noop.py
Normal file
6
metrik/targets/noop.py
Normal file
@ -0,0 +1,6 @@
|
||||
from luigi.target import Target
|
||||
|
||||
|
||||
class NoOpTarget(Target):
    """Luigi target whose ``exists`` check always succeeds."""

    def exists(self):
        """Return True unconditionally; nothing is ever inspected."""
        return True
|
@ -1,7 +1,12 @@
|
||||
from __future__ import print_function
|
||||
from luigi import Task
|
||||
|
||||
from metrik.targets.mongo_target import MongoTarget
|
||||
import logging
|
||||
|
||||
from luigi import Task
|
||||
from luigi.parameter import DateMinuteParameter
|
||||
|
||||
from metrik.targets.mongo import MongoTarget
|
||||
from metrik.targets.noop import NoOpTarget
|
||||
|
||||
|
||||
class MongoCreateTask(Task):
|
||||
@ -27,3 +32,31 @@ class MongoCreateTask(Task):
|
||||
def retrieve_data(self, *args, **kwargs):
|
||||
raise NotImplementedError('Get me some data!')
|
||||
|
||||
|
||||
# noinspection PyAbstractClass
|
||||
class MongoNoBackCreateTask(MongoCreateTask):
    """Mongo create task for data that cannot be back-filled.

    When ``live`` is true the task defers to ``MongoCreateTask`` for both
    ``output`` and ``run``. When it is false the task reports a
    ``NoOpTarget`` as its output and ``run`` does nothing.
    """

    # Have one parameter to make sure that the MongoTarget created by `super`
    # doesn't blow up.
    current_datetime = DateMinuteParameter()

    def __init__(self, live=False, *args, **kwargs):
        """Store the ``live`` flag and warn when the task is not live.

        :param live: whether the task is being run in the moment.
        :param args: forwarded unchanged to the parent constructor.
        :param kwargs: forwarded unchanged to the parent constructor.
        """
        # NOTE(review): luigi builds tasks through its own machinery; confirm
        # that a `live` keyword actually reaches this constructor.
        super(MongoNoBackCreateTask, self).__init__(*args, **kwargs)
        self.live = live
        if not live:
            # Lazy %-style arguments: the previous `.format(child_name)` call
            # passed the name positionally to a `{child_name}` placeholder
            # and raised KeyError instead of logging.
            logging.warning(
                'Trying to create %s without running live, errors '
                'potentially to ensue.', type(self).__name__)

    def output(self):
        """Return the parent's output when live, otherwise a NoOpTarget."""
        if self.live:
            return super(MongoNoBackCreateTask, self).output()
        return NoOpTarget()

    def run(self):
        """Delegate to the parent's ``run`` when live; otherwise do nothing.

        :return: whatever the parent ``run`` returns when live, else None.
        """
        # It only makes sense to run these tasks live: they can only retrieve
        # data in the moment, and can not go back to back-fill data. This is
        # very unfortunate, but there is plenty of value to be had that we
        # wish to persist for the future.
        if self.live:
            return super(MongoNoBackCreateTask, self).run()
        return None
|
||||
|
37
metrik/tasks/nasdaq.py
Normal file
37
metrik/tasks/nasdaq.py
Normal file
@ -0,0 +1,37 @@
|
||||
import requests
|
||||
import pandas as pd
|
||||
from six import StringIO
|
||||
|
||||
from metrik.tasks.base import MongoNoBackCreateTask
|
||||
|
||||
|
||||
class NasdaqCompanyList(MongoNoBackCreateTask):
    """Task that downloads the NASDAQ company listing CSV."""

    def get_collection_name(self):
        """Return the name of the collection used for this task."""
        return 'nasdaq_company_list'

    @staticmethod
    def retrieve_data(*args, **kwargs):
        """Download the company CSV and return its rows.

        :param args: accepted and ignored.
        :param kwargs: accepted and ignored.
        :return: ``{'companies': [...]}`` with one dict per CSV row,
            restricted to the columns selected below.
        """
        # `.content` is bytes; a text StringIO rejects bytes on Python 3, so
        # hand pandas a binary buffer instead.
        from io import BytesIO

        # Explicitly use requests to make mocking easy
        response = requests.get('http://www.nasdaq.com/screening/'
                                'companies-by-region.aspx?&render=download')
        csv_filelike = BytesIO(response.content)

        wanted_columns = ['Symbol', 'Name', 'LastSale', 'MarketCap',
                          'Country', 'IPOyear', 'Sector', 'Industry']
        company_csv = pd.read_csv(csv_filelike)[wanted_columns]
        return {'companies': company_csv.to_dict(orient='records')}
|
||||
|
||||
|
||||
class NasdaqETFList(MongoNoBackCreateTask):
    """Task that downloads the NASDAQ ETF listing CSV."""

    def get_collection_name(self):
        """Return the name of the collection used for this task."""
        return 'nasdaq_etf_list'

    @staticmethod
    def retrieve_data(*args, **kwargs):
        """Download the ETF CSV and return its rows.

        :param args: accepted and ignored.
        :param kwargs: accepted and ignored.
        :return: ``{'etfs': [...]}`` with one dict per CSV row, restricted
            to the ``Symbol``, ``Name`` and ``LastSale`` columns.
        """
        # `.content` is bytes; a text StringIO rejects bytes on Python 3, so
        # hand pandas a binary buffer instead.
        from io import BytesIO

        response = requests.get('http://www.nasdaq.com/investing/etfs/'
                                'etf-finder-results.aspx?download=Yes')
        csv_filelike = BytesIO(response.content)

        etf_csv = pd.read_csv(csv_filelike)[['Symbol', 'Name', 'LastSale']]
        return {'etfs': etf_csv.to_dict(orient='records')}
|
@ -2,7 +2,7 @@ from unittest import TestCase
|
||||
from pymongo import MongoClient
|
||||
|
||||
from metrik.conf import MONGO_DATABASE, MONGO_PORT, MONGO_HOST
|
||||
from metrik.targets.mongo_target import MongoTarget
|
||||
from metrik.targets.mongo import MongoTarget
|
||||
|
||||
|
||||
class MongoTest(TestCase):
|
||||
|
@ -1,7 +1,7 @@
|
||||
from pymongo import MongoClient
|
||||
from random import randint
|
||||
|
||||
from metrik.targets.mongo_target import MongoTarget
|
||||
from metrik.targets.mongo import MongoTarget
|
||||
from metrik.conf import MONGO_DATABASE, MONGO_HOST, MONGO_PORT
|
||||
from test.mongo_test import MongoTest
|
||||
|
||||
|
9
test/targets/test_noop.py
Normal file
9
test/targets/test_noop.py
Normal file
@ -0,0 +1,9 @@
|
||||
from unittest import TestCase
|
||||
|
||||
from metrik.targets.noop import NoOpTarget
|
||||
|
||||
|
||||
class NoOpTest(TestCase):
    """Sanity check for NoOpTarget."""

    def test_sanity(self):
        """A freshly built NoOpTarget reports that it exists."""
        target = NoOpTarget()
        assert target.exists()
|
17
test/tasks/test_nasdaq.py
Normal file
17
test/tasks/test_nasdaq.py
Normal file
@ -0,0 +1,17 @@
|
||||
from unittest import TestCase
|
||||
|
||||
from metrik.tasks.nasdaq import NasdaqCompanyList, NasdaqETFList
|
||||
|
||||
|
||||
class NasdaqTest(TestCase):
    """Size checks on the data returned by the NASDAQ list tasks.

    NOTE(review): no mocking is visible here, so these presumably hit the
    network -- confirm before running in an offline environment.
    """

    def test_company_list(self):
        """The company listing holds more than 6000 entries."""
        payload = NasdaqCompanyList.retrieve_data()
        companies = payload['companies']
        assert len(companies) > 6000
        # TODO: Get lists of companies from ETF holdings and verify that they
        # can be found here as well - this should be a superset

    def test_etf_list(self):
        """The ETF listing holds more than 1500 entries."""
        payload = NasdaqETFList.retrieve_data()
        etfs = payload['etfs']
        assert len(etfs) > 1500
|
Loading…
Reference in New Issue
Block a user