commit 5f267009fbb096d2713f467b88a097ae5cd15725 Author: Bradlee Speice Date: Sun Aug 7 16:25:04 2016 -0400 Initial commit for metrik diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..50156d5 --- /dev/null +++ b/.gitignore @@ -0,0 +1,4 @@ +.idea/workspace.xml +.eggs/ +Metrik.egg-info/ +*.pyc diff --git a/.idea/compiler.xml b/.idea/compiler.xml new file mode 100644 index 0000000..96cc43e --- /dev/null +++ b/.idea/compiler.xml @@ -0,0 +1,22 @@ + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/.idea/copyright/profiles_settings.xml b/.idea/copyright/profiles_settings.xml new file mode 100644 index 0000000..e7bedf3 --- /dev/null +++ b/.idea/copyright/profiles_settings.xml @@ -0,0 +1,3 @@ + + + \ No newline at end of file diff --git a/.idea/misc.xml b/.idea/misc.xml new file mode 100644 index 0000000..3c04f0e --- /dev/null +++ b/.idea/misc.xml @@ -0,0 +1,61 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/.idea/modules.xml b/.idea/modules.xml new file mode 100644 index 0000000..1e89793 --- /dev/null +++ b/.idea/modules.xml @@ -0,0 +1,8 @@ + + + + + + + + \ No newline at end of file diff --git a/.idea/vcs.xml b/.idea/vcs.xml new file mode 100644 index 0000000..94a25f7 --- /dev/null +++ b/.idea/vcs.xml @@ -0,0 +1,6 @@ + + + + + + \ No newline at end of file diff --git a/.travis.yml b/.travis.yml new file mode 100644 index 0000000..1b71f81 --- /dev/null +++ b/.travis.yml @@ -0,0 +1,10 @@ +language: python +python: + - "2.6" + - "2.7" + - "3.2" + - "3.3" + - "3.4" + - "3.5" + +script: python setup.py test \ No newline at end of file diff --git a/metrik.iml b/metrik.iml new file mode 100644 index 0000000..ad3c0a3 --- /dev/null +++ b/metrik.iml @@ -0,0 +1,9 @@ + + + + + + + + + \ No newline at end of file diff --git a/metrik/__init__.py b/metrik/__init__.py new file mode 100644 index 0000000..e69de29 diff --git 
# --- metrik/conf.py ---
# Browser-style User-Agent string; the Bloomberg tests hand it to
# BloombergEquityInfo.retrieve_info.
USER_AGENT = "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:47.0) Gecko/20100101 Firefox/47.0"


# --- metrik/targets/__init__.py (empty) ---


# --- metrik/targets/pickle_target.py ---
from luigi.target import Target
from pickle import dump, dumps, load, loads
from os.path import exists, join
from os import unlink
from tempfile import gettempdir, tempdir
import base64


class PickleTarget(Target):
    """Luigi target that keeps one pickled object in the temp directory.

    The object lives in a file called ``name`` inside the directory
    reported by ``tempfile.gettempdir()``.
    """

    def __init__(self, name):
        """Remember ``name``, the file name used inside the temp directory."""
        self.name = name

    def full_path(self):
        """Return the path of the backing file."""
        # The module attribute ``tempfile.tempdir`` is None until
        # gettempdir() has been called, so joining on it directly fails on
        # a fresh interpreter. gettempdir() always returns a usable path.
        return join(gettempdir(), self.name)

    def exists(self):
        """Return True when the backing file is present on disk."""
        return exists(self.full_path())

    def write(self, object):
        """Pickle ``object`` into the backing file, replacing old content."""
        with open(self.full_path(), 'w+b') as handle:
            dump(object, handle)

    def read(self):
        """Unpickle and return the object stored in the backing file.

        Raises whatever ``open`` raises when the file does not exist.
        """
        # NOTE(review): unpickling executes arbitrary code, and the temp
        # directory is typically shared -- only use this where every writer
        # to that directory is trusted.
        with open(self.full_path(), 'rb') as handle:
            return load(handle)


# --- metrik/tasks/__init__.py (empty) ---


# --- metrik/tasks/bloomberg.py ---
from luigi import Task, Parameter
from pyquery import PyQuery as pq
import requests


class BloombergEquityInfo(Task):
    """Luigi task describing a Bloomberg equity-classification lookup.

    Only the parameters and the static scraping helper are defined here;
    the task declares no ``run`` or ``output`` of its own.
    """

    # Bloomberg ticker code, e.g. "AAPL:US".
    bbg_code = Parameter()
    # User-Agent header value to send with the request.
    user_agent = Parameter()

    @staticmethod
    def retrieve_info(bbg_code, user_agent):
        """Scrape sector, industry and sub-industry for ``bbg_code``.

        Loads ``http://www.bloomberg.com/quote/<bbg_code>`` and returns a
        ``(sector, industry, sub_industry)`` tuple with the text of three
        fixed cells of the page.
        """
        # pyquery treats a second positional argument as request *data*,
        # so the header dict must be passed with the ``headers`` keyword
        # to actually reach the server as HTTP headers.
        # '{0}' instead of '{}' keeps str.format working on Python 2.6.
        html = pq('http://www.bloomberg.com/quote/{0}'.format(bbg_code),
                  headers={'User-Agent': user_agent})

        # NOTE(review): the selectors are purely positional (13th-15th
        # ``div.cell``); presumably they break if the page layout changes.
        sector = html("div.cell:nth-child(13) > div:nth-child(2)").text()
        industry = html("div.cell:nth-child(14) > div:nth-child(2)").text()
        sub_industry = html("div.cell:nth-child(15) > div:nth-child(2)").text()

        return sector, industry, sub_industry
# --- setup.cfg ---
# [aliases]
# test=pytest


# --- setup.py ---
from setuptools import setup, find_packages

setup(
    name='Metrik',
    description='Data aggregation framework for Python',
    version='0.1.0',
    author='Bradlee Speice',
    author_email='bradlee.speice@gmail.com',
    packages=find_packages(),
    install_requires=[
        'pyquery >= 1.2.13',
        'luigi >= 2.2.0',
        # metrik/tasks/bloomberg.py imports requests at module level, so
        # it has to be installed together with the package.
        'requests',
    ],
    setup_requires=[
        'pytest_runner'
    ],
    tests_require=[
        'pytest'
    ]
)


# --- test/__init__.py (empty) ---


# --- test/targets/pickle_target.py ---
from unittest import TestCase
import luigi
from metrik.targets.pickle_target import PickleTarget


class FibTask(luigi.Task):
    """Luigi task that computes the ``s``-th Fibonacci number.

    Each task depends on the two preceding Fibonacci tasks and stores its
    value in a PickleTarget named after the task id.
    """

    # Index of the Fibonacci number to compute.
    s = luigi.IntParameter()

    def requires(self):
        """Return the tasks for ``s - 1`` and ``s - 2``; none for ``s < 2``."""
        if self.s >= 2:
            return [FibTask(self.s - 1), FibTask(self.s - 2)]
        return []

    def output(self):
        """Return the PickleTarget holding this task's value."""
        return PickleTarget(self.task_id)

    def run(self):
        """Write ``s`` itself for ``s <= 1``, else the sum of both inputs."""
        if self.s <= 1:
            val = self.s
        else:
            # sum() replaces the manual loop whose loop variable shadowed
            # the builtin ``input``.
            val = sum(dep.read() for dep in self.input())

        self.output().write(val)


class TestPickleTarget(TestCase):
    """Checks PickleTarget round-trips by running FibTask pipelines."""

    def test_fibonacci(self):
        """Build FibTask(6) and FibTask(100) and check the stored values."""
        f = FibTask(6)
        luigi.build([f], local_scheduler=True)

        ret = f.output().read()
        assert ret == 8

        f = FibTask(100)
        luigi.build([f], local_scheduler=True)
        assert f.output().read() == 354224848179261915075


# --- test/tasks/__init__.py (empty) ---
# --- test/tasks/test_bloomberg.py ---
from unittest import TestCase

from metrik.conf import USER_AGENT
from metrik.tasks.bloomberg import BloombergEquityInfo


class BloombergTest(TestCase):
    """Checks BloombergEquityInfo.retrieve_info against two known tickers.

    Each test calls retrieve_info with the shared USER_AGENT and compares
    the returned sector / industry / sub-industry with fixed strings.
    """

    @staticmethod
    def _classify(ticker):
        """Return retrieve_info's result for ``ticker`` using USER_AGENT."""
        return BloombergEquityInfo.retrieve_info(ticker, USER_AGENT)

    def test_correct_info_apple(self):
        """AAPL:US is Technology / Hardware / Communications Equipment."""
        result = self._classify("AAPL:US")
        sector, industry, sub_industry = result

        assert sector == 'Technology'
        assert industry == 'Hardware'
        assert sub_industry == 'Communications Equipment'

    def test_correct_info_kcg(self):
        """KCG:US is Financials / Institutional Financial Svcs / Institutional Brokerage."""
        result = self._classify("KCG:US")
        sector, industry, sub_industry = result

        assert sector == 'Financials'
        assert industry == 'Institutional Financial Svcs'
        assert sub_industry == 'Institutional Brokerage'