mirror of
https://github.com/bspeice/metrik
synced 2024-11-04 22:48:11 -05:00
Switch to a more robust way of parsing equity info
This commit is contained in:
parent
28c91aa34e
commit
f87653913e
@ -1,3 +1,6 @@
|
|||||||
|
from html.parser import HTMLParser
|
||||||
|
|
||||||
|
import requests
|
||||||
from luigi import Task, Parameter
|
from luigi import Task, Parameter
|
||||||
from pyquery import PyQuery as pq
|
from pyquery import PyQuery as pq
|
||||||
import logging
|
import logging
|
||||||
@ -15,15 +18,55 @@ class BloombergEquityInfo(Task):
|
|||||||
|
|
||||||
@staticmethod
def retrieve_info(bbg_code, user_agent):
    """Download the Bloomberg quote page for ``bbg_code`` and pull out
    its sector classification.

    The page text is scanned with an ``HTMLParser`` subclass rather than
    fixed CSS positions: whenever a text node equals one of the labels
    'Sector', 'Industry' or 'Sub-Industry', the next non-blank text node
    is stored as that label's value.

    :param bbg_code: Bloomberg code; URL-quoted and appended to the
        quote URL.
    :param user_agent: value sent as the ``User-Agent`` request header.
    :return: list ``[sector, industry, sub_industry]``. An entry is
        ``None`` when its label was never followed by a text node.
    """

    class EquityInfoParser(HTMLParser):
        """Capture the text node that follows each label in ``keys``."""

        def __init__(self, keys):
            super(EquityInfoParser, self).__init__()
            self.keys = keys
            # Most recent value captured per label; None until one is seen.
            self.records = {k: None for k in keys}
            # True while the next non-blank text node belongs to the label.
            self.do_record = {k: False for k in keys}

        def handle_data(self, data):
            stripped = data.strip()
            # Ignore blank lines
            if not stripped:
                return
            # Only values of existing keys are reassigned here, so the
            # dict's size never changes during iteration.
            for k, v in self.do_record.items():
                if v:
                    self.records[k] = stripped
                    self.do_record[k] = False

            # A label arms recording for the text node that comes next.
            if stripped in self.keys:
                self.do_record[stripped] = True

        def get_records(self):
            return self.records

    url = 'http://www.bloomberg.com/quote/{}'.format(
        quote_plus(bbg_code))
    # The closing double quote after the agent was missing originally.
    logging.info('Visiting "{}" with agent "{}"'.format(url, user_agent))
    html = requests.get(url, headers={'User-Agent': user_agent}).text

    keys = ['Sector', 'Industry', 'Sub-Industry']
    eq_info = EquityInfoParser(keys)
    eq_info.feed(html)
    records = eq_info.get_records()

    return [records[k] for k in keys]
|
||||||
|
|
||||||
|
|
||||||
|
class BloombergFXPrice(Task):
    """Luigi task parameterised by a Bloomberg FX code and a user agent."""

    # Bloomberg code of the currency pair, e.g. 'EURUSD:CUR'.
    bbg_code = Parameter()
    # Value sent as the User-Agent header when fetching the quote page.
    user_agent = Parameter()

    @staticmethod
    def retrieve_price(bbg_code, user_agent):
        """Fetch the Bloomberg quote page for ``bbg_code`` and return the
        price it displays.

        :param bbg_code: Bloomberg code; URL-quoted and appended to the
            quote URL.
        :param user_agent: value passed to PyQuery as the ``User-Agent``
            header.
        :return: the text of the ``.price`` element converted to ``float``.
        :raises ValueError: if that text is not a parseable number
            (for example when no ``.price`` element is found).
        """
        url = 'http://www.bloomberg.com/quote/{}'.format(
            quote_plus(bbg_code)
        )
        logging.info('Visiting "{}" with agent "{}"'.format(url, user_agent))
        html = pq(url, {'User-Agent': user_agent})

        # Quotes of 1,000 or more may be rendered with thousands
        # separators ("1,234.56"), which float() rejects; drop the commas.
        # NOTE(review): assumes US-style number formatting on the page.
        price = float(html('.price').text().replace(',', ''))
        logging.info('Found FX price {}: {}'.format(bbg_code, price))
        return price
|
||||||
html("div.cell:nth-child(15) > div:nth-child(2)").text()
|
|
||||||
)
|
|
||||||
|
|
||||||
return sector, industry, sub_industry
|
|
||||||
|
@ -2,6 +2,7 @@ from unittest import TestCase
|
|||||||
|
|
||||||
from metrik.conf import USER_AGENT
|
from metrik.conf import USER_AGENT
|
||||||
from metrik.tasks.bloomberg import BloombergEquityInfo
|
from metrik.tasks.bloomberg import BloombergEquityInfo
|
||||||
|
from metrik.tasks.bloomberg import BloombergFXPrice
|
||||||
|
|
||||||
|
|
||||||
class BloombergTest(TestCase):
|
class BloombergTest(TestCase):
|
||||||
@ -20,3 +21,19 @@ class BloombergTest(TestCase):
|
|||||||
assert sector == 'Financials'
|
assert sector == 'Financials'
|
||||||
assert industry == 'Institutional Financial Svcs'
|
assert industry == 'Institutional Financial Svcs'
|
||||||
assert sub_industry == 'Institutional Brokerage'
|
assert sub_industry == 'Institutional Brokerage'
|
||||||
|
|
||||||
|
def test_fx_triangle_euj(self):
    """EURUSD times USDJPY must land within 0.05 of the quoted EURJPY."""
    fetch = BloombergFXPrice.retrieve_price
    # Fetched in the same order as the triangle is walked: the two legs
    # first, then the direct cross rate.
    first_leg, second_leg, direct = [
        fetch(code, USER_AGENT)
        for code in ('EURUSD:CUR', 'USDJPY:CUR', 'EURJPY:CUR')
    ]

    implied = first_leg * second_leg
    assert abs(implied - direct) < .05
|
||||||
|
|
||||||
|
def test_fx_triangle_ghc(self):
    """GBPHKD times HKDCAD must land within 0.05 of the quoted GBPCAD."""
    gbp_hkd = BloombergFXPrice.retrieve_price('GBPHKD:CUR', USER_AGENT)
    # These locals were previously named *_inr although the codes
    # requested are the CAD pairs; the names now match the data fetched.
    hkd_cad = BloombergFXPrice.retrieve_price('HKDCAD:CUR', USER_AGENT)
    gbp_cad = BloombergFXPrice.retrieve_price('GBPCAD:CUR', USER_AGENT)

    diff = abs(gbp_hkd * hkd_cad - gbp_cad)
    assert diff < .05
|
Loading…
Reference in New Issue
Block a user