mirror of
https://github.com/bspeice/metrik
synced 2024-12-25 06:58:12 -05:00
Switch to a more robust way of parsing equity info
This commit is contained in:
parent
28c91aa34e
commit
f87653913e
@ -1,3 +1,6 @@
|
||||
from html.parser import HTMLParser
|
||||
|
||||
import requests
|
||||
from luigi import Task, Parameter
|
||||
from pyquery import PyQuery as pq
|
||||
import logging
|
||||
@ -15,15 +18,55 @@ class BloombergEquityInfo(Task):
|
||||
|
||||
@staticmethod
def retrieve_info(bbg_code, user_agent):
    """Download a Bloomberg quote page and extract its classification.

    The page is consumed as a stream of text nodes.  Whenever a node's
    stripped text equals one of the labels ``'Sector'``, ``'Industry'``
    or ``'Sub-Industry'``, the next non-blank text node is stored as that
    label's value.

    :param bbg_code: Bloomberg code; URL-quoted and appended to the
        quote URL.
    :param user_agent: value sent in the ``User-Agent`` request header.
    :return: list ``[sector, industry, sub_industry]``; an entry is
        ``None`` when its label was never followed by a text node.
    """

    class EquityInfoParser(HTMLParser):
        """Record the text node that follows each label in ``keys``."""

        def __init__(self, keys):
            super(EquityInfoParser, self).__init__()
            self.keys = keys
            self.records = {k: None for k in keys}
            # Label seen in the previous non-blank text node, if any;
            # the next non-blank node is stored as its value.
            self.pending = None

        def handle_data(self, data):
            stripped = data.strip()
            # Ignore blank text nodes so they are never stored as a value
            if not stripped:
                return

            if self.pending is not None:
                self.records[self.pending] = stripped
                self.pending = None

            if stripped in self.keys:
                self.pending = stripped

    url = 'http://www.bloomberg.com/quote/{}'.format(quote_plus(bbg_code))
    logging.info('Visiting "{}" with agent "{}"'.format(url, user_agent))
    html = requests.get(url, headers={'User-Agent': user_agent}).text

    keys = ['Sector', 'Industry', 'Sub-Industry']
    eq_info = EquityInfoParser(keys)
    eq_info.feed(html)

    return [eq_info.records[k] for k in keys]
|
||||
|
||||
|
||||
class BloombergFXPrice(Task):
|
||||
bbg_code = Parameter()
|
||||
user_agent = Parameter()
|
||||
|
||||
@staticmethod
def retrieve_price(bbg_code, user_agent):
    """Fetch the Bloomberg quote page for ``bbg_code`` and return its price.

    :param bbg_code: Bloomberg code; URL-quoted and appended to the
        quote URL.
    :param user_agent: value passed to PyQuery as the ``User-Agent``
        header for the page request.
    :return: the text of the page's ``.price`` element as a ``float``.
    :raises ValueError: if that text cannot be parsed as a number
        (for example when no ``.price`` element is present).
    """
    url = 'http://www.bloomberg.com/quote/{}'.format(quote_plus(bbg_code))
    logging.info('Visiting "{}" with agent "{}"'.format(url, user_agent))
    html = pq(url, {'User-Agent': user_agent})

    # float() rejects thousands separators such as '1,234.56', so drop
    # commas before converting.
    price = float(html('.price').text().replace(',', ''))
    logging.info('Found FX price {}: {}'.format(bbg_code, price))
    return price
|
||||
|
@ -2,6 +2,7 @@ from unittest import TestCase
|
||||
|
||||
from metrik.conf import USER_AGENT
|
||||
from metrik.tasks.bloomberg import BloombergEquityInfo
|
||||
from metrik.tasks.bloomberg import BloombergFXPrice
|
||||
|
||||
|
||||
class BloombergTest(TestCase):
|
||||
@ -19,4 +20,20 @@ class BloombergTest(TestCase):
|
||||
|
||||
assert sector == 'Financials'
|
||||
assert industry == 'Institutional Financial Svcs'
|
||||
assert sub_industry == 'Institutional Brokerage'
|
||||
assert sub_industry == 'Institutional Brokerage'
|
||||
|
||||
def test_fx_triangle_euj(self):
    """EUR/USD times USD/JPY should match the direct EUR/JPY quote."""
    # Quotes are fetched in the listed order: both legs, then the cross.
    eur_usd, usd_jpy, eur_jpy = (
        BloombergFXPrice.retrieve_price(code, USER_AGENT)
        for code in ('EURUSD:CUR', 'USDJPY:CUR', 'EURJPY:CUR')
    )

    implied_eur_jpy = eur_usd * usd_jpy
    assert abs(implied_eur_jpy - eur_jpy) < .05
|
||||
|
||||
def test_fx_triangle_ghc(self):
    """GBP/HKD times HKD/CAD should match the direct GBP/CAD quote."""
    gbp_hkd = BloombergFXPrice.retrieve_price('GBPHKD:CUR', USER_AGENT)
    # Locals are named after the tickers actually requested (CAD).
    hkd_cad = BloombergFXPrice.retrieve_price('HKDCAD:CUR', USER_AGENT)
    gbp_cad = BloombergFXPrice.retrieve_price('GBPCAD:CUR', USER_AGENT)

    diff = abs(gbp_hkd * hkd_cad - gbp_cad)
    assert diff < .05
|
Loading…
Reference in New Issue
Block a user