Switch to a more robust way of parsing equity info

master
Bradlee Speice 2016-08-08 19:19:56 -04:00
parent 28c91aa34e
commit f87653913e
2 changed files with 68 additions and 8 deletions

View File

@ -1,3 +1,6 @@
from html.parser import HTMLParser
import requests
from luigi import Task, Parameter
from pyquery import PyQuery as pq
import logging
@ -15,15 +18,55 @@ class BloombergEquityInfo(Task):
@staticmethod
def retrieve_info(bbg_code, user_agent):
    """Scrape sector, industry and sub-industry from a Bloomberg quote page.

    The page for ``bbg_code`` is downloaded and its text nodes are scanned
    in document order: the non-blank text node that follows a label
    (``Sector``, ``Industry`` or ``Sub-Industry``) is stored as that
    label's value.

    :param bbg_code: ticker code; it is URL-quoted before being placed in
        the quote URL.
    :param user_agent: value sent as the ``User-Agent`` request header.
    :return: list ``[sector, industry, sub_industry]``; an entry is
        ``None`` when its label was never followed by a text node.
    """

    class EquityInfoParser(HTMLParser):
        """Record the text node that follows each label in ``keys``."""

        def __init__(self, keys):
            super(EquityInfoParser, self).__init__()
            self.keys = keys
            # Value captured for each label (None until one is seen).
            self.records = {k: None for k in keys}
            # True while the next non-blank text node belongs to the label.
            self.do_record = {k: False for k in keys}

        def handle_data(self, data):
            stripped = data.strip()
            # Ignore blank lines
            if not stripped:
                return

            # Any label seen on the previous text node claims this one.
            for key, pending in self.do_record.items():
                if pending:
                    self.records[key] = stripped
                    self.do_record[key] = False

            # Arm recording only after the capture step above, so a label
            # never records itself as its own value.
            if stripped in self.keys:
                self.do_record[stripped] = True

        def get_records(self):
            return self.records

    url = 'http://www.bloomberg.com/quote/{}'.format(
        quote_plus(bbg_code))
    # The closing double quote after the agent placeholder was missing.
    logging.info('Visiting "{}" with agent "{}"'.format(url, user_agent))
    html = requests.get(url, headers={'User-Agent': user_agent}).text

    keys = ['Sector', 'Industry', 'Sub-Industry']
    eq_info = EquityInfoParser(keys)
    eq_info.feed(html)
    records = eq_info.get_records()
    return [records[k] for k in keys]
class BloombergFXPrice(Task):
    """Luigi task for scraping an FX price from a Bloomberg quote page."""

    bbg_code = Parameter()
    user_agent = Parameter()

    @staticmethod
    def retrieve_price(bbg_code, user_agent):
        """Return the price shown on the Bloomberg quote page as a float.

        :param bbg_code: currency pair code, e.g. ``'EURUSD:CUR'``; it is
            URL-quoted before being placed in the quote URL.
        :param user_agent: value passed as the ``User-Agent`` header.
        :return: text of the page's ``.price`` element converted to float.
        :raises ValueError: if that text cannot be parsed as a number.
        """
        url = 'http://www.bloomberg.com/quote/{}'.format(
            quote_plus(bbg_code)
        )
        logging.info('Visiting "{}" with agent "{}"'.format(url, user_agent))
        html = pq(url, {'User-Agent': user_agent})

        # The stale sector/industry scraping (and its early return of a
        # 3-tuple) was removed: it made the price parsing unreachable.
        # Thousands separators are stripped so values such as "1,234.5"
        # still convert.
        price = float(html('.price').text().replace(',', ''))
        logging.info('Found FX price {}: {}'.format(bbg_code, price))
        return price

View File

@ -2,6 +2,7 @@ from unittest import TestCase
from metrik.conf import USER_AGENT
from metrik.tasks.bloomberg import BloombergEquityInfo
from metrik.tasks.bloomberg import BloombergFXPrice
class BloombergTest(TestCase):
@ -19,4 +20,20 @@ class BloombergTest(TestCase):
assert sector == 'Financials'
assert industry == 'Institutional Financial Svcs'
assert sub_industry == 'Institutional Brokerage'
assert sub_industry == 'Institutional Brokerage'
def test_fx_triangle_euj(self):
    """EUR/USD times USD/JPY should match the quoted EUR/JPY rate."""
    fetch = BloombergFXPrice.retrieve_price
    first_leg = fetch('EURUSD:CUR', USER_AGENT)
    second_leg = fetch('USDJPY:CUR', USER_AGENT)
    cross_rate = fetch('EURJPY:CUR', USER_AGENT)

    # Implied cross rate vs. quoted cross rate, absolute tolerance.
    assert abs(first_leg * second_leg - cross_rate) < .05
def test_fx_triangle_ghc(self):
    """GBP/HKD times HKD/CAD should match the quoted GBP/CAD rate."""
    # Locals are named after the pairs actually requested (CAD, not INR).
    gbp_hkd = BloombergFXPrice.retrieve_price('GBPHKD:CUR', USER_AGENT)
    hkd_cad = BloombergFXPrice.retrieve_price('HKDCAD:CUR', USER_AGENT)
    gbp_cad = BloombergFXPrice.retrieve_price('GBPCAD:CUR', USER_AGENT)

    diff = abs(gbp_hkd * hkd_cad - gbp_cad)
    assert diff < .05