1
0
mirror of https://github.com/bspeice/metrik synced 2024-11-23 15:48:10 -05:00

Switch to a more robust way of parsing equity info

This commit is contained in:
Bradlee Speice 2016-08-08 19:19:56 -04:00
parent 28c91aa34e
commit f87653913e
2 changed files with 68 additions and 8 deletions

View File

@ -1,3 +1,6 @@
from html.parser import HTMLParser
import requests
from luigi import Task, Parameter from luigi import Task, Parameter
from pyquery import PyQuery as pq from pyquery import PyQuery as pq
import logging import logging
@ -15,15 +18,55 @@ class BloombergEquityInfo(Task):
@staticmethod @staticmethod
def retrieve_info(bbg_code, user_agent): def retrieve_info(bbg_code, user_agent):
class EquityInfoParser(HTMLParser):
    """Capture the text node that follows each labelled field in a page.

    Every non-blank text node is compared (after stripping whitespace)
    against ``keys``. When a node equals one of the keys, the next
    non-blank text node is stored as that key's value.
    """

    def __init__(self, keys):
        """Set up capture state for every label in ``keys``."""
        super(EquityInfoParser, self).__init__()
        self.keys = keys
        # Captured text per label; None until a value has been recorded.
        self.records = dict.fromkeys(keys)
        # True while the next non-blank text node belongs to that label.
        self.do_record = dict.fromkeys(keys, False)

    def handle_data(self, data):
        """Record ``data`` for any armed label, then arm a label it matches."""
        text = data.strip()
        if text:
            armed = [label for label, flag in self.do_record.items() if flag]
            for label in armed:
                self.records[label] = text
                self.do_record[label] = False

            # A node that is itself a label arms capture of the next node.
            if text in self.keys:
                self.do_record[text] = True

    def get_records(self):
        """Return the mapping of label to captured text (None if unseen)."""
        return self.records
url = 'http://www.bloomberg.com/quote/{}'.format( url = 'http://www.bloomberg.com/quote/{}'.format(
quote_plus(bbg_code)) quote_plus(bbg_code))
logging.info('Visiting "{}" with agent "{}'.format(url, user_agent)) logging.info('Visiting "{}" with agent "{}'.format(url, user_agent))
html = requests.get(url, headers={'User-Agent': user_agent}).text
keys = ['Sector', 'Industry', 'Sub-Industry']
eq_info = EquityInfoParser(keys)
eq_info.feed(html)
records = eq_info.get_records()
return [records[k] for k in keys]
class BloombergFXPrice(Task):
bbg_code = Parameter()
user_agent = Parameter()
@staticmethod
def retrieve_price(bbg_code, user_agent):
url = 'http://www.bloomberg.com/quote/{}'.format(
quote_plus(bbg_code)
)
logging.info('Visiting "{}" with agent "{}"'.format(url, user_agent))
html = pq(url, {'User-Agent': user_agent}) html = pq(url, {'User-Agent': user_agent})
sector, industry, sub_industry = ( price = float(html('.price').text())
html("div.cell:nth-child(13) > div:nth-child(2)").text(), logging.info('Found FX price {}: {}'.format(bbg_code, price))
html("div.cell:nth-child(14) > div:nth-child(2)").text(), return price
html("div.cell:nth-child(15) > div:nth-child(2)").text()
)
return sector, industry, sub_industry

View File

@ -2,6 +2,7 @@ from unittest import TestCase
from metrik.conf import USER_AGENT from metrik.conf import USER_AGENT
from metrik.tasks.bloomberg import BloombergEquityInfo from metrik.tasks.bloomberg import BloombergEquityInfo
from metrik.tasks.bloomberg import BloombergFXPrice
class BloombergTest(TestCase): class BloombergTest(TestCase):
@ -20,3 +21,19 @@ class BloombergTest(TestCase):
assert sector == 'Financials' assert sector == 'Financials'
assert industry == 'Institutional Financial Svcs' assert industry == 'Institutional Financial Svcs'
assert sub_industry == 'Institutional Brokerage' assert sub_industry == 'Institutional Brokerage'
def test_fx_triangle_euj(self):
    """Check EURUSD * USDJPY against the direct EURJPY quote.

    The product of the two legs must be within 0.05 of EURJPY.
    """
    # Quotes are fetched in the order EURUSD, USDJPY, EURJPY.
    eurusd, usdjpy, eurjpy = (
        BloombergFXPrice.retrieve_price(code, USER_AGENT)
        for code in ('EURUSD:CUR', 'USDJPY:CUR', 'EURJPY:CUR')
    )
    implied_cross = eurusd * usdjpy
    assert abs(implied_cross - eurjpy) < .05
def test_fx_triangle_ghc(self):
    """Check GBPHKD * HKDCAD against the direct GBPCAD quote.

    The product of the two legs must be within 0.05 of GBPCAD.
    """
    gbp_hkd = BloombergFXPrice.retrieve_price('GBPHKD:CUR', USER_AGENT)
    # Locals are named after the tickers actually requested (CAD legs).
    hkd_cad = BloombergFXPrice.retrieve_price('HKDCAD:CUR', USER_AGENT)
    gbp_cad = BloombergFXPrice.retrieve_price('GBPCAD:CUR', USER_AGENT)

    diff = abs(gbp_hkd * hkd_cad - gbp_cad)
    assert diff < .05