From f87653913eee5edcd5ecab79177f7137093ed0fe Mon Sep 17 00:00:00 2001 From: Bradlee Speice Date: Mon, 8 Aug 2016 19:19:56 -0400 Subject: [PATCH] Switch to a more robust way of parsing equity info --- metrik/tasks/bloomberg.py | 57 +++++++++++++++++++++++++++++++----- test/tasks/test_bloomberg.py | 19 +++++++++++- 2 files changed, 68 insertions(+), 8 deletions(-) diff --git a/metrik/tasks/bloomberg.py b/metrik/tasks/bloomberg.py index 1bccb98..d348203 100644 --- a/metrik/tasks/bloomberg.py +++ b/metrik/tasks/bloomberg.py @@ -1,3 +1,6 @@ +from html.parser import HTMLParser + +import requests from luigi import Task, Parameter from pyquery import PyQuery as pq import logging @@ -15,15 +18,55 @@ class BloombergEquityInfo(Task): @staticmethod def retrieve_info(bbg_code, user_agent): + class EquityInfoParser(HTMLParser): + + def __init__(self, keys): + super(EquityInfoParser, self).__init__() + self.keys = keys + self.records = {k: None for k in keys} + self.do_record = {k: False for k in keys} + + def handle_data(self, data): + stripped = data.strip() + # Ignore blank lines + if not stripped: + return + for k, v in self.do_record.items(): + if v: + self.records[k] = stripped + self.do_record[k] = False + + if stripped in self.keys: + self.do_record[stripped] = True + + def get_records(self): + return self.records + url = 'http://www.bloomberg.com/quote/{}'.format( quote_plus(bbg_code)) logging.info('Visiting "{}" with agent "{}'.format(url, user_agent)) + html = requests.get(url, headers={'User-Agent': user_agent}).text + + keys = ['Sector', 'Industry', 'Sub-Industry'] + eq_info = EquityInfoParser(keys) + eq_info.feed(html) + records = eq_info.get_records() + + return [records[k] for k in keys] + + +class BloombergFXPrice(Task): + bbg_code = Parameter() + user_agent = Parameter() + + @staticmethod + def retrieve_price(bbg_code, user_agent): + url = 'http://www.bloomberg.com/quote/{}'.format( + quote_plus(bbg_code) + ) + logging.info('Visiting "{}" with agent "{}"'.format(url, user_agent)) html = pq(url, {'User-Agent': user_agent}) - sector, industry, sub_industry = ( - html("div.cell:nth-child(13) > div:nth-child(2)").text(), - html("div.cell:nth-child(14) > div:nth-child(2)").text(), - html("div.cell:nth-child(15) > div:nth-child(2)").text() - ) - - return sector, industry, sub_industry + price = float(html('.price').text()) + logging.info('Found FX price {}: {}'.format(bbg_code, price)) + return price diff --git a/test/tasks/test_bloomberg.py b/test/tasks/test_bloomberg.py index aab605a..30a3b22 100644 --- a/test/tasks/test_bloomberg.py +++ b/test/tasks/test_bloomberg.py @@ -2,6 +2,7 @@ from unittest import TestCase from metrik.conf import USER_AGENT from metrik.tasks.bloomberg import BloombergEquityInfo +from metrik.tasks.bloomberg import BloombergFXPrice class BloombergTest(TestCase): @@ -19,4 +20,20 @@ class BloombergTest(TestCase): assert sector == 'Financials' assert industry == 'Institutional Financial Svcs' - assert sub_industry == 'Institutional Brokerage' \ No newline at end of file + assert sub_industry == 'Institutional Brokerage' + + def test_fx_triangle_euj(self): + eur_usd = BloombergFXPrice.retrieve_price('EURUSD:CUR', USER_AGENT) + usd_jpy = BloombergFXPrice.retrieve_price('USDJPY:CUR', USER_AGENT) + eur_jpy = BloombergFXPrice.retrieve_price('EURJPY:CUR', USER_AGENT) + + diff = abs(eur_usd * usd_jpy - eur_jpy) + assert diff < .05 + + def test_fx_triangle_ghc(self): + gbp_hkd = BloombergFXPrice.retrieve_price('GBPHKD:CUR', USER_AGENT) + hkd_inr = BloombergFXPrice.retrieve_price('HKDCAD:CUR', USER_AGENT) + gbp_inr = BloombergFXPrice.retrieve_price('GBPCAD:CUR', USER_AGENT) + + diff = abs(gbp_hkd * hkd_inr - gbp_inr) + assert diff < .05 \ No newline at end of file