mirror of
				https://github.com/bspeice/metrik
				synced 2025-11-03 18:00:51 -05:00 
			
		
		
		
	Switch to a more robust way of parsing equity info
This commit is contained in:
		@ -1,3 +1,6 @@
 | 
				
			|||||||
 | 
					from html.parser import HTMLParser
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					import requests
 | 
				
			||||||
from luigi import Task, Parameter
 | 
					from luigi import Task, Parameter
 | 
				
			||||||
from pyquery import PyQuery as pq
 | 
					from pyquery import PyQuery as pq
 | 
				
			||||||
import logging
 | 
					import logging
 | 
				
			||||||
@ -15,15 +18,55 @@ class BloombergEquityInfo(Task):
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
    @staticmethod
 | 
					    @staticmethod
 | 
				
			||||||
    def retrieve_info(bbg_code, user_agent):
 | 
					    def retrieve_info(bbg_code, user_agent):
 | 
				
			||||||
 | 
					        class EquityInfoParser(HTMLParser):
					            """Collect the text that follows each labelled field in a page.

					            Every non-blank text chunk is compared against ``keys``. When a
					            chunk equals one of the keys, the next non-blank chunk is stored
					            as that key's value; a later occurrence of the same key
					            overwrites the earlier value.
					            """

					            def __init__(self, keys):
					                """Start with nothing recorded and no key awaiting a value."""
					                super(EquityInfoParser, self).__init__()
					                self.keys = keys
					                # Last value captured per key; None until one is seen.
					                self.records = dict.fromkeys(keys)
					                # True while the next non-blank text belongs to that key.
					                self.do_record = dict.fromkeys(keys, False)

					            def handle_data(self, data):
					                """Store ``data`` for any waiting key, then arm it if it is a key."""
					                text = data.strip()
					                # Whitespace-only chunks carry no information.
					                if text:
					                    waiting = [
					                        key for key, armed in self.do_record.items() if armed
					                    ]
					                    for key in waiting:
					                        self.records[key] = text
					                        self.do_record[key] = False

					                    # Arm after storing, so a label is never its own value.
					                    if text in self.keys:
					                        self.do_record[text] = True

					            def get_records(self):
					                """Return the mapping of key to captured text (or None)."""
					                return self.records
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        url = 'http://www.bloomberg.com/quote/{}'.format(
 | 
					        url = 'http://www.bloomberg.com/quote/{}'.format(
 | 
				
			||||||
            quote_plus(bbg_code))
 | 
					            quote_plus(bbg_code))
 | 
				
			||||||
        logging.info('Visiting "{}" with agent "{}'.format(url, user_agent))
 | 
					        logging.info('Visiting "{}" with agent "{}'.format(url, user_agent))
 | 
				
			||||||
 | 
					        html = requests.get(url, headers={'User-Agent': user_agent}).text
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        keys = ['Sector', 'Industry', 'Sub-Industry']
 | 
				
			||||||
 | 
					        eq_info = EquityInfoParser(keys)
 | 
				
			||||||
 | 
					        eq_info.feed(html)
 | 
				
			||||||
 | 
					        records = eq_info.get_records()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        return [records[k] for k in keys]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					class BloombergFXPrice(Task):
 | 
				
			||||||
 | 
					    bbg_code = Parameter()
 | 
				
			||||||
 | 
					    user_agent = Parameter()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    @staticmethod
 | 
				
			||||||
 | 
					    def retrieve_price(bbg_code, user_agent):
 | 
				
			||||||
 | 
					        url = 'http://www.bloomberg.com/quote/{}'.format(
 | 
				
			||||||
 | 
					            quote_plus(bbg_code)
 | 
				
			||||||
 | 
					        )
 | 
				
			||||||
 | 
					        logging.info('Visiting "{}" with agent "{}"'.format(url, user_agent))
 | 
				
			||||||
        html = pq(url, {'User-Agent': user_agent})
 | 
					        html = pq(url, {'User-Agent': user_agent})
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        sector, industry, sub_industry = (
 | 
					        price = float(html('.price').text())
 | 
				
			||||||
            html("div.cell:nth-child(13) > div:nth-child(2)").text(),
 | 
					        logging.info('Found FX price {}: {}'.format(bbg_code, price))
 | 
				
			||||||
            html("div.cell:nth-child(14) > div:nth-child(2)").text(),
 | 
					        return price
 | 
				
			||||||
            html("div.cell:nth-child(15) > div:nth-child(2)").text()
 | 
					 | 
				
			||||||
        )
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
        return sector, industry, sub_industry
 | 
					 | 
				
			||||||
 | 
				
			|||||||
@ -2,6 +2,7 @@ from unittest import TestCase
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
from metrik.conf import USER_AGENT
 | 
					from metrik.conf import USER_AGENT
 | 
				
			||||||
from metrik.tasks.bloomberg import BloombergEquityInfo
 | 
					from metrik.tasks.bloomberg import BloombergEquityInfo
 | 
				
			||||||
 | 
					from metrik.tasks.bloomberg import BloombergFXPrice
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
class BloombergTest(TestCase):
 | 
					class BloombergTest(TestCase):
 | 
				
			||||||
@ -19,4 +20,20 @@ class BloombergTest(TestCase):
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
        assert sector == 'Financials'
 | 
					        assert sector == 'Financials'
 | 
				
			||||||
        assert industry == 'Institutional Financial Svcs'
 | 
					        assert industry == 'Institutional Financial Svcs'
 | 
				
			||||||
        assert sub_industry == 'Institutional Brokerage'
 | 
					        assert sub_industry == 'Institutional Brokerage'
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    def test_fx_triangle_euj(self):
					        """Check EURUSD times USDJPY against the direct EURJPY quote."""
					        codes = ('EURUSD:CUR', 'USDJPY:CUR', 'EURJPY:CUR')
					        first_leg, second_leg, direct = [
					            BloombergFXPrice.retrieve_price(code, USER_AGENT)
					            for code in codes
					        ]

					        # The synthetic cross must stay within 0.05 of the quoted cross.
					        assert abs(first_leg * second_leg - direct) < .05
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    def test_fx_triangle_ghc(self):
					        """Check GBPHKD times HKDCAD against the direct GBPCAD quote."""
					        gbp_hkd = BloombergFXPrice.retrieve_price('GBPHKD:CUR', USER_AGENT)
					        # Locals are named after the CAD pairs that are actually requested.
					        hkd_cad = BloombergFXPrice.retrieve_price('HKDCAD:CUR', USER_AGENT)
					        gbp_cad = BloombergFXPrice.retrieve_price('GBPCAD:CUR', USER_AGENT)

					        # The synthetic cross must stay within 0.05 of the quoted cross.
					        diff = abs(gbp_hkd * hkd_cad - gbp_cad)
					        assert diff < .05
 | 
				
			||||||
		Reference in New Issue
	
	Block a user