mirror of
https://github.com/bspeice/bspeice.github.io
synced 2025-07-01 13:56:41 -04:00
New blog post on Kaggle competition
This commit is contained in:
@ -28,6 +28,17 @@
|
||||
<script src="https://oss.maxcdn.com/respond/1.4.2/respond.min.js"></script>
|
||||
<![endif]-->
|
||||
|
||||
<!-- Google Analytics -->
|
||||
<script>
|
||||
(function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
|
||||
(i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o),
|
||||
m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
|
||||
})(window,document,'script','//www.google-analytics.com/analytics.js','ga');
|
||||
|
||||
ga('create', 'UA-74711362-1', 'auto');
|
||||
ga('send', 'pageview');
|
||||
</script>
|
||||
<!-- /Google Analytics -->
|
||||
|
||||
|
||||
</head>
|
||||
@ -81,7 +92,7 @@
|
||||
<div class="prompt input_prompt">In [1]:</div>
|
||||
<div class="inner_cell">
|
||||
<div class="input_area">
|
||||
<div class=" highlight hl-ipython3"><pre><span class="kn">import</span> <span class="nn">requests</span>
|
||||
<div class=" highlight hl-ipython3"><pre><span></span><span class="kn">import</span> <span class="nn">requests</span>
|
||||
<span class="kn">import</span> <span class="nn">pandas</span> <span class="k">as</span> <span class="nn">pd</span>
|
||||
<span class="kn">import</span> <span class="nn">numpy</span> <span class="k">as</span> <span class="nn">np</span>
|
||||
<span class="kn">from</span> <span class="nn">dateutil</span> <span class="k">import</span> <span class="n">parser</span> <span class="k">as</span> <span class="n">dtparser</span>
|
||||
@ -116,7 +127,7 @@
|
||||
<div class="prompt input_prompt">In [2]:</div>
|
||||
<div class="inner_cell">
|
||||
<div class="input_area">
|
||||
<div class=" highlight hl-ipython3"><pre><span class="k">class</span> <span class="nc">ArticleListParser</span><span class="p">(</span><span class="n">HTMLParser</span><span class="p">):</span>
|
||||
<div class=" highlight hl-ipython3"><pre><span></span><span class="k">class</span> <span class="nc">ArticleListParser</span><span class="p">(</span><span class="n">HTMLParser</span><span class="p">):</span>
|
||||
<span class="sd">"""Given a web page with articles on it, parse out the article links"""</span>
|
||||
|
||||
<span class="n">articles</span> <span class="o">=</span> <span class="p">[]</span>
|
||||
@ -129,7 +140,7 @@
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">articles</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">href</span><span class="p">)</span>
|
||||
|
||||
<span class="n">base_url</span> <span class="o">=</span> <span class="s2">"http://seekingalpha.com/author/wall-street-breakfast/articles"</span>
|
||||
<span class="n">article_page_urls</span> <span class="o">=</span> <span class="p">[</span><span class="n">base_url</span><span class="p">]</span> <span class="o">+</span> <span class="p">[</span><span class="n">base_url</span> <span class="o">+</span> <span class="s1">'/{}'</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">i</span><span class="p">)</span> <span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="mi">2</span><span class="p">,</span> <span class="mi">20</span><span class="p">)]</span>
|
||||
<span class="n">article_page_urls</span> <span class="o">=</span> <span class="p">[</span><span class="n">base_url</span><span class="p">]</span> <span class="o">+</span> <span class="p">[</span><span class="n">base_url</span> <span class="o">+</span> <span class="s1">'/</span><span class="si">{}</span><span class="s1">'</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">i</span><span class="p">)</span> <span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="mi">2</span><span class="p">,</span> <span class="mi">20</span><span class="p">)]</span>
|
||||
|
||||
<span class="n">global_articles</span> <span class="o">=</span> <span class="p">[]</span>
|
||||
<span class="k">for</span> <span class="n">page</span> <span class="ow">in</span> <span class="n">article_page_urls</span><span class="p">:</span>
|
||||
@ -151,7 +162,7 @@
|
||||
<div class="prompt input_prompt">In [3]:</div>
|
||||
<div class="inner_cell">
|
||||
<div class="input_area">
|
||||
<div class=" highlight hl-ipython3"><pre><span class="k">class</span> <span class="nc">ArticleReturnParser</span><span class="p">(</span><span class="n">HTMLParser</span><span class="p">):</span>
|
||||
<div class=" highlight hl-ipython3"><pre><span></span><span class="k">class</span> <span class="nc">ArticleReturnParser</span><span class="p">(</span><span class="n">HTMLParser</span><span class="p">):</span>
|
||||
<span class="s2">"Given an article, parse out the futures returns in it"</span>
|
||||
|
||||
<span class="n">record_font_tags</span> <span class="o">=</span> <span class="kc">False</span>
|
||||
@ -238,7 +249,7 @@
|
||||
<div class="prompt input_prompt">In [4]:</div>
|
||||
<div class="inner_cell">
|
||||
<div class="input_area">
|
||||
<div class=" highlight hl-ipython3"><pre><span class="c1"># article_df is sorted by date, so we get the first row.</span>
|
||||
<div class=" highlight hl-ipython3"><pre><span></span><span class="c1"># article_df is sorted by date, so we get the first row.</span>
|
||||
<span class="n">start_date</span> <span class="o">=</span> <span class="n">article_df</span><span class="o">.</span><span class="n">sort_values</span><span class="p">(</span><span class="n">by</span><span class="o">=</span><span class="s1">'date'</span><span class="p">)</span><span class="o">.</span><span class="n">iloc</span><span class="p">[</span><span class="mi">0</span><span class="p">][</span><span class="s1">'date'</span><span class="p">]</span> <span class="o">-</span> <span class="n">relativedelta</span><span class="p">(</span><span class="n">days</span><span class="o">=</span><span class="mi">1</span><span class="p">)</span>
|
||||
<span class="n">SPY</span> <span class="o">=</span> <span class="n">Quandl</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s2">"GOOG/NYSE_SPY"</span><span class="p">,</span> <span class="n">trim_start</span><span class="o">=</span><span class="n">start_date</span><span class="p">)</span>
|
||||
<span class="n">DJIA</span> <span class="o">=</span> <span class="n">Quandl</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s2">"GOOG/AMS_DIA"</span><span class="p">,</span> <span class="n">trim_start</span><span class="o">=</span><span class="n">start_date</span><span class="p">)</span>
|
||||
@ -267,7 +278,7 @@
|
||||
<div class="prompt input_prompt">In [5]:</div>
|
||||
<div class="inner_cell">
|
||||
<div class="input_area">
|
||||
<div class=" highlight hl-ipython3"><pre><span class="k">def</span> <span class="nf">calculate_opening_ret</span><span class="p">(</span><span class="n">frame</span><span class="p">):</span>
|
||||
<div class=" highlight hl-ipython3"><pre><span></span><span class="k">def</span> <span class="nf">calculate_opening_ret</span><span class="p">(</span><span class="n">frame</span><span class="p">):</span>
|
||||
<span class="c1"># I'm not a huge fan of the appending for loop,</span>
|
||||
<span class="c1"># but it's a bit verbose for a comprehension</span>
|
||||
<span class="n">data</span> <span class="o">=</span> <span class="p">{}</span>
|
||||
@ -380,7 +391,7 @@ S&P 0.604478 0.597015 0.811808 0.848708
|
||||
<div class="prompt input_prompt">In [6]:</div>
|
||||
<div class="inner_cell">
|
||||
<div class="input_area">
|
||||
<div class=" highlight hl-ipython3"><pre><span class="k">def</span> <span class="nf">calculate_closing_ret</span><span class="p">(</span><span class="n">frame</span><span class="p">):</span>
|
||||
<div class=" highlight hl-ipython3"><pre><span></span><span class="k">def</span> <span class="nf">calculate_closing_ret</span><span class="p">(</span><span class="n">frame</span><span class="p">):</span>
|
||||
<span class="c1"># I'm not a huge fan of the appending for loop,</span>
|
||||
<span class="c1"># but it's a bit verbose for a comprehension</span>
|
||||
<span class="n">data</span> <span class="o">=</span> <span class="p">{}</span>
|
||||
@ -501,6 +512,20 @@ MathJax.Hub.Config({tex2jax: {inlineMath: [['$','$'], ['\(','\)']]}});
|
||||
<script async src='https://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS_CHTML'></script>
|
||||
|
||||
|
||||
<div class="comments">
|
||||
<div id="disqus_thread"></div>
|
||||
<script type="text/javascript">
|
||||
var disqus_shortname = 'bradleespeice';
|
||||
var disqus_identifier = 'testing-cramer.html';
|
||||
var disqus_url = 'https://bspeice.github.io/testing-cramer.html';
|
||||
(function() {
|
||||
var dsq = document.createElement('script'); dsq.type = 'text/javascript'; dsq.async = true;
|
||||
dsq.src = '//' + disqus_shortname + '.disqus.com/embed.js';
|
||||
(document.getElementsByTagName('head')[0] || document.getElementsByTagName('body')[0]).appendChild(dsq);
|
||||
})();
|
||||
</script>
|
||||
<noscript>Please enable JavaScript to view the comments.</noscript>
|
||||
</div>
|
||||
|
||||
</div>
|
||||
<!-- /Content -->
|
||||
|
Reference in New Issue
Block a user