New blog post on Kaggle competition

This commit is contained in:
Bradlee Speice
2016-03-05 11:58:46 -05:00
parent e682eb2a35
commit f60cb5f8ef
31 changed files with 3765 additions and 210 deletions

View File

@ -28,6 +28,17 @@
<script src="https://oss.maxcdn.com/respond/1.4.2/respond.min.js"></script>
<![endif]-->
<!-- Google Analytics -->
<script>
// Google Analytics (analytics.js) bootstrap.
// The IIFE below:
//   1. records the name of the global command function ('ga') on window,
//   2. defines window.ga as a stub that queues its arguments in ga.q
//      (unless ga is already defined),
//   3. stamps ga.l with the current time in milliseconds,
//   4. creates a <script> element pointing at the loader URL, marks it async,
//      and inserts it before the first <script> element in the document.
// The loader URL uses an explicit https: scheme; a protocol-relative URL
// ('//host/...') resolves against the page's own scheme, so it breaks when the
// page is opened from file:// and is fetched in clear text on http pages.
(function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
(i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o),
m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
})(window,document,'script','https://www.google-analytics.com/analytics.js','ga');
// Queue the tracker creation and the initial pageview hit.
ga('create', 'UA-74711362-1', 'auto');
ga('send', 'pageview');
</script>
<!-- /Google Analytics -->
</head>
@ -81,7 +92,7 @@
<div class="prompt input_prompt">In&nbsp;[1]:</div>
<div class="inner_cell">
<div class="input_area">
<div class=" highlight hl-ipython3"><pre><span class="kn">import</span> <span class="nn">requests</span>
<div class=" highlight hl-ipython3"><pre><span></span><span class="kn">import</span> <span class="nn">requests</span>
<span class="kn">import</span> <span class="nn">pandas</span> <span class="k">as</span> <span class="nn">pd</span>
<span class="kn">import</span> <span class="nn">numpy</span> <span class="k">as</span> <span class="nn">np</span>
<span class="kn">from</span> <span class="nn">dateutil</span> <span class="k">import</span> <span class="n">parser</span> <span class="k">as</span> <span class="n">dtparser</span>
@ -116,7 +127,7 @@
<div class="prompt input_prompt">In&nbsp;[2]:</div>
<div class="inner_cell">
<div class="input_area">
<div class=" highlight hl-ipython3"><pre><span class="k">class</span> <span class="nc">ArticleListParser</span><span class="p">(</span><span class="n">HTMLParser</span><span class="p">):</span>
<div class=" highlight hl-ipython3"><pre><span></span><span class="k">class</span> <span class="nc">ArticleListParser</span><span class="p">(</span><span class="n">HTMLParser</span><span class="p">):</span>
<span class="sd">&quot;&quot;&quot;Given a web page with articles on it, parse out the article links&quot;&quot;&quot;</span>
<span class="n">articles</span> <span class="o">=</span> <span class="p">[]</span>
@ -129,7 +140,7 @@
<span class="bp">self</span><span class="o">.</span><span class="n">articles</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">href</span><span class="p">)</span>
<span class="n">base_url</span> <span class="o">=</span> <span class="s2">&quot;http://seekingalpha.com/author/wall-street-breakfast/articles&quot;</span>
<span class="n">article_page_urls</span> <span class="o">=</span> <span class="p">[</span><span class="n">base_url</span><span class="p">]</span> <span class="o">+</span> <span class="p">[</span><span class="n">base_url</span> <span class="o">+</span> <span class="s1">&#39;/{}&#39;</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">i</span><span class="p">)</span> <span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="mi">2</span><span class="p">,</span> <span class="mi">20</span><span class="p">)]</span>
<span class="n">article_page_urls</span> <span class="o">=</span> <span class="p">[</span><span class="n">base_url</span><span class="p">]</span> <span class="o">+</span> <span class="p">[</span><span class="n">base_url</span> <span class="o">+</span> <span class="s1">&#39;/</span><span class="si">{}</span><span class="s1">&#39;</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">i</span><span class="p">)</span> <span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="mi">2</span><span class="p">,</span> <span class="mi">20</span><span class="p">)]</span>
<span class="n">global_articles</span> <span class="o">=</span> <span class="p">[]</span>
<span class="k">for</span> <span class="n">page</span> <span class="ow">in</span> <span class="n">article_page_urls</span><span class="p">:</span>
@ -151,7 +162,7 @@
<div class="prompt input_prompt">In&nbsp;[3]:</div>
<div class="inner_cell">
<div class="input_area">
<div class=" highlight hl-ipython3"><pre><span class="k">class</span> <span class="nc">ArticleReturnParser</span><span class="p">(</span><span class="n">HTMLParser</span><span class="p">):</span>
<div class=" highlight hl-ipython3"><pre><span></span><span class="k">class</span> <span class="nc">ArticleReturnParser</span><span class="p">(</span><span class="n">HTMLParser</span><span class="p">):</span>
<span class="s2">&quot;Given an article, parse out the futures returns in it&quot;</span>
<span class="n">record_font_tags</span> <span class="o">=</span> <span class="kc">False</span>
@ -238,7 +249,7 @@
<div class="prompt input_prompt">In&nbsp;[4]:</div>
<div class="inner_cell">
<div class="input_area">
<div class=" highlight hl-ipython3"><pre><span class="c1"># article_df is sorted by date, so we get the first row.</span>
<div class=" highlight hl-ipython3"><pre><span></span><span class="c1"># article_df is sorted by date, so we get the first row.</span>
<span class="n">start_date</span> <span class="o">=</span> <span class="n">article_df</span><span class="o">.</span><span class="n">sort_values</span><span class="p">(</span><span class="n">by</span><span class="o">=</span><span class="s1">&#39;date&#39;</span><span class="p">)</span><span class="o">.</span><span class="n">iloc</span><span class="p">[</span><span class="mi">0</span><span class="p">][</span><span class="s1">&#39;date&#39;</span><span class="p">]</span> <span class="o">-</span> <span class="n">relativedelta</span><span class="p">(</span><span class="n">days</span><span class="o">=</span><span class="mi">1</span><span class="p">)</span>
<span class="n">SPY</span> <span class="o">=</span> <span class="n">Quandl</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s2">&quot;GOOG/NYSE_SPY&quot;</span><span class="p">,</span> <span class="n">trim_start</span><span class="o">=</span><span class="n">start_date</span><span class="p">)</span>
<span class="n">DJIA</span> <span class="o">=</span> <span class="n">Quandl</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s2">&quot;GOOG/AMS_DIA&quot;</span><span class="p">,</span> <span class="n">trim_start</span><span class="o">=</span><span class="n">start_date</span><span class="p">)</span>
@ -267,7 +278,7 @@
<div class="prompt input_prompt">In&nbsp;[5]:</div>
<div class="inner_cell">
<div class="input_area">
<div class=" highlight hl-ipython3"><pre><span class="k">def</span> <span class="nf">calculate_opening_ret</span><span class="p">(</span><span class="n">frame</span><span class="p">):</span>
<div class=" highlight hl-ipython3"><pre><span></span><span class="k">def</span> <span class="nf">calculate_opening_ret</span><span class="p">(</span><span class="n">frame</span><span class="p">):</span>
<span class="c1"># I&#39;m not a huge fan of the appending for loop,</span>
<span class="c1"># but it&#39;s a bit verbose for a comprehension</span>
<span class="n">data</span> <span class="o">=</span> <span class="p">{}</span>
@ -380,7 +391,7 @@ S&amp;P 0.604478 0.597015 0.811808 0.848708
<div class="prompt input_prompt">In&nbsp;[6]:</div>
<div class="inner_cell">
<div class="input_area">
<div class=" highlight hl-ipython3"><pre><span class="k">def</span> <span class="nf">calculate_closing_ret</span><span class="p">(</span><span class="n">frame</span><span class="p">):</span>
<div class=" highlight hl-ipython3"><pre><span></span><span class="k">def</span> <span class="nf">calculate_closing_ret</span><span class="p">(</span><span class="n">frame</span><span class="p">):</span>
<span class="c1"># I&#39;m not a huge fan of the appending for loop,</span>
<span class="c1"># but it&#39;s a bit verbose for a comprehension</span>
<span class="n">data</span> <span class="o">=</span> <span class="p">{}</span>
@ -501,6 +512,20 @@ MathJax.Hub.Config({tex2jax: {inlineMath: [['$','$'], ['\(','\)']]}});
<script async src='https://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS_CHTML'></script>
<div class="comments">
<div id="disqus_thread"></div>
<script>
// Disqus comment-thread embed.
// The three disqus_* variables are intentionally declared with `var` at the
// top level of a classic script so they stay global; disqus_shortname is read
// below, and the others are presumably read by the loaded embed.js
// (NOTE(review): confirm against the Disqus embed documentation).
var disqus_shortname = 'bradleespeice';
var disqus_identifier = 'testing-cramer.html';
var disqus_url = 'https://bspeice.github.io/testing-cramer.html';
(function() {
// Build the loader <script> element and mark it async.
var dsq = document.createElement('script'); dsq.async = true;
// Explicit https: scheme; a protocol-relative URL ('//host/...') resolves
// against the page's own scheme and fails when the page is opened from file://.
dsq.src = 'https://' + disqus_shortname + '.disqus.com/embed.js';
// Append to <head> when present, otherwise fall back to <body>.
(document.getElementsByTagName('head')[0] || document.getElementsByTagName('body')[0]).appendChild(dsq);
})();
</script>
<noscript>Please enable JavaScript to view the comments.</noscript>
</div>
</div>
<!-- /Content -->