mirror of
https://github.com/bspeice/bspeice.github.io
synced 2024-12-30 02:08:09 -05:00
Fix notebooks not showing up
This commit is contained in:
parent
3338b5b0ad
commit
fc491198aa
@ -4,20 +4,22 @@
|
||||
<meta charset="utf-8">
|
||||
<meta http-equiv="X-UA-Compatible" content="IE=edge">
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1">
|
||||
<meta name="description" content="I listen to a lot of Drum and Bass music, because it's beautiful music. And there's a particular site, Bassdrive.com that hosts a lot of great content. Specifically, the archives section of the...">
|
||||
<meta name="description" content="I listen to a lot of Drum and Bass music, because it's beautiful music. And there's a particular site, Bassdrive.com that hosts a lot of great content. Specifically, the archives section of the ...">
|
||||
<meta name="keywords" content="nutone, Rust">
|
||||
<link rel="icon" href="/favicon.ico">
|
||||
<link rel="icon" href="https://bspeice.github.io/favicon.ico">
|
||||
|
||||
<title>A Rustic Re-Podcasting Server (Part 1) - Bradlee Speice</title>
|
||||
|
||||
<!-- Stylesheets -->
|
||||
<link href="/theme/css/bootstrap.min.css" rel="stylesheet">
|
||||
<link href="/theme/css/fonts.css" rel="stylesheet">
|
||||
<link href="/theme/css/nest.css" rel="stylesheet">
|
||||
<link href="/theme/css/pygment.css" rel="stylesheet">
|
||||
<link href="https://bspeice.github.io/theme/css/bootstrap.min.css" rel="stylesheet">
|
||||
<link href="https://bspeice.github.io/theme/css/fonts.css" rel="stylesheet">
|
||||
<link href="https://bspeice.github.io/theme/css/nest.css" rel="stylesheet">
|
||||
<link href="https://bspeice.github.io/theme/css/pygment.css" rel="stylesheet">
|
||||
<!-- /Stylesheets -->
|
||||
|
||||
<!-- RSS Feeds -->
|
||||
<link href="https://bspeice.github.io/feeds/all.atom.xml" type="application/atom+xml" rel="alternate" title="Bradlee Speice Full Atom Feed" />
|
||||
<link href="https://bspeice.github.io/feeds/blog.atom.xml" type="application/atom+xml" rel="alternate" title="Bradlee Speice Categories Atom Feed" />
|
||||
<!-- /RSS Feeds -->
|
||||
|
||||
<!-- HTML5 shim and Respond.js for IE8 support of HTML5 elements and media queries -->
|
||||
@ -26,6 +28,17 @@
|
||||
<script src="https://oss.maxcdn.com/respond/1.4.2/respond.min.js"></script>
|
||||
<![endif]-->
|
||||
|
||||
<!-- Google Analytics -->
|
||||
<script>
|
||||
(function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
|
||||
(i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o),
|
||||
m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
|
||||
})(window,document,'script','//www.google-analytics.com/analytics.js','ga');
|
||||
|
||||
ga('create', 'UA-74711362-1', 'auto');
|
||||
ga('send', 'pageview');
|
||||
</script>
|
||||
<!-- /Google Analytics -->
|
||||
|
||||
|
||||
</head>
|
||||
@ -39,7 +52,7 @@
|
||||
<div class="container">
|
||||
<div class="header-nav">
|
||||
<div class="header-logo">
|
||||
<a class="pull-left" href="/"><img class="mr20" src="/images/logo.svg" alt="logo">Bradlee Speice</a>
|
||||
<a class="pull-left" href="https://bspeice.github.io/"><img class="mr20" src="https://bspeice.github.io/images/logo.svg" alt="logo">Bradlee Speice</a>
|
||||
</div>
|
||||
<div class="nav pull-right">
|
||||
</div>
|
||||
@ -54,12 +67,12 @@
|
||||
<div class="col-lg-12">
|
||||
<div class="header-content">
|
||||
<h1 class="header-title">A Rustic Re-Podcasting Server (Part 1)</h1>
|
||||
<p class="header-date"> <a href="/author/bradlee-speice.html">Bradlee Speice</a>, Sat 22 October 2016, <a href="/category/blog.html">Blog</a></p>
|
||||
<p class="header-date"> <a href="https://bspeice.github.io/author/bradlee-speice.html">Bradlee Speice</a>, Sat 22 October 2016, <a href="https://bspeice.github.io/category/blog.html">Blog</a></p>
|
||||
<div class="header-underline"></div>
|
||||
<div class="clearfix"></div>
|
||||
<p class="pull-right header-tags">
|
||||
<span class="glyphicon glyphicon-tags mr5" aria-hidden="true"></span>
|
||||
<a href="/tag/nutone.html">nutone</a>, <a href="/tag/rust.html">Rust</a> </p>
|
||||
<a href="https://bspeice.github.io/tag/nutone.html">nutone</a>, <a href="https://bspeice.github.io/tag/rust.html">Rust</a> </p>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
@ -115,15 +128,15 @@ on some bad cases, <code>str <-> bytes</code> specifically), but Rust is h
|
||||
should be incredibly simple: All I want is to echo back
|
||||
<code>Didn't find URL: <url></code>. Shouldn't be that hard right? In Python I'd just do
|
||||
something like:</p>
|
||||
<div class="highlight"><pre><span></span><span class="k">def</span> <span class="nf">echo_handler</span><span class="p">(</span><span class="n">request</span><span class="p">):</span>
|
||||
<div class="highlight"><pre><span class="k">def</span> <span class="nf">echo_handler</span><span class="p">(</span><span class="n">request</span><span class="p">):</span>
|
||||
<span class="k">return</span> <span class="s2">"You're visiting: {}"</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">request</span><span class="o">.</span><span class="n">uri</span><span class="p">)</span>
|
||||
</pre></div>
|
||||
|
||||
|
||||
<p>And we'd call it a day. Rust isn't so simple. Let's start with the trivial
|
||||
examples people post online:</p>
|
||||
<div class="highlight"><pre><span></span><span class="k">fn</span> <span class="nf">hello_world</span><span class="p">(</span><span class="n">req</span>: <span class="kp">&</span><span class="nc">mut</span><span class="w"> </span><span class="n">Request</span><span class="p">)</span><span class="w"> </span>-> <span class="nc">IronResult</span><span class="o"><</span><span class="n">Response</span><span class="o">></span><span class="w"> </span><span class="p">{</span><span class="w"></span>
|
||||
<span class="w"> </span><span class="nb">Ok</span><span class="p">(</span><span class="n">Response</span>::<span class="n">with</span><span class="p">((</span><span class="n">status</span>::<span class="nb">Ok</span><span class="p">,</span><span class="w"> </span><span class="s">"You found the server!"</span><span class="p">)))</span><span class="w"></span>
|
||||
<div class="highlight"><pre><span class="k">fn</span><span class="w"> </span><span class="n">hello_world</span><span class="p">(</span><span class="n">req</span><span class="o">:</span><span class="w"> </span><span class="o">&</span><span class="k">mut</span><span class="w"> </span><span class="n">Request</span><span class="p">)</span><span class="w"> </span><span class="o">-></span><span class="w"> </span><span class="n">IronResult</span><span class="o"><</span><span class="n">Response</span><span class="o">></span><span class="w"> </span><span class="p">{</span><span class="w"></span>
|
||||
<span class="w"> </span><span class="nb">Ok</span><span class="p">(</span><span class="n">Response</span><span class="o">::</span><span class="n">with</span><span class="p">((</span><span class="n">status</span><span class="o">::</span><span class="nb">Ok</span><span class="p">,</span><span class="w"> </span><span class="s">"You found the server!"</span><span class="p">)))</span><span class="w"></span>
|
||||
<span class="p">}</span><span class="w"></span>
|
||||
</pre></div>
|
||||
|
||||
@ -132,22 +145,22 @@ examples people post online:</p>
|
||||
version! All we need to do is just send back a string of some form. So, we
|
||||
look up the documentation for <a href="http://ironframework.io/doc/iron/request/struct.Request.html"><code>Request</code></a> and see a <code>url</code> field that will contain
|
||||
what we want. Let's try the first iteration:</p>
|
||||
<div class="highlight"><pre><span></span><span class="k">fn</span> <span class="nf">hello_world</span><span class="p">(</span><span class="n">req</span>: <span class="kp">&</span><span class="nc">mut</span><span class="w"> </span><span class="n">Request</span><span class="p">)</span><span class="w"> </span>-> <span class="nc">IronResult</span><span class="o"><</span><span class="n">Response</span><span class="o">></span><span class="w"> </span><span class="p">{</span><span class="w"></span>
|
||||
<span class="w"> </span><span class="nb">Ok</span><span class="p">(</span><span class="n">Response</span>::<span class="n">with</span><span class="p">((</span><span class="n">status</span>::<span class="nb">Ok</span><span class="p">,</span><span class="w"> </span><span class="s">"You found the URL: "</span><span class="w"> </span><span class="o">+</span><span class="w"> </span><span class="n">req</span><span class="p">.</span><span class="n">url</span><span class="p">)))</span><span class="w"></span>
|
||||
<div class="highlight"><pre><span class="k">fn</span><span class="w"> </span><span class="n">hello_world</span><span class="p">(</span><span class="n">req</span><span class="o">:</span><span class="w"> </span><span class="o">&</span><span class="k">mut</span><span class="w"> </span><span class="n">Request</span><span class="p">)</span><span class="w"> </span><span class="o">-></span><span class="w"> </span><span class="n">IronResult</span><span class="o"><</span><span class="n">Response</span><span class="o">></span><span class="w"> </span><span class="p">{</span><span class="w"></span>
|
||||
<span class="w"> </span><span class="nb">Ok</span><span class="p">(</span><span class="n">Response</span><span class="o">::</span><span class="n">with</span><span class="p">((</span><span class="n">status</span><span class="o">::</span><span class="nb">Ok</span><span class="p">,</span><span class="w"> </span><span class="s">"You found the URL: "</span><span class="w"> </span><span class="o">+</span><span class="w"> </span><span class="n">req</span><span class="p">.</span><span class="n">url</span><span class="p">)))</span><span class="w"></span>
|
||||
<span class="p">}</span><span class="w"></span>
|
||||
</pre></div>
|
||||
|
||||
|
||||
<p>Which yields the error:</p>
|
||||
<div class="highlight"><pre><span></span>error[E0369]: binary operation `+` cannot be applied to type `&'static str`
|
||||
<div class="highlight"><pre>error[E0369]: binary operation `+` cannot be applied to type `&'static str`
|
||||
</pre></div>
|
||||
|
||||
|
||||
<p>OK, what's going on here? Time to start Googling for <a href="https://www.google.com/#q=concatenate+strings+in+rust">"concatenate strings in Rust"</a>. That's what we
|
||||
want to do right? Concatenate a static string and the URL.</p>
|
||||
<p>After Googling, we come across a helpful <a href="https://doc.rust-lang.org/std/macro.concat!.html"><code>concat!</code></a> macro that looks really nice! Let's try that one:</p>
|
||||
<div class="highlight"><pre><span></span><span class="k">fn</span> <span class="nf">hello_world</span><span class="p">(</span><span class="n">req</span>: <span class="kp">&</span><span class="nc">mut</span><span class="w"> </span><span class="n">Request</span><span class="p">)</span><span class="w"> </span>-> <span class="nc">IronResult</span><span class="o"><</span><span class="n">Response</span><span class="o">></span><span class="w"> </span><span class="p">{</span><span class="w"></span>
|
||||
<span class="w"> </span><span class="nb">Ok</span><span class="p">(</span><span class="n">Response</span>::<span class="n">with</span><span class="p">((</span><span class="n">status</span>::<span class="nb">Ok</span><span class="p">,</span><span class="w"> </span><span class="n">concat</span><span class="o">!</span><span class="p">(</span><span class="s">"You found the URL: "</span><span class="p">,</span><span class="w"> </span><span class="n">req</span><span class="p">.</span><span class="n">url</span><span class="p">))))</span><span class="w"></span>
|
||||
<div class="highlight"><pre><span class="k">fn</span><span class="w"> </span><span class="n">hello_world</span><span class="p">(</span><span class="n">req</span><span class="o">:</span><span class="w"> </span><span class="o">&</span><span class="k">mut</span><span class="w"> </span><span class="n">Request</span><span class="p">)</span><span class="w"> </span><span class="o">-></span><span class="w"> </span><span class="n">IronResult</span><span class="o"><</span><span class="n">Response</span><span class="o">></span><span class="w"> </span><span class="p">{</span><span class="w"></span>
|
||||
<span class="w"> </span><span class="nb">Ok</span><span class="p">(</span><span class="n">Response</span><span class="o">::</span><span class="n">with</span><span class="p">((</span><span class="n">status</span><span class="o">::</span><span class="nb">Ok</span><span class="p">,</span><span class="w"> </span><span class="n">concat</span><span class="o">!</span><span class="p">(</span><span class="s">"You found the URL: "</span><span class="p">,</span><span class="w"> </span><span class="n">req</span><span class="p">.</span><span class="n">url</span><span class="p">))))</span><span class="w"></span>
|
||||
<span class="p">}</span><span class="w"></span>
|
||||
</pre></div>
|
||||
|
||||
@ -159,8 +172,8 @@ at compile time what <code>req.url</code> is. Which, in my outsider opinion, is
|
||||
strange. <code>println!</code> and <code>format!</code>, etc., all handle values they don't know at
|
||||
compile time. Why can't <code>concat!</code>? By any means, we need a new plan of attack.
|
||||
How about we try formatting strings?</p>
|
||||
<div class="highlight"><pre><span></span><span class="k">fn</span> <span class="nf">hello_world</span><span class="p">(</span><span class="n">req</span>: <span class="kp">&</span><span class="nc">mut</span><span class="w"> </span><span class="n">Request</span><span class="p">)</span><span class="w"> </span>-> <span class="nc">IronResult</span><span class="o"><</span><span class="n">Response</span><span class="o">></span><span class="w"> </span><span class="p">{</span><span class="w"></span>
|
||||
<span class="w"> </span><span class="nb">Ok</span><span class="p">(</span><span class="n">Response</span>::<span class="n">with</span><span class="p">((</span><span class="n">status</span>::<span class="nb">Ok</span><span class="p">,</span><span class="w"> </span><span class="n">format</span><span class="o">!</span><span class="p">(</span><span class="s">"You found the URL: {}"</span><span class="p">,</span><span class="w"> </span><span class="n">req</span><span class="p">.</span><span class="n">url</span><span class="p">))))</span><span class="w"></span>
|
||||
<div class="highlight"><pre><span class="k">fn</span><span class="w"> </span><span class="n">hello_world</span><span class="p">(</span><span class="n">req</span><span class="o">:</span><span class="w"> </span><span class="o">&</span><span class="k">mut</span><span class="w"> </span><span class="n">Request</span><span class="p">)</span><span class="w"> </span><span class="o">-></span><span class="w"> </span><span class="n">IronResult</span><span class="o"><</span><span class="n">Response</span><span class="o">></span><span class="w"> </span><span class="p">{</span><span class="w"></span>
|
||||
<span class="w"> </span><span class="nb">Ok</span><span class="p">(</span><span class="n">Response</span><span class="o">::</span><span class="n">with</span><span class="p">((</span><span class="n">status</span><span class="o">::</span><span class="nb">Ok</span><span class="p">,</span><span class="w"> </span><span class="n">format</span><span class="o">!</span><span class="p">(</span><span class="s">"You found the URL: {}"</span><span class="p">,</span><span class="w"> </span><span class="n">req</span><span class="p">.</span><span class="n">url</span><span class="p">))))</span><span class="w"></span>
|
||||
<span class="p">}</span><span class="w"></span>
|
||||
</pre></div>
|
||||
|
||||
@ -191,9 +204,9 @@ working on things that are a bit more complex?</p>
|
||||
<p>We're going to cover that here. Our first try: creating a function which returns
|
||||
other functions. This is a principle called <a href="http://stackoverflow.com/a/36321/1454178">currying</a>. We set up a function that allows us to keep some data in scope
|
||||
for another function to come later.</p>
|
||||
<div class="highlight"><pre><span></span><span class="k">fn</span> <span class="nf">build_handler</span><span class="p">(</span><span class="n">message</span>: <span class="nb">String</span><span class="p">)</span><span class="w"> </span>-> <span class="nb">Fn</span><span class="p">(</span><span class="o">&</span><span class="k">mut</span><span class="w"> </span><span class="n">Request</span><span class="p">)</span><span class="w"> </span>-> <span class="nc">IronResult</span><span class="o"><</span><span class="n">Response</span><span class="o">></span><span class="w"> </span><span class="p">{</span><span class="w"></span>
|
||||
<span class="w"> </span><span class="k">move</span><span class="w"> </span><span class="o">|</span><span class="n">_</span>: <span class="kp">&</span><span class="nc">mut</span><span class="w"> </span><span class="n">Request</span><span class="o">|</span><span class="w"> </span><span class="p">{</span><span class="w"></span>
|
||||
<span class="w"> </span><span class="nb">Ok</span><span class="p">(</span><span class="n">Response</span>::<span class="n">with</span><span class="p">((</span><span class="n">status</span>::<span class="nb">Ok</span><span class="p">,</span><span class="w"> </span><span class="n">message</span><span class="p">)))</span><span class="w"></span>
|
||||
<div class="highlight"><pre><span class="k">fn</span><span class="w"> </span><span class="n">build_handler</span><span class="p">(</span><span class="n">message</span><span class="o">:</span><span class="w"> </span><span class="nb">String</span><span class="p">)</span><span class="w"> </span><span class="o">-></span><span class="w"> </span><span class="nb">Fn</span><span class="p">(</span><span class="o">&</span><span class="k">mut</span><span class="w"> </span><span class="n">Request</span><span class="p">)</span><span class="w"> </span><span class="o">-></span><span class="w"> </span><span class="n">IronResult</span><span class="o"><</span><span class="n">Response</span><span class="o">></span><span class="w"> </span><span class="p">{</span><span class="w"></span>
|
||||
<span class="w"> </span><span class="n">move</span><span class="w"> </span><span class="o">|</span><span class="n">_</span><span class="o">:</span><span class="w"> </span><span class="o">&</span><span class="k">mut</span><span class="w"> </span><span class="n">Request</span><span class="o">|</span><span class="w"> </span><span class="p">{</span><span class="w"></span>
|
||||
<span class="w"> </span><span class="nb">Ok</span><span class="p">(</span><span class="n">Response</span><span class="o">::</span><span class="n">with</span><span class="p">((</span><span class="n">status</span><span class="o">::</span><span class="nb">Ok</span><span class="p">,</span><span class="w"> </span><span class="n">message</span><span class="p">)))</span><span class="w"></span>
|
||||
<span class="w"> </span><span class="p">}</span><span class="w"></span>
|
||||
<span class="p">}</span><span class="w"></span>
|
||||
</pre></div>
|
||||
@ -202,7 +215,7 @@ for another function to come later.</p>
|
||||
<p>We've simply set up a function that returns another anonymous function with the
|
||||
<code>message</code> parameter scoped in. If you compile this, you get not 1, not 2, but 5
|
||||
new errors. 4 of them are the same though:</p>
|
||||
<div class="highlight"><pre><span></span>error[E0277]: the trait bound `for<'r, 'r, 'r> std::ops::Fn(&'r mut iron::Request<'r, 'r>) -> std::result::Result<iron::Response, iron::IronError> + 'static: std::marker::Sized` is not satisfied
|
||||
<div class="highlight"><pre>error[E0277]: the trait bound `for<'r, 'r, 'r> std::ops::Fn(&'r mut iron::Request<'r, 'r>) -> std::result::Result<iron::Response, iron::IronError> + 'static: std::marker::Sized` is not satisfied
|
||||
</pre></div>
|
||||
|
||||
|
||||
@ -230,19 +243,19 @@ we've been working with so far.</p>
|
||||
<p>The principle is that we need to define a new <code>struct</code> to hold our data, then
|
||||
implement that <code>handle()</code> method to return the result. Something that looks
|
||||
like this might do:</p>
|
||||
<div class="highlight"><pre><span></span><span class="k">struct</span> <span class="nc">EchoHandler</span><span class="w"> </span><span class="p">{</span><span class="w"></span>
|
||||
<span class="w"> </span><span class="n">message</span>: <span class="nb">String</span>
|
||||
<div class="highlight"><pre><span class="k">struct</span><span class="w"> </span><span class="n">EchoHandler</span><span class="w"> </span><span class="p">{</span><span class="w"></span>
|
||||
<span class="w"> </span><span class="n">message</span><span class="o">:</span><span class="w"> </span><span class="nb">String</span><span class="w"></span>
|
||||
<span class="p">}</span><span class="w"></span>
|
||||
|
||||
<span class="k">impl</span><span class="w"> </span><span class="n">Handler</span><span class="w"> </span><span class="k">for</span><span class="w"> </span><span class="n">EchoHandler</span><span class="w"> </span><span class="p">{</span><span class="w"></span>
|
||||
<span class="w"> </span><span class="k">fn</span> <span class="nf">handle</span><span class="p">(</span><span class="o">&</span><span class="bp">self</span><span class="p">,</span><span class="w"> </span><span class="n">_</span>: <span class="kp">&</span><span class="nc">mut</span><span class="w"> </span><span class="n">Request</span><span class="p">)</span><span class="w"> </span>-> <span class="nc">IronResult</span><span class="o"><</span><span class="n">Response</span><span class="o">></span><span class="w"> </span><span class="p">{</span><span class="w"></span>
|
||||
<span class="w"> </span><span class="nb">Ok</span><span class="p">(</span><span class="n">Response</span>::<span class="n">with</span><span class="p">((</span><span class="n">status</span>::<span class="nb">Ok</span><span class="p">,</span><span class="w"> </span><span class="bp">self</span><span class="p">.</span><span class="n">message</span><span class="p">)))</span><span class="w"></span>
|
||||
<span class="w"> </span><span class="k">fn</span><span class="w"> </span><span class="n">handle</span><span class="p">(</span><span class="o">&</span><span class="bp">self</span><span class="p">,</span><span class="w"> </span><span class="n">_</span><span class="o">:</span><span class="w"> </span><span class="o">&</span><span class="k">mut</span><span class="w"> </span><span class="n">Request</span><span class="p">)</span><span class="w"> </span><span class="o">-></span><span class="w"> </span><span class="n">IronResult</span><span class="o"><</span><span class="n">Response</span><span class="o">></span><span class="w"> </span><span class="p">{</span><span class="w"></span>
|
||||
<span class="w"> </span><span class="nb">Ok</span><span class="p">(</span><span class="n">Response</span><span class="o">::</span><span class="n">with</span><span class="p">((</span><span class="n">status</span><span class="o">::</span><span class="nb">Ok</span><span class="p">,</span><span class="w"> </span><span class="bp">self</span><span class="p">.</span><span class="n">message</span><span class="p">)))</span><span class="w"></span>
|
||||
<span class="w"> </span><span class="p">}</span><span class="w"></span>
|
||||
<span class="p">}</span><span class="w"></span>
|
||||
|
||||
<span class="c1">// Later in the code when we set up the router...</span>
|
||||
<span class="kd">let</span><span class="w"> </span><span class="n">echo</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">EchoHandler</span><span class="w"> </span><span class="p">{</span><span class="w"></span>
|
||||
<span class="w"> </span><span class="n">message</span>: <span class="s">"Is it working yet?"</span><span class="w"></span>
|
||||
<span class="w"> </span><span class="n">message</span><span class="o">:</span><span class="w"> </span><span class="s">"Is it working yet?"</span><span class="w"></span>
|
||||
<span class="p">}</span><span class="w"></span>
|
||||
<span class="n">router</span><span class="p">.</span><span class="n">get</span><span class="p">(</span><span class="s">"/"</span><span class="p">,</span><span class="w"> </span><span class="n">echo</span><span class="p">.</span><span class="n">handle</span><span class="p">,</span><span class="w"> </span><span class="s">"index"</span><span class="p">);</span><span class="w"></span>
|
||||
</pre></div>
|
||||
@ -251,38 +264,38 @@ like this might do:</p>
|
||||
<p>We attempt to build a struct, and give its <code>handle</code> method off to the router
|
||||
so the router knows what to do.</p>
|
||||
<p>You guessed it, more errors:</p>
|
||||
<div class="highlight"><pre><span></span><span class="n">error</span><span class="o">:</span> <span class="n">attempted</span> <span class="n">to</span> <span class="n">take</span> <span class="n">value</span> <span class="n">of</span> <span class="n">method</span> <span class="err">`</span><span class="n">handle</span><span class="err">`</span> <span class="n">on</span> <span class="n">type</span> <span class="err">`</span><span class="n">EchoHandler</span><span class="err">`</span>
|
||||
<div class="highlight"><pre><span class="n">error</span><span class="o">:</span> <span class="n">attempted</span> <span class="n">to</span> <span class="n">take</span> <span class="n">value</span> <span class="n">of</span> <span class="n">method</span> <span class="err">`</span><span class="n">handle</span><span class="err">`</span> <span class="n">on</span> <span class="n">type</span> <span class="err">`</span><span class="n">EchoHandler</span><span class="err">`</span>
|
||||
</pre></div>
|
||||
|
||||
|
||||
<p>Now, the Rust compiler is actually a really nice fellow, and offers us help:</p>
|
||||
<div class="highlight"><pre><span></span><span class="n">help</span><span class="o">:</span> <span class="n">maybe</span> <span class="n">a</span> <span class="err">`</span><span class="o">()</span><span class="err">`</span> <span class="n">to</span> <span class="n">call</span> <span class="n">it</span> <span class="k">is</span> <span class="n">missing</span><span class="o">?</span> <span class="n">If</span> <span class="n">not</span><span class="o">,</span> <span class="k">try</span> <span class="n">an</span> <span class="n">anonymous</span> <span class="kd">function</span>
|
||||
<div class="highlight"><pre><span class="n">help</span><span class="o">:</span> <span class="n">maybe</span> <span class="n">a</span> <span class="err">`</span><span class="o">()</span><span class="err">`</span> <span class="n">to</span> <span class="n">call</span> <span class="n">it</span> <span class="k">is</span> <span class="n">missing</span><span class="o">?</span> <span class="n">If</span> <span class="n">not</span><span class="o">,</span> <span class="k">try</span> <span class="n">an</span> <span class="n">anonymous</span> <span class="kd">function</span>
|
||||
</pre></div>
|
||||
|
||||
|
||||
<p>We definitely don't want to call that function, so maybe try an anonymous
|
||||
function as it recommends?</p>
|
||||
<div class="highlight"><pre><span></span><span class="n">router</span><span class="p">.</span><span class="n">get</span><span class="p">(</span><span class="s">"/"</span><span class="p">,</span><span class="w"> </span><span class="o">|</span><span class="n">req</span>: <span class="kp">&</span><span class="nc">mut</span><span class="w"> </span><span class="n">Request</span><span class="o">|</span><span class="w"> </span><span class="n">echo</span><span class="p">.</span><span class="n">handle</span><span class="p">(</span><span class="n">req</span><span class="p">),</span><span class="w"> </span><span class="s">"index"</span><span class="p">);</span><span class="w"></span>
|
||||
<div class="highlight"><pre><span class="n">router</span><span class="p">.</span><span class="n">get</span><span class="p">(</span><span class="s">"/"</span><span class="p">,</span><span class="w"> </span><span class="o">|</span><span class="n">req</span><span class="o">:</span><span class="w"> </span><span class="o">&</span><span class="k">mut</span><span class="w"> </span><span class="n">Request</span><span class="o">|</span><span class="w"> </span><span class="n">echo</span><span class="p">.</span><span class="n">handle</span><span class="p">(</span><span class="n">req</span><span class="p">),</span><span class="w"> </span><span class="s">"index"</span><span class="p">);</span><span class="w"></span>
|
||||
</pre></div>
|
||||
|
||||
|
||||
<p>Another error:</p>
|
||||
<div class="highlight"><pre><span></span>error[E0373]: closure may outlive the current function, but it borrows `echo`, which is owned by the current function
|
||||
<div class="highlight"><pre>error[E0373]: closure may outlive the current function, but it borrows `echo`, which is owned by the current function
|
||||
</pre></div>
|
||||
|
||||
|
||||
<p>Another helpful message:</p>
|
||||
<div class="highlight"><pre><span></span><span class="n">help</span><span class="o">:</span> <span class="n">to</span> <span class="n">force</span> <span class="n">the</span> <span class="n">closure</span> <span class="n">to</span> <span class="n">take</span> <span class="n">ownership</span> <span class="n">of</span> <span class="err">`</span><span class="n">echo</span><span class="err">`</span> <span class="o">(</span><span class="n">and</span> <span class="n">any</span> <span class="n">other</span> <span class="n">referenced</span> <span class="n">variables</span><span class="o">),</span> <span class="n">use</span> <span class="n">the</span> <span class="err">`</span><span class="n">move</span><span class="err">`</span> <span class="n">keyword</span>
|
||||
<div class="highlight"><pre><span class="n">help</span><span class="o">:</span> <span class="n">to</span> <span class="n">force</span> <span class="n">the</span> <span class="n">closure</span> <span class="n">to</span> <span class="n">take</span> <span class="n">ownership</span> <span class="n">of</span> <span class="err">`</span><span class="n">echo</span><span class="err">`</span> <span class="o">(</span><span class="n">and</span> <span class="n">any</span> <span class="n">other</span> <span class="n">referenced</span> <span class="n">variables</span><span class="o">),</span> <span class="n">use</span> <span class="n">the</span> <span class="err">`</span><span class="n">move</span><span class="err">`</span> <span class="n">keyword</span>
|
||||
</pre></div>
|
||||
|
||||
|
||||
<p>We're getting closer though! Let's implement this change:</p>
|
||||
<div class="highlight"><pre><span></span><span class="n">router</span><span class="p">.</span><span class="n">get</span><span class="p">(</span><span class="s">"/"</span><span class="p">,</span><span class="w"> </span><span class="k">move</span><span class="w"> </span><span class="o">|</span><span class="n">req</span>: <span class="kp">&</span><span class="nc">mut</span><span class="w"> </span><span class="n">Request</span><span class="o">|</span><span class="w"> </span><span class="n">echo</span><span class="p">.</span><span class="n">handle</span><span class="p">(</span><span class="n">req</span><span class="p">),</span><span class="w"> </span><span class="s">"index"</span><span class="p">);</span><span class="w"></span>
|
||||
<div class="highlight"><pre><span class="n">router</span><span class="p">.</span><span class="n">get</span><span class="p">(</span><span class="s">"/"</span><span class="p">,</span><span class="w"> </span><span class="n">move</span><span class="w"> </span><span class="o">|</span><span class="n">req</span><span class="o">:</span><span class="w"> </span><span class="o">&</span><span class="k">mut</span><span class="w"> </span><span class="n">Request</span><span class="o">|</span><span class="w"> </span><span class="n">echo</span><span class="p">.</span><span class="n">handle</span><span class="p">(</span><span class="n">req</span><span class="p">),</span><span class="w"> </span><span class="s">"index"</span><span class="p">);</span><span class="w"></span>
|
||||
</pre></div>
|
||||
|
||||
|
||||
<p>And here's where things get strange:</p>
|
||||
<div class="highlight"><pre><span></span>error[E0507]: cannot move out of borrowed content
|
||||
<div class="highlight"><pre>error[E0507]: cannot move out of borrowed content
|
||||
--> src/main.rs:18:40
|
||||
|
|
||||
18 | Ok(Response::with((status::Ok, self.message)))
|
||||
@ -307,7 +320,7 @@ instead of transferring ownership</li>
|
||||
audience out. Because <code>iron</code> won't accept a reference, we are forced into the
|
||||
second option: making a copy. To do so, we just need to change the function
|
||||
to look like this:</p>
|
||||
<div class="highlight"><pre><span></span><span class="nb">Ok</span><span class="p">(</span><span class="n">Response</span>::<span class="n">with</span><span class="p">((</span><span class="n">status</span>::<span class="nb">Ok</span><span class="p">,</span><span class="w"> </span><span class="bp">self</span><span class="p">.</span><span class="n">message</span><span class="p">.</span><span class="n">clone</span><span class="p">())))</span><span class="w"></span>
|
||||
<div class="highlight"><pre><span class="nb">Ok</span><span class="p">(</span><span class="n">Response</span><span class="o">::</span><span class="n">with</span><span class="p">((</span><span class="n">status</span><span class="o">::</span><span class="nb">Ok</span><span class="p">,</span><span class="w"> </span><span class="bp">self</span><span class="p">.</span><span class="n">message</span><span class="p">.</span><span class="n">clone</span><span class="p">())))</span><span class="w"></span>
|
||||
</pre></div>
|
||||
|
||||
|
||||
@ -343,6 +356,20 @@ incredibly precise about how I use it.</p>
|
||||
going to take me a lot longer to do this than I originally thought.</p>
|
||||
|
||||
|
||||
<div class="comments">
|
||||
<div id="disqus_thread"></div>
|
||||
<script type="text/javascript">
|
||||
var disqus_shortname = 'bradleespeice';
|
||||
var disqus_identifier = 'a-rustic-re-podcasting-server-part-1.html';
|
||||
var disqus_url = 'https://bspeice.github.io/a-rustic-re-podcasting-server-part-1.html';
|
||||
(function() {
|
||||
var dsq = document.createElement('script'); dsq.type = 'text/javascript'; dsq.async = true;
|
||||
dsq.src = '//' + disqus_shortname + '.disqus.com/embed.js';
|
||||
(document.getElementsByTagName('head')[0] || document.getElementsByTagName('body')[0]).appendChild(dsq);
|
||||
})();
|
||||
</script>
|
||||
<noscript>Please enable JavaScript to view the comments.</noscript>
|
||||
</div>
|
||||
|
||||
</div>
|
||||
<!-- /Content -->
|
||||
@ -354,6 +381,7 @@ going to take me a lot longer to do this than I originally thought.</p>
|
||||
<div class="col-xs-4 col-sm-3 col-md-3 col-lg-3">
|
||||
<div class="footer-title"></div>
|
||||
<ul class="list-unstyled">
|
||||
<li><a href="https://bspeice.github.io/feeds/all.atom.xml" type="application/atom+xml" rel="alternate"></a></li>
|
||||
</ul>
|
||||
</div>
|
||||
<div class="col-xs-4 col-sm-3 col-md-3 col-lg-3">
|
||||
|
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because it is too large
Load Diff
@ -4,20 +4,22 @@
|
||||
<meta charset="utf-8">
|
||||
<meta http-equiv="X-UA-Compatible" content="IE=edge">
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1">
|
||||
<meta name="description" content="{% notebook 2016-2-3-guaranteed-money-maker.ipynb %} MathJax.Hub.Config({tex2jax: {inlineMath: [['$','$'], ['\(','\)']]}});">
|
||||
<meta name="description" content="If you can see into the future, that is.¶My previous class in Stochastic Calculus covered a lot of interesting topics, and the important one for today is the Gambler's Ruin problem. If you're ...">
|
||||
<meta name="keywords" content="martingale, strategy">
|
||||
<link rel="icon" href="/favicon.ico">
|
||||
<link rel="icon" href="https://bspeice.github.io/favicon.ico">
|
||||
|
||||
<title>Guaranteed Money Maker - Bradlee Speice</title>
|
||||
|
||||
<!-- Stylesheets -->
|
||||
<link href="/theme/css/bootstrap.min.css" rel="stylesheet">
|
||||
<link href="/theme/css/fonts.css" rel="stylesheet">
|
||||
<link href="/theme/css/nest.css" rel="stylesheet">
|
||||
<link href="/theme/css/pygment.css" rel="stylesheet">
|
||||
<link href="https://bspeice.github.io/theme/css/bootstrap.min.css" rel="stylesheet">
|
||||
<link href="https://bspeice.github.io/theme/css/fonts.css" rel="stylesheet">
|
||||
<link href="https://bspeice.github.io/theme/css/nest.css" rel="stylesheet">
|
||||
<link href="https://bspeice.github.io/theme/css/pygment.css" rel="stylesheet">
|
||||
<!-- /Stylesheets -->
|
||||
|
||||
<!-- RSS Feeds -->
|
||||
<link href="https://bspeice.github.io/feeds/all.atom.xml" type="application/atom+xml" rel="alternate" title="Bradlee Speice Full Atom Feed" />
|
||||
<link href="https://bspeice.github.io/feeds/blog.atom.xml" type="application/atom+xml" rel="alternate" title="Bradlee Speice Categories Atom Feed" />
|
||||
<!-- /RSS Feeds -->
|
||||
|
||||
<!-- HTML5 shim and Respond.js for IE8 support of HTML5 elements and media queries -->
|
||||
@ -26,6 +28,17 @@
|
||||
<script src="https://oss.maxcdn.com/respond/1.4.2/respond.min.js"></script>
|
||||
<![endif]-->
|
||||
|
||||
<!-- Google Analytics -->
|
||||
<script>
|
||||
(function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
|
||||
(i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o),
|
||||
m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
|
||||
})(window,document,'script','//www.google-analytics.com/analytics.js','ga');
|
||||
|
||||
ga('create', 'UA-74711362-1', 'auto');
|
||||
ga('send', 'pageview');
|
||||
</script>
|
||||
<!-- /Google Analytics -->
|
||||
|
||||
|
||||
</head>
|
||||
@ -39,7 +52,7 @@
|
||||
<div class="container">
|
||||
<div class="header-nav">
|
||||
<div class="header-logo">
|
||||
<a class="pull-left" href="/"><img class="mr20" src="/images/logo.svg" alt="logo">Bradlee Speice</a>
|
||||
<a class="pull-left" href="https://bspeice.github.io/"><img class="mr20" src="https://bspeice.github.io/images/logo.svg" alt="logo">Bradlee Speice</a>
|
||||
</div>
|
||||
<div class="nav pull-right">
|
||||
</div>
|
||||
@ -54,12 +67,12 @@
|
||||
<div class="col-lg-12">
|
||||
<div class="header-content">
|
||||
<h1 class="header-title">Guaranteed Money Maker</h1>
|
||||
<p class="header-date"> <a href="/author/bradlee-speice.html">Bradlee Speice</a>, Wed 03 February 2016, <a href="/category/blog.html">Blog</a></p>
|
||||
<p class="header-date"> <a href="https://bspeice.github.io/author/bradlee-speice.html">Bradlee Speice</a>, Wed 03 February 2016, <a href="https://bspeice.github.io/category/blog.html">Blog</a></p>
|
||||
<div class="header-underline"></div>
|
||||
<div class="clearfix"></div>
|
||||
<p class="pull-right header-tags">
|
||||
<span class="glyphicon glyphicon-tags mr5" aria-hidden="true"></span>
|
||||
<a href="/tag/martingale.html">martingale</a>, <a href="/tag/strategy.html">strategy</a> </p>
|
||||
<a href="https://bspeice.github.io/tag/martingale.html">martingale</a>, <a href="https://bspeice.github.io/tag/strategy.html">strategy</a> </p>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
@ -73,7 +86,265 @@
|
||||
|
||||
<!-- Content -->
|
||||
<div class="container content">
|
||||
<p>{% notebook 2016-2-3-guaranteed-money-maker.ipynb %}</p>
|
||||
<p>
|
||||
<div class="cell border-box-sizing text_cell rendered">
|
||||
<div class="prompt input_prompt">
|
||||
</div>
|
||||
<div class="inner_cell">
|
||||
<div class="text_cell_render border-box-sizing rendered_html">
|
||||
<h3 id="If-you-can-see-into-the-future,-that-is.">If you can see into the future, that is.<a class="anchor-link" href="#If-you-can-see-into-the-future,-that-is.">¶</a></h3><p>My previous class in Stochastic Calculus covered a lot of interesting topics, and the important one for today
|
||||
is the <a href="https://en.wikipedia.org/wiki/Gambler's_ruin">Gambler's Ruin</a> problem. If you're interested in some of the theory behind it, also make sure to check out
|
||||
<a href="https://en.wikipedia.org/wiki/Random_walk">random walks</a>. The important bit is that we studied the <a href="https://en.wikipedia.org/wiki/Martingale_%28betting_system%29">Martingale Betting Strategy</a>, which describes for us
|
||||
a <strong>guaranteed way</strong> to <span style='font-size: x-small'>eventually</span> make money.</p>
|
||||
<p>The strategy goes like this: You are going to toss a fair coin with a friend. If you guess heads or tails correctly, you get back double the money you bet. If you guess incorrectly, you lose money. How should you bet?</p>
|
||||
<p>The correct answer is that you should double your bet each time you lose. Then when you finally win, you'll be guaranteed to make back everything you lost and then $1 extra! Consider the scenario:</p>
|
||||
<ol>
|
||||
<li>You bet $1, and guess incorrectly. You're 1 dollar in the hole.</li>
|
||||
<li>You bet $2, and guess incorrectly. You're 3 dollars in the hole now.</li>
|
||||
<li>You bet $4, and guess incorrectly. You're 7 dollars in the hole.</li>
|
||||
<li>You bet $8, and guess correctly! You now get back those 8 dollars you bet, plus 8 extra for winning, for a <strong>total profit of one dollar</strong>!</li>
|
||||
</ol>
|
||||
<p>Mathematically, we can prove that as long as you have unlimited money to bet, you are guaranteed to make money.</p>
|
||||
<h1 id="Applying-the-Martingale-Strategy">Applying the Martingale Strategy<a class="anchor-link" href="#Applying-the-Martingale-Strategy">&#182;</a></h1><p>But we're all realistic people, and once you start talking about "unlimited money" eyebrows should be raised. Even still, this is an interesting strategy to investigate, and I want to apply it to the stock market. As long as we can guarantee there's a single day in which the stock goes up, we should be able to make money, right? The question is just how much we have to invest to guarantee this.</p>
|
||||
<p>Now it's time for the math. We'll use the following definitions:</p>
|
||||
<ul>
|
||||
<li>$o_i$ = the share price at the opening of day $i$</li>
|
||||
<li>$c_i$ = the share price at the close of day $i$</li>
|
||||
<li>$d_i$ = the amount of money we want to invest at the beginning of day $i$</li>
|
||||
</ul>
|
||||
<p>With those definitions in place, I'd like to present the formula that is <strong>guaranteed to make you money</strong>. I call it <em>Bradlee's Investment Formula</em>:</p>
|
||||
<p>$c_n \sum_{i=1}^n \frac{d_i}{o_i} > \sum_{i=1}^{n} d_i$</p>
|
||||
<p>It might not look like much, but if you can manage to make it so that this formula holds true, you will be guaranteed to make money. The intuition behind the formula is this: The closing share price times the number of shares you have purchased ends up greater than the amount of money you invested.</p>
|
||||
<p>That is, on day $n$, <span style='font-size: x-small'>if you know what the closing price will be</span> you can set up the amount of money you invest that day to <strong>guarantee you make money</strong>. I'll even teach you to figure out how much money that is! Take a look:</p>
|
||||
<p>$
|
||||
\begin{align}
|
||||
c_n \sum_{i=1}^{n-1} \frac{d_i}{o_i} + \frac{c_nd_n}{o_n} &> \sum_{i=1}^{n-1}d_i + d_n\\
|
||||
\frac{c_nd_n}{o_n} - d_n &> \sum_{i=1}^{n-1}(d_i - \frac{c_nd_i}{o_i})\\
|
||||
d_n (\frac{c_n - o_n}{o_n}) &> \sum_{i=1}^{n-1} d_i(1 - \frac{c_n}{o_i})\\
|
||||
d_n &amp;&gt; \frac{o_n}{c_n - o_n} \sum_{i=1}^{n-1} d_i(1 - \frac{c_n}{o_i})
|
||||
\end{align}$</p>
|
||||
<p>If you invest exactly $d_n$ that day, you'll break even. But if you can make sure the money you invest is greater than that quantity on the right <span style='font-size: x-small'>(which requires that you have a crystal ball tell you the stock's closing price)</span> you are <strong>guaranteed to make money!</strong></p>
|
||||
<h1 id="Interesting-Implications">Interesting Implications<a class="anchor-link" href="#Interesting-Implications">¶</a></h1><p>On a more serious note though, the formula above tells us a couple of interesting things:</p>
|
||||
<ol>
|
||||
<li>It's impossible to make money without the closing price at some point being greater than the opening price (or vice-versa if you are short selling) - there is no amount of money you can invest that will turn things in your favor.</li>
|
||||
<li>Close prices of the past aren't important if you're concerned about the bottom line. While chart technicians use price history to make judgment calls, in the end, the closing price on anything other than the last day is irrelevant.</li>
|
||||
<li>It's possible to make money as long as there is a single day where the closing price is greater than the opening price! You might have to invest a lot to do so, but it's possible.</li>
|
||||
<li>You must make a prediction about where the stock will close at if you want to know how much to invest. That is, we can set up our investment for the day to make money if the stock goes up 1%, but if it only goes up .5% we'll still lose money.</li>
|
||||
<li>It's possible the winning move is to scale back your position. Consider the scenario:<ul>
|
||||
<li>You invest money and the stock closes down .5% on the day</li>
|
||||
<li>You invest tomorrow expecting the stock to go up 1%</li>
|
||||
<li>The winning investment to break even (assuming a 1% increase) is to scale back the position, since the shares you purchased at the beginning would then be profitable</li>
|
||||
</ul>
|
||||
</li>
|
||||
</ol>
|
||||
<h1 id="Running-the-simulation">Running the simulation<a class="anchor-link" href="#Running-the-simulation">&#182;</a></h1><p>So now that we've defined our investment formula, we need to tweak a couple things in order to make an investment strategy we can actually work with. There are two issues we need to address:</p>
|
||||
<ol>
|
||||
<li>The formula only tells us how much to invest if we want to break even ($d_n$). If we actually want to turn a profit, we need to invest more than that, which we will refer to as the <strong>bias</strong>.</li>
|
||||
<li>The formula assumes we know what the closing price will be on any given day. If we don't know this, we can still invest assuming the stock price will close at a level we choose. If the price doesn't meet this objective, we try again tomorrow! This predetermined closing price will be referred to as the <strong>expectation</strong>.</li>
|
||||
</ol>
|
||||
<p>Now that we've defined our <em>bias</em> and <em>expectation</em>, we can actually build a strategy we can simulate. Much like the martingale strategy told you to bet twice your previous bet in order to make money, we've designed a system that tells us how much to bet in order to make money as well.</p>
|
||||
<p>Now, let's get to the code!</p>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="cell border-box-sizing code_cell rendered">
|
||||
<div class="input">
|
||||
<div class="prompt input_prompt">In [1]:</div>
|
||||
<div class="inner_cell">
|
||||
<div class="input_area">
|
||||
<div class=" highlight hl-julia"><pre><span class="k">using</span> <span class="n">Quandl</span>
|
||||
<span class="n">api_key</span> <span class="o">=</span> <span class="s">""</span>
|
||||
<span class="n">daily_investment</span> <span class="o">=</span> <span class="n">function</span><span class="p">(</span><span class="n">current_open</span><span class="p">,</span> <span class="n">current_close</span><span class="p">,</span> <span class="n">purchase_history</span><span class="p">,</span> <span class="n">open_history</span><span class="p">)</span>
|
||||
<span class="c"># We're not going to safeguard against divide by 0 - that's the user's responsibility</span>
|
||||
<span class="n">t1</span> <span class="o">=</span> <span class="n">current_close</span> <span class="o">/</span> <span class="n">current_open</span> <span class="o">-</span> <span class="mi">1</span>
|
||||
<span class="n">t2</span> <span class="o">=</span> <span class="n">sum</span><span class="p">(</span><span class="n">purchase_history</span> <span class="o">-</span> <span class="n">purchase_history</span><span class="o">*</span><span class="n">current_close</span> <span class="o">./</span> <span class="n">open_history</span><span class="p">)</span>
|
||||
<span class="k">return</span> <span class="n">t2</span> <span class="o">/</span> <span class="n">t1</span>
|
||||
<span class="k">end</span><span class="p">;</span>
|
||||
</pre></div>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
</div>
|
||||
<div class="cell border-box-sizing text_cell rendered">
|
||||
<div class="prompt input_prompt">
|
||||
</div>
|
||||
<div class="inner_cell">
|
||||
<div class="text_cell_render border-box-sizing rendered_html">
|
||||
<p>And let's code a way to run simulations quickly:</p>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="cell border-box-sizing code_cell rendered">
|
||||
<div class="input">
|
||||
<div class="prompt input_prompt">In [2]:</div>
|
||||
<div class="inner_cell">
|
||||
<div class="input_area">
|
||||
<div class=" highlight hl-julia"><pre><span class="n">is_profitable</span> <span class="o">=</span> <span class="n">function</span><span class="p">(</span><span class="n">current_price</span><span class="p">,</span> <span class="n">purchase_history</span><span class="p">,</span> <span class="n">open_history</span><span class="p">)</span>
|
||||
<span class="n">shares</span> <span class="o">=</span> <span class="n">sum</span><span class="p">(</span><span class="n">purchase_history</span> <span class="o">./</span> <span class="n">open_history</span><span class="p">)</span>
|
||||
<span class="k">return</span> <span class="n">current_price</span><span class="o">*</span><span class="n">shares</span> <span class="o">></span> <span class="n">sum</span><span class="p">(</span><span class="n">purchase_history</span><span class="p">)</span>
|
||||
<span class="k">end</span>
|
||||
|
||||
<span class="n">simulate</span> <span class="o">=</span> <span class="n">function</span><span class="p">(</span><span class="n">name</span><span class="p">,</span> <span class="n">start</span><span class="p">,</span> <span class="n">init</span><span class="p">,</span> <span class="n">expected</span><span class="p">,</span> <span class="n">bias</span><span class="p">)</span>
|
||||
<span class="n">ticker_info</span> <span class="o">=</span> <span class="n">quandlget</span><span class="p">(</span><span class="n">name</span><span class="p">,</span> <span class="n">from</span><span class="o">=</span><span class="n">start</span><span class="p">,</span> <span class="n">api_key</span><span class="o">=</span><span class="n">api_key</span><span class="p">)</span>
|
||||
<span class="n">open_vals</span> <span class="o">=</span> <span class="n">ticker_info</span><span class="p">[</span><span class="s">"Open"</span><span class="p">]</span><span class="o">.</span><span class="n">values</span>
|
||||
<span class="n">close_vals</span> <span class="o">=</span> <span class="n">ticker_info</span><span class="p">[</span><span class="s">"Close"</span><span class="p">]</span><span class="o">.</span><span class="n">values</span>
|
||||
<span class="n">invested</span> <span class="o">=</span> <span class="p">[</span><span class="n">init</span><span class="p">]</span>
|
||||
|
||||
<span class="c"># The simulation stops once we've made a profit</span>
|
||||
<span class="n">day</span> <span class="o">=</span> <span class="mi">1</span>
|
||||
<span class="n">profitable</span> <span class="o">=</span> <span class="n">is_profitable</span><span class="p">(</span><span class="n">close_vals</span><span class="p">[</span><span class="n">day</span><span class="p">],</span> <span class="n">invested</span><span class="p">,</span> <span class="n">open_vals</span><span class="p">[</span><span class="mi">1</span><span class="p">:</span><span class="n">length</span><span class="p">(</span><span class="n">invested</span><span class="p">)])</span> <span class="o">||</span>
|
||||
<span class="n">is_profitable</span><span class="p">(</span><span class="n">open_vals</span><span class="p">[</span><span class="n">day</span><span class="o">+</span><span class="mi">1</span><span class="p">],</span> <span class="n">invested</span><span class="p">,</span> <span class="n">open_vals</span><span class="p">[</span><span class="mi">1</span><span class="p">:</span><span class="n">length</span><span class="p">(</span><span class="n">invested</span><span class="p">)])</span>
|
||||
<span class="k">while</span> <span class="o">!</span><span class="n">profitable</span>
|
||||
<span class="n">expected_close</span> <span class="o">=</span> <span class="n">open_vals</span><span class="p">[</span><span class="n">day</span><span class="o">+</span><span class="mi">1</span><span class="p">]</span> <span class="o">*</span> <span class="n">expected</span>
|
||||
<span class="n">todays_purchase</span> <span class="o">=</span> <span class="n">daily_investment</span><span class="p">(</span><span class="n">open_vals</span><span class="p">[</span><span class="n">day</span><span class="o">+</span><span class="mi">1</span><span class="p">],</span> <span class="n">expected_close</span><span class="p">,</span> <span class="n">invested</span><span class="p">,</span> <span class="n">open_vals</span><span class="p">[</span><span class="mi">1</span><span class="p">:</span><span class="n">day</span><span class="p">])</span>
|
||||
<span class="n">invested</span> <span class="o">=</span> <span class="p">[</span><span class="n">invested</span><span class="p">;</span> <span class="n">todays_purchase</span> <span class="o">+</span> <span class="n">bias</span><span class="p">]</span>
|
||||
<span class="c"># expected_profit = expected_close * sum(invested ./ open_vals[1:length(invested)]) - sum(invested)</span>
|
||||
<span class="n">day</span> <span class="o">+=</span> <span class="mi">1</span>
|
||||
<span class="n">profitable</span> <span class="o">=</span> <span class="n">is_profitable</span><span class="p">(</span><span class="n">close_vals</span><span class="p">[</span><span class="n">day</span><span class="p">],</span> <span class="n">invested</span><span class="p">,</span> <span class="n">open_vals</span><span class="p">[</span><span class="mi">1</span><span class="p">:</span><span class="n">length</span><span class="p">(</span><span class="n">invested</span><span class="p">)])</span> <span class="o">||</span>
|
||||
<span class="n">is_profitable</span><span class="p">(</span><span class="n">open_vals</span><span class="p">[</span><span class="n">day</span><span class="o">+</span><span class="mi">1</span><span class="p">],</span> <span class="n">invested</span><span class="p">,</span> <span class="n">open_vals</span><span class="p">[</span><span class="mi">1</span><span class="p">:</span><span class="n">length</span><span class="p">(</span><span class="n">invested</span><span class="p">)])</span>
|
||||
<span class="k">end</span>
|
||||
|
||||
<span class="n">shares</span> <span class="o">=</span> <span class="n">sum</span><span class="p">(</span><span class="n">invested</span> <span class="o">./</span> <span class="n">open_vals</span><span class="p">[</span><span class="mi">1</span><span class="p">:</span><span class="n">length</span><span class="p">(</span><span class="n">invested</span><span class="p">)])</span>
|
||||
<span class="n">max_profit</span> <span class="o">=</span> <span class="n">max</span><span class="p">(</span><span class="n">close_vals</span><span class="p">[</span><span class="n">day</span><span class="p">],</span> <span class="n">open_vals</span><span class="p">[</span><span class="n">day</span><span class="o">+</span><span class="mi">1</span><span class="p">])</span>
|
||||
<span class="n">profit</span> <span class="o">=</span> <span class="n">shares</span> <span class="o">*</span> <span class="n">max_profit</span> <span class="o">-</span> <span class="n">sum</span><span class="p">(</span><span class="n">invested</span><span class="p">)</span>
|
||||
<span class="k">return</span> <span class="p">(</span><span class="n">invested</span><span class="p">,</span> <span class="n">profit</span><span class="p">)</span>
|
||||
<span class="k">end</span>
|
||||
|
||||
<span class="n">sim_summary</span> <span class="o">=</span> <span class="n">function</span><span class="p">(</span><span class="n">investments</span><span class="p">,</span> <span class="n">profit</span><span class="p">)</span>
|
||||
<span class="n">leverages</span> <span class="o">=</span> <span class="p">[</span><span class="n">sum</span><span class="p">(</span><span class="n">investments</span><span class="p">[</span><span class="mi">1</span><span class="p">:</span><span class="n">i</span><span class="p">])</span> <span class="k">for</span> <span class="n">i</span><span class="o">=</span><span class="mi">1</span><span class="p">:</span><span class="n">length</span><span class="p">(</span><span class="n">investments</span><span class="p">)]</span>
|
||||
<span class="n">max_leverage</span> <span class="o">=</span> <span class="n">maximum</span><span class="p">(</span><span class="n">leverages</span><span class="p">)</span> <span class="o">/</span> <span class="n">investments</span><span class="p">[</span><span class="mi">1</span><span class="p">]</span>
|
||||
<span class="n">println</span><span class="p">(</span><span class="s">"Max leverage: </span><span class="si">$(max_leverage)</span><span class="s">"</span><span class="p">)</span>
|
||||
<span class="n">println</span><span class="p">(</span><span class="s">"Days invested: </span><span class="si">$</span><span class="s">(length(investments))"</span><span class="p">)</span>
|
||||
<span class="n">println</span><span class="p">(</span><span class="s">"Profit: </span><span class="si">$</span><span class="s">profit"</span><span class="p">)</span>
|
||||
<span class="k">end</span><span class="p">;</span>
|
||||
</pre></div>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
</div>
|
||||
<div class="cell border-box-sizing text_cell rendered">
|
||||
<div class="prompt input_prompt">
|
||||
</div>
|
||||
<div class="inner_cell">
|
||||
<div class="text_cell_render border-box-sizing rendered_html">
|
||||
<p>Now, let's get some data and run a simulation! Our first test:</p>
|
||||
<ul>
|
||||
<li>We'll invest 100 dollars in LMT, and expect that the stock will close up 1% every day. We'll invest $d_n$ + 10 dollars every day that we haven't turned a profit, and end the simulation once we've made a profit.</li>
|
||||
</ul>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="cell border-box-sizing code_cell rendered">
|
||||
<div class="input">
|
||||
<div class="prompt input_prompt">In [3]:</div>
|
||||
<div class="inner_cell">
|
||||
<div class="input_area">
|
||||
<div class=" highlight hl-julia"><pre><span class="n">investments</span><span class="p">,</span> <span class="n">profit</span> <span class="o">=</span> <span class="n">simulate</span><span class="p">(</span><span class="s">"YAHOO/LMT"</span><span class="p">,</span> <span class="n">Date</span><span class="p">(</span><span class="mi">2015</span><span class="p">,</span> <span class="mi">11</span><span class="p">,</span> <span class="mi">29</span><span class="p">),</span> <span class="mi">100</span><span class="p">,</span> <span class="mf">1.01</span><span class="p">,</span> <span class="mi">10</span><span class="p">)</span>
|
||||
<span class="n">sim_summary</span><span class="p">(</span><span class="n">investments</span><span class="p">,</span> <span class="n">profit</span><span class="p">)</span>
|
||||
</pre></div>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="output_wrapper">
|
||||
<div class="output">
|
||||
|
||||
|
||||
<div class="output_area"><div class="prompt"></div>
|
||||
<div class="output_subarea output_stream output_stdout output_text">
|
||||
<pre>Max leverage: 5.590373200042106
|
||||
Days invested: 5
|
||||
Profit: 0.6894803101560001
|
||||
</pre>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
|
||||
</div>
|
||||
<div class="cell border-box-sizing text_cell rendered">
|
||||
<div class="prompt input_prompt">
|
||||
</div>
|
||||
<div class="inner_cell">
|
||||
<div class="text_cell_render border-box-sizing rendered_html">
|
||||
<p>The result: We need to invest 5.6x our initial position over a period of 5 days to make approximately 69&#162;</p>
|
||||
<ul>
|
||||
<li>Now let's try the same thing, but we'll assume the stock closes up 2% instead.</li>
|
||||
</ul>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="cell border-box-sizing code_cell rendered">
|
||||
<div class="input">
|
||||
<div class="prompt input_prompt">In [4]:</div>
|
||||
<div class="inner_cell">
|
||||
<div class="input_area">
|
||||
<div class=" highlight hl-julia"><pre><span class="n">investments</span><span class="p">,</span> <span class="n">profit</span> <span class="o">=</span> <span class="n">simulate</span><span class="p">(</span><span class="s">"YAHOO/LMT"</span><span class="p">,</span> <span class="n">Date</span><span class="p">(</span><span class="mi">2015</span><span class="p">,</span> <span class="mi">11</span><span class="p">,</span> <span class="mi">29</span><span class="p">),</span> <span class="mi">100</span><span class="p">,</span> <span class="mf">1.02</span><span class="p">,</span> <span class="mi">10</span><span class="p">)</span>
|
||||
<span class="n">sim_summary</span><span class="p">(</span><span class="n">investments</span><span class="p">,</span> <span class="n">profit</span><span class="p">)</span>
|
||||
</pre></div>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="output_wrapper">
|
||||
<div class="output">
|
||||
|
||||
|
||||
<div class="output_area"><div class="prompt"></div>
|
||||
<div class="output_subarea output_stream output_stdout output_text">
|
||||
<pre>Max leverage: 1.854949900247809
|
||||
Days invested: 25
|
||||
Profit: 0.08304813163696423
|
||||
</pre>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
|
||||
</div>
|
||||
<div class="cell border-box-sizing text_cell rendered">
|
||||
<div class="prompt input_prompt">
|
||||
</div>
|
||||
<div class="inner_cell">
|
||||
<div class="text_cell_render border-box-sizing rendered_html">
|
||||
<p>In this example, we only get up to a 1.85x leveraged position, but it takes 25 days to turn a profit of 8¢</p>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="cell border-box-sizing text_cell rendered">
|
||||
<div class="prompt input_prompt">
|
||||
</div>
|
||||
<div class="inner_cell">
|
||||
<div class="text_cell_render border-box-sizing rendered_html">
|
||||
<h1 id="Summary">Summary<a class="anchor-link" href="#Summary">¶</a></h1><p>We've defined an investment strategy that can tell us how much to invest when we know what the closing position of a stock will be. We can tweak the strategy to actually make money, but plenty of work needs to be done so that we can optimize the money invested.</p>
|
||||
<p>In the next post I'm going to post more information about some backtests and strategy tests on this strategy (unless of course this experiment actually produces a significant profit potential, and then I'm keeping it for myself).</p>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="cell border-box-sizing text_cell rendered">
|
||||
<div class="prompt input_prompt">
|
||||
</div>
|
||||
<div class="inner_cell">
|
||||
<div class="text_cell_render border-box-sizing rendered_html">
|
||||
<h1 id="Side-note-and-disclaimer">Side note and disclaimer<a class="anchor-link" href="#Side-note-and-disclaimer">¶</a></h1><p>The claims made in this presentation about being able to guarantee making money are intended as a joke and do not constitute investment advice of any sort.</p>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
</div></p>
|
||||
<script type="text/x-mathjax-config">
|
||||
MathJax.Hub.Config({tex2jax: {inlineMath: [['$','$'], ['\(','\)']]}});
|
||||
</script>
|
||||
@ -81,6 +352,20 @@ MathJax.Hub.Config({tex2jax: {inlineMath: [['$','$'], ['\(','\)']]}});
|
||||
<script async src='https://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS_CHTML'></script>
|
||||
|
||||
|
||||
<div class="comments">
|
||||
<div id="disqus_thread"></div>
|
||||
<script type="text/javascript">
|
||||
var disqus_shortname = 'bradleespeice';
|
||||
var disqus_identifier = 'guaranteed-money-maker.html';
|
||||
var disqus_url = 'https://bspeice.github.io/guaranteed-money-maker.html';
|
||||
(function() {
|
||||
var dsq = document.createElement('script'); dsq.type = 'text/javascript'; dsq.async = true;
|
||||
dsq.src = '//' + disqus_shortname + '.disqus.com/embed.js';
|
||||
(document.getElementsByTagName('head')[0] || document.getElementsByTagName('body')[0]).appendChild(dsq);
|
||||
})();
|
||||
</script>
|
||||
<noscript>Please enable JavaScript to view the comments.</noscript>
|
||||
</div>
|
||||
|
||||
</div>
|
||||
<!-- /Content -->
|
||||
@ -92,6 +377,7 @@ MathJax.Hub.Config({tex2jax: {inlineMath: [['$','$'], ['\(','\)']]}});
|
||||
<div class="col-xs-4 col-sm-3 col-md-3 col-lg-3">
|
||||
<div class="footer-title"></div>
|
||||
<ul class="list-unstyled">
|
||||
<li><a href="https://bspeice.github.io/feeds/all.atom.xml" type="application/atom+xml" rel="alternate"></a></li>
|
||||
</ul>
|
||||
</div>
|
||||
<div class="col-xs-4 col-sm-3 col-md-3 col-lg-3">
|
||||
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@ -4,20 +4,22 @@
|
||||
<meta charset="utf-8">
|
||||
<meta http-equiv="X-UA-Compatible" content="IE=edge">
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1">
|
||||
<meta name="description" content="{% notebook 2015-12-26-testing_cramer.ipynb %} MathJax.Hub.Config({tex2jax: {inlineMath: [['$','$'], ['\(','\)']]}});">
|
||||
<meta name="description" content="In [1]: import requests import pandas as pd import numpy as np from dateutil import parser as dtparser from dateutil.relativedelta import relativedelta from datetime import datetime from ...">
|
||||
<meta name="keywords" content="data science, futures">
|
||||
<link rel="icon" href="/favicon.ico">
|
||||
<link rel="icon" href="https://bspeice.github.io/favicon.ico">
|
||||
|
||||
<title>Testing Cramer - Bradlee Speice</title>
|
||||
|
||||
<!-- Stylesheets -->
|
||||
<link href="/theme/css/bootstrap.min.css" rel="stylesheet">
|
||||
<link href="/theme/css/fonts.css" rel="stylesheet">
|
||||
<link href="/theme/css/nest.css" rel="stylesheet">
|
||||
<link href="/theme/css/pygment.css" rel="stylesheet">
|
||||
<link href="https://bspeice.github.io/theme/css/bootstrap.min.css" rel="stylesheet">
|
||||
<link href="https://bspeice.github.io/theme/css/fonts.css" rel="stylesheet">
|
||||
<link href="https://bspeice.github.io/theme/css/nest.css" rel="stylesheet">
|
||||
<link href="https://bspeice.github.io/theme/css/pygment.css" rel="stylesheet">
|
||||
<!-- /Stylesheets -->
|
||||
|
||||
<!-- RSS Feeds -->
|
||||
<link href="https://bspeice.github.io/feeds/all.atom.xml" type="application/atom+xml" rel="alternate" title="Bradlee Speice Full Atom Feed" />
|
||||
<link href="https://bspeice.github.io/feeds/blog.atom.xml" type="application/atom+xml" rel="alternate" title="Bradlee Speice Categories Atom Feed" />
|
||||
<!-- /RSS Feeds -->
|
||||
|
||||
<!-- HTML5 shim and Respond.js for IE8 support of HTML5 elements and media queries -->
|
||||
@ -26,6 +28,17 @@
|
||||
<script src="https://oss.maxcdn.com/respond/1.4.2/respond.min.js"></script>
|
||||
<![endif]-->
|
||||
|
||||
<!-- Google Analytics -->
|
||||
<script>
|
||||
(function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
|
||||
(i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o),
|
||||
m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
|
||||
})(window,document,'script','//www.google-analytics.com/analytics.js','ga');
|
||||
|
||||
ga('create', 'UA-74711362-1', 'auto');
|
||||
ga('send', 'pageview');
|
||||
</script>
|
||||
<!-- /Google Analytics -->
|
||||
|
||||
|
||||
</head>
|
||||
@ -39,7 +52,7 @@
|
||||
<div class="container">
|
||||
<div class="header-nav">
|
||||
<div class="header-logo">
|
||||
<a class="pull-left" href="/"><img class="mr20" src="/images/logo.svg" alt="logo">Bradlee Speice</a>
|
||||
<a class="pull-left" href="https://bspeice.github.io/"><img class="mr20" src="https://bspeice.github.io/images/logo.svg" alt="logo">Bradlee Speice</a>
|
||||
</div>
|
||||
<div class="nav pull-right">
|
||||
</div>
|
||||
@ -54,12 +67,12 @@
|
||||
<div class="col-lg-12">
|
||||
<div class="header-content">
|
||||
<h1 class="header-title">Testing Cramer</h1>
|
||||
<p class="header-date"> <a href="/author/bradlee-speice.html">Bradlee Speice</a>, Sat 26 December 2015, <a href="/category/blog.html">Blog</a></p>
|
||||
<p class="header-date"> <a href="https://bspeice.github.io/author/bradlee-speice.html">Bradlee Speice</a>, Sat 26 December 2015, <a href="https://bspeice.github.io/category/blog.html">Blog</a></p>
|
||||
<div class="header-underline"></div>
|
||||
<div class="clearfix"></div>
|
||||
<p class="pull-right header-tags">
|
||||
<span class="glyphicon glyphicon-tags mr5" aria-hidden="true"></span>
|
||||
<a href="/tag/data-science.html">data science</a>, <a href="/tag/futures.html">futures</a> </p>
|
||||
<a href="https://bspeice.github.io/tag/data-science.html">data science</a>, <a href="https://bspeice.github.io/tag/futures.html">futures</a> </p>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
@ -73,7 +86,425 @@
|
||||
|
||||
<!-- Content -->
|
||||
<div class="container content">
|
||||
<p>{% notebook 2015-12-26-testing_cramer.ipynb %}</p>
|
||||
<p>
|
||||
<div class="cell border-box-sizing code_cell rendered">
|
||||
<div class="input">
|
||||
<div class="prompt input_prompt">In [1]:</div>
|
||||
<div class="inner_cell">
|
||||
<div class="input_area">
|
||||
<div class=" highlight hl-ipython3"><pre><span class="kn">import</span> <span class="nn">requests</span>
|
||||
<span class="kn">import</span> <span class="nn">pandas</span> <span class="k">as</span> <span class="nn">pd</span>
|
||||
<span class="kn">import</span> <span class="nn">numpy</span> <span class="k">as</span> <span class="nn">np</span>
|
||||
<span class="kn">from</span> <span class="nn">dateutil</span> <span class="k">import</span> <span class="n">parser</span> <span class="k">as</span> <span class="n">dtparser</span>
|
||||
<span class="kn">from</span> <span class="nn">dateutil.relativedelta</span> <span class="k">import</span> <span class="n">relativedelta</span>
|
||||
<span class="kn">from</span> <span class="nn">datetime</span> <span class="k">import</span> <span class="n">datetime</span>
|
||||
<span class="kn">from</span> <span class="nn">html.parser</span> <span class="k">import</span> <span class="n">HTMLParser</span>
|
||||
<span class="kn">from</span> <span class="nn">copy</span> <span class="k">import</span> <span class="n">copy</span>
|
||||
<span class="kn">import</span> <span class="nn">Quandl</span>
|
||||
</pre></div>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
</div>
|
||||
<div class="cell border-box-sizing text_cell rendered">
|
||||
<div class="prompt input_prompt">
|
||||
</div>
|
||||
<div class="inner_cell">
|
||||
<div class="text_cell_render border-box-sizing rendered_html">
|
||||
<h1 id="Testing-Cramer">Testing Cramer<a class="anchor-link" href="#Testing-Cramer">¶</a></h1><p>Pursuant to attending a graduate school studying Financial Engineering, I've been a fan of the <a href="http://seekingalpha.com/author/wall-street-breakfast?s=wall-street-breakfast">Mad Money</a> TV show featuring the bombastic Jim Cramer. One of the things that he's said is that you shouldn't use the futures to predict where the stock market is going to go. But he says it often enough, I've begun to wonder - who is he trying to convince?</p>
|
||||
<p>It makes sense that because futures on things like the S&P 500 are traded continuously, they would price in market information before the stock market opens. So is Cramer right to be convinced that strategies based on the futures are a poor idea? I wanted to test it out.</p>
|
||||
<p>The first question is where to get the futures data. I've been part of <a href="http://seekingalpha.com/">Seeking Alpha</a> for a bit, and they publish the <a href="http://seekingalpha.com/author/wall-street-breakfast?s=wall-street-breakfast">Wall Street Breakfast</a> newsletter, which contains daily futures returns as of 6:20 AM EST. I'd be interested in using that data to see if we can actually make some money.</p>
|
||||
<p>First though, let's get the data:</p>
|
||||
<h1 id="Downloading-Futures-data-from-Seeking-Alpha">Downloading Futures data from Seeking Alpha<a class="anchor-link" href="#Downloading-Futures-data-from-Seeking-Alpha">¶</a></h1><p>We're going to define two HTML parsing classes - one to get the article URLs from a page, and one to get the actual data from each article.</p>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="cell border-box-sizing code_cell rendered">
|
||||
<div class="input">
|
||||
<div class="prompt input_prompt">In [2]:</div>
|
||||
<div class="inner_cell">
|
||||
<div class="input_area">
|
||||
<div class=" highlight hl-ipython3"><pre><span class="k">class</span> <span class="nc">ArticleListParser</span><span class="p">(</span><span class="n">HTMLParser</span><span class="p">):</span>
|
||||
<span class="sd">"""Given a web page with articles on it, parse out the article links"""</span>
|
||||
|
||||
<span class="n">articles</span> <span class="o">=</span> <span class="p">[]</span>
|
||||
|
||||
<span class="k">def</span> <span class="nf">handle_starttag</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">tag</span><span class="p">,</span> <span class="n">attrs</span><span class="p">):</span>
|
||||
<span class="c1">#if tag == 'div' and ("id", "author_articles_wrapper") in attrs:</span>
|
||||
<span class="c1"># self.fetch_links = True</span>
|
||||
<span class="k">if</span> <span class="n">tag</span> <span class="o">==</span> <span class="s1">'a'</span> <span class="ow">and</span> <span class="p">(</span><span class="s1">'class'</span><span class="p">,</span> <span class="s1">'dashboard_article_link'</span><span class="p">)</span> <span class="ow">in</span> <span class="n">attrs</span><span class="p">:</span>
|
||||
<span class="n">href</span> <span class="o">=</span> <span class="nb">list</span><span class="p">(</span><span class="nb">filter</span><span class="p">(</span><span class="k">lambda</span> <span class="n">x</span><span class="p">:</span> <span class="n">x</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span> <span class="o">==</span> <span class="s1">'href'</span><span class="p">,</span> <span class="n">attrs</span><span class="p">))[</span><span class="mi">0</span><span class="p">][</span><span class="mi">1</span><span class="p">]</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">articles</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">href</span><span class="p">)</span>
|
||||
|
||||
<span class="n">base_url</span> <span class="o">=</span> <span class="s2">"http://seekingalpha.com/author/wall-street-breakfast/articles"</span>
|
||||
<span class="n">article_page_urls</span> <span class="o">=</span> <span class="p">[</span><span class="n">base_url</span><span class="p">]</span> <span class="o">+</span> <span class="p">[</span><span class="n">base_url</span> <span class="o">+</span> <span class="s1">'/{}'</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">i</span><span class="p">)</span> <span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="mi">2</span><span class="p">,</span> <span class="mi">20</span><span class="p">)]</span>
|
||||
|
||||
<span class="n">global_articles</span> <span class="o">=</span> <span class="p">[]</span>
|
||||
<span class="k">for</span> <span class="n">page</span> <span class="ow">in</span> <span class="n">article_page_urls</span><span class="p">:</span>
|
||||
<span class="c1"># We need to switch the user agent, as SA blocks the standard requests agent</span>
|
||||
<span class="n">articles_html</span> <span class="o">=</span> <span class="n">requests</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">page</span><span class="p">,</span>
|
||||
<span class="n">headers</span><span class="o">=</span><span class="p">{</span><span class="s2">"User-Agent"</span><span class="p">:</span> <span class="s2">"Wget/1.13.4"</span><span class="p">})</span>
|
||||
<span class="n">parser</span> <span class="o">=</span> <span class="n">ArticleListParser</span><span class="p">()</span>
|
||||
<span class="n">parser</span><span class="o">.</span><span class="n">feed</span><span class="p">(</span><span class="n">articles_html</span><span class="o">.</span><span class="n">text</span><span class="p">)</span>
|
||||
<span class="n">global_articles</span> <span class="o">+=</span> <span class="p">(</span><span class="n">parser</span><span class="o">.</span><span class="n">articles</span><span class="p">)</span>
|
||||
</pre></div>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
</div>
|
||||
<div class="cell border-box-sizing code_cell rendered">
|
||||
<div class="input">
|
||||
<div class="prompt input_prompt">In [3]:</div>
|
||||
<div class="inner_cell">
|
||||
<div class="input_area">
|
||||
<div class=" highlight hl-ipython3"><pre><span class="k">class</span> <span class="nc">ArticleReturnParser</span><span class="p">(</span><span class="n">HTMLParser</span><span class="p">):</span>
|
||||
<span class="s2">"Given an article, parse out the futures returns in it"</span>
|
||||
|
||||
<span class="n">record_font_tags</span> <span class="o">=</span> <span class="kc">False</span>
|
||||
<span class="n">in_font_tag</span> <span class="o">=</span> <span class="kc">False</span>
|
||||
<span class="n">counter</span> <span class="o">=</span> <span class="mi">0</span>
|
||||
<span class="c1"># data = {} # See __init__</span>
|
||||
|
||||
<span class="k">def</span> <span class="nf">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="o">*</span><span class="n">args</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">):</span>
|
||||
<span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="n">__init__</span><span class="p">(</span><span class="o">*</span><span class="n">args</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">)</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">data</span> <span class="o">=</span> <span class="p">{}</span>
|
||||
|
||||
<span class="k">def</span> <span class="nf">handle_starttag</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">tag</span><span class="p">,</span> <span class="n">attrs</span><span class="p">):</span>
|
||||
<span class="k">if</span> <span class="n">tag</span> <span class="o">==</span> <span class="s1">'span'</span> <span class="ow">and</span> <span class="p">(</span><span class="s1">'itemprop'</span><span class="p">,</span> <span class="s1">'datePublished'</span><span class="p">)</span> <span class="ow">in</span> <span class="n">attrs</span><span class="p">:</span>
|
||||
<span class="n">date_string</span> <span class="o">=</span> <span class="nb">list</span><span class="p">(</span><span class="nb">filter</span><span class="p">(</span><span class="k">lambda</span> <span class="n">x</span><span class="p">:</span> <span class="n">x</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span> <span class="o">==</span> <span class="s1">'content'</span><span class="p">,</span> <span class="n">attrs</span><span class="p">))[</span><span class="mi">0</span><span class="p">][</span><span class="mi">1</span><span class="p">]</span>
|
||||
<span class="n">date</span> <span class="o">=</span> <span class="n">dtparser</span><span class="o">.</span><span class="n">parse</span><span class="p">(</span><span class="n">date_string</span><span class="p">)</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">data</span><span class="p">[</span><span class="s1">'date'</span><span class="p">]</span> <span class="o">=</span> <span class="n">date</span>
|
||||
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">in_font_tag</span> <span class="o">=</span> <span class="n">tag</span> <span class="o">==</span> <span class="s1">'font'</span>
|
||||
|
||||
<span class="k">def</span> <span class="nf">safe_float</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">string</span><span class="p">):</span>
|
||||
<span class="k">try</span><span class="p">:</span>
|
||||
<span class="k">return</span> <span class="nb">float</span><span class="p">(</span><span class="n">string</span><span class="p">[:</span><span class="o">-</span><span class="mi">1</span><span class="p">])</span> <span class="o">/</span> <span class="mi">100</span>
|
||||
<span class="k">except</span> <span class="ne">ValueError</span><span class="p">:</span>
|
||||
<span class="k">return</span> <span class="n">np</span><span class="o">.</span><span class="n">NaN</span>
|
||||
|
||||
<span class="k">def</span> <span class="nf">handle_data</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">content</span><span class="p">):</span>
|
||||
<span class="k">if</span> <span class="ow">not</span> <span class="bp">self</span><span class="o">.</span><span class="n">record_font_tags</span> <span class="ow">and</span> <span class="s2">"Futures at 6"</span> <span class="ow">in</span> <span class="n">content</span><span class="p">:</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">record_font_tags</span> <span class="o">=</span> <span class="kc">True</span>
|
||||
|
||||
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">record_font_tags</span> <span class="ow">and</span> <span class="bp">self</span><span class="o">.</span><span class="n">in_font_tag</span><span class="p">:</span>
|
||||
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">counter</span> <span class="o">==</span> <span class="mi">0</span><span class="p">:</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">data</span><span class="p">[</span><span class="s1">'DOW'</span><span class="p">]</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">safe_float</span><span class="p">(</span><span class="n">content</span><span class="p">)</span>
|
||||
<span class="k">elif</span> <span class="bp">self</span><span class="o">.</span><span class="n">counter</span> <span class="o">==</span> <span class="mi">1</span><span class="p">:</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">data</span><span class="p">[</span><span class="s1">'S&P'</span><span class="p">]</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">safe_float</span><span class="p">(</span><span class="n">content</span><span class="p">)</span>
|
||||
<span class="k">elif</span> <span class="bp">self</span><span class="o">.</span><span class="n">counter</span> <span class="o">==</span> <span class="mi">2</span><span class="p">:</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">data</span><span class="p">[</span><span class="s1">'NASDAQ'</span><span class="p">]</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">safe_float</span><span class="p">(</span><span class="n">content</span><span class="p">)</span>
|
||||
<span class="k">elif</span> <span class="bp">self</span><span class="o">.</span><span class="n">counter</span> <span class="o">==</span> <span class="mi">3</span><span class="p">:</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">data</span><span class="p">[</span><span class="s1">'Crude'</span><span class="p">]</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">safe_float</span><span class="p">(</span><span class="n">content</span><span class="p">)</span>
|
||||
<span class="k">elif</span> <span class="bp">self</span><span class="o">.</span><span class="n">counter</span> <span class="o">==</span> <span class="mi">4</span><span class="p">:</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">data</span><span class="p">[</span><span class="s1">'Gold'</span><span class="p">]</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">safe_float</span><span class="p">(</span><span class="n">content</span><span class="p">)</span>
|
||||
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">counter</span> <span class="o">+=</span> <span class="mi">1</span>
|
||||
|
||||
<span class="k">def</span> <span class="nf">handle_endtag</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">tag</span><span class="p">):</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">in_font_tag</span> <span class="o">=</span> <span class="kc">False</span>
|
||||
|
||||
<span class="k">def</span> <span class="nf">retrieve_data</span><span class="p">(</span><span class="n">url</span><span class="p">):</span>
|
||||
<span class="n">sa</span> <span class="o">=</span> <span class="s2">"http://seekingalpha.com"</span>
|
||||
<span class="n">article_html</span> <span class="o">=</span> <span class="n">requests</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">sa</span> <span class="o">+</span> <span class="n">url</span><span class="p">,</span>
|
||||
<span class="n">headers</span><span class="o">=</span><span class="p">{</span><span class="s2">"User-Agent"</span><span class="p">:</span> <span class="s2">"Wget/1.13.4"</span><span class="p">})</span>
|
||||
<span class="n">parser</span> <span class="o">=</span> <span class="n">ArticleReturnParser</span><span class="p">()</span>
|
||||
<span class="n">parser</span><span class="o">.</span><span class="n">feed</span><span class="p">(</span><span class="n">article_html</span><span class="o">.</span><span class="n">text</span><span class="p">)</span>
|
||||
<span class="n">parser</span><span class="o">.</span><span class="n">data</span><span class="o">.</span><span class="n">update</span><span class="p">({</span><span class="s2">"url"</span><span class="p">:</span> <span class="n">url</span><span class="p">})</span>
|
||||
<span class="n">parser</span><span class="o">.</span><span class="n">data</span><span class="o">.</span><span class="n">update</span><span class="p">({</span><span class="s2">"text"</span><span class="p">:</span> <span class="n">article_html</span><span class="o">.</span><span class="n">text</span><span class="p">})</span>
|
||||
<span class="k">return</span> <span class="n">parser</span><span class="o">.</span><span class="n">data</span>
|
||||
|
||||
<span class="c1"># This copy **MUST** be in place. I'm not sure why,</span>
|
||||
<span class="c1"># as you'd think that the data being returned would already</span>
|
||||
<span class="c1"># represent a different memory location. Even so, it blows up</span>
|
||||
<span class="c1"># if you don't do this.</span>
|
||||
<span class="n">article_list</span> <span class="o">=</span> <span class="nb">list</span><span class="p">(</span><span class="nb">set</span><span class="p">(</span><span class="n">global_articles</span><span class="p">))</span>
|
||||
<span class="n">article_data</span> <span class="o">=</span> <span class="p">[</span><span class="n">copy</span><span class="p">(</span><span class="n">retrieve_data</span><span class="p">(</span><span class="n">url</span><span class="p">))</span> <span class="k">for</span> <span class="n">url</span> <span class="ow">in</span> <span class="n">article_list</span><span class="p">]</span>
|
||||
<span class="c1"># If there's an issue downloading the article, drop it.</span>
|
||||
<span class="n">article_df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="o">.</span><span class="n">from_dict</span><span class="p">(</span><span class="n">article_data</span><span class="p">)</span><span class="o">.</span><span class="n">dropna</span><span class="p">()</span>
|
||||
</pre></div>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
</div>
|
||||
<div class="cell border-box-sizing text_cell rendered">
|
||||
<div class="prompt input_prompt">
|
||||
</div>
|
||||
<div class="inner_cell">
|
||||
<div class="text_cell_render border-box-sizing rendered_html">
|
||||
<h1 id="Fetching-the-Returns-data">Fetching the Returns data<a class="anchor-link" href="#Fetching-the-Returns-data">¶</a></h1><p>Now that we have the futures data, we're going to compare across 4 different indices - the S&P 500 index, Dow Jones Industrial, Russell 2000, and NASDAQ 100. Let's get the data off of Quandl to make things easier!</p>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="cell border-box-sizing code_cell rendered">
|
||||
<div class="input">
|
||||
<div class="prompt input_prompt">In [4]:</div>
|
||||
<div class="inner_cell">
|
||||
<div class="input_area">
|
||||
<div class=" highlight hl-ipython3"><pre><span class="c1"># article_df is sorted by date, so we get the first row.</span>
|
||||
<span class="n">start_date</span> <span class="o">=</span> <span class="n">article_df</span><span class="o">.</span><span class="n">sort_values</span><span class="p">(</span><span class="n">by</span><span class="o">=</span><span class="s1">'date'</span><span class="p">)</span><span class="o">.</span><span class="n">iloc</span><span class="p">[</span><span class="mi">0</span><span class="p">][</span><span class="s1">'date'</span><span class="p">]</span> <span class="o">-</span> <span class="n">relativedelta</span><span class="p">(</span><span class="n">days</span><span class="o">=</span><span class="mi">1</span><span class="p">)</span>
|
||||
<span class="n">SPY</span> <span class="o">=</span> <span class="n">Quandl</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s2">"GOOG/NYSE_SPY"</span><span class="p">,</span> <span class="n">trim_start</span><span class="o">=</span><span class="n">start_date</span><span class="p">)</span>
|
||||
<span class="n">DJIA</span> <span class="o">=</span> <span class="n">Quandl</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s2">"GOOG/AMS_DIA"</span><span class="p">,</span> <span class="n">trim_start</span><span class="o">=</span><span class="n">start_date</span><span class="p">)</span>
|
||||
<span class="n">RUSS</span> <span class="o">=</span> <span class="n">Quandl</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s2">"GOOG/AMEX_IWM"</span><span class="p">,</span> <span class="n">trim_start</span><span class="o">=</span><span class="n">start_date</span><span class="p">)</span>
|
||||
<span class="n">NASDAQ</span> <span class="o">=</span> <span class="n">Quandl</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s2">"GOOG/EPA_QQQ"</span><span class="p">,</span> <span class="n">trim_start</span><span class="o">=</span><span class="n">start_date</span><span class="p">)</span>
|
||||
</pre></div>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
</div>
|
||||
<div class="cell border-box-sizing text_cell rendered">
|
||||
<div class="prompt input_prompt">
|
||||
</div>
|
||||
<div class="inner_cell">
|
||||
<div class="text_cell_render border-box-sizing rendered_html">
|
||||
<h1 id="Running-the-Comparison">Running the Comparison<a class="anchor-link" href="#Running-the-Comparison">¶</a></h1><p>There are two things I want to test: how accurate each futures category is at predicting the index's opening change over the previous close, and how accurate it is at predicting the index's daily return.</p>
|
||||
<p>Let's first calculate how good each future is at predicting the opening return over the previous day. I expect that the futures will be more than 50% accurate, since the information is recorded 3 hours before the markets open.</p>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="cell border-box-sizing code_cell rendered">
|
||||
<div class="input">
|
||||
<div class="prompt input_prompt">In [5]:</div>
|
||||
<div class="inner_cell">
|
||||
<div class="input_area">
|
||||
<div class=" highlight hl-ipython3"><pre><span class="k">def</span> <span class="nf">calculate_opening_ret</span><span class="p">(</span><span class="n">frame</span><span class="p">):</span>
|
||||
<span class="c1"># I'm not a huge fan of the appending for loop,</span>
|
||||
<span class="c1"># but it's a bit verbose for a comprehension</span>
|
||||
<span class="n">data</span> <span class="o">=</span> <span class="p">{}</span>
|
||||
<span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="mi">1</span><span class="p">,</span> <span class="nb">len</span><span class="p">(</span><span class="n">frame</span><span class="p">)):</span>
|
||||
<span class="n">date</span> <span class="o">=</span> <span class="n">frame</span><span class="o">.</span><span class="n">iloc</span><span class="p">[</span><span class="n">i</span><span class="p">]</span><span class="o">.</span><span class="n">name</span>
|
||||
<span class="n">prior_close</span> <span class="o">=</span> <span class="n">frame</span><span class="o">.</span><span class="n">iloc</span><span class="p">[</span><span class="n">i</span><span class="o">-</span><span class="mi">1</span><span class="p">][</span><span class="s1">'Close'</span><span class="p">]</span>
|
||||
<span class="n">open_val</span> <span class="o">=</span> <span class="n">frame</span><span class="o">.</span><span class="n">iloc</span><span class="p">[</span><span class="n">i</span><span class="p">][</span><span class="s1">'Open'</span><span class="p">]</span>
|
||||
<span class="n">data</span><span class="p">[</span><span class="n">date</span><span class="p">]</span> <span class="o">=</span> <span class="p">(</span><span class="n">open_val</span> <span class="o">-</span> <span class="n">prior_close</span><span class="p">)</span> <span class="o">/</span> <span class="n">prior_close</span>
|
||||
|
||||
<span class="k">return</span> <span class="n">data</span>
|
||||
|
||||
<span class="n">SPY_open_ret</span> <span class="o">=</span> <span class="n">calculate_opening_ret</span><span class="p">(</span><span class="n">SPY</span><span class="p">)</span>
|
||||
<span class="n">DJIA_open_ret</span> <span class="o">=</span> <span class="n">calculate_opening_ret</span><span class="p">(</span><span class="n">DJIA</span><span class="p">)</span>
|
||||
<span class="n">RUSS_open_ret</span> <span class="o">=</span> <span class="n">calculate_opening_ret</span><span class="p">(</span><span class="n">RUSS</span><span class="p">)</span>
|
||||
<span class="n">NASDAQ_open_ret</span> <span class="o">=</span> <span class="n">calculate_opening_ret</span><span class="p">(</span><span class="n">NASDAQ</span><span class="p">)</span>
|
||||
|
||||
<span class="k">def</span> <span class="nf">signs_match</span><span class="p">(</span><span class="n">list_1</span><span class="p">,</span> <span class="n">list_2</span><span class="p">):</span>
|
||||
<span class="c1"># This is a surprisingly difficult task - we have to match</span>
|
||||
<span class="c1"># up the dates in order to check if opening returns actually match</span>
|
||||
<span class="n">index_dict_dt</span> <span class="o">=</span> <span class="p">{</span><span class="n">key</span><span class="o">.</span><span class="n">to_datetime</span><span class="p">():</span> <span class="n">list_2</span><span class="p">[</span><span class="n">key</span><span class="p">]</span> <span class="k">for</span> <span class="n">key</span> <span class="ow">in</span> <span class="n">list_2</span><span class="o">.</span><span class="n">keys</span><span class="p">()}</span>
|
||||
|
||||
<span class="n">matches</span> <span class="o">=</span> <span class="p">[]</span>
|
||||
<span class="k">for</span> <span class="n">row</span> <span class="ow">in</span> <span class="n">list_1</span><span class="o">.</span><span class="n">iterrows</span><span class="p">():</span>
|
||||
<span class="n">row_dt</span> <span class="o">=</span> <span class="n">row</span><span class="p">[</span><span class="mi">1</span><span class="p">][</span><span class="mi">1</span><span class="p">]</span>
|
||||
<span class="n">row_value</span> <span class="o">=</span> <span class="n">row</span><span class="p">[</span><span class="mi">1</span><span class="p">][</span><span class="mi">0</span><span class="p">]</span>
|
||||
<span class="n">index_dt</span> <span class="o">=</span> <span class="n">datetime</span><span class="p">(</span><span class="n">row_dt</span><span class="o">.</span><span class="n">year</span><span class="p">,</span> <span class="n">row_dt</span><span class="o">.</span><span class="n">month</span><span class="p">,</span> <span class="n">row_dt</span><span class="o">.</span><span class="n">day</span><span class="p">)</span>
|
||||
<span class="k">if</span> <span class="n">index_dt</span> <span class="ow">in</span> <span class="n">list_2</span><span class="p">:</span>
|
||||
<span class="n">index_value</span> <span class="o">=</span> <span class="n">list_2</span><span class="p">[</span><span class="n">index_dt</span><span class="p">]</span>
|
||||
<span class="k">if</span> <span class="p">(</span><span class="n">row_value</span> <span class="o">></span> <span class="mi">0</span> <span class="ow">and</span> <span class="n">index_value</span> <span class="o">></span> <span class="mi">0</span><span class="p">)</span> <span class="ow">or</span> \
|
||||
<span class="p">(</span><span class="n">row_value</span> <span class="o"><</span> <span class="mi">0</span> <span class="ow">and</span> <span class="n">index_value</span> <span class="o"><</span> <span class="mi">0</span><span class="p">)</span> <span class="ow">or</span> \
|
||||
<span class="p">(</span><span class="n">row_value</span> <span class="o">==</span> <span class="mi">0</span> <span class="ow">and</span> <span class="n">index_value</span> <span class="o">==</span> <span class="mi">0</span><span class="p">):</span>
|
||||
<span class="n">matches</span> <span class="o">+=</span> <span class="p">[</span><span class="mi">1</span><span class="p">]</span>
|
||||
<span class="k">else</span><span class="p">:</span>
|
||||
<span class="n">matches</span> <span class="o">+=</span> <span class="p">[</span><span class="mi">0</span><span class="p">]</span>
|
||||
<span class="c1">#print("{}".format(list_2[index_dt]))</span>
|
||||
<span class="k">return</span> <span class="n">matches</span>
|
||||
|
||||
|
||||
<span class="n">prediction_dict</span> <span class="o">=</span> <span class="p">{}</span>
|
||||
<span class="n">matches_dict</span> <span class="o">=</span> <span class="p">{}</span>
|
||||
<span class="n">count_dict</span> <span class="o">=</span> <span class="p">{}</span>
|
||||
<span class="n">index_dict</span> <span class="o">=</span> <span class="p">{</span><span class="s2">"SPY"</span><span class="p">:</span> <span class="n">SPY_open_ret</span><span class="p">,</span> <span class="s2">"DJIA"</span><span class="p">:</span> <span class="n">DJIA_open_ret</span><span class="p">,</span> <span class="s2">"RUSS"</span><span class="p">:</span> <span class="n">RUSS_open_ret</span><span class="p">,</span> <span class="s2">"NASDAQ"</span><span class="p">:</span> <span class="n">NASDAQ_open_ret</span><span class="p">}</span>
|
||||
<span class="n">indices</span> <span class="o">=</span> <span class="p">[</span><span class="s2">"SPY"</span><span class="p">,</span> <span class="s2">"DJIA"</span><span class="p">,</span> <span class="s2">"RUSS"</span><span class="p">,</span> <span class="s2">"NASDAQ"</span><span class="p">]</span>
|
||||
<span class="n">futures</span> <span class="o">=</span> <span class="p">[</span><span class="s2">"Crude"</span><span class="p">,</span> <span class="s2">"Gold"</span><span class="p">,</span> <span class="s2">"DOW"</span><span class="p">,</span> <span class="s2">"NASDAQ"</span><span class="p">,</span> <span class="s2">"S&P"</span><span class="p">]</span>
|
||||
<span class="k">for</span> <span class="n">index</span> <span class="ow">in</span> <span class="n">indices</span><span class="p">:</span>
|
||||
<span class="n">matches_dict</span><span class="p">[</span><span class="n">index</span><span class="p">]</span> <span class="o">=</span> <span class="p">{</span><span class="n">future</span><span class="p">:</span> <span class="n">signs_match</span><span class="p">(</span><span class="n">article_df</span><span class="p">[[</span><span class="n">future</span><span class="p">,</span> <span class="s1">'date'</span><span class="p">]],</span>
|
||||
<span class="n">index_dict</span><span class="p">[</span><span class="n">index</span><span class="p">])</span> <span class="k">for</span> <span class="n">future</span> <span class="ow">in</span> <span class="n">futures</span><span class="p">}</span>
|
||||
<span class="n">count_dict</span><span class="p">[</span><span class="n">index</span><span class="p">]</span> <span class="o">=</span> <span class="p">{</span><span class="n">future</span><span class="p">:</span> <span class="nb">len</span><span class="p">(</span><span class="n">matches_dict</span><span class="p">[</span><span class="n">index</span><span class="p">][</span><span class="n">future</span><span class="p">])</span> <span class="k">for</span> <span class="n">future</span> <span class="ow">in</span> <span class="n">futures</span><span class="p">}</span>
|
||||
<span class="n">prediction_dict</span><span class="p">[</span><span class="n">index</span><span class="p">]</span> <span class="o">=</span> <span class="p">{</span><span class="n">future</span><span class="p">:</span> <span class="n">np</span><span class="o">.</span><span class="n">mean</span><span class="p">(</span><span class="n">matches_dict</span><span class="p">[</span><span class="n">index</span><span class="p">][</span><span class="n">future</span><span class="p">])</span>
|
||||
<span class="k">for</span> <span class="n">future</span> <span class="ow">in</span> <span class="n">futures</span><span class="p">}</span>
|
||||
<span class="nb">print</span><span class="p">(</span><span class="s2">"Articles Checked: "</span><span class="p">)</span>
|
||||
<span class="nb">print</span><span class="p">(</span><span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="o">.</span><span class="n">from_dict</span><span class="p">(</span><span class="n">count_dict</span><span class="p">))</span>
|
||||
<span class="nb">print</span><span class="p">()</span>
|
||||
<span class="nb">print</span><span class="p">(</span><span class="s2">"Prediction Accuracy:"</span><span class="p">)</span>
|
||||
<span class="nb">print</span><span class="p">(</span><span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="o">.</span><span class="n">from_dict</span><span class="p">(</span><span class="n">prediction_dict</span><span class="p">))</span>
|
||||
</pre></div>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="output_wrapper">
|
||||
<div class="output">
|
||||
|
||||
|
||||
<div class="output_area"><div class="prompt"></div>
|
||||
<div class="output_subarea output_stream output_stdout output_text">
|
||||
<pre>Articles Checked:
|
||||
DJIA NASDAQ RUSS SPY
|
||||
Crude 268 268 271 271
|
||||
DOW 268 268 271 271
|
||||
Gold 268 268 271 271
|
||||
NASDAQ 268 268 271 271
|
||||
S&P 268 268 271 271
|
||||
|
||||
Prediction Accuracy:
|
||||
DJIA NASDAQ RUSS SPY
|
||||
Crude 0.544776 0.522388 0.601476 0.590406
|
||||
DOW 0.611940 0.604478 0.804428 0.841328
|
||||
Gold 0.462687 0.455224 0.464945 0.476015
|
||||
NASDAQ 0.615672 0.608209 0.797048 0.830258
|
||||
S&P 0.604478 0.597015 0.811808 0.848708
|
||||
</pre>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
|
||||
</div>
|
||||
<div class="cell border-box-sizing text_cell rendered">
|
||||
<div class="prompt input_prompt">
|
||||
</div>
|
||||
<div class="inner_cell">
|
||||
<div class="text_cell_render border-box-sizing rendered_html">
|
||||
<p>This data is very interesting. Some insights:</p>
|
||||
<ul>
|
||||
<li>Both DOW and NASDAQ futures are pretty bad at predicting their actual market openings</li>
|
||||
<li>NASDAQ and Dow are fairly unpredictable; Russell 2000 and S&P are very predictable</li>
|
||||
<li>Gold is a poor predictor in general - intuitively Gold should move inverse to the market, but it appears to be about as accurate as a coin flip.</li>
|
||||
</ul>
|
||||
<p>All said, though, it appears that futures data is important for determining market direction for both the S&P 500 and Russell 2000. Cramer is half-right: futures data isn't very helpful for the Dow and NASDAQ indices, but is great for the S&P and Russell indices.</p>
|
||||
<h1 id="The-next-step---Predicting-the-close">The next step - Predicting the close<a class="anchor-link" href="#The-next-step---Predicting-the-close">¶</a></h1><p>Given the code we currently have, I'd like to predict the close of the market as well. We can re-use most of the code, so let's see what happens:</p>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="cell border-box-sizing code_cell rendered">
|
||||
<div class="input">
|
||||
<div class="prompt input_prompt">In [6]:</div>
|
||||
<div class="inner_cell">
|
||||
<div class="input_area">
|
||||
<div class=" highlight hl-ipython3"><pre><span class="k">def</span> <span class="nf">calculate_closing_ret</span><span class="p">(</span><span class="n">frame</span><span class="p">):</span>
|
||||
<span class="c1"># I'm not a huge fan of the appending for loop,</span>
|
||||
<span class="c1"># but it's a bit verbose for a comprehension</span>
|
||||
<span class="n">data</span> <span class="o">=</span> <span class="p">{}</span>
|
||||
<span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span> <span class="nb">len</span><span class="p">(</span><span class="n">frame</span><span class="p">)):</span>
|
||||
<span class="n">date</span> <span class="o">=</span> <span class="n">frame</span><span class="o">.</span><span class="n">iloc</span><span class="p">[</span><span class="n">i</span><span class="p">]</span><span class="o">.</span><span class="n">name</span>
|
||||
<span class="n">open_val</span> <span class="o">=</span> <span class="n">frame</span><span class="o">.</span><span class="n">iloc</span><span class="p">[</span><span class="n">i</span><span class="p">][</span><span class="s1">'Open'</span><span class="p">]</span>
|
||||
<span class="n">close_val</span> <span class="o">=</span> <span class="n">frame</span><span class="o">.</span><span class="n">iloc</span><span class="p">[</span><span class="n">i</span><span class="p">][</span><span class="s1">'Close'</span><span class="p">]</span>
|
||||
<span class="n">data</span><span class="p">[</span><span class="n">date</span><span class="p">]</span> <span class="o">=</span> <span class="p">(</span><span class="n">close_val</span> <span class="o">-</span> <span class="n">open_val</span><span class="p">)</span> <span class="o">/</span> <span class="n">open_val</span>
|
||||
|
||||
<span class="k">return</span> <span class="n">data</span>
|
||||
|
||||
<span class="n">SPY_close_ret</span> <span class="o">=</span> <span class="n">calculate_closing_ret</span><span class="p">(</span><span class="n">SPY</span><span class="p">)</span>
|
||||
<span class="n">DJIA_close_ret</span> <span class="o">=</span> <span class="n">calculate_closing_ret</span><span class="p">(</span><span class="n">DJIA</span><span class="p">)</span>
|
||||
<span class="n">RUSS_close_ret</span> <span class="o">=</span> <span class="n">calculate_closing_ret</span><span class="p">(</span><span class="n">RUSS</span><span class="p">)</span>
|
||||
<span class="n">NASDAQ_close_ret</span> <span class="o">=</span> <span class="n">calculate_closing_ret</span><span class="p">(</span><span class="n">NASDAQ</span><span class="p">)</span>
|
||||
|
||||
<span class="k">def</span> <span class="nf">signs_match</span><span class="p">(</span><span class="n">list_1</span><span class="p">,</span> <span class="n">list_2</span><span class="p">):</span>
|
||||
<span class="c1"># This is a surprisingly difficult task - we have to match</span>
|
||||
<span class="c1"># up the dates in order to check if opening returns actually match</span>
|
||||
<span class="n">index_dict_dt</span> <span class="o">=</span> <span class="p">{</span><span class="n">key</span><span class="o">.</span><span class="n">to_datetime</span><span class="p">():</span> <span class="n">list_2</span><span class="p">[</span><span class="n">key</span><span class="p">]</span> <span class="k">for</span> <span class="n">key</span> <span class="ow">in</span> <span class="n">list_2</span><span class="o">.</span><span class="n">keys</span><span class="p">()}</span>
|
||||
|
||||
<span class="n">matches</span> <span class="o">=</span> <span class="p">[]</span>
|
||||
<span class="k">for</span> <span class="n">row</span> <span class="ow">in</span> <span class="n">list_1</span><span class="o">.</span><span class="n">iterrows</span><span class="p">():</span>
|
||||
<span class="n">row_dt</span> <span class="o">=</span> <span class="n">row</span><span class="p">[</span><span class="mi">1</span><span class="p">][</span><span class="mi">1</span><span class="p">]</span>
|
||||
<span class="n">row_value</span> <span class="o">=</span> <span class="n">row</span><span class="p">[</span><span class="mi">1</span><span class="p">][</span><span class="mi">0</span><span class="p">]</span>
|
||||
<span class="n">index_dt</span> <span class="o">=</span> <span class="n">datetime</span><span class="p">(</span><span class="n">row_dt</span><span class="o">.</span><span class="n">year</span><span class="p">,</span> <span class="n">row_dt</span><span class="o">.</span><span class="n">month</span><span class="p">,</span> <span class="n">row_dt</span><span class="o">.</span><span class="n">day</span><span class="p">)</span>
|
||||
<span class="k">if</span> <span class="n">index_dt</span> <span class="ow">in</span> <span class="n">list_2</span><span class="p">:</span>
|
||||
<span class="n">index_value</span> <span class="o">=</span> <span class="n">list_2</span><span class="p">[</span><span class="n">index_dt</span><span class="p">]</span>
|
||||
<span class="k">if</span> <span class="p">(</span><span class="n">row_value</span> <span class="o">></span> <span class="mi">0</span> <span class="ow">and</span> <span class="n">index_value</span> <span class="o">></span> <span class="mi">0</span><span class="p">)</span> <span class="ow">or</span> \
|
||||
<span class="p">(</span><span class="n">row_value</span> <span class="o"><</span> <span class="mi">0</span> <span class="ow">and</span> <span class="n">index_value</span> <span class="o"><</span> <span class="mi">0</span><span class="p">)</span> <span class="ow">or</span> \
|
||||
<span class="p">(</span><span class="n">row_value</span> <span class="o">==</span> <span class="mi">0</span> <span class="ow">and</span> <span class="n">index_value</span> <span class="o">==</span> <span class="mi">0</span><span class="p">):</span>
|
||||
<span class="n">matches</span> <span class="o">+=</span> <span class="p">[</span><span class="mi">1</span><span class="p">]</span>
|
||||
<span class="k">else</span><span class="p">:</span>
|
||||
<span class="n">matches</span> <span class="o">+=</span> <span class="p">[</span><span class="mi">0</span><span class="p">]</span>
|
||||
<span class="c1">#print("{}".format(list_2[index_dt]))</span>
|
||||
<span class="k">return</span> <span class="n">matches</span>
|
||||
|
||||
|
||||
<span class="n">matches_dict</span> <span class="o">=</span> <span class="p">{}</span>
|
||||
<span class="n">count_dict</span> <span class="o">=</span> <span class="p">{}</span>
|
||||
<span class="n">prediction_dict</span> <span class="o">=</span> <span class="p">{}</span>
|
||||
<span class="n">index_dict</span> <span class="o">=</span> <span class="p">{</span><span class="s2">"SPY"</span><span class="p">:</span> <span class="n">SPY_close_ret</span><span class="p">,</span> <span class="s2">"DJIA"</span><span class="p">:</span> <span class="n">DJIA_close_ret</span><span class="p">,</span>
|
||||
<span class="s2">"RUSS"</span><span class="p">:</span> <span class="n">RUSS_close_ret</span><span class="p">,</span> <span class="s2">"NASDAQ"</span><span class="p">:</span> <span class="n">NASDAQ_close_ret</span><span class="p">}</span>
|
||||
<span class="n">indices</span> <span class="o">=</span> <span class="p">[</span><span class="s2">"SPY"</span><span class="p">,</span> <span class="s2">"DJIA"</span><span class="p">,</span> <span class="s2">"RUSS"</span><span class="p">,</span> <span class="s2">"NASDAQ"</span><span class="p">]</span>
|
||||
<span class="n">futures</span> <span class="o">=</span> <span class="p">[</span><span class="s2">"Crude"</span><span class="p">,</span> <span class="s2">"Gold"</span><span class="p">,</span> <span class="s2">"DOW"</span><span class="p">,</span> <span class="s2">"NASDAQ"</span><span class="p">,</span> <span class="s2">"S&P"</span><span class="p">]</span>
|
||||
<span class="k">for</span> <span class="n">index</span> <span class="ow">in</span> <span class="n">indices</span><span class="p">:</span>
|
||||
<span class="n">matches_dict</span><span class="p">[</span><span class="n">index</span><span class="p">]</span> <span class="o">=</span> <span class="p">{</span><span class="n">future</span><span class="p">:</span> <span class="n">signs_match</span><span class="p">(</span><span class="n">article_df</span><span class="p">[[</span><span class="n">future</span><span class="p">,</span> <span class="s1">'date'</span><span class="p">]],</span>
|
||||
<span class="n">index_dict</span><span class="p">[</span><span class="n">index</span><span class="p">])</span> <span class="k">for</span> <span class="n">future</span> <span class="ow">in</span> <span class="n">futures</span><span class="p">}</span>
|
||||
<span class="n">count_dict</span><span class="p">[</span><span class="n">index</span><span class="p">]</span> <span class="o">=</span> <span class="p">{</span><span class="n">future</span><span class="p">:</span> <span class="nb">len</span><span class="p">(</span><span class="n">matches_dict</span><span class="p">[</span><span class="n">index</span><span class="p">][</span><span class="n">future</span><span class="p">])</span> <span class="k">for</span> <span class="n">future</span> <span class="ow">in</span> <span class="n">futures</span><span class="p">}</span>
|
||||
<span class="n">prediction_dict</span><span class="p">[</span><span class="n">index</span><span class="p">]</span> <span class="o">=</span> <span class="p">{</span><span class="n">future</span><span class="p">:</span> <span class="n">np</span><span class="o">.</span><span class="n">mean</span><span class="p">(</span><span class="n">matches_dict</span><span class="p">[</span><span class="n">index</span><span class="p">][</span><span class="n">future</span><span class="p">])</span>
|
||||
<span class="k">for</span> <span class="n">future</span> <span class="ow">in</span> <span class="n">futures</span><span class="p">}</span>
|
||||
|
||||
<span class="nb">print</span><span class="p">(</span><span class="s2">"Articles Checked:"</span><span class="p">)</span>
|
||||
<span class="nb">print</span><span class="p">(</span><span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="o">.</span><span class="n">from_dict</span><span class="p">(</span><span class="n">count_dict</span><span class="p">))</span>
|
||||
<span class="nb">print</span><span class="p">()</span>
|
||||
<span class="nb">print</span><span class="p">(</span><span class="s2">"Prediction Accuracy:"</span><span class="p">)</span>
|
||||
<span class="nb">print</span><span class="p">(</span><span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="o">.</span><span class="n">from_dict</span><span class="p">(</span><span class="n">prediction_dict</span><span class="p">))</span>
|
||||
</pre></div>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="output_wrapper">
|
||||
<div class="output">
|
||||
|
||||
|
||||
<div class="output_area"><div class="prompt"></div>
|
||||
<div class="output_subarea output_stream output_stdout output_text">
|
||||
<pre>Articles Checked:
|
||||
DJIA NASDAQ RUSS SPY
|
||||
Crude 268 268 271 271
|
||||
DOW 268 268 271 271
|
||||
Gold 268 268 271 271
|
||||
NASDAQ 268 268 271 271
|
||||
S&P 268 268 271 271
|
||||
|
||||
Prediction Accuracy:
|
||||
DJIA NASDAQ RUSS SPY
|
||||
Crude 0.533582 0.529851 0.501845 0.542435
|
||||
DOW 0.589552 0.608209 0.535055 0.535055
|
||||
Gold 0.455224 0.451493 0.483395 0.512915
|
||||
NASDAQ 0.582090 0.626866 0.531365 0.538745
|
||||
S&P 0.585821 0.608209 0.535055 0.535055
|
||||
</pre>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
|
||||
</div>
|
||||
<div class="cell border-box-sizing text_cell rendered">
|
||||
<div class="prompt input_prompt">
|
||||
</div>
|
||||
<div class="inner_cell">
|
||||
<div class="text_cell_render border-box-sizing rendered_html">
|
||||
<p>Well, it appears that the futures data is terrible at predicting market close. NASDAQ predicting NASDAQ is the most interesting data point, but 63% accuracy isn't accurate enough to make money consistently.</p>
|
||||
<h1 id="Final-sentiments">Final sentiments<a class="anchor-link" href="#Final-sentiments">¶</a></h1><p>The data bears out very close to what I expected would happen:</p>
|
||||
<ul>
|
||||
<li>Futures data is more accurate than a coin flip for predicting openings, which makes sense since it is recorded only 3 hours before the actual opening</li>
|
||||
<li>Futures data is about as accurate as a coin flip for predicting closings, which means there is no money to be made in trying to predict the market direction for the day given the futures data.</li>
|
||||
</ul>
|
||||
<p>In summary:</p>
|
||||
<ul>
|
||||
<li>Cramer is half right: Futures data is not good for predicting the market open of the Dow and NASDAQ indices. Contrary to Cramer though, it is very good for predicting the S&P and Russell indices - we can achieve an accuracy slightly over 80% for each. </li>
|
||||
<li>Making money in the market is hard. We can't just go to the futures and treat them as an oracle for where the market will close.</li>
|
||||
</ul>
|
||||
<p>I hope you've enjoyed this; I quite enjoyed taking a deep dive into the analytics this way. I'll be posting more soon!</p>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
</div></p>
|
||||
<script type="text/x-mathjax-config">
|
||||
MathJax.Hub.Config({tex2jax: {inlineMath: [['$','$'], ['\(','\)']]}});
|
||||
</script>
|
||||
@ -81,6 +512,20 @@ MathJax.Hub.Config({tex2jax: {inlineMath: [['$','$'], ['\(','\)']]}});
|
||||
<script async src='https://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS_CHTML'></script>
|
||||
|
||||
|
||||
<div class="comments">
|
||||
<div id="disqus_thread"></div>
|
||||
<script type="text/javascript">
|
||||
var disqus_shortname = 'bradleespeice';
|
||||
var disqus_identifier = 'testing-cramer.html';
|
||||
var disqus_url = 'https://bspeice.github.io/testing-cramer.html';
|
||||
(function() {
|
||||
var dsq = document.createElement('script'); dsq.type = 'text/javascript'; dsq.async = true;
|
||||
dsq.src = '//' + disqus_shortname + '.disqus.com/embed.js';
|
||||
(document.getElementsByTagName('head')[0] || document.getElementsByTagName('body')[0]).appendChild(dsq);
|
||||
})();
|
||||
</script>
|
||||
<noscript>Please enable JavaScript to view the comments.</noscript>
|
||||
</div>
|
||||
|
||||
</div>
|
||||
<!-- /Content -->
|
||||
@ -92,6 +537,7 @@ MathJax.Hub.Config({tex2jax: {inlineMath: [['$','$'], ['\(','\)']]}});
|
||||
<div class="col-xs-4 col-sm-3 col-md-3 col-lg-3">
|
||||
<div class="footer-title"></div>
|
||||
<ul class="list-unstyled">
|
||||
<li><a href="https://bspeice.github.io/feeds/all.atom.xml" type="application/atom+xml" rel="alternate"></a></li>
|
||||
</ul>
|
||||
</div>
|
||||
<div class="col-xs-4 col-sm-3 col-md-3 col-lg-3">
|
||||
|
File diff suppressed because it is too large
Load Diff
562
tick-tock.html
562
tick-tock.html
@ -4,20 +4,22 @@
|
||||
<meta charset="utf-8">
|
||||
<meta http-equiv="X-UA-Compatible" content="IE=edge">
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1">
|
||||
<meta name="description" content="{% notebook 2016-4-6-tick-tock....ipynb %} //MathJax.Hub.Config({tex2jax: {inlineMath: [['$','$'], ['\(','\)']]}}); MathJax.Hub.Config({tex2jax: {inlineMath: [['\$','\$']]}});">
|
||||
<meta name="description" content="If all we have is a finite number of heartbeats left, what about me? Warning: this one is a bit creepier. But that's what you get when you come up with data science ideas as you're drifting off to ...">
|
||||
<meta name="keywords" content="fitbit, heartrate">
|
||||
<link rel="icon" href="/favicon.ico">
|
||||
<link rel="icon" href="https://bspeice.github.io/favicon.ico">
|
||||
|
||||
<title>Tick Tock... - Bradlee Speice</title>
|
||||
|
||||
<!-- Stylesheets -->
|
||||
<link href="/theme/css/bootstrap.min.css" rel="stylesheet">
|
||||
<link href="/theme/css/fonts.css" rel="stylesheet">
|
||||
<link href="/theme/css/nest.css" rel="stylesheet">
|
||||
<link href="/theme/css/pygment.css" rel="stylesheet">
|
||||
<link href="https://bspeice.github.io/theme/css/bootstrap.min.css" rel="stylesheet">
|
||||
<link href="https://bspeice.github.io/theme/css/fonts.css" rel="stylesheet">
|
||||
<link href="https://bspeice.github.io/theme/css/nest.css" rel="stylesheet">
|
||||
<link href="https://bspeice.github.io/theme/css/pygment.css" rel="stylesheet">
|
||||
<!-- /Stylesheets -->
|
||||
|
||||
<!-- RSS Feeds -->
|
||||
<link href="https://bspeice.github.io/feeds/all.atom.xml" type="application/atom+xml" rel="alternate" title="Bradlee Speice Full Atom Feed" />
|
||||
<link href="https://bspeice.github.io/feeds/blog.atom.xml" type="application/atom+xml" rel="alternate" title="Bradlee Speice Categories Atom Feed" />
|
||||
<!-- /RSS Feeds -->
|
||||
|
||||
<!-- HTML5 shim and Respond.js for IE8 support of HTML5 elements and media queries -->
|
||||
@ -26,6 +28,17 @@
|
||||
<script src="https://oss.maxcdn.com/respond/1.4.2/respond.min.js"></script>
|
||||
<![endif]-->
|
||||
|
||||
<!-- Google Analytics -->
|
||||
<script>
|
||||
(function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
|
||||
(i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o),
|
||||
m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
|
||||
})(window,document,'script','//www.google-analytics.com/analytics.js','ga');
|
||||
|
||||
ga('create', 'UA-74711362-1', 'auto');
|
||||
ga('send', 'pageview');
|
||||
</script>
|
||||
<!-- /Google Analytics -->
|
||||
|
||||
|
||||
</head>
|
||||
@ -39,7 +52,7 @@
|
||||
<div class="container">
|
||||
<div class="header-nav">
|
||||
<div class="header-logo">
|
||||
<a class="pull-left" href="/"><img class="mr20" src="/images/logo.svg" alt="logo">Bradlee Speice</a>
|
||||
<a class="pull-left" href="https://bspeice.github.io/"><img class="mr20" src="https://bspeice.github.io/images/logo.svg" alt="logo">Bradlee Speice</a>
|
||||
</div>
|
||||
<div class="nav pull-right">
|
||||
</div>
|
||||
@ -54,12 +67,12 @@
|
||||
<div class="col-lg-12">
|
||||
<div class="header-content">
|
||||
<h1 class="header-title">Tick Tock...</h1>
|
||||
<p class="header-date"> <a href="/author/bradlee-speice.html">Bradlee Speice</a>, Wed 06 April 2016, <a href="/category/blog.html">Blog</a></p>
|
||||
<p class="header-date"> <a href="https://bspeice.github.io/author/bradlee-speice.html">Bradlee Speice</a>, Wed 06 April 2016, <a href="https://bspeice.github.io/category/blog.html">Blog</a></p>
|
||||
<div class="header-underline"></div>
|
||||
<div class="clearfix"></div>
|
||||
<p class="pull-right header-tags">
|
||||
<span class="glyphicon glyphicon-tags mr5" aria-hidden="true"></span>
|
||||
<a href="/tag/fitbit.html">fitbit</a>, <a href="/tag/heartrate.html">heartrate</a> </p>
|
||||
<a href="https://bspeice.github.io/tag/fitbit.html">fitbit</a>, <a href="https://bspeice.github.io/tag/heartrate.html">heartrate</a> </p>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
@ -73,7 +86,521 @@
|
||||
|
||||
<!-- Content -->
|
||||
<div class="container content">
|
||||
<p>{% notebook 2016-4-6-tick-tock....ipynb %}</p>
|
||||
<p>
|
||||
<div class="cell border-box-sizing text_cell rendered">
|
||||
<div class="prompt input_prompt">
|
||||
</div>
|
||||
<div class="inner_cell">
|
||||
<div class="text_cell_render border-box-sizing rendered_html">
|
||||
<p>If all we have is a finite number of heartbeats left, what about me?</p>
|
||||
<hr>
|
||||
<p>Warning: this one is a bit creepier. But that's what you get when you come up with data science ideas as you're drifting off to sleep.</p>
|
||||
<h1 id="2.5-Billion">2.5 Billion<a class="anchor-link" href="#2.5-Billion">¶</a></h1><p>If <a href="http://www.pbs.org/wgbh/nova/heart/heartfacts.html">PBS</a> is right, that's the total number of heartbeats we get. Approximately once every second that number goes down, and down, and down again...</p>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="cell border-box-sizing code_cell rendered">
|
||||
<div class="input">
|
||||
<div class="prompt input_prompt">In [1]:</div>
|
||||
<div class="inner_cell">
|
||||
<div class="input_area">
|
||||
<div class=" highlight hl-ipython3"><pre><span class="n">total_heartbeats</span> <span class="o">=</span> <span class="mi">2500000000</span>
|
||||
</pre></div>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
</div>
|
||||
<div class="cell border-box-sizing text_cell rendered">
|
||||
<div class="prompt input_prompt">
|
||||
</div>
|
||||
<div class="inner_cell">
|
||||
<div class="text_cell_render border-box-sizing rendered_html">
|
||||
<p>I got a Fitbit this past Christmas season, mostly because I was interested in the data and trying to work on some data science projects with it. This is going to be the first project, but there will likely be more (and not nearly as morbid). My idea was: If this is the final number that I'm running up against, how far have I come, and how far am I likely to go? I've currently had about 3 months' time to estimate what my data will look like, so let's go ahead and see: given a lifetime of 2.5 billion heart beats, how much time do I have left?</p>
|
||||
<h1 id="Statistical-Considerations">Statistical Considerations<a class="anchor-link" href="#Statistical-Considerations">¶</a></h1><p>Since I'm starting to work with health data, there are a few considerations I think are important before I start digging through my data.</p>
|
||||
<ol>
|
||||
<li>The concept of 2.5 billion as an agreed-upon number is tenuous at best. I've seen anywhere from <a href="http://gizmodo.com/5982977/how-many-heartbeats-does-each-species-get-in-a-lifetime">2.21 billion</a> to <a href="http://wonderopolis.org/wonder/how-many-times-does-your-heart-beat-in-a-lifetime/">3.4 billion</a> so even if I knew exactly how many times my heart had beaten so far, the ending result is suspect at best. I'm using 2.5 billion because that seems to be about the midpoint of the estimates I've seen so far.</li>
|
||||
<li>Most of the numbers I've seen so far are based on extrapolating number of heart beats from life expectancy. As life expectancy goes up, the number of expected heart beats goes up too.</li>
|
||||
<li>My estimation of the number of heartbeats in my life so far is based on 3 months' worth of data, and I'm extrapolating an entire lifetime based on this.</li>
|
||||
</ol>
|
||||
<p>So while the ending number is <strong>not useful in any medical context</strong>, it is still an interesting project to work with the data I have on hand.</p>
|
||||
<h1 id="Getting-the-data">Getting the data<a class="anchor-link" href="#Getting-the-data">¶</a></h1><p><a href="https://www.fitbit.com/">Fitbit</a> has an <a href="https://dev.fitbit.com/">API available</a> for people to pull their personal data off the system. It requires registering an application, authentication with OAuth, and some other complicated things. <strong>If you're not interested in how I fetch the data, skip <a href="#Wild-Extrapolations-from-Small-Data">here</a></strong>.</p>
|
||||
<h2 id="Registering-an-application">Registering an application<a class="anchor-link" href="#Registering-an-application">¶</a></h2><p>I've already <a href="https://dev.fitbit.com/apps/new">registered a personal application</a> with Fitbit, so I can go ahead and retrieve things like the client secret from a file.</p>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="cell border-box-sizing code_cell rendered">
|
||||
<div class="input">
|
||||
<div class="prompt input_prompt">In [2]:</div>
|
||||
<div class="inner_cell">
|
||||
<div class="input_area">
|
||||
<div class=" highlight hl-ipython3"><pre><span class="c1"># Import all the OAuth secret information from a local file</span>
|
||||
<span class="kn">from</span> <span class="nn">secrets</span> <span class="k">import</span> <span class="n">CLIENT_SECRET</span><span class="p">,</span> <span class="n">CLIENT_ID</span><span class="p">,</span> <span class="n">CALLBACK_URL</span>
|
||||
</pre></div>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
</div>
|
||||
<div class="cell border-box-sizing text_cell rendered">
|
||||
<div class="prompt input_prompt">
|
||||
</div>
|
||||
<div class="inner_cell">
|
||||
<div class="text_cell_render border-box-sizing rendered_html">
|
||||
<h2 id="Handling-OAuth-2">Handling OAuth 2<a class="anchor-link" href="#Handling-OAuth-2">¶</a></h2><p>So, all the people that know what OAuth 2 is know what's coming next. For those who don't: OAuth is how people allow applications to access their data without those applications having to know their password. Essentially the dialog goes like this:</p>
|
||||
|
||||
<pre><code>Application: I've got a user here who wants to use my application, but I need their data.
|
||||
Fitbit: OK, what data do you need access to, and for how long?
|
||||
Application: I need all of these scopes, and for this amount of time.
|
||||
Fitbit: OK, let me check with the user to make sure they really want to do this.
|
||||
|
||||
Fitbit: User, do you really want to let this application have your data?
|
||||
User: I do! And to prove it, here's my password.
|
||||
Fitbit: OK, everything checks out. I'll let the application access your data.
|
||||
|
||||
Fitbit: Application, you can access the user's data. Use this special value whenever you need to request data from me.
|
||||
Application: Thank you, now give me all the data.</code></pre>
|
||||
<p>Effectively, this allows an application to gain access to a user's data without ever needing to know the user's password. That way, even if the other application is hacked, the user's original data remains safe. Plus, the user can let the data service know to stop providing the application access any time they want. All in all, very secure.</p>
|
||||
<p>It does make handling small requests a bit challenging, but I'll go through the steps here. We'll be using the <a href="https://dev.fitbit.com/docs/oauth2/">Implicit Grant</a> workflow, as it requires fewer steps in processing.</p>
|
||||
<p>First, we need to set up the URL the user would visit to authenticate:</p>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="cell border-box-sizing code_cell rendered">
|
||||
<div class="input">
|
||||
<div class="prompt input_prompt">In [3]:</div>
|
||||
<div class="inner_cell">
|
||||
<div class="input_area">
|
||||
<div class=" highlight hl-ipython3"><pre><span class="kn">import</span> <span class="nn">urllib</span>
|
||||
|
||||
<span class="n">FITBIT_URI</span> <span class="o">=</span> <span class="s1">'https://www.fitbit.com/oauth2/authorize'</span>
|
||||
<span class="n">params</span> <span class="o">=</span> <span class="p">{</span>
|
||||
<span class="c1"># If we need more than one scope, must be a CSV string</span>
|
||||
<span class="s1">'scope'</span><span class="p">:</span> <span class="s1">'heartrate'</span><span class="p">,</span>
|
||||
<span class="s1">'response_type'</span><span class="p">:</span> <span class="s1">'token'</span><span class="p">,</span>
|
||||
<span class="s1">'expires_in'</span><span class="p">:</span> <span class="mi">86400</span><span class="p">,</span> <span class="c1"># 1 day</span>
|
||||
<span class="s1">'redirect_uri'</span><span class="p">:</span> <span class="n">CALLBACK_URL</span><span class="p">,</span>
|
||||
<span class="s1">'client_id'</span><span class="p">:</span> <span class="n">CLIENT_ID</span>
|
||||
<span class="p">}</span>
|
||||
|
||||
<span class="n">request_url</span> <span class="o">=</span> <span class="n">FITBIT_URI</span> <span class="o">+</span> <span class="s1">'?'</span> <span class="o">+</span> <span class="n">urllib</span><span class="o">.</span><span class="n">parse</span><span class="o">.</span><span class="n">urlencode</span><span class="p">(</span><span class="n">params</span><span class="p">)</span>
|
||||
</pre></div>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
</div>
|
||||
<div class="cell border-box-sizing text_cell rendered">
|
||||
<div class="prompt input_prompt">
|
||||
</div>
|
||||
<div class="inner_cell">
|
||||
<div class="text_cell_render border-box-sizing rendered_html">
|
||||
<p>Now, here you would print out the request URL, go visit it, and get the full URL that it sends you back to. Because that URL contains very sensitive information (specifically my <code>CLIENT_ID</code>, which I'd really rather not share on the internet), I've skipped that step in the code here, but it happens in the background.</p>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="cell border-box-sizing code_cell rendered">
|
||||
<div class="input">
|
||||
<div class="prompt input_prompt">In [6]:</div>
|
||||
<div class="inner_cell">
|
||||
<div class="input_area">
|
||||
<div class=" highlight hl-ipython3"><pre><span class="c1"># The `response_url` variable contains the full URL that</span>
|
||||
<span class="c1"># FitBit sent back to us, but most importantly,</span>
|
||||
<span class="c1"># contains the token we need for authorization.</span>
|
||||
<span class="n">access_token</span> <span class="o">=</span> <span class="nb">dict</span><span class="p">(</span><span class="n">urllib</span><span class="o">.</span><span class="n">parse</span><span class="o">.</span><span class="n">parse_qsl</span><span class="p">(</span><span class="n">response_url</span><span class="p">))[</span><span class="s1">'access_token'</span><span class="p">]</span>
|
||||
</pre></div>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
</div>
|
||||
<div class="cell border-box-sizing text_cell rendered">
|
||||
<div class="prompt input_prompt">
|
||||
</div>
|
||||
<div class="inner_cell">
|
||||
<div class="text_cell_render border-box-sizing rendered_html">
|
||||
<h2 id="Requesting-the-data">Requesting the data<a class="anchor-link" href="#Requesting-the-data">¶</a></h2><p>Now that we've actually set up our access via the <code>access_token</code>, it's time to get the actual <a href="https://dev.fitbit.com/docs/heart-rate/">heart rate data</a>. I'll be using data from January 1, 2016 through March 31, 2016, and extrapolating wildly from that.</p>
|
||||
<p>Fitbit only lets us fetch intraday data one day at a time, so I'll create a date range using pandas and iterate through that to pull down all the data.</p>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="cell border-box-sizing code_cell rendered">
|
||||
<div class="input">
|
||||
<div class="prompt input_prompt">In [7]:</div>
|
||||
<div class="inner_cell">
|
||||
<div class="input_area">
|
||||
<div class=" highlight hl-ipython3"><pre><span class="kn">from</span> <span class="nn">requests_oauthlib</span> <span class="k">import</span> <span class="n">OAuth2Session</span>
|
||||
<span class="kn">import</span> <span class="nn">pandas</span> <span class="k">as</span> <span class="nn">pd</span>
|
||||
<span class="kn">from</span> <span class="nn">datetime</span> <span class="k">import</span> <span class="n">datetime</span>
|
||||
|
||||
<span class="n">session</span> <span class="o">=</span> <span class="n">OAuth2Session</span><span class="p">(</span><span class="n">token</span><span class="o">=</span><span class="p">{</span>
|
||||
<span class="s1">'access_token'</span><span class="p">:</span> <span class="n">access_token</span><span class="p">,</span>
|
||||
<span class="s1">'token_type'</span><span class="p">:</span> <span class="s1">'Bearer'</span>
|
||||
<span class="p">})</span>
|
||||
|
||||
<span class="n">format_str</span> <span class="o">=</span> <span class="s1">'%Y-%m-</span><span class="si">%d</span><span class="s1">'</span>
|
||||
<span class="n">start_date</span> <span class="o">=</span> <span class="n">datetime</span><span class="p">(</span><span class="mi">2016</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">)</span>
|
||||
<span class="n">end_date</span> <span class="o">=</span> <span class="n">datetime</span><span class="p">(</span><span class="mi">2016</span><span class="p">,</span> <span class="mi">3</span><span class="p">,</span> <span class="mi">31</span><span class="p">)</span>
|
||||
<span class="n">dr</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">date_range</span><span class="p">(</span><span class="n">start_date</span><span class="p">,</span> <span class="n">end_date</span><span class="p">)</span>
|
||||
|
||||
<span class="n">url</span> <span class="o">=</span> <span class="s1">'https://api.fitbit.com/1/user/-/activities/heart/date/{0}/1d/1min.json'</span>
|
||||
<span class="n">hr_responses</span> <span class="o">=</span> <span class="p">[</span><span class="n">session</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">url</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">d</span><span class="o">.</span><span class="n">strftime</span><span class="p">(</span><span class="n">format_str</span><span class="p">)))</span> <span class="k">for</span> <span class="n">d</span> <span class="ow">in</span> <span class="n">dr</span><span class="p">]</span>
|
||||
|
||||
<span class="k">def</span> <span class="nf">record_to_df</span><span class="p">(</span><span class="n">record</span><span class="p">):</span>
|
||||
<span class="k">if</span> <span class="s1">'activities-heart'</span> <span class="ow">not</span> <span class="ow">in</span> <span class="n">record</span><span class="p">:</span>
|
||||
<span class="k">return</span> <span class="kc">None</span>
|
||||
<span class="n">date_str</span> <span class="o">=</span> <span class="n">record</span><span class="p">[</span><span class="s1">'activities-heart'</span><span class="p">][</span><span class="mi">0</span><span class="p">][</span><span class="s1">'dateTime'</span><span class="p">]</span>
|
||||
<span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">(</span><span class="n">record</span><span class="p">[</span><span class="s1">'activities-heart-intraday'</span><span class="p">][</span><span class="s1">'dataset'</span><span class="p">])</span>
|
||||
|
||||
<span class="n">df</span><span class="o">.</span><span class="n">index</span> <span class="o">=</span> <span class="n">df</span><span class="p">[</span><span class="s1">'time'</span><span class="p">]</span><span class="o">.</span><span class="n">apply</span><span class="p">(</span>
|
||||
<span class="k">lambda</span> <span class="n">x</span><span class="p">:</span> <span class="n">datetime</span><span class="o">.</span><span class="n">strptime</span><span class="p">(</span><span class="n">date_str</span> <span class="o">+</span> <span class="s1">' '</span> <span class="o">+</span> <span class="n">x</span><span class="p">,</span> <span class="s1">'%Y-%m-</span><span class="si">%d</span><span class="s1"> %H:%M:%S'</span><span class="p">))</span>
|
||||
<span class="k">return</span> <span class="n">df</span>
|
||||
|
||||
<span class="n">hr_dataframes</span> <span class="o">=</span> <span class="p">[</span><span class="n">record_to_df</span><span class="p">(</span><span class="n">record</span><span class="o">.</span><span class="n">json</span><span class="p">())</span> <span class="k">for</span> <span class="n">record</span> <span class="ow">in</span> <span class="n">hr_responses</span><span class="p">]</span>
|
||||
<span class="n">hr_df_concat</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">concat</span><span class="p">(</span><span class="n">hr_dataframes</span><span class="p">)</span>
|
||||
|
||||
|
||||
<span class="c1"># There are some minutes with missing data, so we need to correct that</span>
|
||||
<span class="n">full_daterange</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">date_range</span><span class="p">(</span><span class="n">hr_df_concat</span><span class="o">.</span><span class="n">index</span><span class="p">[</span><span class="mi">0</span><span class="p">],</span>
|
||||
<span class="n">hr_df_concat</span><span class="o">.</span><span class="n">index</span><span class="p">[</span><span class="o">-</span><span class="mi">1</span><span class="p">],</span>
|
||||
<span class="n">freq</span><span class="o">=</span><span class="s1">'min'</span><span class="p">)</span>
|
||||
<span class="n">hr_df_full</span> <span class="o">=</span> <span class="n">hr_df_concat</span><span class="o">.</span><span class="n">reindex</span><span class="p">(</span><span class="n">full_daterange</span><span class="p">,</span> <span class="n">method</span><span class="o">=</span><span class="s1">'nearest'</span><span class="p">)</span>
|
||||
|
||||
<span class="nb">print</span><span class="p">(</span><span class="s2">"Heartbeats from {} to {}: {}"</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">hr_df_full</span><span class="o">.</span><span class="n">index</span><span class="p">[</span><span class="mi">0</span><span class="p">],</span>
|
||||
<span class="n">hr_df_full</span><span class="o">.</span><span class="n">index</span><span class="p">[</span><span class="o">-</span><span class="mi">1</span><span class="p">],</span>
|
||||
<span class="n">hr_df_full</span><span class="p">[</span><span class="s1">'value'</span><span class="p">]</span><span class="o">.</span><span class="n">sum</span><span class="p">()))</span>
|
||||
</pre></div>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="output_wrapper">
|
||||
<div class="output">
|
||||
|
||||
|
||||
<div class="output_area"><div class="prompt"></div>
|
||||
<div class="output_subarea output_stream output_stdout output_text">
|
||||
<pre>Heartbeats from 2016-01-01 00:00:00 to 2016-03-31 23:59:00: 8139060
|
||||
</pre>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
|
||||
</div>
|
||||
<div class="cell border-box-sizing text_cell rendered">
|
||||
<div class="prompt input_prompt">
|
||||
</div>
|
||||
<div class="inner_cell">
|
||||
<div class="text_cell_render border-box-sizing rendered_html">
|
||||
<p>And now we've retrieved all the available heart rate data for January 1<sup>st</sup> through March 31<sup>st</sup>! Let's get to the actual analysis.</p>
|
||||
<h1 id="Wild-Extrapolations-from-Small-Data">Wild Extrapolations from Small Data<a class="anchor-link" href="#Wild-Extrapolations-from-Small-Data">¶</a></h1><p>A fundamental issue with this data is that it's pretty small. I'm using 3 months of data to make predictions about my entire life. But, purely as an exercise, I'll move forward.</p>
|
||||
<h2 id="How-many-heartbeats-so-far?">How many heartbeats so far?<a class="anchor-link" href="#How-many-heartbeats-so-far?">¶</a></h2><p>The first step is figuring out how many of the 2.5 billion heartbeats I've used so far. We're going to try and work backward from the present day to when I was born to get that number. The easy part comes first: going back to January 1<sup>st</sup>, 1992. That's because I can generalize how many 3-month increments there were between now and then, account for leap years, and call that section done.</p>
|
||||
<p>Between January 1992 and January 2016 there were 96 quarters, and 6 leap days. The number we're looking for is:</p>
|
||||
\begin{equation}
|
||||
hr_q \cdot n - hr_d \cdot (n-m)
|
||||
\end{equation}<ul>
|
||||
<li>$hr_q$: Number of heartbeats per quarter</li>
|
||||
<li>$hr_d$: Number of heartbeats on leap day</li>
|
||||
<li>$n$: Number of quarters, in this case 96</li>
|
||||
<li>$m$: Number of leap days, in this case 6</li>
|
||||
</ul>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="cell border-box-sizing code_cell rendered">
|
||||
<div class="input">
|
||||
<div class="prompt input_prompt">In [8]:</div>
|
||||
<div class="inner_cell">
|
||||
<div class="input_area">
|
||||
<div class=" highlight hl-ipython3"><pre><span class="n">quarterly_count</span> <span class="o">=</span> <span class="n">hr_df_full</span><span class="p">[</span><span class="s1">'value'</span><span class="p">]</span><span class="o">.</span><span class="n">sum</span><span class="p">()</span>
|
||||
<span class="n">leap_day_count</span> <span class="o">=</span> <span class="n">hr_df_full</span><span class="p">[(</span><span class="n">hr_df_full</span><span class="o">.</span><span class="n">index</span><span class="o">.</span><span class="n">month</span> <span class="o">==</span> <span class="mi">2</span><span class="p">)</span> <span class="o">&</span>
|
||||
<span class="p">(</span><span class="n">hr_df_full</span><span class="o">.</span><span class="n">index</span><span class="o">.</span><span class="n">day</span> <span class="o">==</span> <span class="mi">29</span><span class="p">)][</span><span class="s1">'value'</span><span class="p">]</span><span class="o">.</span><span class="n">sum</span><span class="p">()</span>
|
||||
<span class="n">num_quarters</span> <span class="o">=</span> <span class="mi">96</span>
|
||||
<span class="n">leap_days</span> <span class="o">=</span> <span class="mi">6</span>
|
||||
|
||||
<span class="n">jan_92_jan_16</span> <span class="o">=</span> <span class="n">quarterly_count</span> <span class="o">*</span> <span class="n">num_quarters</span> <span class="o">-</span> <span class="n">leap_day_count</span> <span class="o">*</span> <span class="p">(</span><span class="n">num_quarters</span> <span class="o">-</span> <span class="n">leap_days</span><span class="p">)</span>
|
||||
<span class="n">jan_92_jan_16</span>
|
||||
</pre></div>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="output_wrapper">
|
||||
<div class="output">
|
||||
|
||||
|
||||
<div class="output_area"><div class="prompt output_prompt">Out[8]:</div>
|
||||
|
||||
|
||||
<div class="output_text output_subarea output_execute_result">
|
||||
<pre>773609400</pre>
|
||||
</div>
|
||||
|
||||
</div>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
|
||||
</div>
|
||||
<div class="cell border-box-sizing text_cell rendered">
|
||||
<div class="prompt input_prompt">
|
||||
</div>
|
||||
<div class="inner_cell">
|
||||
<div class="text_cell_render border-box-sizing rendered_html">
|
||||
<p>So between January 1992 and January 2016 I've used $\approx$ 774 million heartbeats. Now, I need to go back to my exact birthday. I'm going to first find on average how many heartbeats I use in a minute, and multiply that by the number of minutes between my birthday and January 1992.</p>
|
||||
<p>For privacy purposes I'll put the code here that I'm using, but without any identifying information:</p>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="cell border-box-sizing code_cell rendered">
|
||||
<div class="input">
|
||||
<div class="prompt input_prompt">In [9]:</div>
|
||||
<div class="inner_cell">
|
||||
<div class="input_area">
|
||||
<div class=" highlight hl-ipython3"><pre><span class="n">minute_mean</span> <span class="o">=</span> <span class="n">hr_df_full</span><span class="p">[</span><span class="s1">'value'</span><span class="p">]</span><span class="o">.</span><span class="n">mean</span><span class="p">()</span>
|
||||
<span class="c1"># Don't you wish you knew?</span>
|
||||
<span class="c1"># birthday_minutes = ???</span>
|
||||
|
||||
<span class="n">birthday_heartbeats</span> <span class="o">=</span> <span class="n">birthday_minutes</span> <span class="o">*</span> <span class="n">minute_mean</span>
|
||||
|
||||
<span class="n">heartbeats_until_2016</span> <span class="o">=</span> <span class="nb">int</span><span class="p">(</span><span class="n">birthday_heartbeats</span> <span class="o">+</span> <span class="n">jan_92_jan_16</span><span class="p">)</span>
|
||||
<span class="n">remaining_2016</span> <span class="o">=</span> <span class="n">total_heartbeats</span> <span class="o">-</span> <span class="n">heartbeats_until_2016</span>
|
||||
|
||||
<span class="nb">print</span><span class="p">(</span><span class="s2">"Heartbeats so far: {}"</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">heartbeats_until_2016</span><span class="p">))</span>
|
||||
<span class="nb">print</span><span class="p">(</span><span class="s2">"Remaining heartbeats: {}"</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">remaining_2016</span><span class="p">))</span>
|
||||
</pre></div>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="output_wrapper">
|
||||
<div class="output">
|
||||
|
||||
|
||||
<div class="output_area"><div class="prompt"></div>
|
||||
<div class="output_subarea output_stream output_stdout output_text">
|
||||
<pre>Heartbeats so far: 775804660
|
||||
Remaining heartbeats: 1724195340
|
||||
</pre>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
|
||||
</div>
|
||||
<div class="cell border-box-sizing text_cell rendered">
|
||||
<div class="prompt input_prompt">
|
||||
</div>
|
||||
<div class="inner_cell">
|
||||
<div class="text_cell_render border-box-sizing rendered_html">
|
||||
<p>It would appear that my heart has beaten 775,804,660 times between my moment of birth and January 1<sup>st</sup> 2016, and that I have 1.72 billion left.</p>
|
||||
<h2 id="How-many-heartbeats-longer?">How many heartbeats longer?<a class="anchor-link" href="#How-many-heartbeats-longer?">¶</a></h2><p>Now comes the tricky bit. I know how many heart beats I've used so far, and how many I have remaining, so I'd like to come up with a (relatively) accurate estimate of when exactly my heart should give out. We'll do this in a few steps, increasing in granularity.</p>
|
||||
<p>First step, how many heartbeats do I use in a 4-year period? I have data for a single quarter including leap day, so I want to know:</p>
|
||||
\begin{equation}
|
||||
hr_q \cdot n - hr_d \cdot (n - m)
|
||||
\end{equation}<ul>
|
||||
<li>$hr_q$: Heartbeats per quarter</li>
|
||||
<li>$hr_d$: Heartbeats per leap day</li>
|
||||
<li>$n$: Number of quarters = 16</li>
|
||||
<li>$m$: Number of leap days = 1</li>
|
||||
</ul>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="cell border-box-sizing code_cell rendered">
|
||||
<div class="input">
|
||||
<div class="prompt input_prompt">In [10]:</div>
|
||||
<div class="inner_cell">
|
||||
<div class="input_area">
|
||||
<div class=" highlight hl-ipython3"><pre><span class="n">heartbeats_4year</span> <span class="o">=</span> <span class="n">quarterly_count</span> <span class="o">*</span> <span class="mi">16</span> <span class="o">-</span> <span class="n">leap_day_count</span> <span class="o">*</span> <span class="p">(</span><span class="mi">16</span> <span class="o">-</span> <span class="mi">1</span><span class="p">)</span>
|
||||
<span class="n">heartbeats_4year</span>
|
||||
</pre></div>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="output_wrapper">
|
||||
<div class="output">
|
||||
|
||||
|
||||
<div class="output_area"><div class="prompt output_prompt">Out[10]:</div>
|
||||
|
||||
|
||||
<div class="output_text output_subarea output_execute_result">
|
||||
<pre>128934900</pre>
|
||||
</div>
|
||||
|
||||
</div>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
|
||||
</div>
|
||||
<div class="cell border-box-sizing text_cell rendered">
|
||||
<div class="prompt input_prompt">
|
||||
</div>
|
||||
<div class="inner_cell">
|
||||
<div class="text_cell_render border-box-sizing rendered_html">
|
||||
<p>Now, I can fast forward from 2016 the number of periods of 4 years I have left.</p>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="cell border-box-sizing code_cell rendered">
|
||||
<div class="input">
|
||||
<div class="prompt input_prompt">In [11]:</div>
|
||||
<div class="inner_cell">
|
||||
<div class="input_area">
|
||||
<div class=" highlight hl-ipython3"><pre><span class="n">four_year_periods</span> <span class="o">=</span> <span class="n">remaining_2016</span> <span class="o">//</span> <span class="n">heartbeats_4year</span>
|
||||
<span class="n">remaining_4y</span> <span class="o">=</span> <span class="n">remaining_2016</span> <span class="o">-</span> <span class="n">four_year_periods</span> <span class="o">*</span> <span class="n">heartbeats_4year</span>
|
||||
|
||||
<span class="nb">print</span><span class="p">(</span><span class="s2">"Four year periods remaining: {}"</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">four_year_periods</span><span class="p">))</span>
|
||||
<span class="nb">print</span><span class="p">(</span><span class="s2">"Remaining heartbeats after 4 year periods: {}"</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">remaining_4y</span><span class="p">))</span>
|
||||
</pre></div>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="output_wrapper">
|
||||
<div class="output">
|
||||
|
||||
|
||||
<div class="output_area"><div class="prompt"></div>
|
||||
<div class="output_subarea output_stream output_stdout output_text">
|
||||
<pre>Four year periods remaining: 13
|
||||
Remaining heartbeats after 4 year periods: 48041640
|
||||
</pre>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
|
||||
</div>
|
||||
<div class="cell border-box-sizing text_cell rendered">
|
||||
<div class="prompt input_prompt">
|
||||
</div>
|
||||
<div class="inner_cell">
|
||||
<div class="text_cell_render border-box-sizing rendered_html">
|
||||
<p>Given that there are 13 four-year periods left, I can move from 2016 all the way to 2068, and find that I will have 48 million heart beats left. Let's drop down to figuring out how many quarters that is. I know that 2068 will have a leap day (unless someone finally decides to get rid of them), so I'll subtract that out first. Then, I'm left to figure out how many quarters exactly are left.</p>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="cell border-box-sizing code_cell rendered">
|
||||
<div class="input">
|
||||
<div class="prompt input_prompt">In [12]:</div>
|
||||
<div class="inner_cell">
|
||||
<div class="input_area">
|
||||
<div class=" highlight hl-ipython3"><pre><span class="n">remaining_leap</span> <span class="o">=</span> <span class="n">remaining_4y</span> <span class="o">-</span> <span class="n">leap_day_count</span>
|
||||
<span class="c1"># Ignore leap day in the data set</span>
|
||||
<span class="n">heartbeats_quarter</span> <span class="o">=</span> <span class="n">hr_df_full</span><span class="p">[(</span><span class="n">hr_df_full</span><span class="o">.</span><span class="n">index</span><span class="o">.</span><span class="n">month</span> <span class="o">!=</span> <span class="mi">2</span><span class="p">)</span> <span class="o">&</span>
|
||||
<span class="p">(</span><span class="n">hr_df_full</span><span class="o">.</span><span class="n">index</span><span class="o">.</span><span class="n">day</span> <span class="o">!=</span> <span class="mi">29</span><span class="p">)][</span><span class="s1">'value'</span><span class="p">]</span><span class="o">.</span><span class="n">sum</span><span class="p">()</span>
|
||||
<span class="n">quarters_left</span> <span class="o">=</span> <span class="n">remaining_leap</span> <span class="o">//</span> <span class="n">heartbeats_quarter</span>
|
||||
<span class="n">remaining_year</span> <span class="o">=</span> <span class="n">remaining_leap</span> <span class="o">-</span> <span class="n">quarters_left</span> <span class="o">*</span> <span class="n">heartbeats_quarter</span>
|
||||
|
||||
<span class="nb">print</span><span class="p">(</span><span class="s2">"Quarters left starting 2068: {}"</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">quarters_left</span><span class="p">))</span>
|
||||
<span class="nb">print</span><span class="p">(</span><span class="s2">"Remaining heartbeats after that: {}"</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">remaining_year</span><span class="p">))</span>
|
||||
</pre></div>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="output_wrapper">
|
||||
<div class="output">
|
||||
|
||||
|
||||
<div class="output_area"><div class="prompt"></div>
|
||||
<div class="output_subarea output_stream output_stdout output_text">
|
||||
<pre>Quarters left starting 2068: 8
|
||||
Remaining heartbeats after that: 4760716
|
||||
</pre>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
|
||||
</div>
|
||||
<div class="cell border-box-sizing text_cell rendered">
|
||||
<div class="prompt input_prompt">
|
||||
</div>
|
||||
<div class="inner_cell">
|
||||
<div class="text_cell_render border-box-sizing rendered_html">
|
||||
<p>So, that analysis gets me through until January 1<sup>st</sup> 2070. Final step, using that minute estimate to figure out how many minutes past that I'm predicted to have:</p>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="cell border-box-sizing code_cell rendered">
|
||||
<div class="input">
|
||||
<div class="prompt input_prompt">In [13]:</div>
|
||||
<div class="inner_cell">
|
||||
<div class="input_area">
|
||||
<div class=" highlight hl-ipython3"><pre><span class="kn">from</span> <span class="nn">datetime</span> <span class="k">import</span> <span class="n">timedelta</span>
|
||||
|
||||
<span class="n">base</span> <span class="o">=</span> <span class="n">datetime</span><span class="p">(</span><span class="mi">2070</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">)</span>
|
||||
<span class="n">minutes_left</span> <span class="o">=</span> <span class="n">remaining_year</span> <span class="o">//</span> <span class="n">minute_mean</span>
|
||||
|
||||
<span class="n">kaput</span> <span class="o">=</span> <span class="n">timedelta</span><span class="p">(</span><span class="n">minutes</span><span class="o">=</span><span class="n">minutes_left</span><span class="p">)</span>
|
||||
<span class="n">base</span> <span class="o">+</span> <span class="n">kaput</span>
|
||||
</pre></div>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="output_wrapper">
|
||||
<div class="output">
|
||||
|
||||
|
||||
<div class="output_area"><div class="prompt output_prompt">Out[13]:</div>
|
||||
|
||||
|
||||
<div class="output_text output_subarea output_execute_result">
|
||||
<pre>datetime.datetime(2070, 2, 23, 5, 28)</pre>
|
||||
</div>
|
||||
|
||||
</div>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
|
||||
</div>
|
||||
<div class="cell border-box-sizing text_cell rendered">
|
||||
<div class="prompt input_prompt">
|
||||
</div>
|
||||
<div class="inner_cell">
|
||||
<div class="text_cell_render border-box-sizing rendered_html">
|
||||
<p>According to this, I've got until February 23<sup>rd</sup>, 2070 at 5:28 AM before my heart gives out.</p>
|
||||
<h1 id="Summary">Summary<a class="anchor-link" href="#Summary">¶</a></h1><p>Well, that's kind of a creepy date to know. As I said at the top though, <strong>this number is totally useless in any medical context</strong>. It ignores the rate at which we continue to get better at making people live longer, and extrapolates the rest of my life from 3 months' worth of data. Additionally, throughout my time developing this post I made many minor mistakes. I think they're all fixed now, but it's easy to mix a number up here or there and the analysis gets thrown off by a couple of years.</p>
|
||||
<p>Even still, I think philosophically humans have a desire to know how much time we have left in the world. <a href="https://www.biblegateway.com/passage/?search=psalm+144&version=ESV">Man is but a breath</a>, and it's scary to think just how quickly that date may be coming up. This analysis asks an important question though: what are you going to do with the time you have left?</p>
|
||||
<p>Thanks for sticking with me on this one, I promise it will be much less depressing next time!</p>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
</div></p>
|
||||
<script type="text/x-mathjax-config">
|
||||
//MathJax.Hub.Config({tex2jax: {inlineMath: [['$','$'], ['\(','\)']]}});
|
||||
MathJax.Hub.Config({tex2jax: {inlineMath: [['\$','\$']]}});
|
||||
@ -82,6 +609,20 @@ MathJax.Hub.Config({tex2jax: {inlineMath: [['\$','\$']]}});
|
||||
<script async src='https://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS_CHTML'></script>
|
||||
|
||||
|
||||
<div class="comments">
|
||||
<div id="disqus_thread"></div>
|
||||
<script type="text/javascript">
|
||||
var disqus_shortname = 'bradleespeice';
|
||||
var disqus_identifier = 'tick-tock.html';
|
||||
var disqus_url = 'https://bspeice.github.io/tick-tock.html';
|
||||
(function() {
|
||||
var dsq = document.createElement('script'); dsq.type = 'text/javascript'; dsq.async = true;
|
||||
dsq.src = '//' + disqus_shortname + '.disqus.com/embed.js';
|
||||
(document.getElementsByTagName('head')[0] || document.getElementsByTagName('body')[0]).appendChild(dsq);
|
||||
})();
|
||||
</script>
|
||||
<noscript>Please enable JavaScript to view the comments.</noscript>
|
||||
</div>
|
||||
|
||||
</div>
|
||||
<!-- /Content -->
|
||||
@ -93,6 +634,7 @@ MathJax.Hub.Config({tex2jax: {inlineMath: [['\$','\$']]}});
|
||||
<div class="col-xs-4 col-sm-3 col-md-3 col-lg-3">
|
||||
<div class="footer-title"></div>
|
||||
<ul class="list-unstyled">
|
||||
<li><a href="https://bspeice.github.io/feeds/all.atom.xml" type="application/atom+xml" rel="alternate"></a></li>
|
||||
</ul>
|
||||
</div>
|
||||
<div class="col-xs-4 col-sm-3 col-md-3 col-lg-3">
|
||||
|
@ -4,20 +4,22 @@
|
||||
<meta charset="utf-8">
|
||||
<meta http-equiv="X-UA-Compatible" content="IE=edge">
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1">
|
||||
<meta name="description" content="{% notebook 2016-3-28-tweet-like-me.ipynb %} MathJax.Hub.Config({tex2jax: {inlineMath: [['$','$'], ['\(','\)']]}});">
|
||||
<meta name="description" content="An experiment in creating a robot that will imitate me on Twitter. So, I'm taking a Machine Learning course this semester in school, and one of the topics we keep coming back to is natural ...">
|
||||
<meta name="keywords" content="MCMC, twitter">
|
||||
<link rel="icon" href="/favicon.ico">
|
||||
<link rel="icon" href="https://bspeice.github.io/favicon.ico">
|
||||
|
||||
<title>Tweet Like Me - Bradlee Speice</title>
|
||||
|
||||
<!-- Stylesheets -->
|
||||
<link href="/theme/css/bootstrap.min.css" rel="stylesheet">
|
||||
<link href="/theme/css/fonts.css" rel="stylesheet">
|
||||
<link href="/theme/css/nest.css" rel="stylesheet">
|
||||
<link href="/theme/css/pygment.css" rel="stylesheet">
|
||||
<link href="https://bspeice.github.io/theme/css/bootstrap.min.css" rel="stylesheet">
|
||||
<link href="https://bspeice.github.io/theme/css/fonts.css" rel="stylesheet">
|
||||
<link href="https://bspeice.github.io/theme/css/nest.css" rel="stylesheet">
|
||||
<link href="https://bspeice.github.io/theme/css/pygment.css" rel="stylesheet">
|
||||
<!-- /Stylesheets -->
|
||||
|
||||
<!-- RSS Feeds -->
|
||||
<link href="https://bspeice.github.io/feeds/all.atom.xml" type="application/atom+xml" rel="alternate" title="Bradlee Speice Full Atom Feed" />
|
||||
<link href="https://bspeice.github.io/feeds/blog.atom.xml" type="application/atom+xml" rel="alternate" title="Bradlee Speice Categories Atom Feed" />
|
||||
<!-- /RSS Feeds -->
|
||||
|
||||
<!-- HTML5 shim and Respond.js for IE8 support of HTML5 elements and media queries -->
|
||||
@ -26,6 +28,17 @@
|
||||
<script src="https://oss.maxcdn.com/respond/1.4.2/respond.min.js"></script>
|
||||
<![endif]-->
|
||||
|
||||
<!-- Google Analytics -->
|
||||
<script>
|
||||
(function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
|
||||
(i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o),
|
||||
m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
|
||||
})(window,document,'script','//www.google-analytics.com/analytics.js','ga');
|
||||
|
||||
ga('create', 'UA-74711362-1', 'auto');
|
||||
ga('send', 'pageview');
|
||||
</script>
|
||||
<!-- /Google Analytics -->
|
||||
|
||||
|
||||
</head>
|
||||
@ -39,7 +52,7 @@
|
||||
<div class="container">
|
||||
<div class="header-nav">
|
||||
<div class="header-logo">
|
||||
<a class="pull-left" href="/"><img class="mr20" src="/images/logo.svg" alt="logo">Bradlee Speice</a>
|
||||
<a class="pull-left" href="https://bspeice.github.io/"><img class="mr20" src="https://bspeice.github.io/images/logo.svg" alt="logo">Bradlee Speice</a>
|
||||
</div>
|
||||
<div class="nav pull-right">
|
||||
</div>
|
||||
@ -54,12 +67,12 @@
|
||||
<div class="col-lg-12">
|
||||
<div class="header-content">
|
||||
<h1 class="header-title">Tweet Like Me</h1>
|
||||
<p class="header-date"> <a href="/author/bradlee-speice.html">Bradlee Speice</a>, Mon 28 March 2016, <a href="/category/blog.html">Blog</a></p>
|
||||
<p class="header-date"> <a href="https://bspeice.github.io/author/bradlee-speice.html">Bradlee Speice</a>, Mon 28 March 2016, <a href="https://bspeice.github.io/category/blog.html">Blog</a></p>
|
||||
<div class="header-underline"></div>
|
||||
<div class="clearfix"></div>
|
||||
<p class="pull-right header-tags">
|
||||
<span class="glyphicon glyphicon-tags mr5" aria-hidden="true"></span>
|
||||
<a href="/tag/mcmc.html">MCMC</a>, <a href="/tag/twitter.html">twitter</a> </p>
|
||||
<a href="https://bspeice.github.io/tag/mcmc.html">MCMC</a>, <a href="https://bspeice.github.io/tag/twitter.html">twitter</a> </p>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
@ -73,7 +86,577 @@
|
||||
|
||||
<!-- Content -->
|
||||
<div class="container content">
|
||||
<p>{% notebook 2016-3-28-tweet-like-me.ipynb %}</p>
|
||||
<p>
|
||||
<div class="cell border-box-sizing text_cell rendered">
|
||||
<div class="prompt input_prompt">
|
||||
</div>
|
||||
<div class="inner_cell">
|
||||
<div class="text_cell_render border-box-sizing rendered_html">
|
||||
<p>An experiment in creating a robot that will imitate me on Twitter.</p>
|
||||
<hr>
|
||||
<p>So, I'm taking a Machine Learning course this semester in school, and one of the topics we keep coming back to is natural language processing and the 'bag of words' data structure. That is, given a sentence:</p>
|
||||
<p><code>How much wood would a woodchuck chuck if a woodchuck could chuck wood?</code></p>
|
||||
<p>We can represent that sentence as the following list:</p>
|
||||
<p><code>{
|
||||
How: 1
|
||||
much: 1
|
||||
wood: 2
|
||||
would: 1
|
||||
a: 2
|
||||
woodchuck: 2
|
||||
chuck: 2
|
||||
if: 1
|
||||
could: 1
|
||||
}</code></p>
|
||||
<p>Ignoring <em>where</em> the words happened, we're just interested in how <em>often</em> the words occurred. That got me thinking: I wonder what would happen if I built a robot that just imitated how often I said things? It's dangerous territory when computer scientists ask "what if," but I got curious enough I wanted to follow through.</p>
|
||||
<h2 id="The-Objective">The Objective<a class="anchor-link" href="#The-Objective">¶</a></h2><p>Given an input list of Tweets, build up the following things:</p>
|
||||
<ol>
|
||||
<li>The distribution of starting words; since there are no "prior" words to go from, we need to treat this as a special case.</li>
|
||||
<li>The distribution of words given a previous word; for example, every time I use the word <code>woodchuck</code> in the example sentence, there is a 50% chance it is followed by <code>chuck</code> and a 50% chance it is followed by <code>could</code>. I need this distribution for all words.</li>
|
||||
<li>The distribution of quantity of hashtags; Do I most often use just one? Two? Do they follow something like a Poisson distribution?</li>
|
||||
<li>Distribution of hashtags; Given a number of hashtags, what is the actual content? I'll treat hashtags as separate from the content of a tweet.</li>
|
||||
</ol>
|
||||
<h2 id="The-Data">The Data<a class="anchor-link" href="#The-Data">¶</a></h2><p>I'm using as input my tweet history. I don't really use Twitter anymore, but it seems like a fun use of the dataset. I'd like to eventually build this to a point where I can imitate anyone on Twitter using their last 100 tweets or so, but I'll start with this as example code.</p>
|
||||
<h2 id="The-Algorithm">The Algorithm<a class="anchor-link" href="#The-Algorithm">¶</a></h2><p>I'll be using the <a href="http://www.nltk.org/">NLTK</a> library for doing a lot of the heavy lifting. First, let's import the data:</p>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="cell border-box-sizing code_cell rendered">
|
||||
<div class="input">
|
||||
<div class="prompt input_prompt">In [1]:</div>
|
||||
<div class="inner_cell">
|
||||
<div class="input_area">
|
||||
<div class=" highlight hl-ipython3"><pre><span class="kn">import</span> <span class="nn">pandas</span> <span class="k">as</span> <span class="nn">pd</span>
|
||||
|
||||
<span class="n">tweets</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">read_csv</span><span class="p">(</span><span class="s1">'tweets.csv'</span><span class="p">)</span>
|
||||
<span class="n">text</span> <span class="o">=</span> <span class="n">tweets</span><span class="o">.</span><span class="n">text</span>
|
||||
|
||||
<span class="c1"># Don't include tweets in reply to or mentioning people</span>
|
||||
<span class="n">replies</span> <span class="o">=</span> <span class="n">text</span><span class="o">.</span><span class="n">str</span><span class="o">.</span><span class="n">contains</span><span class="p">(</span><span class="s1">'@'</span><span class="p">)</span>
|
||||
<span class="n">text_norep</span> <span class="o">=</span> <span class="n">text</span><span class="o">.</span><span class="n">loc</span><span class="p">[</span><span class="o">~</span><span class="n">replies</span><span class="p">]</span>
|
||||
</pre></div>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
</div>
|
||||
<div class="cell border-box-sizing text_cell rendered">
|
||||
<div class="prompt input_prompt">
|
||||
</div>
|
||||
<div class="inner_cell">
|
||||
<div class="text_cell_render border-box-sizing rendered_html">
|
||||
<p>And now that we've got data, let's start crunching. First, tokenize and build out the distribution of first word:</p>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="cell border-box-sizing code_cell rendered">
|
||||
<div class="input">
|
||||
<div class="prompt input_prompt">In [2]:</div>
|
||||
<div class="inner_cell">
|
||||
<div class="input_area">
|
||||
<div class=" highlight hl-ipython3"><pre><span class="kn">from</span> <span class="nn">nltk.tokenize</span> <span class="k">import</span> <span class="n">TweetTokenizer</span>
|
||||
<span class="n">tknzr</span> <span class="o">=</span> <span class="n">TweetTokenizer</span><span class="p">()</span>
|
||||
<span class="n">tokens</span> <span class="o">=</span> <span class="n">text_norep</span><span class="o">.</span><span class="n">map</span><span class="p">(</span><span class="n">tknzr</span><span class="o">.</span><span class="n">tokenize</span><span class="p">)</span>
|
||||
|
||||
<span class="n">first_words</span> <span class="o">=</span> <span class="n">tokens</span><span class="o">.</span><span class="n">map</span><span class="p">(</span><span class="k">lambda</span> <span class="n">x</span><span class="p">:</span> <span class="n">x</span><span class="p">[</span><span class="mi">0</span><span class="p">])</span>
|
||||
<span class="n">first_words_alpha</span> <span class="o">=</span> <span class="n">first_words</span><span class="p">[</span><span class="n">first_words</span><span class="o">.</span><span class="n">str</span><span class="o">.</span><span class="n">isalpha</span><span class="p">()]</span>
|
||||
<span class="n">first_word_dist</span> <span class="o">=</span> <span class="n">first_words_alpha</span><span class="o">.</span><span class="n">value_counts</span><span class="p">()</span> <span class="o">/</span> <span class="nb">len</span><span class="p">(</span><span class="n">first_words_alpha</span><span class="p">)</span>
|
||||
</pre></div>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
</div>
|
||||
<div class="cell border-box-sizing text_cell rendered">
|
||||
<div class="prompt input_prompt">
|
||||
</div>
|
||||
<div class="inner_cell">
|
||||
<div class="text_cell_render border-box-sizing rendered_html">
|
||||
<p>Next, we need to build out the conditional distributions. That is, what is the probability of the next word given the current word is $X$? This one is a bit more involved. First, find all unique words, and then find what words follow them. This can probably be done in a more efficient manner than I'm currently doing here, but we'll ignore that for the moment.</p>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="cell border-box-sizing code_cell rendered">
|
||||
<div class="input">
|
||||
<div class="prompt input_prompt">In [3]:</div>
|
||||
<div class="inner_cell">
|
||||
<div class="input_area">
|
||||
<div class=" highlight hl-ipython3"><pre><span class="kn">from</span> <span class="nn">functools</span> <span class="k">import</span> <span class="n">reduce</span>
|
||||
|
||||
<span class="c1"># Get all possible words</span>
|
||||
<span class="n">all_words</span> <span class="o">=</span> <span class="n">reduce</span><span class="p">(</span><span class="k">lambda</span> <span class="n">x</span><span class="p">,</span> <span class="n">y</span><span class="p">:</span> <span class="n">x</span><span class="o">+</span><span class="n">y</span><span class="p">,</span> <span class="n">tokens</span><span class="p">,</span> <span class="p">[])</span>
|
||||
<span class="n">unique_words</span> <span class="o">=</span> <span class="nb">set</span><span class="p">(</span><span class="n">all_words</span><span class="p">)</span>
|
||||
<span class="n">actual_words</span> <span class="o">=</span> <span class="nb">set</span><span class="p">([</span><span class="n">x</span> <span class="k">if</span> <span class="n">x</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span> <span class="o">!=</span> <span class="s1">'.'</span> <span class="k">else</span> <span class="kc">None</span> <span class="k">for</span> <span class="n">x</span> <span class="ow">in</span> <span class="n">unique_words</span><span class="p">])</span>
|
||||
|
||||
<span class="n">word_dist</span> <span class="o">=</span> <span class="p">{}</span>
|
||||
<span class="k">for</span> <span class="n">word</span> <span class="ow">in</span> <span class="nb">iter</span><span class="p">(</span><span class="n">actual_words</span><span class="p">):</span>
|
||||
<span class="n">indices</span> <span class="o">=</span> <span class="p">[</span><span class="n">i</span> <span class="k">for</span> <span class="n">i</span><span class="p">,</span> <span class="n">j</span> <span class="ow">in</span> <span class="nb">enumerate</span><span class="p">(</span><span class="n">all_words</span><span class="p">)</span> <span class="k">if</span> <span class="n">j</span> <span class="o">==</span> <span class="n">word</span><span class="p">]</span>
|
||||
<span class="n">proceeding</span> <span class="o">=</span> <span class="p">[</span><span class="n">all_words</span><span class="p">[</span><span class="n">i</span><span class="o">+</span><span class="mi">1</span><span class="p">]</span> <span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="n">indices</span><span class="p">]</span>
|
||||
<span class="n">word_dist</span><span class="p">[</span><span class="n">word</span><span class="p">]</span> <span class="o">=</span> <span class="n">proceeding</span>
|
||||
</pre></div>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
</div>
|
||||
<div class="cell border-box-sizing text_cell rendered">
|
||||
<div class="prompt input_prompt">
|
||||
</div>
|
||||
<div class="inner_cell">
|
||||
<div class="text_cell_render border-box-sizing rendered_html">
|
||||
<p>Now that we've got the tweet analysis done, it's time for the fun part: hashtags! Let's count how many hashtags are in each tweet; I want to get a sense of the distribution.</p>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="cell border-box-sizing code_cell rendered">
|
||||
<div class="input">
|
||||
<div class="prompt input_prompt">In [4]:</div>
|
||||
<div class="inner_cell">
|
||||
<div class="input_area">
|
||||
<div class=" highlight hl-ipython3"><pre><span class="kn">import</span> <span class="nn">matplotlib.pyplot</span> <span class="k">as</span> <span class="nn">plt</span>
|
||||
<span class="o">%</span><span class="k">matplotlib</span> inline
|
||||
|
||||
<span class="n">hashtags</span> <span class="o">=</span> <span class="n">text_norep</span><span class="o">.</span><span class="n">str</span><span class="o">.</span><span class="n">count</span><span class="p">(</span><span class="s1">'#'</span><span class="p">)</span>
|
||||
<span class="n">bins</span> <span class="o">=</span> <span class="n">hashtags</span><span class="o">.</span><span class="n">unique</span><span class="p">()</span><span class="o">.</span><span class="n">max</span><span class="p">()</span>
|
||||
<span class="n">hashtags</span><span class="o">.</span><span class="n">plot</span><span class="p">(</span><span class="n">kind</span><span class="o">=</span><span class="s1">'hist'</span><span class="p">,</span> <span class="n">bins</span><span class="o">=</span><span class="n">bins</span><span class="p">)</span>
|
||||
</pre></div>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="output_wrapper">
|
||||
<div class="output">
|
||||
|
||||
|
||||
<div class="output_area"><div class="prompt output_prompt">Out[4]:</div>
|
||||
|
||||
|
||||
<div class="output_text output_subarea output_execute_result">
|
||||
<pre><matplotlib.axes._subplots.AxesSubplot at 0x18e59dc28d0></pre>
|
||||
</div>
|
||||
|
||||
</div>
|
||||
|
||||
<div class="output_area"><div class="prompt"></div>
|
||||
|
||||
|
||||
<div class="output_png output_subarea ">
|
||||
<img src="
|
||||
AAALEgAACxIB0t1+/AAAEe1JREFUeJzt3X+s3XV9x/HnCzqRinadjt6NosA0CGYOUSsJM7tmG4pG
|
||||
YFuGuGkEMmOCTheThZZsazXZBOOcbguJUWYqw7CCIpi5UQi7Li5KmYKixdpkFrHQC1MHogQB3/vj
|
||||
fGsP9X7KObf33HNu7/ORnPT7/dzv95x3v/32vO7n8/2VqkKSpLkcNu4CJEmTy5CQJDUZEpKkJkNC
|
||||
ktRkSEiSmgwJSVLTyEMiya4kX01ye5JtXdvqJFuT7EhyY5JVfctvSLIzyV1Jzhh1fZKktsXoSfwU
|
||||
mK6ql1TVuq5tPXBzVZ0I3AJsAEhyMnAucBJwJnB5kixCjZKkOSxGSGSOzzkb2NxNbwbO6abPAq6u
|
||||
qserahewE1iHJGksFiMkCrgpyW1J/qRrW1NVswBVtQc4ums/Brinb93dXZskaQxWLMJnnF5V9yX5
|
||||
ZWBrkh30gqOf9waRpAk08pCoqvu6Px9I8hl6w0ezSdZU1WySKeD+bvHdwLF9q6/t2p4kiaEiSfNQ
|
||||
VUMd5x3pcFOSlUmO6qafAZwB3AncAJzfLfYW4Ppu+gbgvCRPS3I88Hxg21zvXVW+qti4cePYa5iU
|
||||
l9vCbeG2OPBrPkbdk1gDXNf95r8CuKqqtib5b2BLkguBu+md0URVbU+yBdgOPAZcVPP9m0mSDtpI
|
||||
Q6Kqvg2cMkf794HfaazzPuB9o6xLkjQYr7he4qanp8ddwsRwW+zjttjHbXFwshRHc5I4CiVJQ0pC
|
||||
TdKBa0nS0mZISJKaDAlJUpMhIUlqMiQkSU2GhCSpyZCQJDUZEpKkJkNCktS0rEJiauo4kgz1mpo6
|
||||
btxlS9LYLKvbcvQelz3sepn3LXYlaZJ4Ww5J0oIyJCRJTYaEJKnJkJAkNRkSkqQmQ0KS1GRISJKa
|
||||
DAlJUpMhIUlqMiQkSU2GhCSpyZCQJDUZEpKkJkNCktRkSEiSmgwJSVKTISFJajIkJElNhoQkqcmQ
|
||||
kCQ1GRKSpCZDQpLUZEhIkpoWJSSSHJbkK0lu6OZXJ9maZEeSG5Os6lt2Q5KdSe5KcsZi1CdJmtti
|
||||
9STeBWzvm18P3FxVJwK3ABsAkpwMnAucBJwJXJ4ki1SjJGk/Iw+JJGuB1wIf62s+G9jcTW8Gzumm
|
||||
zwKurqrHq2oXsBNYN+oaJUlzW4yexN8Bfw5UX9uaqpoFqKo9wNFd+zHAPX3L7e7aJEljsGKUb57k
|
||||
dcBsVd2RZPoAi9YBfjanTZs2/Wx6enqa6ekDvb0kLT8zMzPMzMwc1Hukaujv58HfPPkb4E3A48CR
|
||||
wDOB64CXAdNVNZtkCviPqjopyXqgquqybv1/BzZW1a37vW/Np+7e4Y1h1wuj3EaStFiSUFVDHecd
|
||||
6XBTVV1SVc+tqhOA84BbqurNwGeB87vF3gJc303fAJyX5GlJjgeeD2wbZY2SpLaRDjcdwKXAliQX
|
||||
AnfTO6OJqtqeZAu9M6EeAy6aV5dBkrQgRjrcNCoON0nS8CZuuEmStLQZEpKkJkNCktRkSEiSmgwJ
|
||||
SVKTISFJajIkJElNhoQkqcmQkCQ1GRKSpCZDQpLUZEhIkpoMCUlSkyEhSWoyJCRJTYaEJKnJkJAk
|
||||
NY3r8aUH7b3vfe9Qy69cuXJElUjSoWvJPr4U/nKodY444goeffRefHyppOVqPo8vXcIhMVzdq1ad
|
||||
xoMP3oohIWm58hnXkqQFZUhIkpoMCUlSkyEhSWoyJCRJTYaEJKnJkJAkNRkSkqQmQ0KS1GRISJKa
|
||||
DAlJUpMhIUlqMiQkSU2GhCSpyZCQJDWNNCSSHJHk1iS3J7kzycaufXWSrUl2JLkxyaq+dTYk2Znk
|
||||
riRnjLI+SdKBjTQkqupR4FVV9RLgFODMJOuA9cDNVXUicAuwASDJycC5wEnAmcDlSYZ6QIYkaeGM
|
||||
fLipqn7cTR5B75naBZwNbO7aNwPndNNnAVdX1eNVtQvYCawbdY2SpLkNFBJJfn2+H5DksCS3A3uA
|
||||
m6rqNmBNVc0CVNUe4Ohu8WOAe/pW3921SZLGYNCexOVJtiW5qP/4wSCq6qfdcNNaYF2SF/HzD5r2
|
||||
IdKSNIFWDLJQVb0yyQuAC4EvJ9kGfLyqbhr0g6rqoSQzwGuA2SRrqmo2yRRwf7fYbuDYvtXWdm1z
|
||||
2NQ3Pd29JEl7zczMMDMzc1DvkarBf4lPcji94wd/DzwEBLikqj7dWP45wGNV9WCSI4EbgUuB3wK+
|
||||
X1WXJbkYWF1V67sD11cBr6A3zHQT8ILar8gkNWznY9Wq03jwwVsZvtMShtlGkjSpklBVQ50MNFBP
|
||||
IsmLgQuA19H74n59VX0lya8CXwTmDAngV4DNSQ6jN7T1L1X1uSRfArYkuRC4m94ZTVTV9iRbgO3A
|
||||
Y8BF+weEJGnxDNSTSPJ54GPAtVX1yH4/e3NVXTmi+lr12JOQpCGNrCdBrwfxSFU90X3QYcDTq+rH
|
||||
ix0QkqTFM+jZTTcDR/bNr+zaJEmHsEFD4ulV9fDemW565WhKkiRNikFD4kdJTt07k+SlwCMHWF6S
|
||||
dAgY9JjEnwHXJLmX3mmvU8AbRlaVJGkiDHox3W1JXgic2DXtqKrHRleWJGkSDNqTAHg5cFy3zqnd
|
||||
qVSfGElVkqSJMOjFdFcCvwbcATzRNRdgSEjSIWzQnsTLgJO9+lmSlpdBz276Or2D1ZKkZWTQnsRz
|
||||
gO3d3V8f3dtYVWeNpCpJ0kQYNCQ2jbIISdJkGvQU2M8neR6923bfnGQlcPhoS5Mkjdugjy99K3At
|
||||
8JGu6RjgM6MqSpI0GQY9cP124HR6Dxqiqnay77nUkqRD1KAh8WhV/WTvTJIV+FxqSTrkDRoSn09y
|
||||
CXBkkt8FrgE+O7qyJEmTYNCQWA88ANwJvA34HPAXoypKkjQZBnp86aTx8aWSNLyRPb40ybeZ49u1
|
||||
qk4Y5sMkSUvLMPdu2uvpwB8Cv7Tw5UiSJslAxySq6nt9r91V9SHgdSOuTZI0ZoMON53aN3sYvZ7F
|
||||
MM+ikCQtQYN+0f9t3/TjwC7g3AWvRpI0UQa9d9OrRl2IJGnyDDrc9O4D/byqPrgw5UiSJskwZze9
|
||||
HLihm389sA3YOYqiJEmTYdCQWAucWlU/BEiyCfjXqnrTqAqTJI3foLflWAP8pG/+J12bJOkQNmhP
|
||||
4hPAtiTXdfPnAJtHU5IkaVIMenbTXyf5N+CVXdMFVXX76MqSJE2CQYebAFYCD1XVh4HvJjl+RDVJ
|
||||
kibEoI8v3QhcDGzomn4B+OdRFSVJmgyD9iR+DzgL+BFAVd0LPHNURUmSJsOgIfGT6j1UoQCSPGN0
|
||||
JUmSJsWgIbElyUeAX0zyVuBm4KOjK0uSNAkGvVX4B4BrgU8BJwJ/VVX/8FTrJVmb5JYk30hyZ5J3
|
||||
du2rk2xNsiPJjUlW9a2zIcnOJHclOWN+fy1J0kJ4yseXJjkcuHk+N/lLMgVMVdUdSY4CvgycDVwA
|
||||
fK+q3p/kYmB1Va1PcjJwFb1bgKyl12N5Qe1XpI8vlaThzefxpU/Zk6iqJ4Cf9v+2P6iq2lNVd3TT
|
||||
DwN30fvyP5t9F+NtpndxHvQOjl9dVY9X1S5694ZaN+znSpIWxqBXXD8M3JnkJroznACq6p2DflCS
|
||||
44BTgC8Ba6pqtnuPPUmO7hY7Bvhi32q7uzZJ0hgMGhKf7l7z0g01XQu8q6oe7g0XPYnjOZI0gQ4Y
|
||||
EkmeW1Xfqap536cpyQp6AXFlVV3fNc8mWVNVs91xi/u79t3AsX2rr+3a5rCpb3q6e0mS9pqZmWFm
|
||||
Zuag3uOAB66TfKWqTu2mP1VVfzD0BySfAP63qt7d13YZ8P2quqxx4PoV9IaZbsID15K0IOZz4Pqp
|
||||
hpv63+yEeRR0OvDH9I5n3E7vG/oS4DJ6115cCNxN97zsqtqeZAuwHXgMuGj/gJAkLZ6nColqTA+k
|
||||
qv4LOLzx499prPM+4H3DfpYkaeE9VUj8RpKH6PUojuym6earqp410uokSWN1wJCoqlYvQJK0DAzz
|
||||
PAlJ0jJjSEiSmgwJSVKTISFJajIkJElNhoQkqcmQkCQ1GRKSpCZDQpLUZEhIkpoMCUlSkyEhSWoy
|
||||
JCRJTYaEJKnJkJAkNRkSkqQmQ0KS1GRISJKaDAlJUpMhIUlqMiQkSU2GhCSpyZCQJDUZEpKkJkNC
|
||||
ktRkSEiSmgwJSVKTISFJajIkJElNhoQkqcmQkCQ1GRJP6QiSDP2amjpu3IVL0kFbMe4CJt+jQA29
|
||||
1uxsFr4USVpk9iQkSU0jDYkkVySZTfK1vrbVSbYm2ZHkxiSr+n62IcnOJHclOWOUtUmSntqoexIf
|
||||
B169X9t64OaqOhG4BdgAkORk4FzgJOBM4PIkjtlI0hiNNCSq6gvAD/ZrPhvY3E1vBs7pps8Crq6q
|
||||
x6tqF7ATWDfK+iRJBzaOYxJHV9UsQFXtAY7u2o8B7ulbbnfXJkkak0k4u2n4U4cA2NQ3Pd29JEl7
|
||||
zczMMDMzc1DvMY6QmE2ypqpmk0wB93ftu4Fj+5Zb27U1bBpVfZJ0SJienmZ6evpn8+95z3uGfo/F
|
||||
GG5K99rrBuD8bvotwPV97ecleVqS44HnA9sWoT5JUsNIexJJPklvHOjZSb4DbAQuBa5JciFwN70z
|
||||
mqiq7Um2ANuBx4CLqmqeQ1GSpIWQpfg9nKSGPZSxatVpPPjgrQx/CCTzWKe33lLctpIOXUmoqqEu
|
||||
LfCKa0lSkyEhSWoyJCRJTYaEJKnJkJAkNRkSkqQmQ0KS1GRISJKaDAlJUpMhIUlqMiQkSU2GhCSp
|
||||
yZCQJDUZEpKkJkNCktRkSEiSmgwJSVKTISFJajIkJElNhoQkqcmQkCQ1GRKSpCZDQpLUZEhIkpoM
|
||||
CUlSkyExMkeQZKjX1NRx4y5akp5kxbgLOHQ9CtRQa8zOZjSlSNI82ZOQJDUZEpKkJkNCktRkSEiS
|
||||
mgwJSVKTISFJajIkJElNhoQkqWkiQyLJa5J8M8m3klw87nokabmauJBIchjwj8CrgRcBb0zywvFW
|
||||
NblmZmbGXcLEcFvs47bYx21xcCYuJIB1wM6quruqHgOuBs4ec00Ty/8A+7gt9nFb7OO2ODiTGBLH
|
||||
APf0zX+3a9McPvCBDw19I0FvJihpUEv2Bn/Petbrh1r+kUe+OaJKFlLvzrHDG+5GguDNBKVRmJo6
|
||||
jtnZu4daZ82a57Fnz67RFLQAUjX8F8woJTkN2FRVr+nm1wNVVZf1LTNZRUvSElFVQ/2GOIkhcTiw
|
||||
A/ht4D5gG/DGqrprrIVJ0jI0ccNNVfVEkncAW+kdM7nCgJCk8Zi4noQkaXJM4tlNB+SFdvsk2ZXk
|
||||
q0luT7Jt3PUspiRXJJlN8rW+ttVJtibZkeTGJKvGWeNiaWyLjUm+m+Qr3es146xxsSRZm+SWJN9I
|
||||
cmeSd3bty2rfmGM7/GnXPvR+saR6Et2Fdt+id7ziXuA24LyqWgqnLi24JP8DvLSqfjDuWhZbkt8E
|
||||
HgY+UVUv7touA75XVe/vfoFYXVXrx1nnYmhsi43AD6vqg2MtbpElmQKmquqOJEcBX6Z3ndUFLKN9
|
||||
4wDb4Q0MuV8stZ6EF9o9WVh6/4YLoqq+AOwfjmcDm7vpzcA5i1rUmDS2BfT2j2WlqvZU1R3d9MPA
|
||||
XcBaltm+0dgOe683G2q/WGpfMF5o92QF3JTktiRvHXcxE+DoqpqF3n8S4Ogx1zNu70hyR5KPHerD
|
||||
K3NJchxwCvAlYM1y3Tf6tsOtXdNQ+8VSCwk92elVdSrwWuDt3bCD9lk6Y6kL73LghKo6BdgDLLdh
|
||||
p6OAa4F3db9J778vLIt9Y47tMPR+sdRCYjfw3L75tV3bslRV93V/PgBcR284bjmbTbIGfjYme/+Y
|
||||
6xmbqnqg9h1w/Cjw8nHWs5iSrKD3xXhlVV3fNS+7fWOu7TCf/WKphcRtwPOTPC/J04DzgBvGXNNY
|
||||
JFnZ/ZZAkmcAZwBfH29Viy48eXz1BuD8bvotwPX7r3AIe9K26L4I9/p9lte+8U/A9qr6cF/bctw3
|
||||
fm47zGe/WFJnN0HvFFjgw+y70O7SMZc0FkmOp9d7KHoXRV61nLZFkk8C08CzgVlgI/AZ4BrgWOBu
|
||||
4Nyq+r9x1bhYGtviVfTGoX8K7ALetndM/lCW5HTgP4E76f3fKOASendu2MIy2TcOsB3+iCH3iyUX
|
||||
EpKkxbPUhpskSYvIkJAkNRkSkqQmQ0KS1GRISJKaDAlJUpMhIUlqMiQkSU3/DzepYDZSwMuQAAAA
|
||||
AElFTkSuQmCC
|
||||
"
|
||||
>
|
||||
</div>
|
||||
|
||||
</div>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
|
||||
</div>
|
||||
<div class="cell border-box-sizing text_cell rendered">
|
||||
<div class="prompt input_prompt">
|
||||
</div>
|
||||
<div class="inner_cell">
|
||||
<div class="text_cell_render border-box-sizing rendered_html">
|
||||
<p>That looks like a Poisson distribution, kind of as I expected. I'm guessing my number of hashtags per tweet is $\sim Poi(1)$, but let's actually find the <a href="https://en.wikipedia.org/wiki/Poisson_distribution#Maximum_likelihood">maximum likelihood estimator</a>, which in this case is just the sample mean, $\hat{\lambda} = \bar{x}$:</p>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="cell border-box-sizing code_cell rendered">
|
||||
<div class="input">
|
||||
<div class="prompt input_prompt">In [5]:</div>
|
||||
<div class="inner_cell">
|
||||
<div class="input_area">
|
||||
<div class=" highlight hl-ipython3"><pre><span class="n">mle</span> <span class="o">=</span> <span class="n">hashtags</span><span class="o">.</span><span class="n">mean</span><span class="p">()</span>
|
||||
<span class="n">mle</span>
|
||||
</pre></div>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="output_wrapper">
|
||||
<div class="output">
|
||||
|
||||
|
||||
<div class="output_area"><div class="prompt output_prompt">Out[5]:</div>
|
||||
|
||||
|
||||
<div class="output_text output_subarea output_execute_result">
|
||||
<pre>0.870236869207003</pre>
|
||||
</div>
|
||||
|
||||
</div>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
|
||||
</div>
|
||||
<div class="cell border-box-sizing text_cell rendered">
|
||||
<div class="prompt input_prompt">
|
||||
</div>
|
||||
<div class="inner_cell">
|
||||
<div class="text_cell_render border-box-sizing rendered_html">
|
||||
<p>Pretty close! So we can now simulate how many hashtags are in a tweet. Let's also find what hashtags are actually used:</p>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="cell border-box-sizing code_cell rendered">
|
||||
<div class="input">
|
||||
<div class="prompt input_prompt">In [6]:</div>
|
||||
<div class="inner_cell">
|
||||
<div class="input_area">
|
||||
<div class=" highlight hl-ipython3"><pre><span class="n">hashtags</span> <span class="o">=</span> <span class="p">[</span><span class="n">x</span> <span class="k">for</span> <span class="n">x</span> <span class="ow">in</span> <span class="n">all_words</span> <span class="k">if</span> <span class="n">x</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span> <span class="o">==</span> <span class="s1">'#'</span><span class="p">]</span>
|
||||
<span class="n">n_hashtags</span> <span class="o">=</span> <span class="nb">len</span><span class="p">(</span><span class="n">hashtags</span><span class="p">)</span>
|
||||
|
||||
<span class="n">unique_hashtags</span> <span class="o">=</span> <span class="nb">list</span><span class="p">(</span><span class="nb">set</span><span class="p">([</span><span class="n">x</span> <span class="k">for</span> <span class="n">x</span> <span class="ow">in</span> <span class="n">unique_words</span> <span class="k">if</span> <span class="n">x</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span> <span class="o">==</span> <span class="s1">'#'</span><span class="p">]))</span>
|
||||
<span class="n">hashtag_dist</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">({</span><span class="s1">'hashtags'</span><span class="p">:</span> <span class="n">unique_hashtags</span><span class="p">,</span>
|
||||
<span class="s1">'prob'</span><span class="p">:</span> <span class="p">[</span><span class="n">all_words</span><span class="o">.</span><span class="n">count</span><span class="p">(</span><span class="n">h</span><span class="p">)</span> <span class="o">/</span> <span class="n">n_hashtags</span>
|
||||
<span class="k">for</span> <span class="n">h</span> <span class="ow">in</span> <span class="n">unique_hashtags</span><span class="p">]})</span>
|
||||
<span class="nb">len</span><span class="p">(</span><span class="n">hashtag_dist</span><span class="p">)</span>
|
||||
</pre></div>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="output_wrapper">
|
||||
<div class="output">
|
||||
|
||||
|
||||
<div class="output_area"><div class="prompt output_prompt">Out[6]:</div>
|
||||
|
||||
|
||||
<div class="output_text output_subarea output_execute_result">
|
||||
<pre>603</pre>
|
||||
</div>
|
||||
|
||||
</div>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
|
||||
</div>
|
||||
<div class="cell border-box-sizing text_cell rendered">
|
||||
<div class="prompt input_prompt">
|
||||
</div>
|
||||
<div class="inner_cell">
|
||||
<div class="text_cell_render border-box-sizing rendered_html">
|
||||
<p>Turns out I have used 603 different hashtags during my time on Twitter. That means I was using a unique hashtag for about every third tweet.</p>
|
||||
<p>In better news though, we now have all the data we need to go about actually constructing tweets! The process will happen in a few steps:</p>
|
||||
<ol>
|
||||
<li>Randomly select what the first word will be.</li>
|
||||
<li>Randomly select the number of hashtags for this tweet, and then select the actual hashtags.</li>
|
||||
<li>Fill in the remaining space of 140 characters with random words taken from my tweets.</li>
|
||||
</ol>
|
||||
<p>And hopefully, we won't have anything too crazy come out the other end. The way we do the selection follows a <a href="https://en.wikipedia.org/wiki/Multinomial_distribution">Multinomial Distribution</a>: given a set of different values, each with a specific probability, pick one. Let's give a quick example:</p>
|
||||
|
||||
<pre><code>x: .33
|
||||
y: .5
|
||||
z: .17</code></pre>
|
||||
<p>That is, I pick <code>x</code> with probability 33%, <code>y</code> with probability 50%, and so on. In the context of our sentence construction, I've built out the probabilities of specific words already - now I just need to simulate that distribution. Time for the engine to actually be developed!</p>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="cell border-box-sizing code_cell rendered">
|
||||
<div class="input">
|
||||
<div class="prompt input_prompt">In [7]:</div>
|
||||
<div class="inner_cell">
|
||||
<div class="input_area">
|
||||
<div class=" highlight hl-ipython3"><pre><span class="kn">import</span> <span class="nn">numpy</span> <span class="k">as</span> <span class="nn">np</span>
|
||||
|
||||
<span class="k">def</span> <span class="nf">multinom_sim</span><span class="p">(</span><span class="n">n</span><span class="p">,</span> <span class="n">vals</span><span class="p">,</span> <span class="n">probs</span><span class="p">):</span>
|
||||
<span class="n">occurrences</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">random</span><span class="o">.</span><span class="n">multinomial</span><span class="p">(</span><span class="n">n</span><span class="p">,</span> <span class="n">probs</span><span class="p">)</span>
|
||||
<span class="n">results</span> <span class="o">=</span> <span class="n">occurrences</span> <span class="o">*</span> <span class="n">vals</span>
|
||||
<span class="k">return</span> <span class="s1">' '</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">results</span><span class="p">[</span><span class="n">results</span> <span class="o">!=</span> <span class="s1">''</span><span class="p">])</span>
|
||||
|
||||
<span class="k">def</span> <span class="nf">sim_n_hashtags</span><span class="p">(</span><span class="n">hashtag_freq</span><span class="p">):</span>
|
||||
<span class="k">return</span> <span class="n">np</span><span class="o">.</span><span class="n">random</span><span class="o">.</span><span class="n">poisson</span><span class="p">(</span><span class="n">hashtag_freq</span><span class="p">)</span>
|
||||
|
||||
<span class="k">def</span> <span class="nf">sim_hashtags</span><span class="p">(</span><span class="n">n</span><span class="p">,</span> <span class="n">hashtag_dist</span><span class="p">):</span>
|
||||
<span class="k">return</span> <span class="n">multinom_sim</span><span class="p">(</span><span class="n">n</span><span class="p">,</span> <span class="n">hashtag_dist</span><span class="o">.</span><span class="n">hashtags</span><span class="p">,</span> <span class="n">hashtag_dist</span><span class="o">.</span><span class="n">prob</span><span class="p">)</span>
|
||||
|
||||
<span class="k">def</span> <span class="nf">sim_first_word</span><span class="p">(</span><span class="n">first_word_dist</span><span class="p">):</span>
|
||||
<span class="n">probs</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">float64</span><span class="p">(</span><span class="n">first_word_dist</span><span class="o">.</span><span class="n">values</span><span class="p">)</span>
|
||||
<span class="k">return</span> <span class="n">multinom_sim</span><span class="p">(</span><span class="mi">1</span><span class="p">,</span> <span class="n">first_word_dist</span><span class="o">.</span><span class="n">reset_index</span><span class="p">()[</span><span class="s1">'index'</span><span class="p">],</span> <span class="n">probs</span><span class="p">)</span>
|
||||
|
||||
<span class="k">def</span> <span class="nf">sim_next_word</span><span class="p">(</span><span class="n">current</span><span class="p">,</span> <span class="n">word_dist</span><span class="p">):</span>
|
||||
<span class="n">dist</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">(</span><span class="n">word_dist</span><span class="p">[</span><span class="n">current</span><span class="p">])</span>
|
||||
<span class="n">probs</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">ones</span><span class="p">(</span><span class="nb">len</span><span class="p">(</span><span class="n">dist</span><span class="p">))</span> <span class="o">/</span> <span class="nb">len</span><span class="p">(</span><span class="n">dist</span><span class="p">)</span>
|
||||
<span class="k">return</span> <span class="n">multinom_sim</span><span class="p">(</span><span class="mi">1</span><span class="p">,</span> <span class="n">dist</span><span class="p">,</span> <span class="n">probs</span><span class="p">)</span>
|
||||
</pre></div>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
</div>
|
||||
<div class="cell border-box-sizing text_cell rendered">
|
||||
<div class="prompt input_prompt">
|
||||
</div>
|
||||
<div class="inner_cell">
|
||||
<div class="text_cell_render border-box-sizing rendered_html">
|
||||
<h2 id="Pulling-it-all-together">Pulling it all together<a class="anchor-link" href="#Pulling-it-all-together">¶</a></h2><p>I've now built out all the code I need to actually simulate a sentence written by me. Let's try doing an example with five words and a single hashtag:</p>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="cell border-box-sizing code_cell rendered">
|
||||
<div class="input">
|
||||
<div class="prompt input_prompt">In [8]:</div>
|
||||
<div class="inner_cell">
|
||||
<div class="input_area">
|
||||
<div class=" highlight hl-ipython3"><pre><span class="n">first</span> <span class="o">=</span> <span class="n">sim_first_word</span><span class="p">(</span><span class="n">first_word_dist</span><span class="p">)</span>
|
||||
<span class="n">second</span> <span class="o">=</span> <span class="n">sim_next_word</span><span class="p">(</span><span class="n">first</span><span class="p">,</span> <span class="n">word_dist</span><span class="p">)</span>
|
||||
<span class="n">third</span> <span class="o">=</span> <span class="n">sim_next_word</span><span class="p">(</span><span class="n">second</span><span class="p">,</span> <span class="n">word_dist</span><span class="p">)</span>
|
||||
<span class="n">fourth</span> <span class="o">=</span> <span class="n">sim_next_word</span><span class="p">(</span><span class="n">third</span><span class="p">,</span> <span class="n">word_dist</span><span class="p">)</span>
|
||||
<span class="n">fifth</span> <span class="o">=</span> <span class="n">sim_next_word</span><span class="p">(</span><span class="n">fourth</span><span class="p">,</span> <span class="n">word_dist</span><span class="p">)</span>
|
||||
<span class="n">hashtag</span> <span class="o">=</span> <span class="n">sim_hashtags</span><span class="p">(</span><span class="mi">1</span><span class="p">,</span> <span class="n">hashtag_dist</span><span class="p">)</span>
|
||||
|
||||
<span class="s1">' '</span><span class="o">.</span><span class="n">join</span><span class="p">((</span><span class="n">first</span><span class="p">,</span> <span class="n">second</span><span class="p">,</span> <span class="n">third</span><span class="p">,</span> <span class="n">fourth</span><span class="p">,</span> <span class="n">fifth</span><span class="p">,</span> <span class="n">hashtag</span><span class="p">))</span>
|
||||
</pre></div>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="output_wrapper">
|
||||
<div class="output">
|
||||
|
||||
|
||||
<div class="output_area"><div class="prompt output_prompt">Out[8]:</div>
|
||||
|
||||
|
||||
<div class="output_text output_subarea output_execute_result">
|
||||
<pre>'My first all-nighter of friends #oldschool'</pre>
|
||||
</div>
|
||||
|
||||
</div>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
|
||||
</div>
|
||||
<div class="cell border-box-sizing text_cell rendered">
|
||||
<div class="prompt input_prompt">
|
||||
</div>
|
||||
<div class="inner_cell">
|
||||
<div class="text_cell_render border-box-sizing rendered_html">
|
||||
<p>Let's go ahead and put everything together! We're going to simulate a first word, simulate the hashtags, and then simulate to fill the gap until we've either taken up all the space or reached a period.</p>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="cell border-box-sizing code_cell rendered">
|
||||
<div class="input">
|
||||
<div class="prompt input_prompt">In [9]:</div>
|
||||
<div class="inner_cell">
|
||||
<div class="input_area">
|
||||
<div class=" highlight hl-ipython3"><pre><span class="k">def</span> <span class="nf">simulate_tweet</span><span class="p">():</span>
|
||||
<span class="n">chars_remaining</span> <span class="o">=</span> <span class="mi">140</span>
|
||||
<span class="n">first</span> <span class="o">=</span> <span class="n">sim_first_word</span><span class="p">(</span><span class="n">first_word_dist</span><span class="p">)</span>
|
||||
<span class="n">n_hash</span> <span class="o">=</span> <span class="n">sim_n_hashtags</span><span class="p">(</span><span class="n">mle</span><span class="p">)</span>
|
||||
<span class="n">hashtags</span> <span class="o">=</span> <span class="n">sim_hashtags</span><span class="p">(</span><span class="n">n_hash</span><span class="p">,</span> <span class="n">hashtag_dist</span><span class="p">)</span>
|
||||
|
||||
<span class="n">chars_remaining</span> <span class="o">-=</span> <span class="nb">len</span><span class="p">(</span><span class="n">first</span><span class="p">)</span> <span class="o">+</span> <span class="nb">len</span><span class="p">(</span><span class="n">hashtags</span><span class="p">)</span>
|
||||
|
||||
<span class="n">tweet</span> <span class="o">=</span> <span class="n">first</span>
|
||||
<span class="n">current</span> <span class="o">=</span> <span class="n">first</span>
|
||||
<span class="k">while</span> <span class="n">chars_remaining</span> <span class="o">></span> <span class="nb">len</span><span class="p">(</span><span class="n">tweet</span><span class="p">)</span> <span class="o">+</span> <span class="nb">len</span><span class="p">(</span><span class="n">hashtags</span><span class="p">)</span> <span class="ow">and</span> <span class="n">current</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span> <span class="o">!=</span> <span class="s1">'.'</span> <span class="ow">and</span> <span class="n">current</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span> <span class="o">!=</span> <span class="s1">'!'</span><span class="p">:</span>
|
||||
<span class="n">current</span> <span class="o">=</span> <span class="n">sim_next_word</span><span class="p">(</span><span class="n">current</span><span class="p">,</span> <span class="n">word_dist</span><span class="p">)</span>
|
||||
<span class="n">tweet</span> <span class="o">+=</span> <span class="s1">' '</span> <span class="o">+</span> <span class="n">current</span>
|
||||
|
||||
<span class="n">tweet</span> <span class="o">=</span> <span class="n">tweet</span><span class="p">[:</span><span class="o">-</span><span class="mi">2</span><span class="p">]</span> <span class="o">+</span> <span class="n">tweet</span><span class="p">[</span><span class="o">-</span><span class="mi">1</span><span class="p">]</span>
|
||||
|
||||
<span class="k">return</span> <span class="s1">' '</span><span class="o">.</span><span class="n">join</span><span class="p">((</span><span class="n">tweet</span><span class="p">,</span> <span class="n">hashtags</span><span class="p">))</span><span class="o">.</span><span class="n">strip</span><span class="p">()</span>
|
||||
</pre></div>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
</div>
|
||||
<div class="cell border-box-sizing text_cell rendered">
|
||||
<div class="prompt input_prompt">
|
||||
</div>
|
||||
<div class="inner_cell">
|
||||
<div class="text_cell_render border-box-sizing rendered_html">
|
||||
<h2 id="The-results">The results<a class="anchor-link" href="#The-results">¶</a></h2><p>And now for something completely different: twenty random tweets dreamed up by my computer and my Twitter data. Here you go:</p>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="cell border-box-sizing code_cell rendered">
|
||||
<div class="input">
|
||||
<div class="prompt input_prompt">In [12]:</div>
|
||||
<div class="inner_cell">
|
||||
<div class="input_area">
|
||||
<div class=" highlight hl-ipython3"><pre><span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span> <span class="mi">20</span><span class="p">):</span>
|
||||
<span class="nb">print</span><span class="p">(</span><span class="n">simulate_tweet</span><span class="p">())</span>
|
||||
<span class="nb">print</span><span class="p">()</span>
|
||||
</pre></div>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="output_wrapper">
|
||||
<div class="output">
|
||||
|
||||
|
||||
<div class="output_area"><div class="prompt"></div>
|
||||
<div class="output_subarea output_stream output_stdout output_text">
|
||||
<pre>Also , I'm at 8 this morning. #thursdaysgohard #ornot
|
||||
|
||||
Turns out of us breathe the code will want to my undergraduate career is becoming more night trying ? Religion is now as a chane #HYPE
|
||||
|
||||
You know what recursion is to review the UNCC. #ornot
|
||||
|
||||
There are really sore 3 bonfires in my first writing the library ground floor if awesome. #realtalk #impressed
|
||||
|
||||
So we can make it out there's nothing but I'm not let us so hot I could think I may be good. #SwingDance
|
||||
|
||||
Happy Christmas , at Harris Teeter to be be godly or Roman Catholic ). #4b392b#4b392b #Isaiah26
|
||||
|
||||
For context , I in the most decisive factor of the same for homework. #accomplishment
|
||||
|
||||
Freaking done. #loveyouall
|
||||
|
||||
New blog post : Don't jump in a quiz in with a knife fight. #haskell #earlybirthday
|
||||
|
||||
God shows me legitimately want to get some food and one day.
|
||||
|
||||
Stormed the queen city. #mindblown
|
||||
|
||||
The day of a cold at least outside right before the semester ..
|
||||
|
||||
Finished with the way back. #winners
|
||||
|
||||
Waking up , OJ , I feel like Nick Jonas today.
|
||||
|
||||
First draft of so hard drive. #humansvszombies
|
||||
|
||||
Eric Whitacre is the wise creation.
|
||||
|
||||
Ethics paper first , music in close to everyone who just be posting up with my sin , and Jerry Springr #TheLittleThings
|
||||
|
||||
Love that you know enough time I've eaten at 8 PM. #deepthoughts #stillblownaway
|
||||
|
||||
Lead. #ThinkingTooMuch #Christmas
|
||||
|
||||
Aamazing conference when you married #DepartmentOfRedundancyDepartment Yep , but there's a legitimate challenge.
|
||||
|
||||
</pre>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
|
||||
</div>
|
||||
<div class="cell border-box-sizing text_cell rendered">
|
||||
<div class="prompt input_prompt">
|
||||
</div>
|
||||
<div class="inner_cell">
|
||||
<div class="text_cell_render border-box-sizing rendered_html">
|
||||
<p>...Which all ended up being a whole lot more nonsensical than I had hoped for. There are some good ones, so I'll call that an accomplishment! I was banking on grammar not being an issue: since my tweets use impeccable grammar, the program modeled off them should have pretty good grammar as well. There are going to be some hilarious edge cases (I'm looking at you, <code>Ethics paper first, music in close to everyone</code>) that make no sense, and some hilarious edge cases (<code>Waking up, OJ, I feel like Nick Jonas today</code>) that make me feel like I should have a Twitter rap career. On the whole though, the structure came out alright.</p>
|
||||
<h2 id="Moving-on-from-here">Moving on from here<a class="anchor-link" href="#Moving-on-from-here">¶</a></h2><p>During class we also talked about an interesting idea: trying to analyze corporate documents and corporate speech. I'd be interested to know what this analysis could do when applied to something like a couple of bank press releases. In any case, the code needs some work to clean it up before I get that far.</p>
|
||||
<h2 id="For-further-reading">For further reading<a class="anchor-link" href="#For-further-reading">¶</a></h2><p>I'm pretty confident I re-invented a couple of wheels along the way - what I'm doing feels a lot like what <a href="https://en.wikipedia.org/wiki/Markov_chain_Monte_Carlo">Markov Chain Monte Carlo</a> is intended to do. But I've never worked explicitly with that before, so more research is needed.</p>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
</div></p>
|
||||
<script type="text/x-mathjax-config">
|
||||
MathJax.Hub.Config({tex2jax: {inlineMath: [['$','$'], ['\(','\)']]}});
|
||||
</script>
|
||||
@ -81,6 +664,20 @@ MathJax.Hub.Config({tex2jax: {inlineMath: [['$','$'], ['\(','\)']]}});
|
||||
<script async src='https://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS_CHTML'></script>
|
||||
|
||||
|
||||
<div class="comments">
|
||||
<div id="disqus_thread"></div>
|
||||
<script type="text/javascript">
|
||||
var disqus_shortname = 'bradleespeice';
|
||||
var disqus_identifier = 'tweet-like-me.html';
|
||||
var disqus_url = 'https://bspeice.github.io/tweet-like-me.html';
|
||||
(function() {
|
||||
var dsq = document.createElement('script'); dsq.type = 'text/javascript'; dsq.async = true;
|
||||
dsq.src = '//' + disqus_shortname + '.disqus.com/embed.js';
|
||||
(document.getElementsByTagName('head')[0] || document.getElementsByTagName('body')[0]).appendChild(dsq);
|
||||
})();
|
||||
</script>
|
||||
<noscript>Please enable JavaScript to view the comments.</noscript>
|
||||
</div>
|
||||
|
||||
</div>
|
||||
<!-- /Content -->
|
||||
@ -92,6 +689,7 @@ MathJax.Hub.Config({tex2jax: {inlineMath: [['$','$'], ['\(','\)']]}});
|
||||
<div class="col-xs-4 col-sm-3 col-md-3 col-lg-3">
|
||||
<div class="footer-title"></div>
|
||||
<ul class="list-unstyled">
|
||||
<li><a href="https://bspeice.github.io/feeds/all.atom.xml" type="application/atom+xml" rel="alternate"></a></li>
|
||||
</ul>
|
||||
</div>
|
||||
<div class="col-xs-4 col-sm-3 col-md-3 col-lg-3">
|
||||
|
@ -4,20 +4,22 @@
|
||||
<meta charset="utf-8">
|
||||
<meta http-equiv="X-UA-Compatible" content="IE=edge">
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1">
|
||||
<meta name="description" content="Hello! Glad to meet you. I'm currently a student at Columbia University studying Financial Engineering, and want to give an overview of the projects I'm working on! To start things off, Columbia...">
|
||||
<meta name="description" content="Hello! Glad to meet you. I'm currently a student at Columbia University studying Financial Engineering, and want to give an overview of the projects I'm working on! To start things off, Columbia ...">
|
||||
<meta name="keywords" content="introduction, trading">
|
||||
<link rel="icon" href="/favicon.ico">
|
||||
<link rel="icon" href="https://bspeice.github.io/favicon.ico">
|
||||
|
||||
<title>Welcome, and an algorithm - Bradlee Speice</title>
|
||||
|
||||
<!-- Stylesheets -->
|
||||
<link href="/theme/css/bootstrap.min.css" rel="stylesheet">
|
||||
<link href="/theme/css/fonts.css" rel="stylesheet">
|
||||
<link href="/theme/css/nest.css" rel="stylesheet">
|
||||
<link href="/theme/css/pygment.css" rel="stylesheet">
|
||||
<link href="https://bspeice.github.io/theme/css/bootstrap.min.css" rel="stylesheet">
|
||||
<link href="https://bspeice.github.io/theme/css/fonts.css" rel="stylesheet">
|
||||
<link href="https://bspeice.github.io/theme/css/nest.css" rel="stylesheet">
|
||||
<link href="https://bspeice.github.io/theme/css/pygment.css" rel="stylesheet">
|
||||
<!-- /Stylesheets -->
|
||||
|
||||
<!-- RSS Feeds -->
|
||||
<link href="https://bspeice.github.io/feeds/all.atom.xml" type="application/atom+xml" rel="alternate" title="Bradlee Speice Full Atom Feed" />
|
||||
<link href="https://bspeice.github.io/feeds/blog.atom.xml" type="application/atom+xml" rel="alternate" title="Bradlee Speice Categories Atom Feed" />
|
||||
<!-- /RSS Feeds -->
|
||||
|
||||
<!-- HTML5 shim and Respond.js for IE8 support of HTML5 elements and media queries -->
|
||||
@ -26,6 +28,17 @@
|
||||
<script src="https://oss.maxcdn.com/respond/1.4.2/respond.min.js"></script>
|
||||
<![endif]-->
|
||||
|
||||
<!-- Google Analytics -->
|
||||
<script>
|
||||
(function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
|
||||
(i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o),
|
||||
m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
|
||||
})(window,document,'script','//www.google-analytics.com/analytics.js','ga');
|
||||
|
||||
ga('create', 'UA-74711362-1', 'auto');
|
||||
ga('send', 'pageview');
|
||||
</script>
|
||||
<!-- /Google Analytics -->
|
||||
|
||||
|
||||
</head>
|
||||
@ -39,7 +52,7 @@
|
||||
<div class="container">
|
||||
<div class="header-nav">
|
||||
<div class="header-logo">
|
||||
<a class="pull-left" href="/"><img class="mr20" src="/images/logo.svg" alt="logo">Bradlee Speice</a>
|
||||
<a class="pull-left" href="https://bspeice.github.io/"><img class="mr20" src="https://bspeice.github.io/images/logo.svg" alt="logo">Bradlee Speice</a>
|
||||
</div>
|
||||
<div class="nav pull-right">
|
||||
</div>
|
||||
@ -54,12 +67,12 @@
|
||||
<div class="col-lg-12">
|
||||
<div class="header-content">
|
||||
<h1 class="header-title">Welcome, and an algorithm</h1>
|
||||
<p class="header-date"> <a href="/author/bradlee-speice.html">Bradlee Speice</a>, Thu 19 November 2015, Sat 05 December 2015, <a href="/category/blog.html">Blog</a></p>
|
||||
<p class="header-date"> <a href="https://bspeice.github.io/author/bradlee-speice.html">Bradlee Speice</a>, Thu 19 November 2015, Sat 05 December 2015, <a href="https://bspeice.github.io/category/blog.html">Blog</a></p>
|
||||
<div class="header-underline"></div>
|
||||
<div class="clearfix"></div>
|
||||
<p class="pull-right header-tags">
|
||||
<span class="glyphicon glyphicon-tags mr5" aria-hidden="true"></span>
|
||||
<a href="/tag/introduction.html">introduction</a>, <a href="/tag/trading.html">trading</a> </p>
|
||||
<a href="https://bspeice.github.io/tag/introduction.html">introduction</a>, <a href="https://bspeice.github.io/tag/trading.html">trading</a> </p>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
@ -92,7 +105,7 @@ days. Then, optimize for return subject to the drawdown being below a specific
|
||||
level. We didn't include the Sharpe ratio as a constraint, mostly because
|
||||
we were a bit late entering the competition.</p>
|
||||
<p>I'll be updating this post with the results of our algorithm as they come along!</p>
|
||||
<hr>
|
||||
<hr />
|
||||
<p><strong>UPDATE 12/5/2015</strong>: Now that the competition has ended, I wanted to update
|
||||
how the algorithm performed. Unfortunately, it didn't do very well. I'm planning
|
||||
to make some tweaks over the coming weeks, and do another forward test in January.</p>
|
||||
@ -128,8 +141,315 @@ to make some tweaks over the coming weeks, and do another forward test in Januar
|
||||
</tr>
|
||||
</tbody>
|
||||
</table>
|
||||
<hr>
|
||||
<p>{% notebook 2015-11-14-welcome.ipynb %}</p>
|
||||
<hr />
|
||||
<p>
|
||||
<div class="cell border-box-sizing text_cell rendered">
|
||||
<div class="prompt input_prompt">
|
||||
</div>
|
||||
<div class="inner_cell">
|
||||
<div class="text_cell_render border-box-sizing rendered_html">
|
||||
<h1 id="Trading-Competition-Optimization">Trading Competition Optimization<a class="anchor-link" href="#Trading-Competition-Optimization">¶</a></h1><h3 id="Goal:-Max-return-given-maximum-Sharpe-and-Drawdown">Goal: Max return given maximum Sharpe and Drawdown<a class="anchor-link" href="#Goal:-Max-return-given-maximum-Sharpe-and-Drawdown">¶</a></h3>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="cell border-box-sizing code_cell rendered">
|
||||
<div class="input">
|
||||
<div class="prompt input_prompt">In [1]:</div>
|
||||
<div class="inner_cell">
|
||||
<div class="input_area">
|
||||
<div class=" highlight hl-ipython3"><pre><span class="kn">from</span> <span class="nn">IPython.display</span> <span class="k">import</span> <span class="n">display</span>
|
||||
<span class="kn">import</span> <span class="nn">Quandl</span>
|
||||
<span class="kn">from</span> <span class="nn">datetime</span> <span class="k">import</span> <span class="n">datetime</span><span class="p">,</span> <span class="n">timedelta</span>
|
||||
|
||||
<span class="n">tickers</span> <span class="o">=</span> <span class="p">[</span><span class="s1">'XOM'</span><span class="p">,</span> <span class="s1">'CVX'</span><span class="p">,</span> <span class="s1">'CLB'</span><span class="p">,</span> <span class="s1">'OXY'</span><span class="p">,</span> <span class="s1">'SLB'</span><span class="p">]</span>
|
||||
<span class="n">market_ticker</span> <span class="o">=</span> <span class="s1">'GOOG/NYSE_VOO'</span>
|
||||
<span class="n">lookback</span> <span class="o">=</span> <span class="mi">30</span>
|
||||
<span class="n">d_col</span> <span class="o">=</span> <span class="s1">'Close'</span>
|
||||
|
||||
<span class="n">data</span> <span class="o">=</span> <span class="p">{</span><span class="n">tick</span><span class="p">:</span> <span class="n">Quandl</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'YAHOO/{}'</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">tick</span><span class="p">))[</span><span class="o">-</span><span class="n">lookback</span><span class="p">:]</span> <span class="k">for</span> <span class="n">tick</span> <span class="ow">in</span> <span class="n">tickers</span><span class="p">}</span>
|
||||
<span class="n">market</span> <span class="o">=</span> <span class="n">Quandl</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">market_ticker</span><span class="p">)</span>
|
||||
</pre></div>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
</div>
|
||||
<div class="cell border-box-sizing text_cell rendered">
|
||||
<div class="prompt input_prompt">
|
||||
</div>
|
||||
<div class="inner_cell">
|
||||
<div class="text_cell_render border-box-sizing rendered_html">
|
||||
<h1 id="Calculating-the-Return">Calculating the Return<a class="anchor-link" href="#Calculating-the-Return">¶</a></h1><p>We first want to know how much each ticker returned over the prior period.</p>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="cell border-box-sizing code_cell rendered">
|
||||
<div class="input">
|
||||
<div class="prompt input_prompt">In [2]:</div>
|
||||
<div class="inner_cell">
|
||||
<div class="input_area">
|
||||
<div class=" highlight hl-ipython3"><pre><span class="n">returns</span> <span class="o">=</span> <span class="p">{</span><span class="n">tick</span><span class="p">:</span> <span class="n">data</span><span class="p">[</span><span class="n">tick</span><span class="p">][</span><span class="n">d_col</span><span class="p">]</span><span class="o">.</span><span class="n">pct_change</span><span class="p">()</span> <span class="k">for</span> <span class="n">tick</span> <span class="ow">in</span> <span class="n">tickers</span><span class="p">}</span>
|
||||
|
||||
<span class="n">display</span><span class="p">({</span><span class="n">tick</span><span class="p">:</span> <span class="n">returns</span><span class="p">[</span><span class="n">tick</span><span class="p">]</span><span class="o">.</span><span class="n">mean</span><span class="p">()</span> <span class="k">for</span> <span class="n">tick</span> <span class="ow">in</span> <span class="n">tickers</span><span class="p">})</span>
|
||||
</pre></div>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="output_wrapper">
|
||||
<div class="output">
|
||||
|
||||
|
||||
<div class="output_area"><div class="prompt"></div>
|
||||
|
||||
|
||||
<div class="output_text output_subarea ">
|
||||
<pre>{'CLB': -0.0016320202164526894,
|
||||
'CVX': 0.0010319531629488911,
|
||||
'OXY': 0.00093418904454400551,
|
||||
'SLB': 0.00098431254720448159,
|
||||
'XOM': 0.00044165797556096868}</pre>
|
||||
</div>
|
||||
|
||||
</div>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
|
||||
</div>
|
||||
<div class="cell border-box-sizing text_cell rendered">
|
||||
<div class="prompt input_prompt">
|
||||
</div>
|
||||
<div class="inner_cell">
|
||||
<div class="text_cell_render border-box-sizing rendered_html">
|
||||
<h1 id="Calculating-the-Sharpe-ratio">Calculating the Sharpe ratio<a class="anchor-link" href="#Calculating-the-Sharpe-ratio">¶</a></h1><p>Sharpe: ${R - R_M \over \sigma}$</p>
|
||||
<p>We use the average return over the lookback period, minus the market average return, over the ticker standard deviation to calculate the Sharpe. Shorting a stock turns a negative Sharpe positive.</p>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="cell border-box-sizing code_cell rendered">
|
||||
<div class="input">
|
||||
<div class="prompt input_prompt">In [3]:</div>
|
||||
<div class="inner_cell">
|
||||
<div class="input_area">
|
||||
<div class=" highlight hl-ipython3"><pre><span class="n">market_returns</span> <span class="o">=</span> <span class="n">market</span><span class="o">.</span><span class="n">pct_change</span><span class="p">()</span>
|
||||
|
||||
<span class="n">sharpe</span> <span class="o">=</span> <span class="k">lambda</span> <span class="n">ret</span><span class="p">:</span> <span class="p">(</span><span class="n">ret</span><span class="o">.</span><span class="n">mean</span><span class="p">()</span> <span class="o">-</span> <span class="n">market_returns</span><span class="p">[</span><span class="n">d_col</span><span class="p">]</span><span class="o">.</span><span class="n">mean</span><span class="p">())</span> <span class="o">/</span> <span class="n">ret</span><span class="o">.</span><span class="n">std</span><span class="p">()</span>
|
||||
<span class="n">sharpes</span> <span class="o">=</span> <span class="p">{</span><span class="n">tick</span><span class="p">:</span> <span class="n">sharpe</span><span class="p">(</span><span class="n">returns</span><span class="p">[</span><span class="n">tick</span><span class="p">])</span> <span class="k">for</span> <span class="n">tick</span> <span class="ow">in</span> <span class="n">tickers</span><span class="p">}</span>
|
||||
|
||||
<span class="n">display</span><span class="p">(</span><span class="n">sharpes</span><span class="p">)</span>
|
||||
</pre></div>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="output_wrapper">
|
||||
<div class="output">
|
||||
|
||||
|
||||
<div class="output_area"><div class="prompt"></div>
|
||||
|
||||
|
||||
<div class="output_text output_subarea ">
|
||||
<pre>{'CLB': -0.10578734457846127,
|
||||
'CVX': 0.027303529817677398,
|
||||
'OXY': 0.022622210057414487,
|
||||
'SLB': 0.026950946344858676,
|
||||
'XOM': -0.0053519259698605499}</pre>
|
||||
</div>
|
||||
|
||||
</div>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
|
||||
</div>
|
||||
<div class="cell border-box-sizing text_cell rendered">
|
||||
<div class="prompt input_prompt">
|
||||
</div>
|
||||
<div class="inner_cell">
|
||||
<div class="text_cell_render border-box-sizing rendered_html">
|
||||
<h1 id="Calculating-the-drawdown">Calculating the drawdown<a class="anchor-link" href="#Calculating-the-drawdown">¶</a></h1><p>This one is easy - what is the maximum daily change over the lookback period? That is, because we will allow short positions, we are not concerned strictly with maximum downturn, but in general, what is the largest 1-day change?</p>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="cell border-box-sizing code_cell rendered">
|
||||
<div class="input">
|
||||
<div class="prompt input_prompt">In [4]:</div>
|
||||
<div class="inner_cell">
|
||||
<div class="input_area">
|
||||
<div class=" highlight hl-ipython3"><pre><span class="n">drawdown</span> <span class="o">=</span> <span class="k">lambda</span> <span class="n">ret</span><span class="p">:</span> <span class="n">ret</span><span class="o">.</span><span class="n">abs</span><span class="p">()</span><span class="o">.</span><span class="n">max</span><span class="p">()</span>
|
||||
<span class="n">drawdowns</span> <span class="o">=</span> <span class="p">{</span><span class="n">tick</span><span class="p">:</span> <span class="n">drawdown</span><span class="p">(</span><span class="n">returns</span><span class="p">[</span><span class="n">tick</span><span class="p">])</span> <span class="k">for</span> <span class="n">tick</span> <span class="ow">in</span> <span class="n">tickers</span><span class="p">}</span>
|
||||
|
||||
<span class="n">display</span><span class="p">(</span><span class="n">drawdowns</span><span class="p">)</span>
|
||||
</pre></div>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="output_wrapper">
|
||||
<div class="output">
|
||||
|
||||
|
||||
<div class="output_area"><div class="prompt"></div>
|
||||
|
||||
|
||||
<div class="output_text output_subarea ">
|
||||
<pre>{'CLB': 0.043551495607375035,
|
||||
'CVX': 0.044894389686214398,
|
||||
'OXY': 0.051424517867144637,
|
||||
'SLB': 0.034774627850375328,
|
||||
'XOM': 0.035851524605672758}</pre>
|
||||
</div>
|
||||
|
||||
</div>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
|
||||
</div>
|
||||
<div class="cell border-box-sizing text_cell rendered">
|
||||
<div class="prompt input_prompt">
|
||||
</div>
|
||||
<div class="inner_cell">
|
||||
<div class="text_cell_render border-box-sizing rendered_html">
|
||||
<h1 id="Performing-the-optimization">Performing the optimization<a class="anchor-link" href="#Performing-the-optimization">¶</a></h1><p>$\begin{align}
|
||||
max\ \ & \mu \cdot \omega\\
|
||||
s.t.\ \ & \vec{1} \omega = 1\\
|
||||
& \vec{S} \omega \ge s\\
|
||||
& \vec{D} \cdot | \omega | \le d\\
|
||||
& \left|\omega\right| \le l\\
|
||||
\end{align}$</p>
|
||||
<p>We want to maximize average return subject to having a full portfolio, Sharpe above a specific level, drawdown below a level, and leverage not too high - that is, don't have huge long/short positions.</p>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="cell border-box-sizing code_cell rendered">
|
||||
<div class="input">
|
||||
<div class="prompt input_prompt">In [5]:</div>
|
||||
<div class="inner_cell">
|
||||
<div class="input_area">
|
||||
<div class=" highlight hl-ipython3"><pre><span class="kn">import</span> <span class="nn">numpy</span> <span class="k">as</span> <span class="nn">np</span>
|
||||
<span class="kn">from</span> <span class="nn">scipy.optimize</span> <span class="k">import</span> <span class="n">minimize</span>
|
||||
|
||||
<span class="c1">#sharpe_limit = .1</span>
|
||||
<span class="n">drawdown_limit</span> <span class="o">=</span> <span class="o">.</span><span class="mi">05</span>
|
||||
<span class="n">leverage</span> <span class="o">=</span> <span class="mi">250</span>
|
||||
|
||||
<span class="c1"># Use the map so we can guarantee we maintain the correct order</span>
|
||||
<span class="c1"># sharpe_a = np.array(list(map(lambda tick: sharpes[tick], tickers))) * -1 # So we can write as upper-bound</span>
|
||||
<span class="n">dd_a</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">array</span><span class="p">(</span><span class="nb">list</span><span class="p">(</span><span class="nb">map</span><span class="p">(</span><span class="k">lambda</span> <span class="n">tick</span><span class="p">:</span> <span class="n">drawdowns</span><span class="p">[</span><span class="n">tick</span><span class="p">],</span> <span class="n">tickers</span><span class="p">)))</span>
|
||||
<span class="n">returns_a</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">array</span><span class="p">(</span><span class="nb">list</span><span class="p">(</span><span class="nb">map</span><span class="p">(</span><span class="k">lambda</span> <span class="n">tick</span><span class="p">:</span> <span class="n">returns</span><span class="p">[</span><span class="n">tick</span><span class="p">]</span><span class="o">.</span><span class="n">mean</span><span class="p">(),</span> <span class="n">tickers</span><span class="p">)))</span> <span class="c1"># Because minimizing</span>
|
||||
|
||||
<span class="n">meets_sharpe</span> <span class="o">=</span> <span class="k">lambda</span> <span class="n">x</span><span class="p">:</span> <span class="nb">sum</span><span class="p">(</span><span class="nb">abs</span><span class="p">(</span><span class="n">x</span><span class="p">)</span> <span class="o">*</span> <span class="n">sharpe_a</span><span class="p">)</span> <span class="o">-</span> <span class="n">sharpe_limit</span>
|
||||
<span class="k">def</span> <span class="nf">meets_dd</span><span class="p">(</span><span class="n">x</span><span class="p">):</span>
|
||||
<span class="n">portfolio</span> <span class="o">=</span> <span class="nb">sum</span><span class="p">(</span><span class="nb">abs</span><span class="p">(</span><span class="n">x</span><span class="p">))</span>
|
||||
<span class="k">if</span> <span class="n">portfolio</span> <span class="o"><</span> <span class="o">.</span><span class="mi">1</span><span class="p">:</span>
|
||||
<span class="c1"># If there are no stocks in the portfolio,</span>
|
||||
<span class="c1"># we can accidentally induce division by 0,</span>
|
||||
<span class="c1"># or division by something small enough to cause infinity</span>
|
||||
<span class="k">return</span> <span class="mi">0</span>
|
||||
|
||||
<span class="k">return</span> <span class="n">drawdown_limit</span> <span class="o">-</span> <span class="nb">sum</span><span class="p">(</span><span class="nb">abs</span><span class="p">(</span><span class="n">x</span><span class="p">)</span> <span class="o">*</span> <span class="n">dd_a</span><span class="p">)</span> <span class="o">/</span> <span class="nb">sum</span><span class="p">(</span><span class="nb">abs</span><span class="p">(</span><span class="n">x</span><span class="p">))</span>
|
||||
|
||||
<span class="n">is_portfolio</span> <span class="o">=</span> <span class="k">lambda</span> <span class="n">x</span><span class="p">:</span> <span class="nb">sum</span><span class="p">(</span><span class="n">x</span><span class="p">)</span> <span class="o">-</span> <span class="mi">1</span>
|
||||
|
||||
<span class="k">def</span> <span class="nf">within_leverage</span><span class="p">(</span><span class="n">x</span><span class="p">):</span>
|
||||
<span class="k">return</span> <span class="n">leverage</span> <span class="o">-</span> <span class="nb">sum</span><span class="p">(</span><span class="nb">abs</span><span class="p">(</span><span class="n">x</span><span class="p">))</span>
|
||||
|
||||
<span class="n">objective</span> <span class="o">=</span> <span class="k">lambda</span> <span class="n">x</span><span class="p">:</span> <span class="nb">sum</span><span class="p">(</span><span class="n">x</span> <span class="o">*</span> <span class="n">returns_a</span><span class="p">)</span> <span class="o">*</span> <span class="o">-</span><span class="mi">1</span> <span class="c1"># Because we're minimizing</span>
|
||||
<span class="n">bounds</span> <span class="o">=</span> <span class="p">((</span><span class="kc">None</span><span class="p">,</span> <span class="kc">None</span><span class="p">),)</span> <span class="o">*</span> <span class="nb">len</span><span class="p">(</span><span class="n">tickers</span><span class="p">)</span>
|
||||
<span class="n">x</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">zeros</span><span class="p">(</span><span class="nb">len</span><span class="p">(</span><span class="n">tickers</span><span class="p">))</span>
|
||||
|
||||
<span class="n">constraints</span> <span class="o">=</span> <span class="p">[</span>
|
||||
<span class="p">{</span>
|
||||
<span class="s1">'type'</span><span class="p">:</span> <span class="s1">'eq'</span><span class="p">,</span>
|
||||
<span class="s1">'fun'</span><span class="p">:</span> <span class="n">is_portfolio</span>
|
||||
<span class="p">},</span> <span class="p">{</span>
|
||||
<span class="s1">'type'</span><span class="p">:</span> <span class="s1">'ineq'</span><span class="p">,</span>
|
||||
<span class="s1">'fun'</span><span class="p">:</span> <span class="n">within_leverage</span>
|
||||
<span class="c1">#}, {</span>
|
||||
<span class="c1"># 'type': 'ineq',</span>
|
||||
<span class="c1"># 'fun': meets_sharpe</span>
|
||||
<span class="p">},</span> <span class="p">{</span>
|
||||
<span class="s1">'type'</span><span class="p">:</span> <span class="s1">'ineq'</span><span class="p">,</span>
|
||||
<span class="s1">'fun'</span><span class="p">:</span> <span class="n">meets_dd</span>
|
||||
<span class="p">}</span>
|
||||
<span class="p">]</span>
|
||||
|
||||
<span class="n">optimal</span> <span class="o">=</span> <span class="n">minimize</span><span class="p">(</span><span class="n">objective</span><span class="p">,</span> <span class="n">x</span><span class="p">,</span> <span class="n">bounds</span><span class="o">=</span><span class="n">bounds</span><span class="p">,</span> <span class="n">constraints</span><span class="o">=</span><span class="n">constraints</span><span class="p">,</span>
|
||||
<span class="n">options</span><span class="o">=</span><span class="p">{</span><span class="s1">'maxiter'</span><span class="p">:</span> <span class="mi">500</span><span class="p">})</span>
|
||||
|
||||
<span class="c1"># Optimization time!</span>
|
||||
<span class="n">display</span><span class="p">(</span><span class="n">optimal</span><span class="o">.</span><span class="n">message</span><span class="p">)</span>
|
||||
|
||||
<span class="n">display</span><span class="p">(</span><span class="s2">"Holdings: {}"</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="nb">list</span><span class="p">(</span><span class="nb">zip</span><span class="p">(</span><span class="n">tickers</span><span class="p">,</span> <span class="n">optimal</span><span class="o">.</span><span class="n">x</span><span class="p">))))</span>
|
||||
|
||||
<span class="n">expected_return</span> <span class="o">=</span> <span class="n">optimal</span><span class="o">.</span><span class="n">fun</span> <span class="o">*</span> <span class="o">-</span><span class="mi">100</span> <span class="c1"># multiply by -100 to scale, and compensate for minimizing</span>
|
||||
<span class="n">display</span><span class="p">(</span><span class="s2">"Expected Return: {:.3f}%"</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">expected_return</span><span class="p">))</span>
|
||||
|
||||
<span class="n">expected_drawdown</span> <span class="o">=</span> <span class="nb">sum</span><span class="p">(</span><span class="nb">abs</span><span class="p">(</span><span class="n">optimal</span><span class="o">.</span><span class="n">x</span><span class="p">)</span> <span class="o">*</span> <span class="n">dd_a</span><span class="p">)</span> <span class="o">/</span> <span class="nb">sum</span><span class="p">(</span><span class="nb">abs</span><span class="p">(</span><span class="n">optimal</span><span class="o">.</span><span class="n">x</span><span class="p">))</span> <span class="o">*</span> <span class="mi">100</span>
|
||||
<span class="n">display</span><span class="p">(</span><span class="s2">"Expected Max Drawdown: {0:.2f}%"</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">expected_drawdown</span><span class="p">))</span>
|
||||
|
||||
<span class="c1"># TODO: Calculate expected Sharpe</span>
|
||||
</pre></div>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="output_wrapper">
|
||||
<div class="output">
|
||||
|
||||
|
||||
<div class="output_area"><div class="prompt"></div>
|
||||
|
||||
|
||||
<div class="output_text output_subarea ">
|
||||
<pre>'Optimization terminated successfully.'</pre>
|
||||
</div>
|
||||
|
||||
</div>
|
||||
|
||||
<div class="output_area"><div class="prompt"></div>
|
||||
|
||||
|
||||
<div class="output_text output_subarea ">
|
||||
<pre>"Holdings: [('XOM', 5.8337945679814904), ('CVX', 42.935064321851307), ('CLB', -124.5), ('OXY', 36.790387773552119), ('SLB', 39.940753336615096)]"</pre>
|
||||
</div>
|
||||
|
||||
</div>
|
||||
|
||||
<div class="output_area"><div class="prompt"></div>
|
||||
|
||||
|
||||
<div class="output_text output_subarea ">
|
||||
<pre>'Expected Return: 32.375%'</pre>
|
||||
</div>
|
||||
|
||||
</div>
|
||||
|
||||
<div class="output_area"><div class="prompt"></div>
|
||||
|
||||
|
||||
<div class="output_text output_subarea ">
|
||||
<pre>'Expected Max Drawdown: 4.34%'</pre>
|
||||
</div>
|
||||
|
||||
</div>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
|
||||
</div></p>
|
||||
<script type="text/x-mathjax-config">
|
||||
// Inline-math delimiters for the tex2jax preprocessor: $...$ and \(...\).
// The backslashes are doubled because this config is evaluated as JavaScript:
// a single-backslash '\(' collapses to '(' in a string literal, which would make
// every ordinary parenthesis on the page an inline-math delimiter.
MathJax.Hub.Config({tex2jax: {inlineMath: [['$','$'], ['\\(','\\)']]}});
|
||||
</script>
|
||||
@ -137,6 +457,20 @@ MathJax.Hub.Config({tex2jax: {inlineMath: [['$','$'], ['\(','\)']]}});
|
||||
<script async src='https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.7/MathJax.js?config=TeX-AMS_CHTML'></script>
|
||||
|
||||
|
||||
<div class="comments">
|
||||
<div id="disqus_thread"></div>
|
||||
<script type="text/javascript">
|
||||
// Disqus comment-thread bootstrap for this post.
// Page-specific settings; only disqus_shortname is used directly below.
// NOTE(review): the other two are presumably read as globals by the embed
// script loaded below — confirm before renaming or re-scoping any of them.
var disqus_shortname = 'bradleespeice';
|
||||
var disqus_identifier = 'welcome-and-an-algorithm.html';
|
||||
var disqus_url = 'https://bspeice.github.io/welcome-and-an-algorithm.html';
|
||||
// Create a <script> element for <shortname>.disqus.com/embed.js and attach it
// to the document; the IIFE keeps `dsq` out of the global scope.
(function() {
|
||||
var dsq = document.createElement('script'); dsq.type = 'text/javascript'; dsq.async = true;
|
||||
// Protocol-relative URL: resolved with the same scheme the page was loaded with.
dsq.src = '//' + disqus_shortname + '.disqus.com/embed.js';
|
||||
// Append to <head> when one exists, otherwise fall back to <body>.
(document.getElementsByTagName('head')[0] || document.getElementsByTagName('body')[0]).appendChild(dsq);
|
||||
})();
|
||||
</script>
|
||||
<noscript>Please enable JavaScript to view the comments.</noscript>
|
||||
</div>
|
||||
|
||||
</div>
|
||||
<!-- /Content -->
|
||||
@ -148,6 +482,7 @@ MathJax.Hub.Config({tex2jax: {inlineMath: [['$','$'], ['\(','\)']]}});
|
||||
<div class="col-xs-4 col-sm-3 col-md-3 col-lg-3">
|
||||
<div class="footer-title"></div>
|
||||
<ul class="list-unstyled">
|
||||
<li><a href="https://bspeice.github.io/feeds/all.atom.xml" type="application/atom+xml" rel="alternate"></a></li>
|
||||
</ul>
|
||||
</div>
|
||||
<div class="col-xs-4 col-sm-3 col-md-3 col-lg-3">
|
||||
|
Loading…
Reference in New Issue
Block a user