diff --git a/archives.html b/archives.html index 873bd37..f14d256 100644 --- a/archives.html +++ b/archives.html @@ -27,6 +27,17 @@ + + + @@ -71,6 +82,8 @@

+
Sat 05 March 2016
+
Predicting Santander Customer Happiness
Fri 26 February 2016
Profitability using the Investment Formula
Wed 03 February 2016
diff --git a/author/bradlee-speice.html b/author/bradlee-speice.html index bbebd91..86488d9 100644 --- a/author/bradlee-speice.html +++ b/author/bradlee-speice.html @@ -27,6 +27,17 @@ + + + @@ -71,6 +82,8 @@

+
Sat 05 March 2016
+
Predicting Santander Customer Happiness
Fri 26 February 2016
Profitability using the Investment Formula
Wed 03 February 2016
diff --git a/authors.html b/authors.html index 041fad7..360e372 100644 --- a/authors.html +++ b/authors.html @@ -27,6 +27,17 @@ + + + @@ -71,6 +82,8 @@

Bradlee Speice

+
Sat 05 March 2016
+
Predicting Santander Customer Happiness
Fri 26 February 2016
Profitability using the Investment Formula
Wed 03 February 2016
diff --git a/autocallable-bonds.html b/autocallable-bonds.html index c37fd73..d9df85f 100644 --- a/autocallable-bonds.html +++ b/autocallable-bonds.html @@ -28,6 +28,17 @@ + + + @@ -84,7 +95,7 @@ Because these are all very similar, we decided to demonstrate all 3 products at
In [1]:
-
using Gadfly
+
using Gadfly
 
@@ -120,7 +131,7 @@ Because these are all very similar, we decided to demonstrate all 3 products at
In [2]:
-
S0 = 102.2
+
S0 = 102.2
 nominal = 100
 q = 2.84 / 100
 σ = 15.37 / 100
@@ -176,7 +187,7 @@ Because these are all very similar, we decided to demonstrate all 3 products at
 
In [3]:
-
simulate_gbm = function(S0, μ, σ, T, n)
+
simulate_gbm = function(S0, μ, σ, T, n)
     # Set the initial state
     m = length(S0)
     t = T / n
@@ -247,7 +258,7 @@ Because these are all very similar, we decided to demonstrate all 3 products at
 
In [4]:
-
initial = ones(5) * S0
+
initial = ones(5) * S0
 # Using μ=0, T=.25 for now, we'll use the proper values later
 motion = simulate_gbm(initial, 0, σ, .25, 200) 
 
@@ -1855,7 +1866,7 @@ fig.select("#fig-3a6dd25ad25c4037a166889ee51bb151-element-20")
 
In [5]:
-
forward_term = function(yearly_term)
+
forward_term = function(yearly_term)
     # It is assumed that we have a yearly term structure passed in, and starts at year 0
     # This implies a nominal rate above 0 for the first year!
     years = length(term)-1 # because we start at 0
@@ -1885,14 +1896,14 @@ fig.select("#fig-3a6dd25ad25c4037a166889ee51bb151-element-20")
 
In [6]:
-
# Example term structure taken from:
+
# Example term structure taken from:
 # http://www.treasury.gov/resource-center/data-chart-center/interest-rates/Pages/TextView.aspx?data=yield
 # Linear interpolation used years in-between periods, assuming real-dollar
 # interest rates
 forward_yield = forward_term(term)
 calculated_term2 = term[1] * forward_yield[1]
 
-println("Actual term[2]: $(term[2]); Calculated term[2]: $(calculated_term2)")
+println("Actual term[2]: $(term[2]); Calculated term[2]: $(calculated_term2)")
 
@@ -1929,7 +1940,7 @@ fig.select("#fig-3a6dd25ad25c4037a166889ee51bb151-element-20")
In [7]:
-
full_motion = ones(5) * S0
+
full_motion = ones(5) * S0
 full_term = vcat(term[1], forward_yield)
 for i=1:T
     μ = (full_term[i] - 1 - q)
@@ -3490,7 +3501,7 @@ fig.select("#fig-0378e04b897742b597befd2e8e1c169e-element-20")
 
In [8]:
-
full_simulation = function(S0, T, n, m, term)
+
full_simulation = function(S0, T, n, m, term)
     forward = vcat(term[1], forward_term(term))
 
     # And an S0 to kick things off.
@@ -3506,7 +3517,7 @@ fig.select("#fig-0378e04b897742b597befd2e8e1c169e-element-20")
 tic()
 full_simulation(S0, T, n, m, term)
 time = toq()
-@printf("Time to run simulation: %.2fs", time)
+@printf("Time to run simulation: %.2fs", time)
 
@@ -3551,7 +3562,7 @@ fig.select("#fig-0378e04b897742b597befd2e8e1c169e-element-20")
In [9]:
-
call_barrier = S0
+
call_barrier = S0
 strike = S0
 protection_barrier = S0 * .6
 coupon = nominal * .07
@@ -3595,13 +3606,13 @@ fig.select("#fig-0378e04b897742b597befd2e8e1c169e-element-20")
     tic()
     mean_payoffs[i] = athena()
     time = toq()
-    @printf("Mean of simulation %i: \$%.4f; Simulation time: %.2fs\n", i, mean_payoffs[i], time)
+    @printf("Mean of simulation %i: \$%.4f; Simulation time: %.2fs\n", i, mean_payoffs[i], time)
 end
 
 final_mean = mean(mean_payoffs)
-println("Mean over $num_simulations simulations: $(mean(mean_payoffs))")
+println("Mean over $num_simulations simulations: $(mean(mean_payoffs))")
 pv = final_mean * (exp(-(prod(forward_structure)-1)*T))
-@printf("Present value of Athena note: \$%.2f, notional: \$%.2f", pv, nominal)
+@printf("Present value of Athena note: \$%.2f, notional: \$%.2f", pv, nominal)
 
@@ -3662,7 +3673,7 @@ Present value of Athena note: $95.00, notional: $100.00
In [10]:
-
call_barrier = S0
+
call_barrier = S0
 coupon_barrier = S0 * .8
 protection_barrier = S0 * .6
 coupon = nominal * .06
@@ -3709,13 +3720,13 @@ Present value of Athena note: $95.00, notional: $100.00
tic() mean_payoffs[i] = phoenix_no_memory() time = toq() - @printf("Mean of simulation %i: \$%.4f; Simulation time: %.2fs\n", i, mean_payoffs[i], time) + @printf("Mean of simulation %i: \$%.4f; Simulation time: %.2fs\n", i, mean_payoffs[i], time) end final_mean = mean(mean_payoffs) -println("Mean over $num_simulations simulations: $(mean(mean_payoffs))") +println("Mean over $num_simulations simulations: $(mean(mean_payoffs))") pv = final_mean * exp(-(prod(forward_structure)-1)*(T)) -@printf("Present value of Phoenix without memory note: \$%.2f", pv) +@printf("Present value of Phoenix without memory note: \$%.2f", pv)
@@ -3762,7 +3773,7 @@ Present value of Phoenix without memory note: $97.44
In [11]:
-
call_barrier = S0
+
call_barrier = S0
 coupon_barrier = S0 * .8
 protection_barrier = S0 * .6
 coupon = nominal * .07
@@ -3816,14 +3827,14 @@ Present value of Phoenix without memory note: $97.44
tic() mean_payoffs[i] = phoenix_with_memory() time = toq() - @printf("Mean of simulation %i: \$%.4f; Simulation time: %.2fs\n", + @printf("Mean of simulation %i: \$%.4f; Simulation time: %.2fs\n", i, mean_payoffs[i], time) end final_mean = mean(mean_payoffs) -println("Mean over $num_simulations simulations: $(mean(mean_payoffs))") +println("Mean over $num_simulations simulations: $(mean(mean_payoffs))") pv = final_mean * exp(-(prod(forward_structure)-1)*(T)) -@printf("Present value of Phoenix with memory note: \$%.2f", pv) +@printf("Present value of Phoenix with memory note: \$%.2f", pv)
@@ -3857,6 +3868,20 @@ MathJax.Hub.Config({tex2jax: {inlineMath: [['$','$'], ['\(','\)']]}}); +
+
+ + +
diff --git a/categories.html b/categories.html index 368e2d3..21e06d8 100644 --- a/categories.html +++ b/categories.html @@ -27,6 +27,17 @@ + + + @@ -71,6 +82,8 @@

Blog

+
Sat 05 March 2016
+
Predicting Santander Customer Happiness
Fri 26 February 2016
Profitability using the Investment Formula
Wed 03 February 2016
diff --git a/category/blog.html b/category/blog.html index 3517c77..637278c 100644 --- a/category/blog.html +++ b/category/blog.html @@ -28,6 +28,17 @@ + + + @@ -72,6 +83,8 @@

Blog

+
Sat 05 March 2016
+
Predicting Santander Customer Happiness
Fri 26 February 2016
Profitability using the Investment Formula
Wed 03 February 2016
diff --git a/cloudy-in-seattle.html b/cloudy-in-seattle.html index a1321f5..7bd28c3 100644 --- a/cloudy-in-seattle.html +++ b/cloudy-in-seattle.html @@ -28,6 +28,17 @@ + + + @@ -81,7 +92,7 @@
In [1]:
-
import pickle
+
import pickle
 import pandas as pd
 import numpy as np
 from bokeh.plotting import output_notebook, figure, show
@@ -348,7 +359,7 @@
 
In [2]:
-
city_forecasts = pickle.load(open('city_forecasts.p', 'rb'))
+
city_forecasts = pickle.load(open('city_forecasts.p', 'rb'))
 forecasts_df = pd.DataFrame.from_dict(city_forecasts)
 
@@ -362,7 +373,7 @@
In [3]:
-
cities = ['binghamton', 'cary', 'nyc', 'seattle']
+
cities = ['binghamton', 'cary', 'nyc', 'seattle']
 city_colors = {cities[i]: Palette[i] for i in range(0, 4)}
 
 def safe_cover(frame):
@@ -393,7 +404,7 @@
 
In [4]:
-
years = range(1990, 2016)
+
years = range(1990, 2016)
 def city_avg_cc(city, month):
     return [monthly_avg_cloudcover(city, y, month) for y in years]
 
@@ -407,7 +418,7 @@
 
 for month, month_id in months:
     month_averages = {city: city_avg_cc(city, month_id) for city in cities}
-    f = figure(title="{} Average Cloud Cover".format(month),
+    f = figure(title="{} Average Cloud Cover".format(month),
                x_axis_label='Year',
                y_axis_label='Cloud Cover Percentage')
     for city in cities:
@@ -598,7 +609,7 @@
 
In [5]:
-
def safe_precip(frame):
+
def safe_precip(frame):
     if frame and 'precipProbability' in frame:
         return frame['precipProbability']
     else:
@@ -620,7 +631,7 @@
 
 for month, month_id in months:
     month_averages = {city: city_avg_cc(city, month_id) for city in cities}
-    f = figure(title="{} Average Precipitation Chance".format(month),
+    f = figure(title="{} Average Precipitation Chance".format(month),
                x_axis_label='Year',
                y_axis_label='Precipitation Chance Percentage')
     for city in cities:
@@ -814,6 +825,20 @@ MathJax.Hub.Config({tex2jax: {inlineMath: [['$','$'], ['\(','\)']]}});
 
 
 
+    
+
+ + +
diff --git a/complaining-about-the-weather.html b/complaining-about-the-weather.html index 0777c17..3dce470 100644 --- a/complaining-about-the-weather.html +++ b/complaining-about-the-weather.html @@ -28,6 +28,17 @@ + + + @@ -81,7 +92,7 @@
In [1]:
-
from bokeh.plotting import figure, output_notebook, show
+
from bokeh.plotting import figure, output_notebook, show
 from bokeh.palettes import PuBuGn9 as Palette
 import pandas as pd
 import numpy as np
@@ -341,7 +352,7 @@
 
In [2]:
-
city_forecasts = pickle.load(open('city_forecasts.p', 'rb'))
+
city_forecasts = pickle.load(open('city_forecasts.p', 'rb'))
 forecast_df = pd.DataFrame.from_dict(city_forecasts)
 
@@ -355,7 +366,7 @@
In [3]:
-
cary_forecast = forecast_df['cary']
+
cary_forecast = forecast_df['cary']
 years = range(1990, 2016)
 months = range(7, 12)
 months_str = ['July', 'August', 'September', 'October', 'November']
@@ -443,7 +454,7 @@
 
In [4]:
-
def monthly_cloudy_days(year, month):
+
def monthly_cloudy_days(year, month):
     dates = pd.DatetimeIndex(start=datetime(year, month, 1, 12),
                              end=datetime(year, month + 1, 1, 12),
                              freq='D', closed='left')
@@ -559,7 +570,7 @@
 
In [5]:
-
def safe_precip(frame):
+
def safe_precip(frame):
     if frame and 'precipProbability' in frame:
         return frame['precipProbability']
     else:
@@ -640,7 +651,7 @@
 
In [6]:
-
def monthly_rainy_days(year, month):
+
def monthly_rainy_days(year, month):
     dates = pd.DatetimeIndex(start=datetime(year, month, 1, 12),
                              end=datetime(year, month + 1, 1, 12),
                              freq='D', closed='left')
@@ -759,7 +770,7 @@
 

Generating the Forecast file

The following code was generates the file that was used throughout the blog post. Please note that I'm retrieving data for other cities to use in a future blog post, only Cary data was used for this post.

-
import pandas as pd
+
import pandas as pd
 from functools import reduce
 import requests
 from datetime import datetime
@@ -834,6 +845,20 @@ MathJax.Hub.Config({tex2jax: {inlineMath: [['$','$'], ['\(','\)']]}});
 
 
 
+    
+
+ + +
diff --git a/feeds/all.atom.xml b/feeds/all.atom.xml index a506e59..e3a04cd 100644 --- a/feeds/all.atom.xml +++ b/feeds/all.atom.xml @@ -1,5 +1,927 @@ -Bradlee Speicehttps://bspeice.github.io/2016-02-26T00:00:00-05:00Profitability using the Investment Formula2016-02-26T00:00:00-05:00Bradlee Speicetag:bspeice.github.io,2016-02-26:profitability-using-the-investment-formula.html<p> +Bradlee Speicehttps://bspeice.github.io/2016-03-05T00:00:00-05:00Predicting Santander Customer Happiness2016-03-05T00:00:00-05:00Bradlee Speicetag:bspeice.github.io,2016-03-05:predicting-santander-customer-happiness.html<p> +<div class="cell border-box-sizing text_cell rendered"> +<div class="prompt input_prompt"> +</div> +<div class="inner_cell"> +<div class="text_cell_render border-box-sizing rendered_html"> +<h3 id="My-first-Kaggle-competition">My first Kaggle competition<a class="anchor-link" href="#My-first-Kaggle-competition">&#182;</a></h3><p>It's time! After embarking on a Machine Learning class this semester, and with a Saturday in which I don't have much planned, I wanted to put this class and training to work. It's my first competition submission. I want to walk you guys through how I'm approaching this problem, because I thought it would be really neat. The competition is Banco Santander's <a href="https://www.kaggle.com/c/santander-customer-satisfaction">Santander Customer Satisfaction</a> competition. It seemed like an easy enough problem I could actually make decent progress on it.</p> +<h1 id="Data-Exploration">Data Exploration<a class="anchor-link" href="#Data-Exploration">&#182;</a></h1><p>First up: we need to load our data and do some exploratory work. Because we're going to be using this data for model selection prior to testing, we need to make a further split. I've already gone ahead and done this work, please see the code in the <a href="#Appendix">appendix below</a>.</p> + +</div> +</div> +</div> +<div class="cell border-box-sizing code_cell rendered"> +<div class="input"> +<div class="prompt input_prompt">In&nbsp;[1]:</div> +<div class="inner_cell"> + <div class="input_area"> +<div class=" highlight hl-ipython3"><pre><span></span><span class="kn">import</span> <span class="nn">pandas</span> <span class="k">as</span> <span class="nn">pd</span> +<span class="kn">import</span> <span class="nn">numpy</span> <span class="k">as</span> <span class="nn">np</span> +<span class="kn">import</span> <span class="nn">matplotlib.pyplot</span> <span class="k">as</span> <span class="nn">plt</span> +<span class="o">%</span><span class="k">matplotlib</span> inline + +<span class="c1"># Record how long it takes to run the notebook - I&#39;m curious.</span> +<span class="kn">from</span> <span class="nn">datetime</span> <span class="k">import</span> <span class="n">datetime</span> +<span class="n">start</span> <span class="o">=</span> <span class="n">datetime</span><span class="o">.</span><span class="n">now</span><span class="p">()</span> + +<span class="n">dataset</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">read_csv</span><span class="p">(</span><span class="s1">&#39;split_train.csv&#39;</span><span class="p">)</span> +<span class="n">dataset</span><span class="o">.</span><span class="n">index</span> <span class="o">=</span> <span class="n">dataset</span><span class="o">.</span><span class="n">ID</span> +<span class="n">X</span> <span class="o">=</span> <span class="n">dataset</span><span class="o">.</span><span class="n">drop</span><span class="p">([</span><span class="s1">&#39;TARGET&#39;</span><span class="p">,</span> <span class="s1">&#39;ID&#39;</span><span class="p">,</span> <span class="s1">&#39;ID.1&#39;</span><span class="p">],</span> <span class="mi">1</span><span class="p">)</span> +<span class="n">y</span> <span class="o">=</span> <span class="n">dataset</span><span class="o">.</span><span class="n">TARGET</span> +</pre></div> + +</div> +</div> +</div> + +</div> +<div class="cell border-box-sizing code_cell rendered"> +<div class="input"> +<div class="prompt input_prompt">In&nbsp;[2]:</div> +<div class="inner_cell"> + <div class="input_area"> +<div class=" highlight hl-ipython3"><pre><span></span><span class="n">y</span><span class="o">.</span><span class="n">unique</span><span class="p">()</span> +</pre></div> + +</div> +</div> +</div> + +<div class="output_wrapper"> +<div class="output"> + + +<div class="output_area"><div class="prompt output_prompt">Out[2]:</div> + + +<div class="output_text output_subarea output_execute_result"> +<pre>array([0, 1], dtype=int64)</pre> +</div> + +</div> + +</div> +</div> + +</div> +<div class="cell border-box-sizing code_cell rendered"> +<div class="input"> +<div class="prompt input_prompt">In&nbsp;[3]:</div> +<div class="inner_cell"> + <div class="input_area"> +<div class=" highlight hl-ipython3"><pre><span></span><span class="nb">len</span><span class="p">(</span><span class="n">X</span><span class="o">.</span><span class="n">columns</span><span class="p">)</span> +</pre></div> + +</div> +</div> +</div> + +<div class="output_wrapper"> +<div class="output"> + + +<div class="output_area"><div class="prompt output_prompt">Out[3]:</div> + + +<div class="output_text output_subarea output_execute_result"> +<pre>369</pre> +</div> + +</div> + +</div> +</div> + +</div> +<div class="cell border-box-sizing text_cell rendered"> +<div class="prompt input_prompt"> +</div> +<div class="inner_cell"> +<div class="text_cell_render border-box-sizing rendered_html"> +<p>Okay, so there are only <a href="https://www.kaggle.com/c/santander-customer-satisfaction/data">two classes we're predicting</a>: 1 for unsatisfied customers, 0 for satisfied customers. I would have preferred this to be something more like a regression, or predicting multiple classes: maybe the customer isn't the most happy, but is nowhere near closing their accounts. For now though, that's just the data we're working with.</p> +<p>Now, I'd like to make a scatter matrix of everything going on. Unfortunately as noted above, we have 369 different features. There's no way I can graphically make sense of that much data to start with.</p> +<p>We're also not told what the data actually represents: Are these survey results? Average time between contact with a customer care person? Frequency of contacting a customer care person? The idea is that I need to reduce the number of dimensions we're predicting across.</p> +<h2 id="Dimensionality-Reduction-pt.-1---Binary-Classifiers">Dimensionality Reduction pt. 1 - Binary Classifiers<a class="anchor-link" href="#Dimensionality-Reduction-pt.-1---Binary-Classifiers">&#182;</a></h2><p>My first attempt to reduce the data dimensionality is to find all the binary classifiers in the dataset (i.e. 0 or 1 values) and see if any of those are good (or anti-good) predictors of the final data.</p> + +</div> +</div> +</div> +<div class="cell border-box-sizing code_cell rendered"> +<div class="input"> +<div class="prompt input_prompt">In&nbsp;[4]:</div> +<div class="inner_cell"> + <div class="input_area"> +<div class=" highlight hl-ipython3"><pre><span></span><span class="n">cols</span> <span class="o">=</span> <span class="n">X</span><span class="o">.</span><span class="n">columns</span> +<span class="n">b_class</span> <span class="o">=</span> <span class="p">[]</span> +<span class="k">for</span> <span class="n">c</span> <span class="ow">in</span> <span class="n">cols</span><span class="p">:</span> + <span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">X</span><span class="p">[</span><span class="n">c</span><span class="p">]</span><span class="o">.</span><span class="n">unique</span><span class="p">())</span> <span class="o">==</span> <span class="mi">2</span><span class="p">:</span> + <span class="n">b_class</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">c</span><span class="p">)</span> + +<span class="nb">len</span><span class="p">(</span><span class="n">b_class</span><span class="p">)</span> +</pre></div> + +</div> +</div> +</div> + +<div class="output_wrapper"> +<div class="output"> + + +<div class="output_area"><div class="prompt output_prompt">Out[4]:</div> + + +<div class="output_text output_subarea output_execute_result"> +<pre>111</pre> +</div> + +</div> + +</div> +</div> + +</div> +<div class="cell border-box-sizing text_cell rendered"> +<div class="prompt input_prompt"> +</div> +<div class="inner_cell"> +<div class="text_cell_render border-box-sizing rendered_html"> +<p>So there are 111 features in the dataset that are a binary label. Let's see if any of them are good at predicting the users satisfaction!</p> + +</div> +</div> +</div> +<div class="cell border-box-sizing code_cell rendered"> +<div class="input"> +<div class="prompt input_prompt">In&nbsp;[5]:</div> +<div class="inner_cell"> + <div class="input_area"> +<div class=" highlight hl-ipython3"><pre><span></span><span class="c1"># First we need to `binarize` the data to 0-1; some of the labels are {0, 1},</span> +<span class="c1"># some are {0, 3}, etc.</span> +<span class="kn">from</span> <span class="nn">sklearn.preprocessing</span> <span class="k">import</span> <span class="n">binarize</span> +<span class="n">X_bin</span> <span class="o">=</span> <span class="n">binarize</span><span class="p">(</span><span class="n">X</span><span class="p">[</span><span class="n">b_class</span><span class="p">])</span> + +<span class="n">accuracy</span> <span class="o">=</span> <span class="p">[</span><span class="n">np</span><span class="o">.</span><span class="n">mean</span><span class="p">(</span><span class="n">X_bin</span><span class="p">[:,</span><span class="n">i</span><span class="p">]</span> <span class="o">==</span> <span class="n">y</span><span class="p">)</span> <span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span> <span class="nb">len</span><span class="p">(</span><span class="n">b_class</span><span class="p">))]</span> +<span class="n">acc_df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">({</span><span class="s2">&quot;Accuracy&quot;</span><span class="p">:</span> <span class="n">accuracy</span><span class="p">},</span> <span class="n">index</span><span class="o">=</span><span class="n">b_class</span><span class="p">)</span> +<span class="n">acc_df</span><span class="o">.</span><span class="n">describe</span><span class="p">()</span> +</pre></div> + +</div> +</div> +</div> + +<div class="output_wrapper"> +<div class="output"> + + +<div class="output_area"><div class="prompt output_prompt">Out[5]:</div> + +<div class="output_html rendered_html output_subarea output_execute_result"> +<div> +<table border="1" class="dataframe"> + <thead> + <tr style="text-align: right;"> + <th></th> + <th>Accuracy</th> + </tr> + </thead> + <tbody> + <tr> + <th>count</th> + <td>111.000000</td> + </tr> + <tr> + <th>mean</th> + <td>0.905159</td> + </tr> + <tr> + <th>std</th> + <td>0.180602</td> + </tr> + <tr> + <th>min</th> + <td>0.043598</td> + </tr> + <tr> + <th>25%</th> + <td>0.937329</td> + </tr> + <tr> + <th>50%</th> + <td>0.959372</td> + </tr> + <tr> + <th>75%</th> + <td>0.960837</td> + </tr> + <tr> + <th>max</th> + <td>0.960837</td> + </tr> + </tbody> +</table> +</div> +</div> + +</div> + +</div> +</div> + +</div> +<div class="cell border-box-sizing text_cell rendered"> +<div class="prompt input_prompt"> +</div> +<div class="inner_cell"> +<div class="text_cell_render border-box-sizing rendered_html"> +<p>Wow! Looks like we've got some incredibly predictive features! So much so that we should be a bit concerned. My initial guess for what's happening is that we have a sparsity issue: so many of the values are 0, and these likely happen to line up with satisfied customers.</p> +<p>So the question we must now answer, which I likely should have asked long before now: What exactly is the distribution of un/satisfied customers?</p> + +</div> +</div> +</div> +<div class="cell border-box-sizing code_cell rendered"> +<div class="input"> +<div class="prompt input_prompt">In&nbsp;[6]:</div> +<div class="inner_cell"> + <div class="input_area"> +<div class=" highlight hl-ipython3"><pre><span></span><span class="n">unsat</span> <span class="o">=</span> <span class="n">y</span><span class="p">[</span><span class="n">y</span> <span class="o">==</span> <span class="mi">1</span><span class="p">]</span><span class="o">.</span><span class="n">count</span><span class="p">()</span> +<span class="nb">print</span><span class="p">(</span><span class="s2">&quot;Satisfied customers: </span><span class="si">{}</span><span class="s2">; Unsatisfied customers: </span><span class="si">{}</span><span class="s2">&quot;</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="nb">len</span><span class="p">(</span><span class="n">y</span><span class="p">)</span> <span class="o">-</span> <span class="n">unsat</span><span class="p">,</span> <span class="n">unsat</span><span class="p">))</span> +<span class="n">naive_guess</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">mean</span><span class="p">(</span><span class="n">y</span> <span class="o">==</span> <span class="n">np</span><span class="o">.</span><span class="n">zeros</span><span class="p">(</span><span class="nb">len</span><span class="p">(</span><span class="n">y</span><span class="p">)))</span> +<span class="nb">print</span><span class="p">(</span><span class="s2">&quot;Naive guess accuracy: </span><span class="si">{}</span><span class="s2">&quot;</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">naive_guess</span><span class="p">))</span> +</pre></div> + +</div> +</div> +</div> + +<div class="output_wrapper"> +<div class="output"> + + +<div class="output_area"><div class="prompt"></div> +<div class="output_subarea output_stream output_stdout output_text"> +<pre>Satisfied customers: 51131; Unsatisfied customers: 2083 +Naive guess accuracy: 0.9608561656706882 +</pre> +</div> +</div> + +</div> +</div> + +</div> +<div class="cell border-box-sizing text_cell rendered"> +<div class="prompt input_prompt"> +</div> +<div class="inner_cell"> +<div class="text_cell_render border-box-sizing rendered_html"> +<p>This is a bit discouraging. A naive guess of "always satisfied" performs as well as our best individual binary classifier. What this tells me then, is that these data columns aren't incredibly helpful in prediction. I'd be interested in a polynomial expansion of this data-set, but for now, that's more computation than I want to take on.</p> +<h1 id="Dimensionality-Reduction-pt.-2---LDA">Dimensionality Reduction pt. 2 - LDA<a class="anchor-link" href="#Dimensionality-Reduction-pt.-2---LDA">&#182;</a></h1><p>Knowing that our naive guess performs so well is a blessing and a curse:</p> +<ul> +<li>Curse: The threshold for performance is incredibly high: We can only "improve" over the naive guess by 4%</li> +<li>Blessing: All the binary classification features we just discovered are worthless on their own. We can throw them out and reduce the data dimensionality from 369 to 111.</li> +</ul> +<p>Now, in removing these features from the dataset, I'm not saying that there is no "information" contained within them. There might be. But the only way we'd know is through a polynomial expansion, and I'm not going to take that on within this post.</p> +<p>My initial thought for a "next guess" is to use the <a href="http://scikit-learn.org/stable/modules/lda_qda.html">LDA</a> model for dimensionality reduction. However, it can only reduce dimensions to $1 - p$, with $p$ being the number of classes. Since this is a binary classification, every LDA model that I try will have dimensionality one; when I actually try this, the predictor ends up being slightly less accurate than the naive guess.</p> +<p>Instead, let's take a different approach to dimensionality reduction: <a href="http://scikit-learn.org/stable/modules/generated/sklearn.decomposition.PCA.html">principle components analysis</a>. This allows us to perform the dimensionality reduction without worrying about the number of classes. Then, we'll use a <a href="http://scikit-learn.org/stable/modules/naive_bayes.html#gaussian-naive-bayes">Gaussian Naive Bayes</a> model to actually do the prediction. This model is chosen simply because it doesn't take a long time to fit and compute; because PCA will take so long, I just want a prediction at the end of this. We can worry about using a more sophisticated LDA/QDA/SVM model later.</p> +<p>Now into the actual process: We're going to test out PCA dimensionality reduction from 1 - 20 dimensions, and then predict using a Gaussian Naive Bayes model. The 20 dimensions upper limit was selected because the accuracy never improves after you get beyond that (I found out by running it myself). Hopefully, we'll find that we can create a model better than the naive guess.</p> + +</div> +</div> +</div> +<div class="cell border-box-sizing code_cell rendered"> +<div class="input"> +<div class="prompt input_prompt">In&nbsp;[7]:</div> +<div class="inner_cell"> + <div class="input_area"> +<div class=" highlight hl-ipython3"><pre><span></span><span class="kn">from</span> <span class="nn">sklearn.naive_bayes</span> <span class="k">import</span> <span class="n">GaussianNB</span> +<span class="kn">from</span> <span class="nn">sklearn.decomposition</span> <span class="k">import</span> <span class="n">PCA</span> + +<span class="n">X_no_bin</span> <span class="o">=</span> <span class="n">X</span><span class="o">.</span><span class="n">drop</span><span class="p">(</span><span class="n">b_class</span><span class="p">,</span> <span class="mi">1</span><span class="p">)</span> + +<span class="k">def</span> <span class="nf">evaluate_gnb</span><span class="p">(</span><span class="n">dims</span><span class="p">):</span> + <span class="n">pca</span> <span class="o">=</span> <span class="n">PCA</span><span class="p">(</span><span class="n">n_components</span><span class="o">=</span><span class="n">dims</span><span class="p">)</span> + <span class="n">X_xform</span> <span class="o">=</span> <span class="n">pca</span><span class="o">.</span><span class="n">fit_transform</span><span class="p">(</span><span class="n">X_no_bin</span><span class="p">)</span> + + <span class="n">gnb</span> <span class="o">=</span> <span class="n">GaussianNB</span><span class="p">()</span> + <span class="n">gnb</span><span class="o">.</span><span class="n">fit</span><span class="p">(</span><span class="n">X_xform</span><span class="p">,</span> <span class="n">y</span><span class="p">)</span> + <span class="k">return</span> <span class="n">gnb</span><span class="o">.</span><span class="n">score</span><span class="p">(</span><span class="n">X_xform</span><span class="p">,</span> <span class="n">y</span><span class="p">)</span> + +<span class="n">dim_range</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">arange</span><span class="p">(</span><span class="mi">1</span><span class="p">,</span> <span class="mi">21</span><span class="p">)</span> +<span class="n">plt</span><span class="o">.</span><span class="n">plot</span><span class="p">(</span><span class="n">dim_range</span><span class="p">,</span> <span class="p">[</span><span class="n">evaluate_gnb</span><span class="p">(</span><span class="n">dim</span><span class="p">)</span> <span class="k">for</span> <span class="n">dim</span> <span class="ow">in</span> <span class="n">dim_range</span><span class="p">],</span> <span class="n">label</span><span class="o">=</span><span class="s2">&quot;Gaussian NB Accuracy&quot;</span><span class="p">)</span> +<span class="n">plt</span><span class="o">.</span><span class="n">axhline</span><span class="p">(</span><span class="n">naive_guess</span><span class="p">,</span> <span class="n">label</span><span class="o">=</span><span class="s2">&quot;Naive Guess&quot;</span><span class="p">,</span> <span class="n">c</span><span class="o">=</span><span class="s1">&#39;k&#39;</span><span class="p">)</span> +<span class="n">plt</span><span class="o">.</span><span class="n">axhline</span><span class="p">(</span><span class="mi">1</span> <span class="o">-</span> <span class="n">naive_guess</span><span class="p">,</span> <span class="n">label</span><span class="o">=</span><span class="s2">&quot;Inverse Naive Guess&quot;</span><span class="p">,</span> <span class="n">c</span><span class="o">=</span><span class="s1">&#39;k&#39;</span><span class="p">)</span> +<span class="n">plt</span><span class="o">.</span><span class="n">gcf</span><span class="p">()</span><span class="o">.</span><span class="n">set_size_inches</span><span class="p">(</span><span class="mi">12</span><span class="p">,</span> <span class="mi">6</span><span class="p">)</span> +<span class="n">plt</span><span class="o">.</span><span class="n">legend</span><span class="p">();</span> +</pre></div> + +</div> +</div> +</div> + +<div class="output_wrapper"> +<div class="output"> + + +<div class="output_area"><div class="prompt"></div> + + +<div class="output_png output_subarea "> +<img src="data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAsYAAAFwCAYAAAC7E71AAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz +AAALEgAACxIB0t1+/AAAIABJREFUeJzt3Xl8VfWd//HXNxDWQAiJbElYpK503KZgrdpiGUH9iVor +CCi2OFXb2hmxdWHsaEOXcURt1dZldKw74tJ26r4UjdW2bq3ISJXNDkgKsgUjsoTl+/vjhmsIWclN +7k3yej4e55F7zvnecz73cI1vvnzP+YYYI5IkSVJHl5XuAiRJkqRMYDCWJEmSMBhLkiRJgMFYkiRJ +AgzGkiRJEmAwliRJkoBGBOMQwp0hhA9DCPPraXNTCGFxCGFeCOGw1JYoSZIktbzG9BjfBYyra2cI +4URgeIxxP+AC4LYU1SZJkiS1mgaDcYzxFaC8nianAvdWtX0NyA0h9E9NeZIkSVLrSMUY40Lgg2rr +ZVXbJEmSpDbDm+8kSZIkoHMKjlEGFFdbL6ratocQQkzB+SRJkqQGxRhDU9o3NhiHqqU2jwEXAg+F +ED4PbIgxflhPgU2pT2o1JSUllJSUpLsMaQ9+N5Wp/G4qk4XQpEwMNCIYhxBmA6OB/BDCcuAHQBcg +xhhvjzE+FUI4KYSwBPgEmNbkKiRJkqQ0azAYxxinNKLNd1JTjiRJkpQe3nwnVRk9enS6S5Bq5XdT +mcrvptqb0JpjfkMI0THGkiRJamkhhBa7+U6SJLUjQ4cOZdmyZekuQ2q2IUOG8H//938pOZY9xpIk +dUBVvWnpLkNqtrq+y3vTY+wYY0mSJAmDsSRJkgQYjCVJkiTAYCxJktSgk046ifvuuy/dZaiFdbib +7371K1i/Pq0l7CaE+pesrIbbNHcRdOsGRx+d7iokqfVk+s13c+bM4YYbbuCdd94hJyeHYcOGcc45 +5/Ctb30r3aWl1D333MO0adOYNWsWl1xySXJ7cXExDzzwAF/84heZOXMmP/nJT+jWrVty349+9CNO +P/30eo9dWlrKl7/8Za655houvfTSFv0c6ZTKm++IMbbaAkQXFxcXFxeXzFgy1XXXXRcHDBgQf/3r +X8eNGzfGGGOcN29ePPvss2NlZWWaq0utu+++O+bn58d99tkn+VljjLGoqCi+9NJLMcYYS0pK4tSp +U5P7nn322di9e/e4evXqeo89bdq0WFBQED/72c+2TPH12L59e6udq4HveJOyaqsPpWhqgS4urbEc +fXTk979Pfx0uLi4urbVkqoqKCn7wgx9w66238pWvfIWePXsCcOihh3LfffeRnZ0NwFNPPcURRxxB +bm4uQ4YMYebMmcljvPTSSxQXF+923GHDhvHCCy8A8MYbbzBy5Ehyc3MZOHBgsqd269atTJ06lYKC +AvLy8jjyyCNZs2YNAMcddxy//OUvAXj//fcZM2YMBQUF9OvXj7PPPpuKiordznX99ddz6KGHkpeX +x+TJk6msrKzzMx900EEcddRRXH/99Y26RmPHjqVXr14sXbq0zjabNm3i0Ucf5eabb2bx4sX85S9/ +2W3/K6+8wtFHH01eXh5Dhgzh3nvvBWDLli1873vfY+jQoeTl5fHFL36RrVu3NnhNZ86cyYQJE5g6 +dSp9+vThnnvu4Y033uALX/gCeXl5FBYW8i//8i9s3749+f4FCxYwduxY8vPzGThwIP/5n//Jhx9+ +SM+ePSkvL0+2+8tf/kK/fv3YsWNHnZ83Vd9xxxhLQFERrFiR7iokSX/605+orKzklFNOqbddTk4O +9913Hx999BFPPvkkt912G4899lhyf6hnrOBFF13E9OnT+eijj1i6dCkTJ04EEsMaKioqKCsrY/36 +9dx222107959j/fHGLniiitYtWoV7777LitWrKCkpGS3No888gjPPfccf/vb33j77be5++6766wn +hMCPfvQjbrjhBjZs2FDv5wZ48skn2bZtGwcffHCdbX71q1/Rq1cvJkyYwNixY7nnnnuS+5YvX85J +J53ERRddxNq1a5k3bx6HHXYYAN/73vd46623ePXVV1m/fj2zZs0iKysrWWd9HnvsMSZOnMiGDRs4 +66yz6Ny5MzfccAPr16/nT3/6Ey+88AK33HILABs3buT444/npJNOYuXKlSxZsoQxY8bQv39/jjvu +OB5++OHkce+//34mT55Mp06dGrw2zWUwljAYS1KmWLt2LQUFBckwBiR7Nnv06MErr7wCwBe/+EVG +jBgBwGc/+1kmTZrESy+91KhzdOnShSVLlrBu3Tp69OjBqFGjAMjOzmbdunUsWrSIEAKHH344OTk5 +e7x/+PDhjBkzhs6dO5Ofn8/FF1+8x7kvuugi+vfvT58+fRg/fjzz5s2rt6ZDDjmE448/nmuuuabW +/Q899BB9+/YlJyeH0047jSuuuILevXvXebx7772XSZMmEUJgypQpzJkzJ9njOnv2bI4//ngmTpxI +p06dyMvL45BDDiHGyF133cVNN93EgAEDCCHw+c9/PtlL35CjjjqK8ePHA9C1a1cOP/xwRo0aRQiB +wYMHc/755yev0xNPPMHAgQOZPn06Xbp0oWfPnowcORKAc845J3mj486dO3nwwQeZOnVqo2poLoOx +hMFYkmqTjhu88/PzWbt2LTt37kxu+8Mf/kB5eTkFBQXJ7a+99hpf/vKX6devH3369OG//uu/WLt2 +baPOceedd7Jw4UIOPPBAjjzySJ588kkApk6dyrhx45g0aRJFRUVcfvnltf7z/erVq5k8eTJFRUX0 +6dOHs88+e49z9+/fP/m6R48ebNy4scG6fvjDH3LrrbeyevXqPfadeeaZrF+/no0bN7J06VLuuece +7rjjjlqPs2LFCl588UWmTJkCwCmnnMLmzZuTn/ODDz5g+PDhe7xv7dq1bN26lX333bfBWmtTc6jF +4sWLGT9+PAMHDqRPnz58//vfT16numoAOPXUU3n33XdZtmwZzz33HH369OFzn/vcXtXUVAZjCYOx +JNUmxuYvTXXUUUfRtWtXfvvb39ZSz6cHPOusszjttNMoKytjw4YNXHDBBcn9PXv2ZNOmTcm2O3bs +SI4VhkSP7+zZs1mzZg2XXXYZZ5xxBps3b6Zz585ceeWVLFiwgD/+8Y888cQTybG31V1xxRVkZWWx +YMECNmzYwP3335+ScdsHHHAAp59+Oj/5yU/qHbYwePBgTjzxRB5//PFa9997773EGJOhdPjw4Wzd +ujU5nKK4uJglS5bs8b6CggK6detW69jlhq4p7DnU4lvf+hYHHXQQS5cuZcOGDfzkJz9JXqfi4uI6 +x0h37dqViRMnct9993H//fe3Wm8xGIwlwGAsSZkiNzeXq666im9/+9v86le/YuPGjcQYmTdv3m7B +bOPGjeTl5ZGdnc3rr7/O7Nmzk/v2339/tmzZwtNPP8327dv58Y9/vNvNbw888ECy5zI3N5cQAllZ +WZSWlvLOO++wc+dOcnJyyM7OrnVc68cff0xOTg69evWirKyMa6+9NmWf/6qrruKuu+7aY6xx9eC9 +YsUKnnnmGT772c/Weox7772XkpIS5s2bx9tvv83bb7/No48+ypNPPkl5eTlnnXUWc+fO5dFHH2XH +jh2sX7+et99+mxAC06ZN47vf/S4rV65k586dvPrqq2zbtq3Ba1qbjz/+mN69e9OjRw/ee+89br31 +1uS+k08+mVWrVnHTTTdRWVnJxo0bef3115P7p06dyt13383jjz9uMJZam8FYkjLHpZdeyk9/+lNm +zZrFgAEDGDBgAN/61reYNWsWX/jCFwC45ZZbuPLKK8nNzeXHP/4xZ555ZvL9vXv35pZbbuGf//mf +KSoqolevXhQVFSX3P/PMM4wYMYLevXtz8cUX89BDD9G1a1dWrVrFGWecQW5uLiNGjOC4447j7LPP +BnbvDf3BD37An//85+T44a9+9au71d/QTWr1GTp0KFOnTuWTTz7ZbfvDDz9M79696d27N0ceeSTH +HnssV1111R7vf+2111i+fDnf/va36devX3IZP348++23Hw8++CDFxcU89dRTXHfddfTt25fDDz+c ++fPnA3DdddfxD//wD4wcOZL8/HxmzJjBzp07G7ymtbnuuut44IEH6N27NxdccAGTJk1K7svJyeH5 +55/nscceY8CAAey///6UlpYm93/hC18gKyuLI444Yo8hGi2pw03wIdVm+3bo0QM++QQaeY+BJLVp +mT7BhzRmzBjOOusszj333HrbpXKCD3uMJaBzZ9hnH1i1Kt2VSJKkN954g7feemu3fwloDQZjqYrD +KSRJSr+vf/3rjB07lhtvvDE5wUtr6dyqZ5MymMFYkqT0q28ylJZmj7FUxWAsSVLHZjCWqhiMJUnq +2AzGUhWDsSRJHZvBWKpiMJYkqWMzGEtVDMaSJHVsBmOpyqBBsHIl7NiR7kokSc1x9dVXc/7556e7 +DLVBBmOpSteukJcHq1enuxJJ6tiGDh1K//792bx5c3LbnXfeyXHHHdeo9//bv/0bt99+e4vU9vzz +z/PlL3+Z3r17s88++3DEEUdw7bXXUllZ2SLnU+syGEvVOJxCktIvhMDOnTu54YYb9tieTo888ggT +Jkzg7LPPZvny5axZs4aHHnqIFStW8MEHH6S1NqWGwViqpqgIysrSXYUk6dJLL+X666+noqKi1v3T +p09n8ODB5ObmMnLkSF555ZXkvpkzZ3LOOecAcNJJJ3HLLbfs9t7DDjuM//mf/wHgvffeY+zYseTn +53PQQQfxyCOP1FnT9773PUpKSjj33HPp06cPAPvttx833ngjw4cPB2DatGlcddVVyfe89NJLFBcX +J9dXrlzJGWecQb9+/Rg+fDg///nPk/veeOMNRo4cSW5uLgMHDuSSSy4BYOvWrUydOpWCggLy8vI4 +8sgjWbNmTcMXUU1mMJaqscdYkjLD5z73OUaPHs21115b6/5Ro0Yxf/58ysvLmTJlChMmTKh1OMPk +yZOZPXt2cv2vf/0ry5cv5+STT2bTpk2MHTuWs88+m7Vr1zJnzhwuvPBC3nvvvT2Os3DhQsrKyjj9 +9NOb/Fl29XTHGBk/fjyHH344K1euZO7cudx44408//zzAFx00UVMnz6djz76iKVLlzJx4kQA7rnn +HioqKigrK2P9+vXcdtttdO/evcl1qGEGY6kag7EkZY6ZM2fyi1/8gnXr1u2xb8qUKfTp04esrCwu +vvhitm7dysKFC/do95WvfIW33347OdRh9uzZnH766XTu3JknnniCYcOGcc455xBC4NBDD+X000+v +tdd47dq1AAwYMCC5bfLkyeTl5dGzZ08eeOCBBj/P66+/ztq1a/n+979Pp06dGDp0KN/4xjeYM2cO +ANnZ2SxZsoR169bRo0cPRo0aldy+bt06Fi1aRAiBww8/nJycnEZcQTWVwViqxmAsSZ8KITR7aY4R +I0Zw8sknc/XVV++x77rrruPggw8mLy+PvLw8KioqkuG1upycHE466aRk+HzwwQc5++yzAVi2bBmv +vvoqffv2pW/fvuTl5TF79mxWrVq1x3Hy8/OBxFCIXR588EHKy8s54ogj2NGIRxotX76csrKy3c53 +9dVXs7rqru9f/vKXLFy4kAMPPJAjjzySJ598EoCpU6cybtw4Jk2aRFFRETNmzGjU+dR0BmOpGoOx +JH0qxtjspblKSkq44447KKt2A8jLL7/Mtddey6OPPkp5eTnl5eX07t27zvPtGk7x6quvsnXrVkaP +Hg1AcXExo0ePZv369axfv57y8nIqKiq4+eab9zjGAQccQGFhIb/+9a/rrbdnz55s2rQpuV49SBcX +F7Pvvvvudr6PPvqIxx9/HIDhw4cze/Zs1qxZw2WXXcYZZ5zB5s2b6dy5M1deeSULFizgj3/8I48/ +/jj33ntvo6+hGs9gLFVjMJakzDJ8+HDOPPNMbrrppuS2jRs3kp2dTX5+PpWVlfzwhz/k448/rvMY +J510EsuWLeOqq67izDPPTG4/+eSTWbRoEffffz/bt29n27ZtvPnmm7WOMQ4hcN111zFz5kzuvPNO +NmzYAMDixYv58MMPk+0OO+wwnnrqKcrLy1m1ahU33nhjct+oUaPo1asXs2bNYsuWLezYsYMFCxbw +5ptvAvDAAw8ke71zc3MJIZCVlUVpaSnvvPMOO3fuJCcnh+zsbLKyjHAtwasqVVNYmHgqRQo6OSRJ +e6nmEIyrrrqKTZs2JbePGzeOcePGsf/++zNs2DB69Oix25MfaurSpQunn346c+fOZcqUKcntOTk5 +PPfcc8yZM4dBgwYxaNAgZsyYUecziSdOnMjDDz/Mfffdx+DBg9lnn32YNGkS3/zmN5kwYQKQGPZw +yCGHMHToUE444QQmTZqUfH9WVhZPPPEE8+bNY9iwYfTr14/zzjsv+eSNZ555hhEjRtC7d28uvvhi +HnroIbp27cqqVas444wzyM3NZcSIERx33HFMnTp17y6u6hVS8c8cjT5ZCLE1zyftjfx8WLgQCgrS +XYkktZwQQkqGOkjpVtd3uWp7kwa622Ms1eBwCkmSOiaDsVSDwViSpI7JYCzVUFhoMJYkqSMyGEs1 +2GMsSVLHZDCWajAYS5LUMRmMpRoMxpIkdUwGY6kGg7EkSR2TwViqYVcw9vGekiR1LAZjqYbevSEr +Cz76KN2VSJI6oldeeYWDDjoo3WV0SAZjqRYOp5Ck9Bk2bBgvvPBCusvYa1//+tfJysrizTffTG5b +unQpWVmNi13HHHMM7777bovUtmTJEiZPnky/fv3o06cPBxxwABdddBF///vfW+R8bY3BWKqFwViS +OpYdO3ak7FghBPLz8/n3f//3Pban05IlSzjyyCMpKipi3rx5bNiwgT/84Q8MHz6cV155Ja21ZQqD +sVQLg7EkZYZ77rmHY489lksvvZS+ffsyfPhwnn32WQAefvhhRo4cuVv7n/3sZ5x22mkAVFZWcskl +lzBkyBAGDhzIt7/9bbZu3QrASy+9RHFxMbNmzWLgwIGce+65rFu3jvHjx5OXl0d+fj5f+tKXksdd +uXIlZ5xxBv369WP48OH8/Oc/r7fur33ta8yfP5+XX3651v133303Bx98ML179+Yzn/kMt99+e3Lf +rtoAZs2axYQJE3Z770UXXcT06dMBqKio4Bvf+AaDBg2iuLiYK6+8kljHTTIzZ87kmGOO4dprr2XQ +oEEAFBQU8K//+q9MnDhxt+tdXVZWFu+//36D17S+63fNNddQVFRE7969Oeigg3jxxRfrvX7pYjCW +amEwlqTM8frrr3PQQQexbt06Lr30Us4991wAxo8fz6JFi1i6dGmy7YMPPshZZ50FwOWXX86SJUuY +P38+S5YsoaysjB/+8IfJtqtWrWLDhg0sX76c22+/neuvv57i4mLWrVvH6tWr+Y//+A8AYoyMHz+e +ww8/nJUrVzJ37lxuvPFGnn/++Tpr7tGjB1dccQVXXHFFrfv79+/PU089RUVFBXfddRcXX3wx8+bN +S+7f1bs8adIknn76aT755BMAdu7cySOPPJL8jF/72tfo0qUL77//Pm+99RbPP/88//3f/13rOX/3 +u9/x1a9+tf6LzZ4929XX67umdV2/RYsWcfPNN/PnP/+ZiooKnn32WYYOHdpgHelgMJZqYTCWpMwx +ZMgQzj33XEIIfO1rX2PlypWsXr2a7t27c8opp/Dggw8CsHjxYhYuXMgpp5wCwB133MHPfvYzcnNz +6dmzJzNmzEi2BejUqRMzZ84kOzubrl27kp2dzcqVK/nb3/5Gp06dOProowF44403WLt2Ld///vfp +1KkTQ4cO5Rvf+AZz5sypt+7zzz+f5cuXJ3u4qzvxxBOT4fDYY49l7NixtfYuDx48mCOOOILf/OY3 +AMydO5eePXsycuRIPvzwQ55++ml+9rOf0a1bNwoKCpg+ffpun7G6tWvXMmDAgOT6zTffTF5eHr16 +9eKCCy6o83NU74Gu75rWdf06depEZWUl77zzDtu3b2fw4MEMGzas3muXLgZjqRZFRVBWlu4qJCm9 +QgjNXlKhepjr3r07ABs3bgRgypQpyWA2e/ZsTjvtNLp27cqaNWvYtGkT//iP/0jfvn3p27cvJ554 +IuvWrUsea5999iE7Ozu5ftlllzF8+HDGjh3LZz7zGa655hoAli1bRllZWfI4eXl5XH311axevbre +urt06cKVV17JlVdeuce+p59+mqOOOor8/Hzy8vJ4+umnWbt2ba3HmTx5cvIzPvjgg0yZMgWA5cuX +s23bNgYOHJis65vf/Gadx8nPz2flypXJ9QsvvJDy8nKmT5/Otm3b6v0sQIPX9NJLL631+g0fPpwb +briBkpIS+vfvz5QpU3arI5MYjKVa2GMsSYmewuYuLe34449nzZo1vP3228yZMycZGgsKCujRowcL +Fixg/fr1rF+/ng0bNvBRtWdx1gzuPXv25LrrrmPp0qU89thj/PSnP+XFF1+kuLiYfffdN3mc8vJy +PvroIx5//PEG65s2bRobNmzg17/+dXJbZWUlZ5xxBpdddhlr1qyhvLycE088sc7rNWHCBEpLSykr +K+M3v/lN8jMWFxfTrVs31q1bl6xrw4YNzJ8/v9bjjBkzZrc6atOzZ082bdqUXF+1alXydUPXNCcn +p9brB4khIS+//DLLli0DYMaMGQ1durQwGEu1MBhLUtvQuXNnJkyYwKWXXkp5eTnHH388kAi95513 +HtOnT2fNmjUAlJWV8dxzz9V5rCeffDI5XrlXr1507tyZrKwsRo0aRa9evZg1axZbtmxhx44dLFiw +YLfHsdWlU6dOlJSUJHtPIRGMKysrKSgoICsri6effrreugoKCvjSl77EtGnT2HfffTnggAOARE/6 +2LFjufjii/n444+JMfL+++/z+9//vtbjlJSU8PLLL3PJJZckH8+2du3a3R4Nd+ihh7JgwQLmz5/P +1q1bmTlzZvIvEA1d07qu36JFi3jxxReprKykS5cudO/evdGPrmttmVmVlGZ5ebB1K1T9S50kqRU1 +NASj5v7Jkyczd+5cJk6cuFvguuaaa/jMZz7D5z//efr06cPYsWNZtGhRncddvHgx//RP/0SvXr04 ++uijufDCC/nSl75EVlYWTzzxBPPmzWPYsGH069eP8847j4qKikbXN3DgwOT2nJwcbrrpJiZMmEDf +vn2ZM2cOp556ar2fecqUKcydOzd5090u9957L5WVlRx88MH07duXCRMm7NbLW91+++3Ha6+9xgcf +fMChhx5Kbm4uxx57LIWFhfzoRz9KtrnqqqsYM2YM+++//x5PqKjvmtZ1/bZu3cqMGTPYZ599GDRo +EGvWrOHqq6+u9/OmS2iNf+ZIniyE2Jrnk5pj//3h8ceh6i/mktSuhBBaZaiD1NLq+i5XbW/SQHd7 +jKU6OJxCkqSOxWAs1cFgLElSx9KoYBxCOCGE8F4IYVEI4fJa9vcOITwWQpgXQvjfEMLXU16p1MoM +xpIkdSwNBuMQQhbwC2AcMAKYHEI4sEazC4EFMcbDgOOA60MInVNdrNSaDMaSJHUsjekxHgUsjjEu +izFuA+YANW+djECvqte9gHUxxu2pK1NqfQZjSZI6lsYE40Lgg2rrK6q2VfcL4OAQwt+Bt4GLUlOe +lD6FhQZjSZI6klTdfDcOeCvGOAg4HLg5hJCTomNLaWGPsSRJHUtjxgGXAYOrrRdVbatuGnA1QIxx +aQjhb8CBwB5TwpSUlCRfjx49mtGjRzepYKm17LMPVFTAli3QrVu6q5Gk1BoyZEiDE2lIbcGQIUMA +KC0tpbS0tFnHanCCjxBCJ2AhMAZYCbwOTI4xvlutzc3A6hjjzBBCfxKB+NAY4/oax3KCD7Upw4bB +734Hw4enuxJJktQULTLBR4xxB/Ad4DlgATAnxvhuCOGCEML5Vc1+DHwhhDAfeB64rGYoltoih1NI +ktRxNOqRajHGZ4ADamz7r2qvV5IYZyy1KwZjSZI6Dme+k+phMJYkqeMwGEv1MBhLktRxGIylehiM +JUnqOAzGUj0MxpIkdRwGY6keBmNJkjqOBp9jnNKT+RxjtTE7dkD37vDJJ5Cdne5qJElSY7XIc4yl +jqxTJ+jfH1auTHclkiSppRmMpQY4nEKSpI7BYCw1wGAsSVLHYDCWGmAwliSpYzAYSw0wGEuS1DEY +jKUGGIwlSeoYDMZSAwzGkiR1DAZjqQEGY0mSOgYn+JAaUFkJOTmweXPiucaSJCnzOcGH1AK6dIH8 +fPjww3RXIkmSWpLBWGqEwkKHU0iS1N4ZjKVGcJyxJEntn8FYagSDsSRJ7Z/BWGoEg7EkSe2fwVhq +BIOxJEntn8FYagSDsSRJ7Z/BWGoEg7EkSe2fE3xIjbB5M/Tpk/iZ5V8nJUnKeE7wIbWQ7t2hVy9Y +uzbdlUiSpJZiMJYayeEUkiS1bwZjqZEMxpIktW8GY6mRDMaSJLVvBmOpkYqKoKws3VVIkqSWYjCW +GskeY0mS2jeDsdRIBmNJkto3g7HUSAZjSZLaN4Ox1EiFhYlg7Bw1kiS1TwZjqZF69YLsbNiwId2V +SJKklmAwlprA4RSSJLVfBmOpCQzGkiS1XwZjqQkMxpIktV8GY6kJDMaSJLVfBmOpCXY9mUKSJLU/ +BmOpCewxliSp/TIYS01gMJYkqf0yGEtNYDCWJKn9MhhLTdCnD2zfDhUV6a5EkiSlmsFYaoIQEr3G +ZWXprkSSJKWawVhqIodTSJLUPhmMpSYyGEuS1D4ZjKUmMhhLktQ+GYylJjIYS5LUPhmMpSYyGEuS +1D4ZjKUmMhhLktQ+GYylJvJxbZIktU8GY6mJCgpg40bYvDndlUiSpFQyGEtNFAIUFtprLElSe2Mw +lvaC44wlSWp/DMbSXjAYS5LU/hiMpb1gMJYkqf0xGEt7wWAsSVL7YzCW9oLBWJKk9qdRwTiEcEII +4b0QwqIQwuV1tBkdQngrhPBOCOHF1JYpZRaDsSRJ7U/nhhqEELKAXwBjgL8Db4QQfhtjfK9am1zg +ZmBsjLEshFDQUgVLmcBgLElS+9OYHuNRwOIY47IY4zZgDnBqjTZTgF/FGMsAYoxrU1umlFn69YP1 +66GyMt3G3FU+AAATCUlEQVSVSJKkVGlMMC4EPqi2vqJqW3X7A31DCC+GEN4IIUxNVYFSJurUCQYM +gL//Pd2VSJKkVGlwKEUTjnME8GWgJ/CnEMKfYoxLUnR8KePsGk4xdGi6K5EkSanQmGBcBgyutl5U +ta26FcDaGOMWYEsI4ffAocAewbikpCT5evTo0YwePbppFUsZwnHGkiRljtLSUkpLS5t1jBBjrL9B +CJ2AhSRuvlsJvA5MjjG+W63NgcDPgROArsBrwJkxxr/WOFZs6HxSW/Hd78KgQXDJJemuRJIk1RRC +IMYYmvKeBnuMY4w7QgjfAZ4jMSb5zhjjuyGECxK74+0xxvdCCM8C84EdwO01Q7HU3hQVwfLl6a5C +kiSlSoM9xik9mT3GakcefjixPPpouiuRJEk17U2PsTPfSXvJMcaSJLUvBmNpLxmMJUlqXxxKIe2l +bdugZ0/YtAk6p+rBh5IkKSUcSiG1ouxsKCiAVavSXYkkSUoFg7HUDA6nkCSp/TAYS81QVARlNae7 +kSRJbZLBWGoGe4wlSWo/DMZSMxiMJUlqPwzGUjMYjCVJaj8MxlIzGIwlSWo/DMZSMxiMJUlqP5zg +Q2qGLVsgNxc2b4Ys/5opSVLGcIIPqZV165YIxmvWpLsSSZLUXAZjqZkcTiFJUvtgMJaayWAsSVL7 +YDCWmqmw0GAsSVJ7YDCWmskeY0mS2geDsdRMBmNJktoHg7HUTAZjSZLaB4Ox1EwGY0mS2gcn+JCa +aeNG2Gcf2LQJQpMeIy5JklqKE3xIaZCTk5joY/36dFciSZKaw2AspYDDKSRJavsMxlIKGIwlSWr7 +DMZSChiMJUlq+wzGUgoYjCVJavsMxlIKGIwlSWr7DMZSChiMJUlq+wzGUgoUFUFZWbqrkCRJzWEw +llLAHmNJkto+g7GUAr17Q4xQUZHuSiRJ0t4yGEspEIK9xpIktXUGYylFDMaSJLVtBmMpRQzGkiS1 +bQZjKUUMxpIktW0GYylFDMaSJLVtBmMpRQzGkiS1bQZjKUUKCw3GkiS1ZQZjKUXsMZYkqW0zGEsp +kp8PmzbBJ5+kuxJJkrQ3DMZSiuya5KOsLN2VSJKkvWEwllLI4RSSJLVdBmMphQzGkiS1XQZjKYUM +xpIktV0GYymFDMaSJLVdBmMphQzGkiS1XQZjKYUMxpIktV0GYymFDMaSJLVdIcbYeicLIbbm+aTW +tnMndOsGFRWJn5IkKT1CCMQYQ1PeY4+xlEJZWTBoEPz97+muRJIkNZXBWEoxZ7+TJKltMhhLKeY4 +Y0mS2iaDsZRiBmNJktomg7GUYgZjSZLaJoOxlGIGY0mS2iaDsZRiBmNJktomg7GUYgZjSZLaJif4 +kFJs+3bo0QM++QSys9NdjSRJHVOLTfARQjghhPBeCGFRCOHyetqNDCFsCyGc3pQipPakc2fo1w9W +rUp3JZIkqSkaDMYhhCzgF8A4YAQwOYRwYB3t/hN4NtVFSm2NwykkSWp7GtNjPApYHGNcFmPcBswB +Tq2l3b8AjwKrU1if1CYVFhqMJUlqaxoTjAuBD6qtr6jalhRCGAScFmO8FWjSWA6pPbLHWJKktidV +T6W4Aag+9thwrA7NYCxJUtvTuRFtyoDB1daLqrZV9zlgTgghAAXAiSGEbTHGx2oerKSkJPl69OjR +jB49uoklS5mvqAjefDPdVUiS1HGUlpZSWlrarGM0+Li2EEInYCEwBlgJvA5MjjG+W0f7u4DHY4y/ +rmWfj2tTh/DyyzBjBvzhD+muRJKkjmlvHtfWYI9xjHFHCOE7wHMkhl7cGWN8N4RwQWJ3vL3mW5pS +gNQeOZRCkqS2xwk+pBawdSv06gWbN0OnTumuRpKkjqfFJviQ1DRdu0JeHqz24YWSJLUZBmOphTic +QpKktsVgLLUQg7EkSW2LwVhqIQZjSZLaFoOx1EIMxpIktS0GY6mFFBVBWc2pcCRJUsYyGEstxB5j +SZLaFoOx1EIMxpIktS1O8CG1kE2bID8/8TM06fHikiSpuZzgQ8ogPXoklnXr0l2JJElqDIOx1IIc +TiFJUtthMJZakMFYkqS2w2AstSCDsSRJbYfBWGpBBmNJktoOg7HUggzGkiS1HQZjqQUVFhqMJUlq +KwzGUguyx1iSpLbDYCy1oF3B2HltJEnKfAZjqQX17g1ZWfDRR+muRJIkNcRgLLUwh1NIktQ2GIyl +FmYwliSpbTAYSy3MYCxJUttgMJZamMFYkqS2wWAstTCDsSRJbYPBWGphBmNJktoGg7HUwgzGkiS1 +DQZjqYUZjCVJahsMxlILy8uDykrYuDHdlUiSpPoYjKUWFkKi17isLN2VSJKk+hiMpVbgcApJkjKf +wVhqBQZjSZIyn8FYagUGY0mSMp/BWGoFBmNJkjKfwVhqBQZjSZIyn8FYagUGY0mSMp/BWGoFBmNJ +kjKfwVhqBQUF8PHHsGVLuiuRJEl1MRhLrSArCwYOdJIPSZIymcFYaiUOp5AkKbMZjKVWYjCWJCmz +GYylVmIwliQpsxmMpVZiMJYkKbMZjKVWYjCWJCmzGYylVmIwliQpsxmMpVZiMJYkKbOFGGPrnSyE +2JrnkzLJjh3QvTts3AhduqS7GkmS2rcQAjHG0JT32GMstZJOnaB/f1i5Mt2VSJKk2hiMpVbkcApJ +kjKXwVhqRQZjSZIyl8FYakVFRVBWlu4qJElSbQzGUiuyx1iSpMxlMJZakcFYkqTMZTCWWpHBWJKk +zGUwllqRwViSpMzlBB9SK6qshJwc2Lw58VxjSZLUMpzgQ8pwXbpAfj58+GG6K5EkSTUZjKVW5nAK +SZIyk8FYamUGY0mSMlOjgnEI4YQQwnshhEUhhMtr2T8lhPB21fJKCOEfUl+q1D4YjCVJykwNBuMQ +QhbwC2AcMAKYHEI4sEaz94EvxhgPBX4M3JHqQqX2orDQYCxJUiZqTI/xKGBxjHFZjHEbMAc4tXqD +GOOrMcaPqlZfBQpTW6bUfthjLElSZmpMMC4EPqi2voL6g+83gKebU5TUnhmMJUnKTJ1TebAQwnHA +NOCYVB5Xak8MxpIkZabGBOMyYHC19aKqbbsJIRwC3A6cEGMsr+tgJSUlydejR49m9OjRjSxVah8K +C6GsDHbuhCyfCyNJUkqUlpZSWlrarGM0OPNdCKETsBAYA6wEXgcmxxjfrdZmMDAXmBpjfLWeYznz +nQQUFMBf/wr9+qW7EkmS2qe9mfmuwR7jGOOOEMJ3gOdIjEm+M8b4bgjhgsTueDtwJdAXuCWEEIBt +McZRTf8IUsewaziFwViSpMzRYI9xSk9mj7EEwMknw/nnwymnpLsSSZLap73pMXaEo5QG3oAnSVLm +MRhLaWAwliQp8xiMpTQwGEuSlHkMxlIaGIwlSco8BmMpDYqKEs8yliRJmcNgLKVBYWGix9iHtEiS +lDkMxlIa9OoF2dmwYUO6K5EkSbsYjKU0cZyxJEmZxWAspYnBWJKkzGIwltLEYCxJUmYxGEtpYjCW +JCmzGIylNDEYS5KUWQzGUpoYjCVJyiwGYylNDMaSJGUWg7GUJrsm+ZAkSZnBYCylSZ8+sH07VFSk +uxJJkgQGYyltQkgMpygrS3clkiQJDMZSWjnOWJKkzGEwltLIYCxJUuYwGEtpZDCWJClzGIylNDIY +S5KUOQzGUhoZjCVJyhwGYymNDMaSJGUOg7GURgZjSZIyh8FYSqOCAvjkE9i0Kd2VSJIkg7GURiEk +poZ2kg9JktLPYCylmbPfSZKUGQzGUpo5zliSpMxgMJbSzGAsSVJmMBhLaWYwliQpM3ROdwFSR1dU +BHPnprsKZaqtW2HDBujUac8lK+vTnyGku9LmixF27EgsO3d++rrmsm1bYtm+/dPXNdcb83pv3rN9 +e2KJ8dOaq9ef7tfpOB8kvn+7voO7XqdiW2PbVq+r+s/atjVm3968vyF1/Tda33+7e7Mv04+X6QzG +UprZYyxIBOCFC2HBgsTy178mfi5fDr167R4Ma4bGGHcPyXUF6L3ZVn17Y0JrXfsas73656irns6d +E0t2dmKp63V9++p63aNH49rtqm2X6gEgE1639vlqhsTqS2O3Nff91cNyzZ97u29v3l+XusJzfaF6 +b/Zl+vFa23PPNf09BmMpzQzGHUtlZSIA7wq+u5Zly2DYMDj4YBgxAiZNSvzcbz/o0qX+Y+7qaa0t +fDZnW/XtO3c2HFobG7Dbe8+3pLYrxFaM9SGE2Jrnk9qCHTuge3fYuLHhAKS2o7ISFi/ePfwuWAB/ ++xsMHZoIvdWX/ff3z1+SUimEQIyxSX/dNhhLGWDIEHjppURgUtuybVvtAfj99xN/rtXD78EHwwEH +QNeu6a5aktq/vQnGDqWQMsCu4RQG48y1bRssWVJ7AC4u/nQIxFe+Av/+74kA3K1buquWJDWFwVjK +AIWFjjPOJNu3wxtvwAsvwP/+byIAL1mS+HPa1ft76qlwxRWJANy9e7orliSlgsFYygDegJd+H3wA +zz6bWObOTfyZHH88/L//B5ddBgcemHhqgSSp/TIYSxmgqCjxWC61ns2b4fe//zQMf/gh/NM/JYLw +jTfCoEHprlCS1NoMxlIGKCqCP/4x3VW0bzHCu+8mQvAzzySu96GHwrhxcNdd8I//mHhkmCSp4zIY +SxnAoRQto7wcfve7T3uFs7ISQfiCC+Chh6BPn3RXKEnKJAZjKQMYjFNjx47ETXPPPJMIwgsWwDHH +JMLwJZckbpRzAglJUl18jrGUAbZtg549YdOmxNSzarwVK3a/aW7QIDjhhEQYPuYYH5kmSR2VE3xI +bdigQfD664neY9Vt82Z4+eVPe4VXrUo8PWLcOBg7NvFINUmSnOBDasN2DacwGO+u+k1zzz4Lf/gD +HHKIN81JklLPYCxliI48zjhG2LIlMZTkk08Sy4IFn4bhEBJB+LzzYM4cb5qTJLUMg7GUITI5GMcI +lZWJwFo9vDZ3fdfrTZsgOzsxzrpHj8TPYcMSYfi7301MruFNc5KklmYwljJEcTHceiuUlibWY0ws +1V83tJ6Ktjt3Jsbx1gyvWVm7B9ddS13rubmJcdONad+jhzcdSpLSz5vvpAyxfn0iFIfw6QK1v07V +vtraZmVB9+57Btns7Bb9+JIkpZRPpZAkSZJoI0+lCA4UlCRJUgZq9WBsj7EkSZJa2t50xma1QB2S +JElSm2MwliRJkjAYS5IkSYDBWJIkSQIMxpIkSRJgMJYkSZKARgbjEMIJIYT3QgiLQgiX19HmphDC +4hDCvBDCYaktU5IkSWpZDQbjEEIW8AtgHDACmBxCOLBGmxOB4THG/YALgNtaoFapRZWWlqa7BKlW +fjeVqfxuqr1pTI/xKGBxjHFZjHEbMAc4tUabU4F7AWKMrwG5IYT+Ka1UamH+glem8rupTOV3U+1N +Y4JxIfBBtfUVVdvqa1NWSxtJkiQpY3nznSRJkgSEGGP9DUL4PFASYzyhan0GEGOM11RrcxvwYozx +oar194AvxRg/rHGs+k8mSZIkpUiMMTSlfedGtHkD+EwIYQiwEpgETK7R5jHgQuChqiC9oWYo3pvi +JEmSpNbSYDCOMe4IIXwHeI7E0Is7Y4zvhhAuSOyOt8cYnwohnBRCWAJ8Akxr2bIlSZKk1GpwKIUk +SZLUEbTazXeNmSRESocQwv+FEN4OIbwVQng93fWoYwsh3BlC+DCEML/atrwQwnMhhIUhhGdDCLnp +rFEdUx3fzR+EEFaEEP5StZyQzhrVMYUQikIIL4QQFoQQ/jeE8K9V25v8u7NVgnFjJgmR0mgnMDrG +eHiMcVS6i1GHdxeJ35XVzQB+F2M8AHgB+LdWr0qq/bsJ8NMY4xFVyzOtXZQEbAe+G2McARwFXFiV +M5v8u7O1eowbM0mIlC4BH12oDBFjfAUor7H5VOCeqtf3AKe1alESdX43IfE7VEqbGOOqGOO8qtcb +gXeBIvbid2drhYHGTBIipUsEng8hvBFCOC/dxUi16LfrST8xxlVAvzTXI1X3nRDCvBDCfzvMR+kW +QhgKHAa8CvRv6u9Oe8kkODrGeARwEol/fjkm3QVJDfCuaWWKW4B9Y4yHAauAn6a5HnVgIYQc4FHg +oqqe45q/Kxv83dlawbgMGFxtvahqm5R2McaVVT/XAL8hMfRHyiQfhhD6A4QQBgCr01yPBCR+b8ZP +H291BzAynfWo4wohdCYRiu+LMf62anOTf3e2VjBOThISQuhCYpKQx1rp3FKdQgg9qv6GSQihJzAW +eCe9VUkEdh+3+Rjw9arXXwN+W/MNUivZ7btZFTZ2OR1/fyp9fgn8NcZ4Y7VtTf7d2WrPMa56hMuN +fDpJyH+2yomleoQQhpHoJY4kJrx5wO+m0imEMBsYDeQDHwI/AP4HeAQoBpYBE2OMG9JVozqmOr6b +x5EYz7kT+D/ggtpmvpVaUgjhaOD3wP+S+P95BK4AXgcepgm/O53gQ5IkScKb7yRJkiTAYCxJkiQB +BmNJkiQJMBhLkiRJgMFYkiRJAgzGkiRJEmAwliRJkgCDsSRJkgTA/wdTBj79QR260QAAAABJRU5E +rkJggg== +" +> +</div> + +</div> + +</div> +</div> + +</div> +<div class="cell border-box-sizing text_cell rendered"> +<div class="prompt input_prompt"> +</div> +<div class="inner_cell"> +<div class="text_cell_render border-box-sizing rendered_html"> +<p>**sigh...** After all the effort and computational power, we're still at square one: we have yet to beat out the naive guess threshold. With PCA in play we end up performing terribly, but not terribly enough that we can guess against ourselves.</p> +<p>Let's try one last-ditch attempt using the entire data set:</p> + +</div> +</div> +</div> +<div class="cell border-box-sizing code_cell rendered"> +<div class="input"> +<div class="prompt input_prompt">In&nbsp;[8]:</div> +<div class="inner_cell"> + <div class="input_area"> +<div class=" highlight hl-ipython3"><pre><span></span><span class="k">def</span> <span class="nf">evaluate_gnb_full</span><span class="p">(</span><span class="n">dims</span><span class="p">):</span> + <span class="n">pca</span> <span class="o">=</span> <span class="n">PCA</span><span class="p">(</span><span class="n">n_components</span><span class="o">=</span><span class="n">dims</span><span class="p">)</span> + <span class="n">X_xform</span> <span class="o">=</span> <span class="n">pca</span><span class="o">.</span><span class="n">fit_transform</span><span class="p">(</span><span class="n">X</span><span class="p">)</span> + + <span class="n">gnb</span> <span class="o">=</span> <span class="n">GaussianNB</span><span class="p">()</span> + <span class="n">gnb</span><span class="o">.</span><span class="n">fit</span><span class="p">(</span><span class="n">X_xform</span><span class="p">,</span> <span class="n">y</span><span class="p">)</span> + <span class="k">return</span> <span class="n">gnb</span><span class="o">.</span><span class="n">score</span><span class="p">(</span><span class="n">X_xform</span><span class="p">,</span> <span class="n">y</span><span class="p">)</span> + +<span class="n">dim_range</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">arange</span><span class="p">(</span><span class="mi">1</span><span class="p">,</span> <span class="mi">21</span><span class="p">)</span> +<span class="n">plt</span><span class="o">.</span><span class="n">plot</span><span class="p">(</span><span class="n">dim_range</span><span class="p">,</span> <span class="p">[</span><span class="n">evaluate_gnb</span><span class="p">(</span><span class="n">dim</span><span class="p">)</span> <span class="k">for</span> <span class="n">dim</span> <span class="ow">in</span> <span class="n">dim_range</span><span class="p">],</span> <span class="n">label</span><span class="o">=</span><span class="s2">&quot;Gaussian NB Accuracy&quot;</span><span class="p">)</span> +<span class="n">plt</span><span class="o">.</span><span class="n">axhline</span><span class="p">(</span><span class="n">naive_guess</span><span class="p">,</span> <span class="n">label</span><span class="o">=</span><span class="s2">&quot;Naive Guess&quot;</span><span class="p">,</span> <span class="n">c</span><span class="o">=</span><span class="s1">&#39;k&#39;</span><span class="p">)</span> +<span class="n">plt</span><span class="o">.</span><span class="n">axhline</span><span class="p">(</span><span class="mi">1</span> <span class="o">-</span> <span class="n">naive_guess</span><span class="p">,</span> <span class="n">label</span><span class="o">=</span><span class="s2">&quot;Inverse Naive Guess&quot;</span><span class="p">,</span> <span class="n">c</span><span class="o">=</span><span class="s1">&#39;k&#39;</span><span class="p">)</span> +<span class="n">plt</span><span class="o">.</span><span class="n">gcf</span><span class="p">()</span><span class="o">.</span><span class="n">set_size_inches</span><span class="p">(</span><span class="mi">12</span><span class="p">,</span> <span class="mi">6</span><span class="p">)</span> +<span class="n">plt</span><span class="o">.</span><span class="n">legend</span><span class="p">();</span> +</pre></div> + +</div> +</div> +</div> + +<div class="output_wrapper"> +<div class="output"> + + +<div class="output_area"><div class="prompt"></div> + + +<div class="output_png output_subarea "> +<img src="data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAsYAAAFwCAYAAAC7E71AAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz +AAALEgAACxIB0t1+/AAAIABJREFUeJzt3Xl8VfWd//HXNxDWQAiJbElYpK503KZgrdpiGUH9iVor +CCi2OFXb2hmxdWHsaEOXcURt1dZldKw74tJ26r4UjdW2bq3ISJXNDkgKsgUjsoTl+/vjhmsIWclN +7k3yej4e55F7zvnecz73cI1vvnzP+YYYI5IkSVJHl5XuAiRJkqRMYDCWJEmSMBhLkiRJgMFYkiRJ +AgzGkiRJEmAwliRJkoBGBOMQwp0hhA9DCPPraXNTCGFxCGFeCOGw1JYoSZIktbzG9BjfBYyra2cI +4URgeIxxP+AC4LYU1SZJkiS1mgaDcYzxFaC8nianAvdWtX0NyA0h9E9NeZIkSVLrSMUY40Lgg2rr +ZVXbJEmSpDbDm+8kSZIkoHMKjlEGFFdbL6ratocQQkzB+SRJkqQGxRhDU9o3NhiHqqU2jwEXAg+F +ED4PbIgxflhPgU2pT2o1JSUllJSUpLsMaQ9+N5Wp/G4qk4XQpEwMNCIYhxBmA6OB/BDCcuAHQBcg +xhhvjzE+FUI4KYSwBPgEmNbkKiRJkqQ0azAYxxinNKLNd1JTjiRJkpQe3nwnVRk9enS6S5Bq5XdT +mcrvptqb0JpjfkMI0THGkiRJamkhhBa7+U6SJLUjQ4cOZdmyZekuQ2q2IUOG8H//938pOZY9xpIk +dUBVvWnpLkNqtrq+y3vTY+wYY0mSJAmDsSRJkgQYjCVJkiTAYCxJktSgk046ifvuuy/dZaiFdbib +7371K1i/Pq0l7CaE+pesrIbbNHcRdOsGRx+d7iokqfVk+s13c+bM4YYbbuCdd94hJyeHYcOGcc45 +5/Ctb30r3aWl1D333MO0adOYNWsWl1xySXJ7cXExDzzwAF/84heZOXMmP/nJT+jWrVty349+9CNO +P/30eo9dWlrKl7/8Za655houvfTSFv0c6ZTKm++IMbbaAkQXFxcXFxeXzFgy1XXXXRcHDBgQf/3r +X8eNGzfGGGOcN29ePPvss2NlZWWaq0utu+++O+bn58d99tkn+VljjLGoqCi+9NJLMcYYS0pK4tSp +U5P7nn322di9e/e4evXqeo89bdq0WFBQED/72c+2TPH12L59e6udq4HveJOyaqsPpWhqgS4urbEc +fXTk979Pfx0uLi4urbVkqoqKCn7wgx9w66238pWvfIWePXsCcOihh3LfffeRnZ0NwFNPPcURRxxB +bm4uQ4YMYebMmcljvPTSSxQXF+923GHDhvHCCy8A8MYbbzBy5Ehyc3MZOHBgsqd269atTJ06lYKC +AvLy8jjyyCNZs2YNAMcddxy//OUvAXj//fcZM2YMBQUF9OvXj7PPPpuKiordznX99ddz6KGHkpeX +x+TJk6msrKzzMx900EEcddRRXH/99Y26RmPHjqVXr14sXbq0zjabNm3i0Ucf5eabb2bx4sX85S9/ +2W3/K6+8wtFHH01eXh5Dhgzh3nvvBWDLli1873vfY+jQoeTl5fHFL36RrVu3NnhNZ86cyYQJE5g6 +dSp9+vThnnvu4Y033uALX/gCeXl5FBYW8i//8i9s3749+f4FCxYwduxY8vPzGThwIP/5n//Jhx9+ +SM+ePSkvL0+2+8tf/kK/fv3YsWNHnZ83Vd9xxxhLQFERrFiR7iokSX/605+orKzklFNOqbddTk4O +9913Hx999BFPPvkkt912G4899lhyf6hnrOBFF13E9OnT+eijj1i6dCkTJ04EEsMaKioqKCsrY/36 +9dx222107959j/fHGLniiitYtWoV7777LitWrKCkpGS3No888gjPPfccf/vb33j77be5++6766wn +hMCPfvQjbrjhBjZs2FDv5wZ48skn2bZtGwcffHCdbX71q1/Rq1cvJkyYwNixY7nnnnuS+5YvX85J +J53ERRddxNq1a5k3bx6HHXYYAN/73vd46623ePXVV1m/fj2zZs0iKysrWWd9HnvsMSZOnMiGDRs4 +66yz6Ny5MzfccAPr16/nT3/6Ey+88AK33HILABs3buT444/npJNOYuXKlSxZsoQxY8bQv39/jjvu +OB5++OHkce+//34mT55Mp06dGrw2zWUwljAYS1KmWLt2LQUFBckwBiR7Nnv06MErr7wCwBe/+EVG +jBgBwGc/+1kmTZrESy+91KhzdOnShSVLlrBu3Tp69OjBqFGjAMjOzmbdunUsWrSIEAKHH344OTk5 +e7x/+PDhjBkzhs6dO5Ofn8/FF1+8x7kvuugi+vfvT58+fRg/fjzz5s2rt6ZDDjmE448/nmuuuabW +/Q899BB9+/YlJyeH0047jSuuuILevXvXebx7772XSZMmEUJgypQpzJkzJ9njOnv2bI4//ngmTpxI +p06dyMvL45BDDiHGyF133cVNN93EgAEDCCHw+c9/PtlL35CjjjqK8ePHA9C1a1cOP/xwRo0aRQiB +wYMHc/755yev0xNPPMHAgQOZPn06Xbp0oWfPnowcORKAc845J3mj486dO3nwwQeZOnVqo2poLoOx +hMFYkmqTjhu88/PzWbt2LTt37kxu+8Mf/kB5eTkFBQXJ7a+99hpf/vKX6devH3369OG//uu/WLt2 +baPOceedd7Jw4UIOPPBAjjzySJ588kkApk6dyrhx45g0aRJFRUVcfvnltf7z/erVq5k8eTJFRUX0 +6dOHs88+e49z9+/fP/m6R48ebNy4scG6fvjDH3LrrbeyevXqPfadeeaZrF+/no0bN7J06VLuuece +7rjjjlqPs2LFCl588UWmTJkCwCmnnMLmzZuTn/ODDz5g+PDhe7xv7dq1bN26lX333bfBWmtTc6jF +4sWLGT9+PAMHDqRPnz58//vfT16numoAOPXUU3n33XdZtmwZzz33HH369OFzn/vcXtXUVAZjCYOx +JNUmxuYvTXXUUUfRtWtXfvvb39ZSz6cHPOusszjttNMoKytjw4YNXHDBBcn9PXv2ZNOmTcm2O3bs +SI4VhkSP7+zZs1mzZg2XXXYZZ5xxBps3b6Zz585ceeWVLFiwgD/+8Y888cQTybG31V1xxRVkZWWx +YMECNmzYwP3335+ScdsHHHAAp59+Oj/5yU/qHbYwePBgTjzxRB5//PFa9997773EGJOhdPjw4Wzd +ujU5nKK4uJglS5bs8b6CggK6detW69jlhq4p7DnU4lvf+hYHHXQQS5cuZcOGDfzkJz9JXqfi4uI6 +x0h37dqViRMnct9993H//fe3Wm8xGIwlwGAsSZkiNzeXq666im9/+9v86le/YuPGjcQYmTdv3m7B +bOPGjeTl5ZGdnc3rr7/O7Nmzk/v2339/tmzZwtNPP8327dv58Y9/vNvNbw888ECy5zI3N5cQAllZ +WZSWlvLOO++wc+dOcnJyyM7OrnVc68cff0xOTg69evWirKyMa6+9NmWf/6qrruKuu+7aY6xx9eC9 +YsUKnnnmGT772c/Weox7772XkpIS5s2bx9tvv83bb7/No48+ypNPPkl5eTlnnXUWc+fO5dFHH2XH +jh2sX7+et99+mxAC06ZN47vf/S4rV65k586dvPrqq2zbtq3Ba1qbjz/+mN69e9OjRw/ee+89br31 +1uS+k08+mVWrVnHTTTdRWVnJxo0bef3115P7p06dyt13383jjz9uMJZam8FYkjLHpZdeyk9/+lNm +zZrFgAEDGDBgAN/61reYNWsWX/jCFwC45ZZbuPLKK8nNzeXHP/4xZ555ZvL9vXv35pZbbuGf//mf +KSoqolevXhQVFSX3P/PMM4wYMYLevXtz8cUX89BDD9G1a1dWrVrFGWecQW5uLiNGjOC4447j7LPP +BnbvDf3BD37An//85+T44a9+9au71d/QTWr1GTp0KFOnTuWTTz7ZbfvDDz9M79696d27N0ceeSTH +HnssV1111R7vf+2111i+fDnf/va36devX3IZP348++23Hw8++CDFxcU89dRTXHfddfTt25fDDz+c ++fPnA3DdddfxD//wD4wcOZL8/HxmzJjBzp07G7ymtbnuuut44IEH6N27NxdccAGTJk1K7svJyeH5 +55/nscceY8CAAey///6UlpYm93/hC18gKyuLI444Yo8hGi2pw03wIdVm+3bo0QM++QQaeY+BJLVp +mT7BhzRmzBjOOusszj333HrbpXKCD3uMJaBzZ9hnH1i1Kt2VSJKkN954g7feemu3fwloDQZjqYrD +KSRJSr+vf/3rjB07lhtvvDE5wUtr6dyqZ5MymMFYkqT0q28ylJZmj7FUxWAsSVLHZjCWqhiMJUnq +2AzGUhWDsSRJHZvBWKpiMJYkqWMzGEtVDMaSJHVsBmOpyqBBsHIl7NiR7kokSc1x9dVXc/7556e7 +DLVBBmOpSteukJcHq1enuxJJ6tiGDh1K//792bx5c3LbnXfeyXHHHdeo9//bv/0bt99+e4vU9vzz +z/PlL3+Z3r17s88++3DEEUdw7bXXUllZ2SLnU+syGEvVOJxCktIvhMDOnTu54YYb9tieTo888ggT +Jkzg7LPPZvny5axZs4aHHnqIFStW8MEHH6S1NqWGwViqpqgIysrSXYUk6dJLL+X666+noqKi1v3T +p09n8ODB5ObmMnLkSF555ZXkvpkzZ3LOOecAcNJJJ3HLLbfs9t7DDjuM//mf/wHgvffeY+zYseTn +53PQQQfxyCOP1FnT9773PUpKSjj33HPp06cPAPvttx833ngjw4cPB2DatGlcddVVyfe89NJLFBcX +J9dXrlzJGWecQb9+/Rg+fDg///nPk/veeOMNRo4cSW5uLgMHDuSSSy4BYOvWrUydOpWCggLy8vI4 +8sgjWbNmTcMXUU1mMJaqscdYkjLD5z73OUaPHs21115b6/5Ro0Yxf/58ysvLmTJlChMmTKh1OMPk +yZOZPXt2cv2vf/0ry5cv5+STT2bTpk2MHTuWs88+m7Vr1zJnzhwuvPBC3nvvvT2Os3DhQsrKyjj9 +9NOb/Fl29XTHGBk/fjyHH344K1euZO7cudx44408//zzAFx00UVMnz6djz76iKVLlzJx4kQA7rnn +HioqKigrK2P9+vXcdtttdO/evcl1qGEGY6kag7EkZY6ZM2fyi1/8gnXr1u2xb8qUKfTp04esrCwu +vvhitm7dysKFC/do95WvfIW33347OdRh9uzZnH766XTu3JknnniCYcOGcc455xBC4NBDD+X000+v +tdd47dq1AAwYMCC5bfLkyeTl5dGzZ08eeOCBBj/P66+/ztq1a/n+979Pp06dGDp0KN/4xjeYM2cO +ANnZ2SxZsoR169bRo0cPRo0aldy+bt06Fi1aRAiBww8/nJycnEZcQTWVwViqxmAsSZ8KITR7aY4R +I0Zw8sknc/XVV++x77rrruPggw8mLy+PvLw8KioqkuG1upycHE466aRk+HzwwQc5++yzAVi2bBmv +vvoqffv2pW/fvuTl5TF79mxWrVq1x3Hy8/OBxFCIXR588EHKy8s54ogj2NGIRxotX76csrKy3c53 +9dVXs7rqru9f/vKXLFy4kAMPPJAjjzySJ598EoCpU6cybtw4Jk2aRFFRETNmzGjU+dR0BmOpGoOx +JH0qxtjspblKSkq44447KKt2A8jLL7/Mtddey6OPPkp5eTnl5eX07t27zvPtGk7x6quvsnXrVkaP +Hg1AcXExo0ePZv369axfv57y8nIqKiq4+eab9zjGAQccQGFhIb/+9a/rrbdnz55s2rQpuV49SBcX +F7Pvvvvudr6PPvqIxx9/HIDhw4cze/Zs1qxZw2WXXcYZZ5zB5s2b6dy5M1deeSULFizgj3/8I48/ +/jj33ntvo6+hGs9gLFVjMJakzDJ8+HDOPPNMbrrppuS2jRs3kp2dTX5+PpWVlfzwhz/k448/rvMY +J510EsuWLeOqq67izDPPTG4/+eSTWbRoEffffz/bt29n27ZtvPnmm7WOMQ4hcN111zFz5kzuvPNO +NmzYAMDixYv58MMPk+0OO+wwnnrqKcrLy1m1ahU33nhjct+oUaPo1asXs2bNYsuWLezYsYMFCxbw +5ptvAvDAAw8ke71zc3MJIZCVlUVpaSnvvPMOO3fuJCcnh+zsbLKyjHAtwasqVVNYmHgqRQo6OSRJ +e6nmEIyrrrqKTZs2JbePGzeOcePGsf/++zNs2DB69Oix25MfaurSpQunn346c+fOZcqUKcntOTk5 +PPfcc8yZM4dBgwYxaNAgZsyYUecziSdOnMjDDz/Mfffdx+DBg9lnn32YNGkS3/zmN5kwYQKQGPZw +yCGHMHToUE444QQmTZqUfH9WVhZPPPEE8+bNY9iwYfTr14/zzjsv+eSNZ555hhEjRtC7d28uvvhi +HnroIbp27cqqVas444wzyM3NZcSIERx33HFMnTp17y6u6hVS8c8cjT5ZCLE1zyftjfx8WLgQCgrS +XYkktZwQQkqGOkjpVtd3uWp7kwa622Ms1eBwCkmSOiaDsVSDwViSpI7JYCzVUFhoMJYkqSMyGEs1 +2GMsSVLHZDCWajAYS5LUMRmMpRoMxpIkdUwGY6kGg7EkSR2TwViqYVcw9vGekiR1LAZjqYbevSEr +Cz76KN2VSJI6oldeeYWDDjoo3WV0SAZjqRYOp5Ck9Bk2bBgvvPBCusvYa1//+tfJysrizTffTG5b +unQpWVmNi13HHHMM7777bovUtmTJEiZPnky/fv3o06cPBxxwABdddBF///vfW+R8bY3BWKqFwViS +OpYdO3ak7FghBPLz8/n3f//3Pban05IlSzjyyCMpKipi3rx5bNiwgT/84Q8MHz6cV155Ja21ZQqD +sVQLg7EkZYZ77rmHY489lksvvZS+ffsyfPhwnn32WQAefvhhRo4cuVv7n/3sZ5x22mkAVFZWcskl +lzBkyBAGDhzIt7/9bbZu3QrASy+9RHFxMbNmzWLgwIGce+65rFu3jvHjx5OXl0d+fj5f+tKXksdd +uXIlZ5xxBv369WP48OH8/Oc/r7fur33ta8yfP5+XX3651v133303Bx98ML179+Yzn/kMt99+e3Lf +rtoAZs2axYQJE3Z770UXXcT06dMBqKio4Bvf+AaDBg2iuLiYK6+8kljHTTIzZ87kmGOO4dprr2XQ +oEEAFBQU8K//+q9MnDhxt+tdXVZWFu+//36D17S+63fNNddQVFRE7969Oeigg3jxxRfrvX7pYjCW +amEwlqTM8frrr3PQQQexbt06Lr30Us4991wAxo8fz6JFi1i6dGmy7YMPPshZZ50FwOWXX86SJUuY +P38+S5YsoaysjB/+8IfJtqtWrWLDhg0sX76c22+/neuvv57i4mLWrVvH6tWr+Y//+A8AYoyMHz+e +ww8/nJUrVzJ37lxuvPFGnn/++Tpr7tGjB1dccQVXXHFFrfv79+/PU089RUVFBXfddRcXX3wx8+bN +S+7f1bs8adIknn76aT755BMAdu7cySOPPJL8jF/72tfo0qUL77//Pm+99RbPP/88//3f/13rOX/3 +u9/x1a9+tf6LzZ4929XX67umdV2/RYsWcfPNN/PnP/+ZiooKnn32WYYOHdpgHelgMJZqYTCWpMwx +ZMgQzj33XEIIfO1rX2PlypWsXr2a7t27c8opp/Dggw8CsHjxYhYuXMgpp5wCwB133MHPfvYzcnNz +6dmzJzNmzEi2BejUqRMzZ84kOzubrl27kp2dzcqVK/nb3/5Gp06dOProowF44403WLt2Ld///vfp +1KkTQ4cO5Rvf+AZz5sypt+7zzz+f5cuXJ3u4qzvxxBOT4fDYY49l7NixtfYuDx48mCOOOILf/OY3 +AMydO5eePXsycuRIPvzwQ55++ml+9rOf0a1bNwoKCpg+ffpun7G6tWvXMmDAgOT6zTffTF5eHr16 +9eKCCy6o83NU74Gu75rWdf06depEZWUl77zzDtu3b2fw4MEMGzas3muXLgZjqRZFRVBWlu4qJCm9 +QgjNXlKhepjr3r07ABs3bgRgypQpyWA2e/ZsTjvtNLp27cqaNWvYtGkT//iP/0jfvn3p27cvJ554 +IuvWrUsea5999iE7Ozu5ftlllzF8+HDGjh3LZz7zGa655hoAli1bRllZWfI4eXl5XH311axevbre +urt06cKVV17JlVdeuce+p59+mqOOOor8/Hzy8vJ4+umnWbt2ba3HmTx5cvIzPvjgg0yZMgWA5cuX +s23bNgYOHJis65vf/Gadx8nPz2flypXJ9QsvvJDy8nKmT5/Otm3b6v0sQIPX9NJLL631+g0fPpwb +briBkpIS+vfvz5QpU3arI5MYjKVa2GMsSYmewuYuLe34449nzZo1vP3228yZMycZGgsKCujRowcL +Fixg/fr1rF+/ng0bNvBRtWdx1gzuPXv25LrrrmPp0qU89thj/PSnP+XFF1+kuLiYfffdN3mc8vJy +PvroIx5//PEG65s2bRobNmzg17/+dXJbZWUlZ5xxBpdddhlr1qyhvLycE088sc7rNWHCBEpLSykr +K+M3v/lN8jMWFxfTrVs31q1bl6xrw4YNzJ8/v9bjjBkzZrc6atOzZ082bdqUXF+1alXydUPXNCcn +p9brB4khIS+//DLLli0DYMaMGQ1durQwGEu1MBhLUtvQuXNnJkyYwKWXXkp5eTnHH388kAi95513 +HtOnT2fNmjUAlJWV8dxzz9V5rCeffDI5XrlXr1507tyZrKwsRo0aRa9evZg1axZbtmxhx44dLFiw +YLfHsdWlU6dOlJSUJHtPIRGMKysrKSgoICsri6effrreugoKCvjSl77EtGnT2HfffTnggAOARE/6 +2LFjufjii/n444+JMfL+++/z+9//vtbjlJSU8PLLL3PJJZckH8+2du3a3R4Nd+ihh7JgwQLmz5/P +1q1bmTlzZvIvEA1d07qu36JFi3jxxReprKykS5cudO/evdGPrmttmVmVlGZ5ebB1K1T9S50kqRU1 +NASj5v7Jkyczd+5cJk6cuFvguuaaa/jMZz7D5z//efr06cPYsWNZtGhRncddvHgx//RP/0SvXr04 ++uijufDCC/nSl75EVlYWTzzxBPPmzWPYsGH069eP8847j4qKikbXN3DgwOT2nJwcbrrpJiZMmEDf +vn2ZM2cOp556ar2fecqUKcydOzd5090u9957L5WVlRx88MH07duXCRMm7NbLW91+++3Ha6+9xgcf +fMChhx5Kbm4uxx57LIWFhfzoRz9KtrnqqqsYM2YM+++//x5PqKjvmtZ1/bZu3cqMGTPYZ599GDRo +EGvWrOHqq6+u9/OmS2iNf+ZIniyE2Jrnk5pj//3h8ceh6i/mktSuhBBaZaiD1NLq+i5XbW/SQHd7 +jKU6OJxCkqSOxWAs1cFgLElSx9KoYBxCOCGE8F4IYVEI4fJa9vcOITwWQpgXQvjfEMLXU16p1MoM +xpIkdSwNBuMQQhbwC2AcMAKYHEI4sEazC4EFMcbDgOOA60MInVNdrNSaDMaSJHUsjekxHgUsjjEu +izFuA+YANW+djECvqte9gHUxxu2pK1NqfQZjSZI6lsYE40Lgg2rrK6q2VfcL4OAQwt+Bt4GLUlOe +lD6FhQZjSZI6klTdfDcOeCvGOAg4HLg5hJCTomNLaWGPsSRJHUtjxgGXAYOrrRdVbatuGnA1QIxx +aQjhb8CBwB5TwpSUlCRfjx49mtGjRzepYKm17LMPVFTAli3QrVu6q5Gk1BoyZEiDE2lIbcGQIUMA +KC0tpbS0tFnHanCCjxBCJ2AhMAZYCbwOTI4xvlutzc3A6hjjzBBCfxKB+NAY4/oax3KCD7Upw4bB +734Hw4enuxJJktQULTLBR4xxB/Ad4DlgATAnxvhuCOGCEML5Vc1+DHwhhDAfeB64rGYoltoih1NI +ktRxNOqRajHGZ4ADamz7r2qvV5IYZyy1KwZjSZI6Dme+k+phMJYkqeMwGEv1MBhLktRxGIylehiM +JUnqOAzGUj0MxpIkdRwGY6keBmNJkjqOBp9jnNKT+RxjtTE7dkD37vDJJ5Cdne5qJElSY7XIc4yl +jqxTJ+jfH1auTHclkiSppRmMpQY4nEKSpI7BYCw1wGAsSVLHYDCWGmAwliSpYzAYSw0wGEuS1DEY +jKUGGIwlSeoYDMZSAwzGkiR1DAZjqQEGY0mSOgYn+JAaUFkJOTmweXPiucaSJCnzOcGH1AK6dIH8 +fPjww3RXIkmSWpLBWGqEwkKHU0iS1N4ZjKVGcJyxJEntn8FYagSDsSRJ7Z/BWGoEg7EkSe2fwVhq +BIOxJEntn8FYagSDsSRJ7Z/BWGoEg7EkSe2fE3xIjbB5M/Tpk/iZ5V8nJUnKeE7wIbWQ7t2hVy9Y +uzbdlUiSpJZiMJYayeEUkiS1bwZjqZEMxpIktW8GY6mRDMaSJLVvBmOpkYqKoKws3VVIkqSWYjCW +GskeY0mS2jeDsdRIBmNJkto3g7HUSAZjSZLaN4Ox1EiFhYlg7Bw1kiS1TwZjqZF69YLsbNiwId2V +SJKklmAwlprA4RSSJLVfBmOpCQzGkiS1XwZjqQkMxpIktV8GY6kJDMaSJLVfBmOpCXY9mUKSJLU/ +BmOpCewxliSp/TIYS01gMJYkqf0yGEtNYDCWJKn9MhhLTdCnD2zfDhUV6a5EkiSlmsFYaoIQEr3G +ZWXprkSSJKWawVhqIodTSJLUPhmMpSYyGEuS1D4ZjKUmMhhLktQ+GYylJjIYS5LUPhmMpSYyGEuS +1D4ZjKUmMhhLktQ+GYylJvJxbZIktU8GY6mJCgpg40bYvDndlUiSpFQyGEtNFAIUFtprLElSe2Mw +lvaC44wlSWp/DMbSXjAYS5LU/hiMpb1gMJYkqf0xGEt7wWAsSVL7YzCW9oLBWJKk9qdRwTiEcEII +4b0QwqIQwuV1tBkdQngrhPBOCOHF1JYpZRaDsSRJ7U/nhhqEELKAXwBjgL8Db4QQfhtjfK9am1zg +ZmBsjLEshFDQUgVLmcBgLElS+9OYHuNRwOIY47IY4zZgDnBqjTZTgF/FGMsAYoxrU1umlFn69YP1 +66GyMt3G3FU+AAATCUlEQVSVSJKkVGlMMC4EPqi2vqJqW3X7A31DCC+GEN4IIUxNVYFSJurUCQYM +gL//Pd2VSJKkVGlwKEUTjnME8GWgJ/CnEMKfYoxLUnR8KePsGk4xdGi6K5EkSanQmGBcBgyutl5U +ta26FcDaGOMWYEsI4ffAocAewbikpCT5evTo0YwePbppFUsZwnHGkiRljtLSUkpLS5t1jBBjrL9B +CJ2AhSRuvlsJvA5MjjG+W63NgcDPgROArsBrwJkxxr/WOFZs6HxSW/Hd78KgQXDJJemuRJIk1RRC +IMYYmvKeBnuMY4w7QgjfAZ4jMSb5zhjjuyGECxK74+0xxvdCCM8C84EdwO01Q7HU3hQVwfLl6a5C +kiSlSoM9xik9mT3GakcefjixPPpouiuRJEk17U2PsTPfSXvJMcaSJLUvBmNpLxmMJUlqXxxKIe2l +bdugZ0/YtAk6p+rBh5IkKSUcSiG1ouxsKCiAVavSXYkkSUoFg7HUDA6nkCSp/TAYS81QVARlNae7 +kSRJbZLBWGoGe4wlSWo/DMZSMxiMJUlqPwzGUjMYjCVJaj8MxlIzGIwlSWo/DMZSMxiMJUlqP5zg +Q2qGLVsgNxc2b4Ys/5opSVLGcIIPqZV165YIxmvWpLsSSZLUXAZjqZkcTiFJUvtgMJaayWAsSVL7 +YDCWmqmw0GAsSVJ7YDCWmskeY0mS2geDsdRMBmNJktoHg7HUTAZjSZLaB4Ox1EwGY0mS2gcn+JCa +aeNG2Gcf2LQJQpMeIy5JklqKE3xIaZCTk5joY/36dFciSZKaw2AspYDDKSRJavsMxlIKGIwlSWr7 +DMZSChiMJUlq+wzGUgoYjCVJavsMxlIKGIwlSWr7DMZSChiMJUlq+wzGUgoUFUFZWbqrkCRJzWEw +llLAHmNJkto+g7GUAr17Q4xQUZHuSiRJ0t4yGEspEIK9xpIktXUGYylFDMaSJLVtBmMpRQzGkiS1 +bQZjKUUMxpIktW0GYylFDMaSJLVtBmMpRQzGkiS1bQZjKUUKCw3GkiS1ZQZjKUXsMZYkqW0zGEsp +kp8PmzbBJ5+kuxJJkrQ3DMZSiuya5KOsLN2VSJKkvWEwllLI4RSSJLVdBmMphQzGkiS1XQZjKYUM +xpIktV0GYymFDMaSJLVdBmMphQzGkiS1XQZjKYUMxpIktV0GYymFDMaSJLVdIcbYeicLIbbm+aTW +tnMndOsGFRWJn5IkKT1CCMQYQ1PeY4+xlEJZWTBoEPz97+muRJIkNZXBWEoxZ7+TJKltMhhLKeY4 +Y0mS2iaDsZRiBmNJktomg7GUYgZjSZLaJoOxlGIGY0mS2iaDsZRiBmNJktomg7GUYgZjSZLaJif4 +kFJs+3bo0QM++QSys9NdjSRJHVOLTfARQjghhPBeCGFRCOHyetqNDCFsCyGc3pQipPakc2fo1w9W +rUp3JZIkqSkaDMYhhCzgF8A4YAQwOYRwYB3t/hN4NtVFSm2NwykkSWp7GtNjPApYHGNcFmPcBswB +Tq2l3b8AjwKrU1if1CYVFhqMJUlqaxoTjAuBD6qtr6jalhRCGAScFmO8FWjSWA6pPbLHWJKktidV +T6W4Aag+9thwrA7NYCxJUtvTuRFtyoDB1daLqrZV9zlgTgghAAXAiSGEbTHGx2oerKSkJPl69OjR +jB49uoklS5mvqAjefDPdVUiS1HGUlpZSWlrarGM0+Li2EEInYCEwBlgJvA5MjjG+W0f7u4DHY4y/ +rmWfj2tTh/DyyzBjBvzhD+muRJKkjmlvHtfWYI9xjHFHCOE7wHMkhl7cGWN8N4RwQWJ3vL3mW5pS +gNQeOZRCkqS2xwk+pBawdSv06gWbN0OnTumuRpKkjqfFJviQ1DRdu0JeHqz24YWSJLUZBmOphTic +QpKktsVgLLUQg7EkSW2LwVhqIQZjSZLaFoOx1EIMxpIktS0GY6mFFBVBWc2pcCRJUsYyGEstxB5j +SZLaFoOx1EIMxpIktS1O8CG1kE2bID8/8TM06fHikiSpuZzgQ8ogPXoklnXr0l2JJElqDIOx1IIc +TiFJUtthMJZakMFYkqS2w2AstSCDsSRJbYfBWGpBBmNJktoOg7HUggzGkiS1HQZjqQUVFhqMJUlq +KwzGUguyx1iSpLbDYCy1oF3B2HltJEnKfAZjqQX17g1ZWfDRR+muRJIkNcRgLLUwh1NIktQ2GIyl +FmYwliSpbTAYSy3MYCxJUttgMJZamMFYkqS2wWAstTCDsSRJbYPBWGphBmNJktoGg7HUwgzGkiS1 +DQZjqYUZjCVJahsMxlILy8uDykrYuDHdlUiSpPoYjKUWFkKi17isLN2VSJKk+hiMpVbgcApJkjKf +wVhqBQZjSZIyn8FYagUGY0mSMp/BWGoFBmNJkjKfwVhqBQZjSZIyn8FYagUGY0mSMp/BWGoFBmNJ +kjKfwVhqBQUF8PHHsGVLuiuRJEl1MRhLrSArCwYOdJIPSZIymcFYaiUOp5AkKbMZjKVWYjCWJCmz +GYylVmIwliQpsxmMpVZiMJYkKbMZjKVWYjCWJCmzGYylVmIwliQpsxmMpVZiMJYkKbOFGGPrnSyE +2JrnkzLJjh3QvTts3AhduqS7GkmS2rcQAjHG0JT32GMstZJOnaB/f1i5Mt2VSJKk2hiMpVbkcApJ +kjKXwVhqRQZjSZIyl8FYakVFRVBWlu4qJElSbQzGUiuyx1iSpMxlMJZakcFYkqTMZTCWWpHBWJKk +zGUwllqRwViSpMzlBB9SK6qshJwc2Lw58VxjSZLUMpzgQ8pwXbpAfj58+GG6K5EkSTUZjKVW5nAK +SZIyk8FYamUGY0mSMlOjgnEI4YQQwnshhEUhhMtr2T8lhPB21fJKCOEfUl+q1D4YjCVJykwNBuMQ +QhbwC2AcMAKYHEI4sEaz94EvxhgPBX4M3JHqQqX2orDQYCxJUiZqTI/xKGBxjHFZjHEbMAc4tXqD +GOOrMcaPqlZfBQpTW6bUfthjLElSZmpMMC4EPqi2voL6g+83gKebU5TUnhmMJUnKTJ1TebAQwnHA +NOCYVB5Xak8MxpIkZabGBOMyYHC19aKqbbsJIRwC3A6cEGMsr+tgJSUlydejR49m9OjRjSxVah8K +C6GsDHbuhCyfCyNJUkqUlpZSWlrarGM0OPNdCKETsBAYA6wEXgcmxxjfrdZmMDAXmBpjfLWeYznz +nQQUFMBf/wr9+qW7EkmS2qe9mfmuwR7jGOOOEMJ3gOdIjEm+M8b4bgjhgsTueDtwJdAXuCWEEIBt +McZRTf8IUsewaziFwViSpMzRYI9xSk9mj7EEwMknw/nnwymnpLsSSZLap73pMXaEo5QG3oAnSVLm +MRhLaWAwliQp8xiMpTQwGEuSlHkMxlIaGIwlSco8BmMpDYqKEs8yliRJmcNgLKVBYWGix9iHtEiS +lDkMxlIa9OoF2dmwYUO6K5EkSbsYjKU0cZyxJEmZxWAspYnBWJKkzGIwltLEYCxJUmYxGEtpYjCW +JCmzGIylNDEYS5KUWQzGUpoYjCVJyiwGYylNDMaSJGUWg7GUJrsm+ZAkSZnBYCylSZ8+sH07VFSk +uxJJkgQGYyltQkgMpygrS3clkiQJDMZSWjnOWJKkzGEwltLIYCxJUuYwGEtpZDCWJClzGIylNDIY +S5KUOQzGUhoZjCVJyhwGYymNDMaSJGUOg7GURgZjSZIyh8FYSqOCAvjkE9i0Kd2VSJIkg7GURiEk +poZ2kg9JktLPYCylmbPfSZKUGQzGUpo5zliSpMxgMJbSzGAsSVJmMBhLaWYwliQpM3ROdwFSR1dU +BHPnprsKZaqtW2HDBujUac8lK+vTnyGku9LmixF27EgsO3d++rrmsm1bYtm+/dPXNdcb83pv3rN9 +e2KJ8dOaq9ef7tfpOB8kvn+7voO7XqdiW2PbVq+r+s/atjVm3968vyF1/Tda33+7e7Mv04+X6QzG +UprZYyxIBOCFC2HBgsTy178mfi5fDr167R4Ma4bGGHcPyXUF6L3ZVn17Y0JrXfsas73656irns6d +E0t2dmKp63V9++p63aNH49rtqm2X6gEgE1639vlqhsTqS2O3Nff91cNyzZ97u29v3l+XusJzfaF6 +b/Zl+vFa23PPNf09BmMpzQzGHUtlZSIA7wq+u5Zly2DYMDj4YBgxAiZNSvzcbz/o0qX+Y+7qaa0t +fDZnW/XtO3c2HFobG7Dbe8+3pLYrxFaM9SGE2Jrnk9qCHTuge3fYuLHhAKS2o7ISFi/ePfwuWAB/ ++xsMHZoIvdWX/ff3z1+SUimEQIyxSX/dNhhLGWDIEHjppURgUtuybVvtAfj99xN/rtXD78EHwwEH +QNeu6a5aktq/vQnGDqWQMsCu4RQG48y1bRssWVJ7AC4u/nQIxFe+Av/+74kA3K1buquWJDWFwVjK +AIWFjjPOJNu3wxtvwAsvwP/+byIAL1mS+HPa1ft76qlwxRWJANy9e7orliSlgsFYygDegJd+H3wA +zz6bWObOTfyZHH88/L//B5ddBgcemHhqgSSp/TIYSxmgqCjxWC61ns2b4fe//zQMf/gh/NM/JYLw +jTfCoEHprlCS1NoMxlIGKCqCP/4x3VW0bzHCu+8mQvAzzySu96GHwrhxcNdd8I//mHhkmCSp4zIY +SxnAoRQto7wcfve7T3uFs7ISQfiCC+Chh6BPn3RXKEnKJAZjKQMYjFNjx47ETXPPPJMIwgsWwDHH +JMLwJZckbpRzAglJUl18jrGUAbZtg549YdOmxNSzarwVK3a/aW7QIDjhhEQYPuYYH5kmSR2VE3xI +bdigQfD664neY9Vt82Z4+eVPe4VXrUo8PWLcOBg7NvFINUmSnOBDasN2DacwGO+u+k1zzz4Lf/gD +HHKIN81JklLPYCxliI48zjhG2LIlMZTkk08Sy4IFn4bhEBJB+LzzYM4cb5qTJLUMg7GUITI5GMcI +lZWJwFo9vDZ3fdfrTZsgOzsxzrpHj8TPYcMSYfi7301MruFNc5KklmYwljJEcTHceiuUlibWY0ws +1V83tJ6Ktjt3Jsbx1gyvWVm7B9ddS13rubmJcdONad+jhzcdSpLSz5vvpAyxfn0iFIfw6QK1v07V +vtraZmVB9+57Btns7Bb9+JIkpZRPpZAkSZJoI0+lCA4UlCRJUgZq9WBsj7EkSZJa2t50xma1QB2S +JElSm2MwliRJkjAYS5IkSYDBWJIkSQIMxpIkSRJgMJYkSZKARgbjEMIJIYT3QgiLQgiX19HmphDC +4hDCvBDCYaktU5IkSWpZDQbjEEIW8AtgHDACmBxCOLBGmxOB4THG/YALgNtaoFapRZWWlqa7BKlW +fjeVqfxuqr1pTI/xKGBxjHFZjHEbMAc4tUabU4F7AWKMrwG5IYT+Ka1UamH+glem8rupTOV3U+1N +Y4JxIfBBtfUVVdvqa1NWSxtJkiQpY3nznSRJkgSEGGP9DUL4PFASYzyhan0GEGOM11RrcxvwYozx +oar194AvxRg/rHGs+k8mSZIkpUiMMTSlfedGtHkD+EwIYQiwEpgETK7R5jHgQuChqiC9oWYo3pvi +JEmSpNbSYDCOMe4IIXwHeI7E0Is7Y4zvhhAuSOyOt8cYnwohnBRCWAJ8Akxr2bIlSZKk1GpwKIUk +SZLUEbTazXeNmSRESocQwv+FEN4OIbwVQng93fWoYwsh3BlC+DCEML/atrwQwnMhhIUhhGdDCLnp +rFEdUx3fzR+EEFaEEP5StZyQzhrVMYUQikIIL4QQFoQQ/jeE8K9V25v8u7NVgnFjJgmR0mgnMDrG +eHiMcVS6i1GHdxeJ35XVzQB+F2M8AHgB+LdWr0qq/bsJ8NMY4xFVyzOtXZQEbAe+G2McARwFXFiV +M5v8u7O1eowbM0mIlC4BH12oDBFjfAUor7H5VOCeqtf3AKe1alESdX43IfE7VEqbGOOqGOO8qtcb +gXeBIvbid2drhYHGTBIipUsEng8hvBFCOC/dxUi16LfrST8xxlVAvzTXI1X3nRDCvBDCfzvMR+kW +QhgKHAa8CvRv6u9Oe8kkODrGeARwEol/fjkm3QVJDfCuaWWKW4B9Y4yHAauAn6a5HnVgIYQc4FHg +oqqe45q/Kxv83dlawbgMGFxtvahqm5R2McaVVT/XAL8hMfRHyiQfhhD6A4QQBgCr01yPBCR+b8ZP +H291BzAynfWo4wohdCYRiu+LMf62anOTf3e2VjBOThISQuhCYpKQx1rp3FKdQgg9qv6GSQihJzAW +eCe9VUkEdh+3+Rjw9arXXwN+W/MNUivZ7btZFTZ2OR1/fyp9fgn8NcZ4Y7VtTf7d2WrPMa56hMuN +fDpJyH+2yomleoQQhpHoJY4kJrx5wO+m0imEMBsYDeQDHwI/AP4HeAQoBpYBE2OMG9JVozqmOr6b +x5EYz7kT+D/ggtpmvpVaUgjhaOD3wP+S+P95BK4AXgcepgm/O53gQ5IkScKb7yRJkiTAYCxJkiQB +BmNJkiQJMBhLkiRJgMFYkiRJAgzGkiRJEmAwliRJkgCDsSRJkgTA/wdTBj79QR260QAAAABJRU5E +rkJggg== +" +> +</div> + +</div> + +</div> +</div> + +</div> +<div class="cell border-box-sizing text_cell rendered"> +<div class="prompt input_prompt"> +</div> +<div class="inner_cell"> +<div class="text_cell_render border-box-sizing rendered_html"> +<p>Nothing. It is interesting to note that the graphs are almost exactly the same: This would imply again that the variables we removed earlier (all the binary classifiers) indeed have almost no predictive power. It seems this problem is high-dimensional, but with almost no data that can actually inform our decisions.</p> +<h1 id="Summary-for-Day-1">Summary for Day 1<a class="anchor-link" href="#Summary-for-Day-1">&#182;</a></h1><p>After spending a couple hours with this dataset, there seems to be a fundamental issue in play: We have very high-dimensional data, and it has no bearing on our ability to actually predict customer satisfaction. This can be a huge issue: it implies that <strong>no matter what model we use, we fundamentally can't perform well.</strong> I'm sure most of this is because I'm not an experienced data scientist. Even so, we have yet to develop a strategy that can actually beat out the village idiot; <strong>so far, the bank is best off just assuming all its customers are satisfied.</strong> Hopefully more to come soon.</p> + +</div> +</div> +</div> +<div class="cell border-box-sizing code_cell rendered"> +<div class="input"> +<div class="prompt input_prompt">In&nbsp;[9]:</div> +<div class="inner_cell"> + <div class="input_area"> +<div class=" highlight hl-ipython3"><pre><span></span><span class="n">end</span> <span class="o">=</span> <span class="n">datetime</span><span class="o">.</span><span class="n">now</span><span class="p">()</span> +<span class="nb">print</span><span class="p">(</span><span class="s2">&quot;Running time: </span><span class="si">{}</span><span class="s2">&quot;</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">end</span> <span class="o">-</span> <span class="n">start</span><span class="p">))</span> +</pre></div> + +</div> +</div> +</div> + +<div class="output_wrapper"> +<div class="output"> + + +<div class="output_area"><div class="prompt"></div> +<div class="output_subarea output_stream output_stdout output_text"> +<pre>Running time: 0:00:58.715714 +</pre> +</div> +</div> + +</div> +</div> + +</div> +<div class="cell border-box-sizing text_cell rendered"> +<div class="prompt input_prompt"> +</div> +<div class="inner_cell"> +<div class="text_cell_render border-box-sizing rendered_html"> +<h1 id="Appendix">Appendix<a class="anchor-link" href="#Appendix">&#182;</a></h1><p>Code used to split the initial training data:</p> +<div class="highlight"><pre><span></span><span class="kn">from</span> <span class="nn">sklearn.cross_validation</span> <span class="kn">import</span> <span class="n">train_test_split</span> +<span class="n">data</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">read_csv</span><span class="p">(</span><span class="s1">&#39;train.csv&#39;</span><span class="p">)</span> +<span class="n">data</span><span class="o">.</span><span class="n">index</span> <span class="o">=</span> <span class="n">data</span><span class="o">.</span><span class="n">ID</span> + +<span class="n">data_train</span><span class="p">,</span> <span class="n">data_validate</span> <span class="o">=</span> <span class="n">train_test_split</span><span class="p">(</span> + <span class="n">data</span><span class="p">,</span> <span class="n">train_size</span><span class="o">=.</span><span class="mi">7</span><span class="p">)</span> + +<span class="n">data_train</span><span class="o">.</span><span class="n">to_csv</span><span class="p">(</span><span class="s1">&#39;split_train.csv&#39;</span><span class="p">)</span> +<span class="n">data_validate</span><span class="o">.</span><span class="n">to_csv</span><span class="p">(</span><span class="s1">&#39;split_validate.csv&#39;</span><span class="p">)</span> +</pre></div> + +</div> +</div> +</div></p> +<script type="text/x-mathjax-config"> +# MathJax.Hub.Config({tex2jax: {inlineMath: [['$','$'], ['\(','\)']]}}); +MathJax.Hub.Config({tex2jax: {inlineMath: [['$','$']]}}); +</script> + +<script async src='https://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS_CHTML'></script>Profitability using the Investment Formula2016-02-26T00:00:00-05:00Bradlee Speicetag:bspeice.github.io,2016-02-26:profitability-using-the-investment-formula.html<p> <div class="cell border-box-sizing text_cell rendered"> <div class="prompt input_prompt"> </div> @@ -31,7 +953,7 @@ <div class="prompt input_prompt">In&nbsp;[19]:</div> <div class="inner_cell"> <div class="input_area"> -<div class=" highlight hl-ipython3"><pre><span class="kn">import</span> <span class="nn">numpy</span> <span class="k">as</span> <span class="nn">np</span> +<div class=" highlight hl-ipython3"><pre><span></span><span class="kn">import</span> <span class="nn">numpy</span> <span class="k">as</span> <span class="nn">np</span> <span class="kn">import</span> <span class="nn">pandas</span> <span class="k">as</span> <span class="nn">pd</span> <span class="kn">import</span> <span class="nn">matplotlib.pyplot</span> <span class="k">as</span> <span class="nn">plt</span> <span class="kn">from</span> <span class="nn">Quandl</span> <span class="k">import</span> <span class="n">get</span> <span class="k">as</span> <span class="n">qget</span> @@ -130,7 +1052,7 @@ <div class="prompt input_prompt">In&nbsp;[7]:</div> <div class="inner_cell"> <div class="input_area"> -<div class=" highlight hl-ipython3"><pre><span class="n">fang_df</span> <span class="o">=</span> <span class="n">simulate_tickers</span><span class="p">([</span><span class="s2">&quot;YAHOO/FB&quot;</span><span class="p">,</span> <span class="s2">&quot;YAHOO/AAPL&quot;</span><span class="p">,</span> <span class="s2">&quot;YAHOO/NFLX&quot;</span><span class="p">,</span> <span class="s2">&quot;YAHOO/GOOG&quot;</span><span class="p">])</span> +<div class=" highlight hl-ipython3"><pre><span></span><span class="n">fang_df</span> <span class="o">=</span> <span class="n">simulate_tickers</span><span class="p">([</span><span class="s2">&quot;YAHOO/FB&quot;</span><span class="p">,</span> <span class="s2">&quot;YAHOO/AAPL&quot;</span><span class="p">,</span> <span class="s2">&quot;YAHOO/NFLX&quot;</span><span class="p">,</span> <span class="s2">&quot;YAHOO/GOOG&quot;</span><span class="p">])</span> </pre></div> </div> @@ -143,7 +1065,7 @@ <div class="prompt input_prompt">In&nbsp;[8]:</div> <div class="inner_cell"> <div class="input_area"> -<div class=" highlight hl-ipython3"><pre><span class="n">fang_df</span><span class="o">.</span><span class="n">xs</span><span class="p">(</span><span class="s1">&#39;days&#39;</span><span class="p">,</span> <span class="n">axis</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">level</span><span class="o">=</span><span class="mi">1</span><span class="p">)</span><span class="o">.</span><span class="n">hist</span><span class="p">()</span> +<div class=" highlight hl-ipython3"><pre><span></span><span class="n">fang_df</span><span class="o">.</span><span class="n">xs</span><span class="p">(</span><span class="s1">&#39;days&#39;</span><span class="p">,</span> <span class="n">axis</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">level</span><span class="o">=</span><span class="mi">1</span><span class="p">)</span><span class="o">.</span><span class="n">hist</span><span class="p">()</span> <span class="n">plt</span><span class="o">.</span><span class="n">gcf</span><span class="p">()</span><span class="o">.</span><span class="n">set_size_inches</span><span class="p">(</span><span class="mi">18</span><span class="p">,</span> <span class="mi">8</span><span class="p">);</span> <span class="n">plt</span><span class="o">.</span><span class="n">gcf</span><span class="p">()</span><span class="o">.</span><span class="n">suptitle</span><span class="p">(</span><span class="s2">&quot;Distribution of Days Until Profitability&quot;</span><span class="p">,</span> <span class="n">fontsize</span><span class="o">=</span><span class="mi">18</span><span class="p">);</span> </pre></div> @@ -617,7 +1539,7 @@ kiSpNScYJEmSJElSa04wSJIkSZKk1pxgkCRJkiRJrf1/uiw9D8O0d6EAAAAASUVORK5CYII= <div class="prompt input_prompt">In&nbsp;[10]:</div> <div class="inner_cell"> <div class="input_area"> -<div class=" highlight hl-ipython3"><pre><span class="n">fang_df</span><span class="o">.</span><span class="n">xs</span><span class="p">(</span><span class="s1">&#39;score&#39;</span><span class="p">,</span> <span class="n">axis</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">level</span><span class="o">=</span><span class="mi">1</span><span class="p">)</span><span class="o">.</span><span class="n">plot</span><span class="p">()</span> +<div class=" highlight hl-ipython3"><pre><span></span><span class="n">fang_df</span><span class="o">.</span><span class="n">xs</span><span class="p">(</span><span class="s1">&#39;score&#39;</span><span class="p">,</span> <span class="n">axis</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">level</span><span class="o">=</span><span class="mi">1</span><span class="p">)</span><span class="o">.</span><span class="n">plot</span><span class="p">()</span> <span class="n">plt</span><span class="o">.</span><span class="n">gcf</span><span class="p">()</span><span class="o">.</span><span class="n">set_size_inches</span><span class="p">(</span><span class="mi">18</span><span class="p">,</span> <span class="mi">6</span><span class="p">)</span> <span class="n">plt</span><span class="o">.</span><span class="n">gcf</span><span class="p">()</span><span class="o">.</span><span class="n">suptitle</span><span class="p">(</span><span class="s2">&quot;Profitability score over time&quot;</span><span class="p">,</span> <span class="n">fontsize</span><span class="o">=</span><span class="mi">18</span><span class="p">);</span> </pre></div> @@ -2256,7 +3178,7 @@ EZIkSZIkSZIkSZIk6RIpIiRJkiRJkiRJkiRJ0iX+D91jLOlExGw8AAAAAElFTkSuQmCC <div class="prompt input_prompt">In&nbsp;[13]:</div> <div class="inner_cell"> <div class="input_area"> -<div class=" highlight hl-ipython3"><pre><span class="n">cyclic_df</span> <span class="o">=</span> <span class="n">simulate_tickers</span><span class="p">([</span><span class="s2">&quot;YAHOO/X&quot;</span><span class="p">,</span> <span class="s2">&quot;YAHOO/CAT&quot;</span><span class="p">,</span> <span class="s2">&quot;YAHOO/NFLX&quot;</span><span class="p">,</span> <span class="s2">&quot;YAHOO/GOOG&quot;</span><span class="p">])</span> +<div class=" highlight hl-ipython3"><pre><span></span><span class="n">cyclic_df</span> <span class="o">=</span> <span class="n">simulate_tickers</span><span class="p">([</span><span class="s2">&quot;YAHOO/X&quot;</span><span class="p">,</span> <span class="s2">&quot;YAHOO/CAT&quot;</span><span class="p">,</span> <span class="s2">&quot;YAHOO/NFLX&quot;</span><span class="p">,</span> <span class="s2">&quot;YAHOO/GOOG&quot;</span><span class="p">])</span> </pre></div> </div> @@ -2269,7 +3191,7 @@ EZIkSZIkSZIkSZIk6RIpIiRJkiRJkiRJkiRJ0iX+D91jLOlExGw8AAAAAElFTkSuQmCC <div class="prompt input_prompt">In&nbsp;[14]:</div> <div class="inner_cell"> <div class="input_area"> -<div class=" highlight hl-ipython3"><pre><span class="n">cyclic_df</span><span class="o">.</span><span class="n">xs</span><span class="p">(</span><span class="s1">&#39;days&#39;</span><span class="p">,</span> <span class="n">axis</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">level</span><span class="o">=</span><span class="mi">1</span><span class="p">)</span><span class="o">.</span><span class="n">hist</span><span class="p">()</span> +<div class=" highlight hl-ipython3"><pre><span></span><span class="n">cyclic_df</span><span class="o">.</span><span class="n">xs</span><span class="p">(</span><span class="s1">&#39;days&#39;</span><span class="p">,</span> <span class="n">axis</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">level</span><span class="o">=</span><span class="mi">1</span><span class="p">)</span><span class="o">.</span><span class="n">hist</span><span class="p">()</span> <span class="n">plt</span><span class="o">.</span><span class="n">gcf</span><span class="p">()</span><span class="o">.</span><span class="n">set_size_inches</span><span class="p">(</span><span class="mi">18</span><span class="p">,</span> <span class="mi">8</span><span class="p">);</span> <span class="n">plt</span><span class="o">.</span><span class="n">gcf</span><span class="p">()</span><span class="o">.</span><span class="n">suptitle</span><span class="p">(</span><span class="s2">&quot;Distribution of Days Until Profitability&quot;</span><span class="p">,</span> <span class="n">fontsize</span><span class="o">=</span><span class="mi">18</span><span class="p">);</span> </pre></div> @@ -2757,7 +3679,7 @@ JEmSJEnSwGwwSJIkSZKkgdlgkCRJkiRJA7PBIEmSJEmSBvb/AfKAew4+oiojAAAAAElFTkSuQmCC <div class="prompt input_prompt">In&nbsp;[15]:</div> <div class="inner_cell"> <div class="input_area"> -<div class=" highlight hl-ipython3"><pre><span class="n">cyclic_df</span><span class="o">.</span><span class="n">xs</span><span class="p">(</span><span class="s1">&#39;score&#39;</span><span class="p">,</span> <span class="n">axis</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">level</span><span class="o">=</span><span class="mi">1</span><span class="p">)</span><span class="o">.</span><span class="n">plot</span><span class="p">()</span> +<div class=" highlight hl-ipython3"><pre><span></span><span class="n">cyclic_df</span><span class="o">.</span><span class="n">xs</span><span class="p">(</span><span class="s1">&#39;score&#39;</span><span class="p">,</span> <span class="n">axis</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">level</span><span class="o">=</span><span class="mi">1</span><span class="p">)</span><span class="o">.</span><span class="n">plot</span><span class="p">()</span> <span class="n">plt</span><span class="o">.</span><span class="n">gcf</span><span class="p">()</span><span class="o">.</span><span class="n">set_size_inches</span><span class="p">(</span><span class="mi">18</span><span class="p">,</span> <span class="mi">6</span><span class="p">)</span> <span class="n">plt</span><span class="o">.</span><span class="n">gcf</span><span class="p">()</span><span class="o">.</span><span class="n">suptitle</span><span class="p">(</span><span class="s2">&quot;Profitability score over time&quot;</span><span class="p">,</span> <span class="n">fontsize</span><span class="o">=</span><span class="mi">18</span><span class="p">);</span> </pre></div> @@ -4542,7 +5464,7 @@ hUKhUCgUpor/A9AnO0PoIItTAAAAAElFTkSuQmCC <div class="prompt input_prompt">In&nbsp;[21]:</div> <div class="inner_cell"> <div class="input_area"> -<div class=" highlight hl-ipython3"><pre><span class="n">biotech_df</span> <span class="o">=</span> <span class="n">simulate_tickers</span><span class="p">([</span><span class="s1">&#39;YAHOO/REGN&#39;</span><span class="p">,</span> <span class="s1">&#39;YAHOO/CELG&#39;</span><span class="p">,</span> <span class="s1">&#39;GOOG/NASDAQ_BIB&#39;</span><span class="p">,</span> <span class="s1">&#39;GOOG/NASDAQ_IBB&#39;</span><span class="p">])</span> +<div class=" highlight hl-ipython3"><pre><span></span><span class="n">biotech_df</span> <span class="o">=</span> <span class="n">simulate_tickers</span><span class="p">([</span><span class="s1">&#39;YAHOO/REGN&#39;</span><span class="p">,</span> <span class="s1">&#39;YAHOO/CELG&#39;</span><span class="p">,</span> <span class="s1">&#39;GOOG/NASDAQ_BIB&#39;</span><span class="p">,</span> <span class="s1">&#39;GOOG/NASDAQ_IBB&#39;</span><span class="p">])</span> </pre></div> </div> @@ -4555,7 +5477,7 @@ hUKhUCgUpor/A9AnO0PoIItTAAAAAElFTkSuQmCC <div class="prompt input_prompt">In&nbsp;[22]:</div> <div class="inner_cell"> <div class="input_area"> -<div class=" highlight hl-ipython3"><pre><span class="n">biotech_df</span><span class="o">.</span><span class="n">xs</span><span class="p">(</span><span class="s1">&#39;days&#39;</span><span class="p">,</span> <span class="n">axis</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">level</span><span class="o">=</span><span class="mi">1</span><span class="p">)</span><span class="o">.</span><span class="n">hist</span><span class="p">()</span> +<div class=" highlight hl-ipython3"><pre><span></span><span class="n">biotech_df</span><span class="o">.</span><span class="n">xs</span><span class="p">(</span><span class="s1">&#39;days&#39;</span><span class="p">,</span> <span class="n">axis</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">level</span><span class="o">=</span><span class="mi">1</span><span class="p">)</span><span class="o">.</span><span class="n">hist</span><span class="p">()</span> <span class="n">plt</span><span class="o">.</span><span class="n">gcf</span><span class="p">()</span><span class="o">.</span><span class="n">set_size_inches</span><span class="p">(</span><span class="mi">18</span><span class="p">,</span> <span class="mi">8</span><span class="p">);</span> <span class="n">plt</span><span class="o">.</span><span class="n">gcf</span><span class="p">()</span><span class="o">.</span><span class="n">suptitle</span><span class="p">(</span><span class="s2">&quot;Distribution of Days Until Profitability&quot;</span><span class="p">,</span> <span class="n">fontsize</span><span class="o">=</span><span class="mi">18</span><span class="p">);</span> </pre></div> @@ -5108,7 +6030,7 @@ WKBTQVcCAAAAAElFTkSuQmCC <div class="prompt input_prompt">In&nbsp;[23]:</div> <div class="inner_cell"> <div class="input_area"> -<div class=" highlight hl-ipython3"><pre><span class="n">biotech_df</span><span class="o">.</span><span class="n">xs</span><span class="p">(</span><span class="s1">&#39;score&#39;</span><span class="p">,</span> <span class="n">axis</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">level</span><span class="o">=</span><span class="mi">1</span><span class="p">)</span><span class="o">.</span><span class="n">plot</span><span class="p">()</span> +<div class=" highlight hl-ipython3"><pre><span></span><span class="n">biotech_df</span><span class="o">.</span><span class="n">xs</span><span class="p">(</span><span class="s1">&#39;score&#39;</span><span class="p">,</span> <span class="n">axis</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">level</span><span class="o">=</span><span class="mi">1</span><span class="p">)</span><span class="o">.</span><span class="n">plot</span><span class="p">()</span> <span class="n">plt</span><span class="o">.</span><span class="n">gcf</span><span class="p">()</span><span class="o">.</span><span class="n">set_size_inches</span><span class="p">(</span><span class="mi">18</span><span class="p">,</span> <span class="mi">6</span><span class="p">)</span> <span class="n">plt</span><span class="o">.</span><span class="n">gcf</span><span class="p">()</span><span class="o">.</span><span class="n">suptitle</span><span class="p">(</span><span class="s2">&quot;Profitability score over time&quot;</span><span class="p">,</span> <span class="n">fontsize</span><span class="o">=</span><span class="mi">18</span><span class="p">);</span> </pre></div> @@ -6879,7 +7801,7 @@ MathJax.Hub.Config({tex2jax: {inlineMath: [['$','$'], ['\\(','\\)']]}}); <div class="inner_cell"> <div class="text_cell_render border-box-sizing rendered_html"> <h3 id="If-you-can-see-into-the-future,-that-is.">If you can see into the future, that is.<a class="anchor-link" href="#If-you-can-see-into-the-future,-that-is.">&#182;</a></h3><p>My previous class in Stochastic Calculus covered a lot of interesting topics, and the important one for today -is the <a href="https://en.wikipedia.org/wiki/Gambler's_ruin">Gambler's Ruin</a> problem. If you're interested in some of the theory behind it, also make sure to check out +is the <a href="https://en.wikipedia.org/wiki/Gambler&#39;s_ruin">Gambler's Ruin</a> problem. If you're interested in some of the theory behind it, also make sure to check out <a href="https://en.wikipedia.org/wiki/Random_walk">random walks</a>. The important bit is that we studied the <a href="https://en.wikipedia.org/wiki/Martingale_%28betting_system%29">Martingale Betting Strategy</a>, which describes for us a <strong>guaranteed way</strong> to <span style='font-size: x-small'>eventually</span> make money.</p> <p>The strategy goes like this: You are going to toss a fair coin with a friend. If you guess heads or tails correctly, you get back double the money you bet. If you guess incorrectly, you lose money. How should you bet?</p> @@ -6939,7 +7861,7 @@ d_n &> \frac{o_n}{c_n - o_n} \sum_{i=1}^{n-1} d_i(1 - \frac{1}{o_i}) <div class="prompt input_prompt">In&nbsp;[1]:</div> <div class="inner_cell"> <div class="input_area"> -<div class=" highlight hl-julia"><pre><span class="k">using</span> <span class="n">Quandl</span> +<div class=" highlight hl-julia"><pre><span></span><span class="k">using</span> <span class="n">Quandl</span> <span class="n">api_key</span> <span class="o">=</span> <span class="s">&quot;&quot;</span> <span class="n">daily_investment</span> <span class="o">=</span> <span class="n">function</span><span class="p">(</span><span class="n">current_open</span><span class="p">,</span> <span class="n">current_close</span><span class="p">,</span> <span class="n">purchase_history</span><span class="p">,</span> <span class="n">open_history</span><span class="p">)</span> <span class="c"># We&#39;re not going to safeguard against divide by 0 - that&#39;s the user&#39;s responsibility</span> @@ -6969,7 +7891,7 @@ d_n &> \frac{o_n}{c_n - o_n} \sum_{i=1}^{n-1} d_i(1 - \frac{1}{o_i}) <div class="prompt input_prompt">In&nbsp;[2]:</div> <div class="inner_cell"> <div class="input_area"> -<div class=" highlight hl-julia"><pre><span class="n">is_profitable</span> <span class="o">=</span> <span class="n">function</span><span class="p">(</span><span class="n">current_price</span><span class="p">,</span> <span class="n">purchase_history</span><span class="p">,</span> <span class="n">open_history</span><span class="p">)</span> +<div class=" highlight hl-julia"><pre><span></span><span class="n">is_profitable</span> <span class="o">=</span> <span class="n">function</span><span class="p">(</span><span class="n">current_price</span><span class="p">,</span> <span class="n">purchase_history</span><span class="p">,</span> <span class="n">open_history</span><span class="p">)</span> <span class="n">shares</span> <span class="o">=</span> <span class="n">sum</span><span class="p">(</span><span class="n">purchase_history</span> <span class="o">./</span> <span class="n">open_history</span><span class="p">)</span> <span class="k">return</span> <span class="n">current_price</span><span class="o">*</span><span class="n">shares</span> <span class="o">&gt;</span> <span class="n">sum</span><span class="p">(</span><span class="n">purchase_history</span><span class="p">)</span> <span class="k">end</span> @@ -7004,8 +7926,8 @@ d_n &> \frac{o_n}{c_n - o_n} \sum_{i=1}^{n-1} d_i(1 - \frac{1}{o_i}) <span class="n">leverages</span> <span class="o">=</span> <span class="p">[</span><span class="n">sum</span><span class="p">(</span><span class="n">investments</span><span class="p">[</span><span class="mi">1</span><span class="p">:</span><span class="n">i</span><span class="p">])</span> <span class="k">for</span> <span class="n">i</span><span class="o">=</span><span class="mi">1</span><span class="p">:</span><span class="n">length</span><span class="p">(</span><span class="n">investments</span><span class="p">)]</span> <span class="n">max_leverage</span> <span class="o">=</span> <span class="n">maximum</span><span class="p">(</span><span class="n">leverages</span><span class="p">)</span> <span class="o">/</span> <span class="n">investments</span><span class="p">[</span><span class="mi">1</span><span class="p">]</span> <span class="n">println</span><span class="p">(</span><span class="s">&quot;Max leverage: </span><span class="si">$(max_leverage)</span><span class="s">&quot;</span><span class="p">)</span> - <span class="n">println</span><span class="p">(</span><span class="s">&quot;Days invested: </span><span class="si">$</span><span class="s">(length(investments))&quot;</span><span class="p">)</span> - <span class="n">println</span><span class="p">(</span><span class="s">&quot;Profit: </span><span class="si">$</span><span class="s">profit&quot;</span><span class="p">)</span> + <span class="n">println</span><span class="p">(</span><span class="s">&quot;Days invested: </span><span class="si">$(length(investments))</span><span class="s">&quot;</span><span class="p">)</span> + <span class="n">println</span><span class="p">(</span><span class="s">&quot;Profit: </span><span class="si">$profit</span><span class="s">&quot;</span><span class="p">)</span> <span class="k">end</span><span class="p">;</span> </pre></div> @@ -7032,7 +7954,7 @@ d_n &> \frac{o_n}{c_n - o_n} \sum_{i=1}^{n-1} d_i(1 - \frac{1}{o_i}) <div class="prompt input_prompt">In&nbsp;[3]:</div> <div class="inner_cell"> <div class="input_area"> -<div class=" highlight hl-julia"><pre><span class="n">investments</span><span class="p">,</span> <span class="n">profit</span> <span class="o">=</span> <span class="n">simulate</span><span class="p">(</span><span class="s">&quot;YAHOO/LMT&quot;</span><span class="p">,</span> <span class="n">Date</span><span class="p">(</span><span class="mi">2015</span><span class="p">,</span> <span class="mi">11</span><span class="p">,</span> <span class="mi">29</span><span class="p">),</span> <span class="mi">100</span><span class="p">,</span> <span class="mf">1.01</span><span class="p">,</span> <span class="mi">10</span><span class="p">)</span> +<div class=" highlight hl-julia"><pre><span></span><span class="n">investments</span><span class="p">,</span> <span class="n">profit</span> <span class="o">=</span> <span class="n">simulate</span><span class="p">(</span><span class="s">&quot;YAHOO/LMT&quot;</span><span class="p">,</span> <span class="n">Date</span><span class="p">(</span><span class="mi">2015</span><span class="p">,</span> <span class="mi">11</span><span class="p">,</span> <span class="mi">29</span><span class="p">),</span> <span class="mi">100</span><span class="p">,</span> <span class="mf">1.01</span><span class="p">,</span> <span class="mi">10</span><span class="p">)</span> <span class="n">sim_summary</span><span class="p">(</span><span class="n">investments</span><span class="p">,</span> <span class="n">profit</span><span class="p">)</span> </pre></div> @@ -7075,7 +7997,7 @@ Profit: 0.6894803101560001 <div class="prompt input_prompt">In&nbsp;[4]:</div> <div class="inner_cell"> <div class="input_area"> -<div class=" highlight hl-julia"><pre><span class="n">investments</span><span class="p">,</span> <span class="n">profit</span> <span class="o">=</span> <span class="n">simulate</span><span class="p">(</span><span class="s">&quot;YAHOO/LMT&quot;</span><span class="p">,</span> <span class="n">Date</span><span class="p">(</span><span class="mi">2015</span><span class="p">,</span> <span class="mi">11</span><span class="p">,</span> <span class="mi">29</span><span class="p">),</span> <span class="mi">100</span><span class="p">,</span> <span class="mf">1.02</span><span class="p">,</span> <span class="mi">10</span><span class="p">)</span> +<div class=" highlight hl-julia"><pre><span></span><span class="n">investments</span><span class="p">,</span> <span class="n">profit</span> <span class="o">=</span> <span class="n">simulate</span><span class="p">(</span><span class="s">&quot;YAHOO/LMT&quot;</span><span class="p">,</span> <span class="n">Date</span><span class="p">(</span><span class="mi">2015</span><span class="p">,</span> <span class="mi">11</span><span class="p">,</span> <span class="mi">29</span><span class="p">),</span> <span class="mi">100</span><span class="p">,</span> <span class="mf">1.02</span><span class="p">,</span> <span class="mi">10</span><span class="p">)</span> <span class="n">sim_summary</span><span class="p">(</span><span class="n">investments</span><span class="p">,</span> <span class="n">profit</span><span class="p">)</span> </pre></div> @@ -7141,7 +8063,7 @@ MathJax.Hub.Config({tex2jax: {inlineMath: [['$','$'], ['\(','\)']]}}); <div class="prompt input_prompt">In&nbsp;[1]:</div> <div class="inner_cell"> <div class="input_area"> -<div class=" highlight hl-ipython3"><pre><span class="kn">import</span> <span class="nn">pickle</span> +<div class=" highlight hl-ipython3"><pre><span></span><span class="kn">import</span> <span class="nn">pickle</span> <span class="kn">import</span> <span class="nn">pandas</span> <span class="k">as</span> <span class="nn">pd</span> <span class="kn">import</span> <span class="nn">numpy</span> <span class="k">as</span> <span class="nn">np</span> <span class="kn">from</span> <span class="nn">bokeh.plotting</span> <span class="k">import</span> <span class="n">output_notebook</span><span class="p">,</span> <span class="n">figure</span><span class="p">,</span> <span class="n">show</span> @@ -7408,7 +8330,7 @@ MathJax.Hub.Config({tex2jax: {inlineMath: [['$','$'], ['\(','\)']]}}); <div class="prompt input_prompt">In&nbsp;[2]:</div> <div class="inner_cell"> <div class="input_area"> -<div class=" highlight hl-ipython3"><pre><span class="n">city_forecasts</span> <span class="o">=</span> <span class="n">pickle</span><span class="o">.</span><span class="n">load</span><span class="p">(</span><span class="nb">open</span><span class="p">(</span><span class="s1">&#39;city_forecasts.p&#39;</span><span class="p">,</span> <span class="s1">&#39;rb&#39;</span><span class="p">))</span> +<div class=" highlight hl-ipython3"><pre><span></span><span class="n">city_forecasts</span> <span class="o">=</span> <span class="n">pickle</span><span class="o">.</span><span class="n">load</span><span class="p">(</span><span class="nb">open</span><span class="p">(</span><span class="s1">&#39;city_forecasts.p&#39;</span><span class="p">,</span> <span class="s1">&#39;rb&#39;</span><span class="p">))</span> <span class="n">forecasts_df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="o">.</span><span class="n">from_dict</span><span class="p">(</span><span class="n">city_forecasts</span><span class="p">)</span> </pre></div> @@ -7422,7 +8344,7 @@ MathJax.Hub.Config({tex2jax: {inlineMath: [['$','$'], ['\(','\)']]}}); <div class="prompt input_prompt">In&nbsp;[3]:</div> <div class="inner_cell"> <div class="input_area"> -<div class=" highlight hl-ipython3"><pre><span class="n">cities</span> <span class="o">=</span> <span class="p">[</span><span class="s1">&#39;binghamton&#39;</span><span class="p">,</span> <span class="s1">&#39;cary&#39;</span><span class="p">,</span> <span class="s1">&#39;nyc&#39;</span><span class="p">,</span> <span class="s1">&#39;seattle&#39;</span><span class="p">]</span> +<div class=" highlight hl-ipython3"><pre><span></span><span class="n">cities</span> <span class="o">=</span> <span class="p">[</span><span class="s1">&#39;binghamton&#39;</span><span class="p">,</span> <span class="s1">&#39;cary&#39;</span><span class="p">,</span> <span class="s1">&#39;nyc&#39;</span><span class="p">,</span> <span class="s1">&#39;seattle&#39;</span><span class="p">]</span> <span class="n">city_colors</span> <span class="o">=</span> <span class="p">{</span><span class="n">cities</span><span class="p">[</span><span class="n">i</span><span class="p">]:</span> <span class="n">Palette</span><span class="p">[</span><span class="n">i</span><span class="p">]</span> <span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span> <span class="mi">4</span><span class="p">)}</span> <span class="k">def</span> <span class="nf">safe_cover</span><span class="p">(</span><span class="n">frame</span><span class="p">):</span> @@ -7453,7 +8375,7 @@ MathJax.Hub.Config({tex2jax: {inlineMath: [['$','$'], ['\(','\)']]}}); <div class="prompt input_prompt">In&nbsp;[4]:</div> <div class="inner_cell"> <div class="input_area"> -<div class=" highlight hl-ipython3"><pre><span class="n">years</span> <span class="o">=</span> <span class="nb">range</span><span class="p">(</span><span class="mi">1990</span><span class="p">,</span> <span class="mi">2016</span><span class="p">)</span> +<div class=" highlight hl-ipython3"><pre><span></span><span class="n">years</span> <span class="o">=</span> <span class="nb">range</span><span class="p">(</span><span class="mi">1990</span><span class="p">,</span> <span class="mi">2016</span><span class="p">)</span> <span class="k">def</span> <span class="nf">city_avg_cc</span><span class="p">(</span><span class="n">city</span><span class="p">,</span> <span class="n">month</span><span class="p">):</span> <span class="k">return</span> <span class="p">[</span><span class="n">monthly_avg_cloudcover</span><span class="p">(</span><span class="n">city</span><span class="p">,</span> <span class="n">y</span><span class="p">,</span> <span class="n">month</span><span class="p">)</span> <span class="k">for</span> <span class="n">y</span> <span class="ow">in</span> <span class="n">years</span><span class="p">]</span> @@ -7467,7 +8389,7 @@ MathJax.Hub.Config({tex2jax: {inlineMath: [['$','$'], ['\(','\)']]}}); <span class="k">for</span> <span class="n">month</span><span class="p">,</span> <span class="n">month_id</span> <span class="ow">in</span> <span class="n">months</span><span class="p">:</span> <span class="n">month_averages</span> <span class="o">=</span> <span class="p">{</span><span class="n">city</span><span class="p">:</span> <span class="n">city_avg_cc</span><span class="p">(</span><span class="n">city</span><span class="p">,</span> <span class="n">month_id</span><span class="p">)</span> <span class="k">for</span> <span class="n">city</span> <span class="ow">in</span> <span class="n">cities</span><span class="p">}</span> - <span class="n">f</span> <span class="o">=</span> <span class="n">figure</span><span class="p">(</span><span class="n">title</span><span class="o">=</span><span class="s2">&quot;{} Average Cloud Cover&quot;</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">month</span><span class="p">),</span> + <span class="n">f</span> <span class="o">=</span> <span class="n">figure</span><span class="p">(</span><span class="n">title</span><span class="o">=</span><span class="s2">&quot;</span><span class="si">{}</span><span class="s2"> Average Cloud Cover&quot;</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">month</span><span class="p">),</span> <span class="n">x_axis_label</span><span class="o">=</span><span class="s1">&#39;Year&#39;</span><span class="p">,</span> <span class="n">y_axis_label</span><span class="o">=</span><span class="s1">&#39;Cloud Cover Percentage&#39;</span><span class="p">)</span> <span class="k">for</span> <span class="n">city</span> <span class="ow">in</span> <span class="n">cities</span><span class="p">:</span> @@ -7658,7 +8580,7 @@ MathJax.Hub.Config({tex2jax: {inlineMath: [['$','$'], ['\(','\)']]}}); <div class="prompt input_prompt">In&nbsp;[5]:</div> <div class="inner_cell"> <div class="input_area"> -<div class=" highlight hl-ipython3"><pre><span class="k">def</span> <span class="nf">safe_precip</span><span class="p">(</span><span class="n">frame</span><span class="p">):</span> +<div class=" highlight hl-ipython3"><pre><span></span><span class="k">def</span> <span class="nf">safe_precip</span><span class="p">(</span><span class="n">frame</span><span class="p">):</span> <span class="k">if</span> <span class="n">frame</span> <span class="ow">and</span> <span class="s1">&#39;precipProbability&#39;</span> <span class="ow">in</span> <span class="n">frame</span><span class="p">:</span> <span class="k">return</span> <span class="n">frame</span><span class="p">[</span><span class="s1">&#39;precipProbability&#39;</span><span class="p">]</span> <span class="k">else</span><span class="p">:</span> @@ -7680,7 +8602,7 @@ MathJax.Hub.Config({tex2jax: {inlineMath: [['$','$'], ['\(','\)']]}}); <span class="k">for</span> <span class="n">month</span><span class="p">,</span> <span class="n">month_id</span> <span class="ow">in</span> <span class="n">months</span><span class="p">:</span> <span class="n">month_averages</span> <span class="o">=</span> <span class="p">{</span><span class="n">city</span><span class="p">:</span> <span class="n">city_avg_cc</span><span class="p">(</span><span class="n">city</span><span class="p">,</span> <span class="n">month_id</span><span class="p">)</span> <span class="k">for</span> <span class="n">city</span> <span class="ow">in</span> <span class="n">cities</span><span class="p">}</span> - <span class="n">f</span> <span class="o">=</span> <span class="n">figure</span><span class="p">(</span><span class="n">title</span><span class="o">=</span><span class="s2">&quot;{} Average Precipitation Chance&quot;</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">month</span><span class="p">),</span> + <span class="n">f</span> <span class="o">=</span> <span class="n">figure</span><span class="p">(</span><span class="n">title</span><span class="o">=</span><span class="s2">&quot;</span><span class="si">{}</span><span class="s2"> Average Precipitation Chance&quot;</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">month</span><span class="p">),</span> <span class="n">x_axis_label</span><span class="o">=</span><span class="s1">&#39;Year&#39;</span><span class="p">,</span> <span class="n">y_axis_label</span><span class="o">=</span><span class="s1">&#39;Precipitation Chance Percentage&#39;</span><span class="p">)</span> <span class="k">for</span> <span class="n">city</span> <span class="ow">in</span> <span class="n">cities</span><span class="p">:</span> @@ -7877,7 +8799,7 @@ MathJax.Hub.Config({tex2jax: {inlineMath: [['$','$'], ['\(','\)']]}}); <div class="prompt input_prompt">In&nbsp;[1]:</div> <div class="inner_cell"> <div class="input_area"> -<div class=" highlight hl-ipython3"><pre><span class="kn">from</span> <span class="nn">bokeh.plotting</span> <span class="k">import</span> <span class="n">figure</span><span class="p">,</span> <span class="n">output_notebook</span><span class="p">,</span> <span class="n">show</span> +<div class=" highlight hl-ipython3"><pre><span></span><span class="kn">from</span> <span class="nn">bokeh.plotting</span> <span class="k">import</span> <span class="n">figure</span><span class="p">,</span> <span class="n">output_notebook</span><span class="p">,</span> <span class="n">show</span> <span class="kn">from</span> <span class="nn">bokeh.palettes</span> <span class="k">import</span> <span class="n">PuBuGn9</span> <span class="k">as</span> <span class="n">Palette</span> <span class="kn">import</span> <span class="nn">pandas</span> <span class="k">as</span> <span class="nn">pd</span> <span class="kn">import</span> <span class="nn">numpy</span> <span class="k">as</span> <span class="nn">np</span> @@ -8137,7 +9059,7 @@ MathJax.Hub.Config({tex2jax: {inlineMath: [['$','$'], ['\(','\)']]}}); <div class="prompt input_prompt">In&nbsp;[2]:</div> <div class="inner_cell"> <div class="input_area"> -<div class=" highlight hl-ipython3"><pre><span class="n">city_forecasts</span> <span class="o">=</span> <span class="n">pickle</span><span class="o">.</span><span class="n">load</span><span class="p">(</span><span class="nb">open</span><span class="p">(</span><span class="s1">&#39;city_forecasts.p&#39;</span><span class="p">,</span> <span class="s1">&#39;rb&#39;</span><span class="p">))</span> +<div class=" highlight hl-ipython3"><pre><span></span><span class="n">city_forecasts</span> <span class="o">=</span> <span class="n">pickle</span><span class="o">.</span><span class="n">load</span><span class="p">(</span><span class="nb">open</span><span class="p">(</span><span class="s1">&#39;city_forecasts.p&#39;</span><span class="p">,</span> <span class="s1">&#39;rb&#39;</span><span class="p">))</span> <span class="n">forecast_df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="o">.</span><span class="n">from_dict</span><span class="p">(</span><span class="n">city_forecasts</span><span class="p">)</span> </pre></div> @@ -8151,7 +9073,7 @@ MathJax.Hub.Config({tex2jax: {inlineMath: [['$','$'], ['\(','\)']]}}); <div class="prompt input_prompt">In&nbsp;[3]:</div> <div class="inner_cell"> <div class="input_area"> -<div class=" highlight hl-ipython3"><pre><span class="n">cary_forecast</span> <span class="o">=</span> <span class="n">forecast_df</span><span class="p">[</span><span class="s1">&#39;cary&#39;</span><span class="p">]</span> +<div class=" highlight hl-ipython3"><pre><span></span><span class="n">cary_forecast</span> <span class="o">=</span> <span class="n">forecast_df</span><span class="p">[</span><span class="s1">&#39;cary&#39;</span><span class="p">]</span> <span class="n">years</span> <span class="o">=</span> <span class="nb">range</span><span class="p">(</span><span class="mi">1990</span><span class="p">,</span> <span class="mi">2016</span><span class="p">)</span> <span class="n">months</span> <span class="o">=</span> <span class="nb">range</span><span class="p">(</span><span class="mi">7</span><span class="p">,</span> <span class="mi">12</span><span class="p">)</span> <span class="n">months_str</span> <span class="o">=</span> <span class="p">[</span><span class="s1">&#39;July&#39;</span><span class="p">,</span> <span class="s1">&#39;August&#39;</span><span class="p">,</span> <span class="s1">&#39;September&#39;</span><span class="p">,</span> <span class="s1">&#39;October&#39;</span><span class="p">,</span> <span class="s1">&#39;November&#39;</span><span class="p">]</span> @@ -8239,7 +9161,7 @@ MathJax.Hub.Config({tex2jax: {inlineMath: [['$','$'], ['\(','\)']]}}); <div class="prompt input_prompt">In&nbsp;[4]:</div> <div class="inner_cell"> <div class="input_area"> -<div class=" highlight hl-ipython3"><pre><span class="k">def</span> <span class="nf">monthly_cloudy_days</span><span class="p">(</span><span class="n">year</span><span class="p">,</span> <span class="n">month</span><span class="p">):</span> +<div class=" highlight hl-ipython3"><pre><span></span><span class="k">def</span> <span class="nf">monthly_cloudy_days</span><span class="p">(</span><span class="n">year</span><span class="p">,</span> <span class="n">month</span><span class="p">):</span> <span class="n">dates</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DatetimeIndex</span><span class="p">(</span><span class="n">start</span><span class="o">=</span><span class="n">datetime</span><span class="p">(</span><span class="n">year</span><span class="p">,</span> <span class="n">month</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">12</span><span class="p">),</span> <span class="n">end</span><span class="o">=</span><span class="n">datetime</span><span class="p">(</span><span class="n">year</span><span class="p">,</span> <span class="n">month</span> <span class="o">+</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">12</span><span class="p">),</span> <span class="n">freq</span><span class="o">=</span><span class="s1">&#39;D&#39;</span><span class="p">,</span> <span class="n">closed</span><span class="o">=</span><span class="s1">&#39;left&#39;</span><span class="p">)</span> @@ -8355,7 +9277,7 @@ MathJax.Hub.Config({tex2jax: {inlineMath: [['$','$'], ['\(','\)']]}}); <div class="prompt input_prompt">In&nbsp;[5]:</div> <div class="inner_cell"> <div class="input_area"> -<div class=" highlight hl-ipython3"><pre><span class="k">def</span> <span class="nf">safe_precip</span><span class="p">(</span><span class="n">frame</span><span class="p">):</span> +<div class=" highlight hl-ipython3"><pre><span></span><span class="k">def</span> <span class="nf">safe_precip</span><span class="p">(</span><span class="n">frame</span><span class="p">):</span> <span class="k">if</span> <span class="n">frame</span> <span class="ow">and</span> <span class="s1">&#39;precipProbability&#39;</span> <span class="ow">in</span> <span class="n">frame</span><span class="p">:</span> <span class="k">return</span> <span class="n">frame</span><span class="p">[</span><span class="s1">&#39;precipProbability&#39;</span><span class="p">]</span> <span class="k">else</span><span class="p">:</span> @@ -8436,7 +9358,7 @@ MathJax.Hub.Config({tex2jax: {inlineMath: [['$','$'], ['\(','\)']]}}); <div class="prompt input_prompt">In&nbsp;[6]:</div> <div class="inner_cell"> <div class="input_area"> -<div class=" highlight hl-ipython3"><pre><span class="k">def</span> <span class="nf">monthly_rainy_days</span><span class="p">(</span><span class="n">year</span><span class="p">,</span> <span class="n">month</span><span class="p">):</span> +<div class=" highlight hl-ipython3"><pre><span></span><span class="k">def</span> <span class="nf">monthly_rainy_days</span><span class="p">(</span><span class="n">year</span><span class="p">,</span> <span class="n">month</span><span class="p">):</span> <span class="n">dates</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DatetimeIndex</span><span class="p">(</span><span class="n">start</span><span class="o">=</span><span class="n">datetime</span><span class="p">(</span><span class="n">year</span><span class="p">,</span> <span class="n">month</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">12</span><span class="p">),</span> <span class="n">end</span><span class="o">=</span><span class="n">datetime</span><span class="p">(</span><span class="n">year</span><span class="p">,</span> <span class="n">month</span> <span class="o">+</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">12</span><span class="p">),</span> <span class="n">freq</span><span class="o">=</span><span class="s1">&#39;D&#39;</span><span class="p">,</span> <span class="n">closed</span><span class="o">=</span><span class="s1">&#39;left&#39;</span><span class="p">)</span> @@ -8555,7 +9477,7 @@ MathJax.Hub.Config({tex2jax: {inlineMath: [['$','$'], ['\(','\)']]}}); <div class="inner_cell"> <div class="text_cell_render border-box-sizing rendered_html"> <h1 id="Generating-the-Forecast-file">Generating the Forecast file<a class="anchor-link" href="#Generating-the-Forecast-file">&#182;</a></h1><p>The following code was generates the file that was used throughout the blog post. Please note that I'm retrieving data for other cities to use in a future blog post, only Cary data was used for this post.</p> -<div class="highlight"><pre><span class="kn">import</span> <span class="nn">pandas</span> <span class="kn">as</span> <span class="nn">pd</span> +<div class="highlight"><pre><span></span><span class="kn">import</span> <span class="nn">pandas</span> <span class="kn">as</span> <span class="nn">pd</span> <span class="kn">from</span> <span class="nn">functools</span> <span class="kn">import</span> <span class="nb">reduce</span> <span class="kn">import</span> <span class="nn">requests</span> <span class="kn">from</span> <span class="nn">datetime</span> <span class="kn">import</span> <span class="n">datetime</span> @@ -8633,7 +9555,7 @@ MathJax.Hub.Config({tex2jax: {inlineMath: [['$','$'], ['\(','\)']]}}); <div class="prompt input_prompt">In&nbsp;[1]:</div> <div class="inner_cell"> <div class="input_area"> -<div class=" highlight hl-ipython3"><pre><span class="kn">import</span> <span class="nn">requests</span> +<div class=" highlight hl-ipython3"><pre><span></span><span class="kn">import</span> <span class="nn">requests</span> <span class="kn">import</span> <span class="nn">pandas</span> <span class="k">as</span> <span class="nn">pd</span> <span class="kn">import</span> <span class="nn">numpy</span> <span class="k">as</span> <span class="nn">np</span> <span class="kn">from</span> <span class="nn">dateutil</span> <span class="k">import</span> <span class="n">parser</span> <span class="k">as</span> <span class="n">dtparser</span> @@ -8668,7 +9590,7 @@ MathJax.Hub.Config({tex2jax: {inlineMath: [['$','$'], ['\(','\)']]}}); <div class="prompt input_prompt">In&nbsp;[2]:</div> <div class="inner_cell"> <div class="input_area"> -<div class=" highlight hl-ipython3"><pre><span class="k">class</span> <span class="nc">ArticleListParser</span><span class="p">(</span><span class="n">HTMLParser</span><span class="p">):</span> +<div class=" highlight hl-ipython3"><pre><span></span><span class="k">class</span> <span class="nc">ArticleListParser</span><span class="p">(</span><span class="n">HTMLParser</span><span class="p">):</span> <span class="sd">&quot;&quot;&quot;Given a web page with articles on it, parse out the article links&quot;&quot;&quot;</span> <span class="n">articles</span> <span class="o">=</span> <span class="p">[]</span> @@ -8681,7 +9603,7 @@ MathJax.Hub.Config({tex2jax: {inlineMath: [['$','$'], ['\(','\)']]}}); <span class="bp">self</span><span class="o">.</span><span class="n">articles</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">href</span><span class="p">)</span> <span class="n">base_url</span> <span class="o">=</span> <span class="s2">&quot;http://seekingalpha.com/author/wall-street-breakfast/articles&quot;</span> -<span class="n">article_page_urls</span> <span class="o">=</span> <span class="p">[</span><span class="n">base_url</span><span class="p">]</span> <span class="o">+</span> <span class="p">[</span><span class="n">base_url</span> <span class="o">+</span> <span class="s1">&#39;/{}&#39;</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">i</span><span class="p">)</span> <span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="mi">2</span><span class="p">,</span> <span class="mi">20</span><span class="p">)]</span> +<span class="n">article_page_urls</span> <span class="o">=</span> <span class="p">[</span><span class="n">base_url</span><span class="p">]</span> <span class="o">+</span> <span class="p">[</span><span class="n">base_url</span> <span class="o">+</span> <span class="s1">&#39;/</span><span class="si">{}</span><span class="s1">&#39;</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">i</span><span class="p">)</span> <span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="mi">2</span><span class="p">,</span> <span class="mi">20</span><span class="p">)]</span> <span class="n">global_articles</span> <span class="o">=</span> <span class="p">[]</span> <span class="k">for</span> <span class="n">page</span> <span class="ow">in</span> <span class="n">article_page_urls</span><span class="p">:</span> @@ -8703,7 +9625,7 @@ MathJax.Hub.Config({tex2jax: {inlineMath: [['$','$'], ['\(','\)']]}}); <div class="prompt input_prompt">In&nbsp;[3]:</div> <div class="inner_cell"> <div class="input_area"> -<div class=" highlight hl-ipython3"><pre><span class="k">class</span> <span class="nc">ArticleReturnParser</span><span class="p">(</span><span class="n">HTMLParser</span><span class="p">):</span> +<div class=" highlight hl-ipython3"><pre><span></span><span class="k">class</span> <span class="nc">ArticleReturnParser</span><span class="p">(</span><span class="n">HTMLParser</span><span class="p">):</span> <span class="s2">&quot;Given an article, parse out the futures returns in it&quot;</span> <span class="n">record_font_tags</span> <span class="o">=</span> <span class="kc">False</span> @@ -8790,7 +9712,7 @@ MathJax.Hub.Config({tex2jax: {inlineMath: [['$','$'], ['\(','\)']]}}); <div class="prompt input_prompt">In&nbsp;[4]:</div> <div class="inner_cell"> <div class="input_area"> -<div class=" highlight hl-ipython3"><pre><span class="c1"># article_df is sorted by date, so we get the first row.</span> +<div class=" highlight hl-ipython3"><pre><span></span><span class="c1"># article_df is sorted by date, so we get the first row.</span> <span class="n">start_date</span> <span class="o">=</span> <span class="n">article_df</span><span class="o">.</span><span class="n">sort_values</span><span class="p">(</span><span class="n">by</span><span class="o">=</span><span class="s1">&#39;date&#39;</span><span class="p">)</span><span class="o">.</span><span class="n">iloc</span><span class="p">[</span><span class="mi">0</span><span class="p">][</span><span class="s1">&#39;date&#39;</span><span class="p">]</span> <span class="o">-</span> <span class="n">relativedelta</span><span class="p">(</span><span class="n">days</span><span class="o">=</span><span class="mi">1</span><span class="p">)</span> <span class="n">SPY</span> <span class="o">=</span> <span class="n">Quandl</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s2">&quot;GOOG/NYSE_SPY&quot;</span><span class="p">,</span> <span class="n">trim_start</span><span class="o">=</span><span class="n">start_date</span><span class="p">)</span> <span class="n">DJIA</span> <span class="o">=</span> <span class="n">Quandl</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s2">&quot;GOOG/AMS_DIA&quot;</span><span class="p">,</span> <span class="n">trim_start</span><span class="o">=</span><span class="n">start_date</span><span class="p">)</span> @@ -8819,7 +9741,7 @@ MathJax.Hub.Config({tex2jax: {inlineMath: [['$','$'], ['\(','\)']]}}); <div class="prompt input_prompt">In&nbsp;[5]:</div> <div class="inner_cell"> <div class="input_area"> -<div class=" highlight hl-ipython3"><pre><span class="k">def</span> <span class="nf">calculate_opening_ret</span><span class="p">(</span><span class="n">frame</span><span class="p">):</span> +<div class=" highlight hl-ipython3"><pre><span></span><span class="k">def</span> <span class="nf">calculate_opening_ret</span><span class="p">(</span><span class="n">frame</span><span class="p">):</span> <span class="c1"># I&#39;m not a huge fan of the appending for loop,</span> <span class="c1"># but it&#39;s a bit verbose for a comprehension</span> <span class="n">data</span> <span class="o">=</span> <span class="p">{}</span> @@ -8932,7 +9854,7 @@ S&amp;P 0.604478 0.597015 0.811808 0.848708 <div class="prompt input_prompt">In&nbsp;[6]:</div> <div class="inner_cell"> <div class="input_area"> -<div class=" highlight hl-ipython3"><pre><span class="k">def</span> <span class="nf">calculate_closing_ret</span><span class="p">(</span><span class="n">frame</span><span class="p">):</span> +<div class=" highlight hl-ipython3"><pre><span></span><span class="k">def</span> <span class="nf">calculate_closing_ret</span><span class="p">(</span><span class="n">frame</span><span class="p">):</span> <span class="c1"># I&#39;m not a huge fan of the appending for loop,</span> <span class="c1"># but it&#39;s a bit verbose for a comprehension</span> <span class="n">data</span> <span class="o">=</span> <span class="p">{}</span> @@ -9059,7 +9981,7 @@ Because these are all very similar, we decided to demonstrate all 3 products at <div class="prompt input_prompt">In&nbsp;[1]:</div> <div class="inner_cell"> <div class="input_area"> -<div class=" highlight hl-julia"><pre><span class="k">using</span> <span class="n">Gadfly</span> +<div class=" highlight hl-julia"><pre><span></span><span class="k">using</span> <span class="n">Gadfly</span> </pre></div> </div> @@ -9095,7 +10017,7 @@ Because these are all very similar, we decided to demonstrate all 3 products at <div class="prompt input_prompt">In&nbsp;[2]:</div> <div class="inner_cell"> <div class="input_area"> -<div class=" highlight hl-julia"><pre><span class="n">S0</span> <span class="o">=</span> <span class="mf">102.2</span> +<div class=" highlight hl-julia"><pre><span></span><span class="n">S0</span> <span class="o">=</span> <span class="mf">102.2</span> <span class="n">nominal</span> <span class="o">=</span> <span class="mi">100</span> <span class="n">q</span> <span class="o">=</span> <span class="mf">2.84</span> <span class="o">/</span> <span class="mi">100</span> <span class="n">σ</span> <span class="o">=</span> <span class="mf">15.37</span> <span class="o">/</span> <span class="mi">100</span> @@ -9151,7 +10073,7 @@ Because these are all very similar, we decided to demonstrate all 3 products at <div class="prompt input_prompt">In&nbsp;[3]:</div> <div class="inner_cell"> <div class="input_area"> -<div class=" highlight hl-julia"><pre><span class="n">simulate_gbm</span> <span class="o">=</span> <span class="n">function</span><span class="p">(</span><span class="n">S0</span><span class="p">,</span> <span class="n">μ</span><span class="p">,</span> <span class="n">σ</span><span class="p">,</span> <span class="n">T</span><span class="p">,</span> <span class="n">n</span><span class="p">)</span> +<div class=" highlight hl-julia"><pre><span></span><span class="n">simulate_gbm</span> <span class="o">=</span> <span class="n">function</span><span class="p">(</span><span class="n">S0</span><span class="p">,</span> <span class="n">μ</span><span class="p">,</span> <span class="n">σ</span><span class="p">,</span> <span class="n">T</span><span class="p">,</span> <span class="n">n</span><span class="p">)</span> <span class="c"># Set the initial state</span> <span class="n">m</span> <span class="o">=</span> <span class="n">length</span><span class="p">(</span><span class="n">S0</span><span class="p">)</span> <span class="n">t</span> <span class="o">=</span> <span class="n">T</span> <span class="o">/</span> <span class="n">n</span> @@ -9222,7 +10144,7 @@ Because these are all very similar, we decided to demonstrate all 3 products at <div class="prompt input_prompt">In&nbsp;[4]:</div> <div class="inner_cell"> <div class="input_area"> -<div class=" highlight hl-julia"><pre><span class="n">initial</span> <span class="o">=</span> <span class="n">ones</span><span class="p">(</span><span class="mi">5</span><span class="p">)</span> <span class="o">*</span> <span class="n">S0</span> +<div class=" highlight hl-julia"><pre><span></span><span class="n">initial</span> <span class="o">=</span> <span class="n">ones</span><span class="p">(</span><span class="mi">5</span><span class="p">)</span> <span class="o">*</span> <span class="n">S0</span> <span class="c"># Using μ=0, T=.25 for now, we&#39;ll use the proper values later</span> <span class="n">motion</span> <span class="o">=</span> <span class="n">simulate_gbm</span><span class="p">(</span><span class="n">initial</span><span class="p">,</span> <span class="mi">0</span><span class="p">,</span> <span class="n">σ</span><span class="p">,</span> <span class="o">.</span><span class="mi">25</span><span class="p">,</span> <span class="mi">200</span><span class="p">)</span> @@ -10830,7 +11752,7 @@ fig.select("#fig-3a6dd25ad25c4037a166889ee51bb151-element-20") <div class="prompt input_prompt">In&nbsp;[5]:</div> <div class="inner_cell"> <div class="input_area"> -<div class=" highlight hl-julia"><pre><span class="n">forward_term</span> <span class="o">=</span> <span class="n">function</span><span class="p">(</span><span class="n">yearly_term</span><span class="p">)</span> +<div class=" highlight hl-julia"><pre><span></span><span class="n">forward_term</span> <span class="o">=</span> <span class="n">function</span><span class="p">(</span><span class="n">yearly_term</span><span class="p">)</span> <span class="c"># It is assumed that we have a yearly term structure passed in, and starts at year 0</span> <span class="c"># This implies a nominal rate above 0 for the first year!</span> <span class="n">years</span> <span class="o">=</span> <span class="n">length</span><span class="p">(</span><span class="n">term</span><span class="p">)</span><span class="o">-</span><span class="mi">1</span> <span class="c"># because we start at 0</span> @@ -10860,14 +11782,14 @@ fig.select("#fig-3a6dd25ad25c4037a166889ee51bb151-element-20") <div class="prompt input_prompt">In&nbsp;[6]:</div> <div class="inner_cell"> <div class="input_area"> -<div class=" highlight hl-julia"><pre><span class="c"># Example term structure taken from:</span> +<div class=" highlight hl-julia"><pre><span></span><span class="c"># Example term structure taken from:</span> <span class="c"># http://www.treasury.gov/resource-center/data-chart-center/interest-rates/Pages/TextView.aspx?data=yield</span> <span class="c"># Linear interpolation used years in-between periods, assuming real-dollar</span> <span class="c"># interest rates</span> <span class="n">forward_yield</span> <span class="o">=</span> <span class="n">forward_term</span><span class="p">(</span><span class="n">term</span><span class="p">)</span> <span class="n">calculated_term2</span> <span class="o">=</span> <span class="n">term</span><span class="p">[</span><span class="mi">1</span><span class="p">]</span> <span class="o">*</span> <span class="n">forward_yield</span><span class="p">[</span><span class="mi">1</span><span class="p">]</span> -<span class="n">println</span><span class="p">(</span><span class="s">&quot;Actual term[2]: </span><span class="si">$</span><span class="s">(term[2]); Calculated term[2]: </span><span class="si">$(calculated_term2)</span><span class="s">&quot;</span><span class="p">)</span> +<span class="n">println</span><span class="p">(</span><span class="s">&quot;Actual term[2]: </span><span class="si">$(term[2])</span><span class="s">; Calculated term[2]: </span><span class="si">$(calculated_term2)</span><span class="s">&quot;</span><span class="p">)</span> </pre></div> </div> @@ -10904,7 +11826,7 @@ fig.select("#fig-3a6dd25ad25c4037a166889ee51bb151-element-20") <div class="prompt input_prompt">In&nbsp;[7]:</div> <div class="inner_cell"> <div class="input_area"> -<div class=" highlight hl-julia"><pre><span class="n">full_motion</span> <span class="o">=</span> <span class="n">ones</span><span class="p">(</span><span class="mi">5</span><span class="p">)</span> <span class="o">*</span> <span class="n">S0</span> +<div class=" highlight hl-julia"><pre><span></span><span class="n">full_motion</span> <span class="o">=</span> <span class="n">ones</span><span class="p">(</span><span class="mi">5</span><span class="p">)</span> <span class="o">*</span> <span class="n">S0</span> <span class="n">full_term</span> <span class="o">=</span> <span class="n">vcat</span><span class="p">(</span><span class="n">term</span><span class="p">[</span><span class="mi">1</span><span class="p">],</span> <span class="n">forward_yield</span><span class="p">)</span> <span class="k">for</span> <span class="n">i</span><span class="o">=</span><span class="mi">1</span><span class="p">:</span><span class="n">T</span> <span class="n">μ</span> <span class="o">=</span> <span class="p">(</span><span class="n">full_term</span><span class="p">[</span><span class="n">i</span><span class="p">]</span> <span class="o">-</span> <span class="mi">1</span> <span class="o">-</span> <span class="n">q</span><span class="p">)</span> @@ -12465,7 +13387,7 @@ fig.select("#fig-0378e04b897742b597befd2e8e1c169e-element-20") <div class="prompt input_prompt">In&nbsp;[8]:</div> <div class="inner_cell"> <div class="input_area"> -<div class=" highlight hl-julia"><pre><span class="n">full_simulation</span> <span class="o">=</span> <span class="n">function</span><span class="p">(</span><span class="n">S0</span><span class="p">,</span> <span class="n">T</span><span class="p">,</span> <span class="n">n</span><span class="p">,</span> <span class="n">m</span><span class="p">,</span> <span class="n">term</span><span class="p">)</span> +<div class=" highlight hl-julia"><pre><span></span><span class="n">full_simulation</span> <span class="o">=</span> <span class="n">function</span><span class="p">(</span><span class="n">S0</span><span class="p">,</span> <span class="n">T</span><span class="p">,</span> <span class="n">n</span><span class="p">,</span> <span class="n">m</span><span class="p">,</span> <span class="n">term</span><span class="p">)</span> <span class="n">forward</span> <span class="o">=</span> <span class="n">vcat</span><span class="p">(</span><span class="n">term</span><span class="p">[</span><span class="mi">1</span><span class="p">],</span> <span class="n">forward_term</span><span class="p">(</span><span class="n">term</span><span class="p">))</span> <span class="c"># And an S0 to kick things off.</span> @@ -12481,7 +13403,7 @@ fig.select("#fig-0378e04b897742b597befd2e8e1c169e-element-20") <span class="n">tic</span><span class="p">()</span> <span class="n">full_simulation</span><span class="p">(</span><span class="n">S0</span><span class="p">,</span> <span class="n">T</span><span class="p">,</span> <span class="n">n</span><span class="p">,</span> <span class="n">m</span><span class="p">,</span> <span class="n">term</span><span class="p">)</span> <span class="n">time</span> <span class="o">=</span> <span class="n">toq</span><span class="p">()</span> -<span class="p">@</span><span class="n">printf</span><span class="p">(</span><span class="s">&quot;Time to run simulation: %.2fs&quot;</span><span class="p">,</span> <span class="n">time</span><span class="p">)</span> +<span class="p">@</span><span class="n">printf</span><span class="p">(</span><span class="s">&quot;Time to run simulation: </span><span class="si">%.2f</span><span class="s">s&quot;</span><span class="p">,</span> <span class="n">time</span><span class="p">)</span> </pre></div> </div> @@ -12526,7 +13448,7 @@ fig.select("#fig-0378e04b897742b597befd2e8e1c169e-element-20") <div class="prompt input_prompt">In&nbsp;[9]:</div> <div class="inner_cell"> <div class="input_area"> -<div class=" highlight hl-julia"><pre><span class="n">call_barrier</span> <span class="o">=</span> <span class="n">S0</span> +<div class=" highlight hl-julia"><pre><span></span><span class="n">call_barrier</span> <span class="o">=</span> <span class="n">S0</span> <span class="n">strike</span> <span class="o">=</span> <span class="n">S0</span> <span class="n">protection_barrier</span> <span class="o">=</span> <span class="n">S0</span> <span class="o">*</span> <span class="o">.</span><span class="mi">6</span> <span class="n">coupon</span> <span class="o">=</span> <span class="n">nominal</span> <span class="o">*</span> <span class="o">.</span><span class="mi">07</span> @@ -12570,13 +13492,13 @@ fig.select("#fig-0378e04b897742b597befd2e8e1c169e-element-20") <span class="n">tic</span><span class="p">()</span> <span class="n">mean_payoffs</span><span class="p">[</span><span class="n">i</span><span class="p">]</span> <span class="o">=</span> <span class="n">athena</span><span class="p">()</span> <span class="n">time</span> <span class="o">=</span> <span class="n">toq</span><span class="p">()</span> - <span class="p">@</span><span class="n">printf</span><span class="p">(</span><span class="s">&quot;Mean of simulation %i: \</span><span class="si">$</span><span class="s">%.4f; Simulation time: %.2fs</span><span class="se">\n</span><span class="s">&quot;</span><span class="p">,</span> <span class="n">i</span><span class="p">,</span> <span class="n">mean_payoffs</span><span class="p">[</span><span class="n">i</span><span class="p">],</span> <span class="n">time</span><span class="p">)</span> + <span class="p">@</span><span class="n">printf</span><span class="p">(</span><span class="s">&quot;Mean of simulation </span><span class="si">%i</span><span class="s">: \$</span><span class="si">%.4f</span><span class="s">; Simulation time: </span><span class="si">%.2f</span><span class="s">s</span><span class="se">\n</span><span class="s">&quot;</span><span class="p">,</span> <span class="n">i</span><span class="p">,</span> <span class="n">mean_payoffs</span><span class="p">[</span><span class="n">i</span><span class="p">],</span> <span class="n">time</span><span class="p">)</span> <span class="k">end</span> <span class="n">final_mean</span> <span class="o">=</span> <span class="n">mean</span><span class="p">(</span><span class="n">mean_payoffs</span><span class="p">)</span> -<span class="n">println</span><span class="p">(</span><span class="s">&quot;Mean over </span><span class="si">$</span><span class="s">num_simulations simulations: </span><span class="si">$</span><span class="s">(mean(mean_payoffs))&quot;</span><span class="p">)</span> +<span class="n">println</span><span class="p">(</span><span class="s">&quot;Mean over </span><span class="si">$num_simulations</span><span class="s"> simulations: </span><span class="si">$(mean(mean_payoffs))</span><span class="s">&quot;</span><span class="p">)</span> <span class="n">pv</span> <span class="o">=</span> <span class="n">final_mean</span> <span class="o">*</span> <span class="p">(</span><span class="n">exp</span><span class="p">(</span><span class="o">-</span><span class="p">(</span><span class="n">prod</span><span class="p">(</span><span class="n">forward_structure</span><span class="p">)</span><span class="o">-</span><span class="mi">1</span><span class="p">)</span><span class="o">*</span><span class="n">T</span><span class="p">))</span> -<span class="p">@</span><span class="n">printf</span><span class="p">(</span><span class="s">&quot;Present value of Athena note: \</span><span class="si">$</span><span class="s">%.2f, notional: \</span><span class="si">$</span><span class="s">%.2f&quot;</span><span class="p">,</span> <span class="n">pv</span><span class="p">,</span> <span class="n">nominal</span><span class="p">)</span> +<span class="p">@</span><span class="n">printf</span><span class="p">(</span><span class="s">&quot;Present value of Athena note: \$</span><span class="si">%.2f</span><span class="s">, notional: \$</span><span class="si">%.2f</span><span class="s">&quot;</span><span class="p">,</span> <span class="n">pv</span><span class="p">,</span> <span class="n">nominal</span><span class="p">)</span> </pre></div> </div> @@ -12637,7 +13559,7 @@ Present value of Athena note: $95.00, notional: $100.00</pre> <div class="prompt input_prompt">In&nbsp;[10]:</div> <div class="inner_cell"> <div class="input_area"> -<div class=" highlight hl-julia"><pre><span class="n">call_barrier</span> <span class="o">=</span> <span class="n">S0</span> +<div class=" highlight hl-julia"><pre><span></span><span class="n">call_barrier</span> <span class="o">=</span> <span class="n">S0</span> <span class="n">coupon_barrier</span> <span class="o">=</span> <span class="n">S0</span> <span class="o">*</span> <span class="o">.</span><span class="mi">8</span> <span class="n">protection_barrier</span> <span class="o">=</span> <span class="n">S0</span> <span class="o">*</span> <span class="o">.</span><span class="mi">6</span> <span class="n">coupon</span> <span class="o">=</span> <span class="n">nominal</span> <span class="o">*</span> <span class="o">.</span><span class="mi">06</span> @@ -12684,13 +13606,13 @@ Present value of Athena note: $95.00, notional: $100.00</pre> <span class="n">tic</span><span class="p">()</span> <span class="n">mean_payoffs</span><span class="p">[</span><span class="n">i</span><span class="p">]</span> <span class="o">=</span> <span class="n">phoenix_no_memory</span><span class="p">()</span> <span class="n">time</span> <span class="o">=</span> <span class="n">toq</span><span class="p">()</span> - <span class="p">@</span><span class="n">printf</span><span class="p">(</span><span class="s">&quot;Mean of simulation %i: \</span><span class="si">$</span><span class="s">%.4f; Simulation time: %.2fs</span><span class="se">\n</span><span class="s">&quot;</span><span class="p">,</span> <span class="n">i</span><span class="p">,</span> <span class="n">mean_payoffs</span><span class="p">[</span><span class="n">i</span><span class="p">],</span> <span class="n">time</span><span class="p">)</span> + <span class="p">@</span><span class="n">printf</span><span class="p">(</span><span class="s">&quot;Mean of simulation </span><span class="si">%i</span><span class="s">: \$</span><span class="si">%.4f</span><span class="s">; Simulation time: </span><span class="si">%.2f</span><span class="s">s</span><span class="se">\n</span><span class="s">&quot;</span><span class="p">,</span> <span class="n">i</span><span class="p">,</span> <span class="n">mean_payoffs</span><span class="p">[</span><span class="n">i</span><span class="p">],</span> <span class="n">time</span><span class="p">)</span> <span class="k">end</span> <span class="n">final_mean</span> <span class="o">=</span> <span class="n">mean</span><span class="p">(</span><span class="n">mean_payoffs</span><span class="p">)</span> -<span class="n">println</span><span class="p">(</span><span class="s">&quot;Mean over </span><span class="si">$</span><span class="s">num_simulations simulations: </span><span class="si">$</span><span class="s">(mean(mean_payoffs))&quot;</span><span class="p">)</span> +<span class="n">println</span><span class="p">(</span><span class="s">&quot;Mean over </span><span class="si">$num_simulations</span><span class="s"> simulations: </span><span class="si">$(mean(mean_payoffs))</span><span class="s">&quot;</span><span class="p">)</span> <span class="n">pv</span> <span class="o">=</span> <span class="n">final_mean</span> <span class="o">*</span> <span class="n">exp</span><span class="p">(</span><span class="o">-</span><span class="p">(</span><span class="n">prod</span><span class="p">(</span><span class="n">forward_structure</span><span class="p">)</span><span class="o">-</span><span class="mi">1</span><span class="p">)</span><span class="o">*</span><span class="p">(</span><span class="n">T</span><span class="p">))</span> -<span class="p">@</span><span class="n">printf</span><span class="p">(</span><span class="s">&quot;Present value of Phoenix without memory note: \</span><span class="si">$</span><span class="s">%.2f&quot;</span><span class="p">,</span> <span class="n">pv</span><span class="p">)</span> +<span class="p">@</span><span class="n">printf</span><span class="p">(</span><span class="s">&quot;Present value of Phoenix without memory note: \$</span><span class="si">%.2f</span><span class="s">&quot;</span><span class="p">,</span> <span class="n">pv</span><span class="p">)</span> </pre></div> </div> @@ -12737,7 +13659,7 @@ Present value of Phoenix without memory note: $97.44</pre> <div class="prompt input_prompt">In&nbsp;[11]:</div> <div class="inner_cell"> <div class="input_area"> -<div class=" highlight hl-julia"><pre><span class="n">call_barrier</span> <span class="o">=</span> <span class="n">S0</span> +<div class=" highlight hl-julia"><pre><span></span><span class="n">call_barrier</span> <span class="o">=</span> <span class="n">S0</span> <span class="n">coupon_barrier</span> <span class="o">=</span> <span class="n">S0</span> <span class="o">*</span> <span class="o">.</span><span class="mi">8</span> <span class="n">protection_barrier</span> <span class="o">=</span> <span class="n">S0</span> <span class="o">*</span> <span class="o">.</span><span class="mi">6</span> <span class="n">coupon</span> <span class="o">=</span> <span class="n">nominal</span> <span class="o">*</span> <span class="o">.</span><span class="mi">07</span> @@ -12791,14 +13713,14 @@ Present value of Phoenix without memory note: $97.44</pre> <span class="n">tic</span><span class="p">()</span> <span class="n">mean_payoffs</span><span class="p">[</span><span class="n">i</span><span class="p">]</span> <span class="o">=</span> <span class="n">phoenix_with_memory</span><span class="p">()</span> <span class="n">time</span> <span class="o">=</span> <span class="n">toq</span><span class="p">()</span> - <span class="p">@</span><span class="n">printf</span><span class="p">(</span><span class="s">&quot;Mean of simulation %i: \</span><span class="si">$</span><span class="s">%.4f; Simulation time: %.2fs</span><span class="se">\n</span><span class="s">&quot;</span><span class="p">,</span> + <span class="p">@</span><span class="n">printf</span><span class="p">(</span><span class="s">&quot;Mean of simulation </span><span class="si">%i</span><span class="s">: \$</span><span class="si">%.4f</span><span class="s">; Simulation time: </span><span class="si">%.2f</span><span class="s">s</span><span class="se">\n</span><span class="s">&quot;</span><span class="p">,</span> <span class="n">i</span><span class="p">,</span> <span class="n">mean_payoffs</span><span class="p">[</span><span class="n">i</span><span class="p">],</span> <span class="n">time</span><span class="p">)</span> <span class="k">end</span> <span class="n">final_mean</span> <span class="o">=</span> <span class="n">mean</span><span class="p">(</span><span class="n">mean_payoffs</span><span class="p">)</span> -<span class="n">println</span><span class="p">(</span><span class="s">&quot;Mean over </span><span class="si">$</span><span class="s">num_simulations simulations: </span><span class="si">$</span><span class="s">(mean(mean_payoffs))&quot;</span><span class="p">)</span> +<span class="n">println</span><span class="p">(</span><span class="s">&quot;Mean over </span><span class="si">$num_simulations</span><span class="s"> simulations: </span><span class="si">$(mean(mean_payoffs))</span><span class="s">&quot;</span><span class="p">)</span> <span class="n">pv</span> <span class="o">=</span> <span class="n">final_mean</span> <span class="o">*</span> <span class="n">exp</span><span class="p">(</span><span class="o">-</span><span class="p">(</span><span class="n">prod</span><span class="p">(</span><span class="n">forward_structure</span><span class="p">)</span><span class="o">-</span><span class="mi">1</span><span class="p">)</span><span class="o">*</span><span class="p">(</span><span class="n">T</span><span class="p">))</span> -<span class="p">@</span><span class="n">printf</span><span class="p">(</span><span class="s">&quot;Present value of Phoenix with memory note: \</span><span class="si">$</span><span class="s">%.2f&quot;</span><span class="p">,</span> <span class="n">pv</span><span class="p">)</span> +<span class="p">@</span><span class="n">printf</span><span class="p">(</span><span class="s">&quot;Present value of Phoenix with memory note: \$</span><span class="si">%.2f</span><span class="s">&quot;</span><span class="p">,</span> <span class="n">pv</span><span class="p">)</span> </pre></div> </div> @@ -12900,7 +13822,7 @@ to make some tweaks over the coming weeks, and do another forward test in Januar <div class="prompt input_prompt">In&nbsp;[1]:</div> <div class="inner_cell"> <div class="input_area"> -<div class=" highlight hl-ipython3"><pre><span class="kn">from</span> <span class="nn">IPython.display</span> <span class="k">import</span> <span class="n">display</span> +<div class=" highlight hl-ipython3"><pre><span></span><span class="kn">from</span> <span class="nn">IPython.display</span> <span class="k">import</span> <span class="n">display</span> <span class="kn">import</span> <span class="nn">Quandl</span> <span class="kn">from</span> <span class="nn">datetime</span> <span class="k">import</span> <span class="n">datetime</span><span class="p">,</span> <span class="n">timedelta</span> @@ -12909,7 +13831,7 @@ to make some tweaks over the coming weeks, and do another forward test in Januar <span class="n">lookback</span> <span class="o">=</span> <span class="mi">30</span> <span class="n">d_col</span> <span class="o">=</span> <span class="s1">&#39;Close&#39;</span> -<span class="n">data</span> <span class="o">=</span> <span class="p">{</span><span class="n">tick</span><span class="p">:</span> <span class="n">Quandl</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">&#39;YAHOO/{}&#39;</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">tick</span><span class="p">))[</span><span class="o">-</span><span class="n">lookback</span><span class="p">:]</span> <span class="k">for</span> <span class="n">tick</span> <span class="ow">in</span> <span class="n">tickers</span><span class="p">}</span> +<span class="n">data</span> <span class="o">=</span> <span class="p">{</span><span class="n">tick</span><span class="p">:</span> <span class="n">Quandl</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">&#39;YAHOO/</span><span class="si">{}</span><span class="s1">&#39;</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">tick</span><span class="p">))[</span><span class="o">-</span><span class="n">lookback</span><span class="p">:]</span> <span class="k">for</span> <span class="n">tick</span> <span class="ow">in</span> <span class="n">tickers</span><span class="p">}</span> <span class="n">market</span> <span class="o">=</span> <span class="n">Quandl</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">market_ticker</span><span class="p">)</span> </pre></div> @@ -12933,7 +13855,7 @@ to make some tweaks over the coming weeks, and do another forward test in Januar <div class="prompt input_prompt">In&nbsp;[2]:</div> <div class="inner_cell"> <div class="input_area"> -<div class=" highlight hl-ipython3"><pre><span class="n">returns</span> <span class="o">=</span> <span class="p">{</span><span class="n">tick</span><span class="p">:</span> <span class="n">data</span><span class="p">[</span><span class="n">tick</span><span class="p">][</span><span class="n">d_col</span><span class="p">]</span><span class="o">.</span><span class="n">pct_change</span><span class="p">()</span> <span class="k">for</span> <span class="n">tick</span> <span class="ow">in</span> <span class="n">tickers</span><span class="p">}</span> +<div class=" highlight hl-ipython3"><pre><span></span><span class="n">returns</span> <span class="o">=</span> <span class="p">{</span><span class="n">tick</span><span class="p">:</span> <span class="n">data</span><span class="p">[</span><span class="n">tick</span><span class="p">][</span><span class="n">d_col</span><span class="p">]</span><span class="o">.</span><span class="n">pct_change</span><span class="p">()</span> <span class="k">for</span> <span class="n">tick</span> <span class="ow">in</span> <span class="n">tickers</span><span class="p">}</span> <span class="n">display</span><span class="p">({</span><span class="n">tick</span><span class="p">:</span> <span class="n">returns</span><span class="p">[</span><span class="n">tick</span><span class="p">]</span><span class="o">.</span><span class="n">mean</span><span class="p">()</span> <span class="k">for</span> <span class="n">tick</span> <span class="ow">in</span> <span class="n">tickers</span><span class="p">})</span> </pre></div> @@ -12979,7 +13901,7 @@ to make some tweaks over the coming weeks, and do another forward test in Januar <div class="prompt input_prompt">In&nbsp;[3]:</div> <div class="inner_cell"> <div class="input_area"> -<div class=" highlight hl-ipython3"><pre><span class="n">market_returns</span> <span class="o">=</span> <span class="n">market</span><span class="o">.</span><span class="n">pct_change</span><span class="p">()</span> +<div class=" highlight hl-ipython3"><pre><span></span><span class="n">market_returns</span> <span class="o">=</span> <span class="n">market</span><span class="o">.</span><span class="n">pct_change</span><span class="p">()</span> <span class="n">sharpe</span> <span class="o">=</span> <span class="k">lambda</span> <span class="n">ret</span><span class="p">:</span> <span class="p">(</span><span class="n">ret</span><span class="o">.</span><span class="n">mean</span><span class="p">()</span> <span class="o">-</span> <span class="n">market_returns</span><span class="p">[</span><span class="n">d_col</span><span class="p">]</span><span class="o">.</span><span class="n">mean</span><span class="p">())</span> <span class="o">/</span> <span class="n">ret</span><span class="o">.</span><span class="n">std</span><span class="p">()</span> <span class="n">sharpes</span> <span class="o">=</span> <span class="p">{</span><span class="n">tick</span><span class="p">:</span> <span class="n">sharpe</span><span class="p">(</span><span class="n">returns</span><span class="p">[</span><span class="n">tick</span><span class="p">])</span> <span class="k">for</span> <span class="n">tick</span> <span class="ow">in</span> <span class="n">tickers</span><span class="p">}</span> @@ -13027,7 +13949,7 @@ to make some tweaks over the coming weeks, and do another forward test in Januar <div class="prompt input_prompt">In&nbsp;[4]:</div> <div class="inner_cell"> <div class="input_area"> -<div class=" highlight hl-ipython3"><pre><span class="n">drawdown</span> <span class="o">=</span> <span class="k">lambda</span> <span class="n">ret</span><span class="p">:</span> <span class="n">ret</span><span class="o">.</span><span class="n">abs</span><span class="p">()</span><span class="o">.</span><span class="n">max</span><span class="p">()</span> +<div class=" highlight hl-ipython3"><pre><span></span><span class="n">drawdown</span> <span class="o">=</span> <span class="k">lambda</span> <span class="n">ret</span><span class="p">:</span> <span class="n">ret</span><span class="o">.</span><span class="n">abs</span><span class="p">()</span><span class="o">.</span><span class="n">max</span><span class="p">()</span> <span class="n">drawdowns</span> <span class="o">=</span> <span class="p">{</span><span class="n">tick</span><span class="p">:</span> <span class="n">drawdown</span><span class="p">(</span><span class="n">returns</span><span class="p">[</span><span class="n">tick</span><span class="p">])</span> <span class="k">for</span> <span class="n">tick</span> <span class="ow">in</span> <span class="n">tickers</span><span class="p">}</span> <span class="n">display</span><span class="p">(</span><span class="n">drawdowns</span><span class="p">)</span> @@ -13080,7 +14002,7 @@ s.t.\ \ & \vec{1} \omega = 1\\ <div class="prompt input_prompt">In&nbsp;[5]:</div> <div class="inner_cell"> <div class="input_area"> -<div class=" highlight hl-ipython3"><pre><span class="kn">import</span> <span class="nn">numpy</span> <span class="k">as</span> <span class="nn">np</span> +<div class=" highlight hl-ipython3"><pre><span></span><span class="kn">import</span> <span class="nn">numpy</span> <span class="k">as</span> <span class="nn">np</span> <span class="kn">from</span> <span class="nn">scipy.optimize</span> <span class="k">import</span> <span class="n">minimize</span> <span class="c1">#sharpe_limit = .1</span> @@ -13134,13 +14056,13 @@ s.t.\ \ & \vec{1} \omega = 1\\ <span class="c1"># Optimization time!</span> <span class="n">display</span><span class="p">(</span><span class="n">optimal</span><span class="o">.</span><span class="n">message</span><span class="p">)</span> -<span class="n">display</span><span class="p">(</span><span class="s2">&quot;Holdings: {}&quot;</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="nb">list</span><span class="p">(</span><span class="nb">zip</span><span class="p">(</span><span class="n">tickers</span><span class="p">,</span> <span class="n">optimal</span><span class="o">.</span><span class="n">x</span><span class="p">))))</span> +<span class="n">display</span><span class="p">(</span><span class="s2">&quot;Holdings: </span><span class="si">{}</span><span class="s2">&quot;</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="nb">list</span><span class="p">(</span><span class="nb">zip</span><span class="p">(</span><span class="n">tickers</span><span class="p">,</span> <span class="n">optimal</span><span class="o">.</span><span class="n">x</span><span class="p">))))</span> <span class="n">expected_return</span> <span class="o">=</span> <span class="n">optimal</span><span class="o">.</span><span class="n">fun</span> <span class="o">*</span> <span class="o">-</span><span class="mi">100</span> <span class="c1"># multiply by -100 to scale, and compensate for minimizing</span> -<span class="n">display</span><span class="p">(</span><span class="s2">&quot;Expected Return: {:.3f}%&quot;</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">expected_return</span><span class="p">))</span> +<span class="n">display</span><span class="p">(</span><span class="s2">&quot;Expected Return: </span><span class="si">{:.3f}</span><span class="s2">%&quot;</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">expected_return</span><span class="p">))</span> <span class="n">expected_drawdown</span> <span class="o">=</span> <span class="nb">sum</span><span class="p">(</span><span class="nb">abs</span><span class="p">(</span><span class="n">optimal</span><span class="o">.</span><span class="n">x</span><span class="p">)</span> <span class="o">*</span> <span class="n">dd_a</span><span class="p">)</span> <span class="o">/</span> <span class="nb">sum</span><span class="p">(</span><span class="nb">abs</span><span class="p">(</span><span class="n">optimal</span><span class="o">.</span><span class="n">x</span><span class="p">))</span> <span class="o">*</span> <span class="mi">100</span> -<span class="n">display</span><span class="p">(</span><span class="s2">&quot;Expected Max Drawdown: {0:.2f}%&quot;</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">expected_drawdown</span><span class="p">))</span> +<span class="n">display</span><span class="p">(</span><span class="s2">&quot;Expected Max Drawdown: </span><span class="si">{0:.2f}</span><span class="s2">%&quot;</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">expected_drawdown</span><span class="p">))</span> <span class="c1"># TODO: Calculate expected Sharpe</span> </pre></div> diff --git a/feeds/blog.atom.xml b/feeds/blog.atom.xml index 7b192e3..9b1152b 100644 --- a/feeds/blog.atom.xml +++ b/feeds/blog.atom.xml @@ -1,5 +1,927 @@ -Bradlee Speicehttps://bspeice.github.io/2016-02-26T00:00:00-05:00Profitability using the Investment Formula2016-02-26T00:00:00-05:00Bradlee Speicetag:bspeice.github.io,2016-02-26:profitability-using-the-investment-formula.html<p> +Bradlee Speicehttps://bspeice.github.io/2016-03-05T00:00:00-05:00Predicting Santander Customer Happiness2016-03-05T00:00:00-05:00Bradlee Speicetag:bspeice.github.io,2016-03-05:predicting-santander-customer-happiness.html<p> +<div class="cell border-box-sizing text_cell rendered"> +<div class="prompt input_prompt"> +</div> +<div class="inner_cell"> +<div class="text_cell_render border-box-sizing rendered_html"> +<h3 id="My-first-Kaggle-competition">My first Kaggle competition<a class="anchor-link" href="#My-first-Kaggle-competition">&#182;</a></h3><p>It's time! After embarking on a Machine Learning class this semester, and with a Saturday in which I don't have much planned, I wanted to put this class and training to work. It's my first competition submission. I want to walk you guys through how I'm approaching this problem, because I thought it would be really neat. The competition is Banco Santander's <a href="https://www.kaggle.com/c/santander-customer-satisfaction">Santander Customer Satisfaction</a> competition. It seemed like an easy enough problem I could actually make decent progress on it.</p> +<h1 id="Data-Exploration">Data Exploration<a class="anchor-link" href="#Data-Exploration">&#182;</a></h1><p>First up: we need to load our data and do some exploratory work. Because we're going to be using this data for model selection prior to testing, we need to make a further split. I've already gone ahead and done this work, please see the code in the <a href="#Appendix">appendix below</a>.</p> + +</div> +</div> +</div> +<div class="cell border-box-sizing code_cell rendered"> +<div class="input"> +<div class="prompt input_prompt">In&nbsp;[1]:</div> +<div class="inner_cell"> + <div class="input_area"> +<div class=" highlight hl-ipython3"><pre><span></span><span class="kn">import</span> <span class="nn">pandas</span> <span class="k">as</span> <span class="nn">pd</span> +<span class="kn">import</span> <span class="nn">numpy</span> <span class="k">as</span> <span class="nn">np</span> +<span class="kn">import</span> <span class="nn">matplotlib.pyplot</span> <span class="k">as</span> <span class="nn">plt</span> +<span class="o">%</span><span class="k">matplotlib</span> inline + +<span class="c1"># Record how long it takes to run the notebook - I&#39;m curious.</span> +<span class="kn">from</span> <span class="nn">datetime</span> <span class="k">import</span> <span class="n">datetime</span> +<span class="n">start</span> <span class="o">=</span> <span class="n">datetime</span><span class="o">.</span><span class="n">now</span><span class="p">()</span> + +<span class="n">dataset</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">read_csv</span><span class="p">(</span><span class="s1">&#39;split_train.csv&#39;</span><span class="p">)</span> +<span class="n">dataset</span><span class="o">.</span><span class="n">index</span> <span class="o">=</span> <span class="n">dataset</span><span class="o">.</span><span class="n">ID</span> +<span class="n">X</span> <span class="o">=</span> <span class="n">dataset</span><span class="o">.</span><span class="n">drop</span><span class="p">([</span><span class="s1">&#39;TARGET&#39;</span><span class="p">,</span> <span class="s1">&#39;ID&#39;</span><span class="p">,</span> <span class="s1">&#39;ID.1&#39;</span><span class="p">],</span> <span class="mi">1</span><span class="p">)</span> +<span class="n">y</span> <span class="o">=</span> <span class="n">dataset</span><span class="o">.</span><span class="n">TARGET</span> +</pre></div> + +</div> +</div> +</div> + +</div> +<div class="cell border-box-sizing code_cell rendered"> +<div class="input"> +<div class="prompt input_prompt">In&nbsp;[2]:</div> +<div class="inner_cell"> + <div class="input_area"> +<div class=" highlight hl-ipython3"><pre><span></span><span class="n">y</span><span class="o">.</span><span class="n">unique</span><span class="p">()</span> +</pre></div> + +</div> +</div> +</div> + +<div class="output_wrapper"> +<div class="output"> + + +<div class="output_area"><div class="prompt output_prompt">Out[2]:</div> + + +<div class="output_text output_subarea output_execute_result"> +<pre>array([0, 1], dtype=int64)</pre> +</div> + +</div> + +</div> +</div> + +</div> +<div class="cell border-box-sizing code_cell rendered"> +<div class="input"> +<div class="prompt input_prompt">In&nbsp;[3]:</div> +<div class="inner_cell"> + <div class="input_area"> +<div class=" highlight hl-ipython3"><pre><span></span><span class="nb">len</span><span class="p">(</span><span class="n">X</span><span class="o">.</span><span class="n">columns</span><span class="p">)</span> +</pre></div> + +</div> +</div> +</div> + +<div class="output_wrapper"> +<div class="output"> + + +<div class="output_area"><div class="prompt output_prompt">Out[3]:</div> + + +<div class="output_text output_subarea output_execute_result"> +<pre>369</pre> +</div> + +</div> + +</div> +</div> + +</div> +<div class="cell border-box-sizing text_cell rendered"> +<div class="prompt input_prompt"> +</div> +<div class="inner_cell"> +<div class="text_cell_render border-box-sizing rendered_html"> +<p>Okay, so there are only <a href="https://www.kaggle.com/c/santander-customer-satisfaction/data">two classes we're predicting</a>: 1 for unsatisfied customers, 0 for satisfied customers. I would have preferred this to be something more like a regression, or predicting multiple classes: maybe the customer isn't the most happy, but is nowhere near closing their accounts. For now though, that's just the data we're working with.</p> +<p>Now, I'd like to make a scatter matrix of everything going on. Unfortunately as noted above, we have 369 different features. There's no way I can graphically make sense of that much data to start with.</p> +<p>We're also not told what the data actually represents: Are these survey results? Average time between contact with a customer care person? Frequency of contacting a customer care person? The idea is that I need to reduce the number of dimensions we're predicting across.</p> +<h2 id="Dimensionality-Reduction-pt.-1---Binary-Classifiers">Dimensionality Reduction pt. 1 - Binary Classifiers<a class="anchor-link" href="#Dimensionality-Reduction-pt.-1---Binary-Classifiers">&#182;</a></h2><p>My first attempt to reduce the data dimensionality is to find all the binary classifiers in the dataset (i.e. 0 or 1 values) and see if any of those are good (or anti-good) predictors of the final data.</p> + +</div> +</div> +</div> +<div class="cell border-box-sizing code_cell rendered"> +<div class="input"> +<div class="prompt input_prompt">In&nbsp;[4]:</div> +<div class="inner_cell"> + <div class="input_area"> +<div class=" highlight hl-ipython3"><pre><span></span><span class="n">cols</span> <span class="o">=</span> <span class="n">X</span><span class="o">.</span><span class="n">columns</span> +<span class="n">b_class</span> <span class="o">=</span> <span class="p">[]</span> +<span class="k">for</span> <span class="n">c</span> <span class="ow">in</span> <span class="n">cols</span><span class="p">:</span> + <span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">X</span><span class="p">[</span><span class="n">c</span><span class="p">]</span><span class="o">.</span><span class="n">unique</span><span class="p">())</span> <span class="o">==</span> <span class="mi">2</span><span class="p">:</span> + <span class="n">b_class</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">c</span><span class="p">)</span> + +<span class="nb">len</span><span class="p">(</span><span class="n">b_class</span><span class="p">)</span> +</pre></div> + +</div> +</div> +</div> + +<div class="output_wrapper"> +<div class="output"> + + +<div class="output_area"><div class="prompt output_prompt">Out[4]:</div> + + +<div class="output_text output_subarea output_execute_result"> +<pre>111</pre> +</div> + +</div> + +</div> +</div> + +</div> +<div class="cell border-box-sizing text_cell rendered"> +<div class="prompt input_prompt"> +</div> +<div class="inner_cell"> +<div class="text_cell_render border-box-sizing rendered_html"> +<p>So there are 111 features in the dataset that are a binary label. Let's see if any of them are good at predicting the users satisfaction!</p> + +</div> +</div> +</div> +<div class="cell border-box-sizing code_cell rendered"> +<div class="input"> +<div class="prompt input_prompt">In&nbsp;[5]:</div> +<div class="inner_cell"> + <div class="input_area"> +<div class=" highlight hl-ipython3"><pre><span></span><span class="c1"># First we need to `binarize` the data to 0-1; some of the labels are {0, 1},</span> +<span class="c1"># some are {0, 3}, etc.</span> +<span class="kn">from</span> <span class="nn">sklearn.preprocessing</span> <span class="k">import</span> <span class="n">binarize</span> +<span class="n">X_bin</span> <span class="o">=</span> <span class="n">binarize</span><span class="p">(</span><span class="n">X</span><span class="p">[</span><span class="n">b_class</span><span class="p">])</span> + +<span class="n">accuracy</span> <span class="o">=</span> <span class="p">[</span><span class="n">np</span><span class="o">.</span><span class="n">mean</span><span class="p">(</span><span class="n">X_bin</span><span class="p">[:,</span><span class="n">i</span><span class="p">]</span> <span class="o">==</span> <span class="n">y</span><span class="p">)</span> <span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span> <span class="nb">len</span><span class="p">(</span><span class="n">b_class</span><span class="p">))]</span> +<span class="n">acc_df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">({</span><span class="s2">&quot;Accuracy&quot;</span><span class="p">:</span> <span class="n">accuracy</span><span class="p">},</span> <span class="n">index</span><span class="o">=</span><span class="n">b_class</span><span class="p">)</span> +<span class="n">acc_df</span><span class="o">.</span><span class="n">describe</span><span class="p">()</span> +</pre></div> + +</div> +</div> +</div> + +<div class="output_wrapper"> +<div class="output"> + + +<div class="output_area"><div class="prompt output_prompt">Out[5]:</div> + +<div class="output_html rendered_html output_subarea output_execute_result"> +<div> +<table border="1" class="dataframe"> + <thead> + <tr style="text-align: right;"> + <th></th> + <th>Accuracy</th> + </tr> + </thead> + <tbody> + <tr> + <th>count</th> + <td>111.000000</td> + </tr> + <tr> + <th>mean</th> + <td>0.905159</td> + </tr> + <tr> + <th>std</th> + <td>0.180602</td> + </tr> + <tr> + <th>min</th> + <td>0.043598</td> + </tr> + <tr> + <th>25%</th> + <td>0.937329</td> + </tr> + <tr> + <th>50%</th> + <td>0.959372</td> + </tr> + <tr> + <th>75%</th> + <td>0.960837</td> + </tr> + <tr> + <th>max</th> + <td>0.960837</td> + </tr> + </tbody> +</table> +</div> +</div> + +</div> + +</div> +</div> + +</div> +<div class="cell border-box-sizing text_cell rendered"> +<div class="prompt input_prompt"> +</div> +<div class="inner_cell"> +<div class="text_cell_render border-box-sizing rendered_html"> +<p>Wow! Looks like we've got some incredibly predictive features! So much so that we should be a bit concerned. My initial guess for what's happening is that we have a sparsity issue: so many of the values are 0, and these likely happen to line up with satisfied customers.</p> +<p>So the question we must now answer, which I likely should have asked long before now: What exactly is the distribution of un/satisfied customers?</p> + +</div> +</div> +</div> +<div class="cell border-box-sizing code_cell rendered"> +<div class="input"> +<div class="prompt input_prompt">In&nbsp;[6]:</div> +<div class="inner_cell"> + <div class="input_area"> +<div class=" highlight hl-ipython3"><pre><span></span><span class="n">unsat</span> <span class="o">=</span> <span class="n">y</span><span class="p">[</span><span class="n">y</span> <span class="o">==</span> <span class="mi">1</span><span class="p">]</span><span class="o">.</span><span class="n">count</span><span class="p">()</span> +<span class="nb">print</span><span class="p">(</span><span class="s2">&quot;Satisfied customers: </span><span class="si">{}</span><span class="s2">; Unsatisfied customers: </span><span class="si">{}</span><span class="s2">&quot;</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="nb">len</span><span class="p">(</span><span class="n">y</span><span class="p">)</span> <span class="o">-</span> <span class="n">unsat</span><span class="p">,</span> <span class="n">unsat</span><span class="p">))</span> +<span class="n">naive_guess</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">mean</span><span class="p">(</span><span class="n">y</span> <span class="o">==</span> <span class="n">np</span><span class="o">.</span><span class="n">zeros</span><span class="p">(</span><span class="nb">len</span><span class="p">(</span><span class="n">y</span><span class="p">)))</span> +<span class="nb">print</span><span class="p">(</span><span class="s2">&quot;Naive guess accuracy: </span><span class="si">{}</span><span class="s2">&quot;</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">naive_guess</span><span class="p">))</span> +</pre></div> + +</div> +</div> +</div> + +<div class="output_wrapper"> +<div class="output"> + + +<div class="output_area"><div class="prompt"></div> +<div class="output_subarea output_stream output_stdout output_text"> +<pre>Satisfied customers: 51131; Unsatisfied customers: 2083 +Naive guess accuracy: 0.9608561656706882 +</pre> +</div> +</div> + +</div> +</div> + +</div> +<div class="cell border-box-sizing text_cell rendered"> +<div class="prompt input_prompt"> +</div> +<div class="inner_cell"> +<div class="text_cell_render border-box-sizing rendered_html"> +<p>This is a bit discouraging. A naive guess of "always satisfied" performs as well as our best individual binary classifier. What this tells me then, is that these data columns aren't incredibly helpful in prediction. I'd be interested in a polynomial expansion of this data-set, but for now, that's more computation than I want to take on.</p> +<h1 id="Dimensionality-Reduction-pt.-2---LDA">Dimensionality Reduction pt. 2 - LDA<a class="anchor-link" href="#Dimensionality-Reduction-pt.-2---LDA">&#182;</a></h1><p>Knowing that our naive guess performs so well is a blessing and a curse:</p> +<ul> +<li>Curse: The threshold for performance is incredibly high: We can only "improve" over the naive guess by 4%</li> +<li>Blessing: All the binary classification features we just discovered are worthless on their own. We can throw them out and reduce the data dimensionality from 369 to 111.</li> +</ul> +<p>Now, in removing these features from the dataset, I'm not saying that there is no "information" contained within them. There might be. But the only way we'd know is through a polynomial expansion, and I'm not going to take that on within this post.</p> +<p>My initial thought for a "next guess" is to use the <a href="http://scikit-learn.org/stable/modules/lda_qda.html">LDA</a> model for dimensionality reduction. However, it can only reduce dimensions to $1 - p$, with $p$ being the number of classes. Since this is a binary classification, every LDA model that I try will have dimensionality one; when I actually try this, the predictor ends up being slightly less accurate than the naive guess.</p> +<p>Instead, let's take a different approach to dimensionality reduction: <a href="http://scikit-learn.org/stable/modules/generated/sklearn.decomposition.PCA.html">principle components analysis</a>. This allows us to perform the dimensionality reduction without worrying about the number of classes. Then, we'll use a <a href="http://scikit-learn.org/stable/modules/naive_bayes.html#gaussian-naive-bayes">Gaussian Naive Bayes</a> model to actually do the prediction. This model is chosen simply because it doesn't take a long time to fit and compute; because PCA will take so long, I just want a prediction at the end of this. We can worry about using a more sophisticated LDA/QDA/SVM model later.</p> +<p>Now into the actual process: We're going to test out PCA dimensionality reduction from 1 - 20 dimensions, and then predict using a Gaussian Naive Bayes model. The 20 dimensions upper limit was selected because the accuracy never improves after you get beyond that (I found out by running it myself). Hopefully, we'll find that we can create a model better than the naive guess.</p> + +</div> +</div> +</div> +<div class="cell border-box-sizing code_cell rendered"> +<div class="input"> +<div class="prompt input_prompt">In&nbsp;[7]:</div> +<div class="inner_cell"> + <div class="input_area"> +<div class=" highlight hl-ipython3"><pre><span></span><span class="kn">from</span> <span class="nn">sklearn.naive_bayes</span> <span class="k">import</span> <span class="n">GaussianNB</span> +<span class="kn">from</span> <span class="nn">sklearn.decomposition</span> <span class="k">import</span> <span class="n">PCA</span> + +<span class="n">X_no_bin</span> <span class="o">=</span> <span class="n">X</span><span class="o">.</span><span class="n">drop</span><span class="p">(</span><span class="n">b_class</span><span class="p">,</span> <span class="mi">1</span><span class="p">)</span> + +<span class="k">def</span> <span class="nf">evaluate_gnb</span><span class="p">(</span><span class="n">dims</span><span class="p">):</span> + <span class="n">pca</span> <span class="o">=</span> <span class="n">PCA</span><span class="p">(</span><span class="n">n_components</span><span class="o">=</span><span class="n">dims</span><span class="p">)</span> + <span class="n">X_xform</span> <span class="o">=</span> <span class="n">pca</span><span class="o">.</span><span class="n">fit_transform</span><span class="p">(</span><span class="n">X_no_bin</span><span class="p">)</span> + + <span class="n">gnb</span> <span class="o">=</span> <span class="n">GaussianNB</span><span class="p">()</span> + <span class="n">gnb</span><span class="o">.</span><span class="n">fit</span><span class="p">(</span><span class="n">X_xform</span><span class="p">,</span> <span class="n">y</span><span class="p">)</span> + <span class="k">return</span> <span class="n">gnb</span><span class="o">.</span><span class="n">score</span><span class="p">(</span><span class="n">X_xform</span><span class="p">,</span> <span class="n">y</span><span class="p">)</span> + +<span class="n">dim_range</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">arange</span><span class="p">(</span><span class="mi">1</span><span class="p">,</span> <span class="mi">21</span><span class="p">)</span> +<span class="n">plt</span><span class="o">.</span><span class="n">plot</span><span class="p">(</span><span class="n">dim_range</span><span class="p">,</span> <span class="p">[</span><span class="n">evaluate_gnb</span><span class="p">(</span><span class="n">dim</span><span class="p">)</span> <span class="k">for</span> <span class="n">dim</span> <span class="ow">in</span> <span class="n">dim_range</span><span class="p">],</span> <span class="n">label</span><span class="o">=</span><span class="s2">&quot;Gaussian NB Accuracy&quot;</span><span class="p">)</span> +<span class="n">plt</span><span class="o">.</span><span class="n">axhline</span><span class="p">(</span><span class="n">naive_guess</span><span class="p">,</span> <span class="n">label</span><span class="o">=</span><span class="s2">&quot;Naive Guess&quot;</span><span class="p">,</span> <span class="n">c</span><span class="o">=</span><span class="s1">&#39;k&#39;</span><span class="p">)</span> +<span class="n">plt</span><span class="o">.</span><span class="n">axhline</span><span class="p">(</span><span class="mi">1</span> <span class="o">-</span> <span class="n">naive_guess</span><span class="p">,</span> <span class="n">label</span><span class="o">=</span><span class="s2">&quot;Inverse Naive Guess&quot;</span><span class="p">,</span> <span class="n">c</span><span class="o">=</span><span class="s1">&#39;k&#39;</span><span class="p">)</span> +<span class="n">plt</span><span class="o">.</span><span class="n">gcf</span><span class="p">()</span><span class="o">.</span><span class="n">set_size_inches</span><span class="p">(</span><span class="mi">12</span><span class="p">,</span> <span class="mi">6</span><span class="p">)</span> +<span class="n">plt</span><span class="o">.</span><span class="n">legend</span><span class="p">();</span> +</pre></div> + +</div> +</div> +</div> + +<div class="output_wrapper"> +<div class="output"> + + +<div class="output_area"><div class="prompt"></div> + + +<div class="output_png output_subarea "> +<img src="data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAsYAAAFwCAYAAAC7E71AAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz +AAALEgAACxIB0t1+/AAAIABJREFUeJzt3Xl8VfWd//HXNxDWQAiJbElYpK503KZgrdpiGUH9iVor +CCi2OFXb2hmxdWHsaEOXcURt1dZldKw74tJ26r4UjdW2bq3ISJXNDkgKsgUjsoTl+/vjhmsIWclN +7k3yej4e55F7zvnecz73cI1vvnzP+YYYI5IkSVJHl5XuAiRJkqRMYDCWJEmSMBhLkiRJgMFYkiRJ +AgzGkiRJEmAwliRJkoBGBOMQwp0hhA9DCPPraXNTCGFxCGFeCOGw1JYoSZIktbzG9BjfBYyra2cI +4URgeIxxP+AC4LYU1SZJkiS1mgaDcYzxFaC8nianAvdWtX0NyA0h9E9NeZIkSVLrSMUY40Lgg2rr +ZVXbJEmSpDbDm+8kSZIkoHMKjlEGFFdbL6ratocQQkzB+SRJkqQGxRhDU9o3NhiHqqU2jwEXAg+F +ED4PbIgxflhPgU2pT2o1JSUllJSUpLsMaQ9+N5Wp/G4qk4XQpEwMNCIYhxBmA6OB/BDCcuAHQBcg +xhhvjzE+FUI4KYSwBPgEmNbkKiRJkqQ0azAYxxinNKLNd1JTjiRJkpQe3nwnVRk9enS6S5Bq5XdT +mcrvptqb0JpjfkMI0THGkiRJamkhhBa7+U6SJLUjQ4cOZdmyZekuQ2q2IUOG8H//938pOZY9xpIk +dUBVvWnpLkNqtrq+y3vTY+wYY0mSJAmDsSRJkgQYjCVJkiTAYCxJktSgk046ifvuuy/dZaiFdbib +7371K1i/Pq0l7CaE+pesrIbbNHcRdOsGRx+d7iokqfVk+s13c+bM4YYbbuCdd94hJyeHYcOGcc45 +5/Ctb30r3aWl1D333MO0adOYNWsWl1xySXJ7cXExDzzwAF/84heZOXMmP/nJT+jWrVty349+9CNO +P/30eo9dWlrKl7/8Za655houvfTSFv0c6ZTKm++IMbbaAkQXFxcXFxeXzFgy1XXXXRcHDBgQf/3r +X8eNGzfGGGOcN29ePPvss2NlZWWaq0utu+++O+bn58d99tkn+VljjLGoqCi+9NJLMcYYS0pK4tSp +U5P7nn322di9e/e4evXqeo89bdq0WFBQED/72c+2TPH12L59e6udq4HveJOyaqsPpWhqgS4urbEc +fXTk979Pfx0uLi4urbVkqoqKCn7wgx9w66238pWvfIWePXsCcOihh3LfffeRnZ0NwFNPPcURRxxB +bm4uQ4YMYebMmcljvPTSSxQXF+923GHDhvHCCy8A8MYbbzBy5Ehyc3MZOHBgsqd269atTJ06lYKC +AvLy8jjyyCNZs2YNAMcddxy//OUvAXj//fcZM2YMBQUF9OvXj7PPPpuKiordznX99ddz6KGHkpeX +x+TJk6msrKzzMx900EEcddRRXH/99Y26RmPHjqVXr14sXbq0zjabNm3i0Ucf5eabb2bx4sX85S9/ +2W3/K6+8wtFHH01eXh5Dhgzh3nvvBWDLli1873vfY+jQoeTl5fHFL36RrVu3NnhNZ86cyYQJE5g6 +dSp9+vThnnvu4Y033uALX/gCeXl5FBYW8i//8i9s3749+f4FCxYwduxY8vPzGThwIP/5n//Jhx9+ +SM+ePSkvL0+2+8tf/kK/fv3YsWNHnZ83Vd9xxxhLQFERrFiR7iokSX/605+orKzklFNOqbddTk4O +9913Hx999BFPPvkkt912G4899lhyf6hnrOBFF13E9OnT+eijj1i6dCkTJ04EEsMaKioqKCsrY/36 +9dx222107959j/fHGLniiitYtWoV7777LitWrKCkpGS3No888gjPPfccf/vb33j77be5++6766wn +hMCPfvQjbrjhBjZs2FDv5wZ48skn2bZtGwcffHCdbX71q1/Rq1cvJkyYwNixY7nnnnuS+5YvX85J +J53ERRddxNq1a5k3bx6HHXYYAN/73vd46623ePXVV1m/fj2zZs0iKysrWWd9HnvsMSZOnMiGDRs4 +66yz6Ny5MzfccAPr16/nT3/6Ey+88AK33HILABs3buT444/npJNOYuXKlSxZsoQxY8bQv39/jjvu +OB5++OHkce+//34mT55Mp06dGrw2zWUwljAYS1KmWLt2LQUFBckwBiR7Nnv06MErr7wCwBe/+EVG +jBgBwGc/+1kmTZrESy+91KhzdOnShSVLlrBu3Tp69OjBqFGjAMjOzmbdunUsWrSIEAKHH344OTk5 +e7x/+PDhjBkzhs6dO5Ofn8/FF1+8x7kvuugi+vfvT58+fRg/fjzz5s2rt6ZDDjmE448/nmuuuabW +/Q899BB9+/YlJyeH0047jSuuuILevXvXebx7772XSZMmEUJgypQpzJkzJ9njOnv2bI4//ngmTpxI +p06dyMvL45BDDiHGyF133cVNN93EgAEDCCHw+c9/PtlL35CjjjqK8ePHA9C1a1cOP/xwRo0aRQiB +wYMHc/755yev0xNPPMHAgQOZPn06Xbp0oWfPnowcORKAc845J3mj486dO3nwwQeZOnVqo2poLoOx +hMFYkmqTjhu88/PzWbt2LTt37kxu+8Mf/kB5eTkFBQXJ7a+99hpf/vKX6devH3369OG//uu/WLt2 +baPOceedd7Jw4UIOPPBAjjzySJ588kkApk6dyrhx45g0aRJFRUVcfvnltf7z/erVq5k8eTJFRUX0 +6dOHs88+e49z9+/fP/m6R48ebNy4scG6fvjDH3LrrbeyevXqPfadeeaZrF+/no0bN7J06VLuuece +7rjjjlqPs2LFCl588UWmTJkCwCmnnMLmzZuTn/ODDz5g+PDhe7xv7dq1bN26lX333bfBWmtTc6jF +4sWLGT9+PAMHDqRPnz58//vfT16numoAOPXUU3n33XdZtmwZzz33HH369OFzn/vcXtXUVAZjCYOx +JNUmxuYvTXXUUUfRtWtXfvvb39ZSz6cHPOusszjttNMoKytjw4YNXHDBBcn9PXv2ZNOmTcm2O3bs +SI4VhkSP7+zZs1mzZg2XXXYZZ5xxBps3b6Zz585ceeWVLFiwgD/+8Y888cQTybG31V1xxRVkZWWx +YMECNmzYwP3335+ScdsHHHAAp59+Oj/5yU/qHbYwePBgTjzxRB5//PFa9997773EGJOhdPjw4Wzd +ujU5nKK4uJglS5bs8b6CggK6detW69jlhq4p7DnU4lvf+hYHHXQQS5cuZcOGDfzkJz9JXqfi4uI6 +x0h37dqViRMnct9993H//fe3Wm8xGIwlwGAsSZkiNzeXq666im9/+9v86le/YuPGjcQYmTdv3m7B +bOPGjeTl5ZGdnc3rr7/O7Nmzk/v2339/tmzZwtNPP8327dv58Y9/vNvNbw888ECy5zI3N5cQAllZ +WZSWlvLOO++wc+dOcnJyyM7OrnVc68cff0xOTg69evWirKyMa6+9NmWf/6qrruKuu+7aY6xx9eC9 +YsUKnnnmGT772c/Weox7772XkpIS5s2bx9tvv83bb7/No48+ypNPPkl5eTlnnXUWc+fO5dFHH2XH +jh2sX7+et99+mxAC06ZN47vf/S4rV65k586dvPrqq2zbtq3Ba1qbjz/+mN69e9OjRw/ee+89br31 +1uS+k08+mVWrVnHTTTdRWVnJxo0bef3115P7p06dyt13383jjz9uMJZam8FYkjLHpZdeyk9/+lNm +zZrFgAEDGDBgAN/61reYNWsWX/jCFwC45ZZbuPLKK8nNzeXHP/4xZ555ZvL9vXv35pZbbuGf//mf +KSoqolevXhQVFSX3P/PMM4wYMYLevXtz8cUX89BDD9G1a1dWrVrFGWecQW5uLiNGjOC4447j7LPP +BnbvDf3BD37An//85+T44a9+9au71d/QTWr1GTp0KFOnTuWTTz7ZbfvDDz9M79696d27N0ceeSTH +HnssV1111R7vf+2111i+fDnf/va36devX3IZP348++23Hw8++CDFxcU89dRTXHfddfTt25fDDz+c ++fPnA3DdddfxD//wD4wcOZL8/HxmzJjBzp07G7ymtbnuuut44IEH6N27NxdccAGTJk1K7svJyeH5 +55/nscceY8CAAey///6UlpYm93/hC18gKyuLI444Yo8hGi2pw03wIdVm+3bo0QM++QQaeY+BJLVp +mT7BhzRmzBjOOusszj333HrbpXKCD3uMJaBzZ9hnH1i1Kt2VSJKkN954g7feemu3fwloDQZjqYrD +KSRJSr+vf/3rjB07lhtvvDE5wUtr6dyqZ5MymMFYkqT0q28ylJZmj7FUxWAsSVLHZjCWqhiMJUnq +2AzGUhWDsSRJHZvBWKpiMJYkqWMzGEtVDMaSJHVsBmOpyqBBsHIl7NiR7kokSc1x9dVXc/7556e7 +DLVBBmOpSteukJcHq1enuxJJ6tiGDh1K//792bx5c3LbnXfeyXHHHdeo9//bv/0bt99+e4vU9vzz +z/PlL3+Z3r17s88++3DEEUdw7bXXUllZ2SLnU+syGEvVOJxCktIvhMDOnTu54YYb9tieTo888ggT +Jkzg7LPPZvny5axZs4aHHnqIFStW8MEHH6S1NqWGwViqpqgIysrSXYUk6dJLL+X666+noqKi1v3T +p09n8ODB5ObmMnLkSF555ZXkvpkzZ3LOOecAcNJJJ3HLLbfs9t7DDjuM//mf/wHgvffeY+zYseTn +53PQQQfxyCOP1FnT9773PUpKSjj33HPp06cPAPvttx833ngjw4cPB2DatGlcddVVyfe89NJLFBcX +J9dXrlzJGWecQb9+/Rg+fDg///nPk/veeOMNRo4cSW5uLgMHDuSSSy4BYOvWrUydOpWCggLy8vI4 +8sgjWbNmTcMXUU1mMJaqscdYkjLD5z73OUaPHs21115b6/5Ro0Yxf/58ysvLmTJlChMmTKh1OMPk +yZOZPXt2cv2vf/0ry5cv5+STT2bTpk2MHTuWs88+m7Vr1zJnzhwuvPBC3nvvvT2Os3DhQsrKyjj9 +9NOb/Fl29XTHGBk/fjyHH344K1euZO7cudx44408//zzAFx00UVMnz6djz76iKVLlzJx4kQA7rnn +HioqKigrK2P9+vXcdtttdO/evcl1qGEGY6kag7EkZY6ZM2fyi1/8gnXr1u2xb8qUKfTp04esrCwu +vvhitm7dysKFC/do95WvfIW33347OdRh9uzZnH766XTu3JknnniCYcOGcc455xBC4NBDD+X000+v +tdd47dq1AAwYMCC5bfLkyeTl5dGzZ08eeOCBBj/P66+/ztq1a/n+979Pp06dGDp0KN/4xjeYM2cO +ANnZ2SxZsoR169bRo0cPRo0aldy+bt06Fi1aRAiBww8/nJycnEZcQTWVwViqxmAsSZ8KITR7aY4R +I0Zw8sknc/XVV++x77rrruPggw8mLy+PvLw8KioqkuG1upycHE466aRk+HzwwQc5++yzAVi2bBmv +vvoqffv2pW/fvuTl5TF79mxWrVq1x3Hy8/OBxFCIXR588EHKy8s54ogj2NGIRxotX76csrKy3c53 +9dVXs7rqru9f/vKXLFy4kAMPPJAjjzySJ598EoCpU6cybtw4Jk2aRFFRETNmzGjU+dR0BmOpGoOx +JH0qxtjspblKSkq44447KKt2A8jLL7/Mtddey6OPPkp5eTnl5eX07t27zvPtGk7x6quvsnXrVkaP +Hg1AcXExo0ePZv369axfv57y8nIqKiq4+eab9zjGAQccQGFhIb/+9a/rrbdnz55s2rQpuV49SBcX +F7Pvvvvudr6PPvqIxx9/HIDhw4cze/Zs1qxZw2WXXcYZZ5zB5s2b6dy5M1deeSULFizgj3/8I48/ +/jj33ntvo6+hGs9gLFVjMJakzDJ8+HDOPPNMbrrppuS2jRs3kp2dTX5+PpWVlfzwhz/k448/rvMY +J510EsuWLeOqq67izDPPTG4/+eSTWbRoEffffz/bt29n27ZtvPnmm7WOMQ4hcN111zFz5kzuvPNO +NmzYAMDixYv58MMPk+0OO+wwnnrqKcrLy1m1ahU33nhjct+oUaPo1asXs2bNYsuWLezYsYMFCxbw +5ptvAvDAAw8ke71zc3MJIZCVlUVpaSnvvPMOO3fuJCcnh+zsbLKyjHAtwasqVVNYmHgqRQo6OSRJ +e6nmEIyrrrqKTZs2JbePGzeOcePGsf/++zNs2DB69Oix25MfaurSpQunn346c+fOZcqUKcntOTk5 +PPfcc8yZM4dBgwYxaNAgZsyYUecziSdOnMjDDz/Mfffdx+DBg9lnn32YNGkS3/zmN5kwYQKQGPZw +yCGHMHToUE444QQmTZqUfH9WVhZPPPEE8+bNY9iwYfTr14/zzjsv+eSNZ555hhEjRtC7d28uvvhi +HnroIbp27cqqVas444wzyM3NZcSIERx33HFMnTp17y6u6hVS8c8cjT5ZCLE1zyftjfx8WLgQCgrS +XYkktZwQQkqGOkjpVtd3uWp7kwa622Ms1eBwCkmSOiaDsVSDwViSpI7JYCzVUFhoMJYkqSMyGEs1 +2GMsSVLHZDCWajAYS5LUMRmMpRoMxpIkdUwGY6kGg7EkSR2TwViqYVcw9vGekiR1LAZjqYbevSEr +Cz76KN2VSJI6oldeeYWDDjoo3WV0SAZjqRYOp5Ck9Bk2bBgvvPBCusvYa1//+tfJysrizTffTG5b +unQpWVmNi13HHHMM7777bovUtmTJEiZPnky/fv3o06cPBxxwABdddBF///vfW+R8bY3BWKqFwViS +OpYdO3ak7FghBPLz8/n3f//3Pban05IlSzjyyCMpKipi3rx5bNiwgT/84Q8MHz6cV155Ja21ZQqD +sVQLg7EkZYZ77rmHY489lksvvZS+ffsyfPhwnn32WQAefvhhRo4cuVv7n/3sZ5x22mkAVFZWcskl +lzBkyBAGDhzIt7/9bbZu3QrASy+9RHFxMbNmzWLgwIGce+65rFu3jvHjx5OXl0d+fj5f+tKXksdd +uXIlZ5xxBv369WP48OH8/Oc/r7fur33ta8yfP5+XX3651v133303Bx98ML179+Yzn/kMt99+e3Lf +rtoAZs2axYQJE3Z770UXXcT06dMBqKio4Bvf+AaDBg2iuLiYK6+8kljHTTIzZ87kmGOO4dprr2XQ +oEEAFBQU8K//+q9MnDhxt+tdXVZWFu+//36D17S+63fNNddQVFRE7969Oeigg3jxxRfrvX7pYjCW +amEwlqTM8frrr3PQQQexbt06Lr30Us4991wAxo8fz6JFi1i6dGmy7YMPPshZZ50FwOWXX86SJUuY +P38+S5YsoaysjB/+8IfJtqtWrWLDhg0sX76c22+/neuvv57i4mLWrVvH6tWr+Y//+A8AYoyMHz+e +ww8/nJUrVzJ37lxuvPFGnn/++Tpr7tGjB1dccQVXXHFFrfv79+/PU089RUVFBXfddRcXX3wx8+bN +S+7f1bs8adIknn76aT755BMAdu7cySOPPJL8jF/72tfo0qUL77//Pm+99RbPP/88//3f/13rOX/3 +u9/x1a9+tf6LzZ4929XX67umdV2/RYsWcfPNN/PnP/+ZiooKnn32WYYOHdpgHelgMJZqYTCWpMwx +ZMgQzj33XEIIfO1rX2PlypWsXr2a7t27c8opp/Dggw8CsHjxYhYuXMgpp5wCwB133MHPfvYzcnNz +6dmzJzNmzEi2BejUqRMzZ84kOzubrl27kp2dzcqVK/nb3/5Gp06dOProowF44403WLt2Ld///vfp +1KkTQ4cO5Rvf+AZz5sypt+7zzz+f5cuXJ3u4qzvxxBOT4fDYY49l7NixtfYuDx48mCOOOILf/OY3 +AMydO5eePXsycuRIPvzwQ55++ml+9rOf0a1bNwoKCpg+ffpun7G6tWvXMmDAgOT6zTffTF5eHr16 +9eKCCy6o83NU74Gu75rWdf06depEZWUl77zzDtu3b2fw4MEMGzas3muXLgZjqRZFRVBWlu4qJCm9 +QgjNXlKhepjr3r07ABs3bgRgypQpyWA2e/ZsTjvtNLp27cqaNWvYtGkT//iP/0jfvn3p27cvJ554 +IuvWrUsea5999iE7Ozu5ftlllzF8+HDGjh3LZz7zGa655hoAli1bRllZWfI4eXl5XH311axevbre +urt06cKVV17JlVdeuce+p59+mqOOOor8/Hzy8vJ4+umnWbt2ba3HmTx5cvIzPvjgg0yZMgWA5cuX +s23bNgYOHJis65vf/Gadx8nPz2flypXJ9QsvvJDy8nKmT5/Otm3b6v0sQIPX9NJLL631+g0fPpwb +briBkpIS+vfvz5QpU3arI5MYjKVa2GMsSYmewuYuLe34449nzZo1vP3228yZMycZGgsKCujRowcL +Fixg/fr1rF+/ng0bNvBRtWdx1gzuPXv25LrrrmPp0qU89thj/PSnP+XFF1+kuLiYfffdN3mc8vJy +PvroIx5//PEG65s2bRobNmzg17/+dXJbZWUlZ5xxBpdddhlr1qyhvLycE088sc7rNWHCBEpLSykr +K+M3v/lN8jMWFxfTrVs31q1bl6xrw4YNzJ8/v9bjjBkzZrc6atOzZ082bdqUXF+1alXydUPXNCcn +p9brB4khIS+//DLLli0DYMaMGQ1durQwGEu1MBhLUtvQuXNnJkyYwKWXXkp5eTnHH388kAi95513 +HtOnT2fNmjUAlJWV8dxzz9V5rCeffDI5XrlXr1507tyZrKwsRo0aRa9evZg1axZbtmxhx44dLFiw +YLfHsdWlU6dOlJSUJHtPIRGMKysrKSgoICsri6effrreugoKCvjSl77EtGnT2HfffTnggAOARE/6 +2LFjufjii/n444+JMfL+++/z+9//vtbjlJSU8PLLL3PJJZckH8+2du3a3R4Nd+ihh7JgwQLmz5/P +1q1bmTlzZvIvEA1d07qu36JFi3jxxReprKykS5cudO/evdGPrmttmVmVlGZ5ebB1K1T9S50kqRU1 +NASj5v7Jkyczd+5cJk6cuFvguuaaa/jMZz7D5z//efr06cPYsWNZtGhRncddvHgx//RP/0SvXr04 ++uijufDCC/nSl75EVlYWTzzxBPPmzWPYsGH069eP8847j4qKikbXN3DgwOT2nJwcbrrpJiZMmEDf +vn2ZM2cOp556ar2fecqUKcydOzd5090u9957L5WVlRx88MH07duXCRMm7NbLW91+++3Ha6+9xgcf +fMChhx5Kbm4uxx57LIWFhfzoRz9KtrnqqqsYM2YM+++//x5PqKjvmtZ1/bZu3cqMGTPYZ599GDRo +EGvWrOHqq6+u9/OmS2iNf+ZIniyE2Jrnk5pj//3h8ceh6i/mktSuhBBaZaiD1NLq+i5XbW/SQHd7 +jKU6OJxCkqSOxWAs1cFgLElSx9KoYBxCOCGE8F4IYVEI4fJa9vcOITwWQpgXQvjfEMLXU16p1MoM +xpIkdSwNBuMQQhbwC2AcMAKYHEI4sEazC4EFMcbDgOOA60MInVNdrNSaDMaSJHUsjekxHgUsjjEu +izFuA+YANW+djECvqte9gHUxxu2pK1NqfQZjSZI6lsYE40Lgg2rrK6q2VfcL4OAQwt+Bt4GLUlOe +lD6FhQZjSZI6klTdfDcOeCvGOAg4HLg5hJCTomNLaWGPsSRJHUtjxgGXAYOrrRdVbatuGnA1QIxx +aQjhb8CBwB5TwpSUlCRfjx49mtGjRzepYKm17LMPVFTAli3QrVu6q5Gk1BoyZEiDE2lIbcGQIUMA +KC0tpbS0tFnHanCCjxBCJ2AhMAZYCbwOTI4xvlutzc3A6hjjzBBCfxKB+NAY4/oax3KCD7Upw4bB +734Hw4enuxJJktQULTLBR4xxB/Ad4DlgATAnxvhuCOGCEML5Vc1+DHwhhDAfeB64rGYoltoih1NI +ktRxNOqRajHGZ4ADamz7r2qvV5IYZyy1KwZjSZI6Dme+k+phMJYkqeMwGEv1MBhLktRxGIylehiM +JUnqOAzGUj0MxpIkdRwGY6keBmNJkjqOBp9jnNKT+RxjtTE7dkD37vDJJ5Cdne5qJElSY7XIc4yl +jqxTJ+jfH1auTHclkiSppRmMpQY4nEKSpI7BYCw1wGAsSVLHYDCWGmAwliSpYzAYSw0wGEuS1DEY +jKUGGIwlSeoYDMZSAwzGkiR1DAZjqQEGY0mSOgYn+JAaUFkJOTmweXPiucaSJCnzOcGH1AK6dIH8 +fPjww3RXIkmSWpLBWGqEwkKHU0iS1N4ZjKVGcJyxJEntn8FYagSDsSRJ7Z/BWGoEg7EkSe2fwVhq +BIOxJEntn8FYagSDsSRJ7Z/BWGoEg7EkSe2fE3xIjbB5M/Tpk/iZ5V8nJUnKeE7wIbWQ7t2hVy9Y +uzbdlUiSpJZiMJYayeEUkiS1bwZjqZEMxpIktW8GY6mRDMaSJLVvBmOpkYqKoKws3VVIkqSWYjCW +GskeY0mS2jeDsdRIBmNJkto3g7HUSAZjSZLaN4Ox1EiFhYlg7Bw1kiS1TwZjqZF69YLsbNiwId2V +SJKklmAwlprA4RSSJLVfBmOpCQzGkiS1XwZjqQkMxpIktV8GY6kJDMaSJLVfBmOpCXY9mUKSJLU/ +BmOpCewxliSp/TIYS01gMJYkqf0yGEtNYDCWJKn9MhhLTdCnD2zfDhUV6a5EkiSlmsFYaoIQEr3G +ZWXprkSSJKWawVhqIodTSJLUPhmMpSYyGEuS1D4ZjKUmMhhLktQ+GYylJjIYS5LUPhmMpSYyGEuS +1D4ZjKUmMhhLktQ+GYylJvJxbZIktU8GY6mJCgpg40bYvDndlUiSpFQyGEtNFAIUFtprLElSe2Mw +lvaC44wlSWp/DMbSXjAYS5LU/hiMpb1gMJYkqf0xGEt7wWAsSVL7YzCW9oLBWJKk9qdRwTiEcEII +4b0QwqIQwuV1tBkdQngrhPBOCOHF1JYpZRaDsSRJ7U/nhhqEELKAXwBjgL8Db4QQfhtjfK9am1zg +ZmBsjLEshFDQUgVLmcBgLElS+9OYHuNRwOIY47IY4zZgDnBqjTZTgF/FGMsAYoxrU1umlFn69YP1 +66GyMt3G3FU+AAATCUlEQVSVSJKkVGlMMC4EPqi2vqJqW3X7A31DCC+GEN4IIUxNVYFSJurUCQYM +gL//Pd2VSJKkVGlwKEUTjnME8GWgJ/CnEMKfYoxLUnR8KePsGk4xdGi6K5EkSanQmGBcBgyutl5U +ta26FcDaGOMWYEsI4ffAocAewbikpCT5evTo0YwePbppFUsZwnHGkiRljtLSUkpLS5t1jBBjrL9B +CJ2AhSRuvlsJvA5MjjG+W63NgcDPgROArsBrwJkxxr/WOFZs6HxSW/Hd78KgQXDJJemuRJIk1RRC +IMYYmvKeBnuMY4w7QgjfAZ4jMSb5zhjjuyGECxK74+0xxvdCCM8C84EdwO01Q7HU3hQVwfLl6a5C +kiSlSoM9xik9mT3GakcefjixPPpouiuRJEk17U2PsTPfSXvJMcaSJLUvBmNpLxmMJUlqXxxKIe2l +bdugZ0/YtAk6p+rBh5IkKSUcSiG1ouxsKCiAVavSXYkkSUoFg7HUDA6nkCSp/TAYS81QVARlNae7 +kSRJbZLBWGoGe4wlSWo/DMZSMxiMJUlqPwzGUjMYjCVJaj8MxlIzGIwlSWo/DMZSMxiMJUlqP5zg +Q2qGLVsgNxc2b4Ys/5opSVLGcIIPqZV165YIxmvWpLsSSZLUXAZjqZkcTiFJUvtgMJaayWAsSVL7 +YDCWmqmw0GAsSVJ7YDCWmskeY0mS2geDsdRMBmNJktoHg7HUTAZjSZLaB4Ox1EwGY0mS2gcn+JCa +aeNG2Gcf2LQJQpMeIy5JklqKE3xIaZCTk5joY/36dFciSZKaw2AspYDDKSRJavsMxlIKGIwlSWr7 +DMZSChiMJUlq+wzGUgoYjCVJavsMxlIKGIwlSWr7DMZSChiMJUlq+wzGUgoUFUFZWbqrkCRJzWEw +llLAHmNJkto+g7GUAr17Q4xQUZHuSiRJ0t4yGEspEIK9xpIktXUGYylFDMaSJLVtBmMpRQzGkiS1 +bQZjKUUMxpIktW0GYylFDMaSJLVtBmMpRQzGkiS1bQZjKUUKCw3GkiS1ZQZjKUXsMZYkqW0zGEsp +kp8PmzbBJ5+kuxJJkrQ3DMZSiuya5KOsLN2VSJKkvWEwllLI4RSSJLVdBmMphQzGkiS1XQZjKYUM +xpIktV0GYymFDMaSJLVdBmMphQzGkiS1XQZjKYUMxpIktV0GYymFDMaSJLVdIcbYeicLIbbm+aTW +tnMndOsGFRWJn5IkKT1CCMQYQ1PeY4+xlEJZWTBoEPz97+muRJIkNZXBWEoxZ7+TJKltMhhLKeY4 +Y0mS2iaDsZRiBmNJktomg7GUYgZjSZLaJoOxlGIGY0mS2iaDsZRiBmNJktomg7GUYgZjSZLaJif4 +kFJs+3bo0QM++QSys9NdjSRJHVOLTfARQjghhPBeCGFRCOHyetqNDCFsCyGc3pQipPakc2fo1w9W +rUp3JZIkqSkaDMYhhCzgF8A4YAQwOYRwYB3t/hN4NtVFSm2NwykkSWp7GtNjPApYHGNcFmPcBswB +Tq2l3b8AjwKrU1if1CYVFhqMJUlqaxoTjAuBD6qtr6jalhRCGAScFmO8FWjSWA6pPbLHWJKktidV +T6W4Aag+9thwrA7NYCxJUtvTuRFtyoDB1daLqrZV9zlgTgghAAXAiSGEbTHGx2oerKSkJPl69OjR +jB49uoklS5mvqAjefDPdVUiS1HGUlpZSWlrarGM0+Li2EEInYCEwBlgJvA5MjjG+W0f7u4DHY4y/ +rmWfj2tTh/DyyzBjBvzhD+muRJKkjmlvHtfWYI9xjHFHCOE7wHMkhl7cGWN8N4RwQWJ3vL3mW5pS +gNQeOZRCkqS2xwk+pBawdSv06gWbN0OnTumuRpKkjqfFJviQ1DRdu0JeHqz24YWSJLUZBmOphTic +QpKktsVgLLUQg7EkSW2LwVhqIQZjSZLaFoOx1EIMxpIktS0GY6mFFBVBWc2pcCRJUsYyGEstxB5j +SZLaFoOx1EIMxpIktS1O8CG1kE2bID8/8TM06fHikiSpuZzgQ8ogPXoklnXr0l2JJElqDIOx1IIc +TiFJUtthMJZakMFYkqS2w2AstSCDsSRJbYfBWGpBBmNJktoOg7HUggzGkiS1HQZjqQUVFhqMJUlq +KwzGUguyx1iSpLbDYCy1oF3B2HltJEnKfAZjqQX17g1ZWfDRR+muRJIkNcRgLLUwh1NIktQ2GIyl +FmYwliSpbTAYSy3MYCxJUttgMJZamMFYkqS2wWAstTCDsSRJbYPBWGphBmNJktoGg7HUwgzGkiS1 +DQZjqYUZjCVJahsMxlILy8uDykrYuDHdlUiSpPoYjKUWFkKi17isLN2VSJKk+hiMpVbgcApJkjKf +wVhqBQZjSZIyn8FYagUGY0mSMp/BWGoFBmNJkjKfwVhqBQZjSZIyn8FYagUGY0mSMp/BWGoFBmNJ +kjKfwVhqBQUF8PHHsGVLuiuRJEl1MRhLrSArCwYOdJIPSZIymcFYaiUOp5AkKbMZjKVWYjCWJCmz +GYylVmIwliQpsxmMpVZiMJYkKbMZjKVWYjCWJCmzGYylVmIwliQpsxmMpVZiMJYkKbOFGGPrnSyE +2JrnkzLJjh3QvTts3AhduqS7GkmS2rcQAjHG0JT32GMstZJOnaB/f1i5Mt2VSJKk2hiMpVbkcApJ +kjKXwVhqRQZjSZIyl8FYakVFRVBWlu4qJElSbQzGUiuyx1iSpMxlMJZakcFYkqTMZTCWWpHBWJKk +zGUwllqRwViSpMzlBB9SK6qshJwc2Lw58VxjSZLUMpzgQ8pwXbpAfj58+GG6K5EkSTUZjKVW5nAK +SZIyk8FYamUGY0mSMlOjgnEI4YQQwnshhEUhhMtr2T8lhPB21fJKCOEfUl+q1D4YjCVJykwNBuMQ +QhbwC2AcMAKYHEI4sEaz94EvxhgPBX4M3JHqQqX2orDQYCxJUiZqTI/xKGBxjHFZjHEbMAc4tXqD +GOOrMcaPqlZfBQpTW6bUfthjLElSZmpMMC4EPqi2voL6g+83gKebU5TUnhmMJUnKTJ1TebAQwnHA +NOCYVB5Xak8MxpIkZabGBOMyYHC19aKqbbsJIRwC3A6cEGMsr+tgJSUlydejR49m9OjRjSxVah8K +C6GsDHbuhCyfCyNJUkqUlpZSWlrarGM0OPNdCKETsBAYA6wEXgcmxxjfrdZmMDAXmBpjfLWeYznz +nQQUFMBf/wr9+qW7EkmS2qe9mfmuwR7jGOOOEMJ3gOdIjEm+M8b4bgjhgsTueDtwJdAXuCWEEIBt +McZRTf8IUsewaziFwViSpMzRYI9xSk9mj7EEwMknw/nnwymnpLsSSZLap73pMXaEo5QG3oAnSVLm +MRhLaWAwliQp8xiMpTQwGEuSlHkMxlIaGIwlSco8BmMpDYqKEs8yliRJmcNgLKVBYWGix9iHtEiS +lDkMxlIa9OoF2dmwYUO6K5EkSbsYjKU0cZyxJEmZxWAspYnBWJKkzGIwltLEYCxJUmYxGEtpYjCW +JCmzGIylNDEYS5KUWQzGUpoYjCVJyiwGYylNDMaSJGUWg7GUJrsm+ZAkSZnBYCylSZ8+sH07VFSk +uxJJkgQGYyltQkgMpygrS3clkiQJDMZSWjnOWJKkzGEwltLIYCxJUuYwGEtpZDCWJClzGIylNDIY +S5KUOQzGUhoZjCVJyhwGYymNDMaSJGUOg7GURgZjSZIyh8FYSqOCAvjkE9i0Kd2VSJIkg7GURiEk +poZ2kg9JktLPYCylmbPfSZKUGQzGUpo5zliSpMxgMJbSzGAsSVJmMBhLaWYwliQpM3ROdwFSR1dU +BHPnprsKZaqtW2HDBujUac8lK+vTnyGku9LmixF27EgsO3d++rrmsm1bYtm+/dPXNdcb83pv3rN9 +e2KJ8dOaq9ef7tfpOB8kvn+7voO7XqdiW2PbVq+r+s/atjVm3968vyF1/Tda33+7e7Mv04+X6QzG +UprZYyxIBOCFC2HBgsTy178mfi5fDr167R4Ma4bGGHcPyXUF6L3ZVn17Y0JrXfsas73656irns6d +E0t2dmKp63V9++p63aNH49rtqm2X6gEgE1639vlqhsTqS2O3Nff91cNyzZ97u29v3l+XusJzfaF6 +b/Zl+vFa23PPNf09BmMpzQzGHUtlZSIA7wq+u5Zly2DYMDj4YBgxAiZNSvzcbz/o0qX+Y+7qaa0t +fDZnW/XtO3c2HFobG7Dbe8+3pLYrxFaM9SGE2Jrnk9qCHTuge3fYuLHhAKS2o7ISFi/ePfwuWAB/ ++xsMHZoIvdWX/ff3z1+SUimEQIyxSX/dNhhLGWDIEHjppURgUtuybVvtAfj99xN/rtXD78EHwwEH +QNeu6a5aktq/vQnGDqWQMsCu4RQG48y1bRssWVJ7AC4u/nQIxFe+Av/+74kA3K1buquWJDWFwVjK +AIWFjjPOJNu3wxtvwAsvwP/+byIAL1mS+HPa1ft76qlwxRWJANy9e7orliSlgsFYygDegJd+H3wA +zz6bWObOTfyZHH88/L//B5ddBgcemHhqgSSp/TIYSxmgqCjxWC61ns2b4fe//zQMf/gh/NM/JYLw +jTfCoEHprlCS1NoMxlIGKCqCP/4x3VW0bzHCu+8mQvAzzySu96GHwrhxcNdd8I//mHhkmCSp4zIY +SxnAoRQto7wcfve7T3uFs7ISQfiCC+Chh6BPn3RXKEnKJAZjKQMYjFNjx47ETXPPPJMIwgsWwDHH +JMLwJZckbpRzAglJUl18jrGUAbZtg549YdOmxNSzarwVK3a/aW7QIDjhhEQYPuYYH5kmSR2VE3xI +bdigQfD664neY9Vt82Z4+eVPe4VXrUo8PWLcOBg7NvFINUmSnOBDasN2DacwGO+u+k1zzz4Lf/gD +HHKIN81JklLPYCxliI48zjhG2LIlMZTkk08Sy4IFn4bhEBJB+LzzYM4cb5qTJLUMg7GUITI5GMcI +lZWJwFo9vDZ3fdfrTZsgOzsxzrpHj8TPYcMSYfi7301MruFNc5KklmYwljJEcTHceiuUlibWY0ws +1V83tJ6Ktjt3Jsbx1gyvWVm7B9ddS13rubmJcdONad+jhzcdSpLSz5vvpAyxfn0iFIfw6QK1v07V +vtraZmVB9+57Btns7Bb9+JIkpZRPpZAkSZJoI0+lCA4UlCRJUgZq9WBsj7EkSZJa2t50xma1QB2S +JElSm2MwliRJkjAYS5IkSYDBWJIkSQIMxpIkSRJgMJYkSZKARgbjEMIJIYT3QgiLQgiX19HmphDC +4hDCvBDCYaktU5IkSWpZDQbjEEIW8AtgHDACmBxCOLBGmxOB4THG/YALgNtaoFapRZWWlqa7BKlW +fjeVqfxuqr1pTI/xKGBxjHFZjHEbMAc4tUabU4F7AWKMrwG5IYT+Ka1UamH+glem8rupTOV3U+1N +Y4JxIfBBtfUVVdvqa1NWSxtJkiQpY3nznSRJkgSEGGP9DUL4PFASYzyhan0GEGOM11RrcxvwYozx +oar194AvxRg/rHGs+k8mSZIkpUiMMTSlfedGtHkD+EwIYQiwEpgETK7R5jHgQuChqiC9oWYo3pvi +JEmSpNbSYDCOMe4IIXwHeI7E0Is7Y4zvhhAuSOyOt8cYnwohnBRCWAJ8Akxr2bIlSZKk1GpwKIUk +SZLUEbTazXeNmSRESocQwv+FEN4OIbwVQng93fWoYwsh3BlC+DCEML/atrwQwnMhhIUhhGdDCLnp +rFEdUx3fzR+EEFaEEP5StZyQzhrVMYUQikIIL4QQFoQQ/jeE8K9V25v8u7NVgnFjJgmR0mgnMDrG +eHiMcVS6i1GHdxeJ35XVzQB+F2M8AHgB+LdWr0qq/bsJ8NMY4xFVyzOtXZQEbAe+G2McARwFXFiV +M5v8u7O1eowbM0mIlC4BH12oDBFjfAUor7H5VOCeqtf3AKe1alESdX43IfE7VEqbGOOqGOO8qtcb +gXeBIvbid2drhYHGTBIipUsEng8hvBFCOC/dxUi16LfrST8xxlVAvzTXI1X3nRDCvBDCfzvMR+kW +QhgKHAa8CvRv6u9Oe8kkODrGeARwEol/fjkm3QVJDfCuaWWKW4B9Y4yHAauAn6a5HnVgIYQc4FHg +oqqe45q/Kxv83dlawbgMGFxtvahqm5R2McaVVT/XAL8hMfRHyiQfhhD6A4QQBgCr01yPBCR+b8ZP +H291BzAynfWo4wohdCYRiu+LMf62anOTf3e2VjBOThISQuhCYpKQx1rp3FKdQgg9qv6GSQihJzAW +eCe9VUkEdh+3+Rjw9arXXwN+W/MNUivZ7btZFTZ2OR1/fyp9fgn8NcZ4Y7VtTf7d2WrPMa56hMuN +fDpJyH+2yomleoQQhpHoJY4kJrx5wO+m0imEMBsYDeQDHwI/AP4HeAQoBpYBE2OMG9JVozqmOr6b +x5EYz7kT+D/ggtpmvpVaUgjhaOD3wP+S+P95BK4AXgcepgm/O53gQ5IkScKb7yRJkiTAYCxJkiQB +BmNJkiQJMBhLkiRJgMFYkiRJAgzGkiRJEmAwliRJkgCDsSRJkgTA/wdTBj79QR260QAAAABJRU5E +rkJggg== +" +> +</div> + +</div> + +</div> +</div> + +</div> +<div class="cell border-box-sizing text_cell rendered"> +<div class="prompt input_prompt"> +</div> +<div class="inner_cell"> +<div class="text_cell_render border-box-sizing rendered_html"> +<p>**sigh...** After all the effort and computational power, we're still at square one: we have yet to beat out the naive guess threshold. With PCA in play we end up performing terribly, but not terribly enough that we can guess against ourselves.</p> +<p>Let's try one last-ditch attempt using the entire data set:</p> + +</div> +</div> +</div> +<div class="cell border-box-sizing code_cell rendered"> +<div class="input"> +<div class="prompt input_prompt">In&nbsp;[8]:</div> +<div class="inner_cell"> + <div class="input_area"> +<div class=" highlight hl-ipython3"><pre><span></span><span class="k">def</span> <span class="nf">evaluate_gnb_full</span><span class="p">(</span><span class="n">dims</span><span class="p">):</span> + <span class="n">pca</span> <span class="o">=</span> <span class="n">PCA</span><span class="p">(</span><span class="n">n_components</span><span class="o">=</span><span class="n">dims</span><span class="p">)</span> + <span class="n">X_xform</span> <span class="o">=</span> <span class="n">pca</span><span class="o">.</span><span class="n">fit_transform</span><span class="p">(</span><span class="n">X</span><span class="p">)</span> + + <span class="n">gnb</span> <span class="o">=</span> <span class="n">GaussianNB</span><span class="p">()</span> + <span class="n">gnb</span><span class="o">.</span><span class="n">fit</span><span class="p">(</span><span class="n">X_xform</span><span class="p">,</span> <span class="n">y</span><span class="p">)</span> + <span class="k">return</span> <span class="n">gnb</span><span class="o">.</span><span class="n">score</span><span class="p">(</span><span class="n">X_xform</span><span class="p">,</span> <span class="n">y</span><span class="p">)</span> + +<span class="n">dim_range</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">arange</span><span class="p">(</span><span class="mi">1</span><span class="p">,</span> <span class="mi">21</span><span class="p">)</span> +<span class="n">plt</span><span class="o">.</span><span class="n">plot</span><span class="p">(</span><span class="n">dim_range</span><span class="p">,</span> <span class="p">[</span><span class="n">evaluate_gnb</span><span class="p">(</span><span class="n">dim</span><span class="p">)</span> <span class="k">for</span> <span class="n">dim</span> <span class="ow">in</span> <span class="n">dim_range</span><span class="p">],</span> <span class="n">label</span><span class="o">=</span><span class="s2">&quot;Gaussian NB Accuracy&quot;</span><span class="p">)</span> +<span class="n">plt</span><span class="o">.</span><span class="n">axhline</span><span class="p">(</span><span class="n">naive_guess</span><span class="p">,</span> <span class="n">label</span><span class="o">=</span><span class="s2">&quot;Naive Guess&quot;</span><span class="p">,</span> <span class="n">c</span><span class="o">=</span><span class="s1">&#39;k&#39;</span><span class="p">)</span> +<span class="n">plt</span><span class="o">.</span><span class="n">axhline</span><span class="p">(</span><span class="mi">1</span> <span class="o">-</span> <span class="n">naive_guess</span><span class="p">,</span> <span class="n">label</span><span class="o">=</span><span class="s2">&quot;Inverse Naive Guess&quot;</span><span class="p">,</span> <span class="n">c</span><span class="o">=</span><span class="s1">&#39;k&#39;</span><span class="p">)</span> +<span class="n">plt</span><span class="o">.</span><span class="n">gcf</span><span class="p">()</span><span class="o">.</span><span class="n">set_size_inches</span><span class="p">(</span><span class="mi">12</span><span class="p">,</span> <span class="mi">6</span><span class="p">)</span> +<span class="n">plt</span><span class="o">.</span><span class="n">legend</span><span class="p">();</span> +</pre></div> + +</div> +</div> +</div> + +<div class="output_wrapper"> +<div class="output"> + + +<div class="output_area"><div class="prompt"></div> + + +<div class="output_png output_subarea "> +<img src="data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAsYAAAFwCAYAAAC7E71AAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz +AAALEgAACxIB0t1+/AAAIABJREFUeJzt3Xl8VfWd//HXNxDWQAiJbElYpK503KZgrdpiGUH9iVor +CCi2OFXb2hmxdWHsaEOXcURt1dZldKw74tJ26r4UjdW2bq3ISJXNDkgKsgUjsoTl+/vjhmsIWclN +7k3yej4e55F7zvnecz73cI1vvnzP+YYYI5IkSVJHl5XuAiRJkqRMYDCWJEmSMBhLkiRJgMFYkiRJ +AgzGkiRJEmAwliRJkoBGBOMQwp0hhA9DCPPraXNTCGFxCGFeCOGw1JYoSZIktbzG9BjfBYyra2cI +4URgeIxxP+AC4LYU1SZJkiS1mgaDcYzxFaC8nianAvdWtX0NyA0h9E9NeZIkSVLrSMUY40Lgg2rr +ZVXbJEmSpDbDm+8kSZIkoHMKjlEGFFdbL6ratocQQkzB+SRJkqQGxRhDU9o3NhiHqqU2jwEXAg+F +ED4PbIgxflhPgU2pT2o1JSUllJSUpLsMaQ9+N5Wp/G4qk4XQpEwMNCIYhxBmA6OB/BDCcuAHQBcg +xhhvjzE+FUI4KYSwBPgEmNbkKiRJkqQ0azAYxxinNKLNd1JTjiRJkpQe3nwnVRk9enS6S5Bq5XdT +mcrvptqb0JpjfkMI0THGkiRJamkhhBa7+U6SJLUjQ4cOZdmyZekuQ2q2IUOG8H//938pOZY9xpIk +dUBVvWnpLkNqtrq+y3vTY+wYY0mSJAmDsSRJkgQYjCVJkiTAYCxJktSgk046ifvuuy/dZaiFdbib +7371K1i/Pq0l7CaE+pesrIbbNHcRdOsGRx+d7iokqfVk+s13c+bM4YYbbuCdd94hJyeHYcOGcc45 +5/Ctb30r3aWl1D333MO0adOYNWsWl1xySXJ7cXExDzzwAF/84heZOXMmP/nJT+jWrVty349+9CNO +P/30eo9dWlrKl7/8Za655houvfTSFv0c6ZTKm++IMbbaAkQXFxcXFxeXzFgy1XXXXRcHDBgQf/3r +X8eNGzfGGGOcN29ePPvss2NlZWWaq0utu+++O+bn58d99tkn+VljjLGoqCi+9NJLMcYYS0pK4tSp +U5P7nn322di9e/e4evXqeo89bdq0WFBQED/72c+2TPH12L59e6udq4HveJOyaqsPpWhqgS4urbEc +fXTk979Pfx0uLi4urbVkqoqKCn7wgx9w66238pWvfIWePXsCcOihh3LfffeRnZ0NwFNPPcURRxxB +bm4uQ4YMYebMmcljvPTSSxQXF+923GHDhvHCCy8A8MYbbzBy5Ehyc3MZOHBgsqd269atTJ06lYKC +AvLy8jjyyCNZs2YNAMcddxy//OUvAXj//fcZM2YMBQUF9OvXj7PPPpuKiordznX99ddz6KGHkpeX +x+TJk6msrKzzMx900EEcddRRXH/99Y26RmPHjqVXr14sXbq0zjabNm3i0Ucf5eabb2bx4sX85S9/ +2W3/K6+8wtFHH01eXh5Dhgzh3nvvBWDLli1873vfY+jQoeTl5fHFL36RrVu3NnhNZ86cyYQJE5g6 +dSp9+vThnnvu4Y033uALX/gCeXl5FBYW8i//8i9s3749+f4FCxYwduxY8vPzGThwIP/5n//Jhx9+ +SM+ePSkvL0+2+8tf/kK/fv3YsWNHnZ83Vd9xxxhLQFERrFiR7iokSX/605+orKzklFNOqbddTk4O +9913Hx999BFPPvkkt912G4899lhyf6hnrOBFF13E9OnT+eijj1i6dCkTJ04EEsMaKioqKCsrY/36 +9dx222107959j/fHGLniiitYtWoV7777LitWrKCkpGS3No888gjPPfccf/vb33j77be5++6766wn +hMCPfvQjbrjhBjZs2FDv5wZ48skn2bZtGwcffHCdbX71q1/Rq1cvJkyYwNixY7nnnnuS+5YvX85J +J53ERRddxNq1a5k3bx6HHXYYAN/73vd46623ePXVV1m/fj2zZs0iKysrWWd9HnvsMSZOnMiGDRs4 +66yz6Ny5MzfccAPr16/nT3/6Ey+88AK33HILABs3buT444/npJNOYuXKlSxZsoQxY8bQv39/jjvu +OB5++OHkce+//34mT55Mp06dGrw2zWUwljAYS1KmWLt2LQUFBckwBiR7Nnv06MErr7wCwBe/+EVG +jBgBwGc/+1kmTZrESy+91KhzdOnShSVLlrBu3Tp69OjBqFGjAMjOzmbdunUsWrSIEAKHH344OTk5 +e7x/+PDhjBkzhs6dO5Ofn8/FF1+8x7kvuugi+vfvT58+fRg/fjzz5s2rt6ZDDjmE448/nmuuuabW +/Q899BB9+/YlJyeH0047jSuuuILevXvXebx7772XSZMmEUJgypQpzJkzJ9njOnv2bI4//ngmTpxI +p06dyMvL45BDDiHGyF133cVNN93EgAEDCCHw+c9/PtlL35CjjjqK8ePHA9C1a1cOP/xwRo0aRQiB +wYMHc/755yev0xNPPMHAgQOZPn06Xbp0oWfPnowcORKAc845J3mj486dO3nwwQeZOnVqo2poLoOx +hMFYkmqTjhu88/PzWbt2LTt37kxu+8Mf/kB5eTkFBQXJ7a+99hpf/vKX6devH3369OG//uu/WLt2 +baPOceedd7Jw4UIOPPBAjjzySJ588kkApk6dyrhx45g0aRJFRUVcfvnltf7z/erVq5k8eTJFRUX0 +6dOHs88+e49z9+/fP/m6R48ebNy4scG6fvjDH3LrrbeyevXqPfadeeaZrF+/no0bN7J06VLuuece +7rjjjlqPs2LFCl588UWmTJkCwCmnnMLmzZuTn/ODDz5g+PDhe7xv7dq1bN26lX333bfBWmtTc6jF +4sWLGT9+PAMHDqRPnz58//vfT16numoAOPXUU3n33XdZtmwZzz33HH369OFzn/vcXtXUVAZjCYOx +JNUmxuYvTXXUUUfRtWtXfvvb39ZSz6cHPOusszjttNMoKytjw4YNXHDBBcn9PXv2ZNOmTcm2O3bs +SI4VhkSP7+zZs1mzZg2XXXYZZ5xxBps3b6Zz585ceeWVLFiwgD/+8Y888cQTybG31V1xxRVkZWWx +YMECNmzYwP3335+ScdsHHHAAp59+Oj/5yU/qHbYwePBgTjzxRB5//PFa9997773EGJOhdPjw4Wzd +ujU5nKK4uJglS5bs8b6CggK6detW69jlhq4p7DnU4lvf+hYHHXQQS5cuZcOGDfzkJz9JXqfi4uI6 +x0h37dqViRMnct9993H//fe3Wm8xGIwlwGAsSZkiNzeXq666im9/+9v86le/YuPGjcQYmTdv3m7B +bOPGjeTl5ZGdnc3rr7/O7Nmzk/v2339/tmzZwtNPP8327dv58Y9/vNvNbw888ECy5zI3N5cQAllZ +WZSWlvLOO++wc+dOcnJyyM7OrnVc68cff0xOTg69evWirKyMa6+9NmWf/6qrruKuu+7aY6xx9eC9 +YsUKnnnmGT772c/Weox7772XkpIS5s2bx9tvv83bb7/No48+ypNPPkl5eTlnnXUWc+fO5dFHH2XH +jh2sX7+et99+mxAC06ZN47vf/S4rV65k586dvPrqq2zbtq3Ba1qbjz/+mN69e9OjRw/ee+89br31 +1uS+k08+mVWrVnHTTTdRWVnJxo0bef3115P7p06dyt13383jjz9uMJZam8FYkjLHpZdeyk9/+lNm +zZrFgAEDGDBgAN/61reYNWsWX/jCFwC45ZZbuPLKK8nNzeXHP/4xZ555ZvL9vXv35pZbbuGf//mf +KSoqolevXhQVFSX3P/PMM4wYMYLevXtz8cUX89BDD9G1a1dWrVrFGWecQW5uLiNGjOC4447j7LPP +BnbvDf3BD37An//85+T44a9+9au71d/QTWr1GTp0KFOnTuWTTz7ZbfvDDz9M79696d27N0ceeSTH +HnssV1111R7vf+2111i+fDnf/va36devX3IZP348++23Hw8++CDFxcU89dRTXHfddfTt25fDDz+c ++fPnA3DdddfxD//wD4wcOZL8/HxmzJjBzp07G7ymtbnuuut44IEH6N27NxdccAGTJk1K7svJyeH5 +55/nscceY8CAAey///6UlpYm93/hC18gKyuLI444Yo8hGi2pw03wIdVm+3bo0QM++QQaeY+BJLVp +mT7BhzRmzBjOOusszj333HrbpXKCD3uMJaBzZ9hnH1i1Kt2VSJKkN954g7feemu3fwloDQZjqYrD +KSRJSr+vf/3rjB07lhtvvDE5wUtr6dyqZ5MymMFYkqT0q28ylJZmj7FUxWAsSVLHZjCWqhiMJUnq +2AzGUhWDsSRJHZvBWKpiMJYkqWMzGEtVDMaSJHVsBmOpyqBBsHIl7NiR7kokSc1x9dVXc/7556e7 +DLVBBmOpSteukJcHq1enuxJJ6tiGDh1K//792bx5c3LbnXfeyXHHHdeo9//bv/0bt99+e4vU9vzz +z/PlL3+Z3r17s88++3DEEUdw7bXXUllZ2SLnU+syGEvVOJxCktIvhMDOnTu54YYb9tieTo888ggT +Jkzg7LPPZvny5axZs4aHHnqIFStW8MEHH6S1NqWGwViqpqgIysrSXYUk6dJLL+X666+noqKi1v3T +p09n8ODB5ObmMnLkSF555ZXkvpkzZ3LOOecAcNJJJ3HLLbfs9t7DDjuM//mf/wHgvffeY+zYseTn +53PQQQfxyCOP1FnT9773PUpKSjj33HPp06cPAPvttx833ngjw4cPB2DatGlcddVVyfe89NJLFBcX +J9dXrlzJGWecQb9+/Rg+fDg///nPk/veeOMNRo4cSW5uLgMHDuSSSy4BYOvWrUydOpWCggLy8vI4 +8sgjWbNmTcMXUU1mMJaqscdYkjLD5z73OUaPHs21115b6/5Ro0Yxf/58ysvLmTJlChMmTKh1OMPk +yZOZPXt2cv2vf/0ry5cv5+STT2bTpk2MHTuWs88+m7Vr1zJnzhwuvPBC3nvvvT2Os3DhQsrKyjj9 +9NOb/Fl29XTHGBk/fjyHH344K1euZO7cudx44408//zzAFx00UVMnz6djz76iKVLlzJx4kQA7rnn +HioqKigrK2P9+vXcdtttdO/evcl1qGEGY6kag7EkZY6ZM2fyi1/8gnXr1u2xb8qUKfTp04esrCwu +vvhitm7dysKFC/do95WvfIW33347OdRh9uzZnH766XTu3JknnniCYcOGcc455xBC4NBDD+X000+v +tdd47dq1AAwYMCC5bfLkyeTl5dGzZ08eeOCBBj/P66+/ztq1a/n+979Pp06dGDp0KN/4xjeYM2cO +ANnZ2SxZsoR169bRo0cPRo0aldy+bt06Fi1aRAiBww8/nJycnEZcQTWVwViqxmAsSZ8KITR7aY4R +I0Zw8sknc/XVV++x77rrruPggw8mLy+PvLw8KioqkuG1upycHE466aRk+HzwwQc5++yzAVi2bBmv +vvoqffv2pW/fvuTl5TF79mxWrVq1x3Hy8/OBxFCIXR588EHKy8s54ogj2NGIRxotX76csrKy3c53 +9dVXs7rqru9f/vKXLFy4kAMPPJAjjzySJ598EoCpU6cybtw4Jk2aRFFRETNmzGjU+dR0BmOpGoOx +JH0qxtjspblKSkq44447KKt2A8jLL7/Mtddey6OPPkp5eTnl5eX07t27zvPtGk7x6quvsnXrVkaP +Hg1AcXExo0ePZv369axfv57y8nIqKiq4+eab9zjGAQccQGFhIb/+9a/rrbdnz55s2rQpuV49SBcX +F7Pvvvvudr6PPvqIxx9/HIDhw4cze/Zs1qxZw2WXXcYZZ5zB5s2b6dy5M1deeSULFizgj3/8I48/ +/jj33ntvo6+hGs9gLFVjMJakzDJ8+HDOPPNMbrrppuS2jRs3kp2dTX5+PpWVlfzwhz/k448/rvMY +J510EsuWLeOqq67izDPPTG4/+eSTWbRoEffffz/bt29n27ZtvPnmm7WOMQ4hcN111zFz5kzuvPNO +NmzYAMDixYv58MMPk+0OO+wwnnrqKcrLy1m1ahU33nhjct+oUaPo1asXs2bNYsuWLezYsYMFCxbw +5ptvAvDAAw8ke71zc3MJIZCVlUVpaSnvvPMOO3fuJCcnh+zsbLKyjHAtwasqVVNYmHgqRQo6OSRJ +e6nmEIyrrrqKTZs2JbePGzeOcePGsf/++zNs2DB69Oix25MfaurSpQunn346c+fOZcqUKcntOTk5 +PPfcc8yZM4dBgwYxaNAgZsyYUecziSdOnMjDDz/Mfffdx+DBg9lnn32YNGkS3/zmN5kwYQKQGPZw +yCGHMHToUE444QQmTZqUfH9WVhZPPPEE8+bNY9iwYfTr14/zzjsv+eSNZ555hhEjRtC7d28uvvhi +HnroIbp27cqqVas444wzyM3NZcSIERx33HFMnTp17y6u6hVS8c8cjT5ZCLE1zyftjfx8WLgQCgrS +XYkktZwQQkqGOkjpVtd3uWp7kwa622Ms1eBwCkmSOiaDsVSDwViSpI7JYCzVUFhoMJYkqSMyGEs1 +2GMsSVLHZDCWajAYS5LUMRmMpRoMxpIkdUwGY6kGg7EkSR2TwViqYVcw9vGekiR1LAZjqYbevSEr +Cz76KN2VSJI6oldeeYWDDjoo3WV0SAZjqRYOp5Ck9Bk2bBgvvPBCusvYa1//+tfJysrizTffTG5b +unQpWVmNi13HHHMM7777bovUtmTJEiZPnky/fv3o06cPBxxwABdddBF///vfW+R8bY3BWKqFwViS +OpYdO3ak7FghBPLz8/n3f//3Pban05IlSzjyyCMpKipi3rx5bNiwgT/84Q8MHz6cV155Ja21ZQqD +sVQLg7EkZYZ77rmHY489lksvvZS+ffsyfPhwnn32WQAefvhhRo4cuVv7n/3sZ5x22mkAVFZWcskl +lzBkyBAGDhzIt7/9bbZu3QrASy+9RHFxMbNmzWLgwIGce+65rFu3jvHjx5OXl0d+fj5f+tKXksdd +uXIlZ5xxBv369WP48OH8/Oc/r7fur33ta8yfP5+XX3651v133303Bx98ML179+Yzn/kMt99+e3Lf +rtoAZs2axYQJE3Z770UXXcT06dMBqKio4Bvf+AaDBg2iuLiYK6+8kljHTTIzZ87kmGOO4dprr2XQ +oEEAFBQU8K//+q9MnDhxt+tdXVZWFu+//36D17S+63fNNddQVFRE7969Oeigg3jxxRfrvX7pYjCW +amEwlqTM8frrr3PQQQexbt06Lr30Us4991wAxo8fz6JFi1i6dGmy7YMPPshZZ50FwOWXX86SJUuY +P38+S5YsoaysjB/+8IfJtqtWrWLDhg0sX76c22+/neuvv57i4mLWrVvH6tWr+Y//+A8AYoyMHz+e +ww8/nJUrVzJ37lxuvPFGnn/++Tpr7tGjB1dccQVXXHFFrfv79+/PU089RUVFBXfddRcXX3wx8+bN +S+7f1bs8adIknn76aT755BMAdu7cySOPPJL8jF/72tfo0qUL77//Pm+99RbPP/88//3f/13rOX/3 +u9/x1a9+tf6LzZ4929XX67umdV2/RYsWcfPNN/PnP/+ZiooKnn32WYYOHdpgHelgMJZqYTCWpMwx +ZMgQzj33XEIIfO1rX2PlypWsXr2a7t27c8opp/Dggw8CsHjxYhYuXMgpp5wCwB133MHPfvYzcnNz +6dmzJzNmzEi2BejUqRMzZ84kOzubrl27kp2dzcqVK/nb3/5Gp06dOProowF44403WLt2Ld///vfp +1KkTQ4cO5Rvf+AZz5sypt+7zzz+f5cuXJ3u4qzvxxBOT4fDYY49l7NixtfYuDx48mCOOOILf/OY3 +AMydO5eePXsycuRIPvzwQ55++ml+9rOf0a1bNwoKCpg+ffpun7G6tWvXMmDAgOT6zTffTF5eHr16 +9eKCCy6o83NU74Gu75rWdf06depEZWUl77zzDtu3b2fw4MEMGzas3muXLgZjqRZFRVBWlu4qJCm9 +QgjNXlKhepjr3r07ABs3bgRgypQpyWA2e/ZsTjvtNLp27cqaNWvYtGkT//iP/0jfvn3p27cvJ554 +IuvWrUsea5999iE7Ozu5ftlllzF8+HDGjh3LZz7zGa655hoAli1bRllZWfI4eXl5XH311axevbre +urt06cKVV17JlVdeuce+p59+mqOOOor8/Hzy8vJ4+umnWbt2ba3HmTx5cvIzPvjgg0yZMgWA5cuX +s23bNgYOHJis65vf/Gadx8nPz2flypXJ9QsvvJDy8nKmT5/Otm3b6v0sQIPX9NJLL631+g0fPpwb +briBkpIS+vfvz5QpU3arI5MYjKVa2GMsSYmewuYuLe34449nzZo1vP3228yZMycZGgsKCujRowcL +Fixg/fr1rF+/ng0bNvBRtWdx1gzuPXv25LrrrmPp0qU89thj/PSnP+XFF1+kuLiYfffdN3mc8vJy +PvroIx5//PEG65s2bRobNmzg17/+dXJbZWUlZ5xxBpdddhlr1qyhvLycE088sc7rNWHCBEpLSykr +K+M3v/lN8jMWFxfTrVs31q1bl6xrw4YNzJ8/v9bjjBkzZrc6atOzZ082bdqUXF+1alXydUPXNCcn +p9brB4khIS+//DLLli0DYMaMGQ1durQwGEu1MBhLUtvQuXNnJkyYwKWXXkp5eTnHH388kAi95513 +HtOnT2fNmjUAlJWV8dxzz9V5rCeffDI5XrlXr1507tyZrKwsRo0aRa9evZg1axZbtmxhx44dLFiw +YLfHsdWlU6dOlJSUJHtPIRGMKysrKSgoICsri6effrreugoKCvjSl77EtGnT2HfffTnggAOARE/6 +2LFjufjii/n444+JMfL+++/z+9//vtbjlJSU8PLLL3PJJZckH8+2du3a3R4Nd+ihh7JgwQLmz5/P +1q1bmTlzZvIvEA1d07qu36JFi3jxxReprKykS5cudO/evdGPrmttmVmVlGZ5ebB1K1T9S50kqRU1 +NASj5v7Jkyczd+5cJk6cuFvguuaaa/jMZz7D5z//efr06cPYsWNZtGhRncddvHgx//RP/0SvXr04 ++uijufDCC/nSl75EVlYWTzzxBPPmzWPYsGH069eP8847j4qKikbXN3DgwOT2nJwcbrrpJiZMmEDf +vn2ZM2cOp556ar2fecqUKcydOzd5090u9957L5WVlRx88MH07duXCRMm7NbLW91+++3Ha6+9xgcf +fMChhx5Kbm4uxx57LIWFhfzoRz9KtrnqqqsYM2YM+++//x5PqKjvmtZ1/bZu3cqMGTPYZ599GDRo +EGvWrOHqq6+u9/OmS2iNf+ZIniyE2Jrnk5pj//3h8ceh6i/mktSuhBBaZaiD1NLq+i5XbW/SQHd7 +jKU6OJxCkqSOxWAs1cFgLElSx9KoYBxCOCGE8F4IYVEI4fJa9vcOITwWQpgXQvjfEMLXU16p1MoM +xpIkdSwNBuMQQhbwC2AcMAKYHEI4sEazC4EFMcbDgOOA60MInVNdrNSaDMaSJHUsjekxHgUsjjEu +izFuA+YANW+djECvqte9gHUxxu2pK1NqfQZjSZI6lsYE40Lgg2rrK6q2VfcL4OAQwt+Bt4GLUlOe +lD6FhQZjSZI6klTdfDcOeCvGOAg4HLg5hJCTomNLaWGPsSRJHUtjxgGXAYOrrRdVbatuGnA1QIxx +aQjhb8CBwB5TwpSUlCRfjx49mtGjRzepYKm17LMPVFTAli3QrVu6q5Gk1BoyZEiDE2lIbcGQIUMA +KC0tpbS0tFnHanCCjxBCJ2AhMAZYCbwOTI4xvlutzc3A6hjjzBBCfxKB+NAY4/oax3KCD7Upw4bB +734Hw4enuxJJktQULTLBR4xxB/Ad4DlgATAnxvhuCOGCEML5Vc1+DHwhhDAfeB64rGYoltoih1NI +ktRxNOqRajHGZ4ADamz7r2qvV5IYZyy1KwZjSZI6Dme+k+phMJYkqeMwGEv1MBhLktRxGIylehiM +JUnqOAzGUj0MxpIkdRwGY6keBmNJkjqOBp9jnNKT+RxjtTE7dkD37vDJJ5Cdne5qJElSY7XIc4yl +jqxTJ+jfH1auTHclkiSppRmMpQY4nEKSpI7BYCw1wGAsSVLHYDCWGmAwliSpYzAYSw0wGEuS1DEY +jKUGGIwlSeoYDMZSAwzGkiR1DAZjqQEGY0mSOgYn+JAaUFkJOTmweXPiucaSJCnzOcGH1AK6dIH8 +fPjww3RXIkmSWpLBWGqEwkKHU0iS1N4ZjKVGcJyxJEntn8FYagSDsSRJ7Z/BWGoEg7EkSe2fwVhq +BIOxJEntn8FYagSDsSRJ7Z/BWGoEg7EkSe2fE3xIjbB5M/Tpk/iZ5V8nJUnKeE7wIbWQ7t2hVy9Y +uzbdlUiSpJZiMJYayeEUkiS1bwZjqZEMxpIktW8GY6mRDMaSJLVvBmOpkYqKoKws3VVIkqSWYjCW +GskeY0mS2jeDsdRIBmNJkto3g7HUSAZjSZLaN4Ox1EiFhYlg7Bw1kiS1TwZjqZF69YLsbNiwId2V +SJKklmAwlprA4RSSJLVfBmOpCQzGkiS1XwZjqQkMxpIktV8GY6kJDMaSJLVfBmOpCXY9mUKSJLU/ +BmOpCewxliSp/TIYS01gMJYkqf0yGEtNYDCWJKn9MhhLTdCnD2zfDhUV6a5EkiSlmsFYaoIQEr3G +ZWXprkSSJKWawVhqIodTSJLUPhmMpSYyGEuS1D4ZjKUmMhhLktQ+GYylJjIYS5LUPhmMpSYyGEuS +1D4ZjKUmMhhLktQ+GYylJvJxbZIktU8GY6mJCgpg40bYvDndlUiSpFQyGEtNFAIUFtprLElSe2Mw +lvaC44wlSWp/DMbSXjAYS5LU/hiMpb1gMJYkqf0xGEt7wWAsSVL7YzCW9oLBWJKk9qdRwTiEcEII +4b0QwqIQwuV1tBkdQngrhPBOCOHF1JYpZRaDsSRJ7U/nhhqEELKAXwBjgL8Db4QQfhtjfK9am1zg +ZmBsjLEshFDQUgVLmcBgLElS+9OYHuNRwOIY47IY4zZgDnBqjTZTgF/FGMsAYoxrU1umlFn69YP1 +66GyMt3G3FU+AAATCUlEQVSVSJKkVGlMMC4EPqi2vqJqW3X7A31DCC+GEN4IIUxNVYFSJurUCQYM +gL//Pd2VSJKkVGlwKEUTjnME8GWgJ/CnEMKfYoxLUnR8KePsGk4xdGi6K5EkSanQmGBcBgyutl5U +ta26FcDaGOMWYEsI4ffAocAewbikpCT5evTo0YwePbppFUsZwnHGkiRljtLSUkpLS5t1jBBjrL9B +CJ2AhSRuvlsJvA5MjjG+W63NgcDPgROArsBrwJkxxr/WOFZs6HxSW/Hd78KgQXDJJemuRJIk1RRC +IMYYmvKeBnuMY4w7QgjfAZ4jMSb5zhjjuyGECxK74+0xxvdCCM8C84EdwO01Q7HU3hQVwfLl6a5C +kiSlSoM9xik9mT3GakcefjixPPpouiuRJEk17U2PsTPfSXvJMcaSJLUvBmNpLxmMJUlqXxxKIe2l +bdugZ0/YtAk6p+rBh5IkKSUcSiG1ouxsKCiAVavSXYkkSUoFg7HUDA6nkCSp/TAYS81QVARlNae7 +kSRJbZLBWGoGe4wlSWo/DMZSMxiMJUlqPwzGUjMYjCVJaj8MxlIzGIwlSWo/DMZSMxiMJUlqP5zg +Q2qGLVsgNxc2b4Ys/5opSVLGcIIPqZV165YIxmvWpLsSSZLUXAZjqZkcTiFJUvtgMJaayWAsSVL7 +YDCWmqmw0GAsSVJ7YDCWmskeY0mS2geDsdRMBmNJktoHg7HUTAZjSZLaB4Ox1EwGY0mS2gcn+JCa +aeNG2Gcf2LQJQpMeIy5JklqKE3xIaZCTk5joY/36dFciSZKaw2AspYDDKSRJavsMxlIKGIwlSWr7 +DMZSChiMJUlq+wzGUgoYjCVJavsMxlIKGIwlSWr7DMZSChiMJUlq+wzGUgoUFUFZWbqrkCRJzWEw +llLAHmNJkto+g7GUAr17Q4xQUZHuSiRJ0t4yGEspEIK9xpIktXUGYylFDMaSJLVtBmMpRQzGkiS1 +bQZjKUUMxpIktW0GYylFDMaSJLVtBmMpRQzGkiS1bQZjKUUKCw3GkiS1ZQZjKUXsMZYkqW0zGEsp +kp8PmzbBJ5+kuxJJkrQ3DMZSiuya5KOsLN2VSJKkvWEwllLI4RSSJLVdBmMphQzGkiS1XQZjKYUM +xpIktV0GYymFDMaSJLVdBmMphQzGkiS1XQZjKYUMxpIktV0GYymFDMaSJLVdIcbYeicLIbbm+aTW +tnMndOsGFRWJn5IkKT1CCMQYQ1PeY4+xlEJZWTBoEPz97+muRJIkNZXBWEoxZ7+TJKltMhhLKeY4 +Y0mS2iaDsZRiBmNJktomg7GUYgZjSZLaJoOxlGIGY0mS2iaDsZRiBmNJktomg7GUYgZjSZLaJif4 +kFJs+3bo0QM++QSys9NdjSRJHVOLTfARQjghhPBeCGFRCOHyetqNDCFsCyGc3pQipPakc2fo1w9W +rUp3JZIkqSkaDMYhhCzgF8A4YAQwOYRwYB3t/hN4NtVFSm2NwykkSWp7GtNjPApYHGNcFmPcBswB +Tq2l3b8AjwKrU1if1CYVFhqMJUlqaxoTjAuBD6qtr6jalhRCGAScFmO8FWjSWA6pPbLHWJKktidV +T6W4Aag+9thwrA7NYCxJUtvTuRFtyoDB1daLqrZV9zlgTgghAAXAiSGEbTHGx2oerKSkJPl69OjR +jB49uoklS5mvqAjefDPdVUiS1HGUlpZSWlrarGM0+Li2EEInYCEwBlgJvA5MjjG+W0f7u4DHY4y/ +rmWfj2tTh/DyyzBjBvzhD+muRJKkjmlvHtfWYI9xjHFHCOE7wHMkhl7cGWN8N4RwQWJ3vL3mW5pS +gNQeOZRCkqS2xwk+pBawdSv06gWbN0OnTumuRpKkjqfFJviQ1DRdu0JeHqz24YWSJLUZBmOphTic +QpKktsVgLLUQg7EkSW2LwVhqIQZjSZLaFoOx1EIMxpIktS0GY6mFFBVBWc2pcCRJUsYyGEstxB5j +SZLaFoOx1EIMxpIktS1O8CG1kE2bID8/8TM06fHikiSpuZzgQ8ogPXoklnXr0l2JJElqDIOx1IIc +TiFJUtthMJZakMFYkqS2w2AstSCDsSRJbYfBWGpBBmNJktoOg7HUggzGkiS1HQZjqQUVFhqMJUlq +KwzGUguyx1iSpLbDYCy1oF3B2HltJEnKfAZjqQX17g1ZWfDRR+muRJIkNcRgLLUwh1NIktQ2GIyl +FmYwliSpbTAYSy3MYCxJUttgMJZamMFYkqS2wWAstTCDsSRJbYPBWGphBmNJktoGg7HUwgzGkiS1 +DQZjqYUZjCVJahsMxlILy8uDykrYuDHdlUiSpPoYjKUWFkKi17isLN2VSJKk+hiMpVbgcApJkjKf +wVhqBQZjSZIyn8FYagUGY0mSMp/BWGoFBmNJkjKfwVhqBQZjSZIyn8FYagUGY0mSMp/BWGoFBmNJ +kjKfwVhqBQUF8PHHsGVLuiuRJEl1MRhLrSArCwYOdJIPSZIymcFYaiUOp5AkKbMZjKVWYjCWJCmz +GYylVmIwliQpsxmMpVZiMJYkKbMZjKVWYjCWJCmzGYylVmIwliQpsxmMpVZiMJYkKbOFGGPrnSyE +2JrnkzLJjh3QvTts3AhduqS7GkmS2rcQAjHG0JT32GMstZJOnaB/f1i5Mt2VSJKk2hiMpVbkcApJ +kjKXwVhqRQZjSZIyl8FYakVFRVBWlu4qJElSbQzGUiuyx1iSpMxlMJZakcFYkqTMZTCWWpHBWJKk +zGUwllqRwViSpMzlBB9SK6qshJwc2Lw58VxjSZLUMpzgQ8pwXbpAfj58+GG6K5EkSTUZjKVW5nAK +SZIyk8FYamUGY0mSMlOjgnEI4YQQwnshhEUhhMtr2T8lhPB21fJKCOEfUl+q1D4YjCVJykwNBuMQ +QhbwC2AcMAKYHEI4sEaz94EvxhgPBX4M3JHqQqX2orDQYCxJUiZqTI/xKGBxjHFZjHEbMAc4tXqD +GOOrMcaPqlZfBQpTW6bUfthjLElSZmpMMC4EPqi2voL6g+83gKebU5TUnhmMJUnKTJ1TebAQwnHA +NOCYVB5Xak8MxpIkZabGBOMyYHC19aKqbbsJIRwC3A6cEGMsr+tgJSUlydejR49m9OjRjSxVah8K +C6GsDHbuhCyfCyNJUkqUlpZSWlrarGM0OPNdCKETsBAYA6wEXgcmxxjfrdZmMDAXmBpjfLWeYznz +nQQUFMBf/wr9+qW7EkmS2qe9mfmuwR7jGOOOEMJ3gOdIjEm+M8b4bgjhgsTueDtwJdAXuCWEEIBt +McZRTf8IUsewaziFwViSpMzRYI9xSk9mj7EEwMknw/nnwymnpLsSSZLap73pMXaEo5QG3oAnSVLm +MRhLaWAwliQp8xiMpTQwGEuSlHkMxlIaGIwlSco8BmMpDYqKEs8yliRJmcNgLKVBYWGix9iHtEiS +lDkMxlIa9OoF2dmwYUO6K5EkSbsYjKU0cZyxJEmZxWAspYnBWJKkzGIwltLEYCxJUmYxGEtpYjCW +JCmzGIylNDEYS5KUWQzGUpoYjCVJyiwGYylNDMaSJGUWg7GUJrsm+ZAkSZnBYCylSZ8+sH07VFSk +uxJJkgQGYyltQkgMpygrS3clkiQJDMZSWjnOWJKkzGEwltLIYCxJUuYwGEtpZDCWJClzGIylNDIY +S5KUOQzGUhoZjCVJyhwGYymNDMaSJGUOg7GURgZjSZIyh8FYSqOCAvjkE9i0Kd2VSJIkg7GURiEk +poZ2kg9JktLPYCylmbPfSZKUGQzGUpo5zliSpMxgMJbSzGAsSVJmMBhLaWYwliQpM3ROdwFSR1dU +BHPnprsKZaqtW2HDBujUac8lK+vTnyGku9LmixF27EgsO3d++rrmsm1bYtm+/dPXNdcb83pv3rN9 +e2KJ8dOaq9ef7tfpOB8kvn+7voO7XqdiW2PbVq+r+s/atjVm3968vyF1/Tda33+7e7Mv04+X6QzG +UprZYyxIBOCFC2HBgsTy178mfi5fDr167R4Ma4bGGHcPyXUF6L3ZVn17Y0JrXfsas73656irns6d +E0t2dmKp63V9++p63aNH49rtqm2X6gEgE1639vlqhsTqS2O3Nff91cNyzZ97u29v3l+XusJzfaF6 +b/Zl+vFa23PPNf09BmMpzQzGHUtlZSIA7wq+u5Zly2DYMDj4YBgxAiZNSvzcbz/o0qX+Y+7qaa0t +fDZnW/XtO3c2HFobG7Dbe8+3pLYrxFaM9SGE2Jrnk9qCHTuge3fYuLHhAKS2o7ISFi/ePfwuWAB/ ++xsMHZoIvdWX/ff3z1+SUimEQIyxSX/dNhhLGWDIEHjppURgUtuybVvtAfj99xN/rtXD78EHwwEH +QNeu6a5aktq/vQnGDqWQMsCu4RQG48y1bRssWVJ7AC4u/nQIxFe+Av/+74kA3K1buquWJDWFwVjK +AIWFjjPOJNu3wxtvwAsvwP/+byIAL1mS+HPa1ft76qlwxRWJANy9e7orliSlgsFYygDegJd+H3wA +zz6bWObOTfyZHH88/L//B5ddBgcemHhqgSSp/TIYSxmgqCjxWC61ns2b4fe//zQMf/gh/NM/JYLw +jTfCoEHprlCS1NoMxlIGKCqCP/4x3VW0bzHCu+8mQvAzzySu96GHwrhxcNdd8I//mHhkmCSp4zIY +SxnAoRQto7wcfve7T3uFs7ISQfiCC+Chh6BPn3RXKEnKJAZjKQMYjFNjx47ETXPPPJMIwgsWwDHH +JMLwJZckbpRzAglJUl18jrGUAbZtg549YdOmxNSzarwVK3a/aW7QIDjhhEQYPuYYH5kmSR2VE3xI +bdigQfD664neY9Vt82Z4+eVPe4VXrUo8PWLcOBg7NvFINUmSnOBDasN2DacwGO+u+k1zzz4Lf/gD +HHKIN81JklLPYCxliI48zjhG2LIlMZTkk08Sy4IFn4bhEBJB+LzzYM4cb5qTJLUMg7GUITI5GMcI +lZWJwFo9vDZ3fdfrTZsgOzsxzrpHj8TPYcMSYfi7301MruFNc5KklmYwljJEcTHceiuUlibWY0ws +1V83tJ6Ktjt3Jsbx1gyvWVm7B9ddS13rubmJcdONad+jhzcdSpLSz5vvpAyxfn0iFIfw6QK1v07V +vtraZmVB9+57Btns7Bb9+JIkpZRPpZAkSZJoI0+lCA4UlCRJUgZq9WBsj7EkSZJa2t50xma1QB2S +JElSm2MwliRJkjAYS5IkSYDBWJIkSQIMxpIkSRJgMJYkSZKARgbjEMIJIYT3QgiLQgiX19HmphDC +4hDCvBDCYaktU5IkSWpZDQbjEEIW8AtgHDACmBxCOLBGmxOB4THG/YALgNtaoFapRZWWlqa7BKlW +fjeVqfxuqr1pTI/xKGBxjHFZjHEbMAc4tUabU4F7AWKMrwG5IYT+Ka1UamH+glem8rupTOV3U+1N +Y4JxIfBBtfUVVdvqa1NWSxtJkiQpY3nznSRJkgSEGGP9DUL4PFASYzyhan0GEGOM11RrcxvwYozx +oar194AvxRg/rHGs+k8mSZIkpUiMMTSlfedGtHkD+EwIYQiwEpgETK7R5jHgQuChqiC9oWYo3pvi +JEmSpNbSYDCOMe4IIXwHeI7E0Is7Y4zvhhAuSOyOt8cYnwohnBRCWAJ8Akxr2bIlSZKk1GpwKIUk +SZLUEbTazXeNmSRESocQwv+FEN4OIbwVQng93fWoYwsh3BlC+DCEML/atrwQwnMhhIUhhGdDCLnp +rFEdUx3fzR+EEFaEEP5StZyQzhrVMYUQikIIL4QQFoQQ/jeE8K9V25v8u7NVgnFjJgmR0mgnMDrG +eHiMcVS6i1GHdxeJ35XVzQB+F2M8AHgB+LdWr0qq/bsJ8NMY4xFVyzOtXZQEbAe+G2McARwFXFiV +M5v8u7O1eowbM0mIlC4BH12oDBFjfAUor7H5VOCeqtf3AKe1alESdX43IfE7VEqbGOOqGOO8qtcb +gXeBIvbid2drhYHGTBIipUsEng8hvBFCOC/dxUi16LfrST8xxlVAvzTXI1X3nRDCvBDCfzvMR+kW +QhgKHAa8CvRv6u9Oe8kkODrGeARwEol/fjkm3QVJDfCuaWWKW4B9Y4yHAauAn6a5HnVgIYQc4FHg +oqqe45q/Kxv83dlawbgMGFxtvahqm5R2McaVVT/XAL8hMfRHyiQfhhD6A4QQBgCr01yPBCR+b8ZP +H291BzAynfWo4wohdCYRiu+LMf62anOTf3e2VjBOThISQuhCYpKQx1rp3FKdQgg9qv6GSQihJzAW +eCe9VUkEdh+3+Rjw9arXXwN+W/MNUivZ7btZFTZ2OR1/fyp9fgn8NcZ4Y7VtTf7d2WrPMa56hMuN +fDpJyH+2yomleoQQhpHoJY4kJrx5wO+m0imEMBsYDeQDHwI/AP4HeAQoBpYBE2OMG9JVozqmOr6b +x5EYz7kT+D/ggtpmvpVaUgjhaOD3wP+S+P95BK4AXgcepgm/O53gQ5IkScKb7yRJkiTAYCxJkiQB +BmNJkiQJMBhLkiRJgMFYkiRJAgzGkiRJEmAwliRJkgCDsSRJkgTA/wdTBj79QR260QAAAABJRU5E +rkJggg== +" +> +</div> + +</div> + +</div> +</div> + +</div> +<div class="cell border-box-sizing text_cell rendered"> +<div class="prompt input_prompt"> +</div> +<div class="inner_cell"> +<div class="text_cell_render border-box-sizing rendered_html"> +<p>Nothing. It is interesting to note that the graphs are almost exactly the same: This would imply again that the variables we removed earlier (all the binary classifiers) indeed have almost no predictive power. It seems this problem is high-dimensional, but with almost no data that can actually inform our decisions.</p> +<h1 id="Summary-for-Day-1">Summary for Day 1<a class="anchor-link" href="#Summary-for-Day-1">&#182;</a></h1><p>After spending a couple hours with this dataset, there seems to be a fundamental issue in play: We have very high-dimensional data, and it has no bearing on our ability to actually predict customer satisfaction. This can be a huge issue: it implies that <strong>no matter what model we use, we fundamentally can't perform well.</strong> I'm sure most of this is because I'm not an experienced data scientist. Even so, we have yet to develop a strategy that can actually beat out the village idiot; <strong>so far, the bank is best off just assuming all its customers are satisfied.</strong> Hopefully more to come soon.</p> + +</div> +</div> +</div> +<div class="cell border-box-sizing code_cell rendered"> +<div class="input"> +<div class="prompt input_prompt">In&nbsp;[9]:</div> +<div class="inner_cell"> + <div class="input_area"> +<div class=" highlight hl-ipython3"><pre><span></span><span class="n">end</span> <span class="o">=</span> <span class="n">datetime</span><span class="o">.</span><span class="n">now</span><span class="p">()</span> +<span class="nb">print</span><span class="p">(</span><span class="s2">&quot;Running time: </span><span class="si">{}</span><span class="s2">&quot;</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">end</span> <span class="o">-</span> <span class="n">start</span><span class="p">))</span> +</pre></div> + +</div> +</div> +</div> + +<div class="output_wrapper"> +<div class="output"> + + +<div class="output_area"><div class="prompt"></div> +<div class="output_subarea output_stream output_stdout output_text"> +<pre>Running time: 0:00:58.715714 +</pre> +</div> +</div> + +</div> +</div> + +</div> +<div class="cell border-box-sizing text_cell rendered"> +<div class="prompt input_prompt"> +</div> +<div class="inner_cell"> +<div class="text_cell_render border-box-sizing rendered_html"> +<h1 id="Appendix">Appendix<a class="anchor-link" href="#Appendix">&#182;</a></h1><p>Code used to split the initial training data:</p> +<div class="highlight"><pre><span></span><span class="kn">from</span> <span class="nn">sklearn.cross_validation</span> <span class="kn">import</span> <span class="n">train_test_split</span> +<span class="n">data</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">read_csv</span><span class="p">(</span><span class="s1">&#39;train.csv&#39;</span><span class="p">)</span> +<span class="n">data</span><span class="o">.</span><span class="n">index</span> <span class="o">=</span> <span class="n">data</span><span class="o">.</span><span class="n">ID</span> + +<span class="n">data_train</span><span class="p">,</span> <span class="n">data_validate</span> <span class="o">=</span> <span class="n">train_test_split</span><span class="p">(</span> + <span class="n">data</span><span class="p">,</span> <span class="n">train_size</span><span class="o">=.</span><span class="mi">7</span><span class="p">)</span> + +<span class="n">data_train</span><span class="o">.</span><span class="n">to_csv</span><span class="p">(</span><span class="s1">&#39;split_train.csv&#39;</span><span class="p">)</span> +<span class="n">data_validate</span><span class="o">.</span><span class="n">to_csv</span><span class="p">(</span><span class="s1">&#39;split_validate.csv&#39;</span><span class="p">)</span> +</pre></div> + +</div> +</div> +</div></p> +<script type="text/x-mathjax-config"> +# MathJax.Hub.Config({tex2jax: {inlineMath: [['$','$'], ['\(','\)']]}}); +MathJax.Hub.Config({tex2jax: {inlineMath: [['$','$']]}}); +</script> + +<script async src='https://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS_CHTML'></script>Profitability using the Investment Formula2016-02-26T00:00:00-05:00Bradlee Speicetag:bspeice.github.io,2016-02-26:profitability-using-the-investment-formula.html<p> <div class="cell border-box-sizing text_cell rendered"> <div class="prompt input_prompt"> </div> @@ -31,7 +953,7 @@ <div class="prompt input_prompt">In&nbsp;[19]:</div> <div class="inner_cell"> <div class="input_area"> -<div class=" highlight hl-ipython3"><pre><span class="kn">import</span> <span class="nn">numpy</span> <span class="k">as</span> <span class="nn">np</span> +<div class=" highlight hl-ipython3"><pre><span></span><span class="kn">import</span> <span class="nn">numpy</span> <span class="k">as</span> <span class="nn">np</span> <span class="kn">import</span> <span class="nn">pandas</span> <span class="k">as</span> <span class="nn">pd</span> <span class="kn">import</span> <span class="nn">matplotlib.pyplot</span> <span class="k">as</span> <span class="nn">plt</span> <span class="kn">from</span> <span class="nn">Quandl</span> <span class="k">import</span> <span class="n">get</span> <span class="k">as</span> <span class="n">qget</span> @@ -130,7 +1052,7 @@ <div class="prompt input_prompt">In&nbsp;[7]:</div> <div class="inner_cell"> <div class="input_area"> -<div class=" highlight hl-ipython3"><pre><span class="n">fang_df</span> <span class="o">=</span> <span class="n">simulate_tickers</span><span class="p">([</span><span class="s2">&quot;YAHOO/FB&quot;</span><span class="p">,</span> <span class="s2">&quot;YAHOO/AAPL&quot;</span><span class="p">,</span> <span class="s2">&quot;YAHOO/NFLX&quot;</span><span class="p">,</span> <span class="s2">&quot;YAHOO/GOOG&quot;</span><span class="p">])</span> +<div class=" highlight hl-ipython3"><pre><span></span><span class="n">fang_df</span> <span class="o">=</span> <span class="n">simulate_tickers</span><span class="p">([</span><span class="s2">&quot;YAHOO/FB&quot;</span><span class="p">,</span> <span class="s2">&quot;YAHOO/AAPL&quot;</span><span class="p">,</span> <span class="s2">&quot;YAHOO/NFLX&quot;</span><span class="p">,</span> <span class="s2">&quot;YAHOO/GOOG&quot;</span><span class="p">])</span> </pre></div> </div> @@ -143,7 +1065,7 @@ <div class="prompt input_prompt">In&nbsp;[8]:</div> <div class="inner_cell"> <div class="input_area"> -<div class=" highlight hl-ipython3"><pre><span class="n">fang_df</span><span class="o">.</span><span class="n">xs</span><span class="p">(</span><span class="s1">&#39;days&#39;</span><span class="p">,</span> <span class="n">axis</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">level</span><span class="o">=</span><span class="mi">1</span><span class="p">)</span><span class="o">.</span><span class="n">hist</span><span class="p">()</span> +<div class=" highlight hl-ipython3"><pre><span></span><span class="n">fang_df</span><span class="o">.</span><span class="n">xs</span><span class="p">(</span><span class="s1">&#39;days&#39;</span><span class="p">,</span> <span class="n">axis</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">level</span><span class="o">=</span><span class="mi">1</span><span class="p">)</span><span class="o">.</span><span class="n">hist</span><span class="p">()</span> <span class="n">plt</span><span class="o">.</span><span class="n">gcf</span><span class="p">()</span><span class="o">.</span><span class="n">set_size_inches</span><span class="p">(</span><span class="mi">18</span><span class="p">,</span> <span class="mi">8</span><span class="p">);</span> <span class="n">plt</span><span class="o">.</span><span class="n">gcf</span><span class="p">()</span><span class="o">.</span><span class="n">suptitle</span><span class="p">(</span><span class="s2">&quot;Distribution of Days Until Profitability&quot;</span><span class="p">,</span> <span class="n">fontsize</span><span class="o">=</span><span class="mi">18</span><span class="p">);</span> </pre></div> @@ -617,7 +1539,7 @@ kiSpNScYJEmSJElSa04wSJIkSZKk1pxgkCRJkiRJrf1/uiw9D8O0d6EAAAAASUVORK5CYII= <div class="prompt input_prompt">In&nbsp;[10]:</div> <div class="inner_cell"> <div class="input_area"> -<div class=" highlight hl-ipython3"><pre><span class="n">fang_df</span><span class="o">.</span><span class="n">xs</span><span class="p">(</span><span class="s1">&#39;score&#39;</span><span class="p">,</span> <span class="n">axis</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">level</span><span class="o">=</span><span class="mi">1</span><span class="p">)</span><span class="o">.</span><span class="n">plot</span><span class="p">()</span> +<div class=" highlight hl-ipython3"><pre><span></span><span class="n">fang_df</span><span class="o">.</span><span class="n">xs</span><span class="p">(</span><span class="s1">&#39;score&#39;</span><span class="p">,</span> <span class="n">axis</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">level</span><span class="o">=</span><span class="mi">1</span><span class="p">)</span><span class="o">.</span><span class="n">plot</span><span class="p">()</span> <span class="n">plt</span><span class="o">.</span><span class="n">gcf</span><span class="p">()</span><span class="o">.</span><span class="n">set_size_inches</span><span class="p">(</span><span class="mi">18</span><span class="p">,</span> <span class="mi">6</span><span class="p">)</span> <span class="n">plt</span><span class="o">.</span><span class="n">gcf</span><span class="p">()</span><span class="o">.</span><span class="n">suptitle</span><span class="p">(</span><span class="s2">&quot;Profitability score over time&quot;</span><span class="p">,</span> <span class="n">fontsize</span><span class="o">=</span><span class="mi">18</span><span class="p">);</span> </pre></div> @@ -2256,7 +3178,7 @@ EZIkSZIkSZIkSZIk6RIpIiRJkiRJkiRJkiRJ0iX+D91jLOlExGw8AAAAAElFTkSuQmCC <div class="prompt input_prompt">In&nbsp;[13]:</div> <div class="inner_cell"> <div class="input_area"> -<div class=" highlight hl-ipython3"><pre><span class="n">cyclic_df</span> <span class="o">=</span> <span class="n">simulate_tickers</span><span class="p">([</span><span class="s2">&quot;YAHOO/X&quot;</span><span class="p">,</span> <span class="s2">&quot;YAHOO/CAT&quot;</span><span class="p">,</span> <span class="s2">&quot;YAHOO/NFLX&quot;</span><span class="p">,</span> <span class="s2">&quot;YAHOO/GOOG&quot;</span><span class="p">])</span> +<div class=" highlight hl-ipython3"><pre><span></span><span class="n">cyclic_df</span> <span class="o">=</span> <span class="n">simulate_tickers</span><span class="p">([</span><span class="s2">&quot;YAHOO/X&quot;</span><span class="p">,</span> <span class="s2">&quot;YAHOO/CAT&quot;</span><span class="p">,</span> <span class="s2">&quot;YAHOO/NFLX&quot;</span><span class="p">,</span> <span class="s2">&quot;YAHOO/GOOG&quot;</span><span class="p">])</span> </pre></div> </div> @@ -2269,7 +3191,7 @@ EZIkSZIkSZIkSZIk6RIpIiRJkiRJkiRJkiRJ0iX+D91jLOlExGw8AAAAAElFTkSuQmCC <div class="prompt input_prompt">In&nbsp;[14]:</div> <div class="inner_cell"> <div class="input_area"> -<div class=" highlight hl-ipython3"><pre><span class="n">cyclic_df</span><span class="o">.</span><span class="n">xs</span><span class="p">(</span><span class="s1">&#39;days&#39;</span><span class="p">,</span> <span class="n">axis</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">level</span><span class="o">=</span><span class="mi">1</span><span class="p">)</span><span class="o">.</span><span class="n">hist</span><span class="p">()</span> +<div class=" highlight hl-ipython3"><pre><span></span><span class="n">cyclic_df</span><span class="o">.</span><span class="n">xs</span><span class="p">(</span><span class="s1">&#39;days&#39;</span><span class="p">,</span> <span class="n">axis</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">level</span><span class="o">=</span><span class="mi">1</span><span class="p">)</span><span class="o">.</span><span class="n">hist</span><span class="p">()</span> <span class="n">plt</span><span class="o">.</span><span class="n">gcf</span><span class="p">()</span><span class="o">.</span><span class="n">set_size_inches</span><span class="p">(</span><span class="mi">18</span><span class="p">,</span> <span class="mi">8</span><span class="p">);</span> <span class="n">plt</span><span class="o">.</span><span class="n">gcf</span><span class="p">()</span><span class="o">.</span><span class="n">suptitle</span><span class="p">(</span><span class="s2">&quot;Distribution of Days Until Profitability&quot;</span><span class="p">,</span> <span class="n">fontsize</span><span class="o">=</span><span class="mi">18</span><span class="p">);</span> </pre></div> @@ -2757,7 +3679,7 @@ JEmSJEnSwGwwSJIkSZKkgdlgkCRJkiRJA7PBIEmSJEmSBvb/AfKAew4+oiojAAAAAElFTkSuQmCC <div class="prompt input_prompt">In&nbsp;[15]:</div> <div class="inner_cell"> <div class="input_area"> -<div class=" highlight hl-ipython3"><pre><span class="n">cyclic_df</span><span class="o">.</span><span class="n">xs</span><span class="p">(</span><span class="s1">&#39;score&#39;</span><span class="p">,</span> <span class="n">axis</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">level</span><span class="o">=</span><span class="mi">1</span><span class="p">)</span><span class="o">.</span><span class="n">plot</span><span class="p">()</span> +<div class=" highlight hl-ipython3"><pre><span></span><span class="n">cyclic_df</span><span class="o">.</span><span class="n">xs</span><span class="p">(</span><span class="s1">&#39;score&#39;</span><span class="p">,</span> <span class="n">axis</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">level</span><span class="o">=</span><span class="mi">1</span><span class="p">)</span><span class="o">.</span><span class="n">plot</span><span class="p">()</span> <span class="n">plt</span><span class="o">.</span><span class="n">gcf</span><span class="p">()</span><span class="o">.</span><span class="n">set_size_inches</span><span class="p">(</span><span class="mi">18</span><span class="p">,</span> <span class="mi">6</span><span class="p">)</span> <span class="n">plt</span><span class="o">.</span><span class="n">gcf</span><span class="p">()</span><span class="o">.</span><span class="n">suptitle</span><span class="p">(</span><span class="s2">&quot;Profitability score over time&quot;</span><span class="p">,</span> <span class="n">fontsize</span><span class="o">=</span><span class="mi">18</span><span class="p">);</span> </pre></div> @@ -4542,7 +5464,7 @@ hUKhUCgUpor/A9AnO0PoIItTAAAAAElFTkSuQmCC <div class="prompt input_prompt">In&nbsp;[21]:</div> <div class="inner_cell"> <div class="input_area"> -<div class=" highlight hl-ipython3"><pre><span class="n">biotech_df</span> <span class="o">=</span> <span class="n">simulate_tickers</span><span class="p">([</span><span class="s1">&#39;YAHOO/REGN&#39;</span><span class="p">,</span> <span class="s1">&#39;YAHOO/CELG&#39;</span><span class="p">,</span> <span class="s1">&#39;GOOG/NASDAQ_BIB&#39;</span><span class="p">,</span> <span class="s1">&#39;GOOG/NASDAQ_IBB&#39;</span><span class="p">])</span> +<div class=" highlight hl-ipython3"><pre><span></span><span class="n">biotech_df</span> <span class="o">=</span> <span class="n">simulate_tickers</span><span class="p">([</span><span class="s1">&#39;YAHOO/REGN&#39;</span><span class="p">,</span> <span class="s1">&#39;YAHOO/CELG&#39;</span><span class="p">,</span> <span class="s1">&#39;GOOG/NASDAQ_BIB&#39;</span><span class="p">,</span> <span class="s1">&#39;GOOG/NASDAQ_IBB&#39;</span><span class="p">])</span> </pre></div> </div> @@ -4555,7 +5477,7 @@ hUKhUCgUpor/A9AnO0PoIItTAAAAAElFTkSuQmCC <div class="prompt input_prompt">In&nbsp;[22]:</div> <div class="inner_cell"> <div class="input_area"> -<div class=" highlight hl-ipython3"><pre><span class="n">biotech_df</span><span class="o">.</span><span class="n">xs</span><span class="p">(</span><span class="s1">&#39;days&#39;</span><span class="p">,</span> <span class="n">axis</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">level</span><span class="o">=</span><span class="mi">1</span><span class="p">)</span><span class="o">.</span><span class="n">hist</span><span class="p">()</span> +<div class=" highlight hl-ipython3"><pre><span></span><span class="n">biotech_df</span><span class="o">.</span><span class="n">xs</span><span class="p">(</span><span class="s1">&#39;days&#39;</span><span class="p">,</span> <span class="n">axis</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">level</span><span class="o">=</span><span class="mi">1</span><span class="p">)</span><span class="o">.</span><span class="n">hist</span><span class="p">()</span> <span class="n">plt</span><span class="o">.</span><span class="n">gcf</span><span class="p">()</span><span class="o">.</span><span class="n">set_size_inches</span><span class="p">(</span><span class="mi">18</span><span class="p">,</span> <span class="mi">8</span><span class="p">);</span> <span class="n">plt</span><span class="o">.</span><span class="n">gcf</span><span class="p">()</span><span class="o">.</span><span class="n">suptitle</span><span class="p">(</span><span class="s2">&quot;Distribution of Days Until Profitability&quot;</span><span class="p">,</span> <span class="n">fontsize</span><span class="o">=</span><span class="mi">18</span><span class="p">);</span> </pre></div> @@ -5108,7 +6030,7 @@ WKBTQVcCAAAAAElFTkSuQmCC <div class="prompt input_prompt">In&nbsp;[23]:</div> <div class="inner_cell"> <div class="input_area"> -<div class=" highlight hl-ipython3"><pre><span class="n">biotech_df</span><span class="o">.</span><span class="n">xs</span><span class="p">(</span><span class="s1">&#39;score&#39;</span><span class="p">,</span> <span class="n">axis</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">level</span><span class="o">=</span><span class="mi">1</span><span class="p">)</span><span class="o">.</span><span class="n">plot</span><span class="p">()</span> +<div class=" highlight hl-ipython3"><pre><span></span><span class="n">biotech_df</span><span class="o">.</span><span class="n">xs</span><span class="p">(</span><span class="s1">&#39;score&#39;</span><span class="p">,</span> <span class="n">axis</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">level</span><span class="o">=</span><span class="mi">1</span><span class="p">)</span><span class="o">.</span><span class="n">plot</span><span class="p">()</span> <span class="n">plt</span><span class="o">.</span><span class="n">gcf</span><span class="p">()</span><span class="o">.</span><span class="n">set_size_inches</span><span class="p">(</span><span class="mi">18</span><span class="p">,</span> <span class="mi">6</span><span class="p">)</span> <span class="n">plt</span><span class="o">.</span><span class="n">gcf</span><span class="p">()</span><span class="o">.</span><span class="n">suptitle</span><span class="p">(</span><span class="s2">&quot;Profitability score over time&quot;</span><span class="p">,</span> <span class="n">fontsize</span><span class="o">=</span><span class="mi">18</span><span class="p">);</span> </pre></div> @@ -6879,7 +7801,7 @@ MathJax.Hub.Config({tex2jax: {inlineMath: [['$','$'], ['\\(','\\)']]}}); <div class="inner_cell"> <div class="text_cell_render border-box-sizing rendered_html"> <h3 id="If-you-can-see-into-the-future,-that-is.">If you can see into the future, that is.<a class="anchor-link" href="#If-you-can-see-into-the-future,-that-is.">&#182;</a></h3><p>My previous class in Stochastic Calculus covered a lot of interesting topics, and the important one for today -is the <a href="https://en.wikipedia.org/wiki/Gambler's_ruin">Gambler's Ruin</a> problem. If you're interested in some of the theory behind it, also make sure to check out +is the <a href="https://en.wikipedia.org/wiki/Gambler&#39;s_ruin">Gambler's Ruin</a> problem. If you're interested in some of the theory behind it, also make sure to check out <a href="https://en.wikipedia.org/wiki/Random_walk">random walks</a>. The important bit is that we studied the <a href="https://en.wikipedia.org/wiki/Martingale_%28betting_system%29">Martingale Betting Strategy</a>, which describes for us a <strong>guaranteed way</strong> to <span style='font-size: x-small'>eventually</span> make money.</p> <p>The strategy goes like this: You are going to toss a fair coin with a friend. If you guess heads or tails correctly, you get back double the money you bet. If you guess incorrectly, you lose money. How should you bet?</p> @@ -6939,7 +7861,7 @@ d_n &> \frac{o_n}{c_n - o_n} \sum_{i=1}^{n-1} d_i(1 - \frac{1}{o_i}) <div class="prompt input_prompt">In&nbsp;[1]:</div> <div class="inner_cell"> <div class="input_area"> -<div class=" highlight hl-julia"><pre><span class="k">using</span> <span class="n">Quandl</span> +<div class=" highlight hl-julia"><pre><span></span><span class="k">using</span> <span class="n">Quandl</span> <span class="n">api_key</span> <span class="o">=</span> <span class="s">&quot;&quot;</span> <span class="n">daily_investment</span> <span class="o">=</span> <span class="n">function</span><span class="p">(</span><span class="n">current_open</span><span class="p">,</span> <span class="n">current_close</span><span class="p">,</span> <span class="n">purchase_history</span><span class="p">,</span> <span class="n">open_history</span><span class="p">)</span> <span class="c"># We&#39;re not going to safeguard against divide by 0 - that&#39;s the user&#39;s responsibility</span> @@ -6969,7 +7891,7 @@ d_n &> \frac{o_n}{c_n - o_n} \sum_{i=1}^{n-1} d_i(1 - \frac{1}{o_i}) <div class="prompt input_prompt">In&nbsp;[2]:</div> <div class="inner_cell"> <div class="input_area"> -<div class=" highlight hl-julia"><pre><span class="n">is_profitable</span> <span class="o">=</span> <span class="n">function</span><span class="p">(</span><span class="n">current_price</span><span class="p">,</span> <span class="n">purchase_history</span><span class="p">,</span> <span class="n">open_history</span><span class="p">)</span> +<div class=" highlight hl-julia"><pre><span></span><span class="n">is_profitable</span> <span class="o">=</span> <span class="n">function</span><span class="p">(</span><span class="n">current_price</span><span class="p">,</span> <span class="n">purchase_history</span><span class="p">,</span> <span class="n">open_history</span><span class="p">)</span> <span class="n">shares</span> <span class="o">=</span> <span class="n">sum</span><span class="p">(</span><span class="n">purchase_history</span> <span class="o">./</span> <span class="n">open_history</span><span class="p">)</span> <span class="k">return</span> <span class="n">current_price</span><span class="o">*</span><span class="n">shares</span> <span class="o">&gt;</span> <span class="n">sum</span><span class="p">(</span><span class="n">purchase_history</span><span class="p">)</span> <span class="k">end</span> @@ -7004,8 +7926,8 @@ d_n &> \frac{o_n}{c_n - o_n} \sum_{i=1}^{n-1} d_i(1 - \frac{1}{o_i}) <span class="n">leverages</span> <span class="o">=</span> <span class="p">[</span><span class="n">sum</span><span class="p">(</span><span class="n">investments</span><span class="p">[</span><span class="mi">1</span><span class="p">:</span><span class="n">i</span><span class="p">])</span> <span class="k">for</span> <span class="n">i</span><span class="o">=</span><span class="mi">1</span><span class="p">:</span><span class="n">length</span><span class="p">(</span><span class="n">investments</span><span class="p">)]</span> <span class="n">max_leverage</span> <span class="o">=</span> <span class="n">maximum</span><span class="p">(</span><span class="n">leverages</span><span class="p">)</span> <span class="o">/</span> <span class="n">investments</span><span class="p">[</span><span class="mi">1</span><span class="p">]</span> <span class="n">println</span><span class="p">(</span><span class="s">&quot;Max leverage: </span><span class="si">$(max_leverage)</span><span class="s">&quot;</span><span class="p">)</span> - <span class="n">println</span><span class="p">(</span><span class="s">&quot;Days invested: </span><span class="si">$</span><span class="s">(length(investments))&quot;</span><span class="p">)</span> - <span class="n">println</span><span class="p">(</span><span class="s">&quot;Profit: </span><span class="si">$</span><span class="s">profit&quot;</span><span class="p">)</span> + <span class="n">println</span><span class="p">(</span><span class="s">&quot;Days invested: </span><span class="si">$(length(investments))</span><span class="s">&quot;</span><span class="p">)</span> + <span class="n">println</span><span class="p">(</span><span class="s">&quot;Profit: </span><span class="si">$profit</span><span class="s">&quot;</span><span class="p">)</span> <span class="k">end</span><span class="p">;</span> </pre></div> @@ -7032,7 +7954,7 @@ d_n &> \frac{o_n}{c_n - o_n} \sum_{i=1}^{n-1} d_i(1 - \frac{1}{o_i}) <div class="prompt input_prompt">In&nbsp;[3]:</div> <div class="inner_cell"> <div class="input_area"> -<div class=" highlight hl-julia"><pre><span class="n">investments</span><span class="p">,</span> <span class="n">profit</span> <span class="o">=</span> <span class="n">simulate</span><span class="p">(</span><span class="s">&quot;YAHOO/LMT&quot;</span><span class="p">,</span> <span class="n">Date</span><span class="p">(</span><span class="mi">2015</span><span class="p">,</span> <span class="mi">11</span><span class="p">,</span> <span class="mi">29</span><span class="p">),</span> <span class="mi">100</span><span class="p">,</span> <span class="mf">1.01</span><span class="p">,</span> <span class="mi">10</span><span class="p">)</span> +<div class=" highlight hl-julia"><pre><span></span><span class="n">investments</span><span class="p">,</span> <span class="n">profit</span> <span class="o">=</span> <span class="n">simulate</span><span class="p">(</span><span class="s">&quot;YAHOO/LMT&quot;</span><span class="p">,</span> <span class="n">Date</span><span class="p">(</span><span class="mi">2015</span><span class="p">,</span> <span class="mi">11</span><span class="p">,</span> <span class="mi">29</span><span class="p">),</span> <span class="mi">100</span><span class="p">,</span> <span class="mf">1.01</span><span class="p">,</span> <span class="mi">10</span><span class="p">)</span> <span class="n">sim_summary</span><span class="p">(</span><span class="n">investments</span><span class="p">,</span> <span class="n">profit</span><span class="p">)</span> </pre></div> @@ -7075,7 +7997,7 @@ Profit: 0.6894803101560001 <div class="prompt input_prompt">In&nbsp;[4]:</div> <div class="inner_cell"> <div class="input_area"> -<div class=" highlight hl-julia"><pre><span class="n">investments</span><span class="p">,</span> <span class="n">profit</span> <span class="o">=</span> <span class="n">simulate</span><span class="p">(</span><span class="s">&quot;YAHOO/LMT&quot;</span><span class="p">,</span> <span class="n">Date</span><span class="p">(</span><span class="mi">2015</span><span class="p">,</span> <span class="mi">11</span><span class="p">,</span> <span class="mi">29</span><span class="p">),</span> <span class="mi">100</span><span class="p">,</span> <span class="mf">1.02</span><span class="p">,</span> <span class="mi">10</span><span class="p">)</span> +<div class=" highlight hl-julia"><pre><span></span><span class="n">investments</span><span class="p">,</span> <span class="n">profit</span> <span class="o">=</span> <span class="n">simulate</span><span class="p">(</span><span class="s">&quot;YAHOO/LMT&quot;</span><span class="p">,</span> <span class="n">Date</span><span class="p">(</span><span class="mi">2015</span><span class="p">,</span> <span class="mi">11</span><span class="p">,</span> <span class="mi">29</span><span class="p">),</span> <span class="mi">100</span><span class="p">,</span> <span class="mf">1.02</span><span class="p">,</span> <span class="mi">10</span><span class="p">)</span> <span class="n">sim_summary</span><span class="p">(</span><span class="n">investments</span><span class="p">,</span> <span class="n">profit</span><span class="p">)</span> </pre></div> @@ -7141,7 +8063,7 @@ MathJax.Hub.Config({tex2jax: {inlineMath: [['$','$'], ['\(','\)']]}}); <div class="prompt input_prompt">In&nbsp;[1]:</div> <div class="inner_cell"> <div class="input_area"> -<div class=" highlight hl-ipython3"><pre><span class="kn">import</span> <span class="nn">pickle</span> +<div class=" highlight hl-ipython3"><pre><span></span><span class="kn">import</span> <span class="nn">pickle</span> <span class="kn">import</span> <span class="nn">pandas</span> <span class="k">as</span> <span class="nn">pd</span> <span class="kn">import</span> <span class="nn">numpy</span> <span class="k">as</span> <span class="nn">np</span> <span class="kn">from</span> <span class="nn">bokeh.plotting</span> <span class="k">import</span> <span class="n">output_notebook</span><span class="p">,</span> <span class="n">figure</span><span class="p">,</span> <span class="n">show</span> @@ -7408,7 +8330,7 @@ MathJax.Hub.Config({tex2jax: {inlineMath: [['$','$'], ['\(','\)']]}}); <div class="prompt input_prompt">In&nbsp;[2]:</div> <div class="inner_cell"> <div class="input_area"> -<div class=" highlight hl-ipython3"><pre><span class="n">city_forecasts</span> <span class="o">=</span> <span class="n">pickle</span><span class="o">.</span><span class="n">load</span><span class="p">(</span><span class="nb">open</span><span class="p">(</span><span class="s1">&#39;city_forecasts.p&#39;</span><span class="p">,</span> <span class="s1">&#39;rb&#39;</span><span class="p">))</span> +<div class=" highlight hl-ipython3"><pre><span></span><span class="n">city_forecasts</span> <span class="o">=</span> <span class="n">pickle</span><span class="o">.</span><span class="n">load</span><span class="p">(</span><span class="nb">open</span><span class="p">(</span><span class="s1">&#39;city_forecasts.p&#39;</span><span class="p">,</span> <span class="s1">&#39;rb&#39;</span><span class="p">))</span> <span class="n">forecasts_df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="o">.</span><span class="n">from_dict</span><span class="p">(</span><span class="n">city_forecasts</span><span class="p">)</span> </pre></div> @@ -7422,7 +8344,7 @@ MathJax.Hub.Config({tex2jax: {inlineMath: [['$','$'], ['\(','\)']]}}); <div class="prompt input_prompt">In&nbsp;[3]:</div> <div class="inner_cell"> <div class="input_area"> -<div class=" highlight hl-ipython3"><pre><span class="n">cities</span> <span class="o">=</span> <span class="p">[</span><span class="s1">&#39;binghamton&#39;</span><span class="p">,</span> <span class="s1">&#39;cary&#39;</span><span class="p">,</span> <span class="s1">&#39;nyc&#39;</span><span class="p">,</span> <span class="s1">&#39;seattle&#39;</span><span class="p">]</span> +<div class=" highlight hl-ipython3"><pre><span></span><span class="n">cities</span> <span class="o">=</span> <span class="p">[</span><span class="s1">&#39;binghamton&#39;</span><span class="p">,</span> <span class="s1">&#39;cary&#39;</span><span class="p">,</span> <span class="s1">&#39;nyc&#39;</span><span class="p">,</span> <span class="s1">&#39;seattle&#39;</span><span class="p">]</span> <span class="n">city_colors</span> <span class="o">=</span> <span class="p">{</span><span class="n">cities</span><span class="p">[</span><span class="n">i</span><span class="p">]:</span> <span class="n">Palette</span><span class="p">[</span><span class="n">i</span><span class="p">]</span> <span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span> <span class="mi">4</span><span class="p">)}</span> <span class="k">def</span> <span class="nf">safe_cover</span><span class="p">(</span><span class="n">frame</span><span class="p">):</span> @@ -7453,7 +8375,7 @@ MathJax.Hub.Config({tex2jax: {inlineMath: [['$','$'], ['\(','\)']]}}); <div class="prompt input_prompt">In&nbsp;[4]:</div> <div class="inner_cell"> <div class="input_area"> -<div class=" highlight hl-ipython3"><pre><span class="n">years</span> <span class="o">=</span> <span class="nb">range</span><span class="p">(</span><span class="mi">1990</span><span class="p">,</span> <span class="mi">2016</span><span class="p">)</span> +<div class=" highlight hl-ipython3"><pre><span></span><span class="n">years</span> <span class="o">=</span> <span class="nb">range</span><span class="p">(</span><span class="mi">1990</span><span class="p">,</span> <span class="mi">2016</span><span class="p">)</span> <span class="k">def</span> <span class="nf">city_avg_cc</span><span class="p">(</span><span class="n">city</span><span class="p">,</span> <span class="n">month</span><span class="p">):</span> <span class="k">return</span> <span class="p">[</span><span class="n">monthly_avg_cloudcover</span><span class="p">(</span><span class="n">city</span><span class="p">,</span> <span class="n">y</span><span class="p">,</span> <span class="n">month</span><span class="p">)</span> <span class="k">for</span> <span class="n">y</span> <span class="ow">in</span> <span class="n">years</span><span class="p">]</span> @@ -7467,7 +8389,7 @@ MathJax.Hub.Config({tex2jax: {inlineMath: [['$','$'], ['\(','\)']]}}); <span class="k">for</span> <span class="n">month</span><span class="p">,</span> <span class="n">month_id</span> <span class="ow">in</span> <span class="n">months</span><span class="p">:</span> <span class="n">month_averages</span> <span class="o">=</span> <span class="p">{</span><span class="n">city</span><span class="p">:</span> <span class="n">city_avg_cc</span><span class="p">(</span><span class="n">city</span><span class="p">,</span> <span class="n">month_id</span><span class="p">)</span> <span class="k">for</span> <span class="n">city</span> <span class="ow">in</span> <span class="n">cities</span><span class="p">}</span> - <span class="n">f</span> <span class="o">=</span> <span class="n">figure</span><span class="p">(</span><span class="n">title</span><span class="o">=</span><span class="s2">&quot;{} Average Cloud Cover&quot;</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">month</span><span class="p">),</span> + <span class="n">f</span> <span class="o">=</span> <span class="n">figure</span><span class="p">(</span><span class="n">title</span><span class="o">=</span><span class="s2">&quot;</span><span class="si">{}</span><span class="s2"> Average Cloud Cover&quot;</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">month</span><span class="p">),</span> <span class="n">x_axis_label</span><span class="o">=</span><span class="s1">&#39;Year&#39;</span><span class="p">,</span> <span class="n">y_axis_label</span><span class="o">=</span><span class="s1">&#39;Cloud Cover Percentage&#39;</span><span class="p">)</span> <span class="k">for</span> <span class="n">city</span> <span class="ow">in</span> <span class="n">cities</span><span class="p">:</span> @@ -7658,7 +8580,7 @@ MathJax.Hub.Config({tex2jax: {inlineMath: [['$','$'], ['\(','\)']]}}); <div class="prompt input_prompt">In&nbsp;[5]:</div> <div class="inner_cell"> <div class="input_area"> -<div class=" highlight hl-ipython3"><pre><span class="k">def</span> <span class="nf">safe_precip</span><span class="p">(</span><span class="n">frame</span><span class="p">):</span> +<div class=" highlight hl-ipython3"><pre><span></span><span class="k">def</span> <span class="nf">safe_precip</span><span class="p">(</span><span class="n">frame</span><span class="p">):</span> <span class="k">if</span> <span class="n">frame</span> <span class="ow">and</span> <span class="s1">&#39;precipProbability&#39;</span> <span class="ow">in</span> <span class="n">frame</span><span class="p">:</span> <span class="k">return</span> <span class="n">frame</span><span class="p">[</span><span class="s1">&#39;precipProbability&#39;</span><span class="p">]</span> <span class="k">else</span><span class="p">:</span> @@ -7680,7 +8602,7 @@ MathJax.Hub.Config({tex2jax: {inlineMath: [['$','$'], ['\(','\)']]}}); <span class="k">for</span> <span class="n">month</span><span class="p">,</span> <span class="n">month_id</span> <span class="ow">in</span> <span class="n">months</span><span class="p">:</span> <span class="n">month_averages</span> <span class="o">=</span> <span class="p">{</span><span class="n">city</span><span class="p">:</span> <span class="n">city_avg_cc</span><span class="p">(</span><span class="n">city</span><span class="p">,</span> <span class="n">month_id</span><span class="p">)</span> <span class="k">for</span> <span class="n">city</span> <span class="ow">in</span> <span class="n">cities</span><span class="p">}</span> - <span class="n">f</span> <span class="o">=</span> <span class="n">figure</span><span class="p">(</span><span class="n">title</span><span class="o">=</span><span class="s2">&quot;{} Average Precipitation Chance&quot;</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">month</span><span class="p">),</span> + <span class="n">f</span> <span class="o">=</span> <span class="n">figure</span><span class="p">(</span><span class="n">title</span><span class="o">=</span><span class="s2">&quot;</span><span class="si">{}</span><span class="s2"> Average Precipitation Chance&quot;</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">month</span><span class="p">),</span> <span class="n">x_axis_label</span><span class="o">=</span><span class="s1">&#39;Year&#39;</span><span class="p">,</span> <span class="n">y_axis_label</span><span class="o">=</span><span class="s1">&#39;Precipitation Chance Percentage&#39;</span><span class="p">)</span> <span class="k">for</span> <span class="n">city</span> <span class="ow">in</span> <span class="n">cities</span><span class="p">:</span> @@ -7877,7 +8799,7 @@ MathJax.Hub.Config({tex2jax: {inlineMath: [['$','$'], ['\(','\)']]}}); <div class="prompt input_prompt">In&nbsp;[1]:</div> <div class="inner_cell"> <div class="input_area"> -<div class=" highlight hl-ipython3"><pre><span class="kn">from</span> <span class="nn">bokeh.plotting</span> <span class="k">import</span> <span class="n">figure</span><span class="p">,</span> <span class="n">output_notebook</span><span class="p">,</span> <span class="n">show</span> +<div class=" highlight hl-ipython3"><pre><span></span><span class="kn">from</span> <span class="nn">bokeh.plotting</span> <span class="k">import</span> <span class="n">figure</span><span class="p">,</span> <span class="n">output_notebook</span><span class="p">,</span> <span class="n">show</span> <span class="kn">from</span> <span class="nn">bokeh.palettes</span> <span class="k">import</span> <span class="n">PuBuGn9</span> <span class="k">as</span> <span class="n">Palette</span> <span class="kn">import</span> <span class="nn">pandas</span> <span class="k">as</span> <span class="nn">pd</span> <span class="kn">import</span> <span class="nn">numpy</span> <span class="k">as</span> <span class="nn">np</span> @@ -8137,7 +9059,7 @@ MathJax.Hub.Config({tex2jax: {inlineMath: [['$','$'], ['\(','\)']]}}); <div class="prompt input_prompt">In&nbsp;[2]:</div> <div class="inner_cell"> <div class="input_area"> -<div class=" highlight hl-ipython3"><pre><span class="n">city_forecasts</span> <span class="o">=</span> <span class="n">pickle</span><span class="o">.</span><span class="n">load</span><span class="p">(</span><span class="nb">open</span><span class="p">(</span><span class="s1">&#39;city_forecasts.p&#39;</span><span class="p">,</span> <span class="s1">&#39;rb&#39;</span><span class="p">))</span> +<div class=" highlight hl-ipython3"><pre><span></span><span class="n">city_forecasts</span> <span class="o">=</span> <span class="n">pickle</span><span class="o">.</span><span class="n">load</span><span class="p">(</span><span class="nb">open</span><span class="p">(</span><span class="s1">&#39;city_forecasts.p&#39;</span><span class="p">,</span> <span class="s1">&#39;rb&#39;</span><span class="p">))</span> <span class="n">forecast_df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="o">.</span><span class="n">from_dict</span><span class="p">(</span><span class="n">city_forecasts</span><span class="p">)</span> </pre></div> @@ -8151,7 +9073,7 @@ MathJax.Hub.Config({tex2jax: {inlineMath: [['$','$'], ['\(','\)']]}}); <div class="prompt input_prompt">In&nbsp;[3]:</div> <div class="inner_cell"> <div class="input_area"> -<div class=" highlight hl-ipython3"><pre><span class="n">cary_forecast</span> <span class="o">=</span> <span class="n">forecast_df</span><span class="p">[</span><span class="s1">&#39;cary&#39;</span><span class="p">]</span> +<div class=" highlight hl-ipython3"><pre><span></span><span class="n">cary_forecast</span> <span class="o">=</span> <span class="n">forecast_df</span><span class="p">[</span><span class="s1">&#39;cary&#39;</span><span class="p">]</span> <span class="n">years</span> <span class="o">=</span> <span class="nb">range</span><span class="p">(</span><span class="mi">1990</span><span class="p">,</span> <span class="mi">2016</span><span class="p">)</span> <span class="n">months</span> <span class="o">=</span> <span class="nb">range</span><span class="p">(</span><span class="mi">7</span><span class="p">,</span> <span class="mi">12</span><span class="p">)</span> <span class="n">months_str</span> <span class="o">=</span> <span class="p">[</span><span class="s1">&#39;July&#39;</span><span class="p">,</span> <span class="s1">&#39;August&#39;</span><span class="p">,</span> <span class="s1">&#39;September&#39;</span><span class="p">,</span> <span class="s1">&#39;October&#39;</span><span class="p">,</span> <span class="s1">&#39;November&#39;</span><span class="p">]</span> @@ -8239,7 +9161,7 @@ MathJax.Hub.Config({tex2jax: {inlineMath: [['$','$'], ['\(','\)']]}}); <div class="prompt input_prompt">In&nbsp;[4]:</div> <div class="inner_cell"> <div class="input_area"> -<div class=" highlight hl-ipython3"><pre><span class="k">def</span> <span class="nf">monthly_cloudy_days</span><span class="p">(</span><span class="n">year</span><span class="p">,</span> <span class="n">month</span><span class="p">):</span> +<div class=" highlight hl-ipython3"><pre><span></span><span class="k">def</span> <span class="nf">monthly_cloudy_days</span><span class="p">(</span><span class="n">year</span><span class="p">,</span> <span class="n">month</span><span class="p">):</span> <span class="n">dates</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DatetimeIndex</span><span class="p">(</span><span class="n">start</span><span class="o">=</span><span class="n">datetime</span><span class="p">(</span><span class="n">year</span><span class="p">,</span> <span class="n">month</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">12</span><span class="p">),</span> <span class="n">end</span><span class="o">=</span><span class="n">datetime</span><span class="p">(</span><span class="n">year</span><span class="p">,</span> <span class="n">month</span> <span class="o">+</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">12</span><span class="p">),</span> <span class="n">freq</span><span class="o">=</span><span class="s1">&#39;D&#39;</span><span class="p">,</span> <span class="n">closed</span><span class="o">=</span><span class="s1">&#39;left&#39;</span><span class="p">)</span> @@ -8355,7 +9277,7 @@ MathJax.Hub.Config({tex2jax: {inlineMath: [['$','$'], ['\(','\)']]}}); <div class="prompt input_prompt">In&nbsp;[5]:</div> <div class="inner_cell"> <div class="input_area"> -<div class=" highlight hl-ipython3"><pre><span class="k">def</span> <span class="nf">safe_precip</span><span class="p">(</span><span class="n">frame</span><span class="p">):</span> +<div class=" highlight hl-ipython3"><pre><span></span><span class="k">def</span> <span class="nf">safe_precip</span><span class="p">(</span><span class="n">frame</span><span class="p">):</span> <span class="k">if</span> <span class="n">frame</span> <span class="ow">and</span> <span class="s1">&#39;precipProbability&#39;</span> <span class="ow">in</span> <span class="n">frame</span><span class="p">:</span> <span class="k">return</span> <span class="n">frame</span><span class="p">[</span><span class="s1">&#39;precipProbability&#39;</span><span class="p">]</span> <span class="k">else</span><span class="p">:</span> @@ -8436,7 +9358,7 @@ MathJax.Hub.Config({tex2jax: {inlineMath: [['$','$'], ['\(','\)']]}}); <div class="prompt input_prompt">In&nbsp;[6]:</div> <div class="inner_cell"> <div class="input_area"> -<div class=" highlight hl-ipython3"><pre><span class="k">def</span> <span class="nf">monthly_rainy_days</span><span class="p">(</span><span class="n">year</span><span class="p">,</span> <span class="n">month</span><span class="p">):</span> +<div class=" highlight hl-ipython3"><pre><span></span><span class="k">def</span> <span class="nf">monthly_rainy_days</span><span class="p">(</span><span class="n">year</span><span class="p">,</span> <span class="n">month</span><span class="p">):</span> <span class="n">dates</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DatetimeIndex</span><span class="p">(</span><span class="n">start</span><span class="o">=</span><span class="n">datetime</span><span class="p">(</span><span class="n">year</span><span class="p">,</span> <span class="n">month</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">12</span><span class="p">),</span> <span class="n">end</span><span class="o">=</span><span class="n">datetime</span><span class="p">(</span><span class="n">year</span><span class="p">,</span> <span class="n">month</span> <span class="o">+</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">12</span><span class="p">),</span> <span class="n">freq</span><span class="o">=</span><span class="s1">&#39;D&#39;</span><span class="p">,</span> <span class="n">closed</span><span class="o">=</span><span class="s1">&#39;left&#39;</span><span class="p">)</span> @@ -8555,7 +9477,7 @@ MathJax.Hub.Config({tex2jax: {inlineMath: [['$','$'], ['\(','\)']]}}); <div class="inner_cell"> <div class="text_cell_render border-box-sizing rendered_html"> <h1 id="Generating-the-Forecast-file">Generating the Forecast file<a class="anchor-link" href="#Generating-the-Forecast-file">&#182;</a></h1><p>The following code was generates the file that was used throughout the blog post. Please note that I'm retrieving data for other cities to use in a future blog post, only Cary data was used for this post.</p> -<div class="highlight"><pre><span class="kn">import</span> <span class="nn">pandas</span> <span class="kn">as</span> <span class="nn">pd</span> +<div class="highlight"><pre><span></span><span class="kn">import</span> <span class="nn">pandas</span> <span class="kn">as</span> <span class="nn">pd</span> <span class="kn">from</span> <span class="nn">functools</span> <span class="kn">import</span> <span class="nb">reduce</span> <span class="kn">import</span> <span class="nn">requests</span> <span class="kn">from</span> <span class="nn">datetime</span> <span class="kn">import</span> <span class="n">datetime</span> @@ -8633,7 +9555,7 @@ MathJax.Hub.Config({tex2jax: {inlineMath: [['$','$'], ['\(','\)']]}}); <div class="prompt input_prompt">In&nbsp;[1]:</div> <div class="inner_cell"> <div class="input_area"> -<div class=" highlight hl-ipython3"><pre><span class="kn">import</span> <span class="nn">requests</span> +<div class=" highlight hl-ipython3"><pre><span></span><span class="kn">import</span> <span class="nn">requests</span> <span class="kn">import</span> <span class="nn">pandas</span> <span class="k">as</span> <span class="nn">pd</span> <span class="kn">import</span> <span class="nn">numpy</span> <span class="k">as</span> <span class="nn">np</span> <span class="kn">from</span> <span class="nn">dateutil</span> <span class="k">import</span> <span class="n">parser</span> <span class="k">as</span> <span class="n">dtparser</span> @@ -8668,7 +9590,7 @@ MathJax.Hub.Config({tex2jax: {inlineMath: [['$','$'], ['\(','\)']]}}); <div class="prompt input_prompt">In&nbsp;[2]:</div> <div class="inner_cell"> <div class="input_area"> -<div class=" highlight hl-ipython3"><pre><span class="k">class</span> <span class="nc">ArticleListParser</span><span class="p">(</span><span class="n">HTMLParser</span><span class="p">):</span> +<div class=" highlight hl-ipython3"><pre><span></span><span class="k">class</span> <span class="nc">ArticleListParser</span><span class="p">(</span><span class="n">HTMLParser</span><span class="p">):</span> <span class="sd">&quot;&quot;&quot;Given a web page with articles on it, parse out the article links&quot;&quot;&quot;</span> <span class="n">articles</span> <span class="o">=</span> <span class="p">[]</span> @@ -8681,7 +9603,7 @@ MathJax.Hub.Config({tex2jax: {inlineMath: [['$','$'], ['\(','\)']]}}); <span class="bp">self</span><span class="o">.</span><span class="n">articles</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">href</span><span class="p">)</span> <span class="n">base_url</span> <span class="o">=</span> <span class="s2">&quot;http://seekingalpha.com/author/wall-street-breakfast/articles&quot;</span> -<span class="n">article_page_urls</span> <span class="o">=</span> <span class="p">[</span><span class="n">base_url</span><span class="p">]</span> <span class="o">+</span> <span class="p">[</span><span class="n">base_url</span> <span class="o">+</span> <span class="s1">&#39;/{}&#39;</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">i</span><span class="p">)</span> <span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="mi">2</span><span class="p">,</span> <span class="mi">20</span><span class="p">)]</span> +<span class="n">article_page_urls</span> <span class="o">=</span> <span class="p">[</span><span class="n">base_url</span><span class="p">]</span> <span class="o">+</span> <span class="p">[</span><span class="n">base_url</span> <span class="o">+</span> <span class="s1">&#39;/</span><span class="si">{}</span><span class="s1">&#39;</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">i</span><span class="p">)</span> <span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="mi">2</span><span class="p">,</span> <span class="mi">20</span><span class="p">)]</span> <span class="n">global_articles</span> <span class="o">=</span> <span class="p">[]</span> <span class="k">for</span> <span class="n">page</span> <span class="ow">in</span> <span class="n">article_page_urls</span><span class="p">:</span> @@ -8703,7 +9625,7 @@ MathJax.Hub.Config({tex2jax: {inlineMath: [['$','$'], ['\(','\)']]}}); <div class="prompt input_prompt">In&nbsp;[3]:</div> <div class="inner_cell"> <div class="input_area"> -<div class=" highlight hl-ipython3"><pre><span class="k">class</span> <span class="nc">ArticleReturnParser</span><span class="p">(</span><span class="n">HTMLParser</span><span class="p">):</span> +<div class=" highlight hl-ipython3"><pre><span></span><span class="k">class</span> <span class="nc">ArticleReturnParser</span><span class="p">(</span><span class="n">HTMLParser</span><span class="p">):</span> <span class="s2">&quot;Given an article, parse out the futures returns in it&quot;</span> <span class="n">record_font_tags</span> <span class="o">=</span> <span class="kc">False</span> @@ -8790,7 +9712,7 @@ MathJax.Hub.Config({tex2jax: {inlineMath: [['$','$'], ['\(','\)']]}}); <div class="prompt input_prompt">In&nbsp;[4]:</div> <div class="inner_cell"> <div class="input_area"> -<div class=" highlight hl-ipython3"><pre><span class="c1"># article_df is sorted by date, so we get the first row.</span> +<div class=" highlight hl-ipython3"><pre><span></span><span class="c1"># article_df is sorted by date, so we get the first row.</span> <span class="n">start_date</span> <span class="o">=</span> <span class="n">article_df</span><span class="o">.</span><span class="n">sort_values</span><span class="p">(</span><span class="n">by</span><span class="o">=</span><span class="s1">&#39;date&#39;</span><span class="p">)</span><span class="o">.</span><span class="n">iloc</span><span class="p">[</span><span class="mi">0</span><span class="p">][</span><span class="s1">&#39;date&#39;</span><span class="p">]</span> <span class="o">-</span> <span class="n">relativedelta</span><span class="p">(</span><span class="n">days</span><span class="o">=</span><span class="mi">1</span><span class="p">)</span> <span class="n">SPY</span> <span class="o">=</span> <span class="n">Quandl</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s2">&quot;GOOG/NYSE_SPY&quot;</span><span class="p">,</span> <span class="n">trim_start</span><span class="o">=</span><span class="n">start_date</span><span class="p">)</span> <span class="n">DJIA</span> <span class="o">=</span> <span class="n">Quandl</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s2">&quot;GOOG/AMS_DIA&quot;</span><span class="p">,</span> <span class="n">trim_start</span><span class="o">=</span><span class="n">start_date</span><span class="p">)</span> @@ -8819,7 +9741,7 @@ MathJax.Hub.Config({tex2jax: {inlineMath: [['$','$'], ['\(','\)']]}}); <div class="prompt input_prompt">In&nbsp;[5]:</div> <div class="inner_cell"> <div class="input_area"> -<div class=" highlight hl-ipython3"><pre><span class="k">def</span> <span class="nf">calculate_opening_ret</span><span class="p">(</span><span class="n">frame</span><span class="p">):</span> +<div class=" highlight hl-ipython3"><pre><span></span><span class="k">def</span> <span class="nf">calculate_opening_ret</span><span class="p">(</span><span class="n">frame</span><span class="p">):</span> <span class="c1"># I&#39;m not a huge fan of the appending for loop,</span> <span class="c1"># but it&#39;s a bit verbose for a comprehension</span> <span class="n">data</span> <span class="o">=</span> <span class="p">{}</span> @@ -8932,7 +9854,7 @@ S&amp;P 0.604478 0.597015 0.811808 0.848708 <div class="prompt input_prompt">In&nbsp;[6]:</div> <div class="inner_cell"> <div class="input_area"> -<div class=" highlight hl-ipython3"><pre><span class="k">def</span> <span class="nf">calculate_closing_ret</span><span class="p">(</span><span class="n">frame</span><span class="p">):</span> +<div class=" highlight hl-ipython3"><pre><span></span><span class="k">def</span> <span class="nf">calculate_closing_ret</span><span class="p">(</span><span class="n">frame</span><span class="p">):</span> <span class="c1"># I&#39;m not a huge fan of the appending for loop,</span> <span class="c1"># but it&#39;s a bit verbose for a comprehension</span> <span class="n">data</span> <span class="o">=</span> <span class="p">{}</span> @@ -9059,7 +9981,7 @@ Because these are all very similar, we decided to demonstrate all 3 products at <div class="prompt input_prompt">In&nbsp;[1]:</div> <div class="inner_cell"> <div class="input_area"> -<div class=" highlight hl-julia"><pre><span class="k">using</span> <span class="n">Gadfly</span> +<div class=" highlight hl-julia"><pre><span></span><span class="k">using</span> <span class="n">Gadfly</span> </pre></div> </div> @@ -9095,7 +10017,7 @@ Because these are all very similar, we decided to demonstrate all 3 products at <div class="prompt input_prompt">In&nbsp;[2]:</div> <div class="inner_cell"> <div class="input_area"> -<div class=" highlight hl-julia"><pre><span class="n">S0</span> <span class="o">=</span> <span class="mf">102.2</span> +<div class=" highlight hl-julia"><pre><span></span><span class="n">S0</span> <span class="o">=</span> <span class="mf">102.2</span> <span class="n">nominal</span> <span class="o">=</span> <span class="mi">100</span> <span class="n">q</span> <span class="o">=</span> <span class="mf">2.84</span> <span class="o">/</span> <span class="mi">100</span> <span class="n">σ</span> <span class="o">=</span> <span class="mf">15.37</span> <span class="o">/</span> <span class="mi">100</span> @@ -9151,7 +10073,7 @@ Because these are all very similar, we decided to demonstrate all 3 products at <div class="prompt input_prompt">In&nbsp;[3]:</div> <div class="inner_cell"> <div class="input_area"> -<div class=" highlight hl-julia"><pre><span class="n">simulate_gbm</span> <span class="o">=</span> <span class="n">function</span><span class="p">(</span><span class="n">S0</span><span class="p">,</span> <span class="n">μ</span><span class="p">,</span> <span class="n">σ</span><span class="p">,</span> <span class="n">T</span><span class="p">,</span> <span class="n">n</span><span class="p">)</span> +<div class=" highlight hl-julia"><pre><span></span><span class="n">simulate_gbm</span> <span class="o">=</span> <span class="n">function</span><span class="p">(</span><span class="n">S0</span><span class="p">,</span> <span class="n">μ</span><span class="p">,</span> <span class="n">σ</span><span class="p">,</span> <span class="n">T</span><span class="p">,</span> <span class="n">n</span><span class="p">)</span> <span class="c"># Set the initial state</span> <span class="n">m</span> <span class="o">=</span> <span class="n">length</span><span class="p">(</span><span class="n">S0</span><span class="p">)</span> <span class="n">t</span> <span class="o">=</span> <span class="n">T</span> <span class="o">/</span> <span class="n">n</span> @@ -9222,7 +10144,7 @@ Because these are all very similar, we decided to demonstrate all 3 products at <div class="prompt input_prompt">In&nbsp;[4]:</div> <div class="inner_cell"> <div class="input_area"> -<div class=" highlight hl-julia"><pre><span class="n">initial</span> <span class="o">=</span> <span class="n">ones</span><span class="p">(</span><span class="mi">5</span><span class="p">)</span> <span class="o">*</span> <span class="n">S0</span> +<div class=" highlight hl-julia"><pre><span></span><span class="n">initial</span> <span class="o">=</span> <span class="n">ones</span><span class="p">(</span><span class="mi">5</span><span class="p">)</span> <span class="o">*</span> <span class="n">S0</span> <span class="c"># Using μ=0, T=.25 for now, we&#39;ll use the proper values later</span> <span class="n">motion</span> <span class="o">=</span> <span class="n">simulate_gbm</span><span class="p">(</span><span class="n">initial</span><span class="p">,</span> <span class="mi">0</span><span class="p">,</span> <span class="n">σ</span><span class="p">,</span> <span class="o">.</span><span class="mi">25</span><span class="p">,</span> <span class="mi">200</span><span class="p">)</span> @@ -10830,7 +11752,7 @@ fig.select("#fig-3a6dd25ad25c4037a166889ee51bb151-element-20") <div class="prompt input_prompt">In&nbsp;[5]:</div> <div class="inner_cell"> <div class="input_area"> -<div class=" highlight hl-julia"><pre><span class="n">forward_term</span> <span class="o">=</span> <span class="n">function</span><span class="p">(</span><span class="n">yearly_term</span><span class="p">)</span> +<div class=" highlight hl-julia"><pre><span></span><span class="n">forward_term</span> <span class="o">=</span> <span class="n">function</span><span class="p">(</span><span class="n">yearly_term</span><span class="p">)</span> <span class="c"># It is assumed that we have a yearly term structure passed in, and starts at year 0</span> <span class="c"># This implies a nominal rate above 0 for the first year!</span> <span class="n">years</span> <span class="o">=</span> <span class="n">length</span><span class="p">(</span><span class="n">term</span><span class="p">)</span><span class="o">-</span><span class="mi">1</span> <span class="c"># because we start at 0</span> @@ -10860,14 +11782,14 @@ fig.select("#fig-3a6dd25ad25c4037a166889ee51bb151-element-20") <div class="prompt input_prompt">In&nbsp;[6]:</div> <div class="inner_cell"> <div class="input_area"> -<div class=" highlight hl-julia"><pre><span class="c"># Example term structure taken from:</span> +<div class=" highlight hl-julia"><pre><span></span><span class="c"># Example term structure taken from:</span> <span class="c"># http://www.treasury.gov/resource-center/data-chart-center/interest-rates/Pages/TextView.aspx?data=yield</span> <span class="c"># Linear interpolation used years in-between periods, assuming real-dollar</span> <span class="c"># interest rates</span> <span class="n">forward_yield</span> <span class="o">=</span> <span class="n">forward_term</span><span class="p">(</span><span class="n">term</span><span class="p">)</span> <span class="n">calculated_term2</span> <span class="o">=</span> <span class="n">term</span><span class="p">[</span><span class="mi">1</span><span class="p">]</span> <span class="o">*</span> <span class="n">forward_yield</span><span class="p">[</span><span class="mi">1</span><span class="p">]</span> -<span class="n">println</span><span class="p">(</span><span class="s">&quot;Actual term[2]: </span><span class="si">$</span><span class="s">(term[2]); Calculated term[2]: </span><span class="si">$(calculated_term2)</span><span class="s">&quot;</span><span class="p">)</span> +<span class="n">println</span><span class="p">(</span><span class="s">&quot;Actual term[2]: </span><span class="si">$(term[2])</span><span class="s">; Calculated term[2]: </span><span class="si">$(calculated_term2)</span><span class="s">&quot;</span><span class="p">)</span> </pre></div> </div> @@ -10904,7 +11826,7 @@ fig.select("#fig-3a6dd25ad25c4037a166889ee51bb151-element-20") <div class="prompt input_prompt">In&nbsp;[7]:</div> <div class="inner_cell"> <div class="input_area"> -<div class=" highlight hl-julia"><pre><span class="n">full_motion</span> <span class="o">=</span> <span class="n">ones</span><span class="p">(</span><span class="mi">5</span><span class="p">)</span> <span class="o">*</span> <span class="n">S0</span> +<div class=" highlight hl-julia"><pre><span></span><span class="n">full_motion</span> <span class="o">=</span> <span class="n">ones</span><span class="p">(</span><span class="mi">5</span><span class="p">)</span> <span class="o">*</span> <span class="n">S0</span> <span class="n">full_term</span> <span class="o">=</span> <span class="n">vcat</span><span class="p">(</span><span class="n">term</span><span class="p">[</span><span class="mi">1</span><span class="p">],</span> <span class="n">forward_yield</span><span class="p">)</span> <span class="k">for</span> <span class="n">i</span><span class="o">=</span><span class="mi">1</span><span class="p">:</span><span class="n">T</span> <span class="n">μ</span> <span class="o">=</span> <span class="p">(</span><span class="n">full_term</span><span class="p">[</span><span class="n">i</span><span class="p">]</span> <span class="o">-</span> <span class="mi">1</span> <span class="o">-</span> <span class="n">q</span><span class="p">)</span> @@ -12465,7 +13387,7 @@ fig.select("#fig-0378e04b897742b597befd2e8e1c169e-element-20") <div class="prompt input_prompt">In&nbsp;[8]:</div> <div class="inner_cell"> <div class="input_area"> -<div class=" highlight hl-julia"><pre><span class="n">full_simulation</span> <span class="o">=</span> <span class="n">function</span><span class="p">(</span><span class="n">S0</span><span class="p">,</span> <span class="n">T</span><span class="p">,</span> <span class="n">n</span><span class="p">,</span> <span class="n">m</span><span class="p">,</span> <span class="n">term</span><span class="p">)</span> +<div class=" highlight hl-julia"><pre><span></span><span class="n">full_simulation</span> <span class="o">=</span> <span class="n">function</span><span class="p">(</span><span class="n">S0</span><span class="p">,</span> <span class="n">T</span><span class="p">,</span> <span class="n">n</span><span class="p">,</span> <span class="n">m</span><span class="p">,</span> <span class="n">term</span><span class="p">)</span> <span class="n">forward</span> <span class="o">=</span> <span class="n">vcat</span><span class="p">(</span><span class="n">term</span><span class="p">[</span><span class="mi">1</span><span class="p">],</span> <span class="n">forward_term</span><span class="p">(</span><span class="n">term</span><span class="p">))</span> <span class="c"># And an S0 to kick things off.</span> @@ -12481,7 +13403,7 @@ fig.select("#fig-0378e04b897742b597befd2e8e1c169e-element-20") <span class="n">tic</span><span class="p">()</span> <span class="n">full_simulation</span><span class="p">(</span><span class="n">S0</span><span class="p">,</span> <span class="n">T</span><span class="p">,</span> <span class="n">n</span><span class="p">,</span> <span class="n">m</span><span class="p">,</span> <span class="n">term</span><span class="p">)</span> <span class="n">time</span> <span class="o">=</span> <span class="n">toq</span><span class="p">()</span> -<span class="p">@</span><span class="n">printf</span><span class="p">(</span><span class="s">&quot;Time to run simulation: %.2fs&quot;</span><span class="p">,</span> <span class="n">time</span><span class="p">)</span> +<span class="p">@</span><span class="n">printf</span><span class="p">(</span><span class="s">&quot;Time to run simulation: </span><span class="si">%.2f</span><span class="s">s&quot;</span><span class="p">,</span> <span class="n">time</span><span class="p">)</span> </pre></div> </div> @@ -12526,7 +13448,7 @@ fig.select("#fig-0378e04b897742b597befd2e8e1c169e-element-20") <div class="prompt input_prompt">In&nbsp;[9]:</div> <div class="inner_cell"> <div class="input_area"> -<div class=" highlight hl-julia"><pre><span class="n">call_barrier</span> <span class="o">=</span> <span class="n">S0</span> +<div class=" highlight hl-julia"><pre><span></span><span class="n">call_barrier</span> <span class="o">=</span> <span class="n">S0</span> <span class="n">strike</span> <span class="o">=</span> <span class="n">S0</span> <span class="n">protection_barrier</span> <span class="o">=</span> <span class="n">S0</span> <span class="o">*</span> <span class="o">.</span><span class="mi">6</span> <span class="n">coupon</span> <span class="o">=</span> <span class="n">nominal</span> <span class="o">*</span> <span class="o">.</span><span class="mi">07</span> @@ -12570,13 +13492,13 @@ fig.select("#fig-0378e04b897742b597befd2e8e1c169e-element-20") <span class="n">tic</span><span class="p">()</span> <span class="n">mean_payoffs</span><span class="p">[</span><span class="n">i</span><span class="p">]</span> <span class="o">=</span> <span class="n">athena</span><span class="p">()</span> <span class="n">time</span> <span class="o">=</span> <span class="n">toq</span><span class="p">()</span> - <span class="p">@</span><span class="n">printf</span><span class="p">(</span><span class="s">&quot;Mean of simulation %i: \</span><span class="si">$</span><span class="s">%.4f; Simulation time: %.2fs</span><span class="se">\n</span><span class="s">&quot;</span><span class="p">,</span> <span class="n">i</span><span class="p">,</span> <span class="n">mean_payoffs</span><span class="p">[</span><span class="n">i</span><span class="p">],</span> <span class="n">time</span><span class="p">)</span> + <span class="p">@</span><span class="n">printf</span><span class="p">(</span><span class="s">&quot;Mean of simulation </span><span class="si">%i</span><span class="s">: \$</span><span class="si">%.4f</span><span class="s">; Simulation time: </span><span class="si">%.2f</span><span class="s">s</span><span class="se">\n</span><span class="s">&quot;</span><span class="p">,</span> <span class="n">i</span><span class="p">,</span> <span class="n">mean_payoffs</span><span class="p">[</span><span class="n">i</span><span class="p">],</span> <span class="n">time</span><span class="p">)</span> <span class="k">end</span> <span class="n">final_mean</span> <span class="o">=</span> <span class="n">mean</span><span class="p">(</span><span class="n">mean_payoffs</span><span class="p">)</span> -<span class="n">println</span><span class="p">(</span><span class="s">&quot;Mean over </span><span class="si">$</span><span class="s">num_simulations simulations: </span><span class="si">$</span><span class="s">(mean(mean_payoffs))&quot;</span><span class="p">)</span> +<span class="n">println</span><span class="p">(</span><span class="s">&quot;Mean over </span><span class="si">$num_simulations</span><span class="s"> simulations: </span><span class="si">$(mean(mean_payoffs))</span><span class="s">&quot;</span><span class="p">)</span> <span class="n">pv</span> <span class="o">=</span> <span class="n">final_mean</span> <span class="o">*</span> <span class="p">(</span><span class="n">exp</span><span class="p">(</span><span class="o">-</span><span class="p">(</span><span class="n">prod</span><span class="p">(</span><span class="n">forward_structure</span><span class="p">)</span><span class="o">-</span><span class="mi">1</span><span class="p">)</span><span class="o">*</span><span class="n">T</span><span class="p">))</span> -<span class="p">@</span><span class="n">printf</span><span class="p">(</span><span class="s">&quot;Present value of Athena note: \</span><span class="si">$</span><span class="s">%.2f, notional: \</span><span class="si">$</span><span class="s">%.2f&quot;</span><span class="p">,</span> <span class="n">pv</span><span class="p">,</span> <span class="n">nominal</span><span class="p">)</span> +<span class="p">@</span><span class="n">printf</span><span class="p">(</span><span class="s">&quot;Present value of Athena note: \$</span><span class="si">%.2f</span><span class="s">, notional: \$</span><span class="si">%.2f</span><span class="s">&quot;</span><span class="p">,</span> <span class="n">pv</span><span class="p">,</span> <span class="n">nominal</span><span class="p">)</span> </pre></div> </div> @@ -12637,7 +13559,7 @@ Present value of Athena note: $95.00, notional: $100.00</pre> <div class="prompt input_prompt">In&nbsp;[10]:</div> <div class="inner_cell"> <div class="input_area"> -<div class=" highlight hl-julia"><pre><span class="n">call_barrier</span> <span class="o">=</span> <span class="n">S0</span> +<div class=" highlight hl-julia"><pre><span></span><span class="n">call_barrier</span> <span class="o">=</span> <span class="n">S0</span> <span class="n">coupon_barrier</span> <span class="o">=</span> <span class="n">S0</span> <span class="o">*</span> <span class="o">.</span><span class="mi">8</span> <span class="n">protection_barrier</span> <span class="o">=</span> <span class="n">S0</span> <span class="o">*</span> <span class="o">.</span><span class="mi">6</span> <span class="n">coupon</span> <span class="o">=</span> <span class="n">nominal</span> <span class="o">*</span> <span class="o">.</span><span class="mi">06</span> @@ -12684,13 +13606,13 @@ Present value of Athena note: $95.00, notional: $100.00</pre> <span class="n">tic</span><span class="p">()</span> <span class="n">mean_payoffs</span><span class="p">[</span><span class="n">i</span><span class="p">]</span> <span class="o">=</span> <span class="n">phoenix_no_memory</span><span class="p">()</span> <span class="n">time</span> <span class="o">=</span> <span class="n">toq</span><span class="p">()</span> - <span class="p">@</span><span class="n">printf</span><span class="p">(</span><span class="s">&quot;Mean of simulation %i: \</span><span class="si">$</span><span class="s">%.4f; Simulation time: %.2fs</span><span class="se">\n</span><span class="s">&quot;</span><span class="p">,</span> <span class="n">i</span><span class="p">,</span> <span class="n">mean_payoffs</span><span class="p">[</span><span class="n">i</span><span class="p">],</span> <span class="n">time</span><span class="p">)</span> + <span class="p">@</span><span class="n">printf</span><span class="p">(</span><span class="s">&quot;Mean of simulation </span><span class="si">%i</span><span class="s">: \$</span><span class="si">%.4f</span><span class="s">; Simulation time: </span><span class="si">%.2f</span><span class="s">s</span><span class="se">\n</span><span class="s">&quot;</span><span class="p">,</span> <span class="n">i</span><span class="p">,</span> <span class="n">mean_payoffs</span><span class="p">[</span><span class="n">i</span><span class="p">],</span> <span class="n">time</span><span class="p">)</span> <span class="k">end</span> <span class="n">final_mean</span> <span class="o">=</span> <span class="n">mean</span><span class="p">(</span><span class="n">mean_payoffs</span><span class="p">)</span> -<span class="n">println</span><span class="p">(</span><span class="s">&quot;Mean over </span><span class="si">$</span><span class="s">num_simulations simulations: </span><span class="si">$</span><span class="s">(mean(mean_payoffs))&quot;</span><span class="p">)</span> +<span class="n">println</span><span class="p">(</span><span class="s">&quot;Mean over </span><span class="si">$num_simulations</span><span class="s"> simulations: </span><span class="si">$(mean(mean_payoffs))</span><span class="s">&quot;</span><span class="p">)</span> <span class="n">pv</span> <span class="o">=</span> <span class="n">final_mean</span> <span class="o">*</span> <span class="n">exp</span><span class="p">(</span><span class="o">-</span><span class="p">(</span><span class="n">prod</span><span class="p">(</span><span class="n">forward_structure</span><span class="p">)</span><span class="o">-</span><span class="mi">1</span><span class="p">)</span><span class="o">*</span><span class="p">(</span><span class="n">T</span><span class="p">))</span> -<span class="p">@</span><span class="n">printf</span><span class="p">(</span><span class="s">&quot;Present value of Phoenix without memory note: \</span><span class="si">$</span><span class="s">%.2f&quot;</span><span class="p">,</span> <span class="n">pv</span><span class="p">)</span> +<span class="p">@</span><span class="n">printf</span><span class="p">(</span><span class="s">&quot;Present value of Phoenix without memory note: \$</span><span class="si">%.2f</span><span class="s">&quot;</span><span class="p">,</span> <span class="n">pv</span><span class="p">)</span> </pre></div> </div> @@ -12737,7 +13659,7 @@ Present value of Phoenix without memory note: $97.44</pre> <div class="prompt input_prompt">In&nbsp;[11]:</div> <div class="inner_cell"> <div class="input_area"> -<div class=" highlight hl-julia"><pre><span class="n">call_barrier</span> <span class="o">=</span> <span class="n">S0</span> +<div class=" highlight hl-julia"><pre><span></span><span class="n">call_barrier</span> <span class="o">=</span> <span class="n">S0</span> <span class="n">coupon_barrier</span> <span class="o">=</span> <span class="n">S0</span> <span class="o">*</span> <span class="o">.</span><span class="mi">8</span> <span class="n">protection_barrier</span> <span class="o">=</span> <span class="n">S0</span> <span class="o">*</span> <span class="o">.</span><span class="mi">6</span> <span class="n">coupon</span> <span class="o">=</span> <span class="n">nominal</span> <span class="o">*</span> <span class="o">.</span><span class="mi">07</span> @@ -12791,14 +13713,14 @@ Present value of Phoenix without memory note: $97.44</pre> <span class="n">tic</span><span class="p">()</span> <span class="n">mean_payoffs</span><span class="p">[</span><span class="n">i</span><span class="p">]</span> <span class="o">=</span> <span class="n">phoenix_with_memory</span><span class="p">()</span> <span class="n">time</span> <span class="o">=</span> <span class="n">toq</span><span class="p">()</span> - <span class="p">@</span><span class="n">printf</span><span class="p">(</span><span class="s">&quot;Mean of simulation %i: \</span><span class="si">$</span><span class="s">%.4f; Simulation time: %.2fs</span><span class="se">\n</span><span class="s">&quot;</span><span class="p">,</span> + <span class="p">@</span><span class="n">printf</span><span class="p">(</span><span class="s">&quot;Mean of simulation </span><span class="si">%i</span><span class="s">: \$</span><span class="si">%.4f</span><span class="s">; Simulation time: </span><span class="si">%.2f</span><span class="s">s</span><span class="se">\n</span><span class="s">&quot;</span><span class="p">,</span> <span class="n">i</span><span class="p">,</span> <span class="n">mean_payoffs</span><span class="p">[</span><span class="n">i</span><span class="p">],</span> <span class="n">time</span><span class="p">)</span> <span class="k">end</span> <span class="n">final_mean</span> <span class="o">=</span> <span class="n">mean</span><span class="p">(</span><span class="n">mean_payoffs</span><span class="p">)</span> -<span class="n">println</span><span class="p">(</span><span class="s">&quot;Mean over </span><span class="si">$</span><span class="s">num_simulations simulations: </span><span class="si">$</span><span class="s">(mean(mean_payoffs))&quot;</span><span class="p">)</span> +<span class="n">println</span><span class="p">(</span><span class="s">&quot;Mean over </span><span class="si">$num_simulations</span><span class="s"> simulations: </span><span class="si">$(mean(mean_payoffs))</span><span class="s">&quot;</span><span class="p">)</span> <span class="n">pv</span> <span class="o">=</span> <span class="n">final_mean</span> <span class="o">*</span> <span class="n">exp</span><span class="p">(</span><span class="o">-</span><span class="p">(</span><span class="n">prod</span><span class="p">(</span><span class="n">forward_structure</span><span class="p">)</span><span class="o">-</span><span class="mi">1</span><span class="p">)</span><span class="o">*</span><span class="p">(</span><span class="n">T</span><span class="p">))</span> -<span class="p">@</span><span class="n">printf</span><span class="p">(</span><span class="s">&quot;Present value of Phoenix with memory note: \</span><span class="si">$</span><span class="s">%.2f&quot;</span><span class="p">,</span> <span class="n">pv</span><span class="p">)</span> +<span class="p">@</span><span class="n">printf</span><span class="p">(</span><span class="s">&quot;Present value of Phoenix with memory note: \$</span><span class="si">%.2f</span><span class="s">&quot;</span><span class="p">,</span> <span class="n">pv</span><span class="p">)</span> </pre></div> </div> @@ -12900,7 +13822,7 @@ to make some tweaks over the coming weeks, and do another forward test in Januar <div class="prompt input_prompt">In&nbsp;[1]:</div> <div class="inner_cell"> <div class="input_area"> -<div class=" highlight hl-ipython3"><pre><span class="kn">from</span> <span class="nn">IPython.display</span> <span class="k">import</span> <span class="n">display</span> +<div class=" highlight hl-ipython3"><pre><span></span><span class="kn">from</span> <span class="nn">IPython.display</span> <span class="k">import</span> <span class="n">display</span> <span class="kn">import</span> <span class="nn">Quandl</span> <span class="kn">from</span> <span class="nn">datetime</span> <span class="k">import</span> <span class="n">datetime</span><span class="p">,</span> <span class="n">timedelta</span> @@ -12909,7 +13831,7 @@ to make some tweaks over the coming weeks, and do another forward test in Januar <span class="n">lookback</span> <span class="o">=</span> <span class="mi">30</span> <span class="n">d_col</span> <span class="o">=</span> <span class="s1">&#39;Close&#39;</span> -<span class="n">data</span> <span class="o">=</span> <span class="p">{</span><span class="n">tick</span><span class="p">:</span> <span class="n">Quandl</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">&#39;YAHOO/{}&#39;</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">tick</span><span class="p">))[</span><span class="o">-</span><span class="n">lookback</span><span class="p">:]</span> <span class="k">for</span> <span class="n">tick</span> <span class="ow">in</span> <span class="n">tickers</span><span class="p">}</span> +<span class="n">data</span> <span class="o">=</span> <span class="p">{</span><span class="n">tick</span><span class="p">:</span> <span class="n">Quandl</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">&#39;YAHOO/</span><span class="si">{}</span><span class="s1">&#39;</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">tick</span><span class="p">))[</span><span class="o">-</span><span class="n">lookback</span><span class="p">:]</span> <span class="k">for</span> <span class="n">tick</span> <span class="ow">in</span> <span class="n">tickers</span><span class="p">}</span> <span class="n">market</span> <span class="o">=</span> <span class="n">Quandl</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">market_ticker</span><span class="p">)</span> </pre></div> @@ -12933,7 +13855,7 @@ to make some tweaks over the coming weeks, and do another forward test in Januar <div class="prompt input_prompt">In&nbsp;[2]:</div> <div class="inner_cell"> <div class="input_area"> -<div class=" highlight hl-ipython3"><pre><span class="n">returns</span> <span class="o">=</span> <span class="p">{</span><span class="n">tick</span><span class="p">:</span> <span class="n">data</span><span class="p">[</span><span class="n">tick</span><span class="p">][</span><span class="n">d_col</span><span class="p">]</span><span class="o">.</span><span class="n">pct_change</span><span class="p">()</span> <span class="k">for</span> <span class="n">tick</span> <span class="ow">in</span> <span class="n">tickers</span><span class="p">}</span> +<div class=" highlight hl-ipython3"><pre><span></span><span class="n">returns</span> <span class="o">=</span> <span class="p">{</span><span class="n">tick</span><span class="p">:</span> <span class="n">data</span><span class="p">[</span><span class="n">tick</span><span class="p">][</span><span class="n">d_col</span><span class="p">]</span><span class="o">.</span><span class="n">pct_change</span><span class="p">()</span> <span class="k">for</span> <span class="n">tick</span> <span class="ow">in</span> <span class="n">tickers</span><span class="p">}</span> <span class="n">display</span><span class="p">({</span><span class="n">tick</span><span class="p">:</span> <span class="n">returns</span><span class="p">[</span><span class="n">tick</span><span class="p">]</span><span class="o">.</span><span class="n">mean</span><span class="p">()</span> <span class="k">for</span> <span class="n">tick</span> <span class="ow">in</span> <span class="n">tickers</span><span class="p">})</span> </pre></div> @@ -12979,7 +13901,7 @@ to make some tweaks over the coming weeks, and do another forward test in Januar <div class="prompt input_prompt">In&nbsp;[3]:</div> <div class="inner_cell"> <div class="input_area"> -<div class=" highlight hl-ipython3"><pre><span class="n">market_returns</span> <span class="o">=</span> <span class="n">market</span><span class="o">.</span><span class="n">pct_change</span><span class="p">()</span> +<div class=" highlight hl-ipython3"><pre><span></span><span class="n">market_returns</span> <span class="o">=</span> <span class="n">market</span><span class="o">.</span><span class="n">pct_change</span><span class="p">()</span> <span class="n">sharpe</span> <span class="o">=</span> <span class="k">lambda</span> <span class="n">ret</span><span class="p">:</span> <span class="p">(</span><span class="n">ret</span><span class="o">.</span><span class="n">mean</span><span class="p">()</span> <span class="o">-</span> <span class="n">market_returns</span><span class="p">[</span><span class="n">d_col</span><span class="p">]</span><span class="o">.</span><span class="n">mean</span><span class="p">())</span> <span class="o">/</span> <span class="n">ret</span><span class="o">.</span><span class="n">std</span><span class="p">()</span> <span class="n">sharpes</span> <span class="o">=</span> <span class="p">{</span><span class="n">tick</span><span class="p">:</span> <span class="n">sharpe</span><span class="p">(</span><span class="n">returns</span><span class="p">[</span><span class="n">tick</span><span class="p">])</span> <span class="k">for</span> <span class="n">tick</span> <span class="ow">in</span> <span class="n">tickers</span><span class="p">}</span> @@ -13027,7 +13949,7 @@ to make some tweaks over the coming weeks, and do another forward test in Januar <div class="prompt input_prompt">In&nbsp;[4]:</div> <div class="inner_cell"> <div class="input_area"> -<div class=" highlight hl-ipython3"><pre><span class="n">drawdown</span> <span class="o">=</span> <span class="k">lambda</span> <span class="n">ret</span><span class="p">:</span> <span class="n">ret</span><span class="o">.</span><span class="n">abs</span><span class="p">()</span><span class="o">.</span><span class="n">max</span><span class="p">()</span> +<div class=" highlight hl-ipython3"><pre><span></span><span class="n">drawdown</span> <span class="o">=</span> <span class="k">lambda</span> <span class="n">ret</span><span class="p">:</span> <span class="n">ret</span><span class="o">.</span><span class="n">abs</span><span class="p">()</span><span class="o">.</span><span class="n">max</span><span class="p">()</span> <span class="n">drawdowns</span> <span class="o">=</span> <span class="p">{</span><span class="n">tick</span><span class="p">:</span> <span class="n">drawdown</span><span class="p">(</span><span class="n">returns</span><span class="p">[</span><span class="n">tick</span><span class="p">])</span> <span class="k">for</span> <span class="n">tick</span> <span class="ow">in</span> <span class="n">tickers</span><span class="p">}</span> <span class="n">display</span><span class="p">(</span><span class="n">drawdowns</span><span class="p">)</span> @@ -13080,7 +14002,7 @@ s.t.\ \ & \vec{1} \omega = 1\\ <div class="prompt input_prompt">In&nbsp;[5]:</div> <div class="inner_cell"> <div class="input_area"> -<div class=" highlight hl-ipython3"><pre><span class="kn">import</span> <span class="nn">numpy</span> <span class="k">as</span> <span class="nn">np</span> +<div class=" highlight hl-ipython3"><pre><span></span><span class="kn">import</span> <span class="nn">numpy</span> <span class="k">as</span> <span class="nn">np</span> <span class="kn">from</span> <span class="nn">scipy.optimize</span> <span class="k">import</span> <span class="n">minimize</span> <span class="c1">#sharpe_limit = .1</span> @@ -13134,13 +14056,13 @@ s.t.\ \ & \vec{1} \omega = 1\\ <span class="c1"># Optimization time!</span> <span class="n">display</span><span class="p">(</span><span class="n">optimal</span><span class="o">.</span><span class="n">message</span><span class="p">)</span> -<span class="n">display</span><span class="p">(</span><span class="s2">&quot;Holdings: {}&quot;</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="nb">list</span><span class="p">(</span><span class="nb">zip</span><span class="p">(</span><span class="n">tickers</span><span class="p">,</span> <span class="n">optimal</span><span class="o">.</span><span class="n">x</span><span class="p">))))</span> +<span class="n">display</span><span class="p">(</span><span class="s2">&quot;Holdings: </span><span class="si">{}</span><span class="s2">&quot;</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="nb">list</span><span class="p">(</span><span class="nb">zip</span><span class="p">(</span><span class="n">tickers</span><span class="p">,</span> <span class="n">optimal</span><span class="o">.</span><span class="n">x</span><span class="p">))))</span> <span class="n">expected_return</span> <span class="o">=</span> <span class="n">optimal</span><span class="o">.</span><span class="n">fun</span> <span class="o">*</span> <span class="o">-</span><span class="mi">100</span> <span class="c1"># multiply by -100 to scale, and compensate for minimizing</span> -<span class="n">display</span><span class="p">(</span><span class="s2">&quot;Expected Return: {:.3f}%&quot;</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">expected_return</span><span class="p">))</span> +<span class="n">display</span><span class="p">(</span><span class="s2">&quot;Expected Return: </span><span class="si">{:.3f}</span><span class="s2">%&quot;</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">expected_return</span><span class="p">))</span> <span class="n">expected_drawdown</span> <span class="o">=</span> <span class="nb">sum</span><span class="p">(</span><span class="nb">abs</span><span class="p">(</span><span class="n">optimal</span><span class="o">.</span><span class="n">x</span><span class="p">)</span> <span class="o">*</span> <span class="n">dd_a</span><span class="p">)</span> <span class="o">/</span> <span class="nb">sum</span><span class="p">(</span><span class="nb">abs</span><span class="p">(</span><span class="n">optimal</span><span class="o">.</span><span class="n">x</span><span class="p">))</span> <span class="o">*</span> <span class="mi">100</span> -<span class="n">display</span><span class="p">(</span><span class="s2">&quot;Expected Max Drawdown: {0:.2f}%&quot;</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">expected_drawdown</span><span class="p">))</span> +<span class="n">display</span><span class="p">(</span><span class="s2">&quot;Expected Max Drawdown: </span><span class="si">{0:.2f}</span><span class="s2">%&quot;</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">expected_drawdown</span><span class="p">))</span> <span class="c1"># TODO: Calculate expected Sharpe</span> </pre></div> diff --git a/guaranteed-money-maker.html b/guaranteed-money-maker.html index e1dc3fa..dd6a062 100644 --- a/guaranteed-money-maker.html +++ b/guaranteed-money-maker.html @@ -28,6 +28,17 @@ + + + @@ -82,7 +93,7 @@

If you can see into the future, that is.

My previous class in Stochastic Calculus covered a lot of interesting topics, and the important one for today -is the Gambler's Ruin problem. If you're interested in some of the theory behind it, also make sure to check out +is the Gambler's Ruin problem. If you're interested in some of the theory behind it, also make sure to check out random walks. The important bit is that we studied the Martingale Betting Strategy, which describes for us a guaranteed way to eventually make money.

The strategy goes like this: You are going to toss a fair coin with a friend. If you guess heads or tails correctly, you get back double the money you bet. If you guess incorrectly, you lose money. How should you bet?

@@ -142,7 +153,7 @@ d_n &> \frac{o_n}{c_n - o_n} \sum_{i=1}^{n-1} d_i(1 - \frac{1}{o_i})
In [1]:
-
using Quandl
+
using Quandl
 api_key = ""
 daily_investment = function(current_open, current_close, purchase_history, open_history)
     # We're not going to safeguard against divide by 0 - that's the user's responsibility
@@ -172,7 +183,7 @@ d_n &> \frac{o_n}{c_n - o_n} \sum_{i=1}^{n-1} d_i(1 - \frac{1}{o_i})
 
In [2]:
-
is_profitable = function(current_price, purchase_history, open_history)
+
is_profitable = function(current_price, purchase_history, open_history)
     shares = sum(purchase_history ./ open_history)
     return current_price*shares > sum(purchase_history)
 end
@@ -207,8 +218,8 @@ d_n &> \frac{o_n}{c_n - o_n} \sum_{i=1}^{n-1} d_i(1 - \frac{1}{o_i})
     leverages = [sum(investments[1:i]) for i=1:length(investments)]
     max_leverage = maximum(leverages) / investments[1]
     println("Max leverage: $(max_leverage)")
-    println("Days invested: $(length(investments))")
-    println("Profit: $profit")
+    println("Days invested: $(length(investments))")
+    println("Profit: $profit")
 end;
 
@@ -235,7 +246,7 @@ d_n &> \frac{o_n}{c_n - o_n} \sum_{i=1}^{n-1} d_i(1 - \frac{1}{o_i})
In [3]:
-
investments, profit = simulate("YAHOO/LMT", Date(2015, 11, 29), 100, 1.01, 10)
+
investments, profit = simulate("YAHOO/LMT", Date(2015, 11, 29), 100, 1.01, 10)
 sim_summary(investments, profit)
 
@@ -278,7 +289,7 @@ Profit: 0.6894803101560001
In [4]:
-
investments, profit = simulate("YAHOO/LMT", Date(2015, 11, 29), 100, 1.02, 10)
+
investments, profit = simulate("YAHOO/LMT", Date(2015, 11, 29), 100, 1.02, 10)
 sim_summary(investments, profit)
 
@@ -341,6 +352,20 @@ MathJax.Hub.Config({tex2jax: {inlineMath: [['$','$'], ['\(','\)']]}}); +
+
+ + +
diff --git a/index.html b/index.html index 0b0e7f6..0440322 100644 --- a/index.html +++ b/index.html @@ -27,6 +27,17 @@ + + + @@ -72,6 +83,8 @@

+
Sat 05 March 2016
+
Predicting Santander Customer Happiness
Fri 26 February 2016
Profitability using the Investment Formula
Wed 03 February 2016
diff --git a/predicting-santander-customer-happiness.html b/predicting-santander-customer-happiness.html new file mode 100644 index 0000000..465ef22 --- /dev/null +++ b/predicting-santander-customer-happiness.html @@ -0,0 +1,1063 @@ + + + + + + + + + + + Predicting Santander Customer Happiness - Bradlee Speice + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + +
+
+ + +
+
+ + + + +
+
+
+
+

Predicting Santander Customer Happiness

+

Bradlee Speice, Sat 05 March 2016, Blog

+
+
+

+ +data science, kaggle, machine learning

+
+
+
+
+ + + +
+ + + + +
+

+

+
+
+
+
+

My first Kaggle competition

It's time! After embarking on a Machine Learning class this semester, and with a Saturday in which I don't have much planned, I wanted to put this class and training to work. It's my first competition submission. I want to walk you guys through how I'm approaching this problem, because I thought it would be really neat. The competition is Banco Santander's Santander Customer Satisfaction competition. It seemed like an easy enough problem I could actually make decent progress on it.

+

Data Exploration

First up: we need to load our data and do some exploratory work. Because we're going to be using this data for model selection prior to testing, we need to make a further split. I've already gone ahead and done this work, please see the code in the appendix below.

+ +
+
+
+
+
+
In [1]:
+
+
+
import pandas as pd
+import numpy as np
+import matplotlib.pyplot as plt
+%matplotlib inline
+
+# Record how long it takes to run the notebook - I'm curious.
+from datetime import datetime
+start = datetime.now()
+
+dataset = pd.read_csv('split_train.csv')
+dataset.index = dataset.ID
+X = dataset.drop(['TARGET', 'ID', 'ID.1'], 1)
+y = dataset.TARGET
+
+ +
+
+
+ +
+
+
+
In [2]:
+
+
+
y.unique()
+
+ +
+
+
+ +
+
+ + +
Out[2]:
+ + +
+
array([0, 1], dtype=int64)
+
+ +
+ +
+
+ +
+
+
+
In [3]:
+
+
+
len(X.columns)
+
+ +
+
+
+ +
+
+ + +
Out[3]:
+ + +
+
369
+
+ +
+ +
+
+ +
+
+
+
+
+
+

Okay, so there are only two classes we're predicting: 1 for unsatisfied customers, 0 for satisfied customers. I would have preferred this to be something more like a regression, or predicting multiple classes: maybe the customer isn't the most happy, but is nowhere near closing their accounts. For now though, that's just the data we're working with.

+

Now, I'd like to make a scatter matrix of everything going on. Unfortunately as noted above, we have 369 different features. There's no way I can graphically make sense of that much data to start with.

+

We're also not told what the data actually represents: Are these survey results? Average time between contact with a customer care person? Frequency of contacting a customer care person? The idea is that I need to reduce the number of dimensions we're predicting across.

+

Dimensionality Reduction pt. 1 - Binary Classifiers

My first attempt to reduce the data dimensionality is to find all the binary classifiers in the dataset (i.e. 0 or 1 values) and see if any of those are good (or anti-good) predictors of the final data.

+ +
+
+
+
+
+
In [4]:
+
+
+
cols = X.columns
+b_class = []
+for c in cols:
+    if len(X[c].unique()) == 2:
+        b_class.append(c)
+        
+len(b_class)
+
+ +
+
+
+ +
+
+ + +
Out[4]:
+ + +
+
111
+
+ +
+ +
+
+ +
+
+
+
+
+
+

So there are 111 features in the dataset that are a binary label. Let's see if any of them are good at predicting the users satisfaction!

+ +
+
+
+
+
+
In [5]:
+
+
+
# First we need to `binarize` the data to 0-1; some of the labels are {0, 1},
+# some are {0, 3}, etc.
+from sklearn.preprocessing import binarize
+X_bin = binarize(X[b_class])
+
+accuracy = [np.mean(X_bin[:,i] == y) for i in range(0, len(b_class))]
+acc_df = pd.DataFrame({"Accuracy": accuracy}, index=b_class)
+acc_df.describe()
+
+ +
+
+
+ +
+
+ + +
Out[5]:
+ +
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Accuracy
count111.000000
mean0.905159
std0.180602
min0.043598
25%0.937329
50%0.959372
75%0.960837
max0.960837
+
+
+ +
+ +
+
+ +
+
+
+
+
+
+

Wow! Looks like we've got some incredibly predictive features! So much so that we should be a bit concerned. My initial guess for what's happening is that we have a sparsity issue: so many of the values are 0, and these likely happen to line up with satisfied customers.

+

So the question we must now answer, which I likely should have asked long before now: What exactly is the distribution of un/satisfied customers?

+ +
+
+
+
+
+
In [6]:
+
+
+
unsat = y[y == 1].count()
+print("Satisfied customers: {}; Unsatisfied customers: {}".format(len(y) - unsat, unsat))
+naive_guess = np.mean(y == np.zeros(len(y)))
+print("Naive guess accuracy: {}".format(naive_guess))
+
+ +
+
+
+ +
+
+ + +
+
+
Satisfied customers: 51131; Unsatisfied customers: 2083
+Naive guess accuracy: 0.9608561656706882
+
+
+
+ +
+
+ +
+
+
+
+
+
+

This is a bit discouraging. A naive guess of "always satisfied" performs as well as our best individual binary classifier. What this tells me then, is that these data columns aren't incredibly helpful in prediction. I'd be interested in a polynomial expansion of this data-set, but for now, that's more computation than I want to take on.

+

Dimensionality Reduction pt. 2 - LDA

Knowing that our naive guess performs so well is a blessing and a curse:

+
    +
  • Curse: The threshold for performance is incredibly high: We can only "improve" over the naive guess by 4%
  • +
  • Blessing: All the binary classification features we just discovered are worthless on their own. We can throw them out and reduce the data dimensionality from 369 to 111.
  • +
+

Now, in removing these features from the dataset, I'm not saying that there is no "information" contained within them. There might be. But the only way we'd know is through a polynomial expansion, and I'm not going to take that on within this post.

+

My initial thought for a "next guess" is to use the LDA model for dimensionality reduction. However, it can only reduce dimensions to $1 - p$, with $p$ being the number of classes. Since this is a binary classification, every LDA model that I try will have dimensionality one; when I actually try this, the predictor ends up being slightly less accurate than the naive guess.

+

Instead, let's take a different approach to dimensionality reduction: principle components analysis. This allows us to perform the dimensionality reduction without worrying about the number of classes. Then, we'll use a Gaussian Naive Bayes model to actually do the prediction. This model is chosen simply because it doesn't take a long time to fit and compute; because PCA will take so long, I just want a prediction at the end of this. We can worry about using a more sophisticated LDA/QDA/SVM model later.

+

Now into the actual process: We're going to test out PCA dimensionality reduction from 1 - 20 dimensions, and then predict using a Gaussian Naive Bayes model. The 20 dimensions upper limit was selected because the accuracy never improves after you get beyond that (I found out by running it myself). Hopefully, we'll find that we can create a model better than the naive guess.

+ +
+
+
+
+
+
In [7]:
+
+
+
from sklearn.naive_bayes import GaussianNB
+from sklearn.decomposition import PCA
+
+X_no_bin = X.drop(b_class, 1)
+
+def evaluate_gnb(dims):
+    pca = PCA(n_components=dims)
+    X_xform = pca.fit_transform(X_no_bin)
+    
+    gnb = GaussianNB()
+    gnb.fit(X_xform, y)
+    return gnb.score(X_xform, y)
+
+dim_range = np.arange(1, 21)
+plt.plot(dim_range, [evaluate_gnb(dim) for dim in dim_range], label="Gaussian NB Accuracy")
+plt.axhline(naive_guess, label="Naive Guess", c='k')
+plt.axhline(1 - naive_guess, label="Inverse Naive Guess", c='k')
+plt.gcf().set_size_inches(12, 6)
+plt.legend();
+
+ +
+
+
+ +
+
+ + +
+ + +
+ +
+ +
+ +
+
+ +
+
+
+
+
+
+

**sigh...** After all the effort and computational power, we're still at square one: we have yet to beat out the naive guess threshold. With PCA in play we end up performing terribly, but not terribly enough that we can guess against ourselves.

+

Let's try one last-ditch attempt using the entire data set:

+ +
+
+
+
+
+
In [8]:
+
+
+
def evaluate_gnb_full(dims):
+    pca = PCA(n_components=dims)
+    X_xform = pca.fit_transform(X)
+    
+    gnb = GaussianNB()
+    gnb.fit(X_xform, y)
+    return gnb.score(X_xform, y)
+
+dim_range = np.arange(1, 21)
+plt.plot(dim_range, [evaluate_gnb(dim) for dim in dim_range], label="Gaussian NB Accuracy")
+plt.axhline(naive_guess, label="Naive Guess", c='k')
+plt.axhline(1 - naive_guess, label="Inverse Naive Guess", c='k')
+plt.gcf().set_size_inches(12, 6)
+plt.legend();
+
+ +
+
+
+ +
+
+ + +
+ + +
+ +
+ +
+ +
+
+ +
+
+
+
+
+
+

Nothing. It is interesting to note that the graphs are almost exactly the same: This would imply again that the variables we removed earlier (all the binary classifiers) indeed have almost no predictive power. It seems this problem is high-dimensional, but with almost no data that can actually inform our decisions.

+

Summary for Day 1

After spending a couple hours with this dataset, there seems to be a fundamental issue in play: We have very high-dimensional data, and it has no bearing on our ability to actually predict customer satisfaction. This can be a huge issue: it implies that no matter what model we use, we fundamentally can't perform well. I'm sure most of this is because I'm not an experienced data scientist. Even so, we have yet to develop a strategy that can actually beat out the village idiot; so far, the bank is best off just assuming all its customers are satisfied. Hopefully more to come soon.

+ +
+
+
+
+
+
In [9]:
+
+
+
end = datetime.now()
+print("Running time: {}".format(end - start))
+
+ +
+
+
+ +
+
+ + +
+
+
Running time: 0:00:58.715714
+
+
+
+ +
+
+ +
+
+
+
+
+
+

Appendix

Code used to split the initial training data:

+
from sklearn.cross_validation import train_test_split
+data = pd.read_csv('train.csv')
+data.index = data.ID
+
+data_train, data_validate = train_test_split(
+    data, train_size=.7)
+
+data_train.to_csv('split_train.csv')
+data_validate.to_csv('split_validate.csv')
+
+ +
+
+

+ + + + + +
+
+ + +
+ +
+ + + + + + + \ No newline at end of file diff --git a/profitability-using-the-investment-formula.html b/profitability-using-the-investment-formula.html index dc09a39..78b2fc6 100644 --- a/profitability-using-the-investment-formula.html +++ b/profitability-using-the-investment-formula.html @@ -28,6 +28,17 @@ + + + @@ -107,7 +118,7 @@
In [19]:
-
import numpy as np
+
import numpy as np
 import pandas as pd
 import matplotlib.pyplot as plt
 from Quandl import get as qget
@@ -206,7 +217,7 @@
 
In [7]:
-
fang_df = simulate_tickers(["YAHOO/FB", "YAHOO/AAPL", "YAHOO/NFLX", "YAHOO/GOOG"])
+
fang_df = simulate_tickers(["YAHOO/FB", "YAHOO/AAPL", "YAHOO/NFLX", "YAHOO/GOOG"])
 
@@ -219,7 +230,7 @@
In [8]:
-
fang_df.xs('days', axis=1, level=1).hist()
+
fang_df.xs('days', axis=1, level=1).hist()
 plt.gcf().set_size_inches(18, 8);
 plt.gcf().suptitle("Distribution of Days Until Profitability", fontsize=18);
 
@@ -693,7 +704,7 @@ kiSpNScYJEmSJElSa04wSJIkSZKk1pxgkCRJkiRJrf1/uiw9D8O0d6EAAAAASUVORK5CYII=
In [10]:
-
fang_df.xs('score', axis=1, level=1).plot()
+
fang_df.xs('score', axis=1, level=1).plot()
 plt.gcf().set_size_inches(18, 6)
 plt.gcf().suptitle("Profitability score over time", fontsize=18);
 
@@ -2332,7 +2343,7 @@ EZIkSZIkSZIkSZIk6RIpIiRJkiRJkiRJkiRJ0iX+D91jLOlExGw8AAAAAElFTkSuQmCC
In [13]:
-
cyclic_df = simulate_tickers(["YAHOO/X", "YAHOO/CAT", "YAHOO/NFLX", "YAHOO/GOOG"])
+
cyclic_df = simulate_tickers(["YAHOO/X", "YAHOO/CAT", "YAHOO/NFLX", "YAHOO/GOOG"])
 
@@ -2345,7 +2356,7 @@ EZIkSZIkSZIkSZIk6RIpIiRJkiRJkiRJkiRJ0iX+D91jLOlExGw8AAAAAElFTkSuQmCC
In [14]:
-
cyclic_df.xs('days', axis=1, level=1).hist()
+
cyclic_df.xs('days', axis=1, level=1).hist()
 plt.gcf().set_size_inches(18, 8);
 plt.gcf().suptitle("Distribution of Days Until Profitability", fontsize=18);
 
@@ -2833,7 +2844,7 @@ JEmSJEnSwGwwSJIkSZKkgdlgkCRJkiRJA7PBIEmSJEmSBvb/AfKAew4+oiojAAAAAElFTkSuQmCC
In [15]:
-
cyclic_df.xs('score', axis=1, level=1).plot()
+
cyclic_df.xs('score', axis=1, level=1).plot()
 plt.gcf().set_size_inches(18, 6)
 plt.gcf().suptitle("Profitability score over time", fontsize=18);
 
@@ -4618,7 +4629,7 @@ hUKhUCgUpor/A9AnO0PoIItTAAAAAElFTkSuQmCC
In [21]:
-
biotech_df = simulate_tickers(['YAHOO/REGN', 'YAHOO/CELG', 'GOOG/NASDAQ_BIB', 'GOOG/NASDAQ_IBB'])
+
biotech_df = simulate_tickers(['YAHOO/REGN', 'YAHOO/CELG', 'GOOG/NASDAQ_BIB', 'GOOG/NASDAQ_IBB'])
 
@@ -4631,7 +4642,7 @@ hUKhUCgUpor/A9AnO0PoIItTAAAAAElFTkSuQmCC
In [22]:
-
biotech_df.xs('days', axis=1, level=1).hist()
+
biotech_df.xs('days', axis=1, level=1).hist()
 plt.gcf().set_size_inches(18, 8);
 plt.gcf().suptitle("Distribution of Days Until Profitability", fontsize=18);
 
@@ -5184,7 +5195,7 @@ WKBTQVcCAAAAAElFTkSuQmCC
In [23]:
-
biotech_df.xs('score', axis=1, level=1).plot()
+
biotech_df.xs('score', axis=1, level=1).plot()
 plt.gcf().set_size_inches(18, 6)
 plt.gcf().suptitle("Profitability score over time", fontsize=18);
 
@@ -6951,6 +6962,20 @@ MathJax.Hub.Config({tex2jax: {inlineMath: [['$','$'], ['\\(','\\)']]}}); +
+
+ + +
diff --git a/tag/algorithmic-trading.html b/tag/algorithmic-trading.html index 6a509c0..7ff94f2 100644 --- a/tag/algorithmic-trading.html +++ b/tag/algorithmic-trading.html @@ -27,6 +27,17 @@ + + + diff --git a/tag/data-science.html b/tag/data-science.html index e53d805..5979b3f 100644 --- a/tag/data-science.html +++ b/tag/data-science.html @@ -27,6 +27,17 @@ + + + @@ -71,6 +82,8 @@

data science

+
Sat 05 March 2016
+
Predicting Santander Customer Happiness
Sat 23 January 2016
Cloudy in Seattle
Sat 26 December 2015
diff --git a/tag/finance.html b/tag/finance.html index ef4f116..e73589c 100644 --- a/tag/finance.html +++ b/tag/finance.html @@ -27,6 +27,17 @@ + + + diff --git a/tag/futures.html b/tag/futures.html index f53eb7c..14ac247 100644 --- a/tag/futures.html +++ b/tag/futures.html @@ -27,6 +27,17 @@ + + + diff --git a/tag/introduction.html b/tag/introduction.html index 1231103..f19a8e7 100644 --- a/tag/introduction.html +++ b/tag/introduction.html @@ -27,6 +27,17 @@ + + + diff --git a/tag/kaggle.html b/tag/kaggle.html new file mode 100644 index 0000000..bac1597 --- /dev/null +++ b/tag/kaggle.html @@ -0,0 +1,123 @@ + + + + + + + + + + + kaggle - Bradlee Speice + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + +
+
+ + +
+
+ + + +
+
+
+
+

: #kaggle

+
+

#kaggle

+
+
+
+
+ + +
+ + + + +
+
+

kaggle

+
+
Sat 05 March 2016
+
Predicting Santander Customer Happiness
+
+
+
+ + + + + + + \ No newline at end of file diff --git a/tag/machine-learning.html b/tag/machine-learning.html new file mode 100644 index 0000000..af43ac3 --- /dev/null +++ b/tag/machine-learning.html @@ -0,0 +1,123 @@ + + + + + + + + + + + machine learning - Bradlee Speice + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + +
+
+ + +
+
+ + + +
+
+
+
+

: #machine learning

+
+

#machine learning

+
+
+
+
+ + +
+ + + + +
+ +
+ + + + + + + \ No newline at end of file diff --git a/tag/martingale.html b/tag/martingale.html index 862e984..218f1fd 100644 --- a/tag/martingale.html +++ b/tag/martingale.html @@ -27,6 +27,17 @@ + + + diff --git a/tag/monte-carlo.html b/tag/monte-carlo.html index bcc2679..5a140c5 100644 --- a/tag/monte-carlo.html +++ b/tag/monte-carlo.html @@ -27,6 +27,17 @@ + + + diff --git a/tag/python.html b/tag/python.html index d93c6b4..d8b7ecc 100644 --- a/tag/python.html +++ b/tag/python.html @@ -27,6 +27,17 @@ + + + diff --git a/tag/simulation.html b/tag/simulation.html index f411a70..c2ae431 100644 --- a/tag/simulation.html +++ b/tag/simulation.html @@ -27,6 +27,17 @@ + + + diff --git a/tag/strategy.html b/tag/strategy.html index 7e67507..c93f26e 100644 --- a/tag/strategy.html +++ b/tag/strategy.html @@ -27,6 +27,17 @@ + + + diff --git a/tag/trading.html b/tag/trading.html index 303e0c8..7a52cef 100644 --- a/tag/trading.html +++ b/tag/trading.html @@ -27,6 +27,17 @@ + + + diff --git a/tag/weather.html b/tag/weather.html index 201e6af..c982f23 100644 --- a/tag/weather.html +++ b/tag/weather.html @@ -27,6 +27,17 @@ + + + diff --git a/tags.html b/tags.html index e551a84..0b36e45 100644 --- a/tags.html +++ b/tags.html @@ -27,6 +27,17 @@ + + + @@ -73,7 +84,7 @@
1 article
algorithmic-trading
-
2 articles
+
3 articles
data science
1 article
finance
@@ -82,6 +93,10 @@
1 article
introduction
1 article
+
kaggle
+
1 article
+
machine learning
+
1 article
martingale
1 article
monte carlo
diff --git a/testing-cramer.html b/testing-cramer.html index 7902ef6..f1ef310 100644 --- a/testing-cramer.html +++ b/testing-cramer.html @@ -28,6 +28,17 @@ + + + @@ -81,7 +92,7 @@
In [1]:
-
import requests
+
import requests
 import pandas as pd
 import numpy as np
 from dateutil import parser as dtparser
@@ -116,7 +127,7 @@
 
In [2]:
-
class ArticleListParser(HTMLParser):
+
class ArticleListParser(HTMLParser):
     """Given a web page with articles on it, parse out the article links"""
     
     articles = []
@@ -129,7 +140,7 @@
             self.articles.append(href)
             
 base_url = "http://seekingalpha.com/author/wall-street-breakfast/articles"
-article_page_urls = [base_url] + [base_url + '/{}'.format(i) for i in range(2, 20)]
+article_page_urls = [base_url] + [base_url + '/{}'.format(i) for i in range(2, 20)]
 
 global_articles = []
 for page in article_page_urls:
@@ -151,7 +162,7 @@
 
In [3]:
-
class ArticleReturnParser(HTMLParser):
+
class ArticleReturnParser(HTMLParser):
     "Given an article, parse out the futures returns in it"
     
     record_font_tags = False
@@ -238,7 +249,7 @@
 
In [4]:
-
# article_df is sorted by date, so we get the first row.
+
# article_df is sorted by date, so we get the first row.
 start_date = article_df.sort_values(by='date').iloc[0]['date'] - relativedelta(days=1)
 SPY = Quandl.get("GOOG/NYSE_SPY", trim_start=start_date)
 DJIA = Quandl.get("GOOG/AMS_DIA", trim_start=start_date)
@@ -267,7 +278,7 @@
 
In [5]:
-
def calculate_opening_ret(frame):
+
def calculate_opening_ret(frame):
     # I'm not a huge fan of the appending for loop,
     # but it's a bit verbose for a comprehension
     data = {}
@@ -380,7 +391,7 @@ S&P     0.604478  0.597015  0.811808  0.848708
 
In [6]:
-
def calculate_closing_ret(frame):
+
def calculate_closing_ret(frame):
     # I'm not a huge fan of the appending for loop,
     # but it's a bit verbose for a comprehension
     data = {}
@@ -501,6 +512,20 @@ MathJax.Hub.Config({tex2jax: {inlineMath: [['$','$'], ['\(','\)']]}});
 
 
 
+    
+
+ + +
diff --git a/welcome-and-an-algorithm.html b/welcome-and-an-algorithm.html index dbc91de..32af181 100644 --- a/welcome-and-an-algorithm.html +++ b/welcome-and-an-algorithm.html @@ -28,6 +28,17 @@ + + + @@ -146,7 +157,7 @@ to make some tweaks over the coming weeks, and do another forward test in Januar
In [1]:
-
from IPython.display import display
+
from IPython.display import display
 import Quandl
 from datetime import datetime, timedelta
 
@@ -155,7 +166,7 @@ to make some tweaks over the coming weeks, and do another forward test in Januar
 lookback = 30
 d_col = 'Close'
 
-data = {tick: Quandl.get('YAHOO/{}'.format(tick))[-lookback:] for tick in tickers}
+data = {tick: Quandl.get('YAHOO/{}'.format(tick))[-lookback:] for tick in tickers}
 market = Quandl.get(market_ticker)
 
@@ -179,7 +190,7 @@ to make some tweaks over the coming weeks, and do another forward test in Januar
In [2]:
-
returns = {tick: data[tick][d_col].pct_change() for tick in tickers}
+
returns = {tick: data[tick][d_col].pct_change() for tick in tickers}
 
 display({tick: returns[tick].mean() for tick in tickers})
 
@@ -225,7 +236,7 @@ to make some tweaks over the coming weeks, and do another forward test in Januar
In [3]:
-
market_returns = market.pct_change()
+
market_returns = market.pct_change()
 
 sharpe = lambda ret: (ret.mean() - market_returns[d_col].mean()) / ret.std()
 sharpes = {tick: sharpe(returns[tick]) for tick in tickers}
@@ -273,7 +284,7 @@ to make some tweaks over the coming weeks, and do another forward test in Januar
 
In [4]:
-
drawdown = lambda ret: ret.abs().max()
+
drawdown = lambda ret: ret.abs().max()
 drawdowns = {tick: drawdown(returns[tick]) for tick in tickers}
 
 display(drawdowns)
@@ -326,7 +337,7 @@ s.t.\ \ & \vec{1} \omega = 1\\
 
In [5]:
-
import numpy as np
+
import numpy as np
 from scipy.optimize import minimize
 
 #sharpe_limit = .1
@@ -380,13 +391,13 @@ s.t.\ \ & \vec{1} \omega = 1\\
 # Optimization time!
 display(optimal.message)
 
-display("Holdings: {}".format(list(zip(tickers, optimal.x))))
+display("Holdings: {}".format(list(zip(tickers, optimal.x))))
 
 expected_return = optimal.fun * -100  # multiply by -100 to scale, and compensate for minimizing
-display("Expected Return: {:.3f}%".format(expected_return))
+display("Expected Return: {:.3f}%".format(expected_return))
 
 expected_drawdown = sum(abs(optimal.x) * dd_a) / sum(abs(optimal.x)) * 100
-display("Expected Max Drawdown: {0:.2f}%".format(expected_drawdown))
+display("Expected Max Drawdown: {0:.2f}%".format(expected_drawdown))
 
 # TODO: Calculate expected Sharpe
 
@@ -446,6 +457,20 @@ MathJax.Hub.Config({tex2jax: {inlineMath: [['$','$'], ['\(','\)']]}}); +
+
+ + +