Add a new Rust article!

This commit is contained in:
Bradlee Speice
2016-10-22 22:29:18 -04:00
parent f9e8c08491
commit 6910e70b66
26 changed files with 2728 additions and 1520 deletions

View File

@ -108,13 +108,13 @@
<span class="kn">import</span> <span class="nn">matplotlib.pyplot</span> <span class="k">as</span> <span class="nn">plt</span>
<span class="o">%</span><span class="k">matplotlib</span> inline
<span class="c"># Record how long it takes to run the notebook - I&#39;m curious.</span>
<span class="c1"># Record how long it takes to run the notebook - I&#39;m curious.</span>
<span class="kn">from</span> <span class="nn">datetime</span> <span class="k">import</span> <span class="n">datetime</span>
<span class="n">start</span> <span class="o">=</span> <span class="n">datetime</span><span class="o">.</span><span class="n">now</span><span class="p">()</span>
<span class="n">dataset</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">read_csv</span><span class="p">(</span><span class="s">&#39;split_train.csv&#39;</span><span class="p">)</span>
<span class="n">dataset</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">read_csv</span><span class="p">(</span><span class="s1">&#39;split_train.csv&#39;</span><span class="p">)</span>
<span class="n">dataset</span><span class="o">.</span><span class="n">index</span> <span class="o">=</span> <span class="n">dataset</span><span class="o">.</span><span class="n">ID</span>
<span class="n">X</span> <span class="o">=</span> <span class="n">dataset</span><span class="o">.</span><span class="n">drop</span><span class="p">([</span><span class="s">&#39;TARGET&#39;</span><span class="p">,</span> <span class="s">&#39;ID&#39;</span><span class="p">,</span> <span class="s">&#39;ID.1&#39;</span><span class="p">],</span> <span class="mi">1</span><span class="p">)</span>
<span class="n">X</span> <span class="o">=</span> <span class="n">dataset</span><span class="o">.</span><span class="n">drop</span><span class="p">([</span><span class="s1">&#39;TARGET&#39;</span><span class="p">,</span> <span class="s1">&#39;ID&#39;</span><span class="p">,</span> <span class="s1">&#39;ID.1&#39;</span><span class="p">],</span> <span class="mi">1</span><span class="p">)</span>
<span class="n">y</span> <span class="o">=</span> <span class="n">dataset</span><span class="o">.</span><span class="n">TARGET</span>
</pre></div>
@ -244,13 +244,13 @@
<div class="prompt input_prompt">In&nbsp;[5]:</div>
<div class="inner_cell">
<div class="input_area">
<div class=" highlight hl-ipython3"><pre><span class="c"># First we need to `binarize` the data to 0-1; some of the labels are {0, 1},</span>
<span class="c"># some are {0, 3}, etc.</span>
<div class=" highlight hl-ipython3"><pre><span class="c1"># First we need to `binarize` the data to 0-1; some of the labels are {0, 1},</span>
<span class="c1"># some are {0, 3}, etc.</span>
<span class="kn">from</span> <span class="nn">sklearn.preprocessing</span> <span class="k">import</span> <span class="n">binarize</span>
<span class="n">X_bin</span> <span class="o">=</span> <span class="n">binarize</span><span class="p">(</span><span class="n">X</span><span class="p">[</span><span class="n">b_class</span><span class="p">])</span>
<span class="n">accuracy</span> <span class="o">=</span> <span class="p">[</span><span class="n">np</span><span class="o">.</span><span class="n">mean</span><span class="p">(</span><span class="n">X_bin</span><span class="p">[:,</span><span class="n">i</span><span class="p">]</span> <span class="o">==</span> <span class="n">y</span><span class="p">)</span> <span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span> <span class="nb">len</span><span class="p">(</span><span class="n">b_class</span><span class="p">))]</span>
<span class="n">acc_df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">({</span><span class="s">&quot;Accuracy&quot;</span><span class="p">:</span> <span class="n">accuracy</span><span class="p">},</span> <span class="n">index</span><span class="o">=</span><span class="n">b_class</span><span class="p">)</span>
<span class="n">acc_df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">({</span><span class="s2">&quot;Accuracy&quot;</span><span class="p">:</span> <span class="n">accuracy</span><span class="p">},</span> <span class="n">index</span><span class="o">=</span><span class="n">b_class</span><span class="p">)</span>
<span class="n">acc_df</span><span class="o">.</span><span class="n">describe</span><span class="p">()</span>
</pre></div>
@ -334,9 +334,9 @@
<div class="inner_cell">
<div class="input_area">
<div class=" highlight hl-ipython3"><pre><span class="n">unsat</span> <span class="o">=</span> <span class="n">y</span><span class="p">[</span><span class="n">y</span> <span class="o">==</span> <span class="mi">1</span><span class="p">]</span><span class="o">.</span><span class="n">count</span><span class="p">()</span>
<span class="nb">print</span><span class="p">(</span><span class="s">&quot;Satisfied customers: {}; Unsatisfied customers: {}&quot;</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="nb">len</span><span class="p">(</span><span class="n">y</span><span class="p">)</span> <span class="o">-</span> <span class="n">unsat</span><span class="p">,</span> <span class="n">unsat</span><span class="p">))</span>
<span class="nb">print</span><span class="p">(</span><span class="s2">&quot;Satisfied customers: {}; Unsatisfied customers: {}&quot;</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="nb">len</span><span class="p">(</span><span class="n">y</span><span class="p">)</span> <span class="o">-</span> <span class="n">unsat</span><span class="p">,</span> <span class="n">unsat</span><span class="p">))</span>
<span class="n">naive_guess</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">mean</span><span class="p">(</span><span class="n">y</span> <span class="o">==</span> <span class="n">np</span><span class="o">.</span><span class="n">zeros</span><span class="p">(</span><span class="nb">len</span><span class="p">(</span><span class="n">y</span><span class="p">)))</span>
<span class="nb">print</span><span class="p">(</span><span class="s">&quot;Naive guess accuracy: {}&quot;</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">naive_guess</span><span class="p">))</span>
<span class="nb">print</span><span class="p">(</span><span class="s2">&quot;Naive guess accuracy: {}&quot;</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">naive_guess</span><span class="p">))</span>
</pre></div>
</div>
@ -397,9 +397,9 @@ Naive guess accuracy: 0.9608561656706882
<span class="k">return</span> <span class="n">gnb</span><span class="o">.</span><span class="n">score</span><span class="p">(</span><span class="n">X_xform</span><span class="p">,</span> <span class="n">y</span><span class="p">)</span>
<span class="n">dim_range</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">arange</span><span class="p">(</span><span class="mi">1</span><span class="p">,</span> <span class="mi">21</span><span class="p">)</span>
<span class="n">plt</span><span class="o">.</span><span class="n">plot</span><span class="p">(</span><span class="n">dim_range</span><span class="p">,</span> <span class="p">[</span><span class="n">evaluate_gnb</span><span class="p">(</span><span class="n">dim</span><span class="p">)</span> <span class="k">for</span> <span class="n">dim</span> <span class="ow">in</span> <span class="n">dim_range</span><span class="p">],</span> <span class="n">label</span><span class="o">=</span><span class="s">&quot;Gaussian NB Accuracy&quot;</span><span class="p">)</span>
<span class="n">plt</span><span class="o">.</span><span class="n">axhline</span><span class="p">(</span><span class="n">naive_guess</span><span class="p">,</span> <span class="n">label</span><span class="o">=</span><span class="s">&quot;Naive Guess&quot;</span><span class="p">,</span> <span class="n">c</span><span class="o">=</span><span class="s">&#39;k&#39;</span><span class="p">)</span>
<span class="n">plt</span><span class="o">.</span><span class="n">axhline</span><span class="p">(</span><span class="mi">1</span> <span class="o">-</span> <span class="n">naive_guess</span><span class="p">,</span> <span class="n">label</span><span class="o">=</span><span class="s">&quot;Inverse Naive Guess&quot;</span><span class="p">,</span> <span class="n">c</span><span class="o">=</span><span class="s">&#39;k&#39;</span><span class="p">)</span>
<span class="n">plt</span><span class="o">.</span><span class="n">plot</span><span class="p">(</span><span class="n">dim_range</span><span class="p">,</span> <span class="p">[</span><span class="n">evaluate_gnb</span><span class="p">(</span><span class="n">dim</span><span class="p">)</span> <span class="k">for</span> <span class="n">dim</span> <span class="ow">in</span> <span class="n">dim_range</span><span class="p">],</span> <span class="n">label</span><span class="o">=</span><span class="s2">&quot;Gaussian NB Accuracy&quot;</span><span class="p">)</span>
<span class="n">plt</span><span class="o">.</span><span class="n">axhline</span><span class="p">(</span><span class="n">naive_guess</span><span class="p">,</span> <span class="n">label</span><span class="o">=</span><span class="s2">&quot;Naive Guess&quot;</span><span class="p">,</span> <span class="n">c</span><span class="o">=</span><span class="s1">&#39;k&#39;</span><span class="p">)</span>
<span class="n">plt</span><span class="o">.</span><span class="n">axhline</span><span class="p">(</span><span class="mi">1</span> <span class="o">-</span> <span class="n">naive_guess</span><span class="p">,</span> <span class="n">label</span><span class="o">=</span><span class="s2">&quot;Inverse Naive Guess&quot;</span><span class="p">,</span> <span class="n">c</span><span class="o">=</span><span class="s1">&#39;k&#39;</span><span class="p">)</span>
<span class="n">plt</span><span class="o">.</span><span class="n">gcf</span><span class="p">()</span><span class="o">.</span><span class="n">set_size_inches</span><span class="p">(</span><span class="mi">12</span><span class="p">,</span> <span class="mi">6</span><span class="p">)</span>
<span class="n">plt</span><span class="o">.</span><span class="n">legend</span><span class="p">();</span>
</pre></div>
@ -683,9 +683,9 @@ rkJggg==
<span class="k">return</span> <span class="n">gnb</span><span class="o">.</span><span class="n">score</span><span class="p">(</span><span class="n">X_xform</span><span class="p">,</span> <span class="n">y</span><span class="p">)</span>
<span class="n">dim_range</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">arange</span><span class="p">(</span><span class="mi">1</span><span class="p">,</span> <span class="mi">21</span><span class="p">)</span>
<span class="n">plt</span><span class="o">.</span><span class="n">plot</span><span class="p">(</span><span class="n">dim_range</span><span class="p">,</span> <span class="p">[</span><span class="n">evaluate_gnb</span><span class="p">(</span><span class="n">dim</span><span class="p">)</span> <span class="k">for</span> <span class="n">dim</span> <span class="ow">in</span> <span class="n">dim_range</span><span class="p">],</span> <span class="n">label</span><span class="o">=</span><span class="s">&quot;Gaussian NB Accuracy&quot;</span><span class="p">)</span>
<span class="n">plt</span><span class="o">.</span><span class="n">axhline</span><span class="p">(</span><span class="n">naive_guess</span><span class="p">,</span> <span class="n">label</span><span class="o">=</span><span class="s">&quot;Naive Guess&quot;</span><span class="p">,</span> <span class="n">c</span><span class="o">=</span><span class="s">&#39;k&#39;</span><span class="p">)</span>
<span class="n">plt</span><span class="o">.</span><span class="n">axhline</span><span class="p">(</span><span class="mi">1</span> <span class="o">-</span> <span class="n">naive_guess</span><span class="p">,</span> <span class="n">label</span><span class="o">=</span><span class="s">&quot;Inverse Naive Guess&quot;</span><span class="p">,</span> <span class="n">c</span><span class="o">=</span><span class="s">&#39;k&#39;</span><span class="p">)</span>
<span class="n">plt</span><span class="o">.</span><span class="n">plot</span><span class="p">(</span><span class="n">dim_range</span><span class="p">,</span> <span class="p">[</span><span class="n">evaluate_gnb</span><span class="p">(</span><span class="n">dim</span><span class="p">)</span> <span class="k">for</span> <span class="n">dim</span> <span class="ow">in</span> <span class="n">dim_range</span><span class="p">],</span> <span class="n">label</span><span class="o">=</span><span class="s2">&quot;Gaussian NB Accuracy&quot;</span><span class="p">)</span>
<span class="n">plt</span><span class="o">.</span><span class="n">axhline</span><span class="p">(</span><span class="n">naive_guess</span><span class="p">,</span> <span class="n">label</span><span class="o">=</span><span class="s2">&quot;Naive Guess&quot;</span><span class="p">,</span> <span class="n">c</span><span class="o">=</span><span class="s1">&#39;k&#39;</span><span class="p">)</span>
<span class="n">plt</span><span class="o">.</span><span class="n">axhline</span><span class="p">(</span><span class="mi">1</span> <span class="o">-</span> <span class="n">naive_guess</span><span class="p">,</span> <span class="n">label</span><span class="o">=</span><span class="s2">&quot;Inverse Naive Guess&quot;</span><span class="p">,</span> <span class="n">c</span><span class="o">=</span><span class="s1">&#39;k&#39;</span><span class="p">)</span>
<span class="n">plt</span><span class="o">.</span><span class="n">gcf</span><span class="p">()</span><span class="o">.</span><span class="n">set_size_inches</span><span class="p">(</span><span class="mi">12</span><span class="p">,</span> <span class="mi">6</span><span class="p">)</span>
<span class="n">plt</span><span class="o">.</span><span class="n">legend</span><span class="p">();</span>
</pre></div>
@ -961,7 +961,7 @@ rkJggg==
<div class="inner_cell">
<div class="input_area">
<div class=" highlight hl-ipython3"><pre><span class="n">end</span> <span class="o">=</span> <span class="n">datetime</span><span class="o">.</span><span class="n">now</span><span class="p">()</span>
<span class="nb">print</span><span class="p">(</span><span class="s">&quot;Running time: {}&quot;</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">end</span> <span class="o">-</span> <span class="n">start</span><span class="p">))</span>
<span class="nb">print</span><span class="p">(</span><span class="s2">&quot;Running time: {}&quot;</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">end</span> <span class="o">-</span> <span class="n">start</span><span class="p">))</span>
</pre></div>
</div>
@ -990,14 +990,14 @@ rkJggg==
<div class="text_cell_render border-box-sizing rendered_html">
<h1 id="Appendix">Appendix<a class="anchor-link" href="#Appendix">&#182;</a></h1><p>Code used to split the initial training data:</p>
<div class="highlight"><pre><span class="kn">from</span> <span class="nn">sklearn.cross_validation</span> <span class="kn">import</span> <span class="n">train_test_split</span>
<span class="n">data</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">read_csv</span><span class="p">(</span><span class="s">&#39;train.csv&#39;</span><span class="p">)</span>
<span class="n">data</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">read_csv</span><span class="p">(</span><span class="s1">&#39;train.csv&#39;</span><span class="p">)</span>
<span class="n">data</span><span class="o">.</span><span class="n">index</span> <span class="o">=</span> <span class="n">data</span><span class="o">.</span><span class="n">ID</span>
<span class="n">data_train</span><span class="p">,</span> <span class="n">data_validate</span> <span class="o">=</span> <span class="n">train_test_split</span><span class="p">(</span>
<span class="n">data</span><span class="p">,</span> <span class="n">train_size</span><span class="o">=.</span><span class="mi">7</span><span class="p">)</span>
<span class="n">data_train</span><span class="o">.</span><span class="n">to_csv</span><span class="p">(</span><span class="s">&#39;split_train.csv&#39;</span><span class="p">)</span>
<span class="n">data_validate</span><span class="o">.</span><span class="n">to_csv</span><span class="p">(</span><span class="s">&#39;split_validate.csv&#39;</span><span class="p">)</span>
<span class="n">data_train</span><span class="o">.</span><span class="n">to_csv</span><span class="p">(</span><span class="s1">&#39;split_train.csv&#39;</span><span class="p">)</span>
<span class="n">data_validate</span><span class="o">.</span><span class="n">to_csv</span><span class="p">(</span><span class="s1">&#39;split_validate.csv&#39;</span><span class="p">)</span>
</pre></div>
</div>