mirror of
https://github.com/bspeice/bspeice.github.io
synced 2025-07-02 06:16:45 -04:00
Add a new Rust article!
This commit is contained in:
@ -108,13 +108,13 @@
|
||||
<span class="kn">import</span> <span class="nn">matplotlib.pyplot</span> <span class="k">as</span> <span class="nn">plt</span>
|
||||
<span class="o">%</span><span class="k">matplotlib</span> inline
|
||||
|
||||
<span class="c"># Record how long it takes to run the notebook - I'm curious.</span>
|
||||
<span class="c1"># Record how long it takes to run the notebook - I'm curious.</span>
|
||||
<span class="kn">from</span> <span class="nn">datetime</span> <span class="k">import</span> <span class="n">datetime</span>
|
||||
<span class="n">start</span> <span class="o">=</span> <span class="n">datetime</span><span class="o">.</span><span class="n">now</span><span class="p">()</span>
|
||||
|
||||
<span class="n">dataset</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">read_csv</span><span class="p">(</span><span class="s">'split_train.csv'</span><span class="p">)</span>
|
||||
<span class="n">dataset</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">read_csv</span><span class="p">(</span><span class="s1">'split_train.csv'</span><span class="p">)</span>
|
||||
<span class="n">dataset</span><span class="o">.</span><span class="n">index</span> <span class="o">=</span> <span class="n">dataset</span><span class="o">.</span><span class="n">ID</span>
|
||||
<span class="n">X</span> <span class="o">=</span> <span class="n">dataset</span><span class="o">.</span><span class="n">drop</span><span class="p">([</span><span class="s">'TARGET'</span><span class="p">,</span> <span class="s">'ID'</span><span class="p">,</span> <span class="s">'ID.1'</span><span class="p">],</span> <span class="mi">1</span><span class="p">)</span>
|
||||
<span class="n">X</span> <span class="o">=</span> <span class="n">dataset</span><span class="o">.</span><span class="n">drop</span><span class="p">([</span><span class="s1">'TARGET'</span><span class="p">,</span> <span class="s1">'ID'</span><span class="p">,</span> <span class="s1">'ID.1'</span><span class="p">],</span> <span class="mi">1</span><span class="p">)</span>
|
||||
<span class="n">y</span> <span class="o">=</span> <span class="n">dataset</span><span class="o">.</span><span class="n">TARGET</span>
|
||||
</pre></div>
|
||||
|
||||
@ -244,13 +244,13 @@
|
||||
<div class="prompt input_prompt">In [5]:</div>
|
||||
<div class="inner_cell">
|
||||
<div class="input_area">
|
||||
<div class=" highlight hl-ipython3"><pre><span class="c"># First we need to `binarize` the data to 0-1; some of the labels are {0, 1},</span>
|
||||
<span class="c"># some are {0, 3}, etc.</span>
|
||||
<div class=" highlight hl-ipython3"><pre><span class="c1"># First we need to `binarize` the data to 0-1; some of the labels are {0, 1},</span>
|
||||
<span class="c1"># some are {0, 3}, etc.</span>
|
||||
<span class="kn">from</span> <span class="nn">sklearn.preprocessing</span> <span class="k">import</span> <span class="n">binarize</span>
|
||||
<span class="n">X_bin</span> <span class="o">=</span> <span class="n">binarize</span><span class="p">(</span><span class="n">X</span><span class="p">[</span><span class="n">b_class</span><span class="p">])</span>
|
||||
|
||||
<span class="n">accuracy</span> <span class="o">=</span> <span class="p">[</span><span class="n">np</span><span class="o">.</span><span class="n">mean</span><span class="p">(</span><span class="n">X_bin</span><span class="p">[:,</span><span class="n">i</span><span class="p">]</span> <span class="o">==</span> <span class="n">y</span><span class="p">)</span> <span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span> <span class="nb">len</span><span class="p">(</span><span class="n">b_class</span><span class="p">))]</span>
|
||||
<span class="n">acc_df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">({</span><span class="s">"Accuracy"</span><span class="p">:</span> <span class="n">accuracy</span><span class="p">},</span> <span class="n">index</span><span class="o">=</span><span class="n">b_class</span><span class="p">)</span>
|
||||
<span class="n">acc_df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">({</span><span class="s2">"Accuracy"</span><span class="p">:</span> <span class="n">accuracy</span><span class="p">},</span> <span class="n">index</span><span class="o">=</span><span class="n">b_class</span><span class="p">)</span>
|
||||
<span class="n">acc_df</span><span class="o">.</span><span class="n">describe</span><span class="p">()</span>
|
||||
</pre></div>
|
||||
|
||||
@ -334,9 +334,9 @@
|
||||
<div class="inner_cell">
|
||||
<div class="input_area">
|
||||
<div class=" highlight hl-ipython3"><pre><span class="n">unsat</span> <span class="o">=</span> <span class="n">y</span><span class="p">[</span><span class="n">y</span> <span class="o">==</span> <span class="mi">1</span><span class="p">]</span><span class="o">.</span><span class="n">count</span><span class="p">()</span>
|
||||
<span class="nb">print</span><span class="p">(</span><span class="s">"Satisfied customers: {}; Unsatisfied customers: {}"</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="nb">len</span><span class="p">(</span><span class="n">y</span><span class="p">)</span> <span class="o">-</span> <span class="n">unsat</span><span class="p">,</span> <span class="n">unsat</span><span class="p">))</span>
|
||||
<span class="nb">print</span><span class="p">(</span><span class="s2">"Satisfied customers: {}; Unsatisfied customers: {}"</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="nb">len</span><span class="p">(</span><span class="n">y</span><span class="p">)</span> <span class="o">-</span> <span class="n">unsat</span><span class="p">,</span> <span class="n">unsat</span><span class="p">))</span>
|
||||
<span class="n">naive_guess</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">mean</span><span class="p">(</span><span class="n">y</span> <span class="o">==</span> <span class="n">np</span><span class="o">.</span><span class="n">zeros</span><span class="p">(</span><span class="nb">len</span><span class="p">(</span><span class="n">y</span><span class="p">)))</span>
|
||||
<span class="nb">print</span><span class="p">(</span><span class="s">"Naive guess accuracy: {}"</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">naive_guess</span><span class="p">))</span>
|
||||
<span class="nb">print</span><span class="p">(</span><span class="s2">"Naive guess accuracy: {}"</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">naive_guess</span><span class="p">))</span>
|
||||
</pre></div>
|
||||
|
||||
</div>
|
||||
@ -397,9 +397,9 @@ Naive guess accuracy: 0.9608561656706882
|
||||
<span class="k">return</span> <span class="n">gnb</span><span class="o">.</span><span class="n">score</span><span class="p">(</span><span class="n">X_xform</span><span class="p">,</span> <span class="n">y</span><span class="p">)</span>
|
||||
|
||||
<span class="n">dim_range</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">arange</span><span class="p">(</span><span class="mi">1</span><span class="p">,</span> <span class="mi">21</span><span class="p">)</span>
|
||||
<span class="n">plt</span><span class="o">.</span><span class="n">plot</span><span class="p">(</span><span class="n">dim_range</span><span class="p">,</span> <span class="p">[</span><span class="n">evaluate_gnb</span><span class="p">(</span><span class="n">dim</span><span class="p">)</span> <span class="k">for</span> <span class="n">dim</span> <span class="ow">in</span> <span class="n">dim_range</span><span class="p">],</span> <span class="n">label</span><span class="o">=</span><span class="s">"Gaussian NB Accuracy"</span><span class="p">)</span>
|
||||
<span class="n">plt</span><span class="o">.</span><span class="n">axhline</span><span class="p">(</span><span class="n">naive_guess</span><span class="p">,</span> <span class="n">label</span><span class="o">=</span><span class="s">"Naive Guess"</span><span class="p">,</span> <span class="n">c</span><span class="o">=</span><span class="s">'k'</span><span class="p">)</span>
|
||||
<span class="n">plt</span><span class="o">.</span><span class="n">axhline</span><span class="p">(</span><span class="mi">1</span> <span class="o">-</span> <span class="n">naive_guess</span><span class="p">,</span> <span class="n">label</span><span class="o">=</span><span class="s">"Inverse Naive Guess"</span><span class="p">,</span> <span class="n">c</span><span class="o">=</span><span class="s">'k'</span><span class="p">)</span>
|
||||
<span class="n">plt</span><span class="o">.</span><span class="n">plot</span><span class="p">(</span><span class="n">dim_range</span><span class="p">,</span> <span class="p">[</span><span class="n">evaluate_gnb</span><span class="p">(</span><span class="n">dim</span><span class="p">)</span> <span class="k">for</span> <span class="n">dim</span> <span class="ow">in</span> <span class="n">dim_range</span><span class="p">],</span> <span class="n">label</span><span class="o">=</span><span class="s2">"Gaussian NB Accuracy"</span><span class="p">)</span>
|
||||
<span class="n">plt</span><span class="o">.</span><span class="n">axhline</span><span class="p">(</span><span class="n">naive_guess</span><span class="p">,</span> <span class="n">label</span><span class="o">=</span><span class="s2">"Naive Guess"</span><span class="p">,</span> <span class="n">c</span><span class="o">=</span><span class="s1">'k'</span><span class="p">)</span>
|
||||
<span class="n">plt</span><span class="o">.</span><span class="n">axhline</span><span class="p">(</span><span class="mi">1</span> <span class="o">-</span> <span class="n">naive_guess</span><span class="p">,</span> <span class="n">label</span><span class="o">=</span><span class="s2">"Inverse Naive Guess"</span><span class="p">,</span> <span class="n">c</span><span class="o">=</span><span class="s1">'k'</span><span class="p">)</span>
|
||||
<span class="n">plt</span><span class="o">.</span><span class="n">gcf</span><span class="p">()</span><span class="o">.</span><span class="n">set_size_inches</span><span class="p">(</span><span class="mi">12</span><span class="p">,</span> <span class="mi">6</span><span class="p">)</span>
|
||||
<span class="n">plt</span><span class="o">.</span><span class="n">legend</span><span class="p">();</span>
|
||||
</pre></div>
|
||||
@ -683,9 +683,9 @@ rkJggg==
|
||||
<span class="k">return</span> <span class="n">gnb</span><span class="o">.</span><span class="n">score</span><span class="p">(</span><span class="n">X_xform</span><span class="p">,</span> <span class="n">y</span><span class="p">)</span>
|
||||
|
||||
<span class="n">dim_range</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">arange</span><span class="p">(</span><span class="mi">1</span><span class="p">,</span> <span class="mi">21</span><span class="p">)</span>
|
||||
<span class="n">plt</span><span class="o">.</span><span class="n">plot</span><span class="p">(</span><span class="n">dim_range</span><span class="p">,</span> <span class="p">[</span><span class="n">evaluate_gnb</span><span class="p">(</span><span class="n">dim</span><span class="p">)</span> <span class="k">for</span> <span class="n">dim</span> <span class="ow">in</span> <span class="n">dim_range</span><span class="p">],</span> <span class="n">label</span><span class="o">=</span><span class="s">"Gaussian NB Accuracy"</span><span class="p">)</span>
|
||||
<span class="n">plt</span><span class="o">.</span><span class="n">axhline</span><span class="p">(</span><span class="n">naive_guess</span><span class="p">,</span> <span class="n">label</span><span class="o">=</span><span class="s">"Naive Guess"</span><span class="p">,</span> <span class="n">c</span><span class="o">=</span><span class="s">'k'</span><span class="p">)</span>
|
||||
<span class="n">plt</span><span class="o">.</span><span class="n">axhline</span><span class="p">(</span><span class="mi">1</span> <span class="o">-</span> <span class="n">naive_guess</span><span class="p">,</span> <span class="n">label</span><span class="o">=</span><span class="s">"Inverse Naive Guess"</span><span class="p">,</span> <span class="n">c</span><span class="o">=</span><span class="s">'k'</span><span class="p">)</span>
|
||||
<span class="n">plt</span><span class="o">.</span><span class="n">plot</span><span class="p">(</span><span class="n">dim_range</span><span class="p">,</span> <span class="p">[</span><span class="n">evaluate_gnb</span><span class="p">(</span><span class="n">dim</span><span class="p">)</span> <span class="k">for</span> <span class="n">dim</span> <span class="ow">in</span> <span class="n">dim_range</span><span class="p">],</span> <span class="n">label</span><span class="o">=</span><span class="s2">"Gaussian NB Accuracy"</span><span class="p">)</span>
|
||||
<span class="n">plt</span><span class="o">.</span><span class="n">axhline</span><span class="p">(</span><span class="n">naive_guess</span><span class="p">,</span> <span class="n">label</span><span class="o">=</span><span class="s2">"Naive Guess"</span><span class="p">,</span> <span class="n">c</span><span class="o">=</span><span class="s1">'k'</span><span class="p">)</span>
|
||||
<span class="n">plt</span><span class="o">.</span><span class="n">axhline</span><span class="p">(</span><span class="mi">1</span> <span class="o">-</span> <span class="n">naive_guess</span><span class="p">,</span> <span class="n">label</span><span class="o">=</span><span class="s2">"Inverse Naive Guess"</span><span class="p">,</span> <span class="n">c</span><span class="o">=</span><span class="s1">'k'</span><span class="p">)</span>
|
||||
<span class="n">plt</span><span class="o">.</span><span class="n">gcf</span><span class="p">()</span><span class="o">.</span><span class="n">set_size_inches</span><span class="p">(</span><span class="mi">12</span><span class="p">,</span> <span class="mi">6</span><span class="p">)</span>
|
||||
<span class="n">plt</span><span class="o">.</span><span class="n">legend</span><span class="p">();</span>
|
||||
</pre></div>
|
||||
@ -961,7 +961,7 @@ rkJggg==
|
||||
<div class="inner_cell">
|
||||
<div class="input_area">
|
||||
<div class=" highlight hl-ipython3"><pre><span class="n">end</span> <span class="o">=</span> <span class="n">datetime</span><span class="o">.</span><span class="n">now</span><span class="p">()</span>
|
||||
<span class="nb">print</span><span class="p">(</span><span class="s">"Running time: {}"</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">end</span> <span class="o">-</span> <span class="n">start</span><span class="p">))</span>
|
||||
<span class="nb">print</span><span class="p">(</span><span class="s2">"Running time: {}"</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">end</span> <span class="o">-</span> <span class="n">start</span><span class="p">))</span>
|
||||
</pre></div>
|
||||
|
||||
</div>
|
||||
@ -990,14 +990,14 @@ rkJggg==
|
||||
<div class="text_cell_render border-box-sizing rendered_html">
|
||||
<h1 id="Appendix">Appendix<a class="anchor-link" href="#Appendix">¶</a></h1><p>Code used to split the initial training data:</p>
|
||||
<div class="highlight"><pre><span class="kn">from</span> <span class="nn">sklearn.cross_validation</span> <span class="kn">import</span> <span class="n">train_test_split</span>
|
||||
<span class="n">data</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">read_csv</span><span class="p">(</span><span class="s">'train.csv'</span><span class="p">)</span>
|
||||
<span class="n">data</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">read_csv</span><span class="p">(</span><span class="s1">'train.csv'</span><span class="p">)</span>
|
||||
<span class="n">data</span><span class="o">.</span><span class="n">index</span> <span class="o">=</span> <span class="n">data</span><span class="o">.</span><span class="n">ID</span>
|
||||
|
||||
<span class="n">data_train</span><span class="p">,</span> <span class="n">data_validate</span> <span class="o">=</span> <span class="n">train_test_split</span><span class="p">(</span>
|
||||
<span class="n">data</span><span class="p">,</span> <span class="n">train_size</span><span class="o">=.</span><span class="mi">7</span><span class="p">)</span>
|
||||
|
||||
<span class="n">data_train</span><span class="o">.</span><span class="n">to_csv</span><span class="p">(</span><span class="s">'split_train.csv'</span><span class="p">)</span>
|
||||
<span class="n">data_validate</span><span class="o">.</span><span class="n">to_csv</span><span class="p">(</span><span class="s">'split_validate.csv'</span><span class="p">)</span>
|
||||
<span class="n">data_train</span><span class="o">.</span><span class="n">to_csv</span><span class="p">(</span><span class="s1">'split_train.csv'</span><span class="p">)</span>
|
||||
<span class="n">data_validate</span><span class="o">.</span><span class="n">to_csv</span><span class="p">(</span><span class="s1">'split_validate.csv'</span><span class="p">)</span>
|
||||
</pre></div>
|
||||
|
||||
</div>
|
||||
|
Reference in New Issue
Block a user