<?xml version="1.0" encoding="UTF-8"?>
<rss version="2.0" xmlns:dc="http://purl.org/dc/elements/1.1/"
  xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:atom="http://www.w3.org/2005/Atom">
  <channel>
    <title>Julio Cárdenas-Rodríguez</title>
    <description>I am a data scientist, passionate about helping people using mathematics, programming, and chemistry</description>
    <link>https://jdatascientist.silvrback.com/feed</link>
    <atom:link href="https://jdatascientist.silvrback.com/feed" rel="self" type="application/rss+xml"/>
    <category domain="jdatascientist.silvrback.com">Content Management/Blog</category>
    <language>en-us</language>
      <pubDate>Thu, 08 Feb 2018 11:43:56 -0700</pubDate>
    <managingEditor>jdatascientist@gmail.com (Julio Cárdenas-Rodríguez)</managingEditor>
      <item>
        <guid>https://jdatascientist.silvrback.com/a-simple-code-to-calculate-the-performance-metrics-of-binary-classifier#36602</guid>
          <pubDate>Thu, 08 Feb 2018 11:43:56 -0700</pubDate>
        <link>https://jdatascientist.silvrback.com/a-simple-code-to-calculate-the-performance-metrics-of-binary-classifier</link>
        <title>A simple code to calculate the performance metrics of a binary classifier</title>
        <description>Specificity, Sensitivity, PPV, and NPV</description>
        <content:encoded><![CDATA[<h3 id="1-code">1. Code</h3>
<div class="highlight"><pre><span></span><span class="kn">import</span> <span class="nn">numpy</span> <span class="kn">as</span> <span class="nn">np</span>

<span class="k">def</span> <span class="nf">perf_metrics_2X2</span><span class="p">(</span><span class="n">yobs</span><span class="p">,</span> <span class="n">yhat</span><span class="p">):</span>
    <span class="sd">&quot;&quot;&quot;</span>
<span class="sd">    Returns the specificity, sensitivity, positive predictive value, and negative predictive value </span>
<span class="sd">    of a 2X2 table.</span>

<span class="sd">    where:</span>
<span class="sd">    0 = negative case</span>
<span class="sd">    1 = positive case</span>

<span class="sd">    Parameters</span>
<span class="sd">    ----------</span>
<span class="sd">    yobs :  array of positive and negative ``observed`` cases</span>
<span class="sd">    yhat : array of positive and negative ``predicted`` cases</span>

<span class="sd">    Returns</span>
<span class="sd">    -------</span>
<span class="sd">    sensitivity  = TP / (TP+FN)</span>
<span class="sd">    specificity  = TN / (TN+FP)</span>
<span class="sd">    pos_pred_val = TP/ (TP+FP)</span>
<span class="sd">    neg_pred_val = TN/ (TN+FN)</span>

<span class="sd">    Author: Julio Cardenas-Rodriguez</span>
<span class="sd">    &quot;&quot;&quot;</span>
    <span class="n">TP</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="kp">sum</span><span class="p">(</span>  <span class="n">yobs</span><span class="p">[</span><span class="n">yobs</span><span class="o">==</span><span class="mi">1</span><span class="p">]</span> <span class="o">==</span> <span class="n">yhat</span><span class="p">[</span><span class="n">yobs</span><span class="o">==</span><span class="mi">1</span><span class="p">]</span> <span class="p">)</span>
    <span class="n">TN</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="kp">sum</span><span class="p">(</span>  <span class="n">yobs</span><span class="p">[</span><span class="n">yobs</span><span class="o">==</span><span class="mi">0</span><span class="p">]</span> <span class="o">==</span> <span class="n">yhat</span><span class="p">[</span><span class="n">yobs</span><span class="o">==</span><span class="mi">0</span><span class="p">]</span> <span class="p">)</span>
    <span class="n">FP</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="kp">sum</span><span class="p">(</span>  <span class="n">yobs</span><span class="p">[</span><span class="n">yobs</span><span class="o">==</span><span class="mi">1</span><span class="p">]</span> <span class="o">==</span> <span class="n">yhat</span><span class="p">[</span><span class="n">yobs</span><span class="o">==</span><span class="mi">0</span><span class="p">]</span> <span class="p">)</span>
    <span class="n">FN</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="kp">sum</span><span class="p">(</span>  <span class="n">yobs</span><span class="p">[</span><span class="n">yobs</span><span class="o">==</span><span class="mi">0</span><span class="p">]</span> <span class="o">==</span> <span class="n">yhat</span><span class="p">[</span><span class="n">yobs</span><span class="o">==</span><span class="mi">1</span><span class="p">]</span> <span class="p">)</span>

    <span class="n">sensitivity</span>  <span class="o">=</span> <span class="n">TP</span> <span class="o">/</span> <span class="p">(</span><span class="n">TP</span><span class="o">+</span><span class="n">FN</span><span class="p">)</span>
    <span class="n">specificity</span>  <span class="o">=</span> <span class="n">TN</span> <span class="o">/</span> <span class="p">(</span><span class="n">TN</span><span class="o">+</span><span class="n">FP</span><span class="p">)</span>
    <span class="n">pos_pred_val</span> <span class="o">=</span> <span class="n">TP</span><span class="o">/</span> <span class="p">(</span><span class="n">TP</span><span class="o">+</span><span class="n">FP</span><span class="p">)</span>
    <span class="n">neg_pred_val</span> <span class="o">=</span> <span class="n">TN</span><span class="o">/</span> <span class="p">(</span><span class="n">TN</span><span class="o">+</span><span class="n">FN</span><span class="p">)</span>

    <span class="k">return</span> <span class="n">sensitivity</span><span class="p">,</span> <span class="n">specificity</span><span class="p">,</span> <span class="n">pos_pred_val</span><span class="p">,</span> <span class="n">neg_pred_val</span>
</pre></div>
<h3 id="2-test">2. Test</h3>
<div class="highlight"><pre><span></span><span class="kn">import</span> <span class="nn">pandas</span> <span class="kn">as</span> <span class="nn">pd</span>
<span class="n">y</span>     <span class="o">=</span>    <span class="n">np</span><span class="o">.</span><span class="n">array</span><span class="p">([</span><span class="mi">0</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">0</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">0</span><span class="p">,</span><span class="mi">0</span><span class="p">,</span><span class="mi">1</span><span class="p">,</span><span class="mi">0</span><span class="p">])</span>
<span class="n">y_hat</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">array</span><span class="p">([</span><span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">0</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">0</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span><span class="mi">0</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span><span class="mi">1</span><span class="p">,</span><span class="mi">0</span><span class="p">])</span>

<span class="n">metrics</span>  <span class="o">=</span>  <span class="n">perf_metrics_2X2</span><span class="p">(</span><span class="n">y</span><span class="p">,</span> <span class="n">y_hat</span><span class="p">)</span>

<span class="k">print</span><span class="p">(</span><span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">(</span> <span class="nb">dict</span><span class="p">(</span> <span class="n">Metric</span> <span class="o">=</span> <span class="p">[</span><span class="s1">&#39;Sensitivity&#39;</span><span class="p">,</span> <span class="s1">&#39;Specificity&#39;</span><span class="p">,</span> <span class="s1">&#39;PPV&#39;</span><span class="p">,</span><span class="s1">&#39;NPV&#39;</span><span class="p">],</span> 
                          <span class="n">Performance</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">round</span><span class="p">(</span><span class="n">metrics</span><span class="p">,</span><span class="mi">3</span><span class="p">))))</span>

        <span class="n">Metric</span>  <span class="n">Performance</span>
<span class="mi">0</span>  <span class="n">Sensitivity</span>        <span class="mf">0.800</span>
<span class="mi">1</span>  <span class="n">Specificity</span>        <span class="mf">0.600</span>
<span class="mi">2</span>          <span class="n">PPV</span>        <span class="mf">0.667</span>
<span class="mi">3</span>          <span class="n">NPV</span>        <span class="mf">0.750</span>
</pre></div>]]></content:encoded>
      </item>
      <item>
        <guid>https://jdatascientist.silvrback.com/how-to-create-an-alias-in-windows-powershell-to-launch-jupyter-and-python#35690</guid>
          <pubDate>Fri, 01 Dec 2017 09:14:38 -0700</pubDate>
        <link>https://jdatascientist.silvrback.com/how-to-create-an-alias-in-windows-powershell-to-launch-jupyter-and-python</link>
        <title>How to create an alias in Windows Powershell to launch Jupyter and Python</title>
        <description></description>
        <content:encoded><![CDATA[<h3 id="why-am-i-doing-this">- Why am I doing this?</h3>

<p>Running <code>Jupyter</code> and <code>PIP</code> becomes painful if you don&#39;t have admin privileges for your computer; a solution to this is to enter the entire path to <code>PIP</code> and/or <code>Jupyter</code>, but it can be very time consuming and inefficient. A way around this is to create an alias for the terminal to interpret a short command as if you were entering the entire path to <code>PIP</code> and/or <code>Jupyter</code>.</p>

<h3 id="1-find-the-location-of-your-executables">1. Find the location of your executables</h3>

<p>These are the  locations for <code>Python</code>, <code>pip</code>, and <code>jupyter</code> in my computer:</p>
<div class="highlight"><pre><span></span>&gt; ~<span class="se">\A</span>ppData<span class="se">\L</span>ocal<span class="se">\C</span>ontinuum<span class="se">\A</span>naconda3<span class="se">\p</span>ython.exe
&gt;~<span class="se">\A</span>ppData<span class="se">\L</span>ocal<span class="se">\C</span>ontinuum<span class="se">\A</span>naconda3<span class="se">\S</span>cripts<span class="se">\p</span>ip.exe
&gt; ~<span class="se">\A</span>ppData<span class="se">\L</span>ocal<span class="se">\C</span>ontinuum<span class="se">\A</span>naconda3<span class="se">\S</span>cripts<span class="se">\j</span>upyter-notebook.exe
</pre></div>
<h3 id="2-create-a-powershell-profile">2. Create a PowerShell profile</h3>

<p>Just type the following in your PowerShell terminal:</p>
<div class="highlight"><pre><span></span>&gt; New-Item -Type file -Force <span class="nv">$profile</span>
</pre></div>
<h3 id="3-find-the-location-of-your-profile-file">3. Find the location of your profile file</h3>

<p>For my computer the location is the following:</p>
<div class="highlight"><pre><span></span>&gt; <span class="se">\D</span>ocuments<span class="se">\W</span>indowsPowerShell<span class="se">\M</span>icrosoft.PowerShell_profile.ps1
</pre></div>
<h3 id="4-edit-your-profile-file-with-the-following-aliases">4. Edit your profile file with the following aliases</h3>
<div class="highlight"><pre><span></span>Set-Alias py   <span class="s2">&quot;~\AppData\Local\Continuum\Anaconda3\python.exe&quot;</span>
Set-Alias pip  <span class="s2">&quot;~\AppData\Local\Continuum\Anaconda3\Scripts\pip.exe&quot;</span>
Set-Alias jup  <span class="s2">&quot;~\AppData\Local\Continuum\Anaconda3\Scripts\jupyter-notebook.exe&quot;</span>
</pre></div>
<p>Remember that the path above are specific for my computer, you should update according to the file structure in your own computer. </p>

<h3 id="5-close-powershell-and-open-a-new-session">5. Close PowerShell and open a new session</h3>

<h3 id="6-test-the-aliases-in-powershell">6. Test the aliases in PowerShell</h3>

<p>For example, type the following to launch a Jupyter notebook in any directory:</p>
<div class="highlight"><pre><span></span>&gt; <span class="nb">cd</span> Documents
&gt; jup
</pre></div>]]></content:encoded>
      </item>
      <item>
        <guid>https://jdatascientist.silvrback.com/tuning-scikit-learn-parameters-using-optimization-instead-of-random-search#35366</guid>
          <pubDate>Mon, 13 Nov 2017 11:01:49 -0700</pubDate>
        <link>https://jdatascientist.silvrback.com/tuning-scikit-learn-parameters-using-optimization-instead-of-random-search</link>
        <title>Tuning scikit-learn parameters using optimization instead of random search</title>
        <description>Work smart not hard</description>
        <content:encoded><![CDATA[<p><img alt="Silvrback blog image_righ" class="sb_float" src="https://static1.squarespace.com/static/50baa49de4b0e51d69257e33/50baae28e4b078f69f31dbc5/50baae2ce4b078f69f31dc7d/1354411582995/" /></p>

<h2 id="the-problem">The problem</h2>

<p>In previous posts I described how to perform non-linear curve fitting in Python and Julia. At their core, non-linear and linear curve fitting (or regression) are optimization problems in which we find the parameters that minimize an objective function. The entire field of mathematical optimization is concerned with finding the most efficient and accurate methods to minimize such functions.<br>
On the other hand, the current standard to find the optimal values for the parameters of the algorithms used in machine learning is to perform a <code>random search</code> or a <code>grid search</code> throughout the space of the possible values that such parameters can take. These approaches have several limitations:</p>

<ol>
<li>They are not computationally efficient for large data sets</li>
<li>the parameters tested are not informed in any way by the results from the previous step.</li>
</ol>

<h2 id="the-solution">The solution</h2>

<p>However, the implementation of optimization-driven approaches for <code>scikit-learn</code> is not a trivial matter. Thankfully, <a href="http://www.eng.uwaterloo.ca/%7Ejbergstr/">James Bergstra</a> and other brave souls have created <code>hyperopt</code>, a <a href="http://hyperopt.github.io/hyperopt/">Python library</a> for optimizing over awkward search spaces with real-valued, discrete, and conditional dimensions, which makes it ideal for tuning hyper parameters with <code>scikit-learn</code>.</p>

<h2 id="what-we-need">What we need</h2>

<p>In order to tune the parameters of a <code>scikit-learn</code> estimator, <code>hyperopt</code> needs the following:<br>
1. Data<br>
2. The objective function to be minimized<br>
3. The search space from which to sample the parameters<br>
4. The algorithm to be used for the minimization of the objective function, and the number of times the optimization should be run</p>

<h2 id="python-implementation">Python implementation</h2>
<div class="highlight"><pre><span></span><span class="c1">#modules</span>
<span class="kn">from</span> <span class="nn">sklearn.metrics.regression</span> <span class="kn">import</span> <span class="n">mean_absolute_error</span> <span class="k">as</span> <span class="n">mae</span>
<span class="kn">from</span> <span class="nn">sklearn.metrics</span> <span class="kn">import</span> <span class="n">make_scorer</span>
<span class="kn">from</span> <span class="nn">sklearn.model_selection</span> <span class="kn">import</span> <span class="n">train_test_split</span><span class="p">,</span> <span class="n">cross_val_score</span>
<span class="kn">from</span> <span class="nn">sklearn.ensemble</span> <span class="kn">import</span> <span class="n">GradientBoostingRegressor</span>
<span class="kn">from</span> <span class="nn">hyperopt</span> <span class="kn">import</span> <span class="n">hp</span><span class="p">,</span> <span class="n">fmin</span><span class="p">,</span> <span class="n">tpe</span>
<span class="kn">from</span> <span class="nn">hyperopt.pyll</span> <span class="kn">import</span> <span class="n">scope</span>

<span class="kn">import</span> <span class="nn">numpy</span> <span class="kn">as</span> <span class="nn">np</span>

<span class="c1"># hyperopt object for </span>
<span class="n">scope</span><span class="o">.</span><span class="n">define</span><span class="p">(</span><span class="n">GradientBoostingRegressor</span><span class="p">)</span>    

<span class="k">def</span> <span class="nf">train_GradientBoostingRegressor</span><span class="p">(</span><span class="n">Xdata</span><span class="p">,</span> <span class="n">Ydata</span><span class="p">,</span> <span class="n">loss</span><span class="o">=</span><span class="s1">&#39;ls&#39;</span> <span class="p">,</span><span class="n">alpha</span> <span class="o">=</span> <span class="mf">0.50</span><span class="p">,</span> <span class="n">cv</span> <span class="o">=</span> <span class="mi">2</span><span class="p">,</span> <span class="n">n_steps</span> <span class="o">=</span> <span class="mi">10</span><span class="p">):</span>
    <span class="sd">&quot;&quot;&quot;</span>
<span class="sd">    Trains a Gradient Boosting Regressor using Bayesian optimization </span>
<span class="sd">    Parameters</span>
<span class="sd">    ----------</span>
<span class="sd">    Xdata: numpy array of size KxN and composed of floating and/or integers</span>
<span class="sd">    Ydata: numpy array of size K (1D array) of floating</span>
<span class="sd">    loss: loss function to be optimized.</span>
<span class="sd">    alpha: quantile for the quantile and Huber loss; floating &lt; 1.0 and &gt; 0.0</span>
<span class="sd">    CV: K-fold cross-validation size for the training procedure</span>
<span class="sd">    n_steps: Number of times the `hyperopt` minimizer will run to find the optimal parameters</span>

<span class="sd">    Returns</span>
<span class="sd">    -------</span>
<span class="sd">    Regressor : A scikit-learn object with the trained Gradient Boosting Regressor</span>

<span class="sd">    &quot;&quot;&quot;</span>
    <span class="c1">#split data</span>
    <span class="n">X_train</span><span class="p">,</span> <span class="n">X_test</span><span class="p">,</span> <span class="n">y_train</span><span class="p">,</span> <span class="n">y_test</span> <span class="o">=</span> <span class="n">train_test_split</span><span class="p">(</span><span class="n">Xdata</span><span class="p">,</span> <span class="n">Ydata</span><span class="p">,</span> <span class="n">test_size</span><span class="o">=.</span><span class="mi">33</span><span class="p">,</span> <span class="n">random_state</span> <span class="o">=</span> <span class="mi">42</span><span class="p">)</span>

    <span class="c1"># create and objective function</span>
    <span class="k">def</span> <span class="nf">objective_function_regression</span><span class="p">(</span><span class="n">estimator</span><span class="p">):</span>
        <span class="n">mae_array</span> <span class="o">=</span> <span class="n">cross_val_score</span><span class="p">(</span> <span class="n">estimator</span><span class="p">,</span> <span class="n">X_train</span><span class="p">,</span> <span class="n">y_train</span><span class="p">,</span> <span class="n">cv</span><span class="o">=</span> <span class="n">cv</span><span class="p">,</span> <span class="n">n_jobs</span><span class="o">=-</span><span class="mi">1</span><span class="p">,</span> 
                                    <span class="n">scoring</span> <span class="o">=</span> <span class="n">make_scorer</span><span class="p">(</span><span class="n">mae</span><span class="p">)</span> <span class="p">)</span>
        <span class="k">return</span> <span class="n">mae_array</span><span class="o">.</span><span class="n">mean</span><span class="p">()</span>


    <span class="c1"># search space</span>
    <span class="n">n_estimators</span>     <span class="o">=</span> <span class="n">hp</span><span class="o">.</span><span class="n">randint</span><span class="p">(</span><span class="s1">&#39;n_estimators&#39;</span><span class="p">,</span><span class="mi">1000</span><span class="p">)</span> 
    <span class="n">learning_rate</span>    <span class="o">=</span> <span class="n">hp</span><span class="o">.</span><span class="n">loguniform</span><span class="p">(</span><span class="s1">&#39;learning_rate&#39;</span><span class="p">,</span><span class="o">-</span><span class="mi">3</span><span class="p">,</span><span class="mi">1</span><span class="p">)</span>
    <span class="n">max_depth</span>        <span class="o">=</span> <span class="n">hp</span><span class="o">.</span><span class="n">randint</span><span class="p">(</span><span class="s1">&#39;max_depth&#39;</span><span class="p">,</span> <span class="mi">10</span><span class="p">)</span>
    <span class="n">max_features</span>     <span class="o">=</span> <span class="n">hp</span><span class="o">.</span><span class="n">randint</span><span class="p">(</span><span class="s1">&#39;max_features&#39;</span><span class="p">,</span><span class="n">X_train</span><span class="o">.</span><span class="n">shape</span><span class="p">[</span><span class="mi">1</span><span class="p">])</span>
    <span class="n">min_samples_leaf</span> <span class="o">=</span> <span class="n">hp</span><span class="o">.</span><span class="n">randint</span><span class="p">(</span><span class="s1">&#39;min_samples_leaf&#39;</span><span class="p">,</span> <span class="mi">10</span><span class="p">)</span>
    <span class="n">criterion</span>        <span class="o">=</span> <span class="n">hp</span><span class="o">.</span><span class="n">choice</span><span class="p">(</span><span class="s1">&#39;criterion&#39;</span><span class="p">,</span> <span class="p">[</span><span class="s1">&#39;friedman_mse&#39;</span><span class="p">])</span>

    <span class="c1"># model / estimator to be optimized</span>
    <span class="n">est0</span> <span class="o">=</span> <span class="p">(</span><span class="mf">0.1</span><span class="p">,</span> <span class="n">scope</span><span class="o">.</span><span class="n">GradientBoostingRegressor</span><span class="p">(</span> <span class="n">loss</span> <span class="o">=</span> <span class="n">loss</span><span class="p">,</span>
                                                 <span class="n">alpha</span> <span class="o">=</span> <span class="n">alpha</span><span class="p">,</span>
                                                 <span class="n">n_estimators</span>  <span class="o">=</span> <span class="n">n_estimators</span> <span class="o">+</span> <span class="mi">1</span><span class="p">,</span>
                                                <span class="n">learning_rate</span> <span class="o">=</span> <span class="n">learning_rate</span><span class="p">,</span>
                                                <span class="n">max_depth</span> <span class="o">=</span> <span class="n">max_depth</span> <span class="o">+</span> <span class="mi">1</span><span class="p">,</span>
                                                <span class="n">max_features</span> <span class="o">=</span> <span class="n">max_features</span> <span class="o">+</span> <span class="mi">1</span><span class="p">,</span>
                                                <span class="n">min_samples_leaf</span> <span class="o">=</span> <span class="n">min_samples_leaf</span> <span class="o">+</span> <span class="mi">1</span><span class="p">,</span>
                                                <span class="n">criterion</span> <span class="o">=</span> <span class="n">criterion</span><span class="p">,</span>
                                                <span class="n">random_state</span><span class="o">=</span> <span class="mi">101</span><span class="p">)</span> 
                  <span class="p">)</span>  

    <span class="c1"># search space</span>
    <span class="n">search_space_regression</span> <span class="o">=</span> <span class="n">hp</span><span class="o">.</span><span class="n">pchoice</span><span class="p">(</span><span class="s1">&#39;estimator&#39;</span><span class="p">,</span> <span class="p">[</span><span class="n">est0</span><span class="p">])</span>

    <span class="k">print</span><span class="p">(</span><span class="s1">&#39;--&#39;</span><span class="o">*</span><span class="mi">20</span><span class="p">)</span>
    <span class="k">print</span><span class="p">(</span><span class="s1">&#39;Finding optimal parameters&#39;</span><span class="p">)</span>
    <span class="c1"># perform the optimization</span>
    <span class="n">best</span> <span class="o">=</span> <span class="n">fmin</span><span class="p">(</span><span class="n">fn</span><span class="o">=</span> <span class="n">objective_function_regression</span><span class="p">,</span>
                             <span class="n">space</span><span class="o">=</span> <span class="n">search_space_regression</span><span class="p">,</span>
                              <span class="n">algo</span> <span class="o">=</span> <span class="n">tpe</span><span class="o">.</span><span class="n">suggest</span><span class="p">,</span> 
                             <span class="n">max_evals</span> <span class="o">=</span> <span class="n">n_steps</span><span class="p">,</span>
                             <span class="n">verbose</span> <span class="o">=</span> <span class="mi">0</span>  <span class="c1"># The number of iterations</span>
                             <span class="p">)</span>

    <span class="c1"># Allocate optimized parameters and apply to test data set</span>
    <span class="n">Regressor</span> <span class="o">=</span> <span class="n">GradientBoostingRegressor</span><span class="p">(</span> <span class="n">loss</span> <span class="o">=</span> <span class="n">loss</span><span class="p">,</span> <span class="n">alpha</span> <span class="o">=</span> <span class="n">alpha</span><span class="p">,</span>
                          <span class="n">learning_rate</span> <span class="o">=</span> <span class="n">best</span><span class="p">[</span><span class="s1">&#39;learning_rate&#39;</span><span class="p">],</span>
                          <span class="n">max_depth</span> <span class="o">=</span> <span class="n">best</span><span class="p">[</span><span class="s1">&#39;max_depth&#39;</span><span class="p">],</span>
                          <span class="n">max_features</span> <span class="o">=</span> <span class="n">best</span><span class="p">[</span><span class="s1">&#39;max_features&#39;</span><span class="p">],</span>
                          <span class="n">min_samples_leaf</span> <span class="o">=</span> <span class="n">best</span><span class="p">[</span><span class="s1">&#39;min_samples_leaf&#39;</span><span class="p">],</span>
                          <span class="n">n_estimators</span> <span class="o">=</span> <span class="n">best</span><span class="p">[</span><span class="s1">&#39;n_estimators&#39;</span><span class="p">],</span>
                          <span class="n">random_state</span> <span class="o">=</span> <span class="mi">101</span>
                                       <span class="p">)</span>
    <span class="c1"># fit</span>
    <span class="n">Regressor</span><span class="o">.</span><span class="n">fit</span><span class="p">(</span><span class="n">X_train</span><span class="p">,</span><span class="n">y_train</span><span class="p">)</span>

    <span class="c1">#evaluate</span>
    <span class="n">yhat</span> <span class="o">=</span> <span class="n">Regressor</span><span class="o">.</span><span class="n">predict</span><span class="p">(</span><span class="n">X_test</span><span class="p">)</span> <span class="p">;</span>
    <span class="n">error_pct</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">round</span><span class="p">(</span> <span class="n">np</span><span class="o">.</span><span class="n">median</span><span class="p">(</span><span class="n">np</span><span class="o">.</span><span class="n">abs</span><span class="p">(</span><span class="n">yhat</span> <span class="o">-</span> <span class="n">y_test</span><span class="p">)),</span> <span class="mi">2</span><span class="p">)</span>
    <span class="c1">#print(&#39;--&#39;*20)</span>

    <span class="k">print</span><span class="p">(</span>  
        <span class="s2">&quot;{} {}&quot;</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="s1">&#39;The Median Abs. Error (%) for the test set is :&#39;</span><span class="p">,</span> <span class="n">error_pct</span><span class="p">)</span> 
          <span class="p">)</span>

    <span class="k">return</span> <span class="n">Regressor</span><span class="p">,</span> <span class="n">y_test</span><span class="p">,</span> <span class="n">yhat</span>
</pre></div>
<p>Now, we can use the Boston housing data set to test our <em>beautiful</em> code:</p>
<div class="highlight"><pre><span></span><span class="kn">from</span> <span class="nn">sklearn.datasets</span> <span class="kn">import</span> <span class="n">load_boston</span>
<span class="kn">from</span> <span class="nn">sklearn.model_selection</span> <span class="kn">import</span> <span class="n">train_test_split</span>

<span class="n">D</span><span class="o">=</span> <span class="n">load_boston</span><span class="p">()</span>

<span class="n">R1</span><span class="p">,</span> <span class="n">ytest1</span><span class="p">,</span> <span class="n">yhat1</span> <span class="o">=</span> <span class="n">train_GradientBoostingRegressor</span><span class="p">(</span> <span class="n">D</span><span class="o">.</span><span class="n">data</span><span class="p">,</span> 
                                                                                                                     <span class="n">D</span><span class="o">.</span><span class="n">target</span><span class="p">,</span>  
                                                                                                                     <span class="n">loss</span><span class="o">=</span><span class="s1">&#39;quantile&#39;</span><span class="p">,</span> 
                                                                                                                     <span class="n">alpha</span> <span class="o">=</span> <span class="mf">0.50</span><span class="p">,</span> 
                                                                                                                     <span class="n">n_steps</span> <span class="o">=</span> <span class="mi">50</span><span class="p">)</span>
</pre></div>]]></content:encoded>
      </item>
  </channel>
</rss>