From c3fac15bef62513a80abd7e5a722c0889449c12e Mon Sep 17 00:00:00 2001 From: Morten Hjorth-Jensen Date: Sat, 23 Nov 2024 22:40:00 +0100 Subject: [PATCH] update --- doc/pub/week48/html/week48-bs.html | 102 +++-- doc/pub/week48/html/week48-reveal.html | 76 ---- doc/pub/week48/html/week48-solarized.html | 77 ---- doc/pub/week48/html/week48.html | 77 ---- doc/pub/week48/ipynb/ipynb-week48-src.tar.gz | Bin 823660 -> 823660 bytes doc/pub/week48/ipynb/week48.ipynb | 380 ++++++++----------- doc/src/week48/week48.do.txt | 56 --- 7 files changed, 204 insertions(+), 564 deletions(-) diff --git a/doc/pub/week48/html/week48-bs.html b/doc/pub/week48/html/week48-bs.html index 0a899735c..b2302ceda 100644 --- a/doc/pub/week48/html/week48-bs.html +++ b/doc/pub/week48/html/week48-bs.html @@ -111,7 +111,6 @@ 2, None, 'xgboost-extreme-gradient-boosting'), - ('Regression Case', 2, None, 'regression-case'), ('Xgboost on the Cancer Data', 2, None, @@ -300,56 +299,55 @@
  • Gradient Boosting, Examples of Regression
  • Gradient Boosting, Classification Example
  • XGBoost: Extreme Gradient Boosting
  • -
  • Regression Case
  • -
  • Xgboost on the Cancer Data
  • -
  • Gradient boosting, making our own code for a regression case
  • -
  • Summary of course
  • -
  • What? Me worry? No final exam in this course!
  • -
  • Topics we have covered this year
  • -
  • Statistical analysis and optimization of data
  • -
  • Machine learning
  • -
  • Learning outcomes and overarching aims of this course
  • -
  • Perspective on Machine Learning
  • -
  • Machine Learning Research
  • -
  • Starting your Machine Learning Project
  • -
  • Choose a Model and Algorithm
  • -
  • Preparing Your Data
  • -
  • Which activation and weights to choose in neural networks
  • -
  • Optimization Methods and Hyperparameters
  • -
  • Resampling
  • -
  • Other courses on Data science and Machine Learning at UiO
  • -
  • Additional courses of interest
  • -
  • What's the future like?
  • -
  • Types of Machine Learning, a repetition
  • -
  • Why Boltzmann machines?
  • -
  • Boltzmann Machines
  • -
  • Some similarities and differences from DNNs
  • -
  • Boltzmann machines (BM)
  • -
  • A standard BM setup
  • -
  • The structure of the RBM network
  • -
  • The network
  • -
  • Goals
  • -
  • Joint distribution
  • -
  • Network Elements, the energy function
  • -
  • Defining different types of RBMs
  • -
  • More about RBMs
  • -
  • Autoencoders: Overarching view
  • -
  • Bayesian Machine Learning
  • -
  • Reinforcement Learning
  • -
  • Transfer learning
  • -
  • Adversarial learning
  • -
  • Dual learning
  • -
  • Distributed machine learning
  • -
  • Meta learning
  • -
  • The Challenges Facing Machine Learning
  • -
  • Explainable machine learning
  • -
  • Quantum machine learning
  • -
  • Quantum machine learning algorithms based on linear algebra
  • -
  • Quantum reinforcement learning
  • -
  • Quantum deep learning
  • -
  • Social machine learning
  • -
  • The last words?
  • -
  • Best wishes to you all and thanks so much for your heroic efforts this semester
  • +
  • Xgboost on the Cancer Data
  • +
  • Gradient boosting, making our own code for a regression case
  • +
  • Summary of course
  • +
  • What? Me worry? No final exam in this course!
  • +
  • Topics we have covered this year
  • +
  • Statistical analysis and optimization of data
  • +
  • Machine learning
  • +
  • Learning outcomes and overarching aims of this course
  • +
  • Perspective on Machine Learning
  • +
  • Machine Learning Research
  • +
  • Starting your Machine Learning Project
  • +
  • Choose a Model and Algorithm
  • +
  • Preparing Your Data
  • +
  • Which activation and weights to choose in neural networks
  • +
  • Optimization Methods and Hyperparameters
  • +
  • Resampling
  • +
  • Other courses on Data science and Machine Learning at UiO
  • +
  • Additional courses of interest
  • +
  • What's the future like?
  • +
  • Types of Machine Learning, a repetition
  • +
  • Why Boltzmann machines?
  • +
  • Boltzmann Machines
  • +
  • Some similarities and differences from DNNs
  • +
  • Boltzmann machines (BM)
  • +
  • A standard BM setup
  • +
  • The structure of the RBM network
  • +
  • The network
  • +
  • Goals
  • +
  • Joint distribution
  • +
  • Network Elements, the energy function
  • +
  • Defining different types of RBMs
  • +
  • More about RBMs
  • +
  • Autoencoders: Overarching view
  • +
  • Bayesian Machine Learning
  • +
  • Reinforcement Learning
  • +
  • Transfer learning
  • +
  • Adversarial learning
  • +
  • Dual learning
  • +
  • Distributed machine learning
  • +
  • Meta learning
  • +
  • The Challenges Facing Machine Learning
  • +
  • Explainable machine learning
  • +
  • Quantum machine learning
  • +
  • Quantum machine learning algorithms based on linear algebra
  • +
  • Quantum reinforcement learning
  • +
  • Quantum deep learning
  • +
  • Social machine learning
  • +
  • The last words?
  • +
  • Best wishes to you all and thanks so much for your heroic efforts this semester
  • @@ -401,7 +399,7 @@

    Nov 23, 2024

  • 9
  • 10
  • ...
  • -
  • 74
  • +
  • 73
  • »
  • diff --git a/doc/pub/week48/html/week48-reveal.html b/doc/pub/week48/html/week48-reveal.html index d4fa5f1a5..f77f43024 100644 --- a/doc/pub/week48/html/week48-reveal.html +++ b/doc/pub/week48/html/week48-reveal.html @@ -1046,7 +1046,6 @@

    Gradient Boosting, Examples of plt.plot(polydegree, bias, label='bias') plt.plot(polydegree, variance, label='Variance') plt.legend() -save_fig("gdregression") plt.show() @@ -1104,14 +1103,11 @@

    Gradient Boosting, Classificat import scikitplot as skplt y_pred = gd_clf.predict(X_test_scaled) skplt.metrics.plot_confusion_matrix(y_test, y_pred, normalize=True) -save_fig("gdclassiffierconfusion") plt.show() y_probas = gd_clf.predict_proba(X_test_scaled) skplt.metrics.plot_roc(y_test, y_probas) -save_fig("gdclassiffierroc") plt.show() skplt.metrics.plot_cumulative_gain(y_test, y_probas) -save_fig("gdclassiffiercgain") plt.show() @@ -1148,73 +1144,6 @@

    XGBoost: Extreme Gradient Boosting It is now the algorithm which wins essentially all ML competitions!!!

    -
    -

    Regression Case

    - - - -
    -
    -
    -
    -
    -
    import matplotlib.pyplot as plt
    -import numpy as np
    -from sklearn.model_selection import train_test_split
    -import xgboost as xgb
    -import scikitplot as skplt
    -from sklearn.metrics import mean_squared_error
    -
    -n = 100
    -maxdegree = 6
    -
    -# Make data set.
    -x = np.linspace(-3, 3, n).reshape(-1, 1)
    -y = np.exp(-x**2) + 1.5 * np.exp(-(x-2)**2)+ np.random.normal(0, 0.1, x.shape)
    -
    -error = np.zeros(maxdegree)
    -bias = np.zeros(maxdegree)
    -variance = np.zeros(maxdegree)
    -polydegree = np.zeros(maxdegree)
    -X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.2)
    -
    -for degree in range(maxdegree):
    -    model =  xgb.XGBRegressor(objective ='reg:squarederror', colsaobjective ='reg:squarederror', colsample_bytree = 0.3, learning_rate = 0.1,max_depth = degree, alpha = 10, n_estimators = 200)
    -
    -    model.fit(X_train,y_train)
    -    y_pred = model.predict(X_test)
    -    polydegree[degree] = degree
    -    error[degree] = np.mean( np.mean((y_test - y_pred)**2) )
    -    bias[degree] = np.mean( (y_test - np.mean(y_pred))**2 )
    -    variance[degree] = np.mean( np.var(y_pred) )
    -    print('Max depth:', degree)
    -    print('Error:', error[degree])
    -    print('Bias^2:', bias[degree])
    -    print('Var:', variance[degree])
    -    print('{} >= {} + {} = {}'.format(error[degree], bias[degree], variance[degree], bias[degree]+variance[degree]))
    -
    -plt.xlim(1,maxdegree-1)
    -plt.plot(polydegree, error, label='Error')
    -plt.plot(polydegree, bias, label='bias')
    -plt.plot(polydegree, variance, label='Variance')
    -plt.legend()
    -plt.show()
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -

    Xgboost on the Cancer Data

    @@ -1257,25 +1186,20 @@

    Xgboost on the Cancer Data

    import scikitplot as skplt y_pred = xg_clf.predict(X_test_scaled) skplt.metrics.plot_confusion_matrix(y_test, y_pred, normalize=True) -save_fig("xdclassiffierconfusion") plt.show() y_probas = xg_clf.predict_proba(X_test_scaled) skplt.metrics.plot_roc(y_test, y_probas) -save_fig("xdclassiffierroc") plt.show() skplt.metrics.plot_cumulative_gain(y_test, y_probas) -save_fig("gdclassiffiercgain") plt.show() xgb.plot_tree(xg_clf,num_trees=0) plt.rcParams['figure.figsize'] = [50, 10] -save_fig("xgtree") plt.show() xgb.plot_importance(xg_clf) plt.rcParams['figure.figsize'] = [5, 5] -save_fig("xgparams") plt.show() diff --git a/doc/pub/week48/html/week48-solarized.html b/doc/pub/week48/html/week48-solarized.html index 6a80bf332..bff9c030e 100644 --- a/doc/pub/week48/html/week48-solarized.html +++ b/doc/pub/week48/html/week48-solarized.html @@ -138,7 +138,6 @@ 2, None, 'xgboost-extreme-gradient-boosting'), - ('Regression Case', 2, None, 'regression-case'), ('Xgboost on the Cancer Data', 2, None, @@ -1070,7 +1069,6 @@

    Gradient Boosting, Examples of plt.plot(polydegree, bias, label='bias') plt.plot(polydegree, variance, label='Variance') plt.legend() -save_fig("gdregression") plt.show() @@ -1128,14 +1126,11 @@

    Gradient Boosting, Classificat import scikitplot as skplt y_pred = gd_clf.predict(X_test_scaled) skplt.metrics.plot_confusion_matrix(y_test, y_pred, normalize=True) -save_fig("gdclassiffierconfusion") plt.show() y_probas = gd_clf.predict_proba(X_test_scaled) skplt.metrics.plot_roc(y_test, y_probas) -save_fig("gdclassiffierroc") plt.show() skplt.metrics.plot_cumulative_gain(y_test, y_probas) -save_fig("gdclassiffiercgain") plt.show() @@ -1171,73 +1166,6 @@

    XGBoost: Extreme Gradient Boosting It is now the algorithm which wins essentially all ML competitions!!!

    -









    -

    Regression Case

    - - - -
    -
    -
    -
    -
    -
    import matplotlib.pyplot as plt
    -import numpy as np
    -from sklearn.model_selection import train_test_split
    -import xgboost as xgb
    -import scikitplot as skplt
    -from sklearn.metrics import mean_squared_error
    -
    -n = 100
    -maxdegree = 6
    -
    -# Make data set.
    -x = np.linspace(-3, 3, n).reshape(-1, 1)
    -y = np.exp(-x**2) + 1.5 * np.exp(-(x-2)**2)+ np.random.normal(0, 0.1, x.shape)
    -
    -error = np.zeros(maxdegree)
    -bias = np.zeros(maxdegree)
    -variance = np.zeros(maxdegree)
    -polydegree = np.zeros(maxdegree)
    -X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.2)
    -
    -for degree in range(maxdegree):
    -    model =  xgb.XGBRegressor(objective ='reg:squarederror', colsaobjective ='reg:squarederror', colsample_bytree = 0.3, learning_rate = 0.1,max_depth = degree, alpha = 10, n_estimators = 200)
    -
    -    model.fit(X_train,y_train)
    -    y_pred = model.predict(X_test)
    -    polydegree[degree] = degree
    -    error[degree] = np.mean( np.mean((y_test - y_pred)**2) )
    -    bias[degree] = np.mean( (y_test - np.mean(y_pred))**2 )
    -    variance[degree] = np.mean( np.var(y_pred) )
    -    print('Max depth:', degree)
    -    print('Error:', error[degree])
    -    print('Bias^2:', bias[degree])
    -    print('Var:', variance[degree])
    -    print('{} >= {} + {} = {}'.format(error[degree], bias[degree], variance[degree], bias[degree]+variance[degree]))
    -
    -plt.xlim(1,maxdegree-1)
    -plt.plot(polydegree, error, label='Error')
    -plt.plot(polydegree, bias, label='bias')
    -plt.plot(polydegree, variance, label='Variance')
    -plt.legend()
    -plt.show()
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -









    Xgboost on the Cancer Data

    @@ -1280,25 +1208,20 @@

    Xgboost on the Cancer Data

    import scikitplot as skplt y_pred = xg_clf.predict(X_test_scaled) skplt.metrics.plot_confusion_matrix(y_test, y_pred, normalize=True) -save_fig("xdclassiffierconfusion") plt.show() y_probas = xg_clf.predict_proba(X_test_scaled) skplt.metrics.plot_roc(y_test, y_probas) -save_fig("xdclassiffierroc") plt.show() skplt.metrics.plot_cumulative_gain(y_test, y_probas) -save_fig("gdclassiffiercgain") plt.show() xgb.plot_tree(xg_clf,num_trees=0) plt.rcParams['figure.figsize'] = [50, 10] -save_fig("xgtree") plt.show() xgb.plot_importance(xg_clf) plt.rcParams['figure.figsize'] = [5, 5] -save_fig("xgparams") plt.show() diff --git a/doc/pub/week48/html/week48.html b/doc/pub/week48/html/week48.html index ebd73a6cc..010721e6d 100644 --- a/doc/pub/week48/html/week48.html +++ b/doc/pub/week48/html/week48.html @@ -215,7 +215,6 @@ 2, None, 'xgboost-extreme-gradient-boosting'), - ('Regression Case', 2, None, 'regression-case'), ('Xgboost on the Cancer Data', 2, None, @@ -1147,7 +1146,6 @@

    Gradient Boosting, Examples of plt.plot(polydegree, bias, label='bias') plt.plot(polydegree, variance, label='Variance') plt.legend() -save_fig("gdregression") plt.show() @@ -1205,14 +1203,11 @@

    Gradient Boosting, Classificat import scikitplot as skplt y_pred = gd_clf.predict(X_test_scaled) skplt.metrics.plot_confusion_matrix(y_test, y_pred, normalize=True) -save_fig("gdclassiffierconfusion") plt.show() y_probas = gd_clf.predict_proba(X_test_scaled) skplt.metrics.plot_roc(y_test, y_probas) -save_fig("gdclassiffierroc") plt.show() skplt.metrics.plot_cumulative_gain(y_test, y_probas) -save_fig("gdclassiffiercgain") plt.show() @@ -1248,73 +1243,6 @@

    XGBoost: Extreme Gradient Boosting It is now the algorithm which wins essentially all ML competitions!!!

    -









    -

    Regression Case

    - - - -
    -
    -
    -
    -
    -
    import matplotlib.pyplot as plt
    -import numpy as np
    -from sklearn.model_selection import train_test_split
    -import xgboost as xgb
    -import scikitplot as skplt
    -from sklearn.metrics import mean_squared_error
    -
    -n = 100
    -maxdegree = 6
    -
    -# Make data set.
    -x = np.linspace(-3, 3, n).reshape(-1, 1)
    -y = np.exp(-x**2) + 1.5 * np.exp(-(x-2)**2)+ np.random.normal(0, 0.1, x.shape)
    -
    -error = np.zeros(maxdegree)
    -bias = np.zeros(maxdegree)
    -variance = np.zeros(maxdegree)
    -polydegree = np.zeros(maxdegree)
    -X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.2)
    -
    -for degree in range(maxdegree):
    -    model =  xgb.XGBRegressor(objective ='reg:squarederror', colsaobjective ='reg:squarederror', colsample_bytree = 0.3, learning_rate = 0.1,max_depth = degree, alpha = 10, n_estimators = 200)
    -
    -    model.fit(X_train,y_train)
    -    y_pred = model.predict(X_test)
    -    polydegree[degree] = degree
    -    error[degree] = np.mean( np.mean((y_test - y_pred)**2) )
    -    bias[degree] = np.mean( (y_test - np.mean(y_pred))**2 )
    -    variance[degree] = np.mean( np.var(y_pred) )
    -    print('Max depth:', degree)
    -    print('Error:', error[degree])
    -    print('Bias^2:', bias[degree])
    -    print('Var:', variance[degree])
    -    print('{} >= {} + {} = {}'.format(error[degree], bias[degree], variance[degree], bias[degree]+variance[degree]))
    -
    -plt.xlim(1,maxdegree-1)
    -plt.plot(polydegree, error, label='Error')
    -plt.plot(polydegree, bias, label='bias')
    -plt.plot(polydegree, variance, label='Variance')
    -plt.legend()
    -plt.show()
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    -
    - -









    Xgboost on the Cancer Data

    @@ -1357,25 +1285,20 @@

    Xgboost on the Cancer Data

    import scikitplot as skplt y_pred = xg_clf.predict(X_test_scaled) skplt.metrics.plot_confusion_matrix(y_test, y_pred, normalize=True) -save_fig("xdclassiffierconfusion") plt.show() y_probas = xg_clf.predict_proba(X_test_scaled) skplt.metrics.plot_roc(y_test, y_probas) -save_fig("xdclassiffierroc") plt.show() skplt.metrics.plot_cumulative_gain(y_test, y_probas) -save_fig("gdclassiffiercgain") plt.show() xgb.plot_tree(xg_clf,num_trees=0) plt.rcParams['figure.figsize'] = [50, 10] -save_fig("xgtree") plt.show() xgb.plot_importance(xg_clf) plt.rcParams['figure.figsize'] = [5, 5] -save_fig("xgparams") plt.show() diff --git a/doc/pub/week48/ipynb/ipynb-week48-src.tar.gz b/doc/pub/week48/ipynb/ipynb-week48-src.tar.gz index 8526beceb55f7b70583b913fc73986a16253ea0f..ba9af317afb7adf07e95743b00eaf6d0ba2f8984 100644 GIT binary patch delta 53 zcmaE}$mq=?BR2VN4u*N&jcl!KjIC@;t!&J#Y%Hy8tgUQ75%yL#j#f6#RyM9yHtwx# HJZTdFpi>Q^ delta 53 zcmaE}$mq=?BR2VN4u(drMz&Tq##T0_RyO8VHkMX4)>by42zx6VM=Kj=D;rlU8~0W= Hp0tSon`jM$ diff --git a/doc/pub/week48/ipynb/week48.ipynb b/doc/pub/week48/ipynb/week48.ipynb index baa902728..f289f46c8 100644 --- a/doc/pub/week48/ipynb/week48.ipynb +++ b/doc/pub/week48/ipynb/week48.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "markdown", - "id": "b8e98ac0", + "id": "fe7fd644", "metadata": { "editable": true }, @@ -14,7 +14,7 @@ }, { "cell_type": "markdown", - "id": "12cb369c", + "id": "c407135f", "metadata": { "editable": true }, @@ -29,7 +29,7 @@ }, { "cell_type": "markdown", - "id": "25b1691e", + "id": "1894285d", "metadata": { "editable": true }, @@ -39,7 +39,7 @@ }, { "cell_type": "markdown", - "id": "c213ed56", + "id": "bceac1c4", "metadata": { "editable": true }, @@ -85,7 +85,7 @@ }, { "cell_type": "markdown", - "id": "c3891e8a", + "id": "eaaa2de8", "metadata": { "editable": true }, @@ -112,7 +112,7 @@ }, { "cell_type": "markdown", - "id": "dbe2476d", + "id": "36809e3f", "metadata": { "editable": true }, @@ -123,7 +123,7 @@ { "cell_type": "code", "execution_count": 1, - "id": "d03f9475", + "id": "ef24e54c", "metadata": { "collapsed": false, "editable": true @@ -197,7 +197,7 @@ }, { "cell_type": "markdown", - "id": "6a5a918f", + "id": "0472d908", "metadata": { "editable": true }, @@ -213,7 +213,7 @@ }, { "cell_type": "markdown", - "id": "fef4d0d0", + "id": "08e636dd", "metadata": { "editable": true }, @@ -224,7 +224,7 @@ { "cell_type": "code", "execution_count": 2, - "id": "38b4f201", + "id": "90e2c43c", "metadata": { "collapsed": false, "editable": true @@ -239,7 +239,7 @@ { "cell_type": "code", "execution_count": 3, - "id": "8c186195", + "id": "929962a3", "metadata": { "collapsed": false, "editable": true @@ -257,7 +257,7 @@ }, { "cell_type": "markdown", - "id": "6a6f19a9", + "id": "a2039540", "metadata": { "editable": true }, @@ -277,7 +277,7 @@ }, { "cell_type": "markdown", - "id": "ea3a24e9", + "id": "140e30ce", "metadata": { "editable": true }, @@ -291,7 +291,7 @@ }, { "cell_type": "markdown", - "id": "1a3c318c", + "id": "02cb3a63", "metadata": { "editable": true }, @@ -303,7 +303,7 @@ }, { "cell_type": "markdown", - "id": "e16cdd0d", + "id": "9924d331", "metadata": { "editable": true }, @@ -320,7 +320,7 @@ }, { "cell_type": "markdown", - "id": "8d3ad320", + "id": "6e00565f", "metadata": { "editable": true }, @@ -332,7 +332,7 @@ }, { "cell_type": "markdown", - "id": "fd25b1e3", + "id": "de290cb8", "metadata": { "editable": true }, @@ -346,7 +346,7 @@ }, { "cell_type": "markdown", - "id": "10404a9e", + "id": "fd4fac8f", "metadata": { "editable": true }, @@ -358,7 +358,7 @@ }, { "cell_type": "markdown", - "id": "124bd4cf", + "id": "1ddfce8e", "metadata": { "editable": true }, @@ -371,7 +371,7 @@ }, { "cell_type": "markdown", - "id": "8fb8c197", + "id": "9e18b5ba", "metadata": { "editable": true }, @@ -383,7 +383,7 @@ }, { "cell_type": "markdown", - "id": "ced4e7f4", + "id": "52663df1", "metadata": { "editable": true }, @@ -393,7 +393,7 @@ }, { "cell_type": "markdown", - "id": "161eec23", + "id": "a6bfc5a8", "metadata": { "editable": true }, @@ -421,7 +421,7 @@ }, { "cell_type": "markdown", - "id": "a1964067", + "id": "bd232678", "metadata": { "editable": true }, @@ -437,7 +437,7 @@ }, { "cell_type": "markdown", - "id": "1933dc63", + "id": "8f945bbd", "metadata": { "editable": true }, @@ -449,7 +449,7 @@ }, { "cell_type": "markdown", - "id": "d932cfbd", + "id": "14894a48", "metadata": { "editable": true }, @@ -460,7 +460,7 @@ }, { "cell_type": "markdown", - "id": "24f7a02a", + "id": "31d3d404", "metadata": { "editable": true }, @@ -472,7 +472,7 @@ }, { "cell_type": "markdown", - "id": "6eb9e426", + "id": "0d86e417", "metadata": { "editable": true }, @@ -482,7 +482,7 @@ }, { "cell_type": "markdown", - "id": "0a48ddf7", + "id": "dba5d81b", "metadata": { "editable": true }, @@ -494,7 +494,7 @@ }, { "cell_type": "markdown", - "id": "d0087ffb", + "id": "9946eed4", "metadata": { "editable": true }, @@ -504,7 +504,7 @@ }, { "cell_type": "markdown", - "id": "b455ce5e", + "id": "671b57e9", "metadata": { "editable": true }, @@ -516,7 +516,7 @@ }, { "cell_type": "markdown", - "id": "5aeac1a5", + "id": "2f1afb73", "metadata": { "editable": true }, @@ -526,7 +526,7 @@ }, { "cell_type": "markdown", - "id": "86dfaddf", + "id": "eb06d7bb", "metadata": { "editable": true }, @@ -538,7 +538,7 @@ }, { "cell_type": "markdown", - "id": "4826590f", + "id": "bd75f76e", "metadata": { "editable": true }, @@ -552,7 +552,7 @@ }, { "cell_type": "markdown", - "id": "3de0828a", + "id": "7a2457ae", "metadata": { "editable": true }, @@ -568,7 +568,7 @@ }, { "cell_type": "markdown", - "id": "961265b1", + "id": "88fa4f5c", "metadata": { "editable": true }, @@ -580,7 +580,7 @@ }, { "cell_type": "markdown", - "id": "a74db275", + "id": "e100093d", "metadata": { "editable": true }, @@ -596,7 +596,7 @@ }, { "cell_type": "markdown", - "id": "81d27911", + "id": "a96c6add", "metadata": { "editable": true }, @@ -608,7 +608,7 @@ }, { "cell_type": "markdown", - "id": "3b7dbe21", + "id": "4ee0fb37", "metadata": { "editable": true }, @@ -618,7 +618,7 @@ }, { "cell_type": "markdown", - "id": "e664eec9", + "id": "b7206f37", "metadata": { "editable": true }, @@ -630,7 +630,7 @@ }, { "cell_type": "markdown", - "id": "50fe52a0", + "id": "ca864761", "metadata": { "editable": true }, @@ -642,7 +642,7 @@ }, { "cell_type": "markdown", - "id": "a4cd6cb0", + "id": "b8776574", "metadata": { "editable": true }, @@ -654,7 +654,7 @@ }, { "cell_type": "markdown", - "id": "116d1038", + "id": "7afe77b7", "metadata": { "editable": true }, @@ -665,7 +665,7 @@ }, { "cell_type": "markdown", - "id": "690f6882", + "id": "1915c382", "metadata": { "editable": true }, @@ -677,7 +677,7 @@ }, { "cell_type": "markdown", - "id": "436d0a30", + "id": "57398dbb", "metadata": { "editable": true }, @@ -688,7 +688,7 @@ }, { "cell_type": "markdown", - "id": "779a3214", + "id": "b63693b2", "metadata": { "editable": true }, @@ -700,7 +700,7 @@ }, { "cell_type": "markdown", - "id": "baf4ef97", + "id": "09e68eb6", "metadata": { "editable": true }, @@ -710,7 +710,7 @@ }, { "cell_type": "markdown", - "id": "819d9a45", + "id": "bf86c857", "metadata": { "editable": true }, @@ -722,7 +722,7 @@ }, { "cell_type": "markdown", - "id": "9de7ba47", + "id": "cb3d645c", "metadata": { "editable": true }, @@ -734,7 +734,7 @@ }, { "cell_type": "markdown", - "id": "3130e2cc", + "id": "2157edce", "metadata": { "editable": true }, @@ -746,7 +746,7 @@ }, { "cell_type": "markdown", - "id": "88bee939", + "id": "b9e39b81", "metadata": { "editable": true }, @@ -758,7 +758,7 @@ }, { "cell_type": "markdown", - "id": "81de3757", + "id": "941fa37d", "metadata": { "editable": true }, @@ -768,7 +768,7 @@ }, { "cell_type": "markdown", - "id": "b1897c2e", + "id": "3ce9f347", "metadata": { "editable": true }, @@ -780,7 +780,7 @@ }, { "cell_type": "markdown", - "id": "eee8697d", + "id": "d26d3711", "metadata": { "editable": true }, @@ -790,7 +790,7 @@ }, { "cell_type": "markdown", - "id": "56355f2a", + "id": "44bce843", "metadata": { "editable": true }, @@ -802,7 +802,7 @@ }, { "cell_type": "markdown", - "id": "88a20b27", + "id": "1c997239", "metadata": { "editable": true }, @@ -812,7 +812,7 @@ }, { "cell_type": "markdown", - "id": "2a02a6d7", + "id": "216bdcdb", "metadata": { "editable": true }, @@ -824,7 +824,7 @@ }, { "cell_type": "markdown", - "id": "816d8d8f", + "id": "373c405a", "metadata": { "editable": true }, @@ -834,7 +834,7 @@ }, { "cell_type": "markdown", - "id": "a5a820fc", + "id": "c57bddad", "metadata": { "editable": true }, @@ -846,7 +846,7 @@ }, { "cell_type": "markdown", - "id": "6517fb85", + "id": "5e936ef6", "metadata": { "editable": true }, @@ -856,7 +856,7 @@ }, { "cell_type": "markdown", - "id": "4a3c7a45", + "id": "2c9eaf83", "metadata": { "editable": true }, @@ -868,7 +868,7 @@ }, { "cell_type": "markdown", - "id": "d295d135", + "id": "bdecca1e", "metadata": { "editable": true }, @@ -888,7 +888,7 @@ }, { "cell_type": "markdown", - "id": "710b2ab1", + "id": "fea06c34", "metadata": { "editable": true }, @@ -900,7 +900,7 @@ }, { "cell_type": "markdown", - "id": "1e153c11", + "id": "0025dfdf", "metadata": { "editable": true }, @@ -910,7 +910,7 @@ }, { "cell_type": "markdown", - "id": "7ca4618e", + "id": "13f97440", "metadata": { "editable": true }, @@ -926,7 +926,7 @@ }, { "cell_type": "markdown", - "id": "ce93cd0f", + "id": "ee2b62a1", "metadata": { "editable": true }, @@ -938,7 +938,7 @@ }, { "cell_type": "markdown", - "id": "3405f033", + "id": "35cc8d6f", "metadata": { "editable": true }, @@ -966,7 +966,7 @@ }, { "cell_type": "markdown", - "id": "4e254d44", + "id": "90ca86cd", "metadata": { "editable": true }, @@ -979,7 +979,7 @@ { "cell_type": "code", "execution_count": 4, - "id": "a2372724", + "id": "d3803033", "metadata": { "collapsed": false, "editable": true @@ -1004,7 +1004,7 @@ }, { "cell_type": "markdown", - "id": "42c1e9ee", + "id": "ac68d8bc", "metadata": { "editable": true }, @@ -1015,7 +1015,7 @@ { "cell_type": "code", "execution_count": 5, - "id": "bad891e4", + "id": "679b3966", "metadata": { "collapsed": false, "editable": true @@ -1104,7 +1104,7 @@ }, { "cell_type": "markdown", - "id": "87c499dc", + "id": "58adfdb1", "metadata": { "editable": true }, @@ -1122,7 +1122,7 @@ }, { "cell_type": "markdown", - "id": "d78bf458", + "id": "ca89c2b5", "metadata": { "editable": true }, @@ -1135,7 +1135,7 @@ }, { "cell_type": "markdown", - "id": "12318d85", + "id": "bee751db", "metadata": { "editable": true }, @@ -1147,7 +1147,7 @@ }, { "cell_type": "markdown", - "id": "2d96597d", + "id": "64ec846c", "metadata": { "editable": true }, @@ -1157,7 +1157,7 @@ }, { "cell_type": "markdown", - "id": "ca649b47", + "id": "2af461db", "metadata": { "editable": true }, @@ -1169,7 +1169,7 @@ }, { "cell_type": "markdown", - "id": "f3e57f43", + "id": "b6502394", "metadata": { "editable": true }, @@ -1179,7 +1179,7 @@ }, { "cell_type": "markdown", - "id": "d8cc18d6", + "id": "48d243dc", "metadata": { "editable": true }, @@ -1191,7 +1191,7 @@ }, { "cell_type": "markdown", - "id": "43d98194", + "id": "e03e872f", "metadata": { "editable": true }, @@ -1204,7 +1204,7 @@ }, { "cell_type": "markdown", - "id": "b35a4881", + "id": "d1613021", "metadata": { "editable": true }, @@ -1216,7 +1216,7 @@ }, { "cell_type": "markdown", - "id": "b058af39", + "id": "d5e7fe64", "metadata": { "editable": true }, @@ -1228,7 +1228,7 @@ }, { "cell_type": "markdown", - "id": "3ad897cf", + "id": "0bd5bef5", "metadata": { "editable": true }, @@ -1240,7 +1240,7 @@ }, { "cell_type": "markdown", - "id": "bd31fcbe", + "id": "ef5252a7", "metadata": { "editable": true }, @@ -1250,7 +1250,7 @@ }, { "cell_type": "markdown", - "id": "f8376f29", + "id": "7cb83769", "metadata": { "editable": true }, @@ -1262,7 +1262,7 @@ }, { "cell_type": "markdown", - "id": "be665473", + "id": "6b0d28d0", "metadata": { "editable": true }, @@ -1272,7 +1272,7 @@ }, { "cell_type": "markdown", - "id": "e1091a41", + "id": "4762090e", "metadata": { "editable": true }, @@ -1288,7 +1288,7 @@ }, { "cell_type": "markdown", - "id": "e84e0298", + "id": "59124920", "metadata": { "editable": true }, @@ -1300,7 +1300,7 @@ }, { "cell_type": "markdown", - "id": "c21417da", + "id": "70bba454", "metadata": { "editable": true }, @@ -1321,7 +1321,7 @@ }, { "cell_type": "markdown", - "id": "5525cf60", + "id": "97c28672", "metadata": { "editable": true }, @@ -1332,7 +1332,7 @@ { "cell_type": "code", "execution_count": 6, - "id": "a06c6902", + "id": "380a6f12", "metadata": { "collapsed": false, "editable": true @@ -1378,13 +1378,12 @@ "plt.plot(polydegree, bias, label='bias')\n", "plt.plot(polydegree, variance, label='Variance')\n", "plt.legend()\n", - "save_fig(\"gdregression\")\n", "plt.show()" ] }, { "cell_type": "markdown", - "id": "b515314f", + "id": "02a1a349", "metadata": { "editable": true }, @@ -1395,7 +1394,7 @@ { "cell_type": "code", "execution_count": 7, - "id": "4231d351", + "id": "931c8836", "metadata": { "collapsed": false, "editable": true @@ -1433,20 +1432,17 @@ "import scikitplot as skplt\n", "y_pred = gd_clf.predict(X_test_scaled)\n", "skplt.metrics.plot_confusion_matrix(y_test, y_pred, normalize=True)\n", - "save_fig(\"gdclassiffierconfusion\")\n", "plt.show()\n", "y_probas = gd_clf.predict_proba(X_test_scaled)\n", "skplt.metrics.plot_roc(y_test, y_probas)\n", - "save_fig(\"gdclassiffierroc\")\n", "plt.show()\n", "skplt.metrics.plot_cumulative_gain(y_test, y_probas)\n", - "save_fig(\"gdclassiffiercgain\")\n", "plt.show()" ] }, { "cell_type": "markdown", - "id": "121a1d8a", + "id": "ccd968ba", "metadata": { "editable": true }, @@ -1469,70 +1465,7 @@ }, { "cell_type": "markdown", - "id": "a9c5f373", - "metadata": { - "editable": true - }, - "source": [ - "## Regression Case" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "id": "16f56e04", - "metadata": { - "collapsed": false, - "editable": true - }, - "outputs": [], - "source": [ - "import matplotlib.pyplot as plt\n", - "import numpy as np\n", - "from sklearn.model_selection import train_test_split\n", - "import xgboost as xgb\n", - "import scikitplot as skplt\n", - "from sklearn.metrics import mean_squared_error\n", - "\n", - "n = 100\n", - "maxdegree = 6\n", - "\n", - "# Make data set.\n", - "x = np.linspace(-3, 3, n).reshape(-1, 1)\n", - "y = np.exp(-x**2) + 1.5 * np.exp(-(x-2)**2)+ np.random.normal(0, 0.1, x.shape)\n", - "\n", - "error = np.zeros(maxdegree)\n", - "bias = np.zeros(maxdegree)\n", - "variance = np.zeros(maxdegree)\n", - "polydegree = np.zeros(maxdegree)\n", - "X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.2)\n", - "\n", - "for degree in range(maxdegree):\n", - " model = xgb.XGBRegressor(objective ='reg:squarederror', colsaobjective ='reg:squarederror', colsample_bytree = 0.3, learning_rate = 0.1,max_depth = degree, alpha = 10, n_estimators = 200)\n", - "\n", - " model.fit(X_train,y_train)\n", - " y_pred = model.predict(X_test)\n", - " polydegree[degree] = degree\n", - " error[degree] = np.mean( np.mean((y_test - y_pred)**2) )\n", - " bias[degree] = np.mean( (y_test - np.mean(y_pred))**2 )\n", - " variance[degree] = np.mean( np.var(y_pred) )\n", - " print('Max depth:', degree)\n", - " print('Error:', error[degree])\n", - " print('Bias^2:', bias[degree])\n", - " print('Var:', variance[degree])\n", - " print('{} >= {} + {} = {}'.format(error[degree], bias[degree], variance[degree], bias[degree]+variance[degree]))\n", - "\n", - "plt.xlim(1,maxdegree-1)\n", - "plt.plot(polydegree, error, label='Error')\n", - "plt.plot(polydegree, bias, label='bias')\n", - "plt.plot(polydegree, variance, label='Variance')\n", - "plt.legend()\n", - "plt.show()" - ] - }, - { - "cell_type": "markdown", - "id": "af8e732d", + "id": "6ec7c62b", "metadata": { "editable": true }, @@ -1544,8 +1477,8 @@ }, { "cell_type": "code", - "execution_count": 9, - "id": "b63afee6", + "execution_count": 8, + "id": "b18ceffd", "metadata": { "collapsed": false, "editable": true @@ -1584,31 +1517,26 @@ "import scikitplot as skplt\n", "y_pred = xg_clf.predict(X_test_scaled)\n", "skplt.metrics.plot_confusion_matrix(y_test, y_pred, normalize=True)\n", - "save_fig(\"xdclassiffierconfusion\")\n", "plt.show()\n", "y_probas = xg_clf.predict_proba(X_test_scaled)\n", "skplt.metrics.plot_roc(y_test, y_probas)\n", - "save_fig(\"xdclassiffierroc\")\n", "plt.show()\n", "skplt.metrics.plot_cumulative_gain(y_test, y_probas)\n", - "save_fig(\"gdclassiffiercgain\")\n", "plt.show()\n", "\n", "\n", "xgb.plot_tree(xg_clf,num_trees=0)\n", "plt.rcParams['figure.figsize'] = [50, 10]\n", - "save_fig(\"xgtree\")\n", "plt.show()\n", "\n", "xgb.plot_importance(xg_clf)\n", "plt.rcParams['figure.figsize'] = [5, 5]\n", - "save_fig(\"xgparams\")\n", "plt.show()" ] }, { "cell_type": "markdown", - "id": "84c9ab3e", + "id": "9d4b045f", "metadata": { "editable": true }, @@ -1618,8 +1546,8 @@ }, { "cell_type": "code", - "execution_count": 10, - "id": "01952655", + "execution_count": 9, + "id": "2bf430ed", "metadata": { "collapsed": false, "editable": true @@ -1706,7 +1634,7 @@ }, { "cell_type": "markdown", - "id": "9a762674", + "id": "2c3330d7", "metadata": { "editable": true }, @@ -1716,7 +1644,7 @@ }, { "cell_type": "markdown", - "id": "1b17200a", + "id": "a67ab616", "metadata": { "editable": true }, @@ -1731,7 +1659,7 @@ }, { "cell_type": "markdown", - "id": "4acb7811", + "id": "00a62442", "metadata": { "editable": true }, @@ -1747,7 +1675,7 @@ }, { "cell_type": "markdown", - "id": "6b2f2ccd", + "id": "483051ba", "metadata": { "editable": true }, @@ -1772,7 +1700,7 @@ }, { "cell_type": "markdown", - "id": "e6bdbe75", + "id": "7429c058", "metadata": { "editable": true }, @@ -1819,7 +1747,7 @@ }, { "cell_type": "markdown", - "id": "5900b979", + "id": "3ab2e37a", "metadata": { "editable": true }, @@ -1854,7 +1782,7 @@ }, { "cell_type": "markdown", - "id": "97bf13a4", + "id": "b4a54e3f", "metadata": { "editable": true }, @@ -1877,7 +1805,7 @@ }, { "cell_type": "markdown", - "id": "1e152734", + "id": "68e0ad8c", "metadata": { "editable": true }, @@ -1900,7 +1828,7 @@ }, { "cell_type": "markdown", - "id": "1ed0e449", + "id": "319b20fd", "metadata": { "editable": true }, @@ -1920,7 +1848,7 @@ }, { "cell_type": "markdown", - "id": "b24640ed", + "id": "8060a087", "metadata": { "editable": true }, @@ -1936,7 +1864,7 @@ }, { "cell_type": "markdown", - "id": "33957222", + "id": "be25b51b", "metadata": { "editable": true }, @@ -1964,7 +1892,7 @@ }, { "cell_type": "markdown", - "id": "56ba1280", + "id": "2c67fffd", "metadata": { "editable": true }, @@ -1986,7 +1914,7 @@ }, { "cell_type": "markdown", - "id": "fb062869", + "id": "6ab2cdf4", "metadata": { "editable": true }, @@ -2011,7 +1939,7 @@ }, { "cell_type": "markdown", - "id": "808e4541", + "id": "8a5e039f", "metadata": { "editable": true }, @@ -2029,7 +1957,7 @@ }, { "cell_type": "markdown", - "id": "1ac3860d", + "id": "9a3946d1", "metadata": { "editable": true }, @@ -2059,7 +1987,7 @@ }, { "cell_type": "markdown", - "id": "c9bead32", + "id": "d9e6d355", "metadata": { "editable": true }, @@ -2073,7 +2001,7 @@ }, { "cell_type": "markdown", - "id": "982d4930", + "id": "7e99ee2d", "metadata": { "editable": true }, @@ -2099,7 +2027,7 @@ }, { "cell_type": "markdown", - "id": "369bc8e7", + "id": "1f94c42f", "metadata": { "editable": true }, @@ -2128,7 +2056,7 @@ }, { "cell_type": "markdown", - "id": "539550ba", + "id": "f5e8269a", "metadata": { "editable": true }, @@ -2145,7 +2073,7 @@ }, { "cell_type": "markdown", - "id": "45a3026e", + "id": "6174662e", "metadata": { "editable": true }, @@ -2167,7 +2095,7 @@ }, { "cell_type": "markdown", - "id": "3ba33ca6", + "id": "36165057", "metadata": { "editable": true }, @@ -2185,7 +2113,7 @@ }, { "cell_type": "markdown", - "id": "512af4a3", + "id": "659b2e21", "metadata": { "editable": true }, @@ -2208,7 +2136,7 @@ }, { "cell_type": "markdown", - "id": "bafc70a8", + "id": "925e9817", "metadata": { "editable": true }, @@ -2227,7 +2155,7 @@ }, { "cell_type": "markdown", - "id": "c8db3bb3", + "id": "0bc5fe3d", "metadata": { "editable": true }, @@ -2243,7 +2171,7 @@ }, { "cell_type": "markdown", - "id": "19a62799", + "id": "68a1f231", "metadata": { "editable": true }, @@ -2258,7 +2186,7 @@ }, { "cell_type": "markdown", - "id": "dee23d4a", + "id": "4aa0e3cf", "metadata": { "editable": true }, @@ -2282,7 +2210,7 @@ }, { "cell_type": "markdown", - "id": "27b6fe1b", + "id": "e440589a", "metadata": { "editable": true }, @@ -2294,7 +2222,7 @@ }, { "cell_type": "markdown", - "id": "671ebf4b", + "id": "dd9c38a0", "metadata": { "editable": true }, @@ -2312,7 +2240,7 @@ }, { "cell_type": "markdown", - "id": "1fe52803", + "id": "5c393934", "metadata": { "editable": true }, @@ -2322,7 +2250,7 @@ }, { "cell_type": "markdown", - "id": "5a2a9f51", + "id": "ab552000", "metadata": { "editable": true }, @@ -2340,7 +2268,7 @@ }, { "cell_type": "markdown", - "id": "bad1a098", + "id": "878ae3c8", "metadata": { "editable": true }, @@ -2350,7 +2278,7 @@ }, { "cell_type": "markdown", - "id": "942d899e", + "id": "b830eb6d", "metadata": { "editable": true }, @@ -2369,7 +2297,7 @@ }, { "cell_type": "markdown", - "id": "78dc7403", + "id": "683b809a", "metadata": { "editable": true }, @@ -2381,7 +2309,7 @@ }, { "cell_type": "markdown", - "id": "7c2bfde9", + "id": "adec988a", "metadata": { "editable": true }, @@ -2395,7 +2323,7 @@ }, { "cell_type": "markdown", - "id": "2f152023", + "id": "7c370896", "metadata": { "editable": true }, @@ -2410,7 +2338,7 @@ }, { "cell_type": "markdown", - "id": "70f4a4a4", + "id": "1b07308c", "metadata": { "editable": true }, @@ -2428,7 +2356,7 @@ }, { "cell_type": "markdown", - "id": "0dc78848", + "id": "0e443612", "metadata": { "editable": true }, @@ -2442,7 +2370,7 @@ }, { "cell_type": "markdown", - "id": "831b279e", + "id": "13f2d272", "metadata": { "editable": true }, @@ -2460,7 +2388,7 @@ }, { "cell_type": "markdown", - "id": "8b2e5067", + "id": "8c9ac9f3", "metadata": { "editable": true }, @@ -2484,7 +2412,7 @@ }, { "cell_type": "markdown", - "id": "5a7fd9c4", + "id": "610c29b7", "metadata": { "editable": true }, @@ -2522,7 +2450,7 @@ }, { "cell_type": "markdown", - "id": "3fff9e22", + "id": "096e7470", "metadata": { "editable": true }, @@ -2545,7 +2473,7 @@ }, { "cell_type": "markdown", - "id": "06e08aa9", + "id": "fbf18ec6", "metadata": { "editable": true }, @@ -2581,7 +2509,7 @@ }, { "cell_type": "markdown", - "id": "564bb5de", + "id": "30ca6ef5", "metadata": { "editable": true }, @@ -2602,7 +2530,7 @@ }, { "cell_type": "markdown", - "id": "a50184f4", + "id": "99d1df35", "metadata": { "editable": true }, @@ -2624,7 +2552,7 @@ }, { "cell_type": "markdown", - "id": "0abcc34b", + "id": "9abcc525", "metadata": { "editable": true }, @@ -2644,7 +2572,7 @@ }, { "cell_type": "markdown", - "id": "8d2c9af4", + "id": "ca2124ca", "metadata": { "editable": true }, @@ -2659,7 +2587,7 @@ }, { "cell_type": "markdown", - "id": "f477a76f", + "id": "5858078e", "metadata": { "editable": true }, @@ -2677,7 +2605,7 @@ }, { "cell_type": "markdown", - "id": "6155600c", + "id": "174ee0c8", "metadata": { "editable": true }, @@ -2707,7 +2635,7 @@ }, { "cell_type": "markdown", - "id": "83336d35", + "id": "e2c44b50", "metadata": { "editable": true }, @@ -2736,7 +2664,7 @@ }, { "cell_type": "markdown", - "id": "99eb3e53", + "id": "6f4f1c4b", "metadata": { "editable": true }, @@ -2767,7 +2695,7 @@ }, { "cell_type": "markdown", - "id": "283b9930", + "id": "3b86535f", "metadata": { "editable": true }, @@ -2790,7 +2718,7 @@ }, { "cell_type": "markdown", - "id": "defb65a0", + "id": "51363de6", "metadata": { "editable": true }, @@ -2808,7 +2736,7 @@ }, { "cell_type": "markdown", - "id": "027093b9", + "id": "e8a5a5bf", "metadata": { "editable": true }, @@ -2831,7 +2759,7 @@ }, { "cell_type": "markdown", - "id": "78a77435", + "id": "0249293e", "metadata": { "editable": true }, @@ -2852,7 +2780,7 @@ }, { "cell_type": "markdown", - "id": "500291ea", + "id": "bfcd921f", "metadata": { "editable": true }, @@ -2868,7 +2796,7 @@ }, { "cell_type": "markdown", - "id": "62d8538b", + "id": "57427c59", "metadata": { "editable": true }, diff --git a/doc/src/week48/week48.do.txt b/doc/src/week48/week48.do.txt index 02de1f006..1022ae3e2 100644 --- a/doc/src/week48/week48.do.txt +++ b/doc/src/week48/week48.do.txt @@ -686,7 +686,6 @@ plt.plot(polydegree, error, label='Error') plt.plot(polydegree, bias, label='bias') plt.plot(polydegree, variance, label='Variance') plt.legend() -save_fig("gdregression") plt.show() !ec @@ -725,14 +724,11 @@ print("Test set accuracy with Gradient boosting and scaled data: {:.2f}".format( import scikitplot as skplt y_pred = gd_clf.predict(X_test_scaled) skplt.metrics.plot_confusion_matrix(y_test, y_pred, normalize=True) -save_fig("gdclassiffierconfusion") plt.show() y_probas = gd_clf.predict_proba(X_test_scaled) skplt.metrics.plot_roc(y_test, y_probas) -save_fig("gdclassiffierroc") plt.show() skplt.metrics.plot_cumulative_gain(y_test, y_probas) -save_fig("gdclassiffiercgain") plt.show() !ec @@ -754,53 +750,6 @@ sketch for efficient proposal calculation. It introduces a novel sparsity-aware It is now the algorithm which wins essentially all ML competitions!!! -!split -===== Regression Case ===== - -!bc pycod -import matplotlib.pyplot as plt -import numpy as np -from sklearn.model_selection import train_test_split -import xgboost as xgb -import scikitplot as skplt -from sklearn.metrics import mean_squared_error - -n = 100 -maxdegree = 6 - -# Make data set. -x = np.linspace(-3, 3, n).reshape(-1, 1) -y = np.exp(-x**2) + 1.5 * np.exp(-(x-2)**2)+ np.random.normal(0, 0.1, x.shape) - -error = np.zeros(maxdegree) -bias = np.zeros(maxdegree) -variance = np.zeros(maxdegree) -polydegree = np.zeros(maxdegree) -X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.2) - -for degree in range(maxdegree): - model = xgb.XGBRegressor(objective ='reg:squarederror', colsaobjective ='reg:squarederror', colsample_bytree = 0.3, learning_rate = 0.1,max_depth = degree, alpha = 10, n_estimators = 200) - - model.fit(X_train,y_train) - y_pred = model.predict(X_test) - polydegree[degree] = degree - error[degree] = np.mean( np.mean((y_test - y_pred)**2) ) - bias[degree] = np.mean( (y_test - np.mean(y_pred))**2 ) - variance[degree] = np.mean( np.var(y_pred) ) - print('Max depth:', degree) - print('Error:', error[degree]) - print('Bias^2:', bias[degree]) - print('Var:', variance[degree]) - print('{} >= {} + {} = {}'.format(error[degree], bias[degree], variance[degree], bias[degree]+variance[degree])) - -plt.xlim(1,maxdegree-1) -plt.plot(polydegree, error, label='Error') -plt.plot(polydegree, bias, label='bias') -plt.plot(polydegree, variance, label='Variance') -plt.legend() -plt.show() - -!ec !split ===== Xgboost on the Cancer Data ===== @@ -839,25 +788,20 @@ print("Test set accuracy with Gradient Boosting and scaled data: {:.2f}".format( import scikitplot as skplt y_pred = xg_clf.predict(X_test_scaled) skplt.metrics.plot_confusion_matrix(y_test, y_pred, normalize=True) -save_fig("xdclassiffierconfusion") plt.show() y_probas = xg_clf.predict_proba(X_test_scaled) skplt.metrics.plot_roc(y_test, y_probas) -save_fig("xdclassiffierroc") plt.show() skplt.metrics.plot_cumulative_gain(y_test, y_probas) -save_fig("gdclassiffiercgain") plt.show() xgb.plot_tree(xg_clf,num_trees=0) plt.rcParams['figure.figsize'] = [50, 10] -save_fig("xgtree") plt.show() xgb.plot_importance(xg_clf) plt.rcParams['figure.figsize'] = [5, 5] -save_fig("xgparams") plt.show() !ec