Ensemble Methods: Bagging and Boosting

By Prof. Seungchul Lee
http://iai.postech.ac.kr/
Industrial AI Lab at POSTECH

Table of Contents

1. Bagging¶

Bagging = Bootstrap Aggregating
- Duplicate elements are allowed
- Ensemble model
- Voting (classification) or averaging (regression)

2. Boosting¶

Weighting the error

$\hat{y}$ is prediction

$ \text{error} = \hat{y}- y$

Sequential model

Learning progresses in order.

Simple classification example

import numpy as np
import matplotlib.pyplot as plt

Data generation

%matplotlib inline
# Simulate three 2-D Gaussian clusters (one per class) and scatter-plot them.

m = 100  # samples drawn per cluster

# Cluster centres
mu1 = np.array([1, 7])
mu2 = np.array([3, 4])
mu3 = np.array([6, 5])

# Cluster covariances: a base matrix scaled by a per-cluster factor
SIGMA1 = 0.9*np.array([[1, 1.5], [1.5, 3]])
SIGMA2 = 0.6*np.array([[2, 0], [0, 2]])
SIGMA3 = 0.8*np.array([[1, -1], [-1, 2]])

# Draw the samples (order matters for reproducibility under a fixed seed)
X1 = np.random.multivariate_normal(mu1, SIGMA1, m)
X2 = np.random.multivariate_normal(mu2, SIGMA2, m)
X3 = np.random.multivariate_normal(mu3, SIGMA3, m)

# Class labels 1, 2, 3 stored as (m, 1) float columns
y1 = np.full((m, 1), 1.0)
y2 = np.full((m, 1), 2.0)
y3 = np.full((m, 1), 3.0)

plt.figure(figsize = (10, 8))
plt.title('Generated Data', fontsize = 15)
for pts, tag in ((X1, 'C1'), (X2, 'C2'), (X3, 'C3')):
    plt.plot(pts[:,0], pts[:,1], '.', label = tag)
plt.xlabel('$X_1$', fontsize = 15)
plt.ylabel('$X_2$', fontsize = 15)
plt.legend(fontsize = 12)
plt.axis('equal')
plt.grid(alpha = 0.3)
plt.axis([-2, 10, 0, 12])
plt.show()

Single Decision Tree vs Bagging

import sklearn
from sklearn.ensemble import BaggingClassifier

Decision Tree

# Import the tree estimator explicitly: reaching it as `sklearn.tree.…` through
# the bare `sklearn` package only works if that submodule happens to have been
# loaded already by some other import.
from sklearn.tree import DecisionTreeClassifier

# Stack the three clusters into one feature matrix and one label column.
X = np.vstack([X1, X2, X3])
y = np.vstack([y1, y2, y3])

# Shallow (depth-2) entropy tree; random_state fixed for repeatable splits.
clf = DecisionTreeClassifier(criterion = 'entropy', max_depth = 2, random_state = 0, max_features = 1.0)
# Labels are flattened to 1-D, matching how the other models in this notebook are fit.
clf.fit(X, np.ravel(y))

DecisionTreeClassifier(criterion='entropy', max_depth=2, max_features=1.0,
                       random_state=0)

# Evaluate the fitted tree on a regular grid covering the plotting window so
# the predicted class regions can be drawn later.
res = 0.5  # grid spacing
[X1gr, X2gr] = np.meshgrid(np.arange(-2,10,res), np.arange(0,12,res))

# One (x1, x2) grid point per row, kept as a plain ndarray: recent scikit-learn
# releases reject np.matrix input, and NumPy discourages np.matrix in general.
Xp = np.hstack([X1gr.reshape(-1,1), X2gr.reshape(-1,1)])

# Predicted label per grid point, as a column vector.
q = np.asarray(clf.predict(Xp)).reshape(-1,1)

# Row indices of the grid points assigned to each class.
C1 = np.where(q == 1)[0]
C2 = np.where(q == 2)[0]
C3 = np.where(q == 3)[0]

Bagging

# Bagging ensemble of 50 estimators, each fit on a bootstrap sample.
# The former `base_estimator = None` argument is omitted: None was the default,
# and the keyword was renamed to `estimator` and later removed in newer
# scikit-learn releases, so leaving it out works on old and new versions alike.
bclf = BaggingClassifier(n_estimators = 50, max_samples = 1.0, bootstrap = True)
bclf.fit(X, np.ravel(y))

BaggingClassifier(n_estimators=50)

# Grid points (one per row) on which the bagging model is evaluated. Kept as a
# plain ndarray: recent scikit-learn releases reject np.matrix input.
Xp1 = np.hstack([X1gr.reshape(-1,1), X2gr.reshape(-1,1)])

# Predicted label per grid point, as a column vector.
q1 = np.asarray(bclf.predict(Xp1)).reshape(-1,1)

# Row indices of the grid points assigned to each class.
C11 = np.where(q1 == 1)[0]
C21 = np.where(q1 == 2)[0]
C31 = np.where(q1 == 3)[0]

3. Gradient Boosting¶

prediction

$\hat{y} = \alpha \times \text{tree}_A + \beta \times \text{tree}_B + \gamma \times \text{tree}_C$

Objective function: $ \Sigma L(y_i, \hat{y}_i) $

from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier

# Gradient-boosted tree ensemble: 300 boosting stages of depth-6 trees with a
# learning rate of 0.05, fit on the flattened label vector.
GB = GradientBoostingClassifier(
    n_estimators = 300,
    max_depth = 6,
    learning_rate = 0.05,
    subsample = 1.0,
)
GB.fit(X, y.ravel())

GradientBoostingClassifier(learning_rate=0.05, max_depth=6, n_estimators=300)

# Grid points (one per row) on which the gradient-boosting model is evaluated.
# Kept as a plain ndarray: recent scikit-learn releases reject np.matrix input.
Xp2 = np.hstack([X1gr.reshape(-1,1), X2gr.reshape(-1,1)])

# Predicted label per grid point, as a column vector.
q2 = np.asarray(GB.predict(Xp2)).reshape(-1,1)

# Row indices of the grid points assigned to each class.
C12 = np.where(q2 == 1)[0]
C22 = np.where(q2 == 2)[0]
C32 = np.where(q2 == 3)[0]

4. Bagging vs Boosting¶

Comparing bagging and boosting

<br>

Plot results

# Side-by-side decision regions of the single tree, the bagging ensemble and
# the gradient-boosting model, each drawn over the training points.

# (panel title, grid points, per-class row indices into the grid)
panels = [
    ('Single Decision Tree', Xp, (C1, C2, C3)),
    ('Bagging (Low variance)', Xp1, (C11, C21, C31)),
    ('Boosting (Low bias)', Xp2, (C12, C22, C32)),
]
region_colors = ('blue', 'orange', 'green')  # one colour per class, in class order

plt.figure(figsize = (16, 5))

for col, (heading, grid, regions) in enumerate(panels, start = 1):
    plt.subplot(1, 3, col)

    # Training samples of each class
    for pts, tag in ((X1, 'C1'), (X2, 'C2'), (X3, 'C3')):
        plt.plot(pts[:,0], pts[:,1], '.', label = tag)

    # Faint squares marking the grid cells predicted as each class
    for idx, shade in zip(regions, region_colors):
        plt.plot(grid[idx,0], grid[idx,1], 's', color = shade, markersize = 8, alpha = 0.1)

    plt.title(heading, fontsize = 20)
    plt.xlabel('$X11$', fontsize = 15)
    plt.ylabel('$X12$', fontsize = 15)
    plt.legend(fontsize = 12, loc = 1)
    plt.axis('equal')
    plt.grid(alpha = 0.3)
    plt.axis([-2, 10, 0, 12])

plt.show()

5. XGBoost¶

$\hat{y} = \alpha \times \text{tree}_A + \beta \times \text{tree}_B + \gamma \times \text{tree}_C$

Objective function

Loss function + Regularization form

$ \Sigma L(y_i, \hat{y}_i) + \omega (f_t) $

Reduce overfitting (regularization form)

<br>

# ! pip install xgboost

import xgboost

# 'reg_lambda' sets the weight of the regularization term in the objective.
# The former `scoring = 'accuracy'` argument is dropped: it is not an
# XGBClassifier parameter, and XGBoost only warned that it "might not be used".
XGB = xgboost.XGBClassifier(n_estimators = 300, learning_rate = 0.2, max_depth = 5, reg_lambda = 0.05)

# NOTE(review): the labels in y are 1, 2, 3. XGBoost's own deprecation warning
# asks for integer labels starting at 0, and newer releases may reject 1-based
# labels outright. Fixing that requires shifting the labels here AND shifting
# the predictions back wherever XGB.predict is used -- confirm against the
# installed xgboost version.
XGB.fit(X, np.ravel(y))

[11:56:20] WARNING: C:/Users/Administrator/workspace/xgboost-win64_release_1.4.0/src/learner.cc:573: 
Parameters: { "scoring" } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


[11:56:20] WARNING: C:/Users/Administrator/workspace/xgboost-win64_release_1.4.0/src/learner.cc:1095: Starting in XGBoost 1.3.0, the default evaluation metric used with the objective 'multi:softprob' was changed from 'merror' to 'mlogloss'. Explicitly set eval_metric if you'd like to restore the old behavior.

c:\users\seungchul\appdata\local\programs\python\python36\lib\site-packages\xgboost\sklearn.py:1146: UserWarning: The use of label encoder in XGBClassifier is deprecated and will be removed in a future release. To remove this warning, do the following: 1) Pass option use_label_encoder=False when constructing XGBClassifier object; and 2) Encode your labels (y) as integers starting with 0, i.e. 0, 1, 2, ..., [num_class - 1].
  warnings.warn(label_encoder_deprecation_msg, UserWarning)

XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
              colsample_bynode=1, colsample_bytree=1, gamma=0, gpu_id=-1,
              importance_type='gain', interaction_constraints='',
              learning_rate=0.2, max_delta_step=0, max_depth=5,
              min_child_weight=1, missing=nan, monotone_constraints='()',
              n_estimators=300, n_jobs=6, num_parallel_tree=1,
              objective='multi:softprob', random_state=0, reg_alpha=0,
              reg_lambda=0.05, scale_pos_weight=None, scoring='accuracy',
              subsample=1, tree_method='exact', validate_parameters=1,
              verbosity=None)

# Grid points (one per row) on which the XGBoost model is evaluated. Kept as a
# plain ndarray; NumPy discourages np.matrix, and ordinary arrays index the
# same way in the plotting code that follows.
Xp3 = np.hstack([X1gr.reshape(-1,1), X2gr.reshape(-1,1)])

# Predicted label per grid point, as a column vector.
q3 = np.asarray(XGB.predict(Xp3)).reshape(-1,1)

# Row indices of the grid points assigned to each class.
C13 = np.where(q3 == 1)[0]
C23 = np.where(q3 == 2)[0]
C33 = np.where(q3 == 3)[0]

# Decision regions of gradient boosting next to those of XGBoost, each drawn
# over the training points.

# (panel title, grid points, per-class row indices into the grid)
panels = [
    ('Boosting (Overfitting)', Xp2, (C12, C22, C32)),
    ('XGBoosting (Reduce overfitting)', Xp3, (C13, C23, C33)),
]
region_colors = ('blue', 'orange', 'green')  # one colour per class, in class order

plt.figure(figsize = (16, 5))

for col, (heading, grid, regions) in enumerate(panels, start = 1):
    plt.subplot(1, 2, col)

    # Training samples of each class
    for pts, tag in ((X1, 'C1'), (X2, 'C2'), (X3, 'C3')):
        plt.plot(pts[:,0], pts[:,1], '.', label = tag)

    # Faint squares marking the grid cells predicted as each class
    for idx, shade in zip(regions, region_colors):
        plt.plot(grid[idx,0], grid[idx,1], 's', color = shade, markersize = 8, alpha = 0.1)

    plt.title(heading, fontsize = 20)
    plt.xlabel('$X11$', fontsize = 15)
    plt.ylabel('$X12$', fontsize = 15)
    plt.legend(fontsize = 12, loc = 1)
    plt.axis('equal')
    plt.grid(alpha = 0.3)
    plt.axis([-2, 10, 0, 12])

plt.show()

6. Video Lectures¶

%%html
<center><iframe src="https://www.youtube.com/embed/9R7tee22XLA?rel=0"
width="420" height="315" frameborder="0" allowfullscreen></iframe></center>

%%html
<center><iframe src="https://www.youtube.com/embed/eOgSBLIAKLY?rel=0"
width="420" height="315" frameborder="0" allowfullscreen></iframe></center>

%%html
<center><iframe src="https://www.youtube.com/embed/3LCnnFoduAo?rel=0"
width="420" height="315" frameborder="0" allowfullscreen></iframe></center>

%%html
<center><iframe src="https://www.youtube.com/embed/7AsL5vaOIjg?rel=0"
width="420" height="315" frameborder="0" allowfullscreen></iframe></center>

%%html
<center><iframe src="https://www.youtube.com/embed/7cS7pVIb878?rel=0"
width="420" height="315" frameborder="0" allowfullscreen></iframe></center>

%%javascript
$.getScript('https://kmahelona.github.io/ipython_notebook_goodies/ipython_notebook_toc.js')