Ensemble Methods: Bagging and Boosting

By Prof. Seungchul Lee
http://iai.postech.ac.kr/
Industrial AI Lab at POSTECH

# 1. Bagging¶

• Bagging = Bootstrap Aggregating
• Duplicate elements are allowed
• Ensemble model
• Voting or Regressiong

# 2. Boosting¶

• Weighting the error

$\hat{y}$ is prediction

$\text{error} = \hat{y}- y$
• Sequential model

• Learning prgress in order.

Simple classification example

In [1]:
import numpy as np
import matplotlib.pyplot as plt


Data generation

In [2]:
%matplotlib inline
## generate three simulated clusters
mu1 = np.array([1, 7])
mu2 = np.array([3, 4])
mu3 = np.array([6, 5])

SIGMA1 = 0.9*np.array([[1, 1.5],
[1.5, 3]])
SIGMA2 = 0.6*np.array([[2, 0],
[0, 2]])
SIGMA3 = 0.8*np.array([[1, -1],
[-1, 2]])

m = 100

X1 = np.random.multivariate_normal(mu1, SIGMA1, m)
X2 = np.random.multivariate_normal(mu2, SIGMA2, m)
X3 = np.random.multivariate_normal(mu3, SIGMA3, m)

y1 = 1*np.ones([m,1])
y2 = 2*np.ones([m,1])
y3 = 3*np.ones([m,1])

plt.figure(figsize = (10, 8))
plt.title('Generated Data', fontsize = 15)
plt.plot(X1[:,0], X1[:,1], '.', label = 'C1')
plt.plot(X2[:,0], X2[:,1], '.', label = 'C2')
plt.plot(X3[:,0], X3[:,1], '.', label = 'C3')
plt.xlabel('$X_1$', fontsize = 15)
plt.ylabel('$X_2$', fontsize = 15)
plt.legend(fontsize = 12)
plt.axis('equal')
plt.grid(alpha = 0.3)
plt.axis([-2, 10, 0, 12])
plt.show()


Single Decision Tree vs Bagging

In [3]:
import sklearn
from sklearn.ensemble import BaggingClassifier


Dicision Tree

In [4]:
X = np.vstack([X1, X2, X3])
y = np.vstack([y1, y2, y3])

clf = sklearn.tree.DecisionTreeClassifier(criterion = 'entropy', max_depth = 2, random_state = 0, max_features = 1.0)
clf.fit(X,y)

Out[4]:
DecisionTreeClassifier(criterion='entropy', max_depth=2, max_features=1.0,
random_state=0)
In [5]:
res = 0.5
[X1gr, X2gr] = np.meshgrid(np.arange(-2,10,res), np.arange(0,12,res))

In [6]:
Xp = np.hstack([X1gr.reshape(-1,1), X2gr.reshape(-1,1)])
Xp = np.asmatrix(Xp)

q = clf.predict(Xp)
q = np.asmatrix(q).reshape(-1,1)

C1 = np.where(q == 1)[0]
C2 = np.where(q == 2)[0]
C3 = np.where(q == 3)[0]


Bagging

In [7]:
bclf = BaggingClassifier(base_estimator = None, n_estimators = 50, max_samples = 1.0, bootstrap = True)
bclf.fit(X, np.ravel(y))

Out[7]:
BaggingClassifier(n_estimators=50)
In [8]:
Xp1 = np.hstack([X1gr.reshape(-1,1), X2gr.reshape(-1,1)])
Xp1 = np.asmatrix(Xp1)

q1 = bclf.predict(Xp1)
q1 = np.asmatrix(q1).reshape(-1,1)

C11 = np.where(q1 == 1)[0]
C21 = np.where(q1 == 2)[0]
C31 = np.where(q1 == 3)[0]


• prediction
$\bar{y} = \alpha \times \text{tree}_A + \beta \times \text{tree}_B + \gamma \times \text{tree}_C$
• Objective function
$\Sigma L(y_i, \hat{y}_i)$
In [9]:
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier

GB = GradientBoostingClassifier(learning_rate = 0.05, n_estimators = 300, subsample = 1.0, max_depth = 6)
GB.fit(X, np.ravel(y))

Out[9]:
GradientBoostingClassifier(learning_rate=0.05, max_depth=6, n_estimators=300)
In [10]:
Xp2 = np.hstack([X1gr.reshape(-1,1), X2gr.reshape(-1,1)])
Xp2 = np.asmatrix(Xp2)

q2 = GB.predict(Xp2)
q2 = np.asmatrix(q2).reshape(-1,1)

C12 = np.where(q2 == 1)[0]
C22 = np.where(q2 == 2)[0]
C32 = np.where(q2 == 3)[0]


# 4. Bagging vs Boosting¶

• Comparing bagging and boosting

</br>

Plot results

In [11]:
plt.figure(figsize = (16, 5))

plt.subplot(1,3,1)
plt.plot(X1[:,0], X1[:,1], '.', label = 'C1')
plt.plot(X2[:,0], X2[:,1], '.', label = 'C2')
plt.plot(X3[:,0], X3[:,1], '.', label = 'C3')
plt.plot(Xp[C1,0], Xp[C1,1], 's', color = 'blue', markersize = 8, alpha = 0.1)
plt.plot(Xp[C2,0], Xp[C2,1], 's', color = 'orange', markersize = 8, alpha = 0.1)
plt.plot(Xp[C3,0], Xp[C3,1], 's', color = 'green', markersize = 8, alpha = 0.1)
plt.title('Single Decision Tree', fontsize = 20)
plt.xlabel('$X11$', fontsize = 15)
plt.ylabel('$X12$', fontsize = 15)
plt.legend(fontsize = 12, loc = 1)
plt.axis('equal')
plt.grid(alpha = 0.3)
plt.axis([-2, 10, 0, 12])

plt.subplot(1,3,2)
plt.plot(X1[:,0], X1[:,1], '.', label = 'C1')
plt.plot(X2[:,0], X2[:,1], '.', label = 'C2')
plt.plot(X3[:,0], X3[:,1], '.', label = 'C3')
plt.plot(Xp1[C11,0], Xp1[C11,1], 's', color = 'blue', markersize = 8, alpha = 0.1)
plt.plot(Xp1[C21,0], Xp1[C21,1], 's', color = 'orange', markersize = 8, alpha = 0.1)
plt.plot(Xp1[C31,0], Xp1[C31,1], 's', color = 'green', markersize = 8, alpha = 0.1)
plt.title('Bagging (Low variance)', fontsize = 20)
plt.xlabel('$X11$', fontsize = 15)
plt.ylabel('$X12$', fontsize = 15)
plt.legend(fontsize = 12, loc = 1)
plt.axis('equal')
plt.grid(alpha = 0.3)
plt.axis([-2, 10, 0, 12])

plt.subplot(1,3,3)
plt.plot(X1[:,0], X1[:,1], '.', label = 'C1')
plt.plot(X2[:,0], X2[:,1], '.', label = 'C2')
plt.plot(X3[:,0], X3[:,1], '.', label = 'C3')
plt.plot(Xp2[C12,0], Xp2[C12,1], 's', color = 'blue', markersize = 8, alpha = 0.1)
plt.plot(Xp2[C22,0], Xp2[C22,1], 's', color = 'orange', markersize = 8, alpha = 0.1)
plt.plot(Xp2[C32,0], Xp2[C32,1], 's', color = 'green', markersize = 8, alpha = 0.1)
plt.title('Boosting (Low bias)', fontsize = 20)
plt.xlabel('$X11$', fontsize = 15)
plt.ylabel('$X12$', fontsize = 15)
plt.legend(fontsize = 12, loc = 1)
plt.axis('equal')
plt.grid(alpha = 0.3)
plt.axis([-2, 10, 0, 12])
plt.show()


# 5. XGBoost¶

$\hat{y} = \alpha \times \text{tree}_A + \beta \times \text{tree}_B + \gamma \times \text{tree}_C$
• Objective function
• Loss function + Regularization form

$\Sigma L(y_i, \hat{y}_i) + \omega (f_t)$

• Reduce overfitting (regularization form)

</br>
In [12]:
# ! pip install xgboost

In [13]:
import xgboost

# 'reg_lambda' is regularization form
XGB = xgboost.XGBClassifier(n_estimators = 300, learning_rate = 0.2, max_depth = 5, scoring = 'accuracy', reg_lambda = 0.05)

XGB.fit(X, np.ravel(y))

[11:56:20] WARNING: C:/Users/Administrator/workspace/xgboost-win64_release_1.4.0/src/learner.cc:573:
Parameters: { "scoring" } might not be used.

This may not be accurate due to some parameters are only used in language bindings but
passed down to XGBoost core.  Or some parameters are not used but slip through this
verification. Please open an issue if you find above cases.

[11:56:20] WARNING: C:/Users/Administrator/workspace/xgboost-win64_release_1.4.0/src/learner.cc:1095: Starting in XGBoost 1.3.0, the default evaluation metric used with the objective 'multi:softprob' was changed from 'merror' to 'mlogloss'. Explicitly set eval_metric if you'd like to restore the old behavior.

c:\users\seungchul\appdata\local\programs\python\python36\lib\site-packages\xgboost\sklearn.py:1146: UserWarning: The use of label encoder in XGBClassifier is deprecated and will be removed in a future release. To remove this warning, do the following: 1) Pass option use_label_encoder=False when constructing XGBClassifier object; and 2) Encode your labels (y) as integers starting with 0, i.e. 0, 1, 2, ..., [num_class - 1].
warnings.warn(label_encoder_deprecation_msg, UserWarning)

Out[13]:
XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
colsample_bynode=1, colsample_bytree=1, gamma=0, gpu_id=-1,
importance_type='gain', interaction_constraints='',
learning_rate=0.2, max_delta_step=0, max_depth=5,
min_child_weight=1, missing=nan, monotone_constraints='()',
n_estimators=300, n_jobs=6, num_parallel_tree=1,
objective='multi:softprob', random_state=0, reg_alpha=0,
reg_lambda=0.05, scale_pos_weight=None, scoring='accuracy',
subsample=1, tree_method='exact', validate_parameters=1,
verbosity=None)
In [14]:
Xp3 = np.hstack([X1gr.reshape(-1,1), X2gr.reshape(-1,1)])
Xp3 = np.asmatrix(Xp3)

q3 = XGB.predict(Xp3)
q3 = np.asmatrix(q3).reshape(-1,1)

C13 = np.where(q3 == 1)[0]
C23 = np.where(q3 == 2)[0]
C33 = np.where(q3 == 3)[0]

In [15]:
plt.figure(figsize = (16, 5))

plt.subplot(1,2,1)
plt.plot(X1[:,0], X1[:,1], '.', label = 'C1')
plt.plot(X2[:,0], X2[:,1], '.', label = 'C2')
plt.plot(X3[:,0], X3[:,1], '.', label = 'C3')
plt.plot(Xp2[C12,0], Xp2[C12,1], 's', color = 'blue', markersize = 8, alpha = 0.1)
plt.plot(Xp2[C22,0], Xp2[C22,1], 's', color = 'orange', markersize = 8, alpha = 0.1)
plt.plot(Xp2[C32,0], Xp2[C32,1], 's', color = 'green', markersize = 8, alpha = 0.1)
plt.title('Boosting (Overfitting)', fontsize = 20)
plt.xlabel('$X11$', fontsize = 15)
plt.ylabel('$X12$', fontsize = 15)
plt.legend(fontsize = 12, loc = 1)
plt.axis('equal')
plt.grid(alpha = 0.3)
plt.axis([-2, 10, 0, 12])

plt.subplot(1,2,2)
plt.plot(X1[:,0], X1[:,1], '.', label = 'C1')
plt.plot(X2[:,0], X2[:,1], '.', label = 'C2')
plt.plot(X3[:,0], X3[:,1], '.', label = 'C3')
plt.plot(Xp3[C13,0], Xp3[C13,1], 's', color = 'blue', markersize = 8, alpha = 0.1)
plt.plot(Xp3[C23,0], Xp3[C23,1], 's', color = 'orange', markersize = 8, alpha = 0.1)
plt.plot(Xp3[C33,0], Xp3[C33,1], 's', color = 'green', markersize = 8, alpha = 0.1)
plt.title('XGBoosting (Reduce overfitting)', fontsize = 20)
plt.xlabel('$X11$', fontsize = 15)
plt.ylabel('$X12$', fontsize = 15)
plt.legend(fontsize = 12, loc = 1)
plt.axis('equal')
plt.grid(alpha = 0.3)
plt.axis([-2, 10, 0, 12])
plt.show()