Ensemble Methods: Bagging and Boosting

By Prof. Seungchul Lee
Industrial AI Lab at POSTECH

# 1. Bagging

• Bagging = Bootstrap Aggregating
• Duplicate elements are allowed
• Ensemble model
• Voting (classification) or averaging (regression)

# 2. Boosting

• Weighting the error

$\hat{y}$ is prediction

$\text{error} = \hat{y}- y$
• Sequential model

• Learning progresses in order: each model is trained after the previous one.

Simple classification example

import numpy as np
import matplotlib.pyplot as plt


Data generation

## generate three simulated clusters
# Cluster centres (2-D means), one per class.
mu1 = np.array([1, 7])
mu2 = np.array([3, 4])
mu3 = np.array([6, 5])

# Covariance matrices: a 2x2 base matrix scaled by a per-cluster factor.
SIGMA1 = np.array([[1, 1.5], [1.5, 3]]) * 0.9
SIGMA2 = np.diag([2, 2]) * 0.6
SIGMA3 = np.array([[1, -1], [-1, 2]]) * 0.8

# Number of samples drawn for each cluster.
m = 100

# Draw m points per cluster from the global NumPy RNG, in the order X1, X2, X3.
X1, X2, X3 = (
    np.random.multivariate_normal(mean, cov, m)
    for mean, cov in ((mu1, SIGMA1), (mu2, SIGMA2), (mu3, SIGMA3))
)

# Class labels 1, 2, 3 stored as float column vectors of shape (m, 1).
y1 = np.full((m, 1), 1.0)
y2 = np.full((m, 1), 2.0)
y3 = np.full((m, 1), 3.0)

# Scatter plot of the three generated clusters, one legend entry per class.
plt.figure(figsize=(10, 8))
plt.title('Generated Data', fontsize=15)
for cluster, tag in ((X1, 'C1'), (X2, 'C2'), (X3, 'C3')):
    plt.plot(cluster[:, 0], cluster[:, 1], '.', label=tag)
plt.xlabel('$X_1$', fontsize=15)
plt.ylabel('$X_2$', fontsize=15)
plt.legend(fontsize=12)
# Request equal axis scaling first, then set the visible window explicitly.
plt.axis('equal')
plt.grid(alpha=0.3)
plt.axis([-2, 10, 0, 12])
plt.show()


Single Decision Tree vs Bagging

import sklearn
from sklearn.ensemble import BaggingClassifier


Decision Tree

# Import the submodule explicitly: a bare `import sklearn` does not by itself
# load `sklearn.tree`, so `sklearn.tree.…` only worked as a side effect of
# other sklearn imports.
from sklearn.tree import DecisionTreeClassifier

# Stack the three clusters into one feature matrix and one label column.
X = np.vstack([X1, X2, X3])
y = np.vstack([y1, y2, y3])

# Shallow tree: entropy split criterion, depth limited to 2, fixed random_state.
clf = DecisionTreeClassifier(criterion = 'entropy', max_depth = 2, random_state = 0, max_features = 1.0)
# Pass the labels as a flat vector, matching the other classifiers' fit calls in this file.
clf.fit(X, np.ravel(y))

DecisionTreeClassifier(criterion='entropy', max_depth=2, max_features=1.0,
random_state=0)
# Evaluate the tree on a regular grid (step `res`) spanning the plotting
# window, so the decision regions can be drawn later.
res = 0.5
[X1gr, X2gr] = np.meshgrid(np.arange(-2,10,res), np.arange(0,12,res))

# One grid point per row, kept as a plain ndarray: np.matrix is discouraged by
# NumPy and recent scikit-learn releases reject it as estimator input.
Xp = np.column_stack([X1gr.ravel(), X2gr.ravel()])

# Predicted label for every grid point, shaped as a column vector.
q = clf.predict(Xp).reshape(-1, 1)

# Row indices of the grid points assigned to each class.
C1 = np.where(q == 1)[0]
C2 = np.where(q == 2)[0]
C3 = np.where(q == 3)[0]


Bagging

# Bagging ensemble of 50 estimators, each trained on a bootstrap sample the
# size of the training set (max_samples = 1.0, bootstrap = True).
# The original passed `base_estimator = None`, which is just the default; the
# keyword was renamed to `estimator` in newer scikit-learn, so omitting it
# keeps the same model while working across versions.
bclf = BaggingClassifier(n_estimators = 50, max_samples = 1.0, bootstrap = True)
bclf.fit(X, np.ravel(y))

BaggingClassifier(n_estimators=50)
# Grid points (one per row) for the bagging model, kept as a plain ndarray:
# np.matrix is discouraged by NumPy and recent scikit-learn releases reject it
# as estimator input.
Xp1 = np.column_stack([X1gr.ravel(), X2gr.ravel()])

# Predicted label for every grid point, shaped as a column vector.
q1 = bclf.predict(Xp1).reshape(-1, 1)

# Row indices of the grid points assigned to each class.
C11 = np.where(q1 == 1)[0]
C21 = np.where(q1 == 2)[0]
C31 = np.where(q1 == 3)[0]


• Prediction
$\hat{y} = \alpha \times \text{tree}_A + \beta \times \text{tree}_B + \gamma \times \text{tree}_C$
• Objective function
$\sum_i L(y_i, \hat{y}_i)$
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier

# Gradient-boosting hyper-parameters gathered in one place, then unpacked
# into the estimator constructor.
gb_settings = dict(learning_rate=0.05, n_estimators=300, subsample=1.0, max_depth=6)
GB = GradientBoostingClassifier(**gb_settings)
# Labels are flattened from a column to a 1-D vector before fitting.
GB.fit(X, np.ravel(y))

GradientBoostingClassifier(learning_rate=0.05, max_depth=6, n_estimators=300)
# Grid points (one per row) for the boosting model, kept as a plain ndarray:
# np.matrix is discouraged by NumPy and recent scikit-learn releases reject it
# as estimator input.
Xp2 = np.column_stack([X1gr.ravel(), X2gr.ravel()])

# Predicted label for every grid point, shaped as a column vector.
q2 = GB.predict(Xp2).reshape(-1, 1)

# Row indices of the grid points assigned to each class.
C12 = np.where(q2 == 1)[0]
C22 = np.where(q2 == 2)[0]
C32 = np.where(q2 == 3)[0]


# 4. Bagging vs Boosting

• Comparing bagging and boosting

Plot results

# Side-by-side decision regions for the single tree, bagging and boosting.
plt.figure(figsize=(16, 5))

# Each panel: (title, grid points, per-class row indices into the grid).
comparison_panels = (
    ('Single Decision Tree', Xp, (C1, C2, C3)),
    ('Bagging (Low variance)', Xp1, (C11, C21, C31)),
    ('Boosting (Low bias)', Xp2, (C12, C22, C32)),
)
region_shades = ('blue', 'orange', 'green')

for slot, (heading, mesh_pts, members) in enumerate(comparison_panels, start=1):
    plt.subplot(1, 3, slot)
    # Training samples, one legend entry per class.
    for cluster, tag in ((X1, 'C1'), (X2, 'C2'), (X3, 'C3')):
        plt.plot(cluster[:, 0], cluster[:, 1], '.', label=tag)
    # Faint squares mark the class predicted at each grid point.
    for rows, shade in zip(members, region_shades):
        plt.plot(mesh_pts[rows, 0], mesh_pts[rows, 1], 's', color=shade, markersize=8, alpha=0.1)
    plt.title(heading, fontsize=20)
    plt.xlabel('$X11$', fontsize=15)
    plt.ylabel('$X12$', fontsize=15)
    plt.legend(fontsize=12, loc=1)
    # Request equal axis scaling first, then set the visible window explicitly.
    plt.axis('equal')
    plt.grid(alpha=0.3)
    plt.axis([-2, 10, 0, 12])

plt.show()


# 5. XGBoost

$\hat{y} = \alpha \times \text{tree}_A + \beta \times \text{tree}_B + \gamma \times \text{tree}_C$
• Objective function
• Loss function + regularization term

$\sum_i L(y_i, \hat{y}_i) + \Omega(f_t)$

• Reduce overfitting (via the regularization term)

<br>
# ! pip install xgboost

import xgboost

# 'reg_lambda' is the weight of the regularization term in the objective.
# The original also passed `scoring = 'accuracy'`; that is a scikit-learn
# model-selection argument, not an XGBClassifier hyper-parameter, so it has
# been dropped here.
# NOTE(review): newer xgboost releases expect class labels 0..K-1, while y
# holds 1, 2, 3 — confirm against the installed version before relying on this.
XGB = xgboost.XGBClassifier(n_estimators = 300, learning_rate = 0.2, max_depth = 5, reg_lambda = 0.05)

XGB.fit(X, np.ravel(y))

XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
colsample_bynode=1, colsample_bytree=1, gamma=0, gpu_id=-1,
importance_type='gain', interaction_constraints='',
learning_rate=0.2, max_delta_step=0, max_depth=5,
min_child_weight=1, missing=nan, monotone_constraints='()',
n_estimators=300, n_jobs=6, num_parallel_tree=1,
objective='multi:softprob', random_state=0, reg_alpha=0,
reg_lambda=0.05, scale_pos_weight=None, scoring='accuracy',
subsample=1, tree_method='exact', validate_parameters=1,
verbosity=None)
# Grid points (one per row) for the XGBoost model, kept as a plain ndarray
# (np.matrix is discouraged by NumPy).
Xp3 = np.column_stack([X1gr.ravel(), X2gr.ravel()])

# Predicted label for every grid point, shaped as a column vector.
q3 = np.asarray(XGB.predict(Xp3)).reshape(-1, 1)

# Row indices of the grid points assigned to each class.
C13 = np.where(q3 == 1)[0]
C23 = np.where(q3 == 2)[0]
C33 = np.where(q3 == 3)[0]

# Side-by-side decision regions for gradient boosting and XGBoost.
plt.figure(figsize=(16, 5))

# Each panel: (title, grid points, per-class row indices into the grid).
boosting_panels = (
    ('Boosting (Overfitting)', Xp2, (C12, C22, C32)),
    ('XGBoosting (Reduce overfitting)', Xp3, (C13, C23, C33)),
)
region_shades = ('blue', 'orange', 'green')

for slot, (heading, mesh_pts, members) in enumerate(boosting_panels, start=1):
    plt.subplot(1, 2, slot)
    # Training samples, one legend entry per class.
    for cluster, tag in ((X1, 'C1'), (X2, 'C2'), (X3, 'C3')):
        plt.plot(cluster[:, 0], cluster[:, 1], '.', label=tag)
    # Faint squares mark the class predicted at each grid point.
    for rows, shade in zip(members, region_shades):
        plt.plot(mesh_pts[rows, 0], mesh_pts[rows, 1], 's', color=shade, markersize=8, alpha=0.1)
    plt.title(heading, fontsize=20)
    plt.xlabel('$X11$', fontsize=15)
    plt.ylabel('$X12$', fontsize=15)
    plt.legend(fontsize=12, loc=1)
    # Request equal axis scaling first, then set the visible window explicitly.
    plt.axis('equal')
    plt.grid(alpha=0.3)
    plt.axis([-2, 10, 0, 12])

plt.show()