Using Scikit-Learn
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
# data points as column vectors: inputs x and outputs y
x = np.array([0.1, 0.4, 0.7, 1.2, 1.3, 1.7, 2.2, 2.8, 3.0, 4.0, 4.3, 4.4, 4.9]).reshape(-1, 1)
y = np.array([0.5, 0.9, 1.1, 1.5, 1.5, 2.0, 2.2, 2.8, 2.7, 3.0, 3.5, 3.7, 3.9]).reshape(-1, 1)
# to plot
plt.figure(figsize=(10, 6))
plt.title('Linear Regression', fontsize=15)
plt.xlabel('X', fontsize=15)
plt.ylabel('Y', fontsize=15)
plt.plot(x, y, 'ko', label="data")
plt.xlim([0, 5])
plt.grid(alpha=0.3)
plt.legend(fontsize=15)
plt.axis('scaled')
plt.show()
from sklearn.linear_model import LinearRegression
reg = LinearRegression()
reg.fit(x, y)
print(reg.coef_)      # slope
print(reg.intercept_) # intercept (bias)
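As a quick sanity check, reg.predict reproduces the same value as coef_*x + intercept_. The query point 2.0 below is arbitrary, chosen only for illustration.
# predict at a new input and verify against the closed form
x_new = np.array([[2.0]])                # arbitrary query point (assumption)
print(reg.predict(x_new))                # model prediction
print(reg.coef_*x_new + reg.intercept_)  # same value computed by hand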
# to plot
plt.figure(figsize=(10, 6))
plt.title('Linear Regression', fontsize=15)
plt.xlabel('X', fontsize=15)
plt.ylabel('Y', fontsize=15)
plt.plot(x, y, 'ko', label="data")
# to plot the fitted straight line (equivalent to reg.predict(xp))
xp = np.arange(0, 5, 0.01).reshape(-1, 1)
yp = reg.coef_*xp + reg.intercept_
plt.plot(xp, yp, 'r', linewidth=2, label="$L_2$")
plt.legend(fontsize=15)
plt.axis('scaled')
plt.grid(alpha=0.3)
plt.xlim([0, 5])
plt.show()
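The fit can also be scored numerically; a minimal sketch using reg.score, which returns the coefficient of determination R² on whatever data is passed in (here the training data, since no test split was made).
# R^2 on the training data; 1.0 would be a perfect fit
print(reg.score(x, y))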
n = 100
x = -5 + 15*np.random.rand(n, 1)  # inputs drawn uniformly from [-5, 10]
noise = 10*np.random.randn(n, 1)
y = 10 + 1*x + 2*x**2 + noise     # quadratic ground truth plus Gaussian noise
plt.figure(figsize=(10, 6))
plt.title('Nonlinear Regression', fontsize=15)
plt.xlabel('X', fontsize=15)
plt.ylabel('Y', fontsize=15)
plt.plot(x, y, 'o', markersize=4, label='actual')
plt.xlim([np.min(x), np.max(x)])
plt.grid(alpha=0.3)
plt.legend(fontsize=15)
plt.show()
from sklearn.kernel_ridge import KernelRidge
reg = KernelRidge(kernel='rbf', gamma=0.1)  # RBF kernel ridge regression
reg.fit(x, y)
p = reg.predict(x)
plt.figure(figsize=(10, 6))
plt.title('Nonlinear Regression', fontsize=15)
plt.xlabel('X', fontsize=15)
plt.ylabel('Y', fontsize=15)
plt.plot(x, y, 'o', markersize=4, label='actual')
plt.plot(x, p, 'ro', markersize=4, label='predict')
plt.grid(alpha=0.3)
plt.legend(fontsize=15)
plt.xlim([np.min(x), np.max(x)])
plt.show()
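Since x is unsorted, the predictions above render as isolated points. Evaluating the fitted model on a dense, sorted grid draws a smooth curve; the 200-point grid below is an assumption matching the data range.
# evaluate on a sorted grid so the fit can be drawn as a line
xq = np.linspace(np.min(x), np.max(x), 200).reshape(-1, 1)
plt.figure(figsize=(10, 6))
plt.plot(x, y, 'o', markersize=4, label='actual')
plt.plot(xq, reg.predict(xq), 'r', linewidth=2, label='predict')
plt.grid(alpha=0.3)
plt.legend(fontsize=15)
plt.xlim([np.min(x), np.max(x)])
plt.show()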
# two separable classes split by the line g0 = 0.8*x1 + x2 - 3, with a gap of width 2
x1 = 8*np.random.rand(100, 1)
x2 = 7*np.random.rand(100, 1) - 4
g0 = 0.8*x1 + x2 - 3
g1 = g0 - 1
g2 = g0 + 1
C1 = np.where(g1 >= 0)[0]  # class 1: points above the upper margin
C2 = np.where(g2 < 0)[0]   # class 2: points below the lower margin
X1 = np.hstack([x1[C1], x2[C1]])
X2 = np.hstack([x1[C2], x2[C2]])
n = X1.shape[0]
m = X2.shape[0]
X = np.vstack([X1, X2])
y = np.vstack([np.zeros([n, 1]), np.ones([m, 1])])
plt.figure(figsize=(10, 6))
plt.plot(x1[C1], x2[C1], 'ro', label='C1')
plt.plot(x1[C2], x2[C2], 'bo', label='C2')
plt.xlabel('$x_1$', fontsize=20)
plt.ylabel('$x_2$', fontsize=20)
plt.legend(loc='lower right')
plt.xlim([0, 8])
plt.ylim([-4, 3])
plt.show()
from sklearn.svm import SVC
clf = SVC(kernel='linear')
clf.fit(X, np.ravel(y))
print(clf.coef_)
print(clf.intercept_)
xp = np.linspace(0, 8, 100).reshape(-1, 1)
yp = -clf.coef_[0,0]/clf.coef_[0,1]*xp - clf.intercept_/clf.coef_[0,1]  # boundary: w.x + b = 0 solved for x2
plt.figure(figsize=(10, 6))
plt.plot(X[:n, 0], X[:n, 1], 'ro', label='C1')
plt.plot(X[n:, 0], X[n:, 1], 'bo', label='C2')
plt.plot(xp, yp, '--k', label='SVM')
plt.xlabel('$x_1$', fontsize=20)
plt.ylabel('$x_2$', fontsize=20)
plt.legend(loc='lower right')
plt.xlim([0, 8])
plt.ylim([-4, 3])
plt.show()
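For a linear SVM, the margin boundaries sit where w·x + b = ±1. A sketch of drawing them from the fitted coef_ and intercept_, reusing the xp grid and boundary yp above; support_vectors_ holds the training points that pin down the margin.
# margin lines w.x + b = +1 and w.x + b = -1, from the fitted parameters
w = clf.coef_[0]
b = clf.intercept_[0]
margin_up = -(w[0]*xp + b - 1)/w[1]
margin_down = -(w[0]*xp + b + 1)/w[1]
print(clf.support_vectors_.shape)  # (number of support vectors, 2)
plt.figure(figsize=(10, 6))
plt.plot(X[:n, 0], X[:n, 1], 'ro', label='C1')
plt.plot(X[n:, 0], X[n:, 1], 'bo', label='C2')
plt.plot(xp, yp, '--k', label='SVM')
plt.plot(xp, margin_up, ':k', label='margin')
plt.plot(xp, margin_down, ':k')
plt.legend(loc='lower right')
plt.xlim([0, 8])
plt.ylim([-4, 3])
plt.show()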
m = 500
X0 = np.random.multivariate_normal([0, 0], np.eye(2), m)
X1 = np.random.multivariate_normal([10, 10], np.eye(2), m)
X = np.vstack([X0, X1])
y = np.vstack([np.zeros([m,1]), np.ones([m,1])])
plt.figure(figsize=(10, 6))
plt.plot(X0[:,0], X0[:,1], '.b', label='Class 0')
plt.plot(X1[:,0], X1[:,1], '.k', label='Class 1')
plt.title('Data Classes', fontsize=15)
plt.legend(loc='lower right', fontsize=15)
plt.xlabel('X1', fontsize=15)
plt.ylabel('X2', fontsize=15)
plt.xlim([-10,20])
plt.ylim([-4,14])
plt.grid(alpha=0.3)
plt.show()
from sklearn.linear_model import LogisticRegression
clf = LogisticRegression()
clf.fit(X, np.ravel(y))
print(clf.coef_)
print(clf.intercept_)
xp = np.linspace(-10, 20, 100).reshape(-1, 1)
yp = -clf.coef_[0,0]/clf.coef_[0,1]*xp - clf.intercept_/clf.coef_[0,1]  # decision boundary
plt.figure(figsize=(10, 6))
plt.plot(X0[:,0], X0[:,1], '.b', label='Class 0')
plt.plot(X1[:,0], X1[:,1], '.k', label='Class 1')
plt.plot(xp, yp, '--k', label='Logistic')
plt.xlim([-10,20])
plt.ylim([-4,14])
plt.title('Data Classes', fontsize=15)
plt.legend(loc='lower right', fontsize=15)
plt.xlabel('X1', fontsize=15)
plt.ylabel('X2', fontsize=15)
plt.grid(alpha=0.3)
plt.show()
pred = clf.predict_proba([[0, 6]])  # probabilities for [class 0, class 1]
print(pred)
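The columns of predict_proba follow the order of clf.classes_; a quick sketch reading off the hard label for the same point.
print(clf.classes_)           # column order of predict_proba
print(clf.predict([[0, 6]]))  # hard class label for the same point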
m = 200
X0 = np.random.multivariate_normal([-1, 1], np.eye(2), m)
X1 = np.random.multivariate_normal([15, 10], np.eye(2), m)
X2 = np.random.multivariate_normal([0, 6], np.eye(2), m)
X = np.vstack([X0, X1, X2])
plt.figure(figsize=(10, 6))
plt.plot(X[:,0], X[:,1], '.b')
plt.xlim([-10,20])
plt.ylim([-4,14])
plt.grid(alpha=0.3)
plt.show()
from sklearn.cluster import KMeans
kmeans = KMeans(n_clusters=3, random_state=0)
kmeans.fit(X)
print(kmeans.labels_)
plt.figure(figsize=(10,6))
plt.plot(X[kmeans.labels_ == 0, 0], X[kmeans.labels_ == 0, 1], 'g.', label=0)
plt.plot(X[kmeans.labels_ == 1, 0], X[kmeans.labels_ == 1, 1], 'k.', label=1)
plt.plot(X[kmeans.labels_ == 2, 0], X[kmeans.labels_ == 2, 1], 'r.', label=2)
plt.xlim([-10,20])
plt.ylim([-4,14])
plt.grid(alpha=0.3)
plt.legend(loc='lower right', fontsize=15)
plt.show()
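The learned centroids are stored in kmeans.cluster_centers_; a short sketch overlaying them on the data. Note the cluster numbering is arbitrary and can change between runs.
# overlay the fitted centroids on the raw data
centers = kmeans.cluster_centers_
print(centers)
plt.figure(figsize=(10, 6))
plt.plot(X[:, 0], X[:, 1], '.', color='gray', alpha=0.5)
plt.plot(centers[:, 0], centers[:, 1], 'r*', markersize=15, label='centroids')
plt.xlim([-10, 20])
plt.ylim([-4, 14])
plt.grid(alpha=0.3)
plt.legend(loc='lower right', fontsize=15)
plt.show()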
m = 5000
mu = np.array([0, 0])
sigma = np.array([[3, 1.5],
                  [1.5, 1]])
X = np.random.multivariate_normal(mu, sigma, m)
fig = plt.figure(figsize=(10, 6))
plt.plot(X[:,0], X[:,1], 'k.')
plt.axis('equal')
plt.show()
from sklearn.decomposition import PCA
pca = PCA(n_components=2)
pca.fit(X)
plt.figure()
plt.stem(range(1, 3), pca.explained_variance_ratio_)
plt.xlim([0.5, 2.5])
plt.ylim([0, 1])
plt.title('Explained Variance Ratio')
plt.show()
principal_axis = pca.components_[0, :]
u1 = principal_axis/np.linalg.norm(principal_axis)  # unit vector along the first PC
h = u1[1]/u1[0]                                     # slope of the principal axis
xp = np.linspace(-6, 6, 200)
yp = h*xp
plt.figure(figsize=(10,6))
plt.plot(X[:, 0], X[:, 1],'k.')
plt.plot(xp, yp, 'r.')
plt.axis('equal')
plt.show()
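To actually reduce the dimension, refit with n_components=1 (an assumption for illustration) and use transform/inverse_transform; the reconstructed points fall on the red principal-axis line above.
# project onto the first principal component, then map back to 2-D
pca1 = PCA(n_components=1)
Z = pca1.fit_transform(X)          # (m, 1) scores along the principal axis
X_rec = pca1.inverse_transform(Z)  # 2-D reconstruction lying on that axis
print(Z.shape, X_rec.shape)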
n0 = 200
n1 = 200
sigma = [[0.9, -0.4],
         [-0.4, 0.3]]
x0 = np.random.multivariate_normal([2.5, 2.5], sigma, n0)  # data in class 0
x1 = np.random.multivariate_normal([1, 1], sigma, n1)      # data in class 1
X = np.vstack([x0, x1])
y = np.vstack([np.zeros([n0, 1]), np.ones([n1, 1])])
plt.figure(figsize = (10, 6))
plt.plot(x0[:,0],x0[:,1],'r.')
plt.plot(x1[:,0],x1[:,1],'b.')
plt.axis('equal')
plt.ylim([-2, 6])
plt.xlim([-4, 8])
plt.show()
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
lda = LinearDiscriminantAnalysis()
lda.fit(X, np.ravel(y))
print(lda.coef_)
print(lda.intercept_)
xp = np.linspace(-4, 8, 100).reshape(-1, 1)
yp = -lda.coef_[0,0]/lda.coef_[0,1]*xp - lda.intercept_/lda.coef_[0,1]  # decision boundary
projection_line = lda.coef_[0,1]/lda.coef_[0,0]*xp  # line through the origin along w = coef_
plt.figure(figsize = (10, 6))
plt.plot(x0[:,0],x0[:,1],'r.')
plt.plot(x1[:,0],x1[:,1],'b.')
plt.plot(xp, yp, '--k', label='Decision Boundary')
plt.plot(xp, projection_line, 'k', label='Projection Line')
plt.axis('equal')
plt.ylim([-2, 6])
plt.xlim([-4, 8])
plt.legend(loc='lower right', fontsize=15)
plt.show()
X_new = lda.transform(X)
plt.figure(figsize = (10, 6))
plt.hist(X_new[:n0], 21, color='r', rwidth=0.5)
plt.hist(X_new[n0:], 21, color='b', rwidth=0.5)
plt.show()
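A minimal check of classifier quality: lda.score returns the mean accuracy of lda.predict. This is training accuracy only, since no train/test split was made.
# training accuracy of the fitted LDA classifier
print(lda.score(X, np.ravel(y)))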