Classification
Table of Contents
%%html
<center><iframe src="https://www.youtube.com/embed/zQkJAjlXcAk?rel=0"
width="560" height="315" frameborder="0" allowfullscreen></iframe></center>
%%html
<center><iframe src="https://www.youtube.com/embed/_i2ouy9w2AI?rel=0"
width="560" height="315" frameborder="0" allowfullscreen></iframe></center>
%%html
<center><iframe src="https://www.youtube.com/embed/qFjWJHFgHYw?rel=0"
width="560" height="315" frameborder="0" allowfullscreen></iframe></center>
$$h(x) = \text{sign} \left(\left( \sum\limits_{i=1}^{d}\omega_ix_i \right)- \text{threshold} \right) = \text{sign}\left(\left( \sum\limits_{i=1}^{d}\omega_ix_i \right)+ \omega_0\right)$$
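As a quick illustration of this hypothesis, here is a minimal sketch (the weight vector and the input point below are made up for illustration) that evaluates $h(x)$ as the sign of $\omega^Tx$, with a leading $1$ in $x$ absorbing $\omega_0$:
import numpy as np
# minimal sketch of h(x) = sign(w^T x); w and x below are illustrative values only
w = np.array([-1.0, 2.0, 0.5])   # [w0, w1, w2]; w0 plays the role of -threshold
x = np.array([1.0, 0.8, 0.3])    # [1, x1, x2]; the leading 1 multiplies w0
print(np.sign(w @ x))            # +1 or -1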
Hyperplane
$$
\begin{align*}
g\left(\vec p\right) = g\left(\vec q\right) = 0 \quad
& \Rightarrow \quad \omega_0 + \omega^T \vec p = \omega_0 + \omega^T \vec q = 0 \\
& \Rightarrow \quad \omega^T \left( \vec p- \vec q \right) = 0
\end{align*}
$$
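A small numerical check of this orthogonality (the values of $\omega_0$ and $\omega$ here are arbitrary; $\vec p$ and $\vec q$ are constructed to satisfy $g(x) = 0$):
import numpy as np
# arbitrary line w0 + w^T x = 0, for illustration only
w0 = -3.0
w = np.array([0.8, 1.0])
# pick x1 for each point and solve for x2 so that both points lie on the line
p = np.array([1.0, -w0 - 0.8*1.0])
q = np.array([4.0, -w0 - 0.8*4.0])
print(w0 + w @ p, w0 + w @ q)   # both 0: p and q are on the hyperplane
print(w @ (p - q))              # 0: w is orthogonal to any vector lying in the hyperplane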
How to find $\omega$
The perceptron implements
$$h(x) = \text{sign}\left( \omega^Tx \right)$$
Given the training set
$$(x_1, y_1), (x_2, y_2), \cdots, (x_N, y_N) \quad \text{where } y_i \in \{-1,1\}$$
1) pick a misclassified point
$$\text{sign}\left(\omega^Tx_n \right) \neq y_n$$
2) and update the weight vector
$$\omega \leftarrow \omega + y_nx_n$$
Why do perceptron updates work?
Let's look at a misclassified positive example ($y_n = +1$): the update $\omega \leftarrow \omega + x_n$ rotates $\omega$ toward $x_n$, so $\omega^Tx_n$ increases by $\lVert x_n \rVert^2$ and the point moves toward the correct ($+1$) side of the boundary. The same argument, with the signs flipped, applies to a misclassified negative example ($y_n = -1$).
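A minimal from-scratch sketch of this procedure (not the scikit-learn Perceptron used below; it assumes each row of X starts with a 1 for the bias term and that the labels are in $\{-1,+1\}$):
import numpy as np
def perceptron_fit(X, y, n_iter=1000):
    # X: (m, d) array with a leading column of ones; y: (m,) labels in {-1, +1}
    w = np.zeros(X.shape[1])
    for _ in range(n_iter):
        misclassified = np.where(np.sign(X @ w) != y)[0]   # 1) find misclassified points
        if len(misclassified) == 0:                        # every point is on the correct side
            break
        n = np.random.choice(misclassified)                # pick one of them
        w = w + y[n] * X[n]                                 # 2) update w <- w + y_n x_n
    return w
On linearly separable data this loop is guaranteed to stop; n_iter only caps the work when the classes overlap.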
Summary
$$g(x) = \omega_0 + \omega^Tx = \omega_0 + \omega_1x_1 + \omega_2x_2 = 0$$
$$
\begin{align*}
\omega &= \begin{bmatrix} \omega_0 \\ \omega_1 \\ \omega_2\end{bmatrix}\\ \\
x &= \begin{bmatrix} \left(x^{(1)}\right)^T \\ \left(x^{(2)}\right)^T \\ \left(x^{(3)}\right)^T\\ \vdots \\ \left(x^{(m)}\right)^T \end{bmatrix} = \begin{bmatrix} 1 & x_1^{(1)} & x_2^{(1)} \\ 1 & x_1^{(2)} & x_2^{(2)} \\ 1 & x_1^{(3)} & x_2^{(3)}\\\vdots & \vdots & \vdots \\ 1 & x_1^{(m)} & x_2^{(m)}\end{bmatrix}, \qquad
y = \begin{bmatrix}y^{(1)} \\ y^{(2)} \\ y^{(3)}\\ \vdots \\ y^{(m)} \end{bmatrix}
\end{align*}$$
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
# training data generation
m = 100
x1 = 8*np.random.rand(m, 1)
x2 = 7*np.random.rand(m, 1) - 4
g = 0.8*x1 + x2 - 3              # underlying linear boundary used to label the points
C1 = np.where(g >= 1)            # np.where on a 2-D array returns a tuple of (row, col) indices
C0 = np.where(g < -1)
print(C1)
C1 = np.where(g >= 1)[0]         # keep only the row indices; the +-1 margin leaves a gap between classes
C0 = np.where(g < -1)[0]
print(C1.shape)
print(C0.shape)
plt.figure(figsize=(10, 8))
plt.plot(x1[C1], x2[C1], 'ro', alpha = 0.4, label = 'C1')
plt.plot(x1[C0], x2[C0], 'bo', alpha = 0.4, label = 'C0')
plt.title('Linearly Separable Classes', fontsize = 15)
plt.legend(loc = 1, fontsize = 15)
plt.xlabel(r'$x_1$', fontsize = 15)
plt.ylabel(r'$x_2$', fontsize = 15)
plt.show()
$$
\begin{align*}
x &= \begin{bmatrix} \left(x^{(1)}\right)^T \\ \left(x^{(2)}\right)^T \\ \left(x^{(3)}\right)^T\\ \vdots \\ \left(x^{(m)}\right)^T \end{bmatrix} = \begin{bmatrix} x_1^{(1)} & x_2^{(1)} \\ x_1^{(2)} & x_2^{(2)} \\ x_1^{(3)} & x_2^{(3)}\\ \vdots & \vdots \\ x_1^{(m)} & x_2^{(m)}\end{bmatrix} \qquad
y = \begin{bmatrix}y^{(1)} \\ y^{(2)} \\ y^{(3)}\\ \vdots \\ y^{(m)} \end{bmatrix}
\end{align*}$$
X1 = np.hstack([x1[C1], x2[C1]])
X0 = np.hstack([x1[C0], x2[C0]])
X = np.vstack([X1, X0])
y = np.vstack([np.ones([C1.shape[0],1]), -np.ones([C0.shape[0],1])])
from sklearn import linear_model
clf = linear_model.Perceptron(tol=1e-3)
clf.fit(X, np.ravel(y))
clf.predict([[3, -2]])
clf.predict([[6, 2]])
clf.coef_
clf.intercept_
w0 = clf.intercept_[0]
w1 = clf.coef_[0,0]
w2 = clf.coef_[0,1]
x1p = np.linspace(0,8,100).reshape(-1,1)
x2p = - w1/w2*x1p - w0/w2        # decision boundary: w0 + w1*x1 + w2*x2 = 0 solved for x2
plt.figure(figsize=(10, 8))
plt.plot(x1[C1], x2[C1], 'ro', alpha = 0.4, label = 'C1')
plt.plot(x1[C0], x2[C0], 'bo', alpha = 0.4, label = 'C0')
plt.plot(x1p, x2p, c = 'k', linewidth = 4, label = 'perceptron')
plt.xlim([0, 8])
plt.xlabel('$x_1$', fontsize = 15)
plt.ylabel('$x_2$', fontsize = 15)
plt.legend(loc = 1, fontsize = 15)
plt.show()
$$\omega = \begin{bmatrix}\omega_1 \\ \omega_2\end{bmatrix}, \, x = \begin{bmatrix}x_1\\x_2\end{bmatrix} \; \implies g(x) = \omega_0 + \omega^Tx = \omega_0 + \omega_1x_1 + \omega_2x_2 $$
$$ \; h = \frac{g(x)}{\lVert \omega \rVert} \implies\; \text{orthogonal signed distance from the line}$$
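A quick check of this formula using the perceptron weights w0, w1, w2 obtained above, on the same two test points that were passed to clf.predict:
# orthogonal signed distance h = g(x)/||w|| for the fitted line above
w_vec = np.array([w1, w2])
for pt in np.array([[3.0, -2.0], [6.0, 2.0]]):
    g = w0 + w_vec @ pt
    print(pt, g/np.linalg.norm(w_vec))   # positive on the C1 side, negative on the C0 side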
Perceptron: makes use only of the sign of the data (which side of the boundary each point falls on)
We also want to use the distance information of all data points $\rightarrow$ logistic regression
# plot a sigmoid function
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
z = np.linspace(-4,4,100)
s = 1/(1 + np.exp(-z))
plt.figure(figsize=(10,2))
plt.plot(z, s)
plt.xlim([-4, 4])
plt.axis('equal')
plt.grid(alpha = 0.3)
plt.show()
Benefit of mapping via the logistic function
$$P\left(y = +1 \mid x\,;\omega\right) = \frac{1}{1+e^{-\omega^T x}} \;\; \in \; [0,1]$$
Often we do not care about predicting the label $y$
Rather, we want to predict the label probabilities $P\left(y \mid x\,;\omega\right)$
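For example, with the weight vector $\omega = [-6, 2, 1]^T$ that will be used to generate the data below (and an arbitrary example point $x$, written with a leading 1 for the bias term):
# P(y = +1 | x ; w) is the sigmoid of w^T x
w = np.array([-6, 2, 1])
x = np.array([1, 2.5, 1.5])       # [1, x1, x2]; an arbitrary example point
print(1/(1 + np.exp(-w @ x)))     # a value in [0, 1] interpreted as a probability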
$$
\begin{align*}
\omega &= \begin{bmatrix} \omega_1 \\ \omega_2\end{bmatrix}, \qquad \omega_0, \qquad x = \begin{bmatrix} x_1 \\ x_2\end{bmatrix}\\ \\
X &= \begin{bmatrix} \left(x^{(1)}\right)^T \\ \left(x^{(2)}\right)^T \\ \left(x^{(3)}\right)^T \\ \vdots\end{bmatrix} = \begin{bmatrix} x_1^{(1)} & x_2^{(1)} \\ x_1^{(2)} & x_2^{(2)} \\ x_1^{(3)} & x_2^{(3)} \\ \vdots & \vdots \\\end{bmatrix}, \qquad y = \begin{bmatrix} y^{(1)}\\ y^{(2)} \\y^{(3)} \\ \vdots \end{bmatrix}
\end{align*}
$$
# data generation
m = 100
w = np.array([[-6], [2], [1]])   # true weight vector used to generate the labels
X = np.hstack([np.ones([m,1]), 4*np.random.rand(m,1), 4*np.random.rand(m,1)])
w = np.asmatrix(w)
X = np.asmatrix(X)
y = 1/(1 + np.exp(-X*w)) > 0.5   # label each point by thresholding the sigmoid at 0.5
C1 = np.where(y == True)[0]
C0 = np.where(y == False)[0]
y = np.empty([m,1])
y[C1] = 1
y[C0] = 0
plt.figure(figsize = (10,8))
plt.plot(X[C1,1], X[C1,2], 'ro', alpha = 0.3, label = 'C1')
plt.plot(X[C0,1], X[C0,2], 'bo', alpha = 0.3, label = 'C0')
plt.xlabel(r'$x_1$', fontsize = 15)
plt.ylabel(r'$x_2$', fontsize = 15)
plt.legend(loc = 1, fontsize = 12)
plt.axis('equal')
plt.xlim([0,4])
plt.ylim([0,4])
plt.show()
X = X[:,1:3]
X.shape
from sklearn import linear_model
clf = linear_model.LogisticRegression(solver='lbfgs')
clf.fit(X,np.ravel(y))
clf.coef_
clf.intercept_
w0 = clf.intercept_[0]
w1 = clf.coef_[0,0]
w2 = clf.coef_[0,1]
xp = np.linspace(0,4,100).reshape(-1,1)
yp = - w1/w2*xp - w0/w2
plt.figure(figsize = (10,8))
plt.plot(X[C1,0], X[C1,1], 'ro', alpha = 0.3, label = 'C1')
plt.plot(X[C0,0], X[C0,1], 'bo', alpha = 0.3, label = 'C0')
plt.plot(xp, yp, 'g', linewidth = 4, label = 'Logistic Regression')
plt.title('Logistic Regression')
plt.xlabel(r'$x_1$', fontsize = 15)
plt.ylabel(r'$x_2$', fontsize = 15)
plt.legend(loc = 1, fontsize = 12)
plt.axis('equal')
plt.xlim([0,4])
plt.ylim([0,4])
plt.show()
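Since the point of logistic regression is the label probability $P(y \mid x\,;\omega)$ rather than just the label, the fitted model can also report probabilities via predict_proba (the two query points here are arbitrary):
# each row is [P(y=0 | x), P(y=1 | x)] for one query point
print(clf.predict_proba([[1, 1], [3, 3]]))
print(clf.predict([[1, 1], [3, 3]]))   # hard labels, i.e. the probabilities thresholded at 0.5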
%%html
<center><iframe
width="420" height="315" src="https://www.youtube.com/embed/3liCbRZPrZA?rel=0" frameborder="0" allowfullscreen>
</iframe></center>
%%javascript
$.getScript('https://kmahelona.github.io/ipython_notebook_goodies/ipython_notebook_toc.js')