Classification: Support Vector Machine
Table of Contents
Number of categories / classes
Binary: 2 different classes
Multiclass: more than 2 classes
Feature
$$\omega = \begin{bmatrix}\omega_1 \\ \omega_2\end{bmatrix}, \, x = \begin{bmatrix}x_1\\x_2\end{bmatrix} \; \implies g(x) = \omega_0 + \omega^Tx = \omega_0 + \omega_1x_1 + \omega_2x_2 $$
$$
\begin{align*}
g\left(\vec p\right) = g\left(\vec q\right) = 0 \quad
& \Rightarrow \quad \omega_0 + \omega^T \vec p = \omega_0 + \omega^T \vec q = 0 \\
& \Rightarrow \quad \omega^T \left( \vec p- \vec q \right) = 0
\end{align*}
$$
$$
\begin{align*}
& \therefore \, \omega : \text{normal to the line (orthogonal)} \\
& \implies \text{tells the direction of the line}
\end{align*}$$
$$ \begin{align*} g(x)& = \omega_0 + \omega^Tx = 0 \; \\
& \Rightarrow \omega_0 + \omega^Td\frac{\omega}{\lVert \omega \rVert} = \omega_0 + d\frac{\omega^T\omega}{\lVert \omega \rVert} = \omega_0 + d\lVert \omega \rVert = 0 \\\\
& \therefore d \, = - \frac{\omega_0}{\lVert \omega \rVert}
\end{align*}$$
$$ \omega^Tx = \omega^T \left( x_{\perp} + r \frac{\omega}{\lVert \omega \rVert}\right) = r \frac{\omega^T\omega}{\lVert \omega \rVert} = r \lVert \omega \rVert$$
$$ \begin{align*} g(x)
& = \omega_0 + \omega^Tx \\
& = \omega_0 + r \lVert \omega \rVert \qquad (r = d + h) \\
& = \omega_0 + (d +h) \lVert \omega \rVert\\
& = \omega_0 + \left(- \frac{\omega_0}{\lVert \omega \rVert} + h \right)\lVert \omega \rVert\\
& = h \lVert \omega \rVert
\end{align*}$$
$$\therefore \; h = \frac{g(x)}{\lVert \omega \rVert} \implies\; \text{orthogonal signed distance from the line}$$
Another method to find a distance between $g(x) = -1$ and $g(x) = 1$
Suppose $g(x_1) = -1,\; g(x_2) = 1$
$$ \begin{array}{I}
\omega_0 + \omega^Tx_1 = -1\\
\omega_0 + \omega^Tx_2 = 1\\
\end{array}
\; \implies \; \omega^T(x_2 - x_1) = 2$$
$$s = \langle\frac{\omega}{\lVert \omega \rVert}, x_2 - x_1 \rangle = \frac{1}{\lVert \omega \rVert}\omega^T(x_2 - x_1) = \frac {2}{\lVert \omega \rVert}$$
$$x = \begin{bmatrix}x_1 \\ x_2 \end{bmatrix}$$
$$
\begin{align*}
\omega_0 + \omega_1x_1 + \omega_2x_2 &= 0 \\\\
\omega_0 + \begin{bmatrix}\omega_1 & \omega_2 \end{bmatrix}
\begin{bmatrix}x_1 \\ x_2 \end{bmatrix} &= 0\\\\
\omega_0 + \omega^Tx &= 0
\end{align*}
$$
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
#training data gerneration
x1 = 8*np.random.rand(100, 1)
x2 = 7*np.random.rand(100, 1) - 4
g = 0.8*x1 + x2 - 3
g1 = g - 1
g0 = g + 1
C1 = np.where(g1 >= 0)[0]
C0 = np.where(g0 < 0)[0]
xp = np.linspace(-1,9,100).reshape(-1,1)
ypt = -0.8*xp + 3
plt.figure(figsize=(10, 8))
plt.plot(x1[C1], x2[C1], 'ro', alpha = 0.4, label = 'C1')
plt.plot(x1[C0], x2[C0], 'bo', alpha = 0.4, label = 'C0')
plt.plot(xp, ypt, 'k', linewidth = 3, label = 'True')
plt.title('Linearly and Strictly Separable Classes', fontsize = 15)
plt.xlabel(r'$x_1$', fontsize = 15)
plt.ylabel(r'$x_2$', fontsize = 15)
plt.legend(loc = 1, fontsize = 12)
plt.axis('equal')
plt.xlim([0, 8])
plt.ylim([-4, 3])
plt.show()
Find $\omega$ and $\omega_0$ such that $x$ given $\omega_0 + \omega^Tx = 0$
or
$$
\begin{align*}
&\omega_0 + \omega^Tx > b, \quad (b>0)\\ \\
\Longleftrightarrow \;&\frac{\omega_0}{b} + \frac{\omega^T}{b}x > 1 \\\\
\Longleftrightarrow \;&\omega'_0 + \omega'^Tx > 1
\end{align*}$$
# see how data are generated
xp = np.linspace(-1,9,100).reshape(-1,1)
ypt = -0.8*xp + 3
plt.figure(figsize=(10, 8))
plt.plot(x1[C1], x2[C1], 'ro', alpha = 0.4, label = 'C1')
plt.plot(x1[C0], x2[C0], 'bo', alpha = 0.4, label = 'C0')
plt.plot(xp, ypt, 'k', linewidth = 3, label = 'True')
plt.plot(xp, ypt-1, '--k')
plt.plot(xp, ypt+1, '--k')
plt.title('Linearly and Strictly Separable Classes', fontsize = 15)
plt.xlabel(r'$x_1$', fontsize = 15)
plt.ylabel(r'$x_2$', fontsize = 15)
plt.legend(loc = 1, fontsize = 12)
plt.axis('equal')
plt.xlim([0, 8])
plt.ylim([-4, 3])
plt.show()
$n \;(=2)$ features
$m = N + M$ data points in training set
$$ x^{(i)} = \begin{bmatrix} x^{(i)}_1 \\ x^{(i)}_2 \end{bmatrix} \;\text{with}\; \omega = \begin{bmatrix} \omega_1 \\ \omega_2 \end{bmatrix}\qquad \text{or} \qquad x^{(i)} = \begin{bmatrix} 1 \\ x^{(i)}_1 \\ x^{(i)}_2 \end{bmatrix} \;\; \text{with}\; \omega = \begin{bmatrix} \omega_0 \\ \omega_1 \\ \omega_2 \end{bmatrix}$$
$N$ belongs to $C_1$ in training set
$M$ belongs to $C_0$ in training set
Code (CVXPY)
Form 1
$$\begin{align*} \text{minimize} \quad & \text{something} \\ \text{subject to} \quad & \omega_0 + X_1\omega \geq 1 \\ & \omega_0 + X_0\omega \leq -1 \end{align*}$$# CVXPY using simple classification
import cvxpy as cvx
X1 = np.hstack([x1[C1], x2[C1]])
X0 = np.hstack([x1[C0], x2[C0]])
X1 = np.asmatrix(X1)
X0 = np.asmatrix(X0)
N = X1.shape[0]
M = X0.shape[0]
w0 = cvx.Variable([1,1])
w = cvx.Variable([2,1])
obj = cvx.Minimize(1)
const = [w0 + X1*w >= 1, w0 + X0*w <= -1]
prob = cvx.Problem(obj, const).solve()
w0 = w0.value
w = w.value
xp = np.linspace(-1,9,100).reshape(-1,1)
yp = - w[0,0]/w[1,0]*xp - w0/w[1,0]
plt.figure(figsize=(10, 8))
plt.plot(x1[C1], x2[C1], 'ro', alpha = 0.4, label = 'C1')
plt.plot(x1[C0], x2[C0], 'bo', alpha = 0.4, label = 'C0')
plt.plot(xp, ypt, 'k', alpha = 0.3, label = 'True')
plt.plot(xp, ypt-1, '--k', alpha = 0.3)
plt.plot(xp, ypt+1, '--k', alpha = 0.3)
plt.plot(xp, yp, 'g', linewidth = 3, label = 'Attempt 1')
plt.title('Linearly and Strictly Separable Classes', fontsize = 15)
plt.xlabel(r'$x_1$', fontsize = 15)
plt.ylabel(r'$x_2$', fontsize = 15)
plt.legend(loc = 1)
plt.axis('equal')
plt.xlim([0, 8])
plt.ylim([-4, 3])
plt.show()
Form 2
$$\begin{align*} \text{minimize} \quad & \text{something} \\ \text{subject to} \quad & X_1\omega \geq 1 \\ & X_0\omega \leq -1 \end{align*}$$import cvxpy as cvx
N = C1.shape[0]
M = C0.shape[0]
X1 = np.hstack([np.ones([N,1]), x1[C1], x2[C1]])
X0 = np.hstack([np.ones([M,1]), x1[C0], x2[C0]])
X1 = np.asmatrix(X1)
X0 = np.asmatrix(X0)
w = cvx.Variable([3,1])
obj = cvx.Minimize(1)
const = [X1*w >= 1, X0*w <= -1]
prob = cvx.Problem(obj, const).solve()
w = w.value
xp = np.linspace(-1,9,100).reshape(-1,1)
yp = - w[1,0]/w[2,0]*xp - w[0,0]/w[2,0]
plt.figure(figsize=(10, 8))
plt.plot(x1[C1], x2[C1], 'ro', alpha = 0.4, label = 'C1')
plt.plot(x1[C0], x2[C0], 'bo', alpha = 0.4, label = 'C0')
plt.plot(xp, ypt, 'k', alpha = 0.3, label = 'True')
plt.plot(xp, ypt-1, '--k', alpha = 0.3)
plt.plot(xp, ypt+1, '--k', alpha = 0.3)
plt.plot(xp, yp, 'g', linewidth = 3, label = 'Attempt 1')
plt.title('Linearly and Strictly Separable Classes', fontsize = 15)
plt.xlabel(r'$x_1$', fontsize = 15)
plt.ylabel(r'$x_2$', fontsize = 15)
plt.legend(loc = 1, fontsize = 12)
plt.axis('equal')
plt.xlim([0, 8])
plt.ylim([-4, 3])
plt.show()
X1 = np.hstack([np.ones([N,1]), x1[C1], x2[C1]])
X0 = np.hstack([np.ones([M,1]), x1[C0], x2[C0]])
outlier = np.array([1, 3, 2]).reshape(1,-1)
X0 = np.vstack([X0, outlier])
X1 = np.asmatrix(X1)
X0 = np.asmatrix(X0)
plt.figure(figsize=(10,8))
plt.plot(X1[:,1], X1[:,2], 'ro', alpha = 0.4, label = 'C1')
plt.plot(X0[:,1], X0[:,2], 'bo', alpha = 0.4, label = 'C0')
plt.title('When Outliers Exist', fontsize = 15)
plt.xlabel(r'$x_1$', fontsize = 15)
plt.ylabel(r'$x_2$', fontsize = 15)
plt.legend(loc = 1, fontsize = 12)
plt.axis('equal')
plt.xlim([0, 8])
plt.ylim([-4, 3])
plt.show()
w = cvx.Variable([3,1])
obj = cvx.Minimize(1)
const = [X1*w >= 1, X0*w <= -1]
prob = cvx.Problem(obj, const).solve()
print(w.value)
$n \;(=2)$ features
$m = N+M$ data points in a training set
$N$ belongs to $C_1$ in training set
$M$ belongs to $C_0$ in training set
For the non-separable case, we relex the above constraints
Need slack variables $u$ and $\upsilon$ where all are positive
The optimization problem for the non-separable case
$$\begin{align*} \text{minimize} \quad & \sum\limits_{i=1}^{N}u_i + \sum\limits_{i=1}^{M}\upsilon_i \\ \\
\text{subject to} \quad
& \begin{cases}
\omega^Tx^{(1)} \geq 1-u_1\\
\omega^Tx^{(2)} \geq 1-u_2\\
\quad \quad \vdots \\
\omega^Tx^{(N)} \geq 1-u_N\\
\end{cases} \\\\
& \begin{cases}
\omega^Tx^{(N+1)} \leq {-(1-\upsilon_1)}\\
\omega^Tx^{(N+2)} \leq {-(1-\upsilon_2)}\\
\quad \quad \vdots \\
\omega^Tx^{(N+M)} \leq {-(1-\upsilon_M)}\\
\end{cases} \\\\
& \begin{cases}
u \geq 0\\
v \geq 0\\
\end{cases}
\end{align*}$$
X1 = np.hstack([np.ones([N,1]), x1[C1], x2[C1]])
X0 = np.hstack([np.ones([M,1]), x1[C0], x2[C0]])
outlier = np.array([1, 2, 2]).reshape(1,-1)
X0 = np.vstack([X0, outlier])
X1 = np.asmatrix(X1)
X0 = np.asmatrix(X0)
N = X1.shape[0]
M = X0.shape[0]
w = cvx.Variable([3,1])
u = cvx.Variable([N,1])
v = cvx.Variable([M,1])
obj = cvx.Minimize(np.ones((1,N))*u + np.ones((1,M))*v)
const = [X1*w >= 1-u, X0*w <= -(1-v), u >= 0, v >= 0 ]
prob = cvx.Problem(obj, const).solve()
w = w.value
xp = np.linspace(-1,9,100).reshape(-1,1)
yp = - w[1,0]/w[2,0]*xp - w[0,0]/w[2,0]
plt.figure(figsize=(10, 8))
plt.plot(X1[:,1], X1[:,2], 'ro', alpha = 0.4, label = 'C1')
plt.plot(X0[:,1], X0[:,2], 'bo', alpha = 0.4, label = 'C0')
plt.plot(xp, ypt, 'k', alpha = 0.1, label = 'True')
plt.plot(xp, ypt-1, '--k', alpha = 0.1)
plt.plot(xp, ypt+1, '--k', alpha = 0.1)
plt.plot(xp, yp, 'g', linewidth = 3, label = 'Attempt 2')
plt.plot(xp, yp-1/w[2,0], '--g')
plt.plot(xp, yp+1/w[2,0], '--g')
plt.title('When Outliers Exist', fontsize = 15)
plt.xlabel(r'$x_1$', fontsize = 15)
plt.ylabel(r'$x_2$', fontsize = 15)
plt.legend(loc = 1, fontsize = 12)
plt.axis('equal')
plt.xlim([0, 8])
plt.ylim([-4, 3])
plt.show()
Further improvement
$$\begin{align*} \text{minimize} \quad
& \lVert \omega \rVert_2 + \gamma(1^Tu + 1^T\upsilon) \\
\text{subject to} \quad
& X_1\omega \geq 1-u \\
& X_0\omega \leq -(1-\upsilon) \\
& u \geq 0 \\
& \upsilon \geq 0
\end{align*}$$
g = 2
w = cvx.Variable([3,1])
u = cvx.Variable([N,1])
v = cvx.Variable([M,1])
obj = cvx.Minimize(cvx.norm(w,2) + g*(np.ones((1,N))*u + np.ones((1,M))*v))
const = [X1*w >= 1-u, X0*w <= -(1-v), u >= 0, v >= 0 ]
prob = cvx.Problem(obj, const).solve()
w = w.value
xp = np.linspace(-1,9,100).reshape(-1,1)
yp = - w[1,0]/w[2,0]*xp - w[0,0]/w[2,0]
plt.figure(figsize=(10, 8))
plt.plot(X1[:,1], X1[:,2], 'ro', alpha = 0.4, label = 'C1')
plt.plot(X0[:,1], X0[:,2], 'bo', alpha = 0.4, label = 'C0')
plt.plot(xp, ypt, 'k', alpha = 0.1, label = 'True')
plt.plot(xp, ypt-1, '--k', alpha = 0.1)
plt.plot(xp, ypt+1, '--k', alpha = 0.1)
plt.plot(xp, yp, 'g', linewidth = 3, label = 'SVM')
plt.plot(xp, yp-1/w[2,0], '--g')
plt.plot(xp, yp+1/w[2,0], '--g')
plt.title('When Outliers Exist', fontsize = 15)
plt.xlabel(r'$x_1$', fontsize = 15)
plt.ylabel(r'$x_2$', fontsize = 15)
plt.legend(loc = 1, fontsize = 12)
plt.axis('equal')
plt.xlim([0, 8])
plt.ylim([-4, 3])
plt.show()
$$\begin{align*} \text{minimize} \quad
& \lVert \omega \rVert_2 + \gamma(1^Tu + 1^T\upsilon) \\
\text{subject to} \quad
& X_1\omega \geq 1-u \\
& X_0\omega \leq -(1-\upsilon) \\
& u \geq 0 \\
& \nu \geq 0
\end{align*}$$
$$
\begin{align*}
\omega^T x_n &\geq 1 \;\text{for }\; y_n = +1 \\
\omega^T x_n &\leq -1 \;\text{for }\; y_n = -1
\end{align*}
\Longleftrightarrow y_n \cdot \left( \omega^T x_n \right) \geq 1
$$
$$\begin{align*} \text{minimize} \quad
& \lVert \omega \rVert_2 + \gamma(1^T \xi) \\
\text{subject to} \quad
& y_n \cdot \left( \omega^T x_n \right) \geq 1 - \xi_n \\
& \xi \geq 0 \\
\end{align*}$$
X = np.vstack([X1, X0])
y = np.vstack([np.ones([N,1]), -np.ones([M,1])])
m = N + M
g = 2
w = cvx.Variable([3,1])
d = cvx.Variable([m,1])
obj = cvx.Minimize(cvx.norm(w,2) + g*(np.ones([1,m])*d))
const = [cvx.multiply(y, X*w) >= 1-d, d >= 0]
prob = cvx.Problem(obj, const).solve()
w = w.value
xp = np.linspace(-1,9,100).reshape(-1,1)
yp = - w[1,0]/w[2,0]*xp - w[0,0]/w[2,0]
plt.figure(figsize=(10, 8))
plt.plot(X1[:,1], X1[:,2], 'ro', alpha = 0.4, label = 'C1')
plt.plot(X0[:,1], X0[:,2], 'bo', alpha = 0.4, label = 'C0')
plt.plot(xp, ypt, 'k', alpha = 0.1, label = 'True')
plt.plot(xp, ypt-1, '--k', alpha = 0.1)
plt.plot(xp, ypt+1, '--k', alpha = 0.1)
plt.plot(xp, yp, 'g', linewidth = 3, label = 'SVM')
plt.plot(xp, yp-1/w[2,0], '--g')
plt.plot(xp, yp+1/w[2,0], '--g')
plt.title('Support Vector Machine (SVM)', fontsize = 15)
plt.xlabel(r'$x_1$', fontsize = 15)
plt.ylabel(r'$x_2$', fontsize = 15)
plt.legend(loc = 1, fontsize = 12)
plt.axis('equal')
plt.xlim([0, 8])
plt.ylim([-4, 3])
plt.show()
%%html
<center><iframe
width="420" height="315" src="https://www.youtube.com/embed/3liCbRZPrZA?rel=0" frameborder="0" allowfullscreen>
</iframe></center>
X1 = np.array([[-1.1,0],[-0.3,0.1],[-0.9,1],[0.8,0.4],[0.4,0.9],[0.3,-0.6],
[-0.5,0.3],[-0.8,0.6],[-0.5,-0.5]])
X0 = np.array([[-1,-1.3], [-1.6,2.2],[0.9,-0.7],[1.6,0.5],[1.8,-1.1],[1.6,1.6],
[-1.6,-1.7],[-1.4,1.8],[1.6,-0.9],[0,-1.6],[0.3,1.7],[-1.6,0],[-2.1,0.2]])
X1 = np.asmatrix(X1)
X0 = np.asmatrix(X0)
plt.figure(figsize=(10, 8))
plt.plot(X1[:,0], X1[:,1], 'ro', label = 'C1')
plt.plot(X0[:,0], X0[:,1], 'bo', label = 'C0')
plt.title('SVM for Nonlinear Data', fontsize = 15)
plt.xlabel(r'$x_1$', fontsize = 15)
plt.ylabel(r'$x_2$', fontsize = 15)
plt.legend(loc = 1, fontsize = 12)
plt.axis('equal')
plt.show()
$$
x = \begin{bmatrix} x_1 \\ x_2 \end{bmatrix} \quad \Longrightarrow \quad
z = \phi(x) = \begin{bmatrix} 1\\ x_1^2 \\ \sqrt{2}x_1 x_2 \\x_2^2 \end{bmatrix}$$
N = X1.shape[0]
M = X0.shape[0]
X = np.vstack([X1, X0])
y = np.vstack([np.ones([N,1]), -np.ones([M,1])])
X = np.asmatrix(X)
y = np.asmatrix(y)
m = N + M
Z = np.hstack([np.ones([m,1]), np.square(X[:,0]), np.sqrt(2)*np.multiply(X[:,0],X[:,1]), np.square(X[:,1])])
g = 10
w = cvx.Variable([4, 1])
d = cvx.Variable([m, 1])
obj = cvx.Minimize(cvx.norm(w, 2) + g*np.ones([1,m])*d)
const = [cvx.multiply(y, Z*w) >= 1-d, d>=0]
prob = cvx.Problem(obj, const).solve()
w = w.value
print(w)
# to plot
[X1gr, X2gr] = np.meshgrid(np.arange(-3,3,0.1), np.arange(-3,3,0.1))
Xp = np.hstack([X1gr.reshape(-1,1), X2gr.reshape(-1,1)])
Xp = np.asmatrix(Xp)
m = Xp.shape[0]
Zp = np.hstack([np.ones([m,1]), np.square(Xp[:,0]), np.sqrt(2)*np.multiply(Xp[:,0],Xp[:,1]), np.square(Xp[:,1])])
q = Zp*w
B = []
for i in range(m):
if q[i,0] > 0:
B.append(Xp[i,:])
B = np.vstack(B)
plt.figure(figsize=(10, 8))
plt.plot(X1[:,0], X1[:,1], 'ro', label = 'C1')
plt.plot(X0[:,0], X0[:,1], 'bo', label = 'C0')
plt.plot(B[:,0], B[:,1], 'gs', markersize = 10, alpha = 0.1, label = 'SVM')
plt.title('SVM with Kernel', fontsize = 15)
plt.xlabel(r'$x_1$', fontsize = 15)
plt.ylabel(r'$x_2$', fontsize = 15)
plt.legend(loc = 1, fontsize = 12)
plt.axis('equal')
plt.show()
%%html
<center><iframe src="https://www.youtube.com/embed/uegAloa4woI?rel=0"
width="420" height="315" frameborder="0" allowfullscreen></iframe></center>
%%html
<center><iframe src="https://www.youtube.com/embed/AzIb7EgS6rU?rel=0"
width="420" height="315" frameborder="0" allowfullscreen></iframe></center>
%%html
<center><iframe src="https://www.youtube.com/embed/dBlCNzHyPYE?rel=0"
width="420" height="315" frameborder="0" allowfullscreen></iframe></center>
%%html
<center><iframe src="https://www.youtube.com/embed/8S3q_ySxhuw?rel=0"
width="420" height="315" frameborder="0" allowfullscreen></iframe></center>
%%html
<center><iframe src="https://www.youtube.com/embed/jmUzF9D1WCM?rel=0"
width="420" height="315" frameborder="0" allowfullscreen></iframe></center>
%%html
<center><iframe src="https://www.youtube.com/embed/lXxZbzF-rD0?rel=0"
width="420" height="315" frameborder="0" allowfullscreen></iframe></center>
%%javascript
$.getScript('https://kmahelona.github.io/ipython_notebook_goodies/ipython_notebook_toc.js')