Autoencoder
Table of Contents
Definition
Dimension Reduction
It can be viewed as a "deep learning version" of unsupervised dimension reduction.
Definition
Encoder and Decoder
$$ \mathbb{E} \left[ \lVert X - g \circ f(X) \rVert^2 \right] \approx 0$$
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
from sklearn.neural_network import MLPRegressor
from sklearn.metrics import accuracy_score
Only use (1, 5, 6) digits to visualize latent space in 2-D
Download data files
# Load pre-split MNIST arrays from local .npy files.
# Images are flattened to 784-dim rows; labels appear to be one-hot
# encoded (np.argmax is used below to recover the digit) — confirm
# against how the .npy files were produced.
train_x = np.load('./data_files/mnist_train_images.npy')
train_y = np.load('./data_files/mnist_train_labels.npy')
test_x = np.load('./data_files/mnist_test_images.npy')
test_y = np.load('./data_files/mnist_test_labels.npy')
# Dataset sizes.
n_train = train_x.shape[0]
n_test = test_x.shape[0]
print ("The number of training images : {}, shape : {}".format(n_train, train_x.shape))
print ("The number of testing images : {}, shape : {}".format(n_test, test_x.shape))
# Sanity check: show one randomly chosen training image with its label.
idx = np.random.randint(train_x.shape[0])
img = train_x[idx].reshape(28,28)
plt.figure(figsize = (6,6))
plt.imshow(img,'gray')
plt.title("Label : {}".format(np.argmax(train_y[idx,:])))
plt.xticks([])
plt.yticks([])
plt.show()
# Shape of input and latent variable
n_input = 28*28   # flattened 28x28 MNIST image
# Encoder structure (input -> 500 -> 300 -> latent)
n_encoder1 = 500  # first encoder hidden-layer width
n_encoder2 = 300  # second encoder hidden-layer width
n_latent = 2      # 2-D latent space so it can be plotted directly
# Decoder structure (mirrors the encoder: latent -> 300 -> 500 -> output)
n_decoder2 = 300
n_decoder1 = 500
Encoder
tanh is used as the nonlinear activation of the encoder hidden layers. Note that in this MLPRegressor implementation the latent layer is itself a hidden layer, so the tanh activation is applied to it as well.
Decoder
tanh is used as the nonlinear activation of the decoder hidden layers. The reconstruction (output) layer of MLPRegressor uses the identity activation, so no nonlinearity is applied to it.
Loss
Optimizer
# Autoencoder via scikit-learn's MLP: the five hidden layers form an
# encoder (500 -> 300 -> 2) followed by a decoder (300 -> 500), and the
# network is trained to reproduce its own input (targets == inputs).
mlp_params = {
    'hidden_layer_sizes': (n_encoder1, n_encoder2, n_latent, n_decoder2, n_decoder1),
    'activation': 'tanh',          # tanh on every hidden layer (incl. latent)
    'solver': 'adam',
    'learning_rate_init': 0.0001,
    'max_iter': 20,
    'tol': 0.0000001,
    'verbose': True,
}
reg = MLPRegressor(**mlp_params)
reg.fit(train_x, train_x)
# Pick one random test image and compare it with the autoencoder's
# reconstruction (reg.predict runs the full encode -> decode pass).
idx = np.random.randint(test_x.shape[0])
x_reconst = reg.predict(test_x[idx].reshape(-1,784))
plt.figure(figsize = (10,8))
plt.subplot(1,2,1)
plt.imshow(test_x[idx].reshape(28,28), 'gray')
plt.title('Input Image', fontsize = 15)  # fixed typo: was 'Imput Image'
plt.xticks([])
plt.yticks([])
plt.subplot(1,2,2)
plt.imshow(x_reconst.reshape(28,28), 'gray')
plt.title('Reconstructed Image', fontsize = 15)
plt.xticks([])
plt.yticks([])
plt.show()
def encoder(data, model=None):
    """Map input images to the 2-D latent space using the first three
    weight matrices (the encoder half) of the trained MLP.

    Parameters
    ----------
    data : array-like, shape (n_samples, 784)
        Flattened input images.
    model : optional
        Fitted MLPRegressor-like object exposing ``coefs_`` and
        ``intercepts_``. Defaults to the module-level ``reg``.

    Returns
    -------
    np.ndarray, shape (n_samples, 2)
        Latent coordinates.
    """
    if model is None:
        model = reg
    x = np.asarray(data)
    # np.tanh matches MLPRegressor's 'tanh' hidden activation and is
    # numerically stable; the previous hand-rolled
    # (exp(x) - exp(-x)) / (exp(x) + exp(-x)) overflows to nan for
    # |x| greater than ~710.
    h1 = np.tanh(x @ model.coefs_[0] + model.intercepts_[0])
    h2 = np.tanh(h1 @ model.coefs_[1] + model.intercepts_[1])
    # The latent layer is also an MLP hidden layer, so tanh applies here too.
    latent = np.tanh(h2 @ model.coefs_[2] + model.intercepts_[2])
    return np.asarray(latent)
# Embed the whole test set in the 2-D latent space and color the points
# by digit class (this subset contains only the digits 1, 5, and 6).
test_latent = encoder(test_x)
# Hoisted: compute the integer labels once instead of re-running
# np.argmax over the whole label matrix for every scatter call.
labels = np.argmax(test_y, axis = 1)
plt.figure(figsize = (10,10))
for digit in (1, 5, 6):
    plt.scatter(test_latent[labels == digit, 0],
                test_latent[labels == digit, 1],
                label = str(digit))
plt.title('Latent Space', fontsize=15)
plt.xlabel('Z1', fontsize=15)
plt.ylabel('Z2', fontsize=15)
plt.legend(fontsize = 15)
plt.axis('equal')
plt.show()
Data Generation
def decoder(new_data, model=None):
    """Map latent points back to 784-dim images using the decoder half
    (weight matrices 3-5) of the trained MLP.

    Parameters
    ----------
    new_data : array-like, shape (n_samples, 2)
        Latent coordinates.
    model : optional
        Fitted MLPRegressor-like object exposing ``coefs_`` and
        ``intercepts_``. Defaults to the module-level ``reg``.

    Returns
    -------
    np.ndarray, shape (n_samples, 784)
        Reconstructed flattened images.
    """
    if model is None:
        model = reg
    z = np.asarray(new_data)
    # np.tanh matches the model's hidden activation and avoids the
    # overflow of the hand-rolled exp formula for large |x|.
    h2 = np.tanh(z @ model.coefs_[3] + model.intercepts_[3])
    h1 = np.tanh(h2 @ model.coefs_[4] + model.intercepts_[4])
    # Bug fix: MLPRegressor's output activation is the identity, so the
    # reconstruction layer must NOT apply tanh. The previous version
    # applied tanh here, which disagreed with reg.predict.
    reconst = h1 @ model.coefs_[5] + model.intercepts_[5]
    return np.asarray(reconst)
# Decode a hand-picked latent point and show the generated image next
# to its position in the latent space.
latent = np.array([[0.5, 0.5]])
reconst = decoder(latent)
# Hoisted: one argmax over the label matrix instead of three.
labels = np.argmax(test_y, axis = 1)
plt.figure(figsize=(16,7))
plt.subplot(1,2,1)
for digit in (1, 5, 6):
    plt.scatter(test_latent[labels == digit, 0],
                test_latent[labels == digit, 1],
                label = str(digit))
plt.scatter(latent[:,0], latent[:,1], c = 'k', marker = 'o', s = 200, label = 'new data')
plt.title('Latent Space', fontsize = 15)
plt.xlabel('Z1', fontsize = 15)
plt.ylabel('Z2', fontsize = 15)
plt.legend(loc = 2, fontsize = 12)
plt.axis('equal')
plt.subplot(1,2,2)
plt.imshow(reconst.reshape(28,28), 'gray')
plt.title('Generated Fake Image', fontsize = 15)
plt.xticks([])
plt.yticks([])
plt.show()
Image Generation
# Initialize canvas: a ny x nx grid of 28x28 decoded images sampling
# the latent square [-1, 1] x [-1, 1].
nx = 20
ny = 20
x_values = np.linspace(-1, 1, nx)
y_values = np.linspace(-1, 1, ny)
canvas = np.empty((28*ny, 28*nx))
for i, yi in enumerate(y_values):
    for j, xi in enumerate(x_values):
        latent = np.array([[xi, yi]])
        reconst = decoder(latent)
        # Bug fix: the row index must use ny (the original used nx,
        # which only worked because nx == ny here). Rows are flipped
        # so that y increases upward, matching the scatter plot.
        canvas[(ny-i-1)*28:(ny-i)*28, j*28:(j+1)*28] = reconst.reshape(28, 28)
# Show the latent scatter next to the decoded manifold canvas.
# Hoisted: one argmax over the label matrix instead of six.
labels = np.argmax(test_y, axis = 1)
plt.figure(figsize = (16, 7))
plt.subplot(1,2,1)
for digit in (1, 5, 6):
    plt.scatter(test_latent[labels == digit, 0],
                test_latent[labels == digit, 1],
                label = str(digit))
plt.title('Latent Space', fontsize = 15)
plt.xlabel('Z1', fontsize = 15)
plt.ylabel('Z2', fontsize = 15)
plt.legend(fontsize = 12)
plt.axis('equal')
plt.subplot(1,2,2)
plt.imshow(canvas, 'gray')
plt.title('Manifold', fontsize = 15)
plt.xlabel('Z1', fontsize = 15)
plt.ylabel('Z2', fontsize = 15)
plt.xticks([])
plt.yticks([])
plt.show()
To get an intuition of the latent representation, we can pick two samples $x$ and $x'$ at random and interpolate samples along the line between them in the latent space:

$$g\left((1-\alpha)f(x) + \alpha f(x')\right), \qquad \alpha \in [0, 1]$$

%%html
<center><iframe src="https://www.youtube.com/embed/QujriOAtps4?rel=0"
width="560" height="315" frameborder="0" allowfullscreen></iframe></center>
%%html
<center><iframe src="https://www.youtube.com/embed/nTt_ajul8NY?rel=0"
width="560" height="315" frameborder="0" allowfullscreen></iframe></center>
%%html
<center><iframe src="https://www.youtube.com/embed/H1AllrJ-_30?rel=0"
width="560" height="315" frameborder="0" allowfullscreen></iframe></center>
%%javascript
$.getScript('https://kmahelona.github.io/ipython_notebook_goodies/ipython_notebook_toc.js')