Autoencoder
Table of Contents
Definition
Dimension Reduction
It can be thought of as a 'deep learning version' of unsupervised learning.
Definition
Encoder and Decoder
$$ \mathbb{E} \left[ \lVert X - g \circ f(X) \rVert^2 \right] \approx 0$$
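Here $f$ is the encoder, mapping an input $X \in \mathbb{R}^{n}$ to a low-dimensional latent code, and $g$ is the decoder, mapping that code back to $\mathbb{R}^{n}$. In practice the expectation is replaced by the empirical average over $m$ training samples, which is exactly the loss minimized below:

$$\min_{f,\,g} \; \frac{1}{m} \sum_{i=1}^{m} \left\lVert x_i - g \circ f(x_i) \right\rVert^2$$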
import os
# Pin TensorFlow to GPU 0
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
%matplotlib inline

# Load MNIST with one-hot labels (TF 1.x tutorial helper)
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)
# Use only the digits 1, 5, and 6
train_idx = ((np.argmax(mnist.train.labels, 1) == 1) | \
             (np.argmax(mnist.train.labels, 1) == 5) | \
             (np.argmax(mnist.train.labels, 1) == 6))
test_idx = ((np.argmax(mnist.test.labels, 1) == 1) | \
            (np.argmax(mnist.test.labels, 1) == 5) | \
            (np.argmax(mnist.test.labels, 1) == 6))
train_imgs = mnist.train.images[train_idx]
train_labels = mnist.train.labels[train_idx]
test_imgs = mnist.test.images[test_idx]
test_labels = mnist.test.labels[test_idx]
n_train = train_imgs.shape[0]
n_test = test_imgs.shape[0]
print ("The number of training images : {}, shape : {}".format(n_train, train_imgs.shape))
print ("The number of testing images : {}, shape : {}".format(n_test, test_imgs.shape))
# Shape of input and latent variable
n_input = 28*28
# Encoder structure
n_encoder1 = 500
n_encoder2 = 300
n_latent = 2
# Decoder structure
n_decoder2 = 300
n_decoder1 = 500
weights = {
    'encoder1' : tf.Variable(tf.random_normal([n_input, n_encoder1], stddev = 0.1)),
    'encoder2' : tf.Variable(tf.random_normal([n_encoder1, n_encoder2], stddev = 0.1)),
    'latent' : tf.Variable(tf.random_normal([n_encoder2, n_latent], stddev = 0.1)),
    'decoder2' : tf.Variable(tf.random_normal([n_latent, n_decoder2], stddev = 0.1)),
    'decoder1' : tf.Variable(tf.random_normal([n_decoder2, n_decoder1], stddev = 0.1)),
    'reconst' : tf.Variable(tf.random_normal([n_decoder1, n_input], stddev = 0.1))
}
biases = {
    'encoder1' : tf.Variable(tf.random_normal([n_encoder1], stddev = 0.1)),
    'encoder2' : tf.Variable(tf.random_normal([n_encoder2], stddev = 0.1)),
    'latent' : tf.Variable(tf.random_normal([n_latent], stddev = 0.1)),
    'decoder2' : tf.Variable(tf.random_normal([n_decoder2], stddev = 0.1)),
    'decoder1' : tf.Variable(tf.random_normal([n_decoder1], stddev = 0.1)),
    'reconst' : tf.Variable(tf.random_normal([n_input], stddev = 0.1))
}
x = tf.placeholder(tf.float32, [None, n_input])
Encoder
- tanh is used as the nonlinear activation function
- no nonlinear activation is applied to latent

Decoder
- tanh is used as the nonlinear activation function
- no nonlinear activation is applied to reconst
def encoder(x, weights, biases):
    # Two tanh layers, then a linear map to the 2-D latent code
    encoder1 = tf.add(tf.matmul(x, weights['encoder1']), biases['encoder1'])
    encoder1 = tf.nn.tanh(encoder1)
    encoder2 = tf.add(tf.matmul(encoder1, weights['encoder2']), biases['encoder2'])
    encoder2 = tf.nn.tanh(encoder2)
    latent = tf.add(tf.matmul(encoder2, weights['latent']), biases['latent'])
    return latent

def decoder(latent, weights, biases):
    # Two tanh layers, then a linear map back to the 784 pixels
    decoder2 = tf.add(tf.matmul(latent, weights['decoder2']), biases['decoder2'])
    decoder2 = tf.nn.tanh(decoder2)
    decoder1 = tf.add(tf.matmul(decoder2, weights['decoder1']), biases['decoder1'])
    decoder1 = tf.nn.tanh(decoder1)
    reconst = tf.add(tf.matmul(decoder1, weights['reconst']), biases['reconst'])
    return reconst
Loss
Optimizer
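The loss implemented below is the mean squared reconstruction error; note that tf.reduce_mean averages over both the batch and the 784 pixels:

$$\ell = \frac{1}{m \cdot 784} \sum_{i=1}^{m} \sum_{j=1}^{784} \left( x_{ij} - \hat{x}_{ij} \right)^2, \qquad \hat{x}_i = g \circ f(x_i)$$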
LR = 0.0001
latent = encoder(x, weights, biases)
reconst = decoder(latent, weights, biases)
loss = tf.square(tf.subtract(x, reconst))
loss = tf.reduce_mean(loss)
optm = tf.train.AdamOptimizer(LR).minimize(loss)
- n_batch : batch size for mini-batch gradient descent
- n_iter : the number of iteration steps
- n_prt : check the loss every n_prt iterations

n_batch = 50
n_iter = 2500
n_prt = 250
def train_batch_maker(batch_size):
    # Sample a random mini-batch from the training set
    random_idx = np.random.randint(n_train, size = batch_size)
    return train_imgs[random_idx], train_labels[random_idx]

def test_batch_maker(batch_size):
    # Sample a random mini-batch from the testing set
    random_idx = np.random.randint(n_test, size = batch_size)
    return test_imgs[random_idx], test_labels[random_idx]
sess = tf.Session()
init = tf.global_variables_initializer()
sess.run(init)
loss_record_train = []
loss_record_test = []
for i in range(n_iter):
    train_x, _ = train_batch_maker(n_batch)
    sess.run(optm, feed_dict = {x : train_x})
    if i % n_prt == 0:
        # Record the training and testing loss every n_prt iterations
        test_x, _ = test_batch_maker(n_batch)
        c1 = sess.run(loss, feed_dict = {x: train_x})
        c2 = sess.run(loss, feed_dict = {x: test_x})
        loss_record_train.append(c1)
        loss_record_test.append(c2)
        print ("Iter : {}".format(i))
        print ("Cost : {}".format(c1))
plt.figure(figsize=(10,8))
plt.plot(np.arange(len(loss_record_train))*n_prt, loss_record_train, label = 'training')
plt.plot(np.arange(len(loss_record_test))*n_prt, loss_record_test, label = 'testing')
plt.xlabel('iteration', fontsize = 15)
plt.ylabel('loss', fontsize = 15)
plt.legend(fontsize = 12)
plt.ylim([0,np.max(loss_record_train)])
plt.show()
test_x, _ = test_batch_maker(1)
x_reconst = sess.run(reconst, feed_dict = {x: test_x})
plt.figure(figsize = (10,8))
plt.subplot(1,2,1)
plt.imshow(test_x.reshape(28,28), 'gray')
plt.title('Input Image', fontsize = 15)
plt.xticks([])
plt.yticks([])
plt.subplot(1,2,2)
plt.imshow(x_reconst.reshape(28,28), 'gray')
plt.title('Reconstructed Image', fontsize = 15)
plt.xticks([])
plt.yticks([])
plt.show()
test_x, test_y = test_batch_maker(500)
test_y = np.argmax(test_y, axis = 1)
test_latent = sess.run(latent, feed_dict = {x: test_x})
plt.figure(figsize = (10,10))
plt.scatter(test_latent[test_y == 1,0], test_latent[test_y == 1,1], label = '1')
plt.scatter(test_latent[test_y == 5,0], test_latent[test_y == 5,1], label = '5')
plt.scatter(test_latent[test_y == 6,0], test_latent[test_y == 6,1], label = '6')
plt.title('Latent Space', fontsize=15)
plt.xlabel('Z1', fontsize=15)
plt.ylabel('Z2', fontsize=15)
plt.legend(fontsize = 15)
plt.axis('equal')
plt.show()
Data Generation
Feeding a point from the latent space into the decoder generates a new image; ideally, it generates something that makes sense.
These results are unsatisfying, however, because the density model used on the latent space is too simple and inadequate: a latent point far from the encoded data can decode to a meaningless image.
Building a "good" model amounts to our original problem of modeling an empirical distribution, although it may now be in a lower-dimensional space.
This is a motivation for the VAE and the GAN.
new_data = np.array([[-4, 0]])
latent_input = tf.placeholder(tf.float32, [None, n_latent])
reconst = decoder(latent_input, weights, biases)
fake_image = sess.run(reconst, feed_dict = {latent_input: new_data})
plt.figure(figsize=(16,7))
plt.subplot(1,2,1)
plt.scatter(test_latent[test_y == 1,0], test_latent[test_y == 1,1], label = '1')
plt.scatter(test_latent[test_y == 5,0], test_latent[test_y == 5,1], label = '5')
plt.scatter(test_latent[test_y == 6,0], test_latent[test_y == 6,1], label = '6')
plt.scatter(new_data[:,0], new_data[:,1], c = 'k', marker = 'o', s = 200, label = 'new data')
plt.title('Latent Space', fontsize = 15)
plt.xlabel('Z1', fontsize = 15)
plt.ylabel('Z2', fontsize = 15)
plt.legend(loc = 2, fontsize = 12)
plt.axis('equal')
plt.subplot(1,2,2)
plt.imshow(fake_image.reshape(28,28), 'gray')
plt.title('Generated Fake Image', fontsize = 15)
plt.xticks([])
plt.yticks([])
plt.show()
Image Generation
# Initialize canvas
nx = 20
ny = 20
x_values = np.linspace(-8, 4, nx)
y_values = np.linspace(-4, 6, ny)
canvas = np.empty((28*ny, 28*nx))
# Define placeholder
latent_input = tf.placeholder(tf.float32, [None, n_latent])
reconst = decoder(latent_input, weights, biases)
for i, yi in enumerate(y_values):
    for j, xi in enumerate(x_values):
        latent_ = np.array([[xi, yi]])
        reconst_ = sess.run(reconst, feed_dict = {latent_input: latent_})
        # Rows are indexed by ny (not nx) so that increasing Z2 goes upward
        canvas[(ny-i-1)*28:(ny-i)*28, j*28:(j+1)*28] = reconst_.reshape(28, 28)
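As a side note, calling sess.run once per grid cell works but is slow; since the decoder accepts batches, the whole 20 x 20 grid can be decoded in a single call. A minimal sketch, reusing the latent_input and reconst defined above:

# Sketch: decode all grid points in one batched call
grid = np.array([[xi, yi] for yi in y_values for xi in x_values])   # shape (ny*nx, 2)
reconst_all = sess.run(reconst, feed_dict = {latent_input: grid})   # shape (ny*nx, 784)
for k in range(grid.shape[0]):
    i, j = divmod(k, nx)
    canvas[(ny-i-1)*28:(ny-i)*28, j*28:(j+1)*28] = reconst_all[k].reshape(28, 28)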
plt.figure(figsize = (16, 7))
plt.subplot(1,2,1)
plt.scatter(test_latent[test_y == 1,0], test_latent[test_y == 1,1], label = '1')
plt.scatter(test_latent[test_y == 5,0], test_latent[test_y == 5,1], label = '5')
plt.scatter(test_latent[test_y == 6,0], test_latent[test_y == 6,1], label = '6')
plt.title('Latent Space', fontsize = 15)
plt.xlabel('Z1', fontsize = 15)
plt.ylabel('Z2', fontsize = 15)
plt.legend(fontsize = 12)
plt.axis('equal')
plt.subplot(1,2,2)
plt.imshow(canvas, 'gray')
plt.title('Manifold', fontsize = 15)
plt.xlabel('Z1', fontsize = 15)
plt.ylabel('Z2', fontsize = 15)
plt.xticks([])
plt.yticks([])
plt.show()
To get an intuition of the latent representation, we can pick two samples $x$ and $x'$ at random and interpolate samples along the line in the latent space:

$$g\left((1-\alpha)f(x) + \alpha f(x')\right), \qquad \alpha \in [0, 1]$$
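A minimal sketch of such an interpolation, reusing x, latent, latent_input, reconst, and sess from above; the number of interpolation steps (10) is an arbitrary choice:

# Sketch: decode points along the line between two latent codes
x1, _ = test_batch_maker(1)
x2, _ = test_batch_maker(1)
z1 = sess.run(latent, feed_dict = {x: x1})   # f(x)
z2 = sess.run(latent, feed_dict = {x: x2})   # f(x')

alphas = np.linspace(0, 1, 10)
plt.figure(figsize = (16, 2))
for k, a in enumerate(alphas):
    z = (1 - a)*z1 + a*z2                    # (1 - alpha) f(x) + alpha f(x')
    x_interp = sess.run(reconst, feed_dict = {latent_input: z})   # g(...)
    plt.subplot(1, len(alphas), k + 1)
    plt.imshow(x_interp.reshape(28, 28), 'gray')
    plt.xticks([])
    plt.yticks([])
plt.show()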
%%html
<center><iframe src="https://www.youtube.com/embed/QujriOAtps4?rel=0"
width="560" height="315" frameborder="0" allowfullscreen></iframe></center>
%%html
<center><iframe src="https://www.youtube.com/embed/nTt_ajul8NY?rel=0"
width="560" height="315" frameborder="0" allowfullscreen></iframe></center>
%%html
<center><iframe src="https://www.youtube.com/embed/H1AllrJ-_30?rel=0"
width="560" height="315" frameborder="0" allowfullscreen></iframe></center>
%%javascript
$.getScript('https://kmahelona.github.io/ipython_notebook_goodies/ipython_notebook_toc.js')