Autoencoder
Table of Contents
Definition
Dimension Reduction
An autoencoder is like a 'deep learning version' of dimension reduction in unsupervised learning.
Definition
Encoder and Decoder
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
%matplotlib inline
# Load Data
# MNIST train/test splits come from the Keras dataset loader.
mnist = tf.keras.datasets.mnist
(train_x, train_y), (test_x, test_y) = mnist.load_data()

# Flatten every image to a 784-element row and divide pixel values by 255.
train_x = train_x.reshape(-1, 784) / 255.0
test_x = test_x.reshape(-1, 784) / 255.0
# Use only the digits 1, 5 and 6 to keep the latent space easy to visualize
digits = (1, 5, 6)

# np.isin marks every sample whose label is one of `digits`; np.flatnonzero
# turns that mask into ascending indices, so the original sample order is kept.
train_idx = np.flatnonzero(np.isin(train_y, digits))
test_idx = np.flatnonzero(np.isin(test_y, digits))

# Keep images and labels aligned by indexing both with the same index array.
train_imgs = train_x[train_idx]
train_labels = train_y[train_idx]
test_imgs = test_x[test_idx]
test_labels = test_y[test_idx]

n_train = train_imgs.shape[0]
n_test = test_imgs.shape[0]

print("The number of training images : {}, shape : {}".format(n_train, train_imgs.shape))
print("The number of testing images : {}, shape : {}".format(n_test, test_imgs.shape))
# Define Structure
# Sizes shared by both halves, so the encoder input and the decoder output
# cannot drift apart.
n_input = 28 * 28   # length of one flattened image
n_latent = 2        # two latent units, so codes can be drawn on a 2-D scatter plot

# Encoder Structure: n_input -> 500 -> 300 -> n_latent
# Both models declare their input with tf.keras.Input instead of the legacy
# input_dim / input_shape layer arguments, so the two definitions are consistent.
encoder = tf.keras.models.Sequential([
    tf.keras.Input(shape=(n_input,)),
    tf.keras.layers.Dense(units=500, activation='relu'),
    tf.keras.layers.Dense(units=300, activation='relu'),
    tf.keras.layers.Dense(units=n_latent, activation=None),
])

# Decoder Structure: mirror of the encoder, n_latent -> 300 -> 500 -> n_input
decoder = tf.keras.models.Sequential([
    tf.keras.Input(shape=(n_latent,)),
    tf.keras.layers.Dense(units=300, activation='relu'),
    tf.keras.layers.Dense(units=500, activation='relu'),
    tf.keras.layers.Dense(units=n_input, activation=None),
])

# Autoencoder = Encoder + Decoder
# The two sub-models are chained, so they stay usable on their own.
autoencoder = tf.keras.models.Sequential([encoder, decoder])
Loss
Optimizer
# Adam with a learning rate of 0.001; mean squared error is used as the loss
# and is also reported as a metric.
adam = tf.keras.optimizers.Adam(learning_rate=0.001)
autoencoder.compile(optimizer=adam, loss='mean_squared_error', metrics=['mse'])
# Train Model & Evaluate Test Data
# The images are passed as both input and target: the model is trained to
# reproduce its own input.
training = autoencoder.fit(train_imgs, train_imgs, batch_size=50, epochs=10)

# evaluate() returns the loss followed by the compiled metrics (here: 'mse').
test_scores = autoencoder.evaluate(test_imgs, test_imgs, verbose=2)
test_loss, test_mse = test_scores

print('Test loss: {}'.format(test_loss))
# MSE is an error magnitude, not a ratio, so it is printed as-is rather than
# scaled by 100 and labelled as a percentage.
print('Mean Squared Error: {}'.format(test_mse))
# Visualize Evaluation on Test Data
# Pick one test image at random. The lower bound is 0 so the first image can
# be drawn too; np.random.randint already excludes the upper bound.
rand_idx = np.random.randint(0, test_imgs.shape[0])
# For a reproducible figure, fix the index instead, e.g. rand_idx = 6

test_img = test_imgs[rand_idx]
# predict() expects a batch, so the single image is reshaped to (1, 784).
reconst_img = autoencoder.predict(test_img.reshape(1, 28*28))

# Show the input and its reconstruction side by side, without axis ticks.
panels = ((test_img, 'Input Image'), (reconst_img, 'Reconstructed Image'))

plt.figure(figsize=(8, 4))
for pos, (img, title) in enumerate(panels, start=1):
    plt.subplot(1, 2, pos)
    plt.imshow(img.reshape(28, 28), 'gray')
    plt.title(title, fontsize=12)
    plt.xticks([])
    plt.yticks([])
plt.show()
# Encode 500 randomly drawn test images (indices may repeat) and plot their
# 2-D latent codes, one colour per digit.
idx = np.random.randint(0, len(test_labels), 500)

# NOTE: this rebinds test_x / test_y to the sampled subset; the later
# plotting code reads test_y together with test_latent.
test_x = test_imgs[idx]
test_y = test_labels[idx]
test_latent = encoder.predict(test_x)

plt.figure(figsize=(6, 6))
for digit in (1, 5, 6):
    is_digit = test_y == digit
    plt.scatter(test_latent[is_digit, 0], test_latent[is_digit, 1], label=str(digit))
plt.title('Latent Space', fontsize=12)
plt.xlabel('Z1', fontsize=12)
plt.ylabel('Z2', fontsize=12)
plt.legend(fontsize=12)
plt.axis('equal')
plt.show()
Data Generation
Feeding a new latent point to the decoder generates an image that makes sense.
These results are unsatisfying, because the density model used on the latent space ℱ is too simple and inadequate.
Building a “good” model amounts to our original problem of modeling an empirical distribution, although it may now be in a lower dimension space.
This is the motivation for VAEs, GANs, and diffusion models.
# Decode one hand-picked latent point into an image.
new_data = np.array([[2, -4]])
fake_image = decoder.predict(new_data)

plt.figure(figsize=(9, 4))

# Left panel: the encoded test samples, with the chosen latent point on top.
plt.subplot(1, 2, 1)
for digit in (1, 5, 6):
    is_digit = test_y == digit
    plt.scatter(test_latent[is_digit, 0], test_latent[is_digit, 1], label=str(digit))
plt.scatter(new_data[:, 0], new_data[:, 1], c='k', marker='o', s=200, label='new data')
plt.title('Latent Space', fontsize=10)
plt.xlabel('Z1', fontsize=10)
plt.ylabel('Z2', fontsize=10)
plt.legend(loc=2, fontsize=10)
plt.axis('equal')

# Right panel: the decoder output for that point, shown as a 28x28 image.
plt.subplot(1, 2, 2)
plt.imshow(fake_image.reshape(28, 28), 'gray')
plt.title('Generated Fake Image', fontsize=10)
plt.xticks([])
plt.yticks([])

plt.show()
%%javascript
// Fetch and run an external helper script; judging by its file name it
// presumably builds the notebook's table of contents (needs network access).
$.getScript('https://kmahelona.github.io/ipython_notebook_goodies/ipython_notebook_toc.js')