Table of Contents
Correlation of Two Random Variables
Correlation Coefficient Plot
Covariance Matrix
How?
idea: highly correlated data contains redundant features
Now consider a change of axes
Each example $x$ has 2 features $\{u_1,u_2\}$
Consider ignoring the feature $u_2$ for each example
Each 2-dimensional example $x$ now becomes 1-dimensional $x = \{u_1\}$
Are we losing much information by throwing away $u_2$ ?
No. Most of the data spread is along $u_1$ (very little variance along $u_2$)
Find unit vector $u$ such that maximizes variance of projections
Note: $m \approx m-1$ for big data
Choose top $k$ (orthonormal) eigenvectors, $U = [u_1, u_2, \cdots, u_k]$
Project $x_i$ onto span $\{ u_1, u_2, \cdots, u_k\}$
$\qquad \qquad \qquad \qquad x^{(i)} \rightarrow$ projection onto unit vector $u \implies u^Tx^{(i)} = $ distance from the origin along $u$
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
# data generation
m = 5000
mu = np.array([0, 0])
sigma = np.array([[3, 1.5],
[1.5, 1]])
X = np.random.multivariate_normal(mu, sigma, m)
X = np.asmatrix(X)
fig = plt.figure(figsize = (6, 4))
plt.plot(X[:,0], X[:,1], 'k.', alpha = 0.3)
plt.axis('equal')
plt.grid(alpha = 0.3)
plt.show()
S = 1/(m-1)*X.T*X
D, U = np.linalg.eig(S)
# idx = np.argsort(-D)
# D = D[idx]
# U = U[:,idx]
print(D, '\n')
print(U)
h = U[1,0]/U[0,0]
xp = np.arange(-6, 6, 0.1)
yp = h*xp
plt.figure(figsize = (6, 4))
plt.plot(X[:,0], X[:,1], 'k.', alpha = 0.3)
plt.plot(xp, yp, 'r', linewidth = 3)
plt.axis('equal')
plt.grid(alpha = 0.3)
plt.show()
Z = X*U[:,0]
plt.figure(figsize = (6, 4))
plt.hist(Z, 51)
plt.xlim([-8, 8])
plt.show()
Z = X*U[:,1]
plt.figure(figsize = (6, 4))
plt.hist(Z, 51)
plt.xlim([-8, 8])
plt.show()
from sklearn.decomposition import PCA
X = np.array(X)
pca = PCA(n_components = 1)
pca.fit(X)
u = pca.transform(X)
plt.figure(figsize = (6, 4))
plt.hist(u, 51)
plt.xlim([-8, 8])
plt.show()
%%html
<center><iframe
width="560" height="315" src="https://www.youtube.com/embed/Pkit-64g0eU" frameborder="0" allowfullscreen>
</iframe></center>
%%html
<center><iframe
width="560" height="315" src="https://www.youtube.com/embed/x4lvjVjUUqg" frameborder="0" allowfullscreen>
</iframe></center>
%%html
<center><iframe
width="560" height="315" src="https://www.youtube.com/embed/2t62WkNIqxY" frameborder="0" allowfullscreen>
</iframe></center>
Download files
from google.colab import drive
drive.mount('/content/drive')
from six.moves import cPickle
X = cPickle.load(open('/content/drive/MyDrive/ML_Colab/ML_data/pca_spring.pkl','rb'))
X = np.asmatrix(X.T)
print(X.shape)
m = X.shape[0]
plt.figure(figsize = (6, 4))
plt.subplot(1,3,1)
plt.plot(X[:, 0], -X[:, 1], 'r')
plt.axis('equal')
plt.title('Camera 1')
plt.subplot(1,3,2)
plt.plot(X[:, 2], -X[:, 3], 'b')
plt.axis('equal')
plt.title('Camera 2')
plt.subplot(1,3,3)
plt.plot(X[:, 4], -X[:, 5], 'k')
plt.axis('equal')
plt.title('Camera 3')
plt.show()
X = X - np.mean(X, axis = 0)
S = 1/(m-1)*X.T*X
D, U = np.linalg.eig(S)
idx = np.argsort(-D)
D = D[idx]
U = U[:,idx]
print(D, '\n')
print(U)
plt.figure(figsize = (6, 4))
plt.stem(np.sqrt(D))
plt.grid(alpha = 0.3)
plt.show()
# relative magnitutes of the principal components
Z = X*U
xp = np.arange(0, m)/24 # 24 frame rate
plt.figure(figsize = (6, 4))
plt.plot(xp, Z)
plt.yticks([])
plt.show()
## projected onto the first principal component
# 6 dim -> 1 dim (dim reduction)
# relative magnitute of the first principal component
Z = X*U[:,0]
plt.figure(figsize = (6, 4))
plt.plot(Z)
plt.yticks([])
plt.show()
Geometry of Linear Maps
Matrix $A$ (or linear transformation) = rotate + stretch/compress
an extremely important fact:
Singular Values and Singular Vectors
the numbers $\sigma_1, \cdots, \sigma_n$ are called the singular values of $A$ by convention, $\sigma_i > 0$
the vectors $u_1, \cdots, u_n$ are unit vectors along the principal semiaxes of $AS$
the vectors $v_1, \cdots, v_n$ are the preimages of the principal semiaxes, defined so that
Thin Singular Value Decomposition
We have $A \in \mathbb{R}^{m \times n}$, skinny and full rank (i.e., $r = n$), and
let
in matrix form, the above equation is $AV = \hat U \hat \Sigma$ and since $V$ is orthogonal
called the thin (or reduced) SVD of $A$
This is the (full) singular value decomposition of $A$
Every matrix $A$ has a singular value decomposition.
If $A$ is not full rank, then some of the diagonal entries of $\Sigma$ will be zero
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
[U, S, VT] = np.linalg.svd(A, full_matrices = True)
print(U.shape, S.shape, VT.shape, '\n')
print(U, '\n')
print(S, '\n')
print(VT)
$$ VS V^T \quad \left(\text{where }S = \frac{1}{m}X^TX = \left(\frac{X}{\sqrt{m}}\right)^T \frac{X}{\sqrt{m}} = A^TA \right) $$
A = np.array([[4, 4],
[-3, 3]])
A = np.asmatrix(A)
[D, V] = np.linalg.eig(A.T*A)
print(V, '\n')
print(D)
[D, U] = np.linalg.eig(A*A.T)
print(U, '\n')
print(D)
%%html
<center><iframe src="https://www.youtube.com/embed/Nx0lRBaXoz4?rel=0"
width="560" height="315" frameborder="0" allowfullscreen></iframe></center>
Learn how to reduce the amount of storage needed for an image using the singular value decomposition.
Approximation of $A$
Downlaod the image file
from google.colab import drive
drive.mount('/content/drive')
import cv2
img = cv2.imread('/content/drive/MyDrive/ML_Colab/ML_data/kaist_me.jpg', 0)
print(img.shape)
plt.figure(figsize = (6, 4))
plt.imshow(img, 'gray')
plt.axis('off')
plt.show()
A = img
[U, S, VT] = np.linalg.svd(A, full_matrices = False)
U = np.asmatrix(U)
S = np.asmatrix(np.diag(S))
VT = np.asmatrix(VT)
print(U.shape, S.shape, VT.shape, '\n')
sig_val = np.diag(S)
plt.figure(figsize = (6, 4))
plt.stem(sig_val)
plt.title('Singular Values')
plt.show()
Approximated image when k = 20
k = 20
low_U = U[:, :k]
low_S = S[:k, :k]
low_VT = VT[:k, :]
A_hat = low_U*low_S*low_VT
plt.figure(figsize = (8, 4))
plt.subplot(1,2,1), plt.imshow(A, 'gray'), plt.axis('off'), plt.title('Original image')
plt.subplot(1,2,2), plt.imshow(A_hat, 'gray'), plt.axis('off'), plt.title('Approximated image w/ rank = 20')
plt.show()
Approximated images with k varied
K = [1, 2, 3, 4, 10, 20, 30, 100]
plt.figure(figsize = (10, 4))
for i in range(len(K)):
low_U = U[:, :K[i]]
low_S = S[:K[i], :K[i]]
low_VT = VT[:K[i], :]
A_hat = low_U*low_S*low_VT
plt.subplot(2,4,i+1), plt.imshow(A_hat, 'gray'), plt.axis('off')
plt.title('rank {}'.format(K[i]))
plt.show()
The data provided in this problem is a set of sequential pictures (or frames) taken from a video of people walking. The total number of pictures is 20. In this problem, you are asked to get rid of people in the pictures and only extract background from pictures.
(a) Plot 'capimage_0.jpg' image in grayscale.
from google.colab import drive
drive.mount('/content/drive')
import numpy as np
import cv2
import matplotlib.pyplot as plt
%matplotlib inline
data_path = '/content/drive/MyDrive/ML_Colab/ML_data'
img = cv2.imread(data_path + '/capimage_0.jpg', 0)
plt.figure(figsize = (6, 4))
plt.imshow(img, 'gray')
plt.axis('off')
plt.show()
print(img.shape)
row = img.shape[0]
col = img.shape[1]
(b) Load the image into grayscale and reshape each picture as (row $\times$ col, 1), then horizontally stack them to form matrix $A$ with a shape of (row $\times$ col, 20).
for i in range(20):
img = cv2.imread(data_path + '/capimage_' + str(i) + '.jpg', 0)
flat_img = img.reshape(-1, 1)
if i == 0:
A = flat_img
else:
A = np.hstack((A, flat_img))
A.shape
(c) Compute SVD of matrix $A$ and plot the sigular values.
U, S, VT = np.linalg.svd(A, full_matrices = False)
U = np.asmatrix(U)
S = np.asmatrix(np.diag(S))
VT = np.asmatrix(VT)
print(U.shape, S.shape, VT.shape, '\n')
plt.figure(figsize = (6, 4))
plt.stem(np.diag(S))
plt.title('Singular Values')
plt.show()
(d) Apply low rank approximation to matrix $A$ with $k=1$, and then plot $\hat A[:,0]$ with a reshape of (row, col).
A_hat = U[:,0] * S[0,0] * VT[0,:]
plt.figure(figsize = (6, 8))
plt.subplot(2,1,1)
plt.imshow(A[:, 0].reshape(row, col), 'gray')
plt.axis('off')
plt.subplot(2,1,2)
plt.imshow(A_hat[:, 0].reshape(row, col), 'gray')
plt.axis('off')
plt.show()
Suppose we have data that is a function of both space and time
How can use this data to decompose this system into its building blocks?
To start to answer these questions, we can start with a simple separation of variables:
Arrangement of Data
Suppose we collect data at spatial locations $x_1, x_2, \cdots, x_n$
Suppose we collect data at times $t_1, t_2, \cdots, t_m$
We can start by assembling the data $y(x,t)$ into an $n \times m$ matrix
$\quad\;\;\;$(typically after first subtracting a mean or equilibrium state)
POD Algorithm
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
# data generated from the function
x = np.linspace(0, 5, 100)
Nx = np.size(x)
dt = 0.01
t = np.arange(0, 5, dt)
k = 2
omega = 4*np.pi
Y = np.zeros([np.size(x), np.size(t)], dtype = 'd')
for i in range(0, np.size(x)):
for j in range(0, np.size(t)):
Y[i, j] = np.sin(omega*t[j] - k*x[i]) + 0.1*np.random.randn()
# Let's see the signal in time
from matplotlib.animation import FuncAnimation
import time
from IPython import display
fig = plt.figure()
ax1 = fig.add_subplot(1, 1, 1)
def animate(i):
ax1.clear()
ax1.plot(x, Y[:,i], linewidth = 2)
ax1.set_xlabel('x')
ax1.set_xlim(0, 5)
ax1.set_ylim(-2, 2)
ax1.grid()
ani = FuncAnimation(fig, animate, frames = int(np.size(t)/1), interval = 10)
video = ani.to_html5_video()
html = display.HTML(video)
display.display(html)
plt.close()
U, S, VT = np.linalg.svd(Y, full_matrices = False)
print(U.shape)
print(S.shape)
print(VT.shape)
plt.semilogy(S, '-o')
plt.xlim(0, 5)
plt.ylabel('Singular Value', fontsize = 15)
plt.xlabel('Index', fontsize = 15)
plt.show()
plt.plot(x, U[:,0])
plt.title('POD Mode 1', fontsize = 12)
plt.xlabel('x', fontsize = 15)
plt.ylim([-2, 2])
plt.grid()
plt.show()
plt.plot(x, U[:,1])
plt.title('POD Mode 2', fontsize = 12)
plt.xlabel('x', fontsize = 15)
plt.ylim([-2, 2])
plt.grid()
plt.show()
plt.plot(t, S[0]*VT[0,:], label = 'a1(t)', linewidth = 2)
plt.plot(t, S[1]*VT[1,:], label = 'a2(t)', linewidth = 2)
plt.xlim(0, 5)
plt.xlabel('time', fontsize = 15)
plt.legend()
plt.title('Mode Coefficients', fontsize = 12)
plt.grid()
plt.show()
U = np.asmatrix(U)
diagS = np.asmatrix(np.diag(S))
VT = np.asmatrix(VT)
print(U.shape, diagS.shape, VT.shape, '\n')
k = 2
low_U = U[:, :k]
low_S = diagS[:k, :k]
low_VT = VT[:k, :]
low_Y = low_U*low_S*low_VT
print(low_U.shape, low_S.shape, low_VT.shape, '\n')
# Let's see the signal in time
from matplotlib.animation import FuncAnimation
import time
from IPython import display
fig = plt.figure()
ax1 = fig.add_subplot(1, 1, 1)
def animate(i):
ax1.clear()
ax1.plot(x, low_Y[:,i], linewidth = 2)
ax1.set_xlim(0, 5)
ax1.set_ylim(-2, 2)
ax1.grid()
ani = FuncAnimation(fig, animate, frames = int(np.size(t)/1), interval = 10)
video = ani.to_html5_video()
html = display.HTML(video)
display.display(html)
plt.close()
Data Desciription and its Visualization
ALL.mat - 151 snapshots of u and v velocity as well as vorticity (UALL, VALL, VORTALL).
Datat set for fluid flow past at cylinder at RE = 100
Steven L. Brunton
May 12, 2016
from google.colab import drive
drive.mount('/content/drive')
from scipy import io
flows = io.loadmat('/content/drive/MyDrive/tutorials/CAE및응용역학/24년_춘계학술대회/CYLINDER_ALL.mat')
flows_mat_u = flows['UALL']
flows_mat_v = flows['VALL']
flows_mat_vort = flows['VORTALL']
flows_mat_u.shape
nx = 449
ny = 199
flows_mat_u[:,0].reshape(nx, ny),
plt.subplot(1,3,1)
plt.imshow(flows_mat_u[:,150].reshape(nx, ny), 'gray')
plt.title('u velocity')
plt.axis('off')
plt.subplot(1,3,2)
plt.imshow(flows_mat_v[:,150].reshape(nx, ny), 'gray')
plt.title('v velocity')
plt.axis('off')
plt.subplot(1,3,3)
plt.imshow(flows_mat_vort[:,150].reshape(nx, ny), 'gray')
plt.title('vorticity')
plt.axis('off')
plt.show()
# Let's see the flow in time
from matplotlib.animation import FuncAnimation
import time
from IPython import display
fig = plt.figure()
ax1 = fig.add_subplot(1, 3, 1)
ax2 = fig.add_subplot(1, 3, 2)
ax3 = fig.add_subplot(1, 3, 3)
def animate(i):
ax1.clear()
ax1.imshow(flows_mat_u[:,i].reshape(nx, ny), 'gray')
ax1.set_title('u velocity')
ax1.axis('off')
ax2.clear()
ax2.imshow(flows_mat_v[:,i].reshape(nx, ny), 'gray')
ax2.set_title('v velocity')
ax2.axis('off')
ax3.clear()
ax3.imshow(flows_mat_vort[:,i].reshape(nx, ny), 'gray')
ax3.set_title('vorticity')
ax3.axis('off')
ani = FuncAnimation(fig, animate, frames = int(150/1), interval = 10)
video = ani.to_html5_video()
html = display.HTML(video)
display.display(html)
plt.close()
Typically after first subtracting a mean or equilibrium state
flows_mat_vort_mean = flows_mat_vort.mean(axis = 1)
flows_mat_vort_centered = flows_mat_vort - flows_mat_vort_mean[:, None]
flows_mat_vort_centered_normalized = (flows_mat_vort_centered - flows_mat_vort_centered.min()) / (flows_mat_vort_centered.max() - flows_mat_vort_centered.min())
plt.imshow(flows_mat_vort_mean.reshape(nx, ny), 'gray')
plt.title('vorticity')
plt.axis('off')
plt.show()
flows_mat = flows_mat_vort_centered_normalized
U, S, VT = np.linalg.svd(flows_mat, full_matrices = False)
print(U.shape)
print(S.shape)
print(VT.shape)
plt.semilogy(S, '-o')
plt.ylabel('Singular Value', fontsize = 15)
plt.xlabel('Index', fontsize = 15)
plt.show()
plt.subplot(1,4,1)
plt.imshow(U[:,0].reshape(nx, ny), 'gray')
plt.title('POD Mode 1')
plt.axis('off')
plt.subplot(1,4,2)
plt.imshow(U[:,1].reshape(nx, ny), 'gray')
plt.title('POD Mode 2')
plt.axis('off')
plt.subplot(1,4,3)
plt.imshow(U[:,4].reshape(nx, ny), 'gray')
plt.title('POD Mode 5')
plt.axis('off')
plt.subplot(1,4,4)
plt.imshow(U[:,12].reshape(nx, ny), 'gray')
plt.title('POD Mode 13')
plt.axis('off')
plt.show()
U = np.asmatrix(U)
diagS = np.asmatrix(np.diag(S))
VT = np.asmatrix(VT)
print(U.shape, diagS.shape, VT.shape, '\n')
k = 5
low_U = U[:, :k]
low_S = diagS[:k, :k]
low_VT = VT[:k, :]
print(low_U.shape, low_S.shape, low_VT.shape, '\n')
low_flows_mat = low_U*low_S*low_VT
# Let's see the flow in time
from matplotlib.animation import FuncAnimation
import time
from IPython import display
fig = plt.figure()
ax1 = fig.add_subplot(1, 2, 1)
ax2 = fig.add_subplot(1, 2, 2)
def animate(i):
ax1.clear()
ax1.imshow(flows_mat[:,i].reshape(nx, ny), 'gray')
ax1.axis('off')
ax2.clear()
ax2.imshow(low_flows_mat[:,i].reshape(nx, ny), 'gray')
ax2.axis('off')
ani = FuncAnimation(fig, animate, frames = int(150/1), interval = 10)
video = ani.to_html5_video()
html = display.HTML(video)
display.display(html)
plt.close()
MNIST example
Use only (1, 5, 6) digits to visualize in 2-D
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
%matplotlib inline
# Load Data
mnist = tf.keras.datasets.mnist
(train_x, train_y), (test_x, test_y) = mnist.load_data()
train_x, test_x = train_x.reshape(-1, 784)/255.0, test_x.reshape(-1, 784)/255.0
# Use only 1,5,6 digits to visualize latent sapce
train_idx1 = np.array(np.where(train_y == 1))
train_idx5 = np.array(np.where(train_y == 5))
train_idx6 = np.array(np.where(train_y == 6))
train_idx = np.sort(np.concatenate((train_idx1, train_idx5, train_idx6), axis = None))
test_idx1 = np.array(np.where(test_y == 1))
test_idx5 = np.array(np.where(test_y == 5))
test_idx6 = np.array(np.where(test_y == 6))
test_idx = np.sort(np.concatenate((test_idx1, test_idx5, test_idx6), axis = None))
train_imgs = train_x[train_idx]
train_labels = train_y[train_idx]
test_imgs = test_x[test_idx]
test_labels = test_y[test_idx]
n_train = train_imgs.shape[0]
n_test = test_imgs.shape[0]
print ("The number of training images : {}, shape : {}".format(n_train, train_imgs.shape))
print ("The number of testing images : {}, shape : {}".format(n_test, test_imgs.shape))
Define a Structure of an Autoencoder
# Define Structure
# Encoder Structure
encoder = tf.keras.models.Sequential([
tf.keras.layers.Dense(units = 500, activation = 'relu', input_dim = 784),
tf.keras.layers.Dense(units = 300, activation = 'relu'),
tf.keras.layers.Dense(units = 2, activation = None)
])
# Decoder Structure
decoder = tf.keras.models.Sequential([
tf.keras.layers.Dense(units = 300, activation = 'relu', input_shape = (2,)),
tf.keras.layers.Dense(units = 500, activation = 'relu'),
tf.keras.layers.Dense(units = 28*28, activation = None)
])
# Autoencoder = Encoder + Decoder
autoencoder = tf.keras.models.Sequential([encoder, decoder])
Define Loss and Optimizer
Loss
Optimizer
autoencoder.compile(optimizer = tf.keras.optimizers.Adam(0.001),
loss = 'mean_squared_error',
metrics = ['mse'])
# Train Model & Evaluate Test Data
training = autoencoder.fit(train_imgs, train_imgs, batch_size = 50, epochs = 10)
Test or Evaluate
test_scores = autoencoder.evaluate(test_imgs, test_imgs, verbose = 2)
print('Test loss: {}'.format(test_scores[0]))
print('Mean Squared Error: {} %'.format(test_scores[1]*100))
# Visualize Evaluation on Test Data
rand_idx = np.random.randint(1, test_imgs.shape[0])
# rand_idx = 6
test_img = test_imgs[rand_idx]
reconst_img = autoencoder.predict(test_img.reshape(1, 28*28))
plt.figure(figsize = (8, 4))
plt.subplot(1,2,1)
plt.imshow(test_img.reshape(28,28), 'gray')
plt.title('Input Image', fontsize = 12)
plt.xticks([])
plt.yticks([])
plt.subplot(1,2,2)
plt.imshow(reconst_img.reshape(28,28), 'gray')
plt.title('Reconstructed Image', fontsize = 12)
plt.xticks([])
plt.yticks([])
plt.show()
idx = np.random.randint(0, len(test_labels), 500)
test_x, test_y = test_imgs[idx], test_labels[idx]
test_latent = encoder.predict(test_x)
plt.figure(figsize = (6, 6))
plt.scatter(test_latent[test_y == 1,0], test_latent[test_y == 1,1], label = '1')
plt.scatter(test_latent[test_y == 5,0], test_latent[test_y == 5,1], label = '5')
plt.scatter(test_latent[test_y == 6,0], test_latent[test_y == 6,1], label = '6')
plt.title('Latent Space', fontsize = 12)
plt.xlabel('Z1', fontsize = 12)
plt.ylabel('Z2', fontsize = 12)
plt.legend(fontsize = 12)
plt.axis('equal')
plt.show()
Data Generation
It generates something that makes sense.
These results are unsatisfying, because the density model used on the latent space ℱ is too simple and inadequate.
Building a “good” model amounts to our original problem of modeling an empirical distribution, although it may now be in a lower dimension space.
This is a motivation to VAE, GAN, or diffusion model.
new_data = np.array([[2, -4]])
fake_image = decoder.predict(new_data)
plt.figure(figsize = (9, 4))
plt.subplot(1,2,1)
plt.scatter(test_latent[test_y == 1,0], test_latent[test_y == 1,1], label = '1')
plt.scatter(test_latent[test_y == 5,0], test_latent[test_y == 5,1], label = '5')
plt.scatter(test_latent[test_y == 6,0], test_latent[test_y == 6,1], label = '6')
plt.scatter(new_data[:,0], new_data[:,1], c = 'k', marker = 'o', s = 200, label = 'new data')
plt.title('Latent Space', fontsize = 10)
plt.xlabel('Z1', fontsize = 10)
plt.ylabel('Z2', fontsize = 10)
plt.legend(loc = 2, fontsize = 10)
plt.axis('equal')
plt.subplot(1,2,2)
plt.imshow(fake_image.reshape(28,28), 'gray')
plt.title('Generated Fake Image', fontsize = 10)
plt.xticks([])
plt.yticks([])
plt.show()
Convolutional Autoencoders
from google.colab import drive
drive.mount('/content/drive')
from scipy import io
flows = io.loadmat('/content/drive/MyDrive/tutorials/CAE및응용역학/24년_춘계학술대회/CYLINDER_ALL.mat')
flows_mat_u = flows['UALL']
flows_mat_v = flows['VALL']
flows_mat_vort = flows['VORTALL']
flows_mat_u.shape
import numpy as np
from numpy import linalg
import matplotlib.pyplot as plt
%matplotlib inline
flows_mat_u_mean = flows_mat_u.mean(axis = 1)
flows_mat_u_centered = flows_mat_u - flows_mat_u_mean[:, None]
print(flows_mat_u_centered.shape)
nx = 449
ny = 199
plt.imshow(flows_mat_u_mean.reshape(nx, ny), 'gray')
plt.title('u velocity')
plt.axis('off')
flows_mat_u_centered_normalized = (flows_mat_u_centered - flows_mat_u_centered.min()) / (flows_mat_u_centered.max() - flows_mat_u_centered.min())
train_x = flows_mat_u_centered_normalized.T.reshape(-1, nx, ny, 1)
import tensorflow as tf
def create_and_train_model(lat_dim):
encoder = tf.keras.models.Sequential([
tf.keras.layers.Conv2D(filters = 64,
kernel_size = (11, 11),
strides = (5, 5),
activation = 'relu',
padding = 'SAME',
input_shape = (449, 199, 1)),
tf.keras.layers.Conv2D(filters = 64,
kernel_size = (7, 7),
strides = (5, 5),
activation = 'relu',
padding = 'SAME'),
tf.keras.layers.Conv2D(filters = 32,
kernel_size = (5, 5),
strides = (3, 3),
activation = 'relu',
padding = 'SAME'),
tf.keras.layers.Conv2D(filters = lat_dim,
kernel_size = (6, 3),
strides = (6, 3),
padding = 'VALID')
])
decoder = tf.keras.models.Sequential([
tf.keras.layers.Conv2DTranspose(filters = 32,
kernel_size = (6, 3),
strides = (6, 3),
activation = 'relu',
padding = 'VALID',
input_shape = (1, 1, lat_dim)),
tf.keras.layers.Conv2DTranspose(filters = 64,
kernel_size = (5, 5),
strides = (3, 3),
activation = 'relu',
padding = 'SAME'),
tf.keras.layers.Conv2DTranspose(filters = 64,
kernel_size = (7, 7),
strides = (5, 5),
activation = 'relu',
padding = 'SAME'),
tf.keras.layers.Conv2DTranspose(filters = 1,
kernel_size = (11, 11),
strides = (5, 5),
padding = 'SAME'),
tf.keras.layers.Cropping2D(cropping = ((1, 0), (13, 13)))
])
latent = encoder.output
result = decoder(latent)
model = tf.keras.Model(inputs = encoder.input, outputs = result)
model.compile(optimizer = 'adam', loss = 'mean_squared_error')
model.fit(train_x, train_x, epochs = 500, verbose = 0)
return model, encoder
# model30, encoder30 = create_and_train_model(30)
# model10, encoder10 = create_and_train_model(10)
# model5, encoder5 = create_and_train_model(5)
model2, encoder2 = create_and_train_model(2)
Encode Datasets to the Latent Space and Reconstruct to the Original Image with Decoders
# predict30 = model30.predict(train_x)
# predict30 = predict30.reshape(151, 89351)
# train_30 = train_x.reshape(151, 89351)
# predict10 = model10.predict(train_x)
# predict10 = predict10.reshape(151, 89351)
# train_10 = train_x.reshape(151, 89351)
# predict5 = model5.predict(train_x)
# predict5 = predict5.reshape(151, 89351)
# train_5 = train_x.reshape(151, 89351)
predict2 = model2.predict(train_x)
predict2 = predict2.reshape(151, 89351)
train_2 = train_x.reshape(151, 89351)
Reconstruction Results from the Latent Space
Latent Space with 30 Dimensions
# Let's see the flow in time
from matplotlib.animation import FuncAnimation
import time
from IPython import display
fig = plt.figure()
ax1 = fig.add_subplot(1, 2, 1)
ax2 = fig.add_subplot(1, 2, 2)
def animate(i):
ax1.clear()
ax1.imshow(train_30[i,:].reshape(nx, ny), 'gray')
ax1.set_title('Ground Truth')
ax1.axis('off')
ax2.clear()
ax2.imshow(predict30[i,:].reshape(nx, ny), 'gray')
ax2.set_title('Reconstructed')
ax2.axis('off')
ani = FuncAnimation(fig, animate, frames = int(150/1), interval = 10)
video = ani.to_html5_video()
html = display.HTML(video)
display.display(html)
plt.close()
Latent Space with 10 Dimensions
# Let's see the flow in time
from matplotlib.animation import FuncAnimation
import time
from IPython import display
fig = plt.figure()
ax1 = fig.add_subplot(1, 2, 1)
ax2 = fig.add_subplot(1, 2, 2)
def animate(i):
ax1.clear()
ax1.imshow(train_10[i,:].reshape(nx, ny), 'gray')
ax1.set_title('Ground Truth')
ax1.axis('off')
ax2.clear()
ax2.imshow(predict10[i,:].reshape(nx, ny), 'gray')
ax2.set_title('Reconstructed')
ax2.axis('off')
ani = FuncAnimation(fig, animate, frames = int(150/1), interval = 10)
video = ani.to_html5_video()
html = display.HTML(video)
display.display(html)
plt.close()
Latent Space with 5 Dimensions
# Let's see the flow in time
from matplotlib.animation import FuncAnimation
import time
from IPython import display
fig = plt.figure()
ax1 = fig.add_subplot(1, 2, 1)
ax2 = fig.add_subplot(1, 2, 2)
def animate(i):
ax1.clear()
ax1.imshow(train_5[i,:].reshape(nx, ny), 'gray')
ax1.set_title('Ground Truth')
ax1.axis('off')
ax2.clear()
ax2.imshow(predict5[i,:].reshape(nx, ny), 'gray')
ax2.set_title('Reconstructed')
ax2.axis('off')
ani = FuncAnimation(fig, animate, frames = int(150/1), interval = 10)
video = ani.to_html5_video()
html = display.HTML(video)
display.display(html)
plt.close()
Latent Space with 2 Dimensions
# Let's see the flow in time
from matplotlib.animation import FuncAnimation
import time
from IPython import display
fig = plt.figure(figsize = (8, 10))
ax1 = fig.add_subplot(2, 2, 1)
ax2 = fig.add_subplot(2, 2, 2)
ax3 = fig.add_subplot(2, 1, 2)
lat = encoder2.predict(train_x)
lat = lat.reshape(151, 2)
def animate(i):
ax1.clear()
ax1.imshow(train_2[i,:].reshape(nx, ny), 'gray')
ax1.set_title('Ground Truth')
ax1.axis('off')
ax2.clear()
ax2.imshow(predict2[i,:].reshape(nx, ny), 'gray')
ax2.set_title('Reconstructed Time Step: {}'.format(i))
ax2.axis('off')
ax3.clear()
ax3.scatter(lat[:,0], lat[:,1], color = 'k')
ax3.scatter(lat[i,0], lat[i,1], color = 'r', s = 50)
ax3.set_title('Latent Space Visualization (Time Step {})'.format(i))
ax3.set_xticks([])
ax3.set_yticks([])
#ax3.axis('off')
ani = FuncAnimation(fig, animate, frames = int(150/1), interval = 100)
video = ani.to_html5_video()
html = display.HTML(video)
display.display(html)
plt.close()
%%javascript
$.getScript('https://kmahelona.github.io/ipython_notebook_goodies/ipython_notebook_toc.js')