Anomaly Detection
Table of Contents
Causes of Anomalies
Data from a different class of object or underlying mechanism
Data measurement and collection errors
Natural variation
Anomaly Detection
Applications of Anomaly Detection
Difficulties with Anomaly Detection
Scarcity of Anomalies
Diverse Types of Anomalies
Use of Data Labels in Anomaly Detection
Supervised Anomaly Detection
Semi-supervised Anomaly Detection
Unsupervised Anomaly Detection
Output of Anomaly Detection
Label
Score
Variants of Anomaly Detection Problem
Given a dataset $D$, find all the data points $x \in D$ with anomaly scores greater than some threshold $t$
Given a dataset $D$, find all the data points $x \in D$ having the top-n largest anomaly scores
Given a dataset $D$, containing mostly normal data points, and a test point $x$, compute the anomaly score of $x$ with respect to $D$
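A minimal sketch of these three variants, assuming scores have already been assigned by some anomaly scoring function; the nearest-neighbor distance used here is a hypothetical choice for illustration only.

import numpy as np

D = np.random.randn(100, 2)                                      # dataset of mostly normal points
score = lambda x: np.sort(np.linalg.norm(D - x, axis = 1))[1]    # e.g. distance to the nearest other point
scores = np.array([score(x) for x in D])

# Variant 1: all points with anomaly score greater than a threshold t
t = 1.0
flagged = np.where(scores > t)[0]

# Variant 2: the points with the top-n largest anomaly scores
n = 5
top_n = np.argsort(scores)[-n:]

# Variant 3: the anomaly score of a new test point x with respect to D
x_new = np.array([5.0, 5.0])
score_x_new = np.min(np.linalg.norm(D - x_new, axis = 1))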
Anomalies (outliers) are objects that are fit poorly by a statistical model
Estimate a parametric model describing the distribution of the data
Apply a statistical test that depends on the distribution of the data, its estimated parameters (e.g., mean and variance), and the number of expected outliers (confidence limit)
Univariate Gaussian Distribution
Compute the z-score $z = \frac{x - \bar{x}}{s}$, where $\bar{x}$ is the sample mean and $s$ is the sample standard deviation; flag $x$ as an anomaly when $|z|$ exceeds a chosen threshold.
Multivariate Gaussian Distribution
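A natural multivariate analogue of the univariate z-score (stated here as the standard definition) is the Mahalanobis distance of a point $x$ from the sample mean $\bar{x}$, with $S$ the sample covariance matrix:

$D_M(x) = \sqrt{(x - \bar{x})^T S^{-1} (x - \bar{x})}$

Points with large $D_M(x)$ are flagged as anomalies.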
Pros and Cons
Pros
Cons
Train autoencoders only with normal data
Test with (normal + anomaly) data
Convolutional Autoencoder (CAE)
Training using normal data
Anomaly detection with test data
Anomaly Score
Reconstruction Error
Root mean squared error (RMSE)
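For an image $x$ with $n$ pixels and reconstruction $\hat{x}$,

$\text{RMSE}(x, \hat{x}) = \sqrt{\frac{1}{n} \sum_{i=1}^{n} (x_i - \hat{x}_i)^2}$

and a test point is declared anomalous when this error exceeds a threshold.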
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import random
Load MNIST Data
(train_imgs, train_labels), (test_imgs, test_labels) = tf.keras.datasets.mnist.load_data()
train_imgs, test_imgs = train_imgs/255.0, test_imgs/255.0
print('shape of train_imgs:', train_imgs.shape)
print('shape of train_labels:', train_labels.shape)
print('shape of test_imgs:', test_imgs.shape)
print('shape of test_labels:', test_labels.shape)
Separate Normal and Abnormal Data
normal_train_index = np.where(train_labels == 7)[0]
normal_test_index = np.where(test_labels == 7)[0]
abnormal_test_index = np.where(test_labels == 5)[0]
normal_train_x = train_imgs[normal_train_index].reshape(-1,28,28,1)
normal_train_y = train_labels[normal_train_index]
normal_test_x = test_imgs[normal_test_index].reshape(-1,28,28,1)
normal_test_y = test_labels[normal_test_index]
abnormal_test_x = test_imgs[abnormal_test_index].reshape(-1,28,28,1)
abnormal_test_y = test_labels[abnormal_test_index]
print('shape of normal_train_x:', normal_train_x.shape)
print('shape of normal_test_x:', normal_test_x.shape)
print('shape of abnormal_test_x:', abnormal_test_x.shape)
Plot Normal and Abnormal Data
random.seed(6)
idx = random.sample(range(normal_train_x.shape[0]), 4)
plt.figure(figsize = (8, 3))
for i in range(4):
    plt.subplot(1, 4, i+1)
    plt.imshow(normal_train_x[idx[i]].squeeze(), 'gray')  # drop the channel axis for imshow
    plt.title('Normal')
    plt.axis('off')
plt.tight_layout()
plt.show()
random.seed(11)
idx = random.sample(range(abnormal_test_x.shape[0]), 4)
plt.figure(figsize = (8, 3))
for i in range(4):
    plt.subplot(1, 4, i+1)
    plt.imshow(abnormal_test_x[idx[i]].squeeze(), 'gray')
    plt.title('Abnormal')
    plt.axis('off')
plt.tight_layout()
plt.show()
Build a Model
# Encoder
encoder = tf.keras.models.Sequential([
tf.keras.layers.Conv2D(filters = 32,
kernel_size = (3, 3),
strides = (2, 2),
activation = 'relu',
padding = 'SAME',
input_shape = (28, 28, 1)),
tf.keras.layers.Conv2D(filters = 64,
kernel_size = (3, 3),
strides = (2, 2),
activation = 'relu',
padding = 'SAME',
input_shape = (14, 14, 32)),
tf.keras.layers.Conv2D(filters = 2,
kernel_size = (7, 7),
padding = 'VALID',
input_shape = (7, 7, 64))   # 1x1 spatial output with 2 channels: a 2-D latent vector
])
encoder.summary()
# Decoder
decoder = tf.keras.models.Sequential([
tf.keras.layers.Conv2DTranspose(filters = 64,
kernel_size = (7, 7),
strides = (1, 1),
activation = 'relu',
padding = 'VALID',
input_shape = (1, 1, 2)),
tf.keras.layers.Conv2DTranspose(filters = 32,
kernel_size = (3, 3),
strides = (2, 2),
activation = 'relu',
padding = 'SAME',
input_shape = (7, 7, 64)),
tf.keras.layers.Conv2DTranspose(filters = 1,
kernel_size = (7, 7),
strides = (2, 2),
padding = 'SAME',
input_shape = (14,14,32))
])
decoder.summary()
latent = encoder.output
result = decoder(latent)
cae_model = tf.keras.Model(inputs = encoder.input, outputs = result)
cae_model.compile(optimizer = 'adam',
loss = 'mean_squared_error')
cae_model.fit(normal_train_x, normal_train_x, epochs = 10)
Look at Latent Space
np.random.seed(2)   # seed numpy's RNG (np.random.choice is used below)
idx_n = np.random.choice(normal_test_x.shape[0], 1000)
idx_a = np.random.choice(abnormal_test_x.shape[0], 50)
test_x_n, test_y_n = normal_test_x[idx_n], normal_test_y[idx_n]
test_x_a, test_y_a = abnormal_test_x[idx_a], abnormal_test_y[idx_a]
normal_latent = encoder.predict(test_x_n)
normal_latent = normal_latent.reshape(-1,2)
abnormal_latent = encoder.predict(test_x_a)
abnormal_latent = abnormal_latent.reshape(-1,2)
plt.figure(figsize = (6, 6))
plt.scatter(normal_latent[test_y_n == 7, 0], normal_latent[test_y_n == 7, 1], label = 'Normal 7')
plt.scatter(abnormal_latent[:, 0], abnormal_latent[:, 1], label = 'Abnormal')
plt.title('Latent Space', fontsize = 15)
plt.xlabel('Z1', fontsize = 15)
plt.ylabel('Z2', fontsize = 15)
plt.legend(fontsize = 15)
plt.show()
Test
# Normal
normal_input = normal_test_x[0].reshape(-1,28,28,1)
normal_recon = cae_model.predict(normal_input)
n_recon_err = cae_model.evaluate(normal_input, normal_input)
plt.figure(figsize = (8, 4))
plt.subplot(1,2,1)
plt.imshow(normal_input[0].squeeze(), 'gray')
plt.title('Input image')
plt.axis('off')
plt.subplot(1,2,2)
plt.imshow(normal_recon[0].squeeze(), 'gray')
plt.title('Reconstructed image')
plt.axis('off')
plt.show()
print('Reconstruction error: ', n_recon_err)
# Abnormal
abnormal_input = abnormal_test_x[0].reshape(-1,28,28,1)
abnormal_recon = cae_model.predict(abnormal_input)
ab_recon_err = cae_model.evaluate(abnormal_input, abnormal_input)
plt.figure(figsize = (8, 4))
plt.subplot(1,2,1)
plt.imshow(abnormal_input[0].squeeze(), 'gray')
plt.title('Input image')
plt.axis('off')
plt.subplot(1,2,2)
plt.imshow(abnormal_recon[0].squeeze(), 'gray')
plt.title('Reconstructed image')
plt.axis('off')
plt.show()
print('Reconstruction error: ', ab_recon_err)
Anomaly Detection
normal_err = []
abnormal_err = []
for i in range(200):
    img = normal_test_x[i].reshape(-1, 28, 28, 1)
    normal_err.append(cae_model.evaluate(img, img, verbose = 0))

for j in range(200):
    img = abnormal_test_x[j].reshape(-1, 28, 28, 1)
    abnormal_err.append(cae_model.evaluate(img, img, verbose = 0))
threshold = 0.05
plt.figure(figsize = (6, 4))
plt.plot(normal_err, '.', label = 'Normal')
plt.plot(abnormal_err, '.', label = 'Abnormal')
plt.xlabel('Data point index')
plt.ylabel('Reconstruction error')
plt.axhline(y = threshold, color = 'r', linestyle = '-')
plt.legend()
plt.show()
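The threshold of 0.05 above is hand-picked from the plot. A common alternative, sketched here as a heuristic rather than the source's method, is to derive the threshold from the reconstruction errors of the normal data alone.

errs = np.array(normal_err)
threshold_percentile = np.percentile(errs, 99)        # e.g. the 99th percentile of normal errors
threshold_gaussian = errs.mean() + 3 * errs.std()     # or mean + 3 standard deviations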
Anomaly Detection with GAN (AnoGAN)
Train on normal (healthy) data only; no abnormal data is used.
How can an anomaly be found with a generative model? Starting from a random latent vector, iteratively optimize it until the generated image is as similar as possible to the target image.
Unseen data is fed into the well-trained GAN model, and its anomaly score decides the category.
AnoGAN therefore requires an iterative procedure for every query to find the latent $z$ that generates the target data, as sketched below.
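A minimal sketch of this latent search, assuming the generator trained later in this section and a query image x of shape (1, 28, 28, 1); the step count and learning rate are illustrative choices, not from the source.

import tensorflow as tf

def anogan_latent_search(generator, x, n_steps = 500, lr = 0.01):
    z = tf.Variable(tf.random.normal([1, 100]))             # random starting latent vector
    opt = tf.keras.optimizers.Adam(learning_rate = lr)
    for _ in range(n_steps):
        with tf.GradientTape() as tape:
            # Residual between the generated image and the query image
            loss = tf.reduce_mean(tf.square(generator(z) - x))
        opt.apply_gradients([(tape.gradient(loss, z), z)])  # update z only; the generator stays fixed
    return z, loss                                          # final latent and its residual (anomaly score)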
fast-AnoGAN (f-AnoGAN)
Again, train on normal (healthy) data only; no abnormal data is used.
Train an additional encoder model to predict the latent $z$ directly from images, keeping the generator fixed.
Query data is then regenerated in a single pass through the encoder and generator: if the query is normal (similar to the training data), it is regenerated well; otherwise its anomaly score will be high.
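In the implementation below, the anomaly score of a query image $x$ is the mean squared residual between $x$ and its regeneration $G(E(x))$ through the encoder $E$ and the fixed generator $G$:

$A(x) = \frac{1}{28^2} \lVert x - G(E(x)) \rVert_2^2$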
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import random
Load Data
Train Dataset: digit 2 only (normal images)
Test Dataset: digit 2 and digit 6 (normal + anomaly images)
(train_imgs, train_labels), (test_imgs, test_labels) = tf.keras.datasets.mnist.load_data()
train_imgs, test_imgs = train_imgs/127.5 - 1.0, test_imgs/127.5 - 1.0
normal_train_index = np.where(train_labels == 2)[0]
normal_test_index = np.where(test_labels == 2)[0]
abnormal_test_index = np.where(test_labels == 6)[0]
normal_train_x = train_imgs[normal_train_index].reshape(-1,28,28,1)
normal_train_y = train_labels[normal_train_index]
normal_test_x = test_imgs[normal_test_index].reshape(-1,28,28,1)
normal_test_y = test_labels[normal_test_index]
abnormal_test_x = test_imgs[abnormal_test_index].reshape(-1,28,28,1)
abnormal_test_y = test_labels[abnormal_test_index]
print('shape of normal_train_x:', normal_train_x.shape)
print('shape of normal_test_x:', normal_test_x.shape)
print('shape of abnormal_test_x:', abnormal_test_x.shape)
idx = random.sample(range(normal_train_x.shape[0]), 4)
plt.figure(figsize = (8, 3))
for i in range(4):
    plt.subplot(1, 4, i+1)
    plt.imshow(normal_train_x[idx[i]].squeeze(), 'gray')
    plt.title('Normal')
    plt.axis('off')
plt.tight_layout()
plt.show()
idx = random.sample(range(abnormal_test_x.shape[0]), 4)
plt.figure(figsize = (8, 3))
for i in range(4):
    plt.subplot(1, 4, i+1)
    plt.imshow(abnormal_test_x[idx[i]].squeeze(), 'gray')
    plt.title('Abnormal')
    plt.axis('off')
plt.tight_layout()
plt.show()
Build GAN Model
Generator
generator = tf.keras.models.Sequential([
tf.keras.layers.Dense(7*7*256,
use_bias = False,
input_shape = (100,)),
tf.keras.layers.BatchNormalization(),
tf.keras.layers.LeakyReLU(),
tf.keras.layers.Reshape((7, 7, 256)),
tf.keras.layers.Conv2DTranspose(128,
kernel_size = 5,
strides = 1,
padding = 'same',
use_bias = False),
tf.keras.layers.BatchNormalization(),
tf.keras.layers.LeakyReLU(),
tf.keras.layers.Conv2DTranspose(64,
kernel_size = 5,
strides = 2,
padding = 'same',
use_bias = False),
tf.keras.layers.BatchNormalization(),
tf.keras.layers.LeakyReLU(),
tf.keras.layers.Conv2DTranspose(1,
kernel_size = 5,
strides = 2,
padding = 'same',
use_bias = False,
activation = 'tanh')
])
generator.summary()
Discriminator
discriminator = tf.keras.models.Sequential([
tf.keras.layers.Conv2D(64,
kernel_size = 5,
strides = 2,
padding = 'same',
input_shape = (28, 28, 1)),
tf.keras.layers.LeakyReLU(),
tf.keras.layers.Dropout(0.3),
tf.keras.layers.Conv2D(128,
kernel_size = 5,
strides = 2,
padding = 'same'),
tf.keras.layers.LeakyReLU(),
tf.keras.layers.Dropout(0.3),
tf.keras.layers.Flatten(),
tf.keras.layers.Dense(1, activation = 'sigmoid')
])
discriminator.summary()
Model (Generator + Discriminator) Compile
discriminator.compile(optimizer = tf.keras.optimizers.Adam(learning_rate = 0.0001),
loss = 'binary_crossentropy')
combined_input = tf.keras.layers.Input(shape = (100,))
generated = generator(combined_input)
discriminator.trainable = False   # freeze the discriminator when training the generator through the combined model
combined_output = discriminator(generated)
combined = tf.keras.models.Model(inputs = combined_input, outputs = combined_output)
combined.compile(optimizer = tf.keras.optimizers.Adam(learning_rate = 0.0001),
loss = 'binary_crossentropy')
Train GAN
def make_noise(samples):
    return np.random.normal(0, 1, [samples, 100])

def plot_generated_images(generator, samples = 3):
    noise = make_noise(samples)
    generated_images = generator.predict(noise)
    generated_images = generated_images.reshape(samples, 28, 28)
    for i in range(samples):
        plt.subplot(1, samples, i+1)
        plt.imshow(generated_images[i], 'gray', interpolation = 'nearest')
        plt.axis('off')
    plt.tight_layout()
    plt.show()
n_iter = 20000
batch_size = 256
fake = np.zeros(batch_size)
real = np.ones(batch_size)
for i in range(n_iter + 1):
    # Train Discriminator
    noise = make_noise(batch_size)
    generated_images = generator.predict(noise, verbose = 0)
    idx = np.random.randint(0, normal_train_x.shape[0], batch_size)
    real_images = normal_train_x[idx]
    D_loss_real = discriminator.train_on_batch(real_images, real)
    D_loss_fake = discriminator.train_on_batch(generated_images, fake)
    D_loss = D_loss_real + D_loss_fake

    # Train Generator
    noise = make_noise(batch_size)
    G_loss = combined.train_on_batch(noise, real)

    if i % 2000 == 0:
        print('Discriminator Loss: ', D_loss)
        print('Generator Loss: ', G_loss)
        plot_generated_images(generator)
Build Encoder for fast-AnoGAN
Encoder
Encoder = tf.keras.models.Sequential([
tf.keras.layers.Conv2D(32,
kernel_size = 4,
strides = 2,
padding = 'same',
input_shape = (28, 28, 1)),
tf.keras.layers.LeakyReLU(),
tf.keras.layers.Conv2D(64,
kernel_size = 4,
strides = 2,
padding = 'same'),
tf.keras.layers.BatchNormalization(),
tf.keras.layers.LeakyReLU(),
tf.keras.layers.Conv2D(128,
kernel_size = 4,
strides = 2,
padding = 'same'),
tf.keras.layers.BatchNormalization(),
tf.keras.layers.LeakyReLU(),
tf.keras.layers.Conv2D(100,
kernel_size = 4,
strides = 1,
padding = 'valid'),
tf.keras.layers.Flatten()
])
Encoder.summary()
Model (Encoder + Generator) Compile
encoder_combined_input = tf.keras.layers.Input(shape = (28, 28, 1))
latentz = Encoder(encoder_combined_input)
generator.trainable = False   # freeze the generator; only the encoder is trained
regenerated_output = generator(latentz)
e_g_combined = tf.keras.models.Model(inputs = encoder_combined_input, outputs = regenerated_output)
e_g_combined.compile(optimizer = tf.keras.optimizers.Adam(learning_rate = 0.0001),
loss = 'mean_squared_error')
Train Encoder
n_iter = 20000
batch_size = 32
e_losses = []
for i in range(n_iter + 1):
    idx = np.random.randint(0, normal_train_x.shape[0], batch_size)
    real_images = normal_train_x[idx]
    recon_loss = e_g_combined.train_on_batch(real_images, real_images)
    if i % 100 == 0:
        e_losses.append(recon_loss)
    if i % 2000 == 0:
        print('recon_loss: ', recon_loss)
Calculate Anomaly Score
def compare_images(cls, real_img, generated_img, score, threshold = 50):
    # Map images from [-1, 1] back to [0, 255]
    real_img = ((real_img + 1.0) * 127.5).squeeze()
    generated_img = ((generated_img + 1.0) * 127.5).squeeze()
    # Keep only large pixel-wise differences
    diff_img = real_img - generated_img
    diff_img[diff_img <= threshold] = 0
    # Overlay the difference on the real image in red
    anomaly_img = np.zeros(shape = (28, 28, 3))
    anomaly_img[:, :, 0] = real_img - diff_img
    anomaly_img[:, :, 1] = real_img - diff_img
    anomaly_img[:, :, 2] = real_img - diff_img
    anomaly_img[:, :, 0] = anomaly_img[:, :, 0] + diff_img
    anomaly_img = anomaly_img.astype(np.uint8)

    fig, plots = plt.subplots(1, 4)
    fig.suptitle(f'Class: {cls} - (anomaly score: {score:.4})')
    fig.set_figwidth(9)
    fig.set_tight_layout(True)
    plots = plots.reshape(-1)
    plots[0].imshow(real_img, cmap = 'gray')
    plots[1].imshow(generated_img, cmap = 'gray')
    plots[2].imshow(diff_img, cmap = 'gray')
    plots[3].imshow(anomaly_img)
    plots[0].set_title('real')
    plots[1].set_title('generated')
    plots[2].set_title('difference')
    plots[3].set_title('Anomaly Detection')
    plt.show()
def calculate_anomaly_score(test_image, sample_num, plot_options = True):
    generator.trainable = False
    Encoder.trainable = False
    anomaly_score_list = []
    for i in range(sample_num):
        idx = np.random.randint(0, test_image.shape[0], 1)
        real_img = test_image[idx]
        real_z = Encoder(real_img)       # encode the query image
        fake_img = generator(real_z)     # regenerate it through the fixed generator
        # Anomaly score: mean squared residual between query and regeneration
        anomaly_score = np.sum((real_img - fake_img)**2) / (28**2)
        anomaly_score_list.append(anomaly_score)
        if plot_options:
            cls = 'Abnormal' if anomaly_score >= 0.05 else 'Normal'
            compare_images(cls, real_img, fake_img.numpy(), anomaly_score, threshold = 50)
    return np.array(anomaly_score_list)
Anomaly Score of Normal Data
calculate_anomaly_score(normal_test_x, 2)
Anomaly Score of Abnormal Data
calculate_anomaly_score(abnormal_test_x, 2)
Plot Anomaly Scores
normal_scores = calculate_anomaly_score(normal_test_x, 100, plot_options = False)
abnormal_scores = calculate_anomaly_score(abnormal_test_x, 100, plot_options = False)
plt.figure(figsize = (6, 4))
plt.plot(normal_scores, '.', label = 'Normal')
plt.plot(abnormal_scores, '.', label = 'Abnormal')
plt.xlabel('Data point index')
plt.ylabel('Anomaly score')
plt.axhline(y = 0.05, color = 'r', linestyle = '-')
plt.legend()
plt.show()
Examples
NASA Bearing Dataset
Prognostic Dataset for Predictive/Preventive Maintenance
https://www.kaggle.com/datasets/vinayak123tyagi/bearing-dataset (3rd_test)
Download AD_bearing.npy
import tensorflow as tf
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import random
from google.colab import drive
drive.mount('/content/drive')
AD_bearing = np.load('/content/drive/MyDrive/DL_Colab/DL_data/AD_bearing.npy')
print("Shape of total data: ", AD_bearing.shape)
plt.figure(figsize = (8, 6))
plt.plot(AD_bearing[:,0], label = 'Bearing 1', color = 'b', linewidth = 2)
plt.plot(AD_bearing[:,1], label = 'Bearing 2', color = 'r', linewidth = 2)
plt.plot(AD_bearing[:,2], label = 'Bearing 3', color = 'g', linewidth = 2)
plt.plot(AD_bearing[:,3], label = 'Bearing 4', color = 'k', linewidth = 2)
plt.legend(loc = 'upper left')
plt.title('Bearing Sensor Training Data')
plt.show()
bearing_3 = AD_bearing[:,2]
train = bearing_3[0:4000].reshape(-1, 1)
test = bearing_3[4000:].reshape(-1, 1)
print("Training dataset shape:", train.shape)
print("Test dataset shape:", test.shape)
plt.figure(figsize = (8, 6))
plt.plot(np.arange(0, train.shape[0]), train, label = 'Bearing 3_train', linewidth = 2)
plt.plot(np.arange(4000, 4000 + test.shape[0]), test, label = 'Bearing 3_test', linewidth = 2)
plt.legend(loc = 'upper left', fontsize = 16)
plt.title('Bearing Sensor Train and Test Data', fontsize = 16)
plt.xlabel('Data points')
plt.show()
LSTM Model
n_step = 20    # time steps per input window
n_input = 50   # signal values per step (each window covers 20 x 50 = 1000 points)

# LSTM shape
n_lstm1 = 300
n_lstm2 = 300
n_lstm3 = 300

# fully connected
n_hidden = 300
n_output = 50  # predict the next 50 signal values
lstm_network = tf.keras.models.Sequential([
tf.keras.layers.Input(shape = (n_step, n_input)),
tf.keras.layers.LSTM(n_lstm1, return_sequences = True),
tf.keras.layers.LSTM(n_lstm2, return_sequences = True),
tf.keras.layers.LSTM(n_lstm3),
tf.keras.layers.Dense(n_hidden, activation = 'relu'),
tf.keras.layers.Dense(n_output),
])
lstm_network.summary()
lstm_network.compile(optimizer = 'adam',
loss = 'mean_squared_error',
metrics = ['mse'])
Train/Test Data Split
def dataset(train, test, n_samples, n_step = n_step, n_input = n_input, n_output = n_output):
    train_x_list = []
    train_y_list = []
    n_data = train.shape[0]
    random.seed(0)
    # Sample random window start points that leave room for input + target
    start_point = random.sample(list(np.arange(0, n_data - (n_step+1)*n_input)), n_samples)
    for i in start_point:
        train_x_list.append(train[i:i + n_step*n_input].reshape(n_step, n_input))
        train_y_list.append(train[i + n_step*n_input:i + n_step*n_input + n_output])
    train_data = np.array(train_x_list)
    train_label = np.array(train_y_list)
    test_data = test[0:n_step*n_input]
    test_data = test_data.reshape(1, n_step, n_input)
    test_label = test[n_step*n_input:n_step*n_input + n_output].ravel()
    return train_data, train_label, test_data, test_label
train_data, train_label, test_data, test_label = dataset(train, test, 2000)
print('Train data shape:', train_data.shape)
Model Training
lstm_network.fit(train_data, train_label, epochs = 15)
Results
test_pred = lstm_network.predict(train_data[0:1]).ravel()
plt.figure(figsize = (8, 6))
plt.plot(np.arange(0, n_step*n_input + n_output), np.hstack([train_data[0:1].ravel(), train_label[0:1].ravel()]), 'b', label = 'Ground truth')
plt.plot(np.arange(n_step*n_input, n_step*n_input + n_output), test_pred, 'r', label = 'Prediction')
plt.vlines(n_step*n_input, 0.05, 0.06, colors = 'r', linestyles = 'dashed')
plt.ylim([0.04, 0.07])
plt.legend(fontsize = 13, loc = 'upper left')
plt.xlabel('Data points')
plt.show()
Difference Between Predicted and Measured Signal
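The anomaly score at each point is the absolute difference between the measured signal $x_i$ and the model's prediction $\hat{x}_i$, i.e. $a_i = \lvert x_i - \hat{x}_i \rvert$, compared against a fixed threshold (0.005 below).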
gen_signal = []
for i in range((test.shape[0] - n_step*n_input) // n_output):
    test_pred = lstm_network.predict(test_data, verbose = 0)
    gen_signal.append(test_pred.ravel())
    # Slide the window: drop the oldest step and append the prediction
    test_pred = test_pred[:, np.newaxis, :]
    test_data = test_data[:, 1:, :]
    test_data = np.concatenate([test_data, test_pred], axis = 1)

gen_signal = np.concatenate(gen_signal)
test_label = test[n_step*n_input:n_step*n_input + n_output*(i+1)]
plt.figure(figsize = (8, 6))
plt.plot(test_label, 'b', label = 'Measured signal')
plt.plot(gen_signal, 'r', label = 'Prediction')
plt.legend(fontsize = 15, loc = 'upper left')
plt.xlabel('Data points')
plt.show()
plt.figure(figsize = (8, 6))
plt.plot(np.abs(test_label.reshape(-1) - gen_signal), label = 'Anomaly score')
plt.legend(fontsize = 15, loc = 'upper left')
plt.hlines(0.005, 0, 1300, colors = 'r', linestyles = 'dashed')
plt.xlabel('Data points')
plt.ylabel('Anomaly score (difference)')
plt.show()
%%javascript
$.getScript('https://kmahelona.github.io/ipython_notebook_goodies/ipython_notebook_toc.js')