Pre-trained CNNs and Transfer Learning
Table of Contents
from IPython.display import YouTubeVideo
YouTubeVideo('7JcSo0jCLdE', width = "560", height = "315", start = 23)
LeNet
AlexNet
Simplified version of Krizhevsky, Sutskever, and Hinton. "ImageNet classification with deep convolutional neural networks." NIPS 2012
LeNet-style backbone, plus: ReLU activations, dropout, max pooling, and data augmentation, trained at a much larger scale on GPUs
VGG-16/19
Simonyan and Zisserman. "Very deep convolutional networks for large-scale image recognition." (2014)
Simply “Very Deep”!
GoogLeNet/Inception
ResNet
Skip Connection and Residual Net
A direct connection between 2 non-consecutive layers
Mitigates the vanishing gradient problem: gradients can flow directly through the skip connection
Parameters are optimized to learn a residual, i.e., the difference between the block's input and the output needed after the block.
A skip connection is a connection that bypasses at least one layer.
Here, it is often used to transfer local information by concatenating or summing feature maps from the downsampling path with feature maps from the upsampling path.
Merging features from different resolution levels helps combine context information with spatial information.
def residual_net(x, n_hidden = 100, n_output = 10):   # n_hidden, n_output defaults are example sizes
    # two 3x3 convolutions; "SAME" padding keeps the spatial size so the
    # skip connection below can add the block input to the block output
    conv1 = tf.keras.layers.Conv2D(filters = 32,
                                   kernel_size = (3, 3),
                                   padding = "SAME",
                                   activation = 'relu')(x)
    conv2 = tf.keras.layers.Conv2D(filters = 32,
                                   kernel_size = (3, 3),
                                   padding = "SAME",
                                   activation = 'relu')(conv1)
    # skip connection: element-wise sum of the block input and output
    # (x must have 32 channels so that the shapes match)
    maxp2 = tf.keras.layers.MaxPool2D(pool_size = (2, 2),
                                      strides = 2)(conv2 + x)
    flat = tf.keras.layers.Flatten()(maxp2)
    hidden = tf.keras.layers.Dense(units = n_hidden,
                                   activation = 'relu')(flat)
    output = tf.keras.layers.Dense(units = n_output)(hidden)
    return output
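A minimal usage sketch, assuming a 32-channel input so the element-wise sum in the skip connection is shape-compatible (the input size is illustrative):

inputs = tf.keras.Input(shape = (28, 28, 32))   # 32 channels match conv2's 32 filters, so conv2 + x is valid
res_model = tf.keras.Model(inputs = inputs, outputs = residual_net(inputs))
res_model.summary()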
DenseNets
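DenseNets push the skip-connection idea further: instead of summing, each layer's output is concatenated with all preceding feature maps. A minimal sketch of a DenseNet-style block (the layer count and growth rate are illustrative assumptions, not the paper's configuration):

def dense_block(x, n_layers = 3, growth_rate = 12):
    features = x
    for _ in range(n_layers):
        new_features = tf.keras.layers.Conv2D(filters = growth_rate,
                                              kernel_size = (3, 3),
                                              padding = "SAME",
                                              activation = 'relu')(features)
        # concatenate along the channel axis: every later layer sees all earlier feature maps
        features = tf.keras.layers.Concatenate()([features, new_features])
    return features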
U-Net
The U-Net owes its name to its symmetric, U-shaped architecture.
The U-Net architecture is built upon the Fully Convolutional Network and modified in a way that it yields better segmentation in medical imaging.
Compared to FCN-8, the two main differences are that (1) U-Net is symmetric and (2) the skip connections between the downsampling path and the upsampling path use concatenation instead of summation.
These skip connections are intended to provide local information to the global information while upsampling. Because of its symmetry, the network has a large number of feature maps in the upsampling path, which allows it to transfer information.
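A minimal sketch of a U-Net-style skip connection, assuming even spatial dimensions and illustrative channel counts (not the original U-Net configuration):

def unet_block(x):
    # downsampling path
    down = tf.keras.layers.Conv2D(32, (3, 3), padding = "SAME", activation = 'relu')(x)
    pooled = tf.keras.layers.MaxPool2D((2, 2))(down)
    bottleneck = tf.keras.layers.Conv2D(64, (3, 3), padding = "SAME", activation = 'relu')(pooled)
    # upsampling path: transposed convolution restores the spatial size
    up = tf.keras.layers.Conv2DTranspose(32, (2, 2), strides = 2, padding = "SAME")(bottleneck)
    # skip connection: concatenate feature maps from the downsampling path
    merged = tf.keras.layers.Concatenate()([down, up])
    return tf.keras.layers.Conv2D(32, (3, 3), padding = "SAME", activation = 'relu')(merged)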
List of Available Models
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import cv2
%matplotlib inline
Model Selection
# model_type = tf.keras.applications.densenet
# model_type = tf.keras.applications.inception_resnet_v2
# model_type = tf.keras.applications.inception_v3
model_type = tf.keras.applications.mobilenet
# model_type = tf.keras.applications.mobilenet_v2
# model_type = tf.keras.applications.nasnet
# model_type = tf.keras.applications.resnet50
# model_type = tf.keras.applications.vgg16
# model_type = tf.keras.applications.vgg19
Model Summary
model = model_type.MobileNet() # change the class to match the module selected above (note the capitalized class name)
model.summary()
from google.colab import drive
drive.mount('/content/drive')
# img = cv2.imread('/content/drive/MyDrive/DL/DL_data/ILSVRC2017_test_00000005.JPEG')
img = cv2.imread('/content/drive/MyDrive/DL/DL_data/ILSVRC2017_test_00005381.JPEG')
img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)   # OpenCV reads BGR; convert to RGB for display and the ImageNet-trained model
print(img.shape)
plt.figure(figsize = (6, 6))
plt.imshow(img)
plt.axis('off')
plt.show()
resized_img = cv2.resize(img, (224, 224)).reshape(1, 224, 224, 3)
plt.figure(figsize = (6, 6))
plt.imshow(resized_img[0])
plt.axis('off')
plt.show()
input_img = model_type.preprocess_input(resized_img)
pred = model.predict(input_img, verbose = 0)
label = model_type.decode_predictions(pred)[0]
# top-5 ImageNet predictions with confidence
for _, class_name, prob in label:
    print('%s (%.2f%%)' % (class_name, prob * 100))
from IPython.display import YouTubeVideo
YouTubeVideo('7JcSo0jCLdE', width = "560", height = "315", start = 2003)
Training a model on ImageNet from scratch takes days or weeks.
Many models trained on ImageNet and their weights are publicly available!
Transfer learning
Import Libraries
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
Load Data
from google.colab import drive
drive.mount('/content/drive')
# Change file paths if necessary
train_imgs = np.load('/content/drive/MyDrive/DL/DL_data/tranfer_learning_train_images.npy')
train_labels = np.load('/content/drive/MyDrive/DL/DL_data/tranfer_learning_train_labels.npy')
test_imgs = np.load('/content/drive/MyDrive/DL/DL_data/tranfer_learning_test_images.npy')
test_labels = np.load('/content/drive/MyDrive/DL/DL_data/tranfer_learning_test_labels.npy')
print(train_imgs.shape)
print(train_labels[0]) # one-hot-encoded 5 classes
# remove one-hot-encoding
train_labels = np.argmax(train_labels, axis = 1)
test_labels = np.argmax(test_labels, axis = 1)
n_train = train_imgs.shape[0]
n_test = test_imgs.shape[0]
# very small dataset
print(n_train)
print(n_test)
Dict = ['Hat', 'Cube', 'Card', 'Torch', 'Screw']   # class names, indexed by integer label
# show a few training images with their labels
sample_idx = [1, 2, 3, 18, 25]
plt.figure(figsize = (8, 6))
for i, idx in enumerate(sample_idx):
    plt.subplot(2, 3, i + 1)
    plt.imshow(train_imgs[idx])
    plt.title("Label: {}".format(Dict[train_labels[idx]]))
    plt.axis('off')
plt.show()
Load VGG16 Model
model_type = tf.keras.applications.vgg16
base_model = model_type.VGG16()
base_model.trainable = False
base_model.summary()
Testing on the Target Data
idx = np.random.randint(n_test)
pred = base_model.predict(test_imgs[idx].reshape(-1, 224, 224, 3), verbose = 0)
label = model_type.decode_predictions(pred)[0]
# top-5 ImageNet predictions for the target-domain image
for _, class_name, prob in label:
    print('%s (%.2f%%)' % (class_name, prob * 100))
plt.figure(figsize = (4, 4))
plt.imshow(test_imgs[idx])
plt.title("Label : {}".format(Dict[test_labels[idx]]))
plt.axis('off')
plt.show()
Pre-trained Weights and Biases
vgg16_weights = base_model.get_weights()
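A quick, purely illustrative check that the weights were extracted: each layer contributes a kernel array and a bias array.

print(len(vgg16_weights))      # number of weight/bias arrays in VGG16
for w in vgg16_weights[:4]:    # kernels and biases of the first two layers
    print(w.shape)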
Build a Transfer Learning Model
# replace the final ImageNet classifier layer with a new, trainable 5-class layer
fc2_layer = base_model.layers[-2].output
output = tf.keras.layers.Dense(units = 5, activation = 'softmax')(fc2_layer)
# define new model
TL_model = tf.keras.Model(inputs = base_model.inputs, outputs = output)
TL_model.summary()
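A quick sanity check: only the new 5-class head should contribute trainable parameters, since the VGG16 layers inherited from base_model stay frozen.

# the frozen base contributes no trainable weights; only the new Dense(5) layer does
n_trainable = int(np.sum([np.prod(w.shape.as_list()) for w in TL_model.trainable_weights]))
print('Trainable parameters:', n_trainable)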
Define Loss and Optimizer
TL_model.compile(optimizer = 'adam',
loss = 'sparse_categorical_crossentropy',
metrics = ['accuracy'])
Optimize
TL_model.fit(train_imgs, train_labels, batch_size = 10, epochs = 10)
Test and Evaluate
test_loss, test_acc = TL_model.evaluate(test_imgs, test_labels)
test_x = test_imgs[np.random.choice(n_test, 1)]
pred = np.argmax(TL_model.predict(test_x, verbose = 0))
plt.figure(figsize = (4, 4))
plt.imshow(test_x.reshape(224, 224, 3))
plt.axis('off')
plt.show()
print('Prediction : {}'.format(Dict[pred]))
%%javascript
$.getScript('https://kmahelona.github.io/ipython_notebook_goodies/ipython_notebook_toc.js')