Transfer Learning


By Prof. Seungchul Lee
http://iailab.kaist.ac.kr/
Industrial AI Lab at KAIST


1. Pre-trained Model (VGG16)

  • Training a model on ImageNet from scratch takes days or weeks.
  • Many models trained on ImageNet, along with their weights, are publicly available.
  • Transfer learning
    • Use the pre-trained weights and remove the last layers to compute image representations
    • The network is used as a generic feature extractor (a minimal sketch follows this list)
    • Train a classification model on these features for the new classification task
    • Pre-trained models extract general image features that help identify edges, textures, shapes, and object composition
    • Better than handcrafted feature extraction on natural images
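As a minimal sketch of this feature-extractor recipe (not part of the original notebook, and using random placeholder images), VGG16 can be loaded without its fully connected head so that each image is mapped to a 512-dimensional feature vector:

import numpy as np
import tensorflow as tf

# VGG16 without the fully connected head; pooling = 'avg' collapses the
# final 7x7x512 feature map into one 512-d vector per image
feature_extractor = tf.keras.applications.vgg16.VGG16(include_top = False,
                                                      pooling = 'avg')
feature_extractor.trainable = False

# placeholder batch standing in for real (N, 224, 224, 3) images
imgs = np.random.rand(4, 224, 224, 3).astype('float32')
features = feature_extractor.predict(imgs, verbose = 0)
print(features.shape)    # (4, 512)

Any off-the-shelf classifier (logistic regression, a small dense network, etc.) can then be trained on these feature vectors.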

1.1. Import Library

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf

1.2. Load Data

In [2]:
from google.colab import drive
drive.mount('/content/drive')
Mounted at /content/drive
In [3]:
# Change file paths if necessary

train_imgs = np.load('/content/drive/MyDrive/DL_Colab/DL_data/tranfer_learning_train_images.npy')
train_labels = np.load('/content/drive/MyDrive/DL_Colab/DL_data/tranfer_learning_train_labels.npy')

test_imgs = np.load('/content/drive/MyDrive/DL_Colab/DL_data/tranfer_learning_test_images.npy')
test_labels = np.load('/content/drive/MyDrive/DL_Colab/DL_data/tranfer_learning_test_labels.npy')

print(train_imgs.shape)
print(train_labels[0]) # one-hot-encoded 5 classes

# remove one-hot encoding: convert labels to integer class indices
train_labels = np.argmax(train_labels, axis = 1)
test_labels = np.argmax(test_labels, axis = 1)
(65, 224, 224, 3)
[1. 0. 0. 0. 0.]
In [4]:
n_train = train_imgs.shape[0]
n_test = test_imgs.shape[0]

# very small dataset
print(n_train)
print(n_test)
65
9
In [5]:
Dict = ['Hat', 'Cube', 'Card', 'Torch', 'Screw']

# plot a few training images with their labels
sample_idx = [1, 2, 3, 18, 25]

plt.figure(figsize = (8, 6))
for i, idx in enumerate(sample_idx):
    plt.subplot(2, 3, i + 1)
    plt.imshow(train_imgs[idx])
    plt.title("Label: {}".format(Dict[train_labels[idx]]))
    plt.axis('off')
plt.show()

1.3. Load VGG16 Model

In [6]:
model_type = tf.keras.applications.vgg16
base_model = model_type.VGG16()
base_model.trainable = False
base_model.summary()
Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/vgg16/vgg16_weights_tf_dim_ordering_tf_kernels.h5
553467096/553467096 [==============================] - 27s 0us/step
Model: "vgg16"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
=================================================================
 input_1 (InputLayer)        [(None, 224, 224, 3)]     0         
                                                                 
 block1_conv1 (Conv2D)       (None, 224, 224, 64)      1792      
                                                                 
 block1_conv2 (Conv2D)       (None, 224, 224, 64)      36928     
                                                                 
 block1_pool (MaxPooling2D)  (None, 112, 112, 64)      0         
                                                                 
 block2_conv1 (Conv2D)       (None, 112, 112, 128)     73856     
                                                                 
 block2_conv2 (Conv2D)       (None, 112, 112, 128)     147584    
                                                                 
 block2_pool (MaxPooling2D)  (None, 56, 56, 128)       0         
                                                                 
 block3_conv1 (Conv2D)       (None, 56, 56, 256)       295168    
                                                                 
 block3_conv2 (Conv2D)       (None, 56, 56, 256)       590080    
                                                                 
 block3_conv3 (Conv2D)       (None, 56, 56, 256)       590080    
                                                                 
 block3_pool (MaxPooling2D)  (None, 28, 28, 256)       0         
                                                                 
 block4_conv1 (Conv2D)       (None, 28, 28, 512)       1180160   
                                                                 
 block4_conv2 (Conv2D)       (None, 28, 28, 512)       2359808   
                                                                 
 block4_conv3 (Conv2D)       (None, 28, 28, 512)       2359808   
                                                                 
 block4_pool (MaxPooling2D)  (None, 14, 14, 512)       0         
                                                                 
 block5_conv1 (Conv2D)       (None, 14, 14, 512)       2359808   
                                                                 
 block5_conv2 (Conv2D)       (None, 14, 14, 512)       2359808   
                                                                 
 block5_conv3 (Conv2D)       (None, 14, 14, 512)       2359808   
                                                                 
 block5_pool (MaxPooling2D)  (None, 7, 7, 512)         0         
                                                                 
 flatten (Flatten)           (None, 25088)             0         
                                                                 
 fc1 (Dense)                 (None, 4096)              102764544 
                                                                 
 fc2 (Dense)                 (None, 4096)              16781312  
                                                                 
 predictions (Dense)         (None, 1000)              4097000   
                                                                 
=================================================================
Total params: 138357544 (527.79 MB)
Trainable params: 0 (0.00 Byte)
Non-trainable params: 138357544 (527.79 MB)
_________________________________________________________________

1.4. Testing on Target Data

In [7]:
idx = np.random.randint(n_test)
pred = base_model.predict(test_imgs[idx].reshape(-1, 224, 224, 3), verbose = 0)
label = model_type.decode_predictions(pred)[0]

# print the top-5 ImageNet predictions with their confidences
for _, class_name, prob in label:
    print('%s (%.2f%%)' % (class_name, prob * 100))

plt.figure(figsize = (4, 4))
plt.imshow(test_imgs[idx])
plt.title("Label : {}".format(Dict[test_labels[idx]]))
plt.axis('off')
plt.show()
Downloading data from https://storage.googleapis.com/download.tensorflow.org/data/imagenet_class_index.json
35363/35363 [==============================] - 0s 0us/step
mosquito_net (6.94%)
toilet_tissue (3.43%)
Band_Aid (1.53%)
envelope (1.46%)
shower_curtain (1.39%)
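The top-5 ImageNet predictions are low-confidence and unrelated to the true label, which is expected: the five target classes are not ImageNet categories, so the 1000-way classifier cannot name them. Note also that this cell feeds raw pixels into VGG16. Keras provides tf.keras.applications.vgg16.preprocess_input (ImageNet mean subtraction in BGR channel order), which VGG16 was trained with; a sketch of applying it, assuming the images are stored as 0-255 RGB arrays:

# optional preprocessing sketch: VGG16 was trained on mean-subtracted BGR
# input, so raw RGB pixels are slightly off-distribution for the network
x = test_imgs[idx].astype('float32').reshape(-1, 224, 224, 3)
x = model_type.preprocess_input(x)
pred = base_model.predict(x, verbose = 0)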

2. Transfer Learning

  • We assume that the pre-trained parameters contain knowledge learned from the source dataset and that this knowledge is equally applicable to the target dataset.
  • One approach: train the output layer from scratch, while the parameters of all remaining layers are kept frozen at (or fine-tuned starting from) the parameters of the source model.
  • Alternatively: initialize all weights from the pre-trained model, then train all of them on the target data (a sketch follows this list).
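The cells below follow the first strategy: the convolutional backbone stays frozen and only the new output layer is trained. For reference, here is a minimal sketch of the second strategy (full fine-tuning), in which every layer is unfrozen and a small learning rate keeps the pre-trained weights from drifting too far; it is not run in this notebook:

# full fine-tuning sketch: attach a new 5-way head, unfreeze everything,
# and train with a small learning rate
head = tf.keras.layers.Dense(units = 5, activation = 'softmax')(base_model.layers[-2].output)
ft_model = tf.keras.Model(inputs = base_model.inputs, outputs = head)
ft_model.trainable = True
ft_model.compile(optimizer = tf.keras.optimizers.Adam(learning_rate = 1e-5),
                 loss = 'sparse_categorical_crossentropy',
                 metrics = 'accuracy')
# ft_model.fit(train_imgs, train_labels, batch_size = 10, epochs = 10)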

2.1. Pre-trained Weights and Biases

In [8]:
vgg16_weights = base_model.get_weights()
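get_weights() returns the parameters as a flat list that alternates kernel and bias for each parameterized layer. A quick inspection of the first few entries (an added snippet):

# block1_conv1 kernel (3, 3, 3, 64) and bias (64,), then
# block1_conv2 kernel (3, 3, 64, 64) and bias (64,)
for w in vgg16_weights[:4]:
    print(w.shape)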

2.2. Build a Transfer Learning Model

In [9]:
# replace the final 1000-class ImageNet layer with a new, trainable 5-class softmax layer
fc2_layer = base_model.layers[-2].output
output = tf.keras.layers.Dense(units = 5, activation = 'softmax')(fc2_layer)

# define new model
TL_model = tf.keras.Model(inputs = base_model.inputs, outputs = output)
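Since base_model.trainable was set to False before the new head was attached, every layer except the added Dense layer should be frozen; an optional quick check:

# all layers except the new 5-unit Dense head should print False
for layer in TL_model.layers:
    print(layer.name, layer.trainable)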
In [10]:
TL_model.summary()
Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
=================================================================
 input_1 (InputLayer)        [(None, 224, 224, 3)]     0         
                                                                 
 block1_conv1 (Conv2D)       (None, 224, 224, 64)      1792      
                                                                 
 block1_conv2 (Conv2D)       (None, 224, 224, 64)      36928     
                                                                 
 block1_pool (MaxPooling2D)  (None, 112, 112, 64)      0         
                                                                 
 block2_conv1 (Conv2D)       (None, 112, 112, 128)     73856     
                                                                 
 block2_conv2 (Conv2D)       (None, 112, 112, 128)     147584    
                                                                 
 block2_pool (MaxPooling2D)  (None, 56, 56, 128)       0         
                                                                 
 block3_conv1 (Conv2D)       (None, 56, 56, 256)       295168    
                                                                 
 block3_conv2 (Conv2D)       (None, 56, 56, 256)       590080    
                                                                 
 block3_conv3 (Conv2D)       (None, 56, 56, 256)       590080    
                                                                 
 block3_pool (MaxPooling2D)  (None, 28, 28, 256)       0         
                                                                 
 block4_conv1 (Conv2D)       (None, 28, 28, 512)       1180160   
                                                                 
 block4_conv2 (Conv2D)       (None, 28, 28, 512)       2359808   
                                                                 
 block4_conv3 (Conv2D)       (None, 28, 28, 512)       2359808   
                                                                 
 block4_pool (MaxPooling2D)  (None, 14, 14, 512)       0         
                                                                 
 block5_conv1 (Conv2D)       (None, 14, 14, 512)       2359808   
                                                                 
 block5_conv2 (Conv2D)       (None, 14, 14, 512)       2359808   
                                                                 
 block5_conv3 (Conv2D)       (None, 14, 14, 512)       2359808   
                                                                 
 block5_pool (MaxPooling2D)  (None, 7, 7, 512)         0         
                                                                 
 flatten (Flatten)           (None, 25088)             0         
                                                                 
 fc1 (Dense)                 (None, 4096)              102764544 
                                                                 
 fc2 (Dense)                 (None, 4096)              16781312  
                                                                 
 dense (Dense)               (None, 5)                 20485     
                                                                 
=================================================================
Total params: 134281029 (512.24 MB)
Trainable params: 20485 (80.02 KB)
Non-trainable params: 134260544 (512.16 MB)
_________________________________________________________________

2.3. Define Loss and Optimizer

In [11]:
TL_model.compile(optimizer = 'adam',
                 loss = 'sparse_categorical_crossentropy',
                 metrics = 'accuracy')
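sparse_categorical_crossentropy matches the integer labels produced by np.argmax earlier; with the original one-hot labels, categorical_crossentropy would be the corresponding loss. The two forms are numerically identical, as this small added check illustrates:

# sparse (integer-label) and one-hot cross-entropy agree on the same prediction
p = np.array([[0.7, 0.1, 0.1, 0.05, 0.05]])
print(tf.keras.losses.sparse_categorical_crossentropy(np.array([0]), p).numpy())
print(tf.keras.losses.categorical_crossentropy(np.array([[1., 0., 0., 0., 0.]]), p).numpy())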

2.4. Optimize

In [12]:
TL_model.fit(train_imgs, train_labels, batch_size = 10, epochs = 10)
Epoch 1/10
7/7 [==============================] - 5s 233ms/step - loss: 1.8993 - accuracy: 0.2462
Epoch 2/10
7/7 [==============================] - 0s 53ms/step - loss: 1.6406 - accuracy: 0.4462
Epoch 3/10
7/7 [==============================] - 0s 54ms/step - loss: 1.2547 - accuracy: 0.4462
Epoch 4/10
7/7 [==============================] - 0s 56ms/step - loss: 0.9955 - accuracy: 0.6000
Epoch 5/10
7/7 [==============================] - 0s 56ms/step - loss: 0.7194 - accuracy: 0.8615
Epoch 6/10
7/7 [==============================] - 0s 55ms/step - loss: 0.6324 - accuracy: 0.8769
Epoch 7/10
7/7 [==============================] - 0s 54ms/step - loss: 0.6120 - accuracy: 0.8462
Epoch 8/10
7/7 [==============================] - 0s 55ms/step - loss: 0.4729 - accuracy: 0.9692
Epoch 9/10
7/7 [==============================] - 0s 54ms/step - loss: 0.4166 - accuracy: 0.9538
Epoch 10/10
7/7 [==============================] - 0s 53ms/step - loss: 0.3716 - accuracy: 0.9846
Out[12]:
<keras.src.callbacks.History at 0x7aeb01ff3a30>

2.5. Test and Evaluate

In [13]:
test_loss, test_acc = TL_model.evaluate(test_imgs, test_labels)
1/1 [==============================] - 2s 2s/step - loss: 0.2379 - accuracy: 1.0000
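With only 9 test images, a single accuracy number says little on its own; listing predicted versus true labels for the whole test set (a small added check, using only variables defined above) gives a fuller picture:

# predicted vs. true class for every test image
preds = np.argmax(TL_model.predict(test_imgs, verbose = 0), axis = 1)
for p, t in zip(preds, test_labels):
    print('pred: {:6s}  true: {:6s}'.format(Dict[p], Dict[t]))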
In [14]:
test_x = test_imgs[np.random.choice(n_test, 1)]
pred = np.argmax(TL_model.predict(test_x, verbose = 0))

plt.figure(figsize = (4, 4))
plt.imshow(test_x.reshape(224, 224, 3))
plt.axis('off')
plt.show()

print('Prediction : {}'.format(Dict[pred]))
Prediction : Screw