Object Detection
Table of Contents
Contents are retrieved from
%%html
<center><iframe src="https://www.youtube.com/embed/8-PA11R3e9c?end=1198&rel=0"
width="560" height="315" frameborder="0" allowfullscreen></iframe></center>
%%html
<center><iframe
width="560" height="315" src="https://www.youtube.com/embed/Cgxsv1riJhI" frameborder="0" allowfullscreen>
</iframe></center>
%%html
<center><iframe
width="560" height="315" src="https://www.youtube.com/embed/vRqSO6RsptU" frameborder="0">
</iframe></center>
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.patches as patches
%matplotlib inline
Download data from here
# Load the image arrays first, then the matching label arrays.
# input image: 240 by 320
train_imgs = np.load('data_files/object_detction_trn_data.npy')
test_imgs = np.load('data_files/object_detction_eval_data.npy')

# output label: class, x, y, h, w
train_labels = np.load('data_files/object_detction_trn_label.npy')
test_labels = np.load('data_files/object_detction_eval_label.npy')

# Class names; the position in this list is used as the class index.
classes = ['Axis', 'Bearing', 'Bearing_Box', 'Distance_Tube', 'F20_20_B']

# Print one shape per line, in the order train imgs/labels, test imgs/labels.
print(*(arr.shape for arr in (train_imgs, train_labels, test_imgs, test_labels)),
      sep = '\n')
Images of five classes are available: axis, bearing, bearing box, distance tube, beam
250 images are used for training (50 images per class)
50 images are available for evaluation (10 images per class)
One object per image (240 by 320)
Labeled with class and normalized bounding box location: class, $x, y, h, w$
# Show one training image with its labelled bounding box drawn in red.
idx = 138
train_img = train_imgs[idx]
c, x, y, h, w = train_labels[idx]

# rescaling: horizontal quantities by 320, vertical quantities by 240
x, y, w, h = x*320, y*240, w*320, h*240

fig, ax = plt.subplots(figsize = (8,8))
ax.set_title(classes[int(c)])
ax.imshow(train_img)

# Rectangle takes the anchor point, then the width, then the height.
rect = patches.Rectangle((x, y), w, h,
                         linewidth = 2, edgecolor = 'r', facecolor = 'none')
ax.add_patch(rect)

ax.axis('off')
plt.show()
# Rescale the normalized box labels to pixel units.
# Label columns are (class, x, y, h, w). x and w are horizontal quantities and
# are scaled by 320; y and h are vertical quantities and are scaled by 240,
# the same factors used where a single training sample is drawn. The class
# column is multiplied by 1 and so is left unchanged.
# NOTE: re-running this statement rescales already-rescaled labels.
label_scale = [1, 320, 240, 240, 320]
train_labels = np.multiply(train_labels, label_scale)
test_labels = np.multiply(test_labels, label_scale)
# Convolutional backbone shared by the two output heads.
# Every convolution uses a 3x3 kernel, ReLU and 'SAME' padding; every
# convolution except the last is followed by 2x2 max pooling.
feature_extractor = tf.keras.models.Sequential()

# First stage declares the input shape: 240 x 320 images with 3 channels.
feature_extractor.add(tf.keras.layers.Conv2D(32, (3,3),
                                             activation = 'relu',
                                             padding = 'SAME',
                                             input_shape = (240, 320, 3)))
feature_extractor.add(tf.keras.layers.MaxPool2D((2,2)))

# Four further conv + pool stages with growing filter counts.
for n_filters in (64, 64, 128, 128):
    feature_extractor.add(tf.keras.layers.Conv2D(filters = n_filters,
                                                 kernel_size = (3,3),
                                                 activation = 'relu',
                                                 padding = 'SAME'))
    feature_extractor.add(tf.keras.layers.MaxPool2D(pool_size = (2,2)))

# Last convolution, then global average pooling over the spatial axes.
feature_extractor.add(tf.keras.layers.Conv2D(filters = 256,
                                             kernel_size = (3,3),
                                             activation = 'relu',
                                             padding = 'SAME'))
feature_extractor.add(tf.keras.layers.GlobalAveragePooling2D())
# Both heads branch off the output of the shared backbone.
shared_features = feature_extractor.output

# Classification head: two 256-unit ReLU layers, then a 5-unit softmax
# output named 'cls'.
classifier = tf.keras.layers.Dense(units = 256, activation = 'relu')(shared_features)
classifier = tf.keras.layers.Dense(units = 256, activation = 'relu')(classifier)
classifier = tf.keras.layers.Dense(units = 5,
                                   activation = 'softmax',
                                   name = 'cls')(classifier)

# Bounding-box head: two 256-unit ReLU layers, then a 4-unit output named
# 'bbox' with no activation specified.
bb_regressor = tf.keras.layers.Dense(units = 256, activation = 'relu')(shared_features)
bb_regressor = tf.keras.layers.Dense(units = 256, activation = 'relu')(bb_regressor)
bb_regressor = tf.keras.layers.Dense(units = 4, name = 'bbox')(bb_regressor)

# One model, one input, two outputs: [class probabilities, box values].
object_detection = tf.keras.models.Model(
    inputs = feature_extractor.input,
    outputs = [classifier, bb_regressor],
)
object_detection.summary()
# Configure training. Losses and weights are keyed by output layer name:
# sparse categorical cross-entropy for 'cls', mean squared error for 'bbox',
# both weighted equally.
object_detection.compile(
    optimizer = 'adam',
    loss = dict(cls = 'sparse_categorical_crossentropy', bbox = 'mse'),
    loss_weights = dict(cls = 1, bbox = 1),
)
# Split the label array column-wise: the first column is the class target,
# the remaining columns are the bounding-box targets. Both parts stay 2-D.
train_cls, train_bbox = np.split(train_labels, [1], axis = 1)

# Print one shape per line: full labels, class part, box part.
print(*(part.shape for part in (train_labels, train_cls, train_bbox)), sep = '\n')

# Train both heads together; targets are keyed by output layer name.
targets = {'cls': train_cls, 'bbox': train_bbox}
object_detection.fit(x = train_imgs, y = targets, epochs = 100)
# Compare the labelled box (red) with the predicted box (blue) on one
# evaluation image, and print the true and predicted class names.

# Clamp the index to the last available sample: the evaluation set is
# described as holding 50 images, for which index 50 would be out of range.
idx = min(50, len(test_imgs) - 1)

# true label, unpacked in the label column order (class, x, y, h, w)
c_label, x_label, y_label, h_label, w_label = test_labels[idx]

# Rectangle takes the anchor point, then the width, then the height.
rect_label = patches.Rectangle((x_label, y_label),
                               w_label,
                               h_label,
                               linewidth = 2,
                               edgecolor = 'r',
                               facecolor = 'none')

# predict: indexing with [[idx]] keeps the leading axis, so a batch holding
# a single image is passed to the model
c_pred, bbox = object_detection.predict(test_imgs[[idx]])

# The model's second output holds the box values, one row per image.
x, y, h, w = bbox[0]

rect = patches.Rectangle((x, y),
                         w,
                         h,
                         linewidth = 2,
                         edgecolor = 'b',
                         facecolor = 'none')

# True class name first, then the class with the highest predicted score.
print(classes[int(c_label)])
print(classes[np.argmax(c_pred)])

fig, ax = plt.subplots(figsize = (8,8))
plt.imshow(test_imgs[idx])
ax.add_patch(rect_label)
ax.add_patch(rect)
plt.axis('off')
plt.show()
%%javascript
$.getScript('https://kmahelona.github.io/ipython_notebook_goodies/ipython_notebook_toc.js')