Object Detection
Table of Contents
Contents are retrieved from
tf.keras.layers.Conv2D(filters, kernel_size, strides, padding, activation, kernel_regularizer, input_shape)
filters = 32
kernel_size = (3,3)
strides = (1,1)
padding = 'SAME'
activation = 'relu'
kernel_regularizer=tf.keras.regularizers.l2(0.04)
input_shape = tensor of shape([input_h, input_w, input_ch])
kernel_size
stride
padding
'SAME': enable zero padding
'VALID': disable zero padding
kernel_regularizer
input and output channels
Examples
input = [None, 4, 4, 1]
filter size = [3, 3, 1, 1]
strides = [1, 1, 1, 1]
padding = 'VALID'
input = [None, 5, 5, 1]
filter size = [3, 3, 1, 1]
strides = [1, 1, 1, 1]
padding = 'SAME'
%%html
<center><iframe
width="560" height="315" src="https://www.youtube.com/embed/Cgxsv1riJhI" frameborder="0" allowfullscreen>
</iframe></center>
%%html
<center><iframe
width="560" height="315" src="https://www.youtube.com/embed/vRqSO6RsptU" frameborder="0">
</iframe></center>
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.patches as patches
%matplotlib inline
Download data from here
# Load the image arrays and label tables stored as .npy files, in the order
# train images, train labels, eval images, eval labels.
# Per the original notes: input images are 240 by 320, and each label row is
# laid out as (class, x, y, h, w).
train_imgs, train_labels, test_imgs, test_labels = (
    np.load(npy_path)
    for npy_path in (
        'data_files/object_detction_trn_data.npy',
        'data_files/object_detction_trn_label.npy',
        'data_files/object_detction_eval_data.npy',
        'data_files/object_detction_eval_label.npy',
    )
)

# Class names; the class id stored in a label row is used as an index into
# this list when titles/predictions are printed.
classes = [
    'Axis',
    'Bearing',
    'Bearing_Box',
    'Distance_Tube',
    'F20_20_B',
]

# One shape per line: train images, train labels, eval images, eval labels.
print(train_imgs.shape,
      train_labels.shape,
      test_imgs.shape,
      test_labels.shape,
      sep='\n')
Images of five classes are available: axis, bearing, bearing box, distance tube, beam
250 images are used for training (50 images per class)
50 images are available for evaluation (10 images per class)
One object per image (240 by 320)
Labeled with class and normalized bounding box location: class, $x, y, h, w$
# Display one training image together with its labelled bounding box.
idx = 138
train_img = train_imgs[idx]
c, x, y, h, w = train_labels[idx]

# The stored box values are normalized, so scale the horizontal quantities
# by the 320 px image width and the vertical ones by the 240 px height.
x, y = x*320, y*240
w, h = w*320, h*240

fig, ax = plt.subplots(figsize=(8, 8))

# Red, unfilled rectangle anchored at (x, y) with width w and height h.
rect = patches.Rectangle(xy=(x, y),
                         width=w,
                         height=h,
                         linewidth=2,
                         edgecolor='r',
                         facecolor='none')

ax.set_title(classes[int(c)])
ax.imshow(train_img)
ax.add_patch(rect)
ax.axis('off')
plt.show()
# Rescale the normalized box values of every label row to pixel units.
# Label columns are (class, x, y, h, w): x and w run along the 320 px image
# width, y and h along the 240 px image height, and the class id is left
# unchanged (factor 1).
# The factors must follow that column order; scaling h by 320 and w by 240
# would disagree with how label rows are unpacked and drawn in the rest of
# this file (Rectangle takes width = w, height = h).
# NOTE(review): column order is taken from the file's own comments and
# unpacking code; confirm against the dataset.
train_labels = np.multiply(train_labels, [1, 320, 240, 240, 320])
test_labels = np.multiply(test_labels, [1, 320, 240, 240, 320])
# Convolutional backbone for 240x320x3 inputs: six 3x3 'SAME'-padded ReLU
# convolutions (32, 64, 64, 128, 128, 256 filters) with a 2x2 max-pool after
# each of the first five, followed by global average pooling over the final
# feature map.
feature_extractor = tf.keras.models.Sequential([
    tf.keras.layers.Conv2D(filters=32,
                           kernel_size=(3, 3),
                           padding='SAME',
                           activation='relu',
                           input_shape=(240, 320, 3)),
    tf.keras.layers.MaxPool2D(pool_size=(2, 2)),

    tf.keras.layers.Conv2D(filters=64, kernel_size=(3, 3), padding='SAME', activation='relu'),
    tf.keras.layers.MaxPool2D(pool_size=(2, 2)),

    tf.keras.layers.Conv2D(filters=64, kernel_size=(3, 3), padding='SAME', activation='relu'),
    tf.keras.layers.MaxPool2D(pool_size=(2, 2)),

    tf.keras.layers.Conv2D(filters=128, kernel_size=(3, 3), padding='SAME', activation='relu'),
    tf.keras.layers.MaxPool2D(pool_size=(2, 2)),

    tf.keras.layers.Conv2D(filters=128, kernel_size=(3, 3), padding='SAME', activation='relu'),
    tf.keras.layers.MaxPool2D(pool_size=(2, 2)),

    tf.keras.layers.Conv2D(filters=256, kernel_size=(3, 3), padding='SAME', activation='relu'),
    tf.keras.layers.GlobalAveragePooling2D(),
])
# Classification head on top of the shared backbone output: two 256-unit
# ReLU layers, then a 5-way softmax output layer named 'cls'.
classifier = tf.keras.layers.Dense(units=256, activation='relu')(feature_extractor.output)
classifier = tf.keras.layers.Dense(units=256, activation='relu')(classifier)
classifier = tf.keras.layers.Dense(units=5, activation='softmax', name='cls')(classifier)

# Bounding-box head on the same backbone output: two 256-unit ReLU layers,
# then a 4-value output layer with no activation, named 'bbox'.
bb_regressor = tf.keras.layers.Dense(units=256, activation='relu')(feature_extractor.output)
bb_regressor = tf.keras.layers.Dense(units=256, activation='relu')(bb_regressor)
bb_regressor = tf.keras.layers.Dense(units=4, name='bbox')(bb_regressor)

# Two-output model sharing the backbone input: [class probabilities, box].
object_detection = tf.keras.Model(
    inputs=feature_extractor.input,
    outputs=[classifier, bb_regressor],
)

object_detection.summary()
# Configure training: Adam optimizer, one loss per named output
# (sparse categorical cross-entropy for 'cls', mean squared error for
# 'bbox'), with both losses weighted equally at 1.
object_detection.compile(
    optimizer='adam',
    loss={
        'cls': 'sparse_categorical_crossentropy',
        'bbox': 'mse',
    },
    loss_weights=dict.fromkeys(('cls', 'bbox'), 1),
)
# Split each label row into its class part (first column, kept 2-D by the
# :1 slice) and its bounding-box part (all remaining columns).
train_cls, train_bbox = train_labels[:, :1], train_labels[:, 1:]

# Shapes of the full label table and of the two parts, one per line.
print(train_labels.shape,
      train_cls.shape,
      train_bbox.shape,
      sep='\n')
# Train for 100 epochs; targets are routed to the two heads by output name.
object_detection.fit(
    train_imgs,
    {'cls': train_cls, 'bbox': train_bbox},
    epochs=100,
)
# Compare the labelled box (red) with the predicted box (blue) on one
# training image, and print the true and predicted class names.
idx = 110

# Ground-truth row, unpacked as (class, x, y, h, w).
c_label, x_label, y_label, h_label, w_label = train_labels[idx]
rect_label = patches.Rectangle(xy=(x_label, y_label),
                               width=w_label,
                               height=h_label,
                               linewidth=2,
                               edgecolor='r',
                               facecolor='none')

# Predict on a batch of one image; indexing with [[idx]] keeps the leading
# batch axis. The model returns the class output and the box output.
c_pred, bbox = object_detection.predict(train_imgs[[idx]])
x, y, h, w = bbox[0]
rect = patches.Rectangle(xy=(x, y),
                         width=w,
                         height=h,
                         linewidth=2,
                         edgecolor='b',
                         facecolor='none')

# True class first, then the highest-scoring predicted class.
print(classes[int(c_label)],
      classes[np.argmax(c_pred)],
      sep='\n')

fig, ax = plt.subplots(figsize=(8, 8))
ax.imshow(train_imgs[idx])
ax.add_patch(rect_label)
ax.add_patch(rect)
ax.axis('off')
plt.show()
# Compare the labelled box (red) with the predicted box (blue) on one
# evaluation image, and print the true and predicted class names.
#
# The accompanying notes describe 50 evaluation images, in which case the
# valid indices are 0..49 and a literal index of 50 would raise IndexError.
# Clamp to the last available row so the cell runs either way; the index
# stays 50 whenever the evaluation set is larger than that.
# NOTE(review): confirm the evaluation-set size and pick the intended sample.
idx = min(50, len(test_imgs) - 1)

# true label, unpacked as (class, x, y, h, w)
c_label, x_label, y_label, h_label, w_label = test_labels[idx]
rect_label = patches.Rectangle((x_label, y_label),
                               w_label,
                               h_label,
                               linewidth = 2,
                               edgecolor = 'r',
                               facecolor = 'none')

# predict on a batch of one image; [[idx]] keeps the leading batch axis
c_pred, bbox = object_detection.predict(test_imgs[[idx]])
x, y, h, w = bbox[0]
rect = patches.Rectangle((x, y),
                         w,
                         h,
                         linewidth = 2,
                         edgecolor = 'b',
                         facecolor = 'none')

# true class first, then the highest-scoring predicted class
print(classes[int(c_label)])
print(classes[np.argmax(c_pred)])

fig, ax = plt.subplots(figsize = (8,8))
plt.imshow(test_imgs[idx])
ax.add_patch(rect_label)
ax.add_patch(rect)
plt.axis('off')
plt.show()
%%javascript
// Fetch and execute the remote ipython_notebook_toc.js script via $.getScript
// ($ is presumably the notebook's jQuery object; going by the file name, the
// script builds the notebook's table of contents — TODO confirm).
$.getScript('https://kmahelona.github.io/ipython_notebook_goodies/ipython_notebook_toc.js')