In this assignment you'll practice training a Keras model on the horses_or_humans dataset, with the entire training process performed in graph mode. The steps are:
Import TensorFlow 2.x and the supporting libraries:
from __future__ import absolute_import, division, print_function, unicode_literals
import numpy as np
import tensorflow as tf
import tensorflow_datasets as tfds
import tensorflow_hub as hub
import matplotlib.pyplot as plt
Load the horses_or_humans dataset, using 80% of the original train split for training, the remaining 20% for validation, and the dataset's test split for testing.
splits, info = tfds.load('horses_or_humans', as_supervised=True, with_info=True, split=['train[:80%]', 'train[80%:]', 'test'], data_dir='./data')
(train_examples, validation_examples, test_examples) = splits
num_examples = info.splits['train'].num_examples
num_classes = info.features['label'].num_classes
BATCH_SIZE = 32
IMAGE_SIZE = 224
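For reference, at the time of writing horses_or_humans contains 1,027 training images and 256 test images, so the 80/20 slicing above yields roughly 822 training and 205 validation examples, which at a batch size of 32 works out to the 26 training batches reported below. You can verify the counts pulled from info:
print(num_examples, num_classes)  # expected: 1027 2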
You’ll define a mapping function that resizes each image to 224 by 224 pixels and normalizes the pixel values to the range 0 to 1. Note that the raw pixels range from 0 to 255, and that tf.image.resize returns a float32 tensor (for the default bilinear method), so dividing by 255.0 yields floats in [0, 1].
# Create an autograph pre-processing function to resize and normalize an image
@tf.function
def map_fn(img, label):
    image_height = 224
    image_width = 224
    ### START CODE HERE ###
    # resize the image
    img = tf.image.resize(img, (image_height, image_width))
    # normalize the image
    img /= 255.0
    ### END CODE HERE ###
    return img, label
## TEST CODE:
test_image, test_label = list(train_examples)[0]
test_result = map_fn(test_image, test_label)
print(test_result[0].shape)
print(test_result[1].shape)
del test_image, test_label, test_result
(224, 224, 3)
()
Expected Output:
(224, 224, 3)
()
Apply the following steps to the training_examples:
- Apply map_fn to the training_examples.
- Call .shuffle(buffer_size=) and set the buffer size to the number of examples.
- Call .batch() and set the batch size given by the parameter.
Hint: You can look at how validation_examples and test_examples are pre-processed to get a sense of how to chain together multiple function calls; there is also a toy sketch right after this hint.
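If the chaining pattern is new to you, here is a minimal sketch on a toy dataset (illustrative only, not the graded solution):
# Toy example: chain map, shuffle, and batch on a small tf.data pipeline
toy_ds = tf.data.Dataset.range(10)
toy_ds = toy_ds.map(lambda v: v * 2).shuffle(buffer_size=10).batch(4)
for toy_batch in toy_ds:
    print(toy_batch)  # tensors of up to 4 shuffled, doubled values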
# Prepare train dataset by using preprocessing with map_fn, shuffling and batching
def prepare_dataset(train_examples, validation_examples, test_examples, num_examples, map_fn, batch_size):
    ### START CODE HERE ###
    train_ds = train_examples.map(map_fn).shuffle(buffer_size=num_examples).batch(batch_size)
    ### END CODE HERE ###
    valid_ds = validation_examples.map(map_fn).batch(batch_size)
    test_ds = test_examples.map(map_fn).batch(batch_size)
    return train_ds, valid_ds, test_ds
train_ds, valid_ds, test_ds = prepare_dataset(train_examples, validation_examples, test_examples, num_examples, map_fn, BATCH_SIZE)
## TEST CODE:
test_train_ds = list(train_ds)
print(len(test_train_ds))
print(test_train_ds[0][0].shape)
del test_train_ds
26
(32, 224, 224, 3)
Expected Output:
26
(32, 224, 224, 3)
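Next, define the model: a pre-trained ResNet-50 feature vector from TensorFlow Hub (loaded here from a local module handle), topped with a Dense softmax layer over the two classes.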
MODULE_HANDLE = 'data/resnet_50_feature_vector'
model = tf.keras.Sequential([
    hub.KerasLayer(MODULE_HANDLE, input_shape=(IMAGE_SIZE, IMAGE_SIZE, 3)),
    tf.keras.layers.Dense(num_classes, activation='softmax')
])
model.summary()
Model: "sequential"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
keras_layer (KerasLayer) (None, 2048) 23561152
_________________________________________________________________
dense (Dense) (None, 2) 4098
=================================================================
Total params: 23,565,250
Trainable params: 4,098
Non-trainable params: 23,561,152
_________________________________________________________________
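The summary shows that only the Dense head's 4,098 parameters are trainable (2048 features × 2 classes, plus 2 biases): hub.KerasLayer loads the ResNet-50 feature vector frozen by default. As an aside, if you wanted to fine-tune the backbone as well (not required for this assignment), hub.KerasLayer accepts a trainable flag:
# Hypothetical fine-tuning variant -- not part of the graded solution
feature_extractor = hub.KerasLayer(MODULE_HANDLE,
                                   input_shape=(IMAGE_SIZE, IMAGE_SIZE, 3),
                                   trainable=True)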
Define the Adam optimizer from the tf.keras.optimizers module.
def set_adam_optimizer():
    ### START CODE HERE ###
    # Define the Adam optimizer
    optimizer = tf.keras.optimizers.Adam()
    ### END CODE HERE ###
    return optimizer
## TEST CODE:
test_optimizer = set_adam_optimizer()
print(type(test_optimizer))
del test_optimizer
<class 'tensorflow.python.keras.optimizer_v2.adam.Adam'>
Expected Output:
<class 'tensorflow.python.keras.optimizer_v2.adam.Adam'>
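Adam() with no arguments uses its default learning rate of 0.001. As an aside, the constructor also accepts an explicit rate if you want to experiment outside the graded solution:
# e.g. a smaller learning rate than the 0.001 default
slow_optimizer = tf.keras.optimizers.Adam(learning_rate=1e-4)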
Define the loss function as the sparse categorical cross entropy that’s in the tf.keras.losses module. Use the same function for both training and validation.
def set_sparse_cat_crossentropy_loss():
    ### START CODE HERE ###
    # Define object-oriented sparse categorical crossentropy for train and val loss
    train_loss = tf.keras.losses.SparseCategoricalCrossentropy()
    val_loss = tf.keras.losses.SparseCategoricalCrossentropy()
    ### END CODE HERE ###
    return train_loss, val_loss
## TEST CODE:
test_train_loss, test_val_loss = set_sparse_cat_crossentropy_loss()
print(type(test_train_loss))
print(type(test_val_loss))
del test_train_loss, test_val_loss
<class 'tensorflow.python.keras.losses.SparseCategoricalCrossentropy'>
<class 'tensorflow.python.keras.losses.SparseCategoricalCrossentropy'>
Expected Output:
<class 'tensorflow.python.keras.losses.SparseCategoricalCrossentropy'>
<class 'tensorflow.python.keras.losses.SparseCategoricalCrossentropy'>
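One detail worth noting: SparseCategoricalCrossentropy defaults to from_logits=False, which is correct here because the model's final Dense layer applies a softmax. If a model emitted raw logits instead, you would construct the loss like this (not needed for this assignment):
# Only for models whose last layer has no softmax
logits_loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)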
Define the accuracy function as the sparse categorical accuracy that’s contained in the tf.keras.metrics module. Use the same function for both training and validation.
def set_sparse_cat_crossentropy_accuracy():
    ### START CODE HERE ###
    # Define object-oriented sparse categorical accuracy metrics for train and val accuracy
    train_accuracy = tf.keras.metrics.SparseCategoricalAccuracy()
    val_accuracy = tf.keras.metrics.SparseCategoricalAccuracy()
    ### END CODE HERE ###
    return train_accuracy, val_accuracy
## TEST CODE:
test_train_accuracy, test_val_accuracy = set_sparse_cat_crossentropy_accuracy()
print(type(test_train_accuracy))
print(type(test_val_accuracy))
del test_train_accuracy, test_val_accuracy
<class 'tensorflow.python.keras.metrics.SparseCategoricalAccuracy'>
<class 'tensorflow.python.keras.metrics.SparseCategoricalAccuracy'>
Expected Output:
<class 'tensorflow.python.keras.metrics.SparseCategoricalAccuracy'>
<class 'tensorflow.python.keras.metrics.SparseCategoricalAccuracy'>
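Unlike the loss objects, these metrics are stateful: every call accumulates into a running total, and .result() returns the average so far, which is why the training log below shows a running accuracy rather than a per-batch one. A minimal standalone sketch:
# Each call updates the metric's internal state
demo_acc = tf.keras.metrics.SparseCategoricalAccuracy()
demo_acc(tf.constant([0]), tf.constant([[0.9, 0.1]]))  # correct prediction
demo_acc(tf.constant([1]), tf.constant([[0.9, 0.1]]))  # wrong prediction
print(demo_acc.result().numpy())  # 0.5 -- the running average
demo_acc.reset_states()           # clear the accumulated state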
Call the three functions that you defined to set the optimizer, the loss, and the accuracy:
optimizer = set_adam_optimizer()
train_loss, val_loss = set_sparse_cat_crossentropy_loss()
train_accuracy, val_accuracy = set_sparse_cat_crossentropy_accuracy()
In the training loop:
- Get the model predictions by calling the model on the input x.
- Compute the training loss by calling train_loss, passing in the true y and the predicted y.
- Calculate the gradients by calling tape.gradient and passing in the loss and the model's trainable_variables.
- Apply the gradients by calling optimizer.apply_gradients and passing in a zip() of the two lists: the gradients and the model's trainable_variables.
- Update the training accuracy by calling train_accuracy, passing in the true y and the predicted y.
# this code uses the GPU if available, otherwise uses a CPU
device = '/gpu:0' if tf.config.list_physical_devices('GPU') else '/cpu:0'
EPOCHS = 2
# Custom training step
def train_one_step(model, optimizer, x, y, train_loss, train_accuracy):
    '''
    Trains on a batch of images for one step.
    Args:
        model (keras Model) -- image classifier
        optimizer (keras Optimizer) -- optimizer to use during training
        x (Tensor) -- training images
        y (Tensor) -- training labels
        train_loss (keras Loss) -- loss object for training
        train_accuracy (keras Metric) -- accuracy metric for training
    '''
    with tf.GradientTape() as tape:
        ### START CODE HERE ###
        # Run the model on input x to get predictions
        predictions = model(x)
        # Compute the training loss using `train_loss`, passing in the true y and the predicted y
        loss = train_loss(y, predictions)
    # Using the tape and loss, compute the gradients on model variables using tape.gradient
    grads = tape.gradient(loss, model.trainable_variables)
    # Zip the gradients and model variables, and then apply the result on the optimizer
    optimizer.apply_gradients(zip(grads, model.trainable_variables))
    # Call the train accuracy object on ground truth and predictions
    train_accuracy(y, predictions)
    ### END CODE HERE ###
    return loss
## TEST CODE:
def base_model():
    inputs = tf.keras.layers.Input(shape=(2,))
    x = tf.keras.layers.Dense(64, activation='relu')(inputs)
    outputs = tf.keras.layers.Dense(1, activation='sigmoid')(x)
    model = tf.keras.Model(inputs=inputs, outputs=outputs)
    return model

test_model = base_model()
test_optimizer = set_adam_optimizer()
test_image = tf.ones((2,2))
test_label = tf.ones((1,))
test_train_loss, _ = set_sparse_cat_crossentropy_loss()
test_train_accuracy, _ = set_sparse_cat_crossentropy_accuracy()
test_result = train_one_step(test_model, test_optimizer, test_image, test_label, test_train_loss, test_train_accuracy)
print(test_result)
del test_result, test_model, test_optimizer, test_image, test_label, test_train_loss, test_train_accuracy
tf.Tensor(0.6931472, shape=(), dtype=float32)
Expected Output:
You will see a Tensor with the same shape and dtype. The value might be different.
tf.Tensor(0.6931472, shape=(), dtype=float32)
You’ll first loop through the training batches to train the model. (Please complete these sections.)
- The train function will use a for loop to iteratively call the train_one_step function that you just defined.
- You’ll use tf.print to print the step number, loss, and train_accuracy.result() at each step. Remember to use tf.print when you plan to generate autograph code.
Next, you’ll loop through the batches of the validation set to calculate the validation loss and validation accuracy. (This code is provided for you.) At each iteration of the loop:
- Call the model on the batch of inputs x to get the predictions.
- Pass the true and predicted values to val_loss to compute the validation loss.
- Pass the true and predicted values to val_accuracy to update the validation accuracy.
Finally, you’ll print the validation loss and accuracy using tf.print. (Please complete this section.)
- Print the loss, which is the validation loss calculated by the last loop through the validation dataset.
HINT: If you submit your assignment and see this error for your stderr output:
Cannot convert 1e-07 to EagerTensor of dtype int64
please check your calls to train_accuracy and val_accuracy to make sure that you pass in the true and predicted values in the correct order (check the documentation to verify the order of parameters). A correct call is sketched just below.
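A minimal illustration of the correct argument order (ground truth first, predictions second):
# update_state(y_true, y_pred): labels come first
order_demo = tf.keras.metrics.SparseCategoricalAccuracy()
y_true = tf.constant([1])            # integer class label
y_pred = tf.constant([[0.2, 0.8]])   # per-class probabilities
order_demo(y_true, y_pred)           # reversing the arguments triggers the dtype error above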
# Decorate this function with tf.function to enable autograph on the training loop
@tf.function
def train(model, optimizer, epochs, device, train_ds, train_loss, train_accuracy, valid_ds, val_loss, val_accuracy):
    '''
    Performs the entire training loop. Prints the loss and accuracy per step and epoch.
    Args:
        model (keras Model) -- image classifier
        optimizer (keras Optimizer) -- optimizer to use during training
        epochs (int) -- number of epochs
        device (str) -- device on which to run the training loop
        train_ds (tf Dataset) -- the train set containing image-label pairs
        train_loss (keras Loss) -- loss function for training
        train_accuracy (keras Metric) -- accuracy metric for training
        valid_ds (tf Dataset) -- the val set containing image-label pairs
        val_loss (keras Loss) -- loss object for validation
        val_accuracy (keras Metric) -- accuracy metric for validation
    '''
    step = 0
    loss = 0.0
    for epoch in range(epochs):
        for x, y in train_ds:
            # training step number increments at each iteration
            step += 1
            with tf.device(device_name=device):
                ### START CODE HERE ###
                # Run one training step by passing appropriate model parameters
                # required by the function and finally get the loss to report the results
                loss = train_one_step(model=model,
                                      optimizer=optimizer,
                                      x=x,
                                      y=y,
                                      train_loss=train_loss,
                                      train_accuracy=train_accuracy)
                ### END CODE HERE ###
                # Use tf.print to report your results.
                # Print the training step number, loss and accuracy
                tf.print('Step', step,
                         ': train loss', loss,
                         '; train accuracy', train_accuracy.result())

        with tf.device(device_name=device):
            for x, y in valid_ds:
                # Call the model on the batches of inputs x and get the predictions
                y_pred = model(x)
                loss = val_loss(y, y_pred)
                val_accuracy(y, y_pred)

        # Print the validation loss and accuracy
        ### START CODE HERE ###
        tf.print('val loss', loss, '; val accuracy', val_accuracy.result())
        ### END CODE HERE ###
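A quick note on why tf.print is required here: inside a @tf.function, Python's print runs only once, while the function is being traced, whereas tf.print becomes an op in the generated graph and fires on every call. A small sketch, assuming nothing beyond TensorFlow itself:
@tf.function
def trace_demo(x):
    print('traced')                 # appears once, at trace time
    tf.print('called with', x)      # appears on every call
    return x + 1

trace_demo(tf.constant(1))  # prints both lines
trace_demo(tf.constant(2))  # prints only the tf.print line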
Run the train function to train your model! You should see the loss generally decreasing and the accuracy increasing.
Note: Please let the training finish before submitting and do not modify the next cell. It is required for grading. This will take around 5 minutes to run.
train(model, optimizer, EPOCHS, device, train_ds, train_loss, train_accuracy, valid_ds, val_loss, val_accuracy)
Step 1 : train loss 0.442082286 ; train accuracy 0.8125
Step 2 : train loss 0.371749818 ; train accuracy 0.828125
Step 3 : train loss 0.181267202 ; train accuracy 0.875
Step 4 : train loss 0.0999904126 ; train accuracy 0.90625
Step 5 : train loss 0.106646009 ; train accuracy 0.925
Step 6 : train loss 0.087532863 ; train accuracy 0.9375
Step 7 : train loss 0.0718464404 ; train accuracy 0.946428597
Step 8 : train loss 0.0266073756 ; train accuracy 0.953125
Step 9 : train loss 0.0388143621 ; train accuracy 0.958333313
Step 10 : train loss 0.0265576709 ; train accuracy 0.9625
Step 11 : train loss 0.01707929 ; train accuracy 0.965909064
Step 12 : train loss 0.0145232156 ; train accuracy 0.96875
Step 13 : train loss 0.0102705304 ; train accuracy 0.971153855
Step 14 : train loss 0.00559332594 ; train accuracy 0.973214269
Step 15 : train loss 0.0118989311 ; train accuracy 0.975
Step 16 : train loss 0.00820282 ; train accuracy 0.9765625
Step 17 : train loss 0.00787908304 ; train accuracy 0.977941155
Step 18 : train loss 0.00701327203 ; train accuracy 0.979166687
Step 19 : train loss 0.00721777696 ; train accuracy 0.980263174
Step 20 : train loss 0.00241702073 ; train accuracy 0.98125
Step 21 : train loss 0.00587104121 ; train accuracy 0.982142866
Step 22 : train loss 0.00208650879 ; train accuracy 0.982954562
Step 23 : train loss 0.00193865667 ; train accuracy 0.983695626
Step 24 : train loss 0.00274004834 ; train accuracy 0.984375
Step 25 : train loss 0.0625281334 ; train accuracy 0.98375
Step 26 : train loss 0.0135757038 ; train accuracy 0.984184921
val loss 0.00242076861 ; val accuracy 1
Step 27 : train loss 0.00209505693 ; train accuracy 0.98477751
Step 28 : train loss 0.00327854138 ; train accuracy 0.985327303
Step 29 : train loss 0.00319580338 ; train accuracy 0.985838771
Step 30 : train loss 0.00143901561 ; train accuracy 0.986315787
Step 31 : train loss 0.00107555441 ; train accuracy 0.986761689
Step 32 : train loss 0.000837450905 ; train accuracy 0.987179458
Step 33 : train loss 0.00206937734 ; train accuracy 0.987571716
Step 34 : train loss 0.00232008193 ; train accuracy 0.987940609
Step 35 : train loss 0.0014165109 ; train accuracy 0.988288283
Step 36 : train loss 0.00176208676 ; train accuracy 0.988616467
Step 37 : train loss 0.00243537081 ; train accuracy 0.988926768
Step 38 : train loss 0.00140081509 ; train accuracy 0.98922056
Step 39 : train loss 0.00120992027 ; train accuracy 0.989499211
Step 40 : train loss 0.00180399721 ; train accuracy 0.989763796
Step 41 : train loss 0.000653388444 ; train accuracy 0.990015388
Step 42 : train loss 0.000898264174 ; train accuracy 0.990254879
Step 43 : train loss 0.00325405411 ; train accuracy 0.990483165
Step 44 : train loss 0.00160021463 ; train accuracy 0.990701
Step 45 : train loss 0.0117650935 ; train accuracy 0.9909091
Step 46 : train loss 0.00347364461 ; train accuracy 0.99110806
Step 47 : train loss 0.00392476656 ; train accuracy 0.991298556
Step 48 : train loss 0.00106286362 ; train accuracy 0.991481
Step 49 : train loss 0.00140742259 ; train accuracy 0.991655946
Step 50 : train loss 0.00153925922 ; train accuracy 0.991823912
Step 51 : train loss 0.00206658081 ; train accuracy 0.991985202
Step 52 : train loss 0.000612177479 ; train accuracy 0.992092431
val loss 0.00108572224 ; val accuracy 1
You can now see how your model performs on test images. First, iterate over the test dataset and collect the model’s predictions:
test_imgs = []
test_labels = []
predictions = []
with tf.device(device_name=device):
    for images, labels in test_ds:
        preds = model(images)
        preds = preds.numpy()
        predictions.extend(preds)
        test_imgs.extend(images.numpy())
        test_labels.extend(labels.numpy())
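Before plotting individual examples, you can sanity-check the overall test accuracy from these collected lists (a small aside using only what was gathered above):
# Compare each prediction's argmax class against the true label
pred_labels = np.argmax(predictions, axis=1)
print('test accuracy:', np.mean(pred_labels == np.array(test_labels)))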
Let’s define a utility function for plotting an image and its prediction.
# Utilities for plotting
class_names = ['horse', 'human']

def plot_image(i, predictions_array, true_label, img):
    predictions_array, true_label, img = predictions_array[i], true_label[i], img[i]
    plt.grid(False)
    plt.xticks([])
    plt.yticks([])
    img = np.squeeze(img)
    plt.imshow(img, cmap=plt.cm.binary)
    predicted_label = np.argmax(predictions_array)
    # green-colored annotations will mark correct predictions; red otherwise
    if predicted_label == true_label:
        color = 'green'
    else:
        color = 'red'
    # print the true label first
    print(true_label)
    # show the image and overlay the prediction
    plt.xlabel("{} {:2.0f}% ({})".format(class_names[predicted_label],
                                         100*np.max(predictions_array),
                                         class_names[true_label]),
               color=color)
Choose an index and display the model’s prediction for that image.
# Visualize the outputs
# you can modify the index value here from 0 to 255 to test different images
index = 8
plt.figure(figsize=(6,3))
plt.subplot(1,2,1)
plot_image(index, predictions, test_labels, test_imgs)
plt.show()
0