At this point in the course, you should be able to explore different network architectures for forecasting. In the previous weeks, you’ve used DNNs, RNNs, and CNNs to build these models. In this final practice lab for the course, you’ll try one more configuration that combines all of these network types: the data windows will pass through a convolution, followed by stacked LSTMs, followed by stacked dense layers. See if this improves the results, or if you can just opt for the simpler models.
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import csv
def plot_series(x, y, format="-", start=0, end=None,
                title=None, xlabel=None, ylabel=None, legend=None):
    """
    Visualizes time series data

    Args:
      x (array of int) - contains values for the x-axis
      y (array of int or tuple of arrays) - contains the values for the y-axis
      format (string) - line style when plotting the graph
      start (int) - first time step to plot
      end (int) - last time step to plot
      title (string) - title of the plot
      xlabel (string) - label for the x-axis
      ylabel (string) - label for the y-axis
      legend (list of strings) - legend for the plot
    """

    # Setup dimensions of the graph figure
    plt.figure(figsize=(10, 6))

    # Check if there is more than one series to plot
    if type(y) is tuple:

        # Loop over the y elements
        for y_curr in y:

            # Plot the x and current y values
            plt.plot(x[start:end], y_curr[start:end], format)

    else:
        # Plot the x and y values
        plt.plot(x[start:end], y[start:end], format)

    # Label the x-axis
    plt.xlabel(xlabel)

    # Label the y-axis
    plt.ylabel(ylabel)

    # Set the legend
    if legend:
        plt.legend(legend)

    # Set the title
    plt.title(title)

    # Overlay a grid on the graph
    plt.grid(True)

    # Draw the graph on screen
    plt.show()
# Download the Dataset
!wget https://storage.googleapis.com/tensorflow-1-public/course4/Sunspots.csv
# Initialize lists
time_step = []
sunspots = []
# Open CSV file
with open('./Sunspots.csv') as csvfile:
    # Initialize reader
    reader = csv.reader(csvfile, delimiter=',')

    # Skip the first line
    next(reader)

    # Append row and sunspot number to lists
    for row in reader:
        time_step.append(int(row[0]))
        sunspots.append(float(row[2]))
# Convert lists to numpy arrays
time = np.array(time_step)
series = np.array(sunspots)
# Preview the data
plot_series(time, series, xlabel='Month', ylabel='Monthly Mean Total Sunspot Number')
# Define the split time
split_time = 3000
# Get the train set
time_train = time[:split_time]
x_train = series[:split_time]
# Get the validation set
time_valid = time[split_time:]
x_valid = series[split_time:]
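If you want a quick sanity check of the split, you can optionally print the resulting array shapes. This small check is not part of the original lab.

# Optional: confirm the sizes of the train and validation splits
print(x_train.shape)
print(x_valid.shape)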
def windowed_dataset(series, window_size, batch_size, shuffle_buffer):
    """Generates dataset windows

    Args:
      series (array of float) - contains the values of the time series
      window_size (int) - the number of time steps to include in the feature
      batch_size (int) - the batch size
      shuffle_buffer (int) - buffer size to use for the shuffle method

    Returns:
      dataset (TF Dataset) - TF Dataset containing time windows
    """

    # Add a channel dimension so the windows match the Conv1D
    # input shape of [window_size, 1]
    series = tf.expand_dims(series, axis=-1)

    # Generate a TF Dataset from the series values
    dataset = tf.data.Dataset.from_tensor_slices(series)

    # Window the data but only take those with the specified size
    dataset = dataset.window(window_size + 1, shift=1, drop_remainder=True)

    # Flatten the windows by putting its elements in a single batch
    dataset = dataset.flat_map(lambda window: window.batch(window_size + 1))

    # Create tuples with features and labels
    dataset = dataset.map(lambda window: (window[:-1], window[-1]))

    # Shuffle the windows
    dataset = dataset.shuffle(shuffle_buffer)

    # Create batches of windows
    dataset = dataset.batch(batch_size).prefetch(1)

    return dataset
As mentioned in the lectures, if your results don’t look good, you can try tweaking the parameters here and see if the model will learn better.
# Parameters
window_size = 30
batch_size = 32
shuffle_buffer_size = 1000
# Generate the dataset windows
train_set = windowed_dataset(x_train, window_size, batch_size, shuffle_buffer_size)
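To verify what the model will actually receive, you can optionally take a single batch from the dataset and print its shapes. This check is not part of the original lab, and the exact numbers in the comments assume the parameters defined above.

# Optional: inspect one batch of windows and labels from the training set
for window_batch, label_batch in train_set.take(1):
    print(window_batch.shape)  # (batch_size, window_size, 1), e.g. (32, 30, 1)
    print(label_batch.shape)   # (batch_size, 1), e.g. (32, 1)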
You’ve seen these layers before, and here is how they look when combined.
# Build the Model
model = tf.keras.models.Sequential([
    tf.keras.layers.Conv1D(filters=64, kernel_size=3,
                           strides=1,
                           activation="relu",
                           padding='causal',
                           input_shape=[window_size, 1]),
    tf.keras.layers.LSTM(64, return_sequences=True),
    tf.keras.layers.LSTM(64),
    tf.keras.layers.Dense(30, activation="relu"),
    tf.keras.layers.Dense(10, activation="relu"),
    tf.keras.layers.Dense(1),
    tf.keras.layers.Lambda(lambda x: x * 400)
])
# Print the model summary
model.summary()
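The Lambda layer at the end scales the output by 400, which helps the predictions land closer to the range of the sunspot values. If you want to confirm that the stacked layers connect correctly, you can optionally push a random dummy window through the untrained model; this snippet is an extra check, not part of the original lab.

# Optional: confirm the model produces one prediction per input window
dummy_window = tf.random.uniform((1, window_size, 1))
print(model(dummy_window).shape)  # expected: (1, 1)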
As usual, you will want to pick an optimal learning rate.
# Get initial weights
init_weights = model.get_weights()
# Set the learning rate scheduler
lr_schedule = tf.keras.callbacks.LearningRateScheduler(
    lambda epoch: 1e-8 * 10**(epoch / 20))
# Initialize the optimizer
optimizer = tf.keras.optimizers.SGD(momentum=0.9)
# Set the training parameters
model.compile(loss=tf.keras.losses.Huber(), optimizer=optimizer)
# Train the model
history = model.fit(train_set, epochs=100, callbacks=[lr_schedule])
# Define the learning rate array
lrs = 1e-8 * (10 ** (np.arange(100) / 20))
# Set the figure size
plt.figure(figsize=(10, 6))
# Set the grid
plt.grid(True)
# Plot the loss in log scale
plt.semilogx(lrs, history.history["loss"])
# Increase the tickmarks size
plt.tick_params('both', length=10, width=1, which='both')
# Set the plot boundaries
plt.axis([1e-8, 1e-3, 0, 100])
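Besides eyeballing the plot, you can optionally check where the recorded loss is lowest. This snippet is not part of the original lab, and you would still typically pick a learning rate somewhat below the point where the curve becomes unstable.

# Optional: find the learning rate with the lowest recorded loss
min_idx = np.argmin(history.history["loss"])
print(f"Lowest loss of {history.history['loss'][min_idx]:.4f} at lr = {lrs[min_idx]:.2e}")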
Now you can proceed to reset and train the model. It is set to 100 epochs in the cell below, but feel free to increase it if you want. Laurence got his results in the lectures after 500 epochs.
# Reset states generated by Keras
tf.keras.backend.clear_session()
# Reset the weights
model.set_weights(init_weights)
# Set the learning rate
learning_rate = 8e-7
# Set the optimizer
optimizer = tf.keras.optimizers.SGD(learning_rate=learning_rate, momentum=0.9)
# Set the training parameters
model.compile(loss=tf.keras.losses.Huber(),
              optimizer=optimizer,
              metrics=["mae"])

# Train the model
history = model.fit(train_set, epochs=100)
You can visualize the training and see if the loss and MAE are still trending down.
# Get mae and loss from history log
mae=history.history['mae']
loss=history.history['loss']
# Get number of epochs
epochs=range(len(loss))
# Plot mae and loss
plot_series(
    x=epochs,
    y=(mae, loss),
    title='MAE and Loss',
    xlabel='Epochs',
    ylabel='Value',
    legend=['MAE', 'Loss']
)
# Only plot the last 80% of the epochs
zoom_split = int(epochs[-1] * 0.2)
epochs_zoom = epochs[zoom_split:]
mae_zoom = mae[zoom_split:]
loss_zoom = loss[zoom_split:]
# Plot zoomed mae and loss
plot_series(
    x=epochs_zoom,
    y=(mae_zoom, loss_zoom),
    title='MAE and Loss',
    xlabel='Epochs',
    ylabel='Value',
    legend=['MAE', 'Loss']
)
As before, you can get the predictions for the validation set time range and compute the metrics.
def model_forecast(model, series, window_size, batch_size):
    """Uses an input model to generate predictions on data windows

    Args:
      model (TF Keras Model) - model that accepts data windows
      series (array of float) - contains the values of the time series
      window_size (int) - the number of time steps to include in the window
      batch_size (int) - the batch size

    Returns:
      forecast (numpy array) - array containing predictions
    """

    # Add a channel dimension so the windows match the Conv1D
    # input shape of [window_size, 1]
    series = tf.expand_dims(series, axis=-1)

    # Generate a TF Dataset from the series values
    dataset = tf.data.Dataset.from_tensor_slices(series)

    # Window the data but only take those with the specified size
    dataset = dataset.window(window_size, shift=1, drop_remainder=True)

    # Flatten the windows by putting its elements in a single batch
    dataset = dataset.flat_map(lambda w: w.batch(window_size))

    # Create batches of windows
    dataset = dataset.batch(batch_size).prefetch(1)

    # Get predictions on the entire dataset
    forecast = model.predict(dataset)

    return forecast
# Reduce the original series
forecast_series = series[split_time-window_size:-1]
# Use helper function to generate predictions
forecast = model_forecast(model, forecast_series, window_size, batch_size)
# Drop single dimensional axis
results = forecast.squeeze()
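# Sanity check (optional, not in the original lab): the forecast should contain
# one prediction per time step in the validation set, so the shapes should match.
print(results.shape, x_valid.shape)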
# Plot the results
plot_series(time_valid, (x_valid, results))
# Compute the MAE
print(tf.keras.metrics.mean_absolute_error(x_valid, results).numpy())
This concludes the final practice lab for this course! You implemented a deep and complex architecture composed of CNNs, RNNs, and DNNs. You’ll be using the skills you developed throughout this course to complete the final assignment. Keep it up!
In this optional section, you will look at another way to dynamically set the learning rate. As you may have noticed, training for a long time produces smaller and smaller changes to the loss and metrics. You can run the cell below to observe that again.
# Initialize the optimizer
optimizer = tf.keras.optimizers.SGD(learning_rate=1e-7, momentum=0.9)
# Set the training parameters
model.compile(loss=tf.keras.losses.Huber(),
              optimizer=optimizer,
              metrics=["mae"])

# Train the model
history = model.fit(train_set, epochs=10)
At some point, the static learning rate you set might no longer be optimal once the model has been training for a while. You may want to decrease it further to see better improvements. One way to do that is to have your training loop gradually decay the learning rate each epoch. You can pass in a lambda function similar to the one you used for the learning rate scheduler earlier, or you can use ExponentialDecay(), a built-in scheduler from the Keras API. It decays the learning rate as defined by this function:
def decayed_learning_rate(step):
    return initial_learning_rate * decay_rate ** (step / decay_steps)
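For example, with the settings used below (initial_learning_rate=1e-7, decay_steps=400, decay_rate=0.96, staircase=True), the learning rate stays at 1e-7 for the first 400 training steps, then drops to 1e-7 * 0.96 = 9.6e-8 for the next 400 steps, and so on. With staircase=False, the decay would instead be applied smoothly at every step.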
See how it is used below.
# Set the initial learning rate
initial_learning_rate=1e-7
# Define the scheduler
lr_schedule = tf.keras.optimizers.schedules.ExponentialDecay(
    initial_learning_rate,
    decay_steps=400,
    decay_rate=0.96,
    staircase=True)
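# Optional (not in the original lab): the schedule object is callable, so you can
# preview the learning rate it will return at a given training step.
print(lr_schedule(0).numpy())    # 1e-07
print(lr_schedule(400).numpy())  # ~9.6e-08, after the first decay interval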
# Set the optimizer
optimizer = tf.keras.optimizers.SGD(learning_rate=lr_schedule, momentum=0.9)
# Set the training parameters
model.compile(loss=tf.keras.losses.Huber(),
              optimizer=optimizer,
              metrics=["mae"])

# Train the model
history = model.fit(train_set, epochs=40)
Now see the results by getting predictions and computing the metrics.
# Reduce the original series
forecast_series = series[split_time-window_size:-1]
# Use helper function to generate predictions
forecast = model_forecast(model, forecast_series, window_size, batch_size)
# Drop single dimensional axis
results = forecast.squeeze()
# Plot the results
plot_series(time_valid, (x_valid, results))
# Compute the MAE
tf.keras.metrics.mean_absolute_error(x_valid, results).numpy()