The CIFAR-10 dataset (Canadian Institute For Advanced Research) is a collection of images that are commonly used to train machine learning and computer vision algorithms. It is one of the most widely used datasets for machine learning research. The CIFAR-10 dataset contains 60,000 32x32 color images in 10 different classes. The 10 different classes represent airplanes, cars, birds, cats, deer, dogs, frogs, horses, ships, and trucks. There are 6,000 images of each class.
import numpy as np
import pandas as pd
from packaging import version
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.metrics import accuracy_score
from sklearn.metrics import mean_squared_error as MSE
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import seaborn as sns
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import models, layers
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPool2D, BatchNormalization, Dropout, Flatten, Dense
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping
from tensorflow.keras.preprocessing import image
from tensorflow.keras.utils import to_categorical
2022-11-21 01:33:51.703012: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
# IPython magic: render matplotlib figures inline in the notebook output.
%matplotlib inline
# Print NumPy arrays with 3 decimals and without scientific notation.
np.set_printoptions(precision=3, suppress=True)
print("This notebook requires TensorFlow 2.0 or above")
print("TensorFlow version: ", tf.__version__)
# Stop early when the installed TensorFlow major version is below 2.
assert version.parse(tf.__version__).release[0] >=2
This notebook requires TensorFlow 2.0 or above TensorFlow version: 2.10.0
# Report the version of the Keras package imported from TensorFlow.
print("Keras version: ", keras.__version__)
Keras version: 2.10.0
# from google.colab import drive
# drive.mount('/content/gdrive')
def get_three_classes(x, y):
    """Return a shuffled subset of ``(x, y)`` restricted to classes 0, 1 and 2.

    Args:
        x: Array of samples, indexed along its first axis.
        y: Integer class labels, one per sample, shaped ``(n,)`` or ``(n, 1)``.

    Returns:
        A tuple ``(x_subset, y_one_hot)``. ``y_one_hot`` is a float32 array of
        shape ``(m, 3)``; it always has three columns, even when one of the
        three classes does not occur in ``y``.
    """
    # Flatten so both (n,) and (n, 1) label layouts are accepted.
    labels = np.asarray(y).reshape(-1)
    # Gather class 0 first, then 1, then 2, before shuffling.
    keep = np.concatenate(
        [np.flatnonzero(labels == class_id) for class_id in (0, 1, 2)]
    )
    # Shuffle so the three classes are interleaved.
    keep = keep[np.random.permutation(keep.size)]
    # Row i of the identity matrix is the one-hot vector for class i.
    one_hot = np.eye(3, dtype=np.float32)[labels[keep].astype(int)]
    return x[keep], one_hot
def show_random_examples(x, y, p, class_names=None):
    """Plot up to ten randomly chosen images labelled with their predicted class.

    Args:
        x: Array of images, indexed along its first axis.
        y: True labels as per-class score rows (argmax gives the class id).
        p: Predicted per-class score rows, aligned with ``y``.
        class_names: Optional sequence mapping class id to display name.
            Defaults to the notebook-level ``class_names_preview``.

    The label under each image is green when prediction and truth agree and
    red otherwise.
    """
    if class_names is None:
        # Fall back to the notebook global to stay compatible with existing calls.
        class_names = class_names_preview
    # Sampling without replacement fails if more items are requested than exist.
    n_show = min(10, x.shape[0])
    indices = np.random.choice(x.shape[0], n_show, replace=False)
    x, y, p = x[indices], y[indices], p[indices]
    plt.figure(figsize=(10, 5))
    for i in range(n_show):
        plt.subplot(2, 5, i + 1)
        plt.imshow(x[i])
        plt.xticks([])
        plt.yticks([])
        predicted = np.argmax(p[i])
        col = 'green' if np.argmax(y[i]) == predicted else 'red'
        plt.xlabel(class_names[predicted], color=col)
    plt.show()
def plot_history(history):
    """Draw training vs. validation loss and accuracy curves side by side.

    Args:
        history: Object whose ``history`` attribute is a dict with the keys
            ``loss``, ``accuracy``, ``val_loss`` and ``val_accuracy``, each
            holding one value per epoch.
    """
    record = history.history
    train_loss, train_acc = record['loss'], record['accuracy']
    val_loss, val_acc = record['val_loss'], record['val_accuracy']
    epoch_axis = range(len(train_loss))
    panels = (
        ('Loss', train_loss, val_loss),
        ('Accuracy', train_acc, val_acc),
    )
    plt.figure(figsize=(16, 4))
    for position, (title, train_curve, val_curve) in enumerate(panels, start=1):
        plt.subplot(1, 2, position)
        plt.plot(epoch_axis, train_curve, label='Training {}'.format(title))
        plt.plot(epoch_axis, val_curve, label='Validation {}'.format(title))
        plt.legend()
    plt.show()
def print_validation_report(y_test, predictions):
    """Print the classification report, accuracy and RMSE for a set of predictions.

    Args:
        y_test: True class labels.
        predictions: Predicted class labels aligned with ``y_test``.
    """
    print("Classification Report")
    report = classification_report(y_test, predictions)
    print(report)
    accuracy = accuracy_score(y_test, predictions)
    print('Accuracy Score: {}'.format(accuracy))
    # NOTE(review): the RMSE is computed on the class ids themselves; confirm
    # this number is meaningful for categorical labels before relying on it.
    rmse = np.sqrt(MSE(y_test, predictions))
    print('Root Mean Square Error: {}'.format(rmse))
def plot_confusion_matrix(y_true, y_pred):
    """Render the confusion matrix of ``y_true`` vs. ``y_pred`` as an annotated heatmap.

    Args:
        y_true: True class labels.
        y_pred: Predicted class labels aligned with ``y_true``.
    """
    counts = confusion_matrix(y_true, y_pred)
    _, axes = plt.subplots(figsize=(8, 8))
    heatmap_options = dict(
        annot=True,
        fmt='d',
        linewidths=.75,
        cbar=False,
        cmap='Blues',
        linecolor='white',
    )
    sns.heatmap(counts, ax=axes, **heatmap_options)
    plt.ylabel('true label')
    plt.xlabel('predicted label')
The CIFAR-10 dataset consists of 60000 32x32 colour images in 10 classes, with 6000 images per class. There are 50000 training images and 10000 test images.
The dataset is divided into five training batches and one test batch, each with 10000 images. The test batch contains exactly 1000 randomly-selected images from each class. The training batches contain the remaining images in random order, but some training batches may contain more images from one class than another. Between them, the training batches contain exactly 5000 images from each class.
# Fetch CIFAR-10 and unpack the training and test splits separately.
train_split, test_split = keras.datasets.cifar10.load_data()
x_train, y_train = train_split
x_test, y_test = test_split

# Report the array shapes of every split.
print('train_images:\t{}'.format(x_train.shape))
print('train_labels:\t{}'.format(y_train.shape))
print('test_images:\t\t{}'.format(x_test.shape))
print('test_labels:\t\t{}'.format(y_test.shape))
train_images: (50000, 32, 32, 3) train_labels: (50000, 1) test_images: (10000, 32, 32, 3) test_labels: (10000, 1)
# Show the raw numeric labels of the first ten training samples.
print("First ten labels training dataset:\n {}\n".format(y_train[0:10]))
print("This output the numeric label, need to convert to item description")
First ten labels training dataset: [[6] [9] [9] [4] [1] [1] [2] [7] [8] [3]] This output the numeric label, need to convert to item description
# Load a separate copy of the dataset for the three-class preview.
(train_images, train_labels), (test_images, test_labels) = tf.keras.datasets.cifar10.load_data()

# Display names for class ids 0, 1 and 2; read by show_random_examples.
class_names_preview = ['aeroplane', 'car', 'bird']

# Only the test-set preview is displayed, so the subset is built once from
# the test split.
x_preview, y_preview = get_three_classes(test_images, test_labels)

# The true labels are passed as "predictions" as well, so every caption is
# drawn in green.
show_random_examples(x_preview, y_preview, y_preview)
The labels are an array of integers, ranging from 0 to 9. These correspond to the class of object the image represents:
Label | Class_ |
---|---|
0 | airplane |
1 | automobile |
2 | bird |
3 | cat |
4 | deer |
5 | dog |
6 | frog |
7 | horse |
8 | ship |
9 | truck |
# Display names for CIFAR-10 class ids; the list index is the class id.
class_names = [
    'airplane',
    'automobile',
    'bird',
    'cat',
    'deer',
    'dog',
    'frog',
    'horse',
    'ship',
    'truck',
]
# Hold out 10% of the training data for validation, with a fixed seed so the
# split is reproducible.
split_arrays = train_test_split(
    x_train,
    y_train,
    test_size=0.1,
    random_state=42,
    shuffle=True,
)
x_train_split, x_valid_split, y_train_split, y_valid_split = split_arrays
print(x_train_split.shape, x_valid_split.shape, x_test.shape)
(45000, 32, 32, 3) (5000, 32, 32, 3) (10000, 32, 32, 3)
The images are 32x32x3 NumPy arrays (height x width x RGB channels), with pixel values ranging from 0 to 255. We scale them to the range [0, 1] before training.
# Scale pixel values from [0, 255] to [0, 1]. Casting to float32 first keeps
# the result in float32; dividing the uint8 arrays directly would produce
# float64 arrays that take twice the memory.
x_train_norm = x_train_split.astype('float32') / 255
x_valid_norm = x_valid_split.astype('float32') / 255
x_test_norm = x_test.astype('float32') / 255
We use the Sequential class defined in Keras to create our model. The first 9 layers (three Conv2D, MaxPool2D, Dropout stacks) handle feature learning. The last 2 layers (Flatten and Dense) handle classification.
# Three Conv2D -> MaxPool2D -> Dropout stages with 128, 256 and 512 filters,
# followed by Flatten and a 10-way softmax classifier.
feature_layers = []
for n_filters in (128, 256, 512):
    conv_options = dict(
        filters=n_filters,
        kernel_size=(3, 3),
        strides=(1, 1),
        activation=tf.nn.relu,
    )
    if not feature_layers:
        # Only the very first layer declares the input shape.
        conv_options['input_shape'] = x_train_norm.shape[1:]
    feature_layers.append(Conv2D(**conv_options))
    feature_layers.append(MaxPool2D((2, 2), strides=2))
    feature_layers.append(Dropout(0.6))

classifier_layers = [
    Flatten(),
    Dense(units=10, activation=tf.nn.softmax),
]
model = Sequential(feature_layers + classifier_layers)
2022-11-21 01:37:19.342545: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
# Print each layer with its output shape and parameter count.
model.summary()
Model: "sequential" _________________________________________________________________ Layer (type) Output Shape Param # ================================================================= conv2d (Conv2D) (None, 30, 30, 128) 3584 max_pooling2d (MaxPooling2D (None, 15, 15, 128) 0 ) dropout (Dropout) (None, 15, 15, 128) 0 conv2d_1 (Conv2D) (None, 13, 13, 256) 295168 max_pooling2d_1 (MaxPooling (None, 6, 6, 256) 0 2D) dropout_1 (Dropout) (None, 6, 6, 256) 0 conv2d_2 (Conv2D) (None, 4, 4, 512) 1180160 max_pooling2d_2 (MaxPooling (None, 2, 2, 512) 0 2D) dropout_2 (Dropout) (None, 2, 2, 512) 0 flatten (Flatten) (None, 2048) 0 dense (Dense) (None, 10) 20490 ================================================================= Total params: 1,499,402 Trainable params: 1,499,402 Non-trainable params: 0 _________________________________________________________________
# Write a diagram of the model, including tensor shapes, to CIFAR10.png.
# plot_model needs both pydot and graphviz to be installed.
keras.utils.plot_model(model, "CIFAR10.png", show_shapes=True)
You must install pydot (`pip install pydot`) and install graphviz (see instructions at https://graphviz.gitlab.io/download/) for plot_model to work.
In addition to setting up our model architecture, we also need to define which algorithm the model should use to optimize the weights and biases for the given data. We will use the Adam optimizer, an adaptive variant of stochastic gradient descent.
We also need to define a loss function. Think of this function as the difference between the predicted outputs and the actual outputs given in the dataset. This loss needs to be minimised in order to have a higher model accuracy. That's what the optimization algorithm essentially does - it minimises the loss during model training. For our multi-class classification problem with integer class labels, sparse categorical cross-entropy is commonly used.
Finally, we will use the accuracy during training as a metric to keep track of as the model trains.
# Integer class labels plus softmax outputs: sparse categorical
# cross-entropy on probabilities (from_logits=False).
model.compile(
    optimizer='adam',
    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False),
    metrics=['accuracy'],
)

# Checkpointing and early stopping watch the same metric, so the model saved
# to disk is the best one by the criterion that ends training.
monitored_metric = 'val_accuracy'

history = model.fit(
    x_train_norm,
    y_train_split,
    epochs=200,
    batch_size=64,
    validation_data=(x_valid_norm, y_valid_split),
    callbacks=[
        # Keep only the best full model (architecture + weights) on disk.
        ModelCheckpoint(
            "CNN_model.h5",
            monitor=monitored_metric,
            mode='max',
            save_best_only=True,
            save_weights_only=False,
        ),
        # Stop once the metric has not improved for 3 consecutive epochs.
        EarlyStopping(monitor=monitored_metric, mode='max', patience=3),
    ],
)
Epoch 1/200 704/704 [==============================] - 1460s 2s/step - loss: 1.7262 - accuracy: 0.3623 - val_loss: 1.3675 - val_accuracy: 0.5166 Epoch 2/200 704/704 [==============================] - 517s 734ms/step - loss: 1.3820 - accuracy: 0.5035 - val_loss: 1.3520 - val_accuracy: 0.5346 Epoch 3/200 704/704 [==============================] - 464s 659ms/step - loss: 1.2771 - accuracy: 0.5476 - val_loss: 1.1930 - val_accuracy: 0.5946 Epoch 4/200 704/704 [==============================] - 1615s 2s/step - loss: 1.2009 - accuracy: 0.5766 - val_loss: 1.1353 - val_accuracy: 0.6102 Epoch 5/200 704/704 [==============================] - 313s 444ms/step - loss: 1.1557 - accuracy: 0.5911 - val_loss: 1.1681 - val_accuracy: 0.5866 Epoch 6/200 704/704 [==============================] - 1067s 2s/step - loss: 1.1249 - accuracy: 0.6040 - val_loss: 1.0392 - val_accuracy: 0.6452 Epoch 7/200 704/704 [==============================] - 884s 1s/step - loss: 1.0964 - accuracy: 0.6150 - val_loss: 1.0029 - val_accuracy: 0.6612 Epoch 8/200 704/704 [==============================] - 558s 793ms/step - loss: 1.0782 - accuracy: 0.6213 - val_loss: 0.9988 - val_accuracy: 0.6494 Epoch 9/200 704/704 [==============================] - 1390s 2s/step - loss: 1.0670 - accuracy: 0.6267 - val_loss: 0.9576 - val_accuracy: 0.6674 Epoch 10/200 704/704 [==============================] - 304s 432ms/step - loss: 1.0472 - accuracy: 0.6329 - val_loss: 0.9920 - val_accuracy: 0.6536 Epoch 11/200 704/704 [==============================] - 1404s 2s/step - loss: 1.0413 - accuracy: 0.6359 - val_loss: 0.9476 - val_accuracy: 0.6824 Epoch 12/200 704/704 [==============================] - 525s 744ms/step - loss: 1.0283 - accuracy: 0.6415 - val_loss: 0.8899 - val_accuracy: 0.7074 Epoch 13/200 704/704 [==============================] - 782s 1s/step - loss: 1.0209 - accuracy: 0.6447 - val_loss: 0.9011 - val_accuracy: 0.7110 Epoch 14/200 704/704 [==============================] - 1017s 1s/step - loss: 1.0063 - accuracy: 
0.6482 - val_loss: 0.9000 - val_accuracy: 0.6960 Epoch 15/200 704/704 [==============================] - 240s 339ms/step - loss: 0.9983 - accuracy: 0.6538 - val_loss: 0.9794 - val_accuracy: 0.6604 Epoch 16/200 704/704 [==============================] - 395s 562ms/step - loss: 1.0012 - accuracy: 0.6530 - val_loss: 0.8694 - val_accuracy: 0.7002
In order to ensure that this is not a simple "memorization" by the machine, we should evaluate the performance on the test set. This is easy to do: we simply use the `evaluate` method on our model.
# Restore the checkpointed model and score it on the held-out test set.
model = tf.keras.models.load_model("CNN_model.h5")
# evaluate() returns [loss, accuracy] for the metrics compiled above.
test_metrics = model.evaluate(x_test_norm, y_test)
print(f"Test acc: {test_metrics[1]:.3f}")
313/313 [==============================] - 47s 142ms/step - loss: 0.8964 - accuracy: 0.6945 Test acc: 0.695
# Softmax outputs for the test images: one row per image, one column per class.
preds = model.predict(x_test_norm)
print('shape of preds: ', preds.shape)
313/313 [==============================] - 46s 141ms/step shape of preds: (10000, 10)
We use Matplotlib to create 2 plots--displaying the training and validation loss (resp. accuracy) for each (training) epoch side by side.
# history.history maps each tracked metric name to its per-epoch values.
history_dict = history.history
# Notebook display: list the metric names that were recorded.
history_dict.keys()
dict_keys(['loss', 'accuracy', 'val_loss', 'val_accuracy'])
# One row per epoch, one column per recorded metric.
history_df=pd.DataFrame(history_dict)
# Notebook display: the last five epochs, rounded to 3 decimals.
history_df.tail().round(3)
loss | accuracy | val_loss | val_accuracy | |
---|---|---|---|---|
11 | 1.028 | 0.641 | 0.890 | 0.707 |
12 | 1.021 | 0.645 | 0.901 | 0.711 |
13 | 1.006 | 0.648 | 0.900 | 0.696 |
14 | 0.998 | 0.654 | 0.979 | 0.660 |
15 | 1.001 | 0.653 | 0.869 | 0.700 |
# Plot training vs. validation loss and accuracy per epoch.
plot_history(history)
We use `sklearn.metrics` to report the classification metrics. Then we visualize the confusion matrix and see what that tells us.
# Turn the per-class probabilities into a single predicted class id per image.
pred1 = np.argmax(model.predict(x_test_norm), axis=1)
313/313 [==============================] - 43s 135ms/step
# Print per-class precision/recall, overall accuracy and RMSE for the test set.
print_validation_report(y_test, pred1)
Classification Report precision recall f1-score support 0 0.86 0.64 0.74 1000 1 0.93 0.78 0.85 1000 2 0.60 0.55 0.58 1000 3 0.59 0.45 0.51 1000 4 0.61 0.65 0.63 1000 5 0.62 0.65 0.63 1000 6 0.52 0.91 0.66 1000 7 0.83 0.67 0.74 1000 8 0.77 0.85 0.81 1000 9 0.82 0.79 0.81 1000 accuracy 0.69 10000 macro avg 0.72 0.69 0.70 10000 weighted avg 0.72 0.69 0.70 10000 Accuracy Score: 0.6945 Root Mean Square Error: 2.1993408103338603
# Heatmap of true vs. predicted class counts on the test set.
plot_confusion_matrix(y_test,pred1)
# Reload the checkpointed model and recompute the test-set probabilities.
# NOTE(review): this repeats the earlier load/predict steps; presumably kept
# so this part of the notebook can be run on its own - confirm.
model = tf.keras.models.load_model('CNN_model.h5')
preds = model.predict(x_test_norm)
313/313 [==============================] - 38s 118ms/step
# Notebook display: (number of test images, number of classes).
preds.shape
(10000, 10)
# Colour map used to shade the table cells by probability.
cm = sns.light_palette((260, 75, 60), input="husl", as_cmap=True)

# Predicted class probabilities of the first 20 test images, one column per class.
probability_columns = [
    'airplane', 'automobile', 'bird', 'cat', 'deer',
    'dog', 'frog', 'horse', 'ship', 'truck',
]
df = pd.DataFrame(preds[:20], columns=probability_columns)

# Notebook display: percentages with a background gradient.
df.style.format("{:.2%}").background_gradient(cmap=cm)
airplane | automobile | bird | cat | deer | dog | frog | horse | ship | truck | |
---|---|---|---|---|---|---|---|---|---|---|
0 | 0.34% | 0.16% | 3.54% | 63.13% | 2.56% | 10.20% | 15.33% | 0.57% | 3.75% | 0.42% |
1 | 3.53% | 7.32% | 0.05% | 0.03% | 0.01% | 0.00% | 0.01% | 0.00% | 88.03% | 1.02% |
2 | 2.25% | 12.35% | 0.33% | 0.83% | 0.12% | 0.08% | 0.48% | 0.05% | 77.72% | 5.78% |
3 | 62.01% | 9.68% | 2.75% | 0.51% | 0.43% | 0.12% | 0.33% | 0.09% | 22.98% | 1.09% |
4 | 0.01% | 0.02% | 3.88% | 2.50% | 2.80% | 0.09% | 90.62% | 0.01% | 0.07% | 0.01% |
5 | 0.00% | 0.00% | 0.32% | 2.71% | 1.45% | 0.61% | 94.86% | 0.03% | 0.00% | 0.01% |
6 | 1.00% | 58.30% | 2.20% | 2.97% | 0.43% | 2.23% | 3.31% | 0.70% | 0.12% | 28.73% |
7 | 0.89% | 0.11% | 21.54% | 4.92% | 8.44% | 0.71% | 62.87% | 0.10% | 0.32% | 0.09% |
8 | 0.04% | 0.03% | 7.86% | 60.02% | 8.08% | 6.29% | 16.94% | 0.57% | 0.07% | 0.11% |
9 | 0.84% | 18.59% | 0.68% | 1.85% | 0.87% | 0.50% | 32.95% | 0.18% | 4.06% | 39.49% |
10 | 13.45% | 0.38% | 11.20% | 9.62% | 22.48% | 8.37% | 2.27% | 5.71% | 24.28% | 2.24% |
11 | 0.10% | 1.18% | 0.03% | 0.05% | 0.03% | 0.03% | 0.04% | 0.08% | 0.24% | 98.21% |
12 | 0.12% | 0.36% | 9.73% | 17.38% | 24.86% | 19.89% | 24.22% | 2.92% | 0.31% | 0.19% |
13 | 0.06% | 0.03% | 0.23% | 0.60% | 2.58% | 4.29% | 0.71% | 91.31% | 0.01% | 0.17% |
14 | 0.07% | 0.86% | 0.01% | 0.04% | 0.02% | 0.00% | 0.01% | 0.02% | 0.27% | 98.69% |
15 | 0.34% | 0.15% | 4.05% | 1.95% | 3.61% | 0.12% | 77.61% | 0.01% | 12.11% | 0.06% |
16 | 0.03% | 0.09% | 1.79% | 19.55% | 0.51% | 73.64% | 1.79% | 2.36% | 0.06% | 0.18% |
17 | 0.27% | 0.33% | 6.45% | 18.62% | 18.11% | 11.36% | 32.37% | 10.48% | 0.47% | 1.54% |
18 | 2.13% | 0.70% | 0.01% | 0.13% | 0.05% | 0.01% | 0.11% | 0.02% | 93.85% | 3.00% |
19 | 0.01% | 0.06% | 0.87% | 1.43% | 1.86% | 0.52% | 95.11% | 0.10% | 0.01% | 0.03% |
# Pick one test image whose activations will be visualised.
(_, _), (test_images, test_labels) = tf.keras.datasets.cifar10.load_data()
img = test_images[2000]

# Build a batch of one and scale it to [0, 1]; the network was trained on
# inputs divided by 255, so raw 0-255 pixels would give misleading activations.
img_tensor = image.img_to_array(img)
img_tensor = np.expand_dims(img_tensor, axis=0) / 255

# Display names for CIFAR-10 class ids; the list index is the class id.
class_names = [
    'airplane',
    'automobile',
    'bird',
    'cat',
    'deer',
    'dog',
    'frog',
    'horse',
    'ship',
    'truck',
]

# Show the unscaled image itself.
plt.imshow(img, cmap='viridis')
plt.axis('off')
plt.show()
# Collect the output tensors of the first 8 layers of the trained model.
layer_outputs = [layer.output for layer in model.layers[:8]]
# Build a model that maps the original input to all of those outputs at once.
activation_model = models.Model(inputs=model.input, outputs=layer_outputs)
# Run the sample image through it: one activation array per selected layer.
activations = activation_model.predict(img_tensor)
# Notebook display: number of activation arrays returned.
len(activations)
1/1 [==============================] - 1s 528ms/step
8
# Names of all layers of the model, in order.
layer_names = [layer.name for layer in model.layers]
# Notebook display of the collected names.
layer_names
['conv2d', 'max_pooling2d', 'dropout', 'conv2d_1', 'max_pooling2d_1', 'dropout_1', 'conv2d_2', 'max_pooling2d_2', 'dropout_2', 'flatten', 'dense']
# Names of the first three layers, used as figure titles. zip() below stops at
# the shorter sequence, so only these three layers are drawn.
layer_names = [layer.name for layer in model.layers[:3]]
images_per_row = 16

# Draw one figure per layer, tiling every channel of its feature map.
for layer_name, layer_activation in zip(layer_names, activations):
    # The feature map has shape (1, size, size, n_features).
    n_features = layer_activation.shape[-1]
    size = layer_activation.shape[1]

    # Channels are laid out in a grid with `images_per_row` tiles per row.
    n_cols = n_features // images_per_row
    display_grid = np.zeros((size * n_cols, images_per_row * size))

    for col in range(n_cols):
        for row in range(images_per_row):
            # Copy the channel so the in-place normalisation below does not
            # modify the stored activations.
            channel_image = layer_activation[
                0, :, :, col * images_per_row + row
            ].copy()
            # Standardise, then map to a displayable 0-255 range.
            channel_image -= channel_image.mean()
            spread = channel_image.std()
            if spread > 0:
                # A constant channel has zero spread; dividing would yield NaN.
                channel_image /= spread
            channel_image *= 64
            channel_image += 128
            channel_image = np.clip(channel_image, 0, 255).astype('uint8')
            display_grid[col * size:(col + 1) * size,
                         row * size:(row + 1) * size] = channel_image

    # Size the figure so every tile is drawn at the same scale.
    scale = 1. / size
    plt.figure(figsize=(scale * display_grid.shape[1],
                        scale * display_grid.shape[0]))
    plt.title(layer_name)
    plt.grid(False)
    plt.imshow(display_grid, aspect='auto', cmap='viridis')
    plt.show()
/var/folders/zz/wfk650fx7lx3mmyd6_y3yxgr0000gr/T/ipykernel_47007/1872159762.py:28: RuntimeWarning: invalid value encountered in true_divide channel_image /= channel_image.std()