MSDS458 Research Assignment 1:¶

  • In this notebook, we will build a DNN model for classifying MNIST digits. The DNN model will consist of 784 input nodes, a hidden layer with 128 nodes and 10 output nodes (corresponding to the 10 digits).
  • We use mnist.load_data() to get the 70,000 images, divided into a set of 60,000 training images and 10,000 test images. We hold back 20% (12,000) of the 60,000 training images for validation.
  • After training and evaluating our DNN model we analyze its performance. In particular, we use confusion matrices to compare the predicted classes with the class labels to try to determine why some images were misclassified by the model.
  • We then obtain the 60,000 activation values of one of the hidden nodes for the (original) set of training data. We want to use these activation values as "proxies" for the predicted classes of the 60,000 images.
  • Just as we compared the predicted classes with the class labels using confusion matrices to determine the efficacy of the model, we use box plots to visualize the relationship between the activation values of one hidden node and the class labels. We don't expect these activation values to have much "predictive power"; in fact, the same activation values can be associated with multiple class labels, resulting in a lot of overlap in the box plots.
  • We also perform similar experiments comparing the values at two pixel locations in the images with the class labels. This time we use scatter plots to visualize the relationship between the pair of pixel values and the class labels (represented by different colored dots).
  • Pixel values at two locations in an image should not have much predictive value. To improve on this approach, we apply PCA decomposition to both the raw data of 784 pixel values and the 128 hidden-node activation values to reduce the number of features to 2 in each case. Once again, we use a scatter plot to visualize the correlation between the two principal-component values and the class labels.
  • Finally, we use a Random Forest Classifier to find the relative importance of the 784 features (pixels) in the training set. We then select the 70 most important features (pixels) from the training, validation and test images to test our 'best' model on. (A sketch of these last two steps appears right after this list.)
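
The sketch below (not the code used later in the notebook) illustrates those last two steps. It assumes x_train_norm is the flattened, normalized (60000, 784) training matrix and y_train the integer labels prepared further down in the notebook.

# A minimal sketch of the PCA and Random Forest steps described above.
# Assumes x_train_norm (60000, 784) and y_train are the arrays built later in this notebook.
import numpy as np
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
from sklearn.ensemble import RandomForestClassifier

# Reduce the 784 pixel features to 2 principal components and color points by class label.
pca = PCA(n_components=2)
components = pca.fit_transform(x_train_norm)
plt.scatter(components[:, 0], components[:, 1], c=y_train, cmap='tab10', s=2)
plt.colorbar(label='class label')
plt.show()

# Rank pixel importance with a Random Forest and keep the 70 most important pixels.
rf = RandomForestClassifier(n_estimators=100, random_state=42)
rf.fit(x_train_norm, y_train)
top70 = np.argsort(rf.feature_importances_)[-70:]
x_train_top70 = x_train_norm[:, top70]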

Importing Packages¶

  • First we import all the packages that will be used in the assignment.

  • Since Keras is integrated into TensorFlow 2.x, we import keras from tensorflow and use tensorflow.keras.xxx to import all other Keras packages. The seed argument produces a deterministic sequence of tensors across multiple calls.
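
For reproducible runs, one option (a sketch, not something the cells below rely on) is to fix the NumPy and TensorFlow seeds up front; the seed value here is arbitrary.

# Optional: fix random seeds so weight initialization and shuffling are repeatable.
import numpy as np
import tensorflow as tf

SEED = 42                 # arbitrary seed value
np.random.seed(SEED)      # seeds NumPy operations (e.g., shuffling)
tf.random.set_seed(SEED)  # seeds TensorFlow weight initialization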

In [3]:
import datetime
from packaging import version
from collections import Counter
import numpy as np
import pandas as pd

import matplotlib as mpl  # EA
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.metrics import confusion_matrix, classification_report
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import mean_squared_error as MSE
from sklearn.metrics import accuracy_score

import tensorflow as tf
from tensorflow.keras.utils import to_categorical
from tensorflow import keras
from tensorflow.keras import models
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten
from tensorflow.keras.datasets import mnist
2022-10-18 00:04:54.712252: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
In [4]:
%matplotlib inline
np.set_printoptions(precision=3, suppress=True) 

Verify TensorFlow version and Keras version¶

In [5]:
print("This notebook requires TensorFlow 2.0 or above")
print("TensorFlow version: ", tf.__version__)
assert version.parse(tf.__version__).release[0] >=2
This notebook requires TensorFlow 2.0 or above
TensorFlow version:  2.10.0
In [6]:
print("Keras version: ", keras.__version__)
Keras version:  2.10.0

Mount Google Drive to Colab environment¶

In [7]:
#from google.colab import drive
#drive.mount('/content/gdrive')

Research Assignment Reporting Functions¶

In [8]:
def print_validation_report(test_labels, predictions):
    print("Classification Report")
    print(classification_report(test_labels, predictions))
    print('Accuracy Score: {}'.format(accuracy_score(test_labels, predictions)))
    print('Root Mean Square Error: {}'.format(np.sqrt(MSE(test_labels, predictions)))) 
    
def plot_confusion_matrix(y_true, y_pred):
    mtx = confusion_matrix(y_true, y_pred)
    fig, ax = plt.subplots(figsize=(8,8))
    sns.heatmap(mtx, annot=True, fmt='d', linewidths=.75,  cbar=False, ax=ax,cmap='Blues',linecolor='white')
    #  square=True,
    plt.ylabel('true label')
    plt.xlabel('predicted label')

def plot_history(history):
  losses = history.history['loss']
  accs = history.history['accuracy']
  val_losses = history.history['val_loss']
  val_accs = history.history['val_accuracy']
  epochs = len(losses)

  plt.figure(figsize=(16, 4))
  for i, metrics in enumerate(zip([losses, accs], [val_losses, val_accs], ['Loss', 'Accuracy'])):
    plt.subplot(1, 2, i + 1)
    plt.plot(range(epochs), metrics[0], label='Training {}'.format(metrics[2]))
    plt.plot(range(epochs), metrics[1], label='Validation {}'.format(metrics[2]))
    plt.legend()
  plt.show()

def plot_digits(instances, pos, images_per_row=5, **options):
    size = 28
    images_per_row = min(len(instances), images_per_row)
    images = [instance.reshape(size,size) for instance in instances]
    n_rows = (len(instances) - 1) // images_per_row + 1
    row_images = []
    n_empty = n_rows * images_per_row - len(instances)
    images.append(np.zeros((size, size * n_empty)))
    for row in range(n_rows):
        rimages = images[row * images_per_row : (row + 1) * images_per_row]
        row_images.append(np.concatenate(rimages, axis=1))
    image = np.concatenate(row_images, axis=0)
    pos.imshow(image, cmap = 'binary', **options)
    pos.axis("off")

def plot_digit(data):
    image = data.reshape(28, 28)
    plt.imshow(image, cmap = 'hot',
               interpolation="nearest")
    plt.axis("off")

Loading MNIST Dataset¶

  • The MNIST dataset of handwritten digits has a training set of 60,000 images and a test set of 10,000 images. It comes prepackaged as part of tf.keras. Use tf.keras.datasets.mnist.load_data to get these datasets (and the corresponding labels) as Numpy arrays.
In [9]:
(x_train, y_train), (x_test, y_test)= tf.keras.datasets.mnist.load_data()
  • Tuples of Numpy arrays: (x_train, y_train), (x_test, y_test)
  • x_train, x_test: uint8 arrays of grayscale image data with shapes (num_samples, 28, 28).
  • y_train, y_test: uint8 arrays of digit labels (integers in range 0-9)

EDA Training and Test Sets¶

  • Inspect the training and test sets as well as their labels as follows.
In [10]:
print('x_train:\t{}'.format(x_train.shape))
print('y_train:\t{}'.format(y_train.shape))
print('x_test:\t\t{}'.format(x_test.shape))
print('y_test:\t\t{}'.format(y_test.shape))
x_train:	(60000, 28, 28)
y_train:	(60000,)
x_test:		(10000, 28, 28)
y_test:		(10000,)

Review labels for training set¶

In [11]:
print("First ten labels training dataset:\n {}\n".format(y_train[0:10]))
First ten labels training dataset:
 [5 0 4 1 9 2 1 3 1 4]

Find frequency of each label in training and test sets¶

In [12]:
items = [{'Class': x, 'Count': y} for x, y in Counter(y_train).items()]
distribution = pd.DataFrame(items).sort_values(['Class'])
sns.barplot(x=distribution.Class, y=distribution.Count);
In [13]:
items = [{'Class': x, 'Count': y} for x, y in Counter(y_test).items()]
distribution = pd.DataFrame(items).sort_values(['Class'])
sns.barplot(x=distribution.Class, y=distribution.Count);
In [14]:
Counter(y_train).most_common()
Out[14]:
[(1, 6742),
 (7, 6265),
 (3, 6131),
 (2, 5958),
 (9, 5949),
 (0, 5923),
 (6, 5918),
 (8, 5851),
 (4, 5842),
 (5, 5421)]
In [15]:
Counter(y_test).most_common()
Out[15]:
[(1, 1135),
 (2, 1032),
 (7, 1028),
 (3, 1010),
 (9, 1009),
 (4, 982),
 (0, 980),
 (8, 974),
 (6, 958),
 (5, 892)]

Plot sample images with their labels¶

In [16]:
fig = plt.figure(figsize = (15, 9))

for i in range(50):
    plt.subplot(5, 10, 1+i)
    plt.title(y_train[i])
    plt.xticks([])
    plt.yticks([])
    plt.imshow(x_train[i].reshape(28,28), cmap='binary')

Preprocessing Data¶

  • Before we build our model, we need to prepare the data in the shape the network expects.
  • More specifically, we will convert the labels (integers 0 to 9) to 1D numpy arrays of shape (10,) with elements 0s and 1s.
  • We also reshape the images from 2D arrays of shape (28,28) to 1D float32 arrays of shape (784,) and then rescale their elements to values between 0 and 1.

Apply one-hot encoding on the labels¶

We will change the way the labels are represented from numbers (0 to 9) to vectors (1D arrays) of shape (10,) with all the elements set to 0 except the element at the index of the label, which is set to 1. For example:

original label    one-hot encoded label
5                 [0 0 0 0 0 1 0 0 0 0]
7                 [0 0 0 0 0 0 0 1 0 0]
1                 [0 1 0 0 0 0 0 0 0 0]
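
For illustration only, the same encoding can be built by hand with NumPy (the notebook itself uses to_categorical in the next cell):

# A NumPy equivalent of to_categorical for integer labels 0-9 (illustration only).
import numpy as np

labels = np.array([5, 7, 1])                 # example labels from the table above
one_hot = np.zeros((labels.size, 10))
one_hot[np.arange(labels.size), labels] = 1  # set the column matching each label to 1
print(one_hot)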
In [17]:
y_train_encoded = to_categorical(y_train)
y_test_encoded = to_categorical(y_test)

print("First ten entries of y_train:\n {}\n".format(y_train[0:10]))
print("First ten rows of one-hot y_train:\n {}".format(y_train_encoded[0:10,]))
First ten entries of y_train:
 [5 0 4 1 9 2 1 3 1 4]

First ten rows of one-hot y_train:
 [[0. 0. 0. 0. 0. 1. 0. 0. 0. 0.]
 [1. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 1. 0. 0. 0. 0. 0.]
 [0. 1. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 1.]
 [0. 0. 1. 0. 0. 0. 0. 0. 0. 0.]
 [0. 1. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 1. 0. 0. 0. 0. 0. 0.]
 [0. 1. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 1. 0. 0. 0. 0. 0.]]
In [18]:
print('y_train_encoded shape: ', y_train_encoded.shape)
print('y_test_encoded shape: ', y_test_encoded.shape)
y_train_encoded shape:  (60000, 10)
y_test_encoded shape:  (10000, 10)

Reshape the images to 1D arrays¶

Reshape the images from shape (28, 28) 2D arrays to shape (784, ) vectors (1D arrays).

In [19]:
# Before reshape:
print('x_train:\t{}'.format(x_train.shape))
print('x_test:\t\t{}'.format(x_test.shape))
x_train:	(60000, 28, 28)
x_test:		(10000, 28, 28)
In [20]:
# Reshape the images:
x_train_reshaped = np.reshape(x_train, (60000, 784))
x_test_reshaped = np.reshape(x_test, (10000, 784))

# After reshape:
print('x_train_reshaped shape: ', x_train_reshaped.shape)
print('x_test_reshaped shape: ', x_test_reshaped.shape)
x_train_reshaped shape:  (60000, 784)
x_test_reshaped shape:  (10000, 784)
  1. Each element in an image is a pixel value
  2. Pixel values range from 0 to 255
  3. 0 = White
  4. 255 = Black

Review unique values with set from 1st image¶

In [21]:
print(set(x_train_reshaped[0]))
{0, 1, 2, 3, 9, 11, 14, 16, 18, 23, 24, 25, 26, 27, 30, 35, 36, 39, 43, 45, 46, 49, 55, 56, 64, 66, 70, 78, 80, 81, 82, 90, 93, 94, 107, 108, 114, 119, 126, 127, 130, 132, 133, 135, 136, 139, 148, 150, 154, 156, 160, 166, 170, 171, 172, 175, 182, 183, 186, 187, 190, 195, 198, 201, 205, 207, 212, 213, 219, 221, 225, 226, 229, 238, 240, 241, 242, 244, 247, 249, 250, 251, 252, 253, 255}

Rescale the elements of the reshaped images¶

Rescale the elements between [0 and 1]

In [22]:
x_train_norm = x_train_reshaped.astype('float32') / 255
x_test_norm = x_test_reshaped.astype('float32') / 255
In [23]:
# Take a look at the first reshaped and normalized training image:
print(set(x_train_norm[0]))
{0.0, 0.011764706, 0.53333336, 0.07058824, 0.49411765, 0.6862745, 0.101960786, 0.6509804, 1.0, 0.96862745, 0.49803922, 0.11764706, 0.14117648, 0.36862746, 0.6039216, 0.6666667, 0.043137256, 0.05490196, 0.03529412, 0.85882354, 0.7764706, 0.7137255, 0.94509804, 0.3137255, 0.6117647, 0.41960785, 0.25882354, 0.32156864, 0.21960784, 0.8039216, 0.8666667, 0.8980392, 0.7882353, 0.52156866, 0.18039216, 0.30588236, 0.44705883, 0.3529412, 0.15294118, 0.6745098, 0.88235295, 0.99215686, 0.9490196, 0.7647059, 0.2509804, 0.19215687, 0.93333334, 0.9843137, 0.74509805, 0.7294118, 0.5882353, 0.50980395, 0.8862745, 0.105882354, 0.09019608, 0.16862746, 0.13725491, 0.21568628, 0.46666667, 0.3647059, 0.27450982, 0.8352941, 0.7176471, 0.5803922, 0.8117647, 0.9764706, 0.98039216, 0.73333335, 0.42352942, 0.003921569, 0.54509807, 0.67058825, 0.5294118, 0.007843138, 0.31764707, 0.0627451, 0.09411765, 0.627451, 0.9411765, 0.9882353, 0.95686275, 0.83137256, 0.5176471, 0.09803922, 0.1764706}

Build the DNN model¶

We use the Sequential class defined in Keras to create our model. All the layers are going to be Dense layers, which means that every node of a layer is connected to every node of the preceding layer, i.e. densely connected.

After the model is built, we view a summary of its layers and parameter counts.

In [24]:
model = Sequential([
    Dense(input_shape=[784], units=1, activation = tf.nn.relu,kernel_regularizer=tf.keras.regularizers.L2(0.001)),
    Dense(name = "output_layer", units = 10, activation = tf.nn.softmax)
])
2022-10-18 00:05:01.923961: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
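
Note that the cell above uses a single hidden node (units=1). A hypothetical variant matching the 784-128-10 architecture described in the introduction might look like the sketch below; it is not the model trained in this notebook.

# Sketch of a 128-node hidden-layer variant (not the model trained below).
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

model_128 = Sequential([
    Dense(input_shape=[784], units=128, activation=tf.nn.relu,
          kernel_regularizer=tf.keras.regularizers.L2(0.001)),
    Dense(name="output_layer", units=10, activation=tf.nn.softmax)
])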
In [25]:
model.summary()
Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
=================================================================
 dense (Dense)               (None, 1)                 785       
                                                                 
 output_layer (Dense)        (None, 10)                20        
                                                                 
=================================================================
Total params: 805
Trainable params: 805
Non-trainable params: 0
_________________________________________________________________
In [26]:
keras.utils.plot_model(model, "mnist_model.png", show_shapes=True) 
You must install pydot (`pip install pydot`) and install graphviz (see instructions at https://graphviz.gitlab.io/download/) for plot_model to work.

Compile the DNN model¶

In addition to setting up our model architecture, we also need to define which algorithm the model should use to optimize the weights and biases from the given data. We will use the RMSprop optimizer.

We also need to define a loss function. Think of this function as the difference between the predicted outputs and the actual outputs given in the dataset. This loss needs to be minimized in order to have a higher model accuracy. That's what the optimization algorithm essentially does - it minimizes the loss during model training. For our multi-class classification problem, categorical cross entropy is commonly used.

Finally, we will use the accuracy during training as a metric to keep track of as the model trains.

tf.keras.optimizers.RMSprop
https://www.tensorflow.org/api_docs/python/tf/keras/optimizers/RMSprop

tf.keras.losses.CategoricalCrossentropy
https://www.tensorflow.org/api_docs/python/tf/keras/losses/CategoricalCrossentropy
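
As a quick illustration of what categorical cross entropy measures, the sketch below computes the loss for a single example by hand (the predicted probabilities are made up):

# Categorical cross-entropy for one example, computed by hand (illustration only).
import numpy as np

y_true = np.array([0, 0, 0, 0, 0, 1, 0, 0, 0, 0])         # one-hot label for digit 5
y_pred = np.array([0.05, 0.05, 0.05, 0.05, 0.05, 0.50,
                   0.05, 0.05, 0.05, 0.10])                # hypothetical softmax output
loss = -np.sum(y_true * np.log(y_pred))                    # = -log(0.50) ≈ 0.693
print(loss)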

In [27]:
model.compile(optimizer='rmsprop',           
               loss = 'categorical_crossentropy',
               metrics=['accuracy'])

Train the DNN model¶

tf.keras.model.fit
https://www.tensorflow.org/api_docs/python/tf/keras/Model#fit

tf.keras.callbacks.EarlyStopping
https://www.tensorflow.org/api_docs/python/tf/keras/callbacks/EarlyStopping
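
One possible callback configuration is sketched below; the cell that follows monitors val_accuracy and relies on a ModelCheckpoint rather than restore_best_weights.

# Sketch of an alternative callback setup: stop on stalled validation loss and
# roll back to the best weights seen during training.
import tensorflow as tf

early_stop = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss', patience=3, restore_best_weights=True)
checkpoint = tf.keras.callbacks.ModelCheckpoint(
    'DNN_model.h5', save_best_only=True)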

In [28]:
history = model.fit(
    x_train_norm
    ,y_train_encoded
    ,epochs = 200
    ,validation_split=0.20 
    ,callbacks=[tf.keras.callbacks.ModelCheckpoint("DNN_model.h5",save_best_only=True,save_weights_only=False)
                ,tf.keras.callbacks.EarlyStopping(monitor='val_accuracy', patience=2)] 
    )
Epoch 1/200
1500/1500 [==============================] - 2s 938us/step - loss: 2.0451 - accuracy: 0.1872 - val_loss: 1.9215 - val_accuracy: 0.2074
Epoch 2/200
1500/1500 [==============================] - 1s 838us/step - loss: 1.8586 - accuracy: 0.2775 - val_loss: 1.7894 - val_accuracy: 0.3047
Epoch 3/200
1500/1500 [==============================] - 1s 833us/step - loss: 1.7613 - accuracy: 0.3082 - val_loss: 1.7260 - val_accuracy: 0.3273
Epoch 4/200
1500/1500 [==============================] - 1s 837us/step - loss: 1.7178 - accuracy: 0.3245 - val_loss: 1.6929 - val_accuracy: 0.3334
Epoch 5/200
1500/1500 [==============================] - 1s 833us/step - loss: 1.6914 - accuracy: 0.3292 - val_loss: 1.6702 - val_accuracy: 0.3079
Epoch 6/200
1500/1500 [==============================] - 1s 822us/step - loss: 1.6730 - accuracy: 0.3238 - val_loss: 1.6558 - val_accuracy: 0.3154

Evaluate the DNN model¶

To ensure that the model is not simply "memorizing" the training data, we should evaluate its performance on the test set. This is easy to do: we simply call the evaluate method on our model.

In [29]:
model = tf.keras.models.load_model("DNN_model.h5")
print(f"Test acc: {model.evaluate(x_test_norm, y_test_encoded)[1]:.3f}")
313/313 [==============================] - 0s 650us/step - loss: 1.6782 - accuracy: 0.3131
Test acc: 0.313
In [30]:
loss, accuracy = model.evaluate(x_test_norm, y_test_encoded)
print('test set accuracy: ', accuracy * 100)
313/313 [==============================] - 0s 786us/step - loss: 1.6782 - accuracy: 0.3131
test set accuracy:  31.310001015663147

Making Predictions¶

In [31]:
preds = model.predict(x_test_norm)
print('shape of preds: ', preds.shape)
313/313 [==============================] - 0s 545us/step
shape of preds:  (10000, 10)

Look at the first 25: plot test set images along with their predicted and actual labels to understand how the trained model actually performed.

In [32]:
plt.figure(figsize = (12, 8))

start_index = 0

for i in range(25):
    plt.subplot(5, 5, i + 1)
    plt.grid(False)
    plt.xticks([])
    plt.yticks([])
    pred = np.argmax(preds[start_index + i])
    actual = np.argmax(y_test_encoded[start_index + i])
    col = 'g'
    if pred != actual:
        col = 'r'
    plt.xlabel('i={} | pred={} | true={}'.format(start_index + i, pred, actual), color = col)
    plt.imshow(x_test[start_index + i], cmap='binary')
plt.show()

Reviewing Performance¶

In [33]:
history_dict = history.history
history_dict.keys()
Out[33]:
dict_keys(['loss', 'accuracy', 'val_loss', 'val_accuracy'])

Plot performance metrics¶

We use Matplotlib to create two plots side by side, displaying the training and validation loss and the training and validation accuracy for each training epoch.

In [34]:
history_dict = history.history
history_dict.keys()
Out[34]:
dict_keys(['loss', 'accuracy', 'val_loss', 'val_accuracy'])
In [35]:
losses = history.history['loss']
accs = history.history['accuracy']
val_losses = history.history['val_loss']
val_accs = history.history['val_accuracy']
epochs = len(losses)
In [36]:
history_df=pd.DataFrame(history_dict)
history_df.tail().round(3)
Out[36]:
loss accuracy val_loss val_accuracy
1 1.859 0.278 1.789 0.305
2 1.761 0.308 1.726 0.327
3 1.718 0.324 1.693 0.333
4 1.691 0.329 1.670 0.308
5 1.673 0.324 1.656 0.315
In [37]:
plot_history(history)
In [38]:
pred1= model.predict(x_test_norm)
pred1=np.argmax(pred1, axis=1)
313/313 [==============================] - 0s 552us/step
In [39]:
print_validation_report(y_test, pred1)
Classification Report
              precision    recall  f1-score   support

           0       0.26      0.38      0.31       980
           1       0.32      0.59      0.41      1135
           2       0.44      0.62      0.51      1032
           3       0.26      0.36      0.30      1010
           4       0.21      0.06      0.10       982
           5       0.00      0.00      0.00       892
           6       0.27      0.13      0.18       958
           7       0.32      0.88      0.47      1028
           8       0.00      0.00      0.00       974
           9       0.00      0.00      0.00      1009

    accuracy                           0.31     10000
   macro avg       0.21      0.30      0.23     10000
weighted avg       0.21      0.31      0.23     10000

Accuracy Score: 0.3131
Root Mean Square Error: 3.1530144306678967
/Users/apoorvsara/opt/anaconda3/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1318: UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.
  _warn_prf(average, modifier, msg_start, len(result))
/Users/apoorvsara/opt/anaconda3/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1318: UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.
  _warn_prf(average, modifier, msg_start, len(result))
/Users/apoorvsara/opt/anaconda3/lib/python3.9/site-packages/sklearn/metrics/_classification.py:1318: UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.
  _warn_prf(average, modifier, msg_start, len(result))

Create the confusion matrix¶

Let us see what the confusion matrix looks like, using both sklearn.metrics and tf.math. Then we visualize the confusion matrix and see what it tells us.

In [40]:
# Get the predicted classes:
# pred_classes = model.predict_classes(x_train_norm)# give deprecation warning
pred_classes = np.argmax(model.predict(x_test_norm), axis=-1)
pred_classes;
313/313 [==============================] - 0s 536us/step
In [41]:
conf_mx = tf.math.confusion_matrix(y_test, pred_classes)
conf_mx;
In [42]:
cm = sns.light_palette((260, 75, 60), input="husl", as_cmap=True)
df = pd.DataFrame(preds[0:20], columns = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9'])
df.style.format("{:.2%}").background_gradient(cmap=cm)
Out[42]:
  0 1 2 3 4 5 6 7 8 9
0 6.89% 3.79% 0.12% 1.11% 24.16% 2.34% 0.17% 29.98% 1.55% 29.88%
1 0.00% 0.01% 61.62% 0.96% 0.00% 0.04% 37.08% 0.00% 0.28% 0.00%
2 22.68% 20.85% 2.75% 11.68% 5.40% 15.28% 3.53% 2.62% 13.53% 1.68%
3 20.62% 21.22% 3.83% 13.72% 2.71% 16.15% 4.80% 1.07% 15.25% 0.62%
4 6.89% 3.79% 0.12% 1.11% 24.16% 2.34% 0.17% 29.98% 1.55% 29.88%
5 22.84% 20.71% 2.63% 11.40% 5.84% 15.11% 3.39% 2.91% 13.28% 1.89%
6 6.89% 3.79% 0.12% 1.11% 24.16% 2.34% 0.17% 29.98% 1.55% 29.88%
7 6.89% 3.79% 0.12% 1.11% 24.16% 2.34% 0.17% 29.98% 1.55% 29.88%
8 22.04% 16.95% 1.36% 7.56% 13.41% 11.70% 1.81% 9.03% 9.36% 6.76%
9 6.89% 3.79% 0.12% 1.11% 24.16% 2.34% 0.17% 29.98% 1.55% 29.88%
10 21.75% 21.19% 3.29% 12.78% 3.81% 15.84% 4.17% 1.66% 14.50% 1.02%
11 6.79% 13.35% 14.46% 19.65% 0.03% 12.60% 15.91% 0.00% 17.20% 0.00%
12 6.89% 3.79% 0.12% 1.11% 24.16% 2.34% 0.17% 29.98% 1.55% 29.88%
13 7.46% 4.17% 0.14% 1.24% 24.27% 2.59% 0.20% 29.33% 1.73% 28.87%
14 17.29% 20.43% 5.41% 15.75% 1.10% 16.28% 6.60% 0.34% 16.63% 0.17%
15 17.50% 20.51% 5.30% 15.64% 1.17% 16.29% 6.48% 0.36% 16.57% 0.19%
16 6.89% 3.79% 0.12% 1.11% 24.16% 2.34% 0.17% 29.98% 1.55% 29.88%
17 7.45% 4.16% 0.14% 1.24% 24.27% 2.59% 0.20% 29.34% 1.72% 28.89%
18 2.77% 7.88% 23.84% 18.59% 0.00% 8.41% 24.33% 0.00% 14.19% 0.00%
19 6.89% 3.79% 0.12% 1.11% 24.16% 2.34% 0.17% 29.98% 1.55% 29.88%

Visualize the confusion matrix¶

We use code from chapter 3 of Hands on Machine Learning (A. Geron) (cf. https://github.com/ageron/handson-ml2/blob/master/03_classification.ipynb) to display a "heat map" of the confusion matrix. Then we normalize the confusion matrix so we can compare error rates.

See https://learning.oreilly.com/library/view/hands-on-machine-learning/9781492032632/ch03.html#classification_chapter

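
The normalization step mentioned above is not shown in the cells below; a minimal sketch of it (after Geron, chapter 3), assuming pred_classes from the earlier cell, is:

# Row-normalize the confusion matrix and zero the diagonal to highlight error rates.
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix

mtx = confusion_matrix(y_test, pred_classes).astype(float)
norm_mtx = mtx / mtx.sum(axis=1, keepdims=True)  # each row now sums to 1
np.fill_diagonal(norm_mtx, 0)                    # keep only the misclassification rates
plt.matshow(norm_mtx, cmap=plt.cm.gray)
plt.show()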
In [43]:
plot_confusion_matrix(y_test,pred_classes)

Analyzing the Activation Values of the Hidden Nodes¶

We want to examine the contribution of the individual hidden nodes to the classifications made by the model. We first get the activation values of all the hidden nodes for each of the 60,000 training images and treat these 128 activations as the features that determine the classification class. For the sake of comparison, we also consider the 784 pixels of each training image and determine the contribution of the individual pixels to the predicted classification class.

Our goal is to use box and scatter plots to visualize how these features (pixel and activation values) correlate with the class labels. Because of the high dimension of the feature spaces, we apply PCA decomposition and t-Distributed stochastic neighbor embedding (t-SNE) to reduce the number of features in each case.

We use the following two articles as reference

  • https://towardsdatascience.com/pca-using-python-scikit-learn-e653f8989e60
  • https://towardsdatascience.com/visualising-high-dimensional-datasets-using-pca-and-t-sne-in-python-8ef87e7915b
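
The t-SNE step is only referenced here and not run below; a minimal sketch on a random subsample of the normalized training pixels (t-SNE on all 60,000 images would be slow) might look like this:

# Sketch: t-SNE embedding of a 2,000-image subsample, colored by class label.
import numpy as np
import matplotlib.pyplot as plt
from sklearn.manifold import TSNE

idx = np.random.choice(len(x_train_norm), 2000, replace=False)  # subsample size is arbitrary
embedding = TSNE(n_components=2, init='pca', random_state=42).fit_transform(x_train_norm[idx])
plt.scatter(embedding[:, 0], embedding[:, 1], c=y_train[idx], cmap='tab10', s=5)
plt.colorbar(label='class label')
plt.show()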

Get the activation values of the hidden nodes¶

To get the activation values of the hidden nodes, we need to create a new model, activation_model, that takes the same input as our current model but outputs the activation values of the hidden layer, i.e. of the hidden node. We then use the predict method to get the activation values.

In [45]:
# Extracts the outputs of the 2 layers:
layer_outputs = [layer.output for layer in model.layers]

# Creates a model that will return these outputs, given the model input:
activation_model = models.Model(inputs=model.input, outputs=layer_outputs)

print(f"There are {len(layer_outputs)} layers")
layer_outputs; # description of the layers
There are 2 layers
In [46]:
# Get the outputs of all the hidden nodes for each of the 60000 training images
activations = activation_model.predict(x_train_norm)
hidden_layer_activation = activations[0]
output_layer_activations = activations[1]
hidden_layer_activation.shape   # each hidden node has one activation value per training image
1875/1875 [==============================] - 1s 581us/step
Out[46]:
(60000, 1)
In [47]:
output_layer_activations.shape
Out[47]:
(60000, 10)
In [48]:
print(f"The maximum activation value of the hidden nodes in the hidden layer is \
{hidden_layer_activation.max()}")
The maximum activation value of the hidden nodes in the hidden layer is 19.58106231689453
In [49]:
# Some stats about the output layer as an aside...
np.set_printoptions(suppress = True)  # display probabilities as decimals and NOT in scientific notation
output_layer_activation = activations[1]
print(f"The output layer has shape {output_layer_activation.shape}")
print(f"The outputs for the first image are {output_layer_activation[0].round(4)}")
print(f"The sum of the probabilities is (approximately) {output_layer_activation[0].sum()}")
The output layer has shape (60000, 10)
The outputs for the first image are [0.133 0.186 0.077 0.176 0.004 0.156 0.091 0.001 0.175 0.   ]
The sum of the probabilities is (approximately) 1.0000001192092896

Create a dataframe with the activation values and the class labels¶

In [51]:
#Get the dataframe of all the node values
activation_data = {'actual_class':y_train}
for k in range(0,1): 
    activation_data[f"act_val_{k}"] = hidden_layer_activation[:,k]

activation_df = pd.DataFrame(activation_data)
activation_df.head(15).round(3).T
Out[51]:
0 1 2 3 4 5 6 7 8 9 10 11 12 13 14
actual_class 5.000 0.000 4.000 1.000 9.0 2.000 1.000 3.000 1.000 4.0 3.000 5.000 3.000 6.00 1.000
act_val_0 2.617 0.298 0.426 2.375 0.0 3.019 2.262 3.189 1.768 0.0 2.458 3.321 4.357 5.95 2.088

Visualize the activation values with boxplots¶

We get the activation values of the first hidden node and combine them with the corresponding class labels into a DataFrame. We then use seaborn to create boxplots from the dataframe.

seaborn.boxplot
https://seaborn.pydata.org/generated/seaborn.boxplot.html
In [52]:
# To see how closely the hidden node activation values correlate with the class labels
# Let us use seaborn for the boxplots this time.
bplot = sns.boxplot(y='act_val_0', x='actual_class', 
                 data=activation_df[['act_val_0','actual_class']], 
                 width=0.5,
                 palette="colorblind")

Displaying The Range Of Activation Values For Each Class Label¶

In [53]:
activation_df.groupby("actual_class")["act_val_0"].apply(lambda x: [round(min(x.tolist()),2),
 round(max(x.tolist()),2)]).reset_index().rename(columns={"act_val_0": "range_of_act_values"})
Out[53]:
actual_class range_of_act_values
0 0 [0.0, 9.65]
1 1 [0.0, 12.25]
2 2 [0.0, 19.58]
3 3 [0.0, 13.1]
4 4 [0.0, 9.16]
5 5 [0.0, 13.41]
6 6 [0.0, 13.91]
7 7 [0.0, 6.7]
8 8 [0.0, 10.21]
9 9 [0.0, 3.87]
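
For reference, the same per-class ranges can also be obtained with pandas' built-in aggregation (a sketch equivalent to the cell above, reusing activation_df):

# Equivalent per-class min/max of the activation values using pandas agg.
activation_df.groupby("actual_class")["act_val_0"].agg(['min', 'max']).round(2)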