In this assignment we build a DNN model for classifying MNIST digits. The DNN model consists of 784 input nodes, a hidden layer with 128 nodes, and 10 output nodes (corresponding to the 10 digits). We use mnist.load_data() to get the 70,000 images, divided into a set of 60,000 training images and 10,000 test images. We hold back 5,000 of the 60,000 training images for validation.
After training the DNN model we analyze its performance. In particular, we use confusion matrices to compare the predicted classes with the class labels to try to determine why some images were misclassified by the model.
We also extract the activation values of one of the hidden nodes for the (original) set of training data. We want to use these activation values as "proxies" for the predicted classes of the 60,000 images. Instead of comparing the predicted classes with the class labels using confusion matrices to determine the efficacy of the model, we use box plots to visualize the relationship between the activation values of one hidden node and the class labels. We don't expect these activation values to have much "predictive power". In fact, the same activation values can be associated with multiple class labels, resulting in a lot of overlap in the box plots. (A sketch of how such activations could be extracted is shown below.)
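The following is a minimal, self-contained sketch (not part of the original notebook) of the activation-extraction idea: a Keras sub-model that outputs the hidden layer, from which the activations of a single hidden node can be read off. The names demo_model and demo_images are made-up stand-ins; the real model is built and trained later in the notebook.
import numpy as np
import tensorflow as tf
# Hypothetical stand-in for the DNN built later (hidden Dense layer + softmax output).
demo_model = tf.keras.Sequential([
    tf.keras.layers.Dense(128, activation='relu', input_shape=[784]),
    tf.keras.layers.Dense(10, activation='softmax'),
])
# Sub-model that returns the hidden layer's activations instead of class probabilities.
hidden_extractor = tf.keras.Model(inputs=demo_model.input, outputs=demo_model.layers[0].output)
demo_images = np.random.rand(5, 784).astype('float32')   # stand-in for the normalized images
hidden_acts = hidden_extractor.predict(demo_images)      # shape (5, 128)
node_0_acts = hidden_acts[:, 0]                          # activations of one hidden node: the "proxy" values
print(node_0_acts)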
We also use scatter plots to visualize the relationship between a pair of pixel values and the class labels (represented by different colored dots), and a scatter plot to visualize the correlation between the two principal component values and the class labels.
First we import all the packages that will be used in the assignment. Since Keras is integrated in TensorFlow 2.x, we import keras from tensorflow and use tensorflow.keras.xxx to import all other Keras packages. The seed argument produces a deterministic sequence of tensors across multiple calls.
import datetime
from packaging import version
from collections import Counter
import numpy as np
import pandas as pd
import matplotlib as mpl # EA
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import mean_squared_error as MSE
from sklearn.metrics import accuracy_score
import tensorflow as tf
from tensorflow.keras.utils import to_categorical
from tensorflow import keras
from tensorflow.keras import models
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten
from tensorflow.keras.datasets import mnist
%matplotlib inline
np.set_printoptions(precision=3, suppress=True)
print("This notebook requires TensorFlow 2.0 or above")
print("TensorFlow version: ", tf.__version__)
assert version.parse(tf.__version__).release[0] >=2
This notebook requires TensorFlow 2.0 or above TensorFlow version: 2.10.0
print("Keras version: ", keras.__version__)
Keras version: 2.10.0
#from google.colab import drive
#drive.mount('/content/gdrive')
def print_validation_report(test_labels, predictions):
print("Classification Report")
print(classification_report(test_labels, predictions))
print('Accuracy Score: {}'.format(accuracy_score(test_labels, predictions)))
print('Root Mean Square Error: {}'.format(np.sqrt(MSE(test_labels, predictions))))
def plot_confusion_matrix(y_true, y_pred):
mtx = confusion_matrix(y_true, y_pred)
fig, ax = plt.subplots(figsize=(8,8))
sns.heatmap(mtx, annot=True, fmt='d', linewidths=.75, cbar=False, ax=ax,cmap='Blues',linecolor='white')
# square=True,
plt.ylabel('true label')
plt.xlabel('predicted label')
def plot_history(history):
losses = history.history['loss']
accs = history.history['accuracy']
val_losses = history.history['val_loss']
val_accs = history.history['val_accuracy']
epochs = len(losses)
plt.figure(figsize=(16, 4))
for i, metrics in enumerate(zip([losses, accs], [val_losses, val_accs], ['Loss', 'Accuracy'])):
plt.subplot(1, 2, i + 1)
plt.plot(range(epochs), metrics[0], label='Training {}'.format(metrics[2]))
plt.plot(range(epochs), metrics[1], label='Validation {}'.format(metrics[2]))
plt.legend()
plt.show()
def plot_digits(instances, pos, images_per_row=5, **options):
size = 28
images_per_row = min(len(instances), images_per_row)
images = [instance.reshape(size,size) for instance in instances]
n_rows = (len(instances) - 1) // images_per_row + 1
row_images = []
n_empty = n_rows * images_per_row - len(instances)
images.append(np.zeros((size, size * n_empty)))
for row in range(n_rows):
rimages = images[row * images_per_row : (row + 1) * images_per_row]
row_images.append(np.concatenate(rimages, axis=1))
image = np.concatenate(row_images, axis=0)
pos.imshow(image, cmap = 'binary', **options)
pos.axis("off")
def plot_digit(data):
image = data.reshape(28, 28)
plt.imshow(image, cmap = 'hot',
interpolation="nearest")
plt.axis("off")
The MNIST dataset is included in tf.keras. Use tf.keras.datasets.mnist.load_data to get these datasets (and the corresponding labels) as Numpy arrays.
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()
load_data returns two tuples, (x_train, y_train) and (x_test, y_test):
x_train, x_test: uint8 arrays of grayscale image data with shape (num_samples, 28, 28).
y_train, y_test: uint8 arrays of digit labels (integers in the range 0-9).
print('x_train:\t{}'.format(x_train.shape))
print('y_train:\t{}'.format(y_train.shape))
print('x_test:\t\t{}'.format(x_test.shape))
print('y_test:\t\t{}'.format(y_test.shape))
x_train: (60000, 28, 28) y_train: (60000,) x_test: (10000, 28, 28) y_test: (10000,)
print("First ten labels training dataset:\n {}\n".format(y_train[0:10]))
First ten labels training dataset: [5 0 4 1 9 2 1 3 1 4]
items = [{'Class': x, 'Count': y} for x, y in Counter(y_train).items()]
distribution = pd.DataFrame(items).sort_values(['Class'])
sns.barplot(x=distribution.Class, y=distribution.Count);
Counter(y_train).most_common()
[(1, 6742), (7, 6265), (3, 6131), (2, 5958), (9, 5949), (0, 5923), (6, 5918), (8, 5851), (4, 5842), (5, 5421)]
Counter(y_test).most_common()
[(1, 1135), (2, 1032), (7, 1028), (3, 1010), (9, 1009), (4, 982), (0, 980), (8, 974), (6, 958), (5, 892)]
fig = plt.figure(figsize = (15, 9))
for i in range(50):
plt.subplot(5, 10, 1+i)
plt.title(y_train[i])
plt.xticks([])
plt.yticks([])
plt.imshow(x_train[i].reshape(28,28), cmap='binary')
We will change the way the labels are represented: instead of numbers (0 to 9), each label becomes a vector (1D array) of shape (10,) with all elements set to 0 except the element at the index corresponding to the label, which is set to 1. For example:
original label | one-hot encoded label |
---|---|
5 | [0 0 0 0 0 1 0 0 0 0] |
7 | [0 0 0 0 0 0 0 1 0 0] |
1 | [0 1 0 0 0 0 0 0 0 0] |
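As an illustration (not in the original notebook), the same one-hot rows can be obtained by indexing a 10x10 identity matrix, which is essentially what to_categorical does for integer labels:
# Illustration only: one-hot encode by indexing an identity matrix.
print(np.eye(10)[5])            # array([0., 0., 0., 0., 0., 1., 0., 0., 0., 0.])
print(np.eye(10)[y_train[:3]])  # one-hot rows for the first three training labels (5, 0, 4)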
y_train_encoded = to_categorical(y_train)
y_test_encoded = to_categorical(y_test)
print("First ten entries of y_train:\n {}\n".format(y_train[0:10]))
print("First ten rows of one-hot y_train:\n {}".format(y_train_encoded[0:10,]))
First ten entries of y_train: [5 0 4 1 9 2 1 3 1 4] First ten rows of one-hot y_train: [[0. 0. 0. 0. 0. 1. 0. 0. 0. 0.] [1. 0. 0. 0. 0. 0. 0. 0. 0. 0.] [0. 0. 0. 0. 1. 0. 0. 0. 0. 0.] [0. 1. 0. 0. 0. 0. 0. 0. 0. 0.] [0. 0. 0. 0. 0. 0. 0. 0. 0. 1.] [0. 0. 1. 0. 0. 0. 0. 0. 0. 0.] [0. 1. 0. 0. 0. 0. 0. 0. 0. 0.] [0. 0. 0. 1. 0. 0. 0. 0. 0. 0.] [0. 1. 0. 0. 0. 0. 0. 0. 0. 0.] [0. 0. 0. 0. 1. 0. 0. 0. 0. 0.]]
print('y_train_encoded shape: ', y_train_encoded.shape)
print('y_test_encoded shape: ', y_test_encoded.shape)
y_train_encoded shape: (60000, 10) y_test_encoded shape: (10000, 10)
Reshape the images from shape (28, 28) 2D arrays to shape (784, ) vectors (1D arrays).
# Before reshape:
print('x_train:\t{}'.format(x_train.shape))
print('x_test:\t\t{}'.format(x_test.shape))
x_train: (60000, 28, 28) x_test: (10000, 28, 28)
np.set_printoptions(linewidth=np.inf)
print("{}".format(x_train[2020]))
[[ 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0] [ 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0] [ 0 0 0 0 0 0 0 0 0 0 0 0 0 167 208 19 0 0 0 0 0 0 0 0 0 0 0 0] [ 0 0 0 0 0 0 0 0 0 0 0 0 13 235 254 99 0 0 0 0 0 0 0 0 0 0 0 0] [ 0 0 0 0 0 0 0 0 0 0 0 0 74 254 234 4 0 0 0 0 0 0 0 0 0 0 0 0] [ 0 0 0 0 0 0 0 0 0 0 0 0 154 254 145 0 0 0 0 0 0 0 0 0 0 0 0 0] [ 0 0 0 0 0 0 0 0 0 0 0 0 224 254 92 0 0 0 0 0 0 0 0 0 0 0 0 0] [ 0 0 0 0 0 0 0 0 0 0 0 51 245 211 13 0 0 0 0 0 0 0 0 0 0 0 0 0] [ 0 0 0 0 0 0 0 0 0 0 2 169 254 101 0 0 0 0 0 0 0 0 0 0 0 0 0 0] [ 0 0 0 0 0 0 0 0 0 0 27 254 254 88 0 0 0 0 0 0 0 0 0 0 0 0 0 0] [ 0 0 0 0 0 0 0 0 0 0 72 255 241 15 0 0 0 0 0 0 0 0 0 0 0 0 0 0] [ 0 0 0 0 0 0 0 0 0 0 88 254 153 0 0 33 53 155 156 102 15 0 0 0 0 0 0 0] [ 0 0 0 0 0 0 0 0 0 0 130 254 31 0 128 235 254 254 254 254 186 10 0 0 0 0 0 0] [ 0 0 0 0 0 0 0 0 0 0 190 254 51 178 254 246 213 111 109 186 254 145 0 0 0 0 0 0] [ 0 0 0 0 0 0 0 0 0 0 192 254 229 254 216 90 0 0 0 57 254 234 0 0 0 0 0 0] [ 0 0 0 0 0 0 0 0 0 0 235 254 254 247 85 0 0 0 0 32 254 234 0 0 0 0 0 0] [ 0 0 0 0 0 0 0 0 0 0 235 254 254 118 0 0 0 0 0 107 254 201 0 0 0 0 0 0] [ 0 0 0 0 0 0 0 0 0 0 235 255 254 102 12 0 0 0 8 188 248 119 0 0 0 0 0 0] [ 0 0 0 0 0 0 0 0 0 0 207 254 254 238 107 0 0 39 175 254 148 0 0 0 0 0 0 0] [ 0 0 0 0 0 0 0 0 0 0 84 254 248 74 11 32 115 238 254 176 11 0 0 0 0 0 0 0] [ 0 0 0 0 0 0 0 0 0 0 21 214 254 254 254 254 254 254 132 6 0 0 0 0 0 0 0 0] [ 0 0 0 0 0 0 0 0 0 0 0 14 96 176 254 254 214 48 12 0 0 0 0 0 0 0 0 0] [ 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0] [ 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0] [ 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0] [ 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0] [ 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0] [ 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]]
# Reshape the images:
x_train_reshaped = np.reshape(x_train, (60000, 784))
x_test_reshaped = np.reshape(x_test, (10000, 784))
# After reshape:
print('x_train_reshaped shape: ', x_train_reshaped.shape)
print('x_test_reshaped shape: ', x_test_reshaped.shape)
x_train_reshaped shape: (60000, 784) x_test_reshaped shape: (10000, 784)
print(set(x_train_reshaped[0]))
{0, 1, 2, 3, 9, 11, 14, 16, 18, 23, 24, 25, 26, 27, 30, 35, 36, 39, 43, 45, 46, 49, 55, 56, 64, 66, 70, 78, 80, 81, 82, 90, 93, 94, 107, 108, 114, 119, 126, 127, 130, 132, 133, 135, 136, 139, 148, 150, 154, 156, 160, 166, 170, 171, 172, 175, 182, 183, 186, 187, 190, 195, 198, 201, 205, 207, 212, 213, 219, 221, 225, 226, 229, 238, 240, 241, 242, 244, 247, 249, 250, 251, 252, 253, 255}
Rescale the elements so they lie in the range [0, 1].
x_train_norm = x_train_reshaped.astype('float32') / 255
x_test_norm = x_test_reshaped.astype('float32') / 255
# Take a look at the first reshaped and normalized training image:
print(set(x_train_norm[0]))
{0.0, 0.011764706, 0.53333336, 0.07058824, 0.49411765, 0.6862745, 0.101960786, 0.6509804, 1.0, 0.96862745, 0.49803922, 0.11764706, 0.14117648, 0.36862746, 0.6039216, 0.6666667, 0.043137256, 0.05490196, 0.03529412, 0.85882354, 0.7764706, 0.7137255, 0.94509804, 0.3137255, 0.6117647, 0.41960785, 0.25882354, 0.32156864, 0.21960784, 0.8039216, 0.8666667, 0.8980392, 0.7882353, 0.52156866, 0.18039216, 0.30588236, 0.44705883, 0.3529412, 0.15294118, 0.6745098, 0.88235295, 0.99215686, 0.9490196, 0.7647059, 0.2509804, 0.19215687, 0.93333334, 0.9843137, 0.74509805, 0.7294118, 0.5882353, 0.50980395, 0.8862745, 0.105882354, 0.09019608, 0.16862746, 0.13725491, 0.21568628, 0.46666667, 0.3647059, 0.27450982, 0.8352941, 0.7176471, 0.5803922, 0.8117647, 0.9764706, 0.98039216, 0.73333335, 0.42352942, 0.003921569, 0.54509807, 0.67058825, 0.5294118, 0.007843138, 0.31764707, 0.0627451, 0.09411765, 0.627451, 0.9411765, 0.9882353, 0.95686275, 0.83137256, 0.5176471, 0.09803922, 0.1764706}
#Get the dataframe of all the pixel values
pixel_data = {'actual_class':y_train}
for k in range(0,784):
pixel_data[f"pix_val_{k}"] = x_train_norm[:,k]
pixel_df = pd.DataFrame(pixel_data)
pixel_df.head(15).round(3).T
0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
actual_class | 5.0 | 0.0 | 4.0 | 1.0 | 9.0 | 2.0 | 1.0 | 3.0 | 1.0 | 4.0 | 3.0 | 5.0 | 3.0 | 6.0 | 1.0 |
pix_val_0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
pix_val_1 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
pix_val_2 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
pix_val_3 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
pix_val_779 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
pix_val_780 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
pix_val_781 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
pix_val_782 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
pix_val_783 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
785 rows × 15 columns
pixel_df.pix_val_77.value_counts()
0.000000 59720 1.000000 25 0.996078 13 0.992157 9 0.050980 6 ... 0.894118 1 0.690196 1 0.725490 1 0.517647 1 0.819608 1 Name: pix_val_77, Length: 150, dtype: int64
pixel_df.pix_val_78.value_counts()
0.000000 59862 1.000000 6 0.960784 4 0.992157 4 0.141176 4 ... 0.556863 1 0.584314 1 0.427451 1 0.078431 1 0.501961 1 Name: pix_val_78, Length: 97, dtype: int64
We use a scatter plot to determine the correlation between the pix_val_77
and pix_val_78
values and the actual_class
values.
plt.figure(figsize=(8, 8))
color = sns.color_palette("hls", 10)
sns.scatterplot(x="pix_val_77", y="pix_val_78", hue="actual_class", palette=color, data = pixel_df, legend="full")
plt.legend(loc='upper left');
We create a Random Forest Classifier (with the default 100 trees) and use it to find the relative importance of the 784 features (pixels) in the training set. We produce a heat map to visualize the relative importance of the features (using code from Hands On Machine Learning by A. Geron). Finally, we select the 70 most important features (pixels) from the training, validation and test images to test our 'best' model on.
rnd_clf = RandomForestClassifier(n_estimators=100, random_state=42)
rnd_clf.fit(x_train_norm,y_train_encoded)
RandomForestClassifier(random_state=42)
plot_digit(rnd_clf.feature_importances_)
cbar = plt.colorbar(ticks=[rnd_clf.feature_importances_.min(), rnd_clf.feature_importances_.max()])
cbar.ax.set_yticklabels(['Not important', 'Very important'])
plt.show()
# https://stackoverflow.com/questions/6910641/how-do-i-get-indices-of-n-maximum-values-in-a-numpy-array
n = 70
imp_arr = rnd_clf.feature_importances_
idx = (-imp_arr).argsort()[:n] # get the indices of the 70 "most important" features/pixels
len(idx)
70
train_images_sm = x_train_norm[:,idx]
test_images_sm = x_test_norm[:,idx]
train_images_sm.shape, test_images_sm.shape # the reduced images have dimension 70
((60000, 70), (10000, 70))
We convert the array of indexes to ordered pairs and plot them as red circles on the second training image. These are the features (pixels) we train our neural network on.
# Convert a flat index n (0 <= n < 784) of a row-major 28x28 image into its (row, column) pair.
def pair(n, size):
    row = n // size
    col = n % size
    return row, col
plt.imshow(x_train_norm[1].reshape(28,28), cmap='binary')
rows, cols = np.array([pair(k, 28) for k in idx]).T
plt.scatter(cols, rows, color='red', s=20)  # imshow puts columns on the x-axis and rows on the y-axis
<matplotlib.collections.PathCollection at 0x7f998ec32220>
We use the Sequential class defined in Keras to create our model. All the layers are going to be Dense layers. This means that, as in the figure shown above, all the nodes of a layer are connected to all the nodes of the preceding layer, i.e. they are densely connected.
After the model is built, we view a summary of its layers and parameter counts.
model = Sequential([
Dense(input_shape=[70], units=128, activation = tf.nn.relu,kernel_regularizer=tf.keras.regularizers.L2(0.001)),
Dense(name = "output_layer", units = 10, activation = tf.nn.softmax)
])
model.summary()
Model: "sequential_2" _________________________________________________________________ Layer (type) Output Shape Param # ================================================================= dense_2 (Dense) (None, 128) 9088 output_layer (Dense) (None, 10) 1290 ================================================================= Total params: 10,378 Trainable params: 10,378 Non-trainable params: 0 _________________________________________________________________
keras.utils.plot_model(model, "mnist_model.png", show_shapes=True)
You must install pydot (`pip install pydot`) and install graphviz (see instructions at https://graphviz.gitlab.io/download/) for plot_model to work.
In addition to setting up our model architecture, we also need to define which algorithm the model should use to optimize the weights and biases given the data. We will use RMSprop, a variant of stochastic gradient descent with an adaptive, per-parameter learning rate.
We also need to define a loss function. Think of this function as a measure of the difference between the predicted outputs and the actual outputs given in the dataset. This loss needs to be minimized in order to achieve higher model accuracy; that is what the optimization algorithm essentially does: it minimizes the loss during model training. For our multi-class classification problem, categorical cross-entropy is commonly used.
Finally, we will track accuracy as a metric while the model trains.
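To make the loss concrete, here is a small illustrative calculation (not part of the original notebook, with made-up numbers) that computes the categorical cross-entropy for a single one-hot label by hand and checks it against tf.keras.losses.CategoricalCrossentropy:
# Made-up example: cross-entropy for one sample whose true class is 5.
y_true_demo = np.array([[0., 0., 0., 0., 0., 1., 0., 0., 0., 0.]])                      # one-hot label
y_pred_demo = np.array([[0.02, 0.01, 0.02, 0.05, 0.05, 0.70, 0.05, 0.04, 0.03, 0.03]])  # softmax-like output
manual_loss = -np.sum(y_true_demo * np.log(y_pred_demo), axis=1)                        # -log(0.70), about 0.357
keras_loss = tf.keras.losses.CategoricalCrossentropy()(y_true_demo, y_pred_demo).numpy()
print(manual_loss, keras_loss)  # both approximately 0.357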
tf.keras.optimizers.RMSprop
https://www.tensorflow.org/api_docs/python/tf/keras/optimizers/RMSprop
tf.keras.losses.CategoricalCrossentropy
https://www.tensorflow.org/api_docs/python/tf/keras/losses/CategoricalCrossentropy
model.compile(optimizer='rmsprop',
loss = 'categorical_crossentropy',
metrics=['accuracy'])
tf.keras.Model.fit
https://www.tensorflow.org/api_docs/python/tf/keras/Model#fit
tf.keras.callbacks.EarlyStopping
https://www.tensorflow.org/api_docs/python/tf/keras/callbacks/EarlyStopping
history = model.fit(
train_images_sm
,y_train_encoded
,epochs = 200
,validation_split=0.20
,callbacks=[tf.keras.callbacks.ModelCheckpoint("DNN_model.h5",save_best_only=True,save_weights_only=False)
,tf.keras.callbacks.EarlyStopping(monitor='val_accuracy', patience=2)]
)
Epoch 1/200 1500/1500 [==============================] - 2s 1ms/step - loss: 0.7078 - accuracy: 0.8198 - val_loss: 0.5170 - val_accuracy: 0.8717 Epoch 2/200 1500/1500 [==============================] - 2s 1ms/step - loss: 0.4977 - accuracy: 0.8750 - val_loss: 0.4304 - val_accuracy: 0.8967 Epoch 3/200 1500/1500 [==============================] - 2s 1ms/step - loss: 0.4320 - accuracy: 0.8916 - val_loss: 0.3876 - val_accuracy: 0.9078 Epoch 4/200 1500/1500 [==============================] - 2s 1ms/step - loss: 0.3923 - accuracy: 0.9029 - val_loss: 0.3606 - val_accuracy: 0.9145 Epoch 5/200 1500/1500 [==============================] - 2s 1ms/step - loss: 0.3648 - accuracy: 0.9104 - val_loss: 0.3533 - val_accuracy: 0.9133 Epoch 6/200 1500/1500 [==============================] - 2s 1ms/step - loss: 0.3452 - accuracy: 0.9142 - val_loss: 0.3332 - val_accuracy: 0.9187 Epoch 7/200 1500/1500 [==============================] - 2s 1ms/step - loss: 0.3311 - accuracy: 0.9185 - val_loss: 0.3279 - val_accuracy: 0.9187 Epoch 8/200 1500/1500 [==============================] - 2s 1ms/step - loss: 0.3189 - accuracy: 0.9215 - val_loss: 0.3225 - val_accuracy: 0.9207 Epoch 9/200 1500/1500 [==============================] - 2s 1ms/step - loss: 0.3108 - accuracy: 0.9244 - val_loss: 0.3078 - val_accuracy: 0.9262 Epoch 10/200 1500/1500 [==============================] - 1s 993us/step - loss: 0.3020 - accuracy: 0.9261 - val_loss: 0.3134 - val_accuracy: 0.9233 Epoch 11/200 1500/1500 [==============================] - 1s 988us/step - loss: 0.2955 - accuracy: 0.9281 - val_loss: 0.3004 - val_accuracy: 0.9294 Epoch 12/200 1500/1500 [==============================] - 1s 985us/step - loss: 0.2902 - accuracy: 0.9286 - val_loss: 0.3111 - val_accuracy: 0.9225 Epoch 13/200 1500/1500 [==============================] - 1s 988us/step - loss: 0.2856 - accuracy: 0.9313 - val_loss: 0.2880 - val_accuracy: 0.9308 Epoch 14/200 1500/1500 [==============================] - 1s 989us/step - loss: 0.2802 - accuracy: 0.9313 - val_loss: 0.2961 - val_accuracy: 0.9293 Epoch 15/200 1500/1500 [==============================] - 2s 1ms/step - loss: 0.2768 - accuracy: 0.9328 - val_loss: 0.2982 - val_accuracy: 0.9297
In order to ensure that this is not simple "memorization" by the machine, we should evaluate the performance on the test set. This is easy to do: we simply use the evaluate
method on our model.
model = tf.keras.models.load_model("DNN_model.h5")
print(f"Test acc: {model.evaluate(test_images_sm, y_test_encoded)[1]:.3f}")
313/313 [==============================] - 0s 705us/step - loss: 0.2902 - accuracy: 0.9309 Test acc: 0.931
# loss, accuracy = model.evaluate(x_test_norm, y_test_encoded)
# print('test set accuracy: ', accuracy * 100)
preds = model.predict(test_images_sm)
print('shape of preds: ', preds.shape)
313/313 [==============================] - 0s 579us/step shape of preds: (10000, 10)
Look at the first 25 test set images, plotted along with their predicted and actual labels, to understand how the trained model actually performed.
plt.figure(figsize = (12, 8))
start_index = 0
for i in range(25):
plt.subplot(5, 5, i + 1)
plt.grid(False)
plt.xticks([])
plt.yticks([])
pred = np.argmax(preds[start_index + i])
actual = np.argmax(y_test_encoded[start_index + i])
col = 'g'
if pred != actual:
col = 'r'
plt.xlabel('i={} | pred={} | true={}'.format(start_index + i, pred, actual), color = col)
plt.imshow(x_test[start_index + i], cmap='binary')
plt.show()
history_dict = history.history
history_dict.keys()
dict_keys(['loss', 'accuracy', 'val_loss', 'val_accuracy'])
We use Matplotlib to create two side-by-side plots displaying the training and validation loss (resp. accuracy) for each training epoch.
history_dict = history.history
history_dict.keys()
dict_keys(['loss', 'accuracy', 'val_loss', 'val_accuracy'])
losses = history.history['loss']
accs = history.history['accuracy']
val_losses = history.history['val_loss']
val_accs = history.history['val_accuracy']
epochs = len(losses)
history_df=pd.DataFrame(history_dict)
history_df.tail().round(3)
loss | accuracy | val_loss | val_accuracy | |
---|---|---|---|---|
10 | 0.295 | 0.928 | 0.300 | 0.929 |
11 | 0.290 | 0.929 | 0.311 | 0.923 |
12 | 0.286 | 0.931 | 0.288 | 0.931 |
13 | 0.280 | 0.931 | 0.296 | 0.929 |
14 | 0.277 | 0.933 | 0.298 | 0.930 |
plot_history(history)
pred1= model.predict(test_images_sm)
pred1=np.argmax(pred1, axis=1)
313/313 [==============================] - 0s 575us/step
print_validation_report(y_test, pred1)
Classification Report precision recall f1-score support 0 0.91 0.98 0.94 980 1 0.97 0.98 0.98 1135 2 0.94 0.89 0.91 1032 3 0.93 0.92 0.93 1010 4 0.93 0.93 0.93 982 5 0.88 0.91 0.90 892 6 0.95 0.92 0.93 958 7 0.93 0.94 0.93 1028 8 0.95 0.91 0.93 974 9 0.90 0.92 0.91 1009 accuracy 0.93 10000 macro avg 0.93 0.93 0.93 10000 weighted avg 0.93 0.93 0.93 10000 Accuracy Score: 0.9309 Root Mean Square Error: 1.1062097450302995
Let us see what the confusion matrix looks like, using both sklearn.metrics and tf.math. Then we visualize the confusion matrix and see what that tells us.
# Get the predicted classes:
# pred_classes = model.predict_classes(x_train_norm)# give deprecation warning
pred_classes = np.argmax(model.predict(test_images_sm), axis=-1)
pred_classes;
313/313 [==============================] - 0s 572us/step
conf_mx = tf.math.confusion_matrix(y_test, pred_classes)
conf_mx;
cm = sns.light_palette((260, 75, 60), input="husl", as_cmap=True)
df = pd.DataFrame(preds[0:20], columns = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9'])
df.style.format("{:.2%}").background_gradient(cmap=cm)
0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | |
---|---|---|---|---|---|---|---|---|---|---|
0 | 0.02% | 0.00% | 0.63% | 0.13% | 0.00% | 0.02% | 0.00% | 99.03% | 0.05% | 0.12% |
1 | 73.99% | 0.01% | 18.86% | 0.00% | 0.00% | 0.01% | 6.95% | 0.00% | 0.18% | 0.00% |
2 | 0.00% | 98.78% | 0.15% | 0.05% | 0.11% | 0.08% | 0.12% | 0.59% | 0.11% | 0.01% |
3 | 100.00% | 0.00% | 0.00% | 0.00% | 0.00% | 0.00% | 0.00% | 0.00% | 0.00% | 0.00% |
4 | 0.00% | 0.00% | 0.12% | 0.00% | 95.00% | 0.00% | 0.26% | 0.75% | 0.02% | 3.84% |
5 | 0.00% | 98.53% | 0.02% | 0.02% | 0.01% | 0.01% | 0.00% | 1.32% | 0.08% | 0.00% |
6 | 0.00% | 0.00% | 0.04% | 0.71% | 88.68% | 0.36% | 0.01% | 1.17% | 0.65% | 8.38% |
7 | 0.00% | 0.00% | 0.01% | 0.24% | 0.33% | 0.42% | 0.00% | 0.25% | 0.91% | 97.83% |
8 | 0.01% | 0.00% | 74.29% | 0.53% | 2.59% | 3.46% | 0.69% | 0.21% | 2.90% | 15.32% |
9 | 0.00% | 0.00% | 0.00% | 0.01% | 2.55% | 0.06% | 0.00% | 0.48% | 0.08% | 96.82% |
10 | 99.96% | 0.00% | 0.00% | 0.00% | 0.00% | 0.04% | 0.00% | 0.00% | 0.00% | 0.00% |
11 | 2.00% | 0.00% | 0.04% | 0.00% | 0.70% | 5.16% | 90.79% | 0.00% | 1.00% | 0.31% |
12 | 0.00% | 0.00% | 0.00% | 0.00% | 0.82% | 0.00% | 0.00% | 0.23% | 0.01% | 98.94% |
13 | 99.98% | 0.00% | 0.00% | 0.00% | 0.00% | 0.00% | 0.02% | 0.00% | 0.00% | 0.00% |
14 | 0.00% | 99.90% | 0.01% | 0.03% | 0.00% | 0.01% | 0.00% | 0.00% | 0.05% | 0.00% |
15 | 1.76% | 0.00% | 0.02% | 3.76% | 0.00% | 94.43% | 0.01% | 0.01% | 0.01% | 0.00% |
16 | 0.00% | 0.00% | 0.04% | 0.00% | 0.86% | 0.00% | 0.00% | 0.03% | 0.03% | 99.03% |
17 | 0.47% | 0.00% | 0.34% | 0.46% | 0.00% | 0.14% | 0.00% | 98.51% | 0.01% | 0.07% |
18 | 0.01% | 0.00% | 0.44% | 77.60% | 0.76% | 16.28% | 0.31% | 0.02% | 2.31% | 2.28% |
19 | 0.00% | 0.00% | 0.00% | 0.00% | 99.39% | 0.00% | 0.00% | 0.08% | 0.00% | 0.53% |
We use code from chapter 3 of Hands on Machine Learning (A. Geron) (cf. https://github.com/ageron/handson-ml2/blob/master/03_classification.ipynb) to display a "heat map" of the confusion matrix. Then we normalize the confusion matrix so we can compare error rates.
plot_confusion_matrix(y_test,pred_classes)
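The normalization mentioned above is not shown in this section; the following is a minimal sketch of how it could be done in the style of Geron's notebook, assuming conf_mx is the matrix computed earlier with tf.math.confusion_matrix:
# Sketch: turn raw counts into per-class error rates and hide the diagonal.
conf_np = np.array(conf_mx)                    # TensorFlow tensor -> NumPy array
row_sums = conf_np.sum(axis=1, keepdims=True)  # number of test images per true class
norm_conf_mx = conf_np / row_sums              # rows now hold error rates
np.fill_diagonal(norm_conf_mx, 0)              # zero the diagonal so only the errors stand out
plt.matshow(norm_conf_mx, cmap=plt.cm.gray)
plt.show()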