NOTE: THIS IS FOR REFERENCE, NOT FOR USE
# Experiment with the mnist fashion data set

It is very similar to the mnist digit data set.  It has 10 categories of black and white images. 

* 0 = t-shirt
* 1 = trouser
* 2 = pullover
* 3 = dress
* 4 = coat
* 5 = sandal
* 6 = shirt
* 7 = sneaker
* 8 = bag
* 9 = ankle boot

In [1138]:
from aitk.utils import gallery, array_to_image
from aitk.networks import Network

import tensorflow
from tensorflow.keras.datasets import fashion_mnist
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Conv2D, MaxPooling2D, Flatten, Dropout
from tensorflow.keras.utils import to_categorical

# Get the data
* Download the data
* Explore what you have

In [1139]:
(train_x, train_y), (test_x, test_y) = fashion_mnist.load_data()

In [1140]:
train_x.shape

(60000, 28, 28)

In [1141]:
test_x.shape

(10000, 28, 28)

In [1142]:
images = [array_to_image(train_x[i]) for i in range(10)]
bigger = [image.resize((100,100)) for image in images]

In [1143]:
gallery(bigger, gallery_shape=(5,2), labels=train_y[:10])

0,1,2,3,4
9,0,0,3,0
2,7,2,5,5


In [1144]:
train_y[:20]

array([9, 0, 0, 3, 0, 2, 7, 2, 5, 5, 0, 9, 5, 5, 7, 9, 1, 0, 6, 4],
      dtype=uint8)

# Prepare the data for the network
* You may need to normalize the inputs so that they are in the range [0,1].
* You may need to convert the targets so that they are represented as one-hot vectors when you are doing categorization.

### Input data

In [1145]:
min_input = train_x.min()
max_input = train_x.max()
print("range of input values is:", min_input, max_input)

range of input values is: 0 255


In [1146]:
train_x_normalized = train_x/255

In [1147]:
test_x_normalized = test_x/255

### Data sent into a Conv2D layer must have a depth
* This may require you to do a reshape command.
* For these black and white images there is only one channel of information.
* For color images there are typically 3 channels (Red, Green, Blue)

In [1148]:
train_x_normalized = train_x_normalized.reshape(60000,28,28,1)
test_x_normalized = test_x_normalized.reshape(10000,28,28,1)

### Target data

In [1149]:
num_categories = len(set(train_y))

In [1150]:
train_y_category = to_categorical(train_y, num_categories)

In [1151]:
test_y_category = to_categorical(test_y, num_categories)

In [1152]:
train_y_category[0]

array([0., 0., 0., 0., 0., 0., 0., 0., 0., 1.], dtype=float32)

# Construct the model

This is just one possible configuration of layers to learn the data set.  You should experiment with the number of filters, the filter size, and the layers themselves. However, be sure to leave the output layer's activation function as `softmax`.

You should explicitly name each layer so that it will be easier to inspect them later. Use a clear naming convention.  For example the first convolution layer might be "conv1", the next "conv2", and so on.

In [1153]:
neural_net = Sequential()
neural_net.add(Conv2D(20,(2,2),name="conv1", activation="relu",input_shape=(28,28,1)))
neural_net.add(Conv2D(25,(3,3),name="conv2", activation="relu",padding = 'same'))
neural_net.add(MaxPooling2D(pool_size=(2,2),name="pool1"))
neural_net.add(Conv2D(30,(4,4),name="conv3", activation="relu", padding = 'same'))
neural_net.add(Dropout(.1))
neural_net.add(Conv2D(35,(5,5),name="conv4", activation="relu",padding = 'same'))
neural_net.add(MaxPooling2D(pool_size=(3,5),name="pool2"))
neural_net.add(Conv2D(30,(5,5),name="conv5", activation="relu",padding = 'same'))
neural_net.add(Flatten(name="flatten"))
neural_net.add(Dense(20, name="hidden1", activation='relu'))
neural_net.add(Dropout(.2))
neural_net.add(Dense(10, name="output", activation='softmax'))
neural_net.summary()

Model: "sequential_66"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv1 (Conv2D)              (None, 27, 27, 20)        100       
                                                                 
 conv2 (Conv2D)              (None, 27, 27, 25)        4525      
                                                                 
 pool1 (MaxPooling2D)        (None, 13, 13, 25)        0         
                                                                 
 conv3 (Conv2D)              (None, 13, 13, 30)        12030     
                                                                 
 dropout_44 (Dropout)        (None, 13, 13, 30)        0         
                                                                 
 conv4 (Conv2D)              (None, 13, 13, 35)        26285     
                                                                 
 pool2 (MaxPooling2D)        (None, 4, 2, 35)        

# Compile the model

For categorical data like handwritten digits we typically use a loss function called `categorical_crossentropy` and should be used in conjunction with the `softmax` activation function at the output layer.  This interprets the outputs as representing probabilities and forces them to sum to 1.0.  Thus the output from the network will reflect how likely it considers a particular input to be a member of one of the output categories.

In [1154]:
neural_net.compile(optimizer="SGD", loss="categorical_crossentropy",
                   metrics=['accuracy'])

# Create an aitk Network
This allows us to do more visualization of what is happening inside the network.

In [1155]:
net = Network(neural_net)

In [1156]:
from time import sleep
for i in range(10):
    net.propagate(train_x_normalized[i])
    net.display(train_x_normalized[i])
    sleep(1.0)

# Train the model

In [1157]:
history = net.fit(train_x_normalized, 
                  train_y_category, 
                  verbose=1,
                  validation_data=(test_x_normalized, 
                                   test_y_category),
                  epochs=25)

Epoch 25/25 loss: 0.26620590686798096 - accuracy: 0.9040833115577698 - val_loss: 0.2667994797229767 - val_accuracy: 0.9042999744415283


# Inspect the feature maps

We can ask the network to propagate to specific layers and inspect the representations created there to try to understand how it has solved the problem. First we need to set up the color maps to display properly by finding the maximum possible value using a particular test image.

In [1158]:
test_image = 100 # change this to see a different image 

In [1159]:
from math import ceil
for layer in ["conv1", "pool1"]:
    data = [net.propagate_to(test_x_normalized[test_image], layer, channel=channel)
            for channel in range(8)]
    largest = max([sublist.max() for sublist in data])
    net.set_config_layer(layer, colormap=("gray", 0, ceil(largest)))

In [1160]:
images = [net.propagate_to(test_x_normalized[test_image], "conv1", "image", channel=channel)
         for channel in range(8)]
original = test_x_normalized[test_image]
bigger = [image.resize((200,200),resample=0) for image in images]
gallery([original] + bigger, labels="channel{index}", gallery_shape=(9,1))

0,1,2,3,4,5,6,7,8
channel0,channel1,channel2,channel3,channel4,channel5,channel6,channel7,channel8


In [1161]:
images = [net.propagate_to(test_x_normalized[test_image], "pool1", "image", channel=channel)
         for channel in range(8)]
original = test_x_normalized[test_image]
bigger = [image.resize((200,200),resample=0) for image in images]
gallery([original] + bigger, labels="channel{index}", gallery_shape=(9,1))

0,1,2,3,4,5,6,7,8
channel0,channel1,channel2,channel3,channel4,channel5,channel6,channel7,channel8


# Examine the results
Check which inputs the network is getting wrong. Recall that there are 10 thousand test images.

In [1162]:
from numpy import argmax
outputs = net.predict(test_x_normalized)
answers = [argmax(output) for output in outputs]
targets = [argmax(target) for target in test_y_category]

In [1163]:
incorrect = [i for i in range(len(answers)) if answers[i] != targets[i]]
len(incorrect)

957

In [1164]:
missed_target = [targets[i] for i in incorrect]
wrong_answer = [answers[i] for i in incorrect]

Let's find out which target classifications are most frequently missed.

In [1165]:
from collections import Counter

In [1166]:
t_ctr = Counter(missed_target)
t_ctr.most_common()

[(6, 291),
 (4, 158),
 (0, 134),
 (2, 126),
 (3, 96),
 (9, 50),
 (1, 33),
 (7, 30),
 (8, 20),
 (5, 19)]

Let's find out which wrong classification is most frequently given.

In [1167]:
a_ctr = Counter(wrong_answer)
a_ctr.most_common()

[(6, 228),
 (2, 192),
 (4, 181),
 (0, 148),
 (3, 84),
 (7, 58),
 (9, 25),
 (5, 21),
 (8, 18),
 (1, 2)]

In [1168]:
images = [array_to_image(test_x[index]) for index in incorrect]
label = [str(missed_target[i]) + "," + str(wrong_answer[i]) for i in range(len(wrong_answer))]
gallery(images, labels=label)

0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30
75,42,95,42,60,30,79,26,26,24,34,97,62,42,97,63,43,26,13,24,26,42,62,42,60,62,97,46.0,62.0,6.0,6.0
43,6,20,46,97,64,6,2,24,30,60,36,80,42,64,46,34,60,36,42,26,36,82,42,42,85,26,6.0,43.0,36.0,42.0
13,63,64,20,6,26,13,79,42,42,62,24,62,6,3,57,46,62,6,64,6,6,46,62,62,34,64,6.0,60.0,24.0,60.0
30,42,6,4,64,57,46,60,46,62,3,64,6,46,60,43,64,62,26,63,46,6,60,6,64,24,24,60.0,85.0,6.0,42.0
42,34,36,42,24,46,46,60,46,26,84,46,62,97,13,6,62,26,97,62,26,6,6,60,26,24,97,64.0,68.0,10.0,34.0
6,6,62,24,62,60,34,43,46,34,46,6,46,86,6,46,64,34,97,60,3,57,97,13,24,36,83,13.0,24.0,60.0,62.0
79,57,24,42,26,34,43,34,60,26,24,34,42,64,60,62,34,59,24,24,60,24,6,64,30,97,32,42.0,24.0,62.0,62.0
46,16,60,60,64,26,46,60,64,83,13,6,46,62,62,6,34,42,6,97,64,26,64,34,6,60,6,24.0,79.0,8.0,62.0
46,46,2,63,34,2,6,62,62,46,36,30,2,30,26,39,13,62,79,42,43,34,32,60,62,60,62,8.0,34.0,34.0,97.0
6,79,34,46,24,97,60,62,13,2,26,79,30,62,34,75,83,32,60,46,63,60,43,97,43,34,2,26.0,62.0,46.0,46.0
