<a href="https://colab.research.google.com/github/zerotodeeplearning/ztdl-masterclasses/blob/master/solutions_do_not_open/Neural_Networks_with_Keras_solution.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Learn with us: www.zerotodeeplearning.com

Copyright © 2021: Zero to Deep Learning ® Catalit LLC.

In [None]:
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Neural Networks with Keras

In [None]:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import seaborn as sns

## Shallow and Deep Networks

In [None]:
from sklearn.datasets import make_moons

In [None]:
# Synthetic two-class "moons" dataset; the fixed random_state keeps it reproducible.
X, y = make_moons(n_samples=1000, noise=0.1, random_state=0)

# seaborn >= 0.12 no longer accepts x/y positionally, so pass them by keyword.
sns.scatterplot(x=X[:, 0], y=X[:, 1], hue=y);

In [None]:
# Dimensions of the feature array.
X.shape

In [None]:
from sklearn.model_selection import train_test_split

In [None]:
# Hold out 30% of the samples for testing; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=0
)

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten
from tensorflow.keras.optimizers import SGD, Adam
from tensorflow.keras.losses import BinaryCrossentropy, SparseCategoricalCrossentropy

### Shallow Model

In [None]:
from tensorflow.keras.metrics import BinaryAccuracy

# A single linear unit with no activation, so the network outputs a raw logit.
model = Sequential([
    Dense(1, input_shape=(2,))
])

# from_logits=True tells the loss to apply the sigmoid itself. The accuracy
# metric must then threshold the logit at 0 (probability 0.5); the 'accuracy'
# shortcut would compare the raw logit against 0.5 and under-count class 1.
# name='accuracy' keeps the history keys ('accuracy', 'val_accuracy') unchanged.
model.compile(optimizer=Adam(learning_rate=0.05),
              loss=BinaryCrossentropy(from_logits=True),
              metrics=[BinaryAccuracy(name='accuracy', threshold=0.0)])

In [None]:
# Print the model's layers and parameter counts.
model.summary()

In [None]:
# Train silently for 50 epochs; 10% of the training data is held out for validation.
h = model.fit(
    X_train,
    y_train,
    epochs=50,
    validation_split=0.1,
    verbose=0,
)

In [None]:
# One curve per tracked quantity in the training history, plotted against the epoch index.
pd.DataFrame(h.history).plot();

In [None]:
def plot_decision_boundary(model, X, y):
    """Shade the model's output over the 2-D feature plane and overlay the data.

    Parameters
    ----------
    model : object with a ``predict`` method
        Called once with an array of grid points (two columns); it must
        return exactly one value per grid point.
    X : array
        Two-feature samples, indexed as ``X[:, 0]`` and ``X[:, 1]``, drawn as
        a scatter plot on top of the shaded background.
    y : array-like
        Per-sample values used to colour the scatter points.

    Returns
    -------
    None
        The figure is drawn through matplotlib's pyplot state.
    """
    # Pad the bounding box of the data so no point sits on the plot edge.
    amin, bmin = X.min(axis=0) - 0.1
    amax, bmax = X.max(axis=0) + 0.1
    hticks = np.linspace(amin, amax, 101)
    vticks = np.linspace(bmin, bmax, 101)

    # Build a 101 x 101 grid and flatten it into (n_points, 2) model inputs.
    aa, bb = np.meshgrid(hticks, vticks)
    ab = np.c_[aa.ravel(), bb.ravel()]

    # One prediction per grid point, folded back into the grid's shape.
    c = np.asarray(model.predict(ab))
    cc = c.reshape(aa.shape)

    plt.figure(figsize=(12, 8))
    plt.contourf(aa, bb, cc, cmap='bwr', alpha=0.2)
    # seaborn >= 0.12 no longer accepts x/y positionally, so pass them by keyword.
    sns.scatterplot(x=X[:, 0], y=X[:, 1], hue=y)

In [None]:
# Visualise the shallow model's output over the whole dataset.
plot_decision_boundary(model, X, y)

In [None]:
# Loss and accuracy on the held-out test split.
loss, accuracy = model.evaluate(X_test, y_test, verbose=0)

In [None]:
# Display the test accuracy.
accuracy

### Exercise 1: Deep model

The model above was not able to perfectly classify the data. Build a deep model with at least one hidden layer (one or two are enough) and re-train it on the data. You should be able to obtain 100% accuracy. Remember to include the activation function in the definition of each layer.

- Define a model
- Compile the model
- Fit the model
- Plot the training history
- Plot the decision boundary
- Compare the model performance on the training and test sets
- Print the confusion matrix for the test set (bonus points if you make it pretty)

In [None]:
from sklearn.metrics import accuracy_score, confusion_matrix

In [None]:
# Two tanh hidden layers (4 and 2 units) feeding one sigmoid unit, so the
# network outputs a probability directly.
hidden_activation = 'tanh'
model = Sequential([
    Dense(4, activation=hidden_activation, input_shape=(2,)),
    Dense(2, activation=hidden_activation),
    Dense(1, activation='sigmoid'),
])

model.compile(
    optimizer=Adam(learning_rate=0.01),
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

In [None]:
# Train silently for 100 epochs; 10% of the training data is held out for validation.
h = model.fit(
    X_train,
    y_train,
    epochs=100,
    validation_split=0.1,
    verbose=0,
)

In [None]:
# Training history of the deep model, one curve per tracked quantity.
pd.DataFrame(h.history).plot();

In [None]:
# Visualise the deep model's output over the whole dataset.
plot_decision_boundary(model, X, y)

In [None]:
# The output layer is a single sigmoid unit, so predict() returns one column of
# probabilities. argmax over that lone column is always 0; threshold at 0.5
# instead and flatten to a 1-D vector of 0/1 labels.
y_train_pred = (model.predict(X_train) > 0.5).astype(int).ravel()
y_test_pred = (model.predict(X_test) > 0.5).astype(int).ravel()

print("The Accuracy score on the Train set is:\t{:0.3f}".format(accuracy_score(y_train, y_train_pred)))
print("The Accuracy score on the Test set is:\t{:0.3f}".format(accuracy_score(y_test, y_test_pred)))

In [None]:
# Confusion matrix on the test set: rows are true classes, columns are predicted classes.
cm = confusion_matrix(y_test, y_test_pred)
pd.DataFrame(cm)

## Multiclass classification with Images

In [None]:
from tensorflow.keras.datasets import fashion_mnist

In [None]:
# Load the predefined train/test split (this rebinds the variables used for the moons data).
(X_train, y_train), (X_test, y_test) = fashion_mnist.load_data()

# Divide by 255 to rescale the pixel values.
X_train = X_train / 255.0
X_test = X_test / 255.0

In [None]:
# Human-readable class names; the list position is the integer label it describes.
label_description = [
    "T-shirt/top", "Trouser", "Pullover", "Dress", "Coat",  # labels 0-4
    "Sandal", "Shirt", "Sneaker", "Bag", "Ankle boot",      # labels 5-9
]

In [None]:
# Dimensions of the training image array.
X_train.shape

In [None]:
# Dimensions of the training label array.
y_train.shape

In [None]:
# Preview the first 16 training images in a 4 x 4 grid, each titled with its class name.
n_rows, n_cols = 4, 4
plt.figure(figsize=(10, 10))
for i in range(n_rows * n_cols):
    ax = plt.subplot(n_rows, n_cols, i + 1)
    ax.imshow(X_train[i], cmap='gray')
    ax.set_title(label_description[y_train[i]])
    ax.axis('off')

In [None]:
# Fully connected classifier: flatten each 28 x 28 image, pass it through two
# ReLU layers, and emit one softmax score per class (10 classes).
model = Sequential([
    Flatten(input_shape=(28, 28)),
    Dense(units=256, activation='relu'),
    Dense(units=128, activation='relu'),
    Dense(units=10, activation='softmax'),
])

# Labels are plain integers, hence the sparse variant of categorical cross-entropy.
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

In [None]:
# Train for 5 epochs, holding out 10% of the training images for validation.
h = model.fit(X_train, y_train, epochs=5, validation_split=0.1)

In [None]:
# Training history of the dense image classifier, one curve per tracked quantity.
pd.DataFrame(h.history).plot();

In [None]:
# Softmax scores for every test image, one value per class.
y_pred = model.predict(X_test)

In [None]:
# Peek at the first five rows of scores.
y_pred[:5]

In [None]:
# True integer labels of the test set, for comparison with the predictions.
y_test

In [None]:
# The predicted class is the index of the largest score along the last axis.
y_pred_class = np.argmax(y_pred, axis=-1)

In [None]:
# Predicted integer labels for the test set.
y_pred_class

In [None]:
from sklearn.metrics import classification_report

In [None]:
# Per-class report on the test set, labelled with the readable class names.
print(classification_report(y_test, y_pred_class, target_names=label_description))

In [None]:
# Confusion matrix on the test set, with readable class names on both axes
# (rows are true classes, columns are predicted classes).
cm = confusion_matrix(y_test, y_pred_class)
df = pd.DataFrame(
    data=cm,
    index=label_description,
    columns=label_description,
)
df

### Exercise 2: Convolutional networks and GPU

Use a convolutional model to improve the performance. Write a model like this one:

```python
model = Sequential([
    Reshape(target_shape=(28, 28, 1),
            input_shape=(28, 28)),
    Conv2D(# your code here),
    Conv2D(# your code here),
    MaxPooling2D(),
    Flatten(),
    Dense(# your code here),
    Dense(# your code here)
])
```

And train it on the data for 5 epochs. You should be able to bring the accuracy above 90%.
Bonus points if you figure out how to change Colab's `Notebook settings` to use GPU acceleration.

Remember to display the confusion matrix for the test set.

In [None]:
from tensorflow.keras.layers import Reshape, Conv2D, MaxPooling2D

In [None]:
# Convolutional classifier. The Reshape layer adds the trailing channel axis
# that Conv2D expects; two 3 x 3 convolutions and one max-pooling step feed a
# dense head with one softmax score per class.
model = Sequential([
    Reshape(input_shape=(28, 28), target_shape=(28, 28, 1)),
    Conv2D(filters=32, kernel_size=(3, 3), activation='relu'),
    Conv2D(filters=32, kernel_size=(3, 3), activation='relu'),
    MaxPooling2D(),
    Flatten(),
    Dense(units=128, activation='relu'),
    Dense(units=10, activation='softmax'),
])

# Labels are plain integers, hence the sparse variant of categorical cross-entropy.
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

In [None]:
# Train for the 5 epochs the exercise asks for, holding out 10% of the
# training images for validation.
h = model.fit(X_train, y_train, epochs=5, validation_split=0.1)

In [None]:
# Training history of the convolutional model, one curve per tracked quantity.
pd.DataFrame(h.history).plot();

In [None]:
# The model emits one softmax score per class; the predicted label is the index of the largest.
y_test_pred = np.argmax(model.predict(X_test),axis=1)


In [None]:
# Confusion matrix of the convolutional model on the test set, with readable
# class names on both axes (rows are true classes, columns are predicted classes).
cm = confusion_matrix(y_test, y_test_pred)
df = pd.DataFrame(
    data=cm,
    index=label_description,
    columns=label_description,
)
df