# Predictive Modeling Project EPF
## Eurosat terrain classification

In [3]:
import os
import numpy as np
import tensorflow as tf
import tensorflow_datasets as tfds

### Exploring the dataset

In [4]:
# Load the EuroSAT dataset from TFDS and split train and test sets
train_ds, test_ds , val_ds= tfds.load('eurosat/rgb', split=['train[:60%]', 'train[60%:80%]', 'train[80%:]'], shuffle_files=True, as_supervised=True)

2022-12-17 09:57:51.638811: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:981] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-12-17 09:57:51.639054: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2022-12-17 09:57:51.639122: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcublas.so.11'; dlerror: libcublas.so.11: cannot open shared object file: No such file or directory
2022-12-17 09:57:51.639165: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcublasLt.so.11'; dlerror: libcublasLt.so.11: cannot open shared object file: No such file or directory
2022-12-17 09:57:51.640856: W tensorflow/c

In [5]:
# Print the number of samples in each set
print('Number of training samples: ', tf.data.experimental.cardinality(train_ds).numpy())
print('Number of validation samples: ', tf.data.experimental.cardinality(val_ds).numpy())
print('Number of test samples: ', tf.data.experimental.cardinality(test_ds).numpy())


Number of training samples:  16200
Number of validation samples:  5400
Number of test samples:  5400


In [6]:
# show some images from the dataset
import matplotlib.pyplot as plt
%matplotlib inline

fig = plt.figure(figsize=(10, 10))
for i, (image, label) in enumerate(train_ds.take(9)):
    ax = fig.add_subplot(3, 3, i + 1, xticks=[], yticks=[])
    ax.imshow(image.numpy().astype("uint8"))
    ax.set_title(label.numpy())

ImportError: cannot import name '_imaging' from 'PIL' (/usr/lib/python3/dist-packages/PIL/__init__.py)

In [7]:
# show shape of the 5 first images
for image, label in train_ds.take(5):
    print(image.numpy().shape)
    

(64, 64, 3)
(64, 64, 3)
(64, 64, 3)
(64, 64, 3)
(64, 64, 3)


2022-12-17 09:58:38.556014: W tensorflow/core/kernels/data/cache_dataset_ops.cc:856] The calling iterator did not fully read the dataset being cached. In order to avoid unexpected truncation of the dataset, the partially cached contents of the dataset  will be discarded. This can happen if you have an input pipeline similar to `dataset.cache().take(k).repeat()`. You should use `dataset.take(k).cache().repeat()` instead.


In [8]:
# Show all the unique labels in the dataset
labels = []
for _, label in train_ds:
    labels.append(label.numpy())
labels = np.unique(labels)
print(labels)


[0 1 2 3 4 5 6 7 8 9]


The fact that the labels are just numbers is no a probleme we can always translate them back to the original labels. The dataset is available at https://www.tensorflow.org/datasets/catalog/eurosat.

### Data preprocessing
For the preprocessing we will use the following steps:
- Resize the images to 64x64
- Normalize the images
- Repeat for all the datasets

At the moment no need for data augmentation.

In [9]:
# Define the preprocessing sequencial model
from tensorflow.keras import layers
from tensorflow.keras import models

preprocess_input = models.Sequential([
    layers.experimental.preprocessing.Rezise(32, 32),
    layers.experimental.preprocessing.Rescaling(1./255),
])
    


AttributeError: module 'keras.api._v2.keras.layers.experimental.preprocessing' has no attribute 'Rezise'

In [10]:
# Define the model
model = models.Sequential([
    preprocess_input,
    layers.Conv2D(32, (3, 3), activation='relu', input_shape=(32, 32, 3)),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.Flatten(),
    layers.Dense(64, activation='relu'),
    layers.Dense(10, activation='softmax')
])

NameError: name 'preprocess_input' is not defined

In [None]:
# show the model summary
model.summary()

In [None]:
# Compile the model
model.compile(optimizer='adam',
                loss='sparse_categorical_crossentropy',
                metrics=['accuracy'])
                

In [None]:
# Train the model
history = model.fit(train_ds, epochs=10, validation_data=val_ds)

In [None]:
# Evaluate the model
test_loss, test_acc = model.evaluate(test_ds)
print('Test accuracy: ', test_acc)


In [None]:
# Plot the training and validation accuracy and loss
acc = history.history['accuracy']
val_acc = history.history['val_accuracy']
loss = history.history['loss']
val_loss = history.history['val_loss']

epochs = range(1, len(acc) + 1)

plt.plot(epochs, acc, 'bo', label='Training acc')
plt.plot(epochs, val_acc, 'b', label='Validation acc')
plt.title('Training and validation accuracy')
plt.legend()

plt.figure()

plt.plot(epochs, loss, 'bo', label='Training loss')
plt.plot(epochs, val_loss, 'b', label='Validation loss')
plt.title('Training and validation loss')
plt.legend()

plt.show()

In [None]:
# Show the confusion matrix
from sklearn.metrics import confusion_matrix
import seaborn as sns

y_pred = []
y_true = []
for image, label in test_ds:
    y_pred.append(np.argmax(model.predict(image[tf.newaxis, ...])))
    y_true.append(label.numpy())

cm = confusion_matrix(y_true, y_pred)
plt.figure(figsize=(10, 10))
sns.heatmap(cm, annot=True, fmt='d')
plt.xlabel('Predicted')
plt.ylabel('True')
plt.show()
