# Présentation

Ce notebook crée un modèle basé sur `ResNet50` pour classifier la saison de culture : SA (Summer Autumn) ou WS (Winter Spring).

# Importation

In [1]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM, Embedding, Input, Flatten, Reshape, ConvLSTM2D, MaxPooling2D, Dropout
from tensorflow.keras.preprocessing import sequence
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.utils import pad_sequences
from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras.models import load_model

import numpy as np
import pandas as pd
from datetime import datetime, timedelta
# Fix the random seeds for reproducibility.
# set_random_seed seeds Python's `random`, NumPy and TensorFlow in one call;
# tf.random.set_seed alone only covers TensorFlow's own generator.
tf.keras.utils.set_random_seed(7)

# Notebook
from IPython.display import clear_output
clear_output()

# Importation des données

Les variables d'indice 1 (`df1`, `x1`, `y1`) désignent les données de validation ; celles d'indice 2 (`df2`, `x2`, `y2`) désignent les données d'entraînement.

In [2]:
# Format used to parse the 'Date of Harvest' column (day-month-year).
HARVEST_DATE_FORMAT = '%d-%m-%Y'


def dateparse(x):
    """Parse one 'dd-mm-YYYY' string into a datetime (kept for backward compatibility)."""
    return datetime.strptime(x, HARVEST_DATE_FORMAT)


def _read_harvest_csv(path):
    """Read a challenge CSV and convert its 'Date of Harvest' column to datetimes.

    The column is parsed after loading with an explicit format rather than via
    read_csv's `date_parser` argument, which is deprecated since pandas 2.0.
    """
    frame = pd.read_csv(path)
    frame['Date of Harvest'] = pd.to_datetime(
        frame['Date of Harvest'], format=HARVEST_DATE_FORMAT
    )
    return frame


df1 = _read_harvest_csv("Challenge_2_submission_template.csv")
df2 = _read_harvest_csv("Crop_Yield_Data_challenge_2.csv")

In [3]:
# Per-channel (lower, upper) bounds used to rescale the 6 input channels to [0, 1].
borne = [
    (-0.0440750, 1.1379025),
    (-0.0215250, 1.1155175),
    (-0.1473925, 1.13914),
    (0.04604250, 1.2140225),
    (21762.0000, 56916.0),
    (293.0, 48423.0)
]
# Offsets and ranges are derived from `borne` instead of being typed in a second
# time, so the three objects can never drift out of sync.
_bornes = np.array(borne, dtype=float)
# Per-channel lower bound, subtracted from the raw values.
mini = _bornes[:, 0].copy()
# Per-channel range (upper - lower), used as the divisor.
multi = _bornes[:, 1] - _bornes[:, 0]

In [4]:
# Validation
# One pre-computed array is loaded per row of df1. Axis 1 is moved to the end so
# that the last axis lines up with the 6-entry `mini` / `multi` vectors, then each
# channel is rescaled with them.
x1 = []
for i in range(len(df1)):
    arr = np.load(f"inputs_lstm_val/{i}.npy")
    arr = np.transpose(arr, (0, 2, 3, 1))
    # Normalise in float64, then keep float32: Keras' default float type is
    # float32, so holding float64 copies only doubles the memory footprint.
    x1.append(((arr - mini) / multi).astype("float32"))
# Bring every sequence to exactly 32 steps (pad_sequences pads and truncates
# at the start of the sequence by default).
x1 = pad_sequences(x1, dtype="float32", maxlen=32)

# One-hot season labels: [1, 0] for "SA", [0, 1] for anything else.
is_sa = (df1["Season(SA = Summer Autumn, WS = Winter Spring)"] == "SA").to_numpy()
y1 = np.where(is_sa[:, None], [1, 0], [0, 1])

In [5]:
# Training
# One pre-computed array is loaded per row of df2. Axis 1 is moved to the end so
# that the last axis lines up with the 6-entry `mini` / `multi` vectors, then each
# channel is rescaled with them.
x2 = []
for i in range(len(df2)):
    arr = np.load(f"inputs_lstm/{i}.npy")
    arr = np.transpose(arr, (0, 2, 3, 1))
    # Normalise in float64, then keep float32: Keras' default float type is
    # float32, so holding float64 copies only doubles the memory footprint.
    x2.append(((arr - mini) / multi).astype("float32"))
# Bring every sequence to exactly 32 steps (pad_sequences pads and truncates
# at the start of the sequence by default).
x2 = pad_sequences(x2, dtype="float32", maxlen=32)

# One-hot season labels: [1, 0] for "SA", [0, 1] for anything else.
is_sa = (df2["Season(SA = Summer Autumn, WS = Winter Spring)"] == "SA").to_numpy()
y2 = np.where(is_sa[:, None], [1, 0], [0, 1])

# Création du modèle

In [6]:
# Model construction
# Stage 1: a ConvLSTM2D layer collapses the variable-length sequence of
# 100x100x6 frames into a single 3-channel map (return_sequences=False).
model = Sequential()
model.add(Input(shape=(None, 100, 100, 6)))
model.add(ConvLSTM2D(filters=3, kernel_size=3, return_sequences=False))

# Stage 2: a ResNet50 backbone without its classification top, sized to accept
# whatever the recurrent stage outputs. Every backbone layer is frozen.
backbone_input_shape = model.output_shape[1:]
pretrained = ResNet50(include_top=False, input_shape=backbone_input_shape)
for backbone_layer in pretrained.layers:
    backbone_layer.trainable = False
model.add(pretrained)

# Stage 3: classification head ending in a 2-way softmax.
# NOTE(review): Dense(32) and Dense(16) are given no activation, so both are
# purely linear — confirm this is intended.
head_layers = (
    MaxPooling2D(),
    Flatten(),
    Dense(32),
    Dense(16),
    Dropout(0.4),
    Dense(2, activation="softmax"),
)
for head_layer in head_layers:
    model.add(head_layer)

# Clear the cell output produced so far, then show the architecture.
clear_output()
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv_lstm2d (ConvLSTM2D)    (None, 98, 98, 3)         984       
                                                                 
 resnet50 (Functional)       (None, 4, 4, 2048)        23587712  
                                                                 
 max_pooling2d (MaxPooling2D  (None, 2, 2, 2048)       0         
 )                                                               
                                                                 
 flatten (Flatten)           (None, 8192)              0         
                                                                 
 dense (Dense)               (None, 32)                262176    
                                                                 
 dense_1 (Dense)             (None, 16)                528       
                                                        

# Entraînement du modèle

In [7]:
# Compile the model: Adam optimiser, categorical cross-entropy loss,
# accuracy reported as the metric.
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [8]:
# Keep the best model seen during training: a file is written only when
# val_accuracy reaches a new maximum; epoch and score are part of the name.
checkpoint_path = 'models_finaux/classification/{epoch:02d}-{val_accuracy:.2f}.h5'
checkpoint = ModelCheckpoint(
    filepath=checkpoint_path,
    save_best_only=True,
    monitor="val_accuracy",
    mode="max",
    verbose=1,
)
callbacks = [checkpoint]

## Entraînement

In [9]:
# Roughly 5 minutes of training per epoch.
# It took 14 epochs to reach a decent model.
fit_options = dict(
    validation_data=(x1, y1),
    batch_size=16,
    epochs=3,
    shuffle=True,
    callbacks=callbacks,
)
history = model.fit(x2, y2, **fit_options)
# The output below is not representative of the real training run, which was
# done on a VM over several hours.

Epoch 1/3
Epoch 1: val_accuracy improved from -inf to 0.66667, saving model to model/classification/01-0.67.h5
Epoch 2/3
Epoch 2: val_accuracy did not improve from 0.66667
Epoch 3/3
Epoch 3: val_accuracy did not improve from 0.66667
