### `Prédiction des LAI, LEAF_LENGTH et LEAF_WIDTH`

1. Chargement des librairies et packages

In [6]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder,LabelEncoder
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, SimpleRNN, Dropout

2. Chargement des données

In [7]:
# Installer openpyxl


# Charger les données
file_path = './Gombo_Observations_etiquettesV0.5.xlsx'
df = pd.read_excel(file_path, sheet_name="Etiquettes_2 (2)")

3. Analyse exploratoire et descriptive

In [8]:
#Aperçu 

df.head(10)

Unnamed: 0,Acquisition Session,DAS,Treatment,LEAF_LENGTH,LEAF_WIDTH,LAI,LEAF_NUMBER,PLANT_HEIGHT,STEM_DIAMETER,RF_U,...,PSRI,ARI,dBE,BEIP,Sum_dBE,dYE,Sum_dYE,PhRI,TVI,RVSI
0,AS1,8,T1,15,15,1.6875,2.0,70.0,1.0,0.941167,...,0.011238,98.736618,0.000181,520.48,0.02366,-8e-06,-0.012776,0.09421,12.518276,0.01914
1,AS1,8,T1,12,12,1.08,2.0,100.0,1.0,0.744238,...,0.021673,14.671596,0.000109,522.74,0.01406,5e-06,-0.005068,0.016501,11.297272,0.016846
2,AS1,8,T1,18,18,2.43,2.0,80.0,1.0,0.8512,...,0.00955,129.747527,0.000153,521.51,0.019626,8e-06,-0.009159,0.120737,12.832241,0.01881
3,AS1,8,T1,18,15,2.025,2.0,120.0,1.0,0.941167,...,0.011238,98.736618,0.000181,520.48,0.02366,-8e-06,-0.012776,0.09421,12.518276,0.01914
4,AS9,40,T1,140,143,150.15,10.0,430.0,5.0,0.902224,...,0.023126,18.993673,0.000336,522.13,0.04089,-8e-06,-0.023103,0.05428,14.36723,0.015359
5,AS9,40,T1,140,145,152.25,8.0,540.0,5.0,0.966109,...,0.010328,75.801243,0.000246,520.89,0.031391,-1.4e-05,-0.018244,0.101764,14.336211,0.019098
6,AS9,40,T1,103,97,74.9325,8.0,540.0,5.0,0.820931,...,0.004772,34.753011,0.000424,521.3,0.052414,-2.6e-05,-0.031791,0.104747,17.487941,0.015643
7,AS9,40,T1,95,91,64.8375,10.0,430.0,5.5,0.902224,...,0.023126,18.993673,0.000336,522.13,0.04089,-8e-06,-0.023103,0.05428,14.36723,0.015359
8,AS9,40,T1,55,61,25.1625,9.0,590.0,6.0,0.966109,...,0.010328,75.801243,0.000246,520.89,0.031391,-1.4e-05,-0.018244,0.101764,14.336211,0.019098
9,AS9,40,T1,55,47,19.3875,9.0,550.0,6.0,0.543862,...,0.009123,6.972464,0.000831,520.68,0.102649,-4.8e-05,-0.060393,0.06502,18.61482,-0.002932


In [9]:
#Dimension
df.shape

(2048, 118)

In [10]:
#Type des colonnes
df.dtypes

Acquisition Session     object
DAS                      int64
Treatment               object
LEAF_LENGTH              int64
LEAF_WIDTH               int64
                        ...   
dYE                    float64
Sum_dYE                float64
PhRI                   float64
TVI                    float64
RVSI                   float64
Length: 118, dtype: object

* On souhaite avoir les colonnes de types `object` afin de les encoder avant la mise en place du modèle

In [11]:
# Extraire les colonnes de type object
object_columns = df.select_dtypes(include=['object'])

# Afficher les colonnes de type object
print("Colonnes de type object :")
print(object_columns.columns.tolist())

# Optionnel : Afficher un aperçu des données
print("\nAperçu des colonnes de type object :")
print(object_columns.head())

Colonnes de type object :
['Acquisition Session', 'Treatment']

Aperçu des colonnes de type object :
  Acquisition Session Treatment
0                 AS1        T1
1                 AS1        T1
2                 AS1        T1
3                 AS1        T1
4                 AS9        T1


* Valeurs manquantes

In [12]:
# Identifier les colonnes avec des valeurs manquantes
missing_columns = df.columns[df.isnull().any()]

# Afficher les colonnes avec valeurs manquantes et leur nombre
print("Colonnes avec des valeurs manquantes :")
for col in missing_columns:
    print(f"{col}: {df[col].isnull().sum()} valeur(s) manquante(s)")


Colonnes avec des valeurs manquantes :
LEAF_NUMBER: 1 valeur(s) manquante(s)
PLANT_HEIGHT: 1 valeur(s) manquante(s)
STEM_DIAMETER: 1 valeur(s) manquante(s)


* Imputation

In [13]:
# Imputer les valeurs manquantes par la moyenne de chaque colonne
for col in missing_columns:
    mean_value = df[col].mean()  # Calculer la moyenne
    df[col].fillna(mean_value, inplace=True)  # Remplacer les NaN par la moyenne

# Vérifier qu'il n'y a plus de valeurs manquantes
print("Nombre total de valeurs manquantes après imputation :")
print(df.isnull().sum().sum())

Nombre total de valeurs manquantes après imputation :
0


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df[col].fillna(mean_value, inplace=True)  # Remplacer les NaN par la moyenne


In [14]:
#Voir les repartitions des colonnes
df.describe().T

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
DAS,2048.0,40.417969,14.238018,8.000000,28.000000,40.000000,52.000000,64.000000
LEAF_LENGTH,2048.0,82.447754,53.990764,2.000000,35.000000,78.000000,125.000000,230.000000
LEAF_WIDTH,2048.0,80.851562,54.087803,2.000000,33.000000,75.000000,125.000000,225.000000
LAI,2048.0,71.767665,76.225798,0.030000,8.662500,45.000000,119.062500,388.125000
LEAF_NUMBER,2048.0,10.219834,2.874264,2.000000,8.000000,11.000000,13.000000,15.000000
...,...,...,...,...,...,...,...,...
dYE,2048.0,-0.000070,0.000067,-0.000242,-0.000109,-0.000067,-0.000033,0.000312
Sum_dYE,2048.0,-0.221808,0.083847,-0.497871,-0.270262,-0.224214,-0.176880,-0.005068
PhRI,2048.0,0.073371,0.033536,0.010582,0.046009,0.072678,0.107873,0.125578
TVI,2048.0,50.878909,11.533686,9.476849,45.309731,54.614441,59.690544,67.000143


4. Modélisation

In [15]:
#Encodage : Appliquer Label Encoding sur chaque colonne catégorique
label_encoder = LabelEncoder()
for col in object_columns:
    df[col] = label_encoder.fit_transform(df[col])

df.head(5)

Unnamed: 0,Acquisition Session,DAS,Treatment,LEAF_LENGTH,LEAF_WIDTH,LAI,LEAF_NUMBER,PLANT_HEIGHT,STEM_DIAMETER,RF_U,...,PSRI,ARI,dBE,BEIP,Sum_dBE,dYE,Sum_dYE,PhRI,TVI,RVSI
0,0,8,0,15,15,1.6875,2.0,70.0,1.0,0.941167,...,0.011238,98.736618,0.000181,520.48,0.02366,-8e-06,-0.012776,0.09421,12.518276,0.01914
1,0,8,0,12,12,1.08,2.0,100.0,1.0,0.744238,...,0.021673,14.671596,0.000109,522.74,0.01406,5e-06,-0.005068,0.016501,11.297272,0.016846
2,0,8,0,18,18,2.43,2.0,80.0,1.0,0.8512,...,0.00955,129.747527,0.000153,521.51,0.019626,8e-06,-0.009159,0.120737,12.832241,0.01881
3,0,8,0,18,15,2.025,2.0,120.0,1.0,0.941167,...,0.011238,98.736618,0.000181,520.48,0.02366,-8e-06,-0.012776,0.09421,12.518276,0.01914
4,14,40,0,140,143,150.15,10.0,430.0,5.0,0.902224,...,0.023126,18.993673,0.000336,522.13,0.04089,-8e-06,-0.023103,0.05428,14.36723,0.015359


In [16]:
# Variables cibles (outputs) et entrées (features)
target_columns = ["LAI", "LEAF_LENGTH", "LEAF_WIDTH"]
X = df.drop(columns=target_columns)
y = df[target_columns]

In [17]:
# Normalisation des données
scaler_X = StandardScaler()
X_scaled = scaler_X.fit_transform(X)

scaler_y = StandardScaler()
y_scaled = scaler_y.fit_transform(y)

In [18]:
# Séparer en jeu d'entraînement et de test
seed = 2025
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y_scaled, test_size=0.2, random_state=seed)

# Reshaper pour le RNN
X_train_rnn = X_train.reshape((X_train.shape[0], 1, X_train.shape[1]))
X_test_rnn = X_test.reshape((X_test.shape[0], 1, X_test.shape[1]))

In [19]:
# Construire le modèle RNN
model = Sequential()
model.add(SimpleRNN(64, activation='relu', input_shape=(1, X_train.shape[1])))
model.add(Dropout(0.2))
model.add(Dense(32, activation='relu'))
model.add(Dense(3, activation='linear'))  # 3 sorties pour les cibles LAI, LEAF_LENGTH, LEAF_WIDTH

  super().__init__(**kwargs)


In [20]:
model.summary()

In [21]:
# Compiler le modèle
model.compile(optimizer='adam', loss='mse', metrics=['mae'])

In [22]:
# Entraîner le modèle
history = model.fit(X_train_rnn, y_train, epochs=100, batch_size=16, validation_split=0.2)

Epoch 1/100
[1m82/82[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 14ms/step - loss: 1.2795 - mae: 0.9209 - val_loss: 0.9839 - val_mae: 0.8213
Epoch 2/100
[1m82/82[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step - loss: 0.9858 - mae: 0.8268 - val_loss: 0.9061 - val_mae: 0.7804
Epoch 3/100
[1m82/82[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 10ms/step - loss: 0.8560 - mae: 0.7535 - val_loss: 0.8505 - val_mae: 0.7463
Epoch 4/100
[1m82/82[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 15ms/step - loss: 0.7511 - mae: 0.7014 - val_loss: 0.8175 - val_mae: 0.7237
Epoch 5/100
[1m82/82[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 13ms/step - loss: 0.7714 - mae: 0.7012 - val_loss: 0.8269 - val_mae: 0.7261
Epoch 6/100
[1m82/82[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 14ms/step - loss: 0.7173 - mae: 0.6744 - val_loss: 0.8027 - val_mae: 0.7081
Epoch 7/100
[1m82/82[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 14ms/step - l

In [23]:
# Évaluer le modèle
loss, mae = model.evaluate(X_test_rnn, y_test)
print(f"Test Loss: {loss}, Test MAE: {mae}")

[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/step - loss: 0.7929 - mae: 0.6794
Test Loss: 0.8367147445678711, Test MAE: 0.7008740901947021


In [24]:
# Faire des prédictions
y_pred_scaled = model.predict(X_test_rnn)

[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 28ms/step


In [25]:
# Revenir aux échelles d'origine pour les prédictions
y_pred = scaler_y.inverse_transform(y_pred_scaled)
y_test_original = scaler_y.inverse_transform(y_test)

# Afficher quelques résultats de prédiction
for i in range(5):
    print(f"Réel : {y_test_original[i]}, Prédit : {y_pred[i]}")

Réel : [ 79.5675 103.     103.    ], Prédit : [53.2626   74.781944 73.22886 ]
Réel : [25.65 57.   60.  ], Prédit : [74.42056  93.632545 90.01536 ]
Réel : [58.725 87.    90.   ], Prédit : [52.98877 74.32013 72.80621]
Réel : [ 94.08 112.   112.  ], Prédit : [ 93.58572  101.365715 100.21432 ]
Réel : [28.8 64.  60. ], Prédit : [81.64985  92.20723  91.708664]
