#  Modèle de prédiction des bandes Sentinel-2 avant 2015 avec distance spatiale

In [None]:
#  Importation des bibliothèques nécessaires
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestRegressor
from sklearn.multioutput import MultiOutputRegressor
from sklearn.model_selection import train_test_split
from sklearn.neighbors import NearestNeighbors
from sklearn.metrics import mean_squared_error


In [None]:
#  Charger vos données

df = pd.read_csv('sentinel_results_improved_all.csv')  

df['Data_Year'] = df['Data_Year'].astype(int)
# print(df)
target_years = [2018]
bands = [f'B{i}' for i in range(2, 9)] + ['B8A', 'B11', 'B12']
years = list(range(2019, 2026))
df_target = df[df['Data_Year'].isin(target_years)].copy()
df_target['Target_Year'] = df_target['Data_Year']
df_target = df_target[['ProfileID', 'Target_Year'] + bands]
df = df[df['Data_Year'].isin(years)].copy()
df_target



Unnamed: 0,ProfileID,Target_Year,B2,B3,B4,B5,B6,B7,B8,B8A,B11,B12
0,AO SOTER_P.1/57,2018,1752.0,2710.0,3720.0,4120.5,4104.0,4287.0,4136.0,4282.5,5397.0,5177.0
8,AO SOTER_P.110c/60,2018,341.0,634.0,436.0,1178.0,2368.0,2548.0,2974.0,2937.0,2183.0,1208.0
16,AO SOTER_P.113c/60,2018,478.0,752.0,986.0,1162.0,1640.0,1837.0,1670.0,1997.0,2402.0,1572.0
31,AO SOTER_P.114/58,2018,586.0,863.0,877.0,1288.0,1914.0,2139.0,2188.0,2399.0,2722.0,1896.0
39,AO SOTER_P.104c/62,2018,1170.0,1352.5,1040.0,1644.0,3277.0,3858.5,3800.0,4053.0,2460.0,1407.0
...,...,...,...,...,...,...,...,...,...,...,...,...
45884,MW lrep_ZA6,2018,849.0,1106.0,916.0,1357.0,2460.0,2789.0,2784.0,2964.0,2381.0,1663.0
45920,MW lrep_ZA64,2018,2132.0,2188.0,2018.0,2540.0,3282.0,3473.0,3432.0,3587.0,3064.0,2642.0
45942,MW lrep_ZA7,2018,589.0,919.0,1192.0,1288.0,1638.0,1815.0,1754.0,1906.0,1921.0,1666.0
46048,MW lrep_ZA8,2018,1086.0,1372.0,1592.0,1956.0,2537.0,2681.0,2462.0,2837.0,2823.0,2465.0


In [None]:
#  Construire des séquences temporelles par profil
def build_sequence(group):
    seq = []
    for year in years:
        row = group[group['Data_Year'] == year]
        if not row.empty:
            seq.extend([row.iloc[0][b] for b in bands])
        else:
            seq.extend([np.nan] * len(bands))
    return pd.Series(seq, index=[f"{b}_{y}" for y in years for b in bands])

df_seq = df.groupby('ProfileID').apply(build_sequence).reset_index()
df_meta = df.groupby('ProfileID')[['Latitude', 'Longitude']].first().reset_index()
df_full = df_seq.merge(df_meta, on='ProfileID')
df_full.dropna(inplace=True)
df_full


  df_seq = df.groupby('ProfileID').apply(build_sequence).reset_index()


Unnamed: 0,ProfileID,B2_2019,B3_2019,B4_2019,B5_2019,B6_2019,B7_2019,B8_2019,B8A_2019,B11_2019,...,B4_2025,B5_2025,B6_2025,B7_2025,B8_2025,B8A_2025,B11_2025,B12_2025,Latitude,Longitude
0,AO SOTER_P.1/57,1713.0,2714.0,3738.0,4134.0,4108.0,4277.0,4102.0,4280.5,5360.5,...,3880.0,4339.5,4398.0,4494.0,4306.0,4525.5,5531.0,5162.0,-15.222598,12.161278
1,AO SOTER_P.101c/63,320.0,534.0,483.0,907.0,1631.0,1877.0,1760.0,2113.0,1697.0,...,356.0,1192.0,2994.0,3398.0,3560.0,3672.0,2280.0,1170.0,-10.950086,17.573093
2,AO SOTER_P.104c/62,461.5,599.5,613.5,892.0,1479.5,1705.5,1826.0,2025.0,1728.5,...,520.0,950.0,1469.0,1705.0,1886.0,1984.0,1967.0,1140.0,-10.078560,15.107436
3,AO SOTER_P.107/66,383.0,516.0,525.0,870.0,1394.0,1656.0,1788.0,1873.0,1756.0,...,742.0,1178.0,1828.0,2113.0,2198.0,2397.0,2609.0,1779.0,-14.194014,17.573781
4,AO SOTER_P.110c/60,489.5,725.0,835.5,1122.5,1482.0,1589.0,1653.0,1786.0,2109.0,...,380.0,930.0,2064.0,2399.0,2356.0,2646.0,1682.0,880.0,-6.152014,14.635114
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6040,MW lrep_ZA92,574.5,770.5,947.5,1220.5,1600.0,1775.0,1729.0,2018.5,2416.0,...,1096.0,1318.5,2440.0,2868.5,3166.0,3207.0,2354.5,1397.5,-15.360000,35.050000
6041,MW lrep_ZA93,470.5,799.0,890.0,1343.0,1870.5,2096.0,2315.0,2445.5,2809.5,...,760.0,1238.5,2221.0,2535.0,2529.0,2781.0,2540.5,1646.5,-15.360000,35.060000
6042,MW lrep_ZA94,561.0,778.0,1102.0,1393.0,1829.0,2035.0,2204.0,2388.0,3135.0,...,1262.0,1827.0,2725.0,2964.0,2988.0,3058.0,3091.0,2371.0,-15.290000,35.220000
6043,MW lrep_ZA95,435.5,688.0,659.5,1108.5,1660.5,1869.0,1946.0,2125.5,2186.0,...,519.5,1063.5,2511.0,3008.5,2924.0,3253.5,2158.0,1145.0,-15.280000,35.190000


In [None]:
#  Ajouter la distance moyenne aux 5 voisins
coords = df_full[['Latitude', 'Longitude']].values
nbrs = NearestNeighbors(n_neighbors=5).fit(coords)
distances, _ = nbrs.kneighbors(coords)
df_full['mean_distance'] = distances[:, 1:].mean(axis=1)
df_full 


Unnamed: 0,ProfileID,B2_2019,B3_2019,B4_2019,B5_2019,B6_2019,B7_2019,B8_2019,B8A_2019,B11_2019,...,B5_2025,B6_2025,B7_2025,B8_2025,B8A_2025,B11_2025,B12_2025,Latitude,Longitude,mean_distance
0,AO SOTER_P.1/57,1713.0,2714.0,3738.0,4134.0,4108.0,4277.0,4102.0,4280.5,5360.5,...,4339.5,4398.0,4494.0,4306.0,4525.5,5531.0,5162.0,-15.222598,12.161278,0.475891
1,AO SOTER_P.101c/63,320.0,534.0,483.0,907.0,1631.0,1877.0,1760.0,2113.0,1697.0,...,1192.0,2994.0,3398.0,3560.0,3672.0,2280.0,1170.0,-10.950086,17.573093,0.375958
2,AO SOTER_P.104c/62,461.5,599.5,613.5,892.0,1479.5,1705.5,1826.0,2025.0,1728.5,...,950.0,1469.0,1705.0,1886.0,1984.0,1967.0,1140.0,-10.078560,15.107436,0.522964
3,AO SOTER_P.107/66,383.0,516.0,525.0,870.0,1394.0,1656.0,1788.0,1873.0,1756.0,...,1178.0,1828.0,2113.0,2198.0,2397.0,2609.0,1779.0,-14.194014,17.573781,1.049642
4,AO SOTER_P.110c/60,489.5,725.0,835.5,1122.5,1482.0,1589.0,1653.0,1786.0,2109.0,...,930.0,2064.0,2399.0,2356.0,2646.0,1682.0,880.0,-6.152014,14.635114,0.353583
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6040,MW lrep_ZA92,574.5,770.5,947.5,1220.5,1600.0,1775.0,1729.0,2018.5,2416.0,...,1318.5,2440.0,2868.5,3166.0,3207.0,2354.5,1397.5,-15.360000,35.050000,0.026304
6041,MW lrep_ZA93,470.5,799.0,890.0,1343.0,1870.5,2096.0,2315.0,2445.5,2809.5,...,1238.5,2221.0,2535.0,2529.0,2781.0,2540.5,1646.5,-15.360000,35.060000,0.027325
6042,MW lrep_ZA94,561.0,778.0,1102.0,1393.0,1829.0,2035.0,2204.0,2388.0,3135.0,...,1827.0,2725.0,2964.0,2988.0,3058.0,3091.0,2371.0,-15.290000,35.220000,0.035285
6043,MW lrep_ZA95,435.5,688.0,659.5,1108.5,1660.5,1869.0,1946.0,2125.5,2186.0,...,1063.5,2511.0,3008.5,2924.0,3253.5,2158.0,1145.0,-15.280000,35.190000,0.038100


In [None]:
#  Créer le dataset final avec Target_Year
records = []
for y in target_years:
    df_y = df_target[df_target['Target_Year'] == y].copy()
    df_merged = df_full.merge(df_y[['ProfileID'] + bands], on='ProfileID')
    df_merged['Target_Year'] = y
    records.append(df_merged)

df_model = pd.concat(records, ignore_index=True)


In [None]:
#  Séparer les features et la cible
X = df_model.drop(columns=['ProfileID', 'Latitude', 'Longitude'] + bands)
Y = df_model[bands]
print(X)
print(Y)
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=42)


          B2_2019      B3_2019      B4_2019      B5_2019      B6_2019  \
0     1713.000000  2714.000000  3738.000000  4134.000000  4108.000000   
1      461.500000   599.500000   613.500000   892.000000  1479.500000   
2      383.000000   516.000000   525.000000   870.000000  1394.000000   
3      489.500000   725.000000   835.500000  1122.500000  1482.000000   
4      971.705882  1263.666667  1618.000000  1904.000000  2251.166667   
...           ...          ...          ...          ...          ...   
3788   881.875000  1088.200000  1327.111111  1367.500000  1934.750000   
3789   736.375000  1055.300000  1504.307692  1699.038462  1977.865385   
3790   628.000000   867.000000  1020.000000  1405.000000  1843.000000   
3791   573.500000   802.500000   962.000000  1336.500000  1986.500000   
3792   579.000000   792.000000   833.000000  1154.000000  2103.000000   

          B7_2019      B8_2019  B8A_2019  B11_2019  B12_2019  ...  B4_2025  \
0     4277.000000  4102.000000    4280.5    5

In [None]:
#  Entraîner le modèle0
from sklearn.linear_model import LinearRegression
from sklearn.svm import SVR


# Supprimer les lignes où Y_train contient des NaN
mask = ~Y_train.isnull().any(axis=1)
X_train_clean = X_train[mask]
Y_train_clean = Y_train[mask]

model = MultiOutputRegressor(SVR(kernel='linear', C=1.0, epsilon=0.1))
model.fit(X_train_clean, Y_train_clean)


In [None]:
#  Prédire et évaluer les erreurs 
y_pred = model.predict(X_test)

for i, band in enumerate(Y.columns):
    # Remove NaN in Y_test for the current band
    mask = ~Y_test.iloc[:, i].isna()
    mse = mean_squared_error(Y_test.loc[mask, Y.columns[i]], y_pred[mask, i])
    rmse = np.sqrt(mse)
    print(f"{band}: RMSE = {rmse:.2f}")


B2: RMSE = 719.60
B3: RMSE = 691.18
B4: RMSE = 716.21
B5: RMSE = 751.26
B6: RMSE = 726.71
B7: RMSE = 730.36
B8: RMSE = 699.08
B8A: RMSE = 722.59
B11: RMSE = 691.20
B12: RMSE = 675.46


  return X @ coef_ + self.intercept_
  return X @ coef_ + self.intercept_
  return X @ coef_ + self.intercept_
  return X @ coef_ + self.intercept_
  return X @ coef_ + self.intercept_
  return X @ coef_ + self.intercept_
  return X @ coef_ + self.intercept_
  return X @ coef_ + self.intercept_
  return X @ coef_ + self.intercept_
  return X @ coef_ + self.intercept_
  return X @ coef_ + self.intercept_
  return X @ coef_ + self.intercept_
  return X @ coef_ + self.intercept_
  return X @ coef_ + self.intercept_
  return X @ coef_ + self.intercept_
  return X @ coef_ + self.intercept_
  return X @ coef_ + self.intercept_
  return X @ coef_ + self.intercept_
  return X @ coef_ + self.intercept_
  return X @ coef_ + self.intercept_
  return X @ coef_ + self.intercept_
  return X @ coef_ + self.intercept_
  return X @ coef_ + self.intercept_
  return X @ coef_ + self.intercept_
  return X @ coef_ + self.intercept_
  return X @ coef_ + self.intercept_
  return X @ coef_ + self.intercept_
 