# 🎯 GÉNÉRATION DES PRÉDICTIONS KAGGLE

**Ce notebook**:
1. Charge les modèles entraînés
2. Prépare le test set (2012-2013) avec les mêmes features
3. Génère les prédictions
4. Crée le fichier submission.csv

**⚠️ Prérequis**: Avoir exécuté KAGGLE_TRAIN_MODEL.ipynb

In [None]:
# IMPORTS
import pandas as pd
import numpy as np
import pickle
import warnings
warnings.filterwarnings('ignore')

print("‚úì Imports r√©ussis!")

## 1. CHARGEMENT DES MODÈLES ENTRAÎNÉS

In [None]:
# Load every artifact saved by the training notebook.
print("üì¶ Chargement des mod√®les...")


def _read_pickle(path):
    """Return the object stored in the pickle file at ``path``."""
    with open(path, 'rb') as handle:
        return pickle.load(handle)


# The three fitted models, then the preprocessing and ensemble artifacts.
xgb_model = _read_pickle('xgb_final.pkl')
lgb_model = _read_pickle('lgb_final.pkl')
cat_model = _read_pickle('cat_final.pkl')
imputer = _read_pickle('imputer.pkl')
weights = _read_pickle('weights.pkl')
features = _read_pickle('features.pkl')

print("‚úì Mod√®les charg√©s!")
print(f"‚úì Features: {len(features)}")
print(f"‚úì Poids ensemble: {weights}")

## 2. CHARGEMENT DU TEST SET

**Important**: On doit d'abord merger le test avec les données météo 2012-2013

In [None]:
# OPTION 1: a test_synop_merged.csv file already exists
# df_test = pd.read_csv('data_plus/test_synop_merged.csv')

# OPTION 2: load the raw test set and merge it with the weather data
# (merge code still to be added - similar to what was done for the train set)

# For now this cell only reminds the user that the merged file must be created.
for reminder in (
    "‚ö†Ô∏è IMPORTANT:",
    "Vous devez d'abord cr√©er le test set avec les donn√©es m√©t√©o 2012-2013",
    "Voir le script de pr√©paration du test ci-dessous...",
):
    print(reminder)

# PLACEHOLDER - adapt to your own directory layout
# df_test = pd.read_csv('data_origin/test.csv')
# Then merge with the 2012-2013 weather data, as done for the train set

## 3. PRÉPARATION DU TEST SET

In [None]:
# Cette cellule suppose que df_test est charg√© et a les colonnes m√©t√©o
# Similaires √† df_train_full

def prepare_test_set(df_test, df_train_full):
    """
    Build the test-set features with the same definitions used for training.

    The function works on a copy of ``df_test``; the caller's frame is not
    modified.

    Args:
        df_test: Test rows. Must contain ``region_code`` and either a ``date``
            column or a ``week`` column whose string form starts with
            ``YYYYWW`` (4-digit year followed by a 2-digit week number).
        df_train_full: Training rows with the columns ``region_code``,
            ``week_of_year``, ``month``, ``saison`` and ``TauxGrippe``. All
            historical statistics are computed from this frame only.

    Returns:
        A new DataFrame holding the test rows plus: ``date`` (when it was
        absent), ``saison``, ``year``, ``month``, ``week_of_year``,
        ``day_of_year``, the cyclic encodings ``week_sin``/``week_cos``/
        ``month_sin``/``month_cos``, ``saison_encoded`` and the six
        ``TauxGrippe_*`` historical statistics.

    Raises:
        KeyError: If a required column is missing from either frame.
    """
    # Imported once per call instead of once per row inside the week parser.
    from datetime import datetime

    df_test = df_test.copy()

    def week_to_datetime(week_str):
        """Return the Monday of the week encoded as ``YYYYWW``."""
        text = str(week_str)
        year, week = int(text[:4]), int(text[4:6])
        # January 4th always belongs to ISO week 1, so stepping back to its
        # Monday gives the first day of week 1.
        jan4 = datetime(year, 1, 4)
        week_one_monday = jan4 - pd.Timedelta(days=jan4.weekday())
        return week_one_monday + pd.Timedelta(weeks=week - 1)

    # Derive the date from the week identifier only when no date is provided.
    if 'date' not in df_test.columns:
        df_test['date'] = df_test['week'].apply(week_to_datetime)
    df_test['date'] = pd.to_datetime(df_test['date'])

    # Season label per month; every month not listed here maps to 'Automne'.
    season_by_month = {
        12: 'Hiver', 1: 'Hiver', 2: 'Hiver',
        3: 'Printemps', 4: 'Printemps', 5: 'Printemps',
        6: 'Ete', 7: 'Ete', 8: 'Ete',
    }
    df_test['saison'] = df_test['date'].apply(
        lambda date: season_by_month.get(date.month, 'Automne')
    )

    # Calendar features
    df_test['year'] = df_test['date'].dt.year
    df_test['month'] = df_test['date'].dt.month
    df_test['week_of_year'] = df_test['date'].dt.isocalendar().week
    df_test['day_of_year'] = df_test['date'].dt.dayofyear

    # Cyclic encodings (period of 52 for weeks and 12 for months)
    week_angle = 2 * np.pi * df_test['week_of_year'] / 52
    month_angle = 2 * np.pi * df_test['month'] / 12
    df_test['week_sin'] = np.sin(week_angle)
    df_test['week_cos'] = np.cos(week_angle)
    df_test['month_sin'] = np.sin(month_angle)
    df_test['month_cos'] = np.cos(month_angle)

    # Integer code for the season
    saison_map = {'Hiver': 1, 'Printemps': 2, 'Ete': 3, 'Automne': 4}
    df_test['saison_encoded'] = df_test['saison'].map(saison_map)

    # Historical statistics: computed from the TRAIN frame, never from the
    # test frame.
    target_by = df_train_full.groupby

    def train_mean_by(keys):
        """Return {group key: mean TauxGrippe} for the given grouping."""
        return target_by(keys)['TauxGrippe'].mean().to_dict()

    hist_week_map = train_mean_by(['region_code', 'week_of_year'])
    hist_month_map = train_mean_by(['region_code', 'month'])
    hist_season_map = train_mean_by(['region_code', 'saison'])
    region_mean_map = train_mean_by('region_code')
    region_std_map = target_by('region_code')['TauxGrippe'].std().to_dict()
    week_global_map = train_mean_by('week_of_year')

    def with_region_fallback(stat_map, key_column):
        """Look up (region, key) per row; fall back to the region mean, then 0."""
        return [
            stat_map.get((region, key), region_mean_map.get(region, 0))
            for region, key in zip(df_test['region_code'], df_test[key_column])
        ]

    df_test['TauxGrippe_hist_week_mean'] = with_region_fallback(hist_week_map, 'week_of_year')
    df_test['TauxGrippe_hist_month_mean'] = with_region_fallback(hist_month_map, 'month')
    df_test['TauxGrippe_hist_season_mean'] = with_region_fallback(hist_season_map, 'saison')

    # Plain maps: a region or week absent from the train frame yields NaN here.
    df_test['TauxGrippe_region_mean'] = df_test['region_code'].map(region_mean_map)
    df_test['TauxGrippe_region_std'] = df_test['region_code'].map(region_std_map)
    df_test['TauxGrippe_week_global_mean'] = df_test['week_of_year'].map(week_global_map)

    return df_test

print("‚úì Fonction de pr√©paration du test d√©finie")

In [None]:
# Training data: the source of the historical statistics, with parsed dates.
df_train_full = pd.read_csv('data_plus/train_synop_cleaned_complet.csv').assign(
    date=lambda frame: pd.to_datetime(frame['date'])
)

# Load and prepare the test set
# TO ADAPT: load your own test set merged with the weather data
# df_test = pd.read_csv('data_plus/test_synop_merged.csv')
# df_test = prepare_test_set(df_test, df_train_full)

for reminder in (
    "‚ö†Ô∏è Veuillez charger votre test set avec les donn√©es m√©t√©o 2012-2013",
    "   Puis appliquer: df_test = prepare_test_set(df_test, df_train_full)",
):
    print(reminder)

## 4. GÉNÉRATION DES PRÉDICTIONS

In [None]:
# This cell expects df_test to have been prepared beforehand.

# Feature matrix and row identifiers of the test set
X_test = df_test[features]
test_ids = df_test['Id']

# Apply the imputer loaded earlier, keeping column names and the row index
imputed_values = imputer.transform(X_test)
X_test = pd.DataFrame(imputed_values, columns=X_test.columns, index=X_test.index)

print(f"‚úì Test set pr√©par√©: {X_test.shape}")

# One prediction vector per model
print("\nüöÄ G√©n√©ration des pr√©dictions...")
y_pred_xgb = xgb_model.predict(X_test)
print("‚úì XGBoost")

y_pred_lgb = lgb_model.predict(X_test)
print("‚úì LightGBM")

y_pred_cat = cat_model.predict(X_test)
print("‚úì CatBoost")

# Ensemble: weighted sum of the three prediction vectors
weighted_xgb = weights['XGBoost'] * y_pred_xgb
weighted_lgb = weights['LightGBM'] * y_pred_lgb
weighted_cat = weights['CatBoost'] * y_pred_cat
y_pred = weighted_xgb + weighted_lgb + weighted_cat
print("‚úì Ensemble")

# Floor the predictions at zero
y_pred = np.maximum(y_pred, 0)

print("\nüìä Statistiques des pr√©dictions:")
for label, value in (
    ("Min", y_pred.min()),
    ("Max", y_pred.max()),
    ("Moyenne", y_pred.mean()),
    ("M√©diane", np.median(y_pred)),
):
    print(f"   {label}: {value:.2f}")

## 5. CRÉATION DU FICHIER SUBMISSION

In [None]:
# Assemble the submission table: one row per test Id with its ensemble prediction
submission = pd.DataFrame({'Id': test_ids, 'TauxGrippe': y_pred})

# Write it to disk without the index column
output_path = 'submission_ensemble.csv'
submission.to_csv(output_path, index=False)

banner = "=" * 80
print("\n‚úÖ FICHIER SUBMISSION CR√â√â!")
print(banner)
print(f"Fichier: {output_path}")
print(f"Lignes: {len(submission)}")
print("\nAper√ßu:")
print(submission.head(10))
print("\nüéØ Pr√™t √† soumettre sur Kaggle!")
print(banner)

## 6. (OPTIONNEL) CRÉER DES SUBMISSIONS INDIVIDUELLES

In [None]:
# Optional: one submission file per model, to score each model on its own


def _floored_submission(predictions):
    """Return an Id/TauxGrippe table with the predictions floored at zero."""
    return pd.DataFrame({'Id': test_ids, 'TauxGrippe': np.maximum(predictions, 0)})


# XGBoost
submission_xgb = _floored_submission(y_pred_xgb)
submission_xgb.to_csv('submission_xgb.csv', index=False)

# LightGBM
submission_lgb = _floored_submission(y_pred_lgb)
submission_lgb.to_csv('submission_lgb.csv', index=False)

# CatBoost
submission_cat = _floored_submission(y_pred_cat)
submission_cat.to_csv('submission_cat.csv', index=False)

print("‚úì Submissions individuelles cr√©√©es:")
for written_file in ('submission_xgb.csv', 'submission_lgb.csv', 'submission_cat.csv'):
    print(f"  - {written_file}")