In [1]:
import evidently
# Sanity check: show which Evidently release this kernel imports.
installed_version = evidently.__version__
print(installed_version)

0.7.20


In [2]:
import sys, os
import evidently

# Environment diagnostics: which interpreter runs the kernel, where Evidently
# is loaded from, and which directory relative paths are resolved against.
print(f"Python executable: {sys.executable}")
print(f"Python version   : {sys.version}")
print(f"Evidently version: {evidently.__version__}")
print(f"Evidently path   : {evidently.__file__}")
print(f"CWD              : {os.getcwd()}")

# Targeted import probe for the `evidently.report` module path.
# Any failure is reported instead of raised so the cell always completes.
try:
    from evidently.report import Report
    print("✅ Import evidently.report OK")
except Exception as exc:
    print(f"❌ Import evidently.report FAILED: {exc!r}")


Python executable: c:\Users\utilisateur\Python\OC - Projet 7\api-oc-projet7\venv\Scripts\python.exe
Python version   : 3.13.2 (tags/v3.13.2:4f8bb39, Feb  4 2025, 15:23:48) [MSC v.1942 64 bit (AMD64)]
Evidently version: 0.7.20
Evidently path   : c:\Users\utilisateur\Python\OC - Projet 7\api-oc-projet7\venv\Lib\site-packages\evidently\__init__.py
CWD              : c:\Users\utilisateur\Python\OC - Projet 7\api-oc-projet7
❌ Import evidently.report FAILED: ModuleNotFoundError("No module named 'evidently.report'")


In [3]:
from pathlib import Path

import numpy as np
import pandas as pd

from evidently import Dataset, DataDefinition, Report
from evidently.presets import DataDriftPreset


# -----------------------------
# 1) Paths (relative, so they resolve against the current working directory)
# -----------------------------
TRAIN_PATH, TEST_PATH = (
    "data/raw/application_train.csv",
    "data/raw/application_test.csv",
)
OUTPUT_HTML = "artifacts/data_drift_report.html"


# -----------------------------
# 2) Load both CSV files with pandas
# -----------------------------
train_raw = pd.read_csv(filepath_or_buffer=TRAIN_PATH)
test_raw = pd.read_csv(filepath_or_buffer=TEST_PATH)

# Quick size check of what was just loaded: (rows, columns) for train, then test.
print("Raw shapes:", *(frame.shape for frame in (train_raw, test_raw)))


# -----------------------------
# 3) Feature selection & train/test alignment
# -----------------------------
# Columns kept out of the drift analysis (the target and ID-like columns,
# judging by their names).
EXCLUDE_COLS = {"TARGET", "SK_ID_CURR", "SK_ID_BUREAU", "SK_ID_PREV", "index"}

# Keep the train column order, then retain only the columns test also has.
train_feats = [name for name in train_raw.columns if name not in EXCLUDE_COLS]
common_feats = [name for name in train_feats if name in test_raw.columns]

# Work on copies restricted to the shared features.
# Important: some Evidently presets may handle pd.NA poorly, so it is
# normalised to np.nan on both frames.
train_df = train_raw[common_feats].copy().replace({pd.NA: np.nan})
test_df = test_raw[common_feats].copy().replace({pd.NA: np.nan})

print("Aligned shapes:", *(frame.shape for frame in (train_df, test_df)))
assert train_df.columns.tolist() == test_df.columns.tolist(), "Colonnes train/test non alignées"


# -----------------------------
# 4) DataDefinition (replaces ColumnMapping in 0.7+)
# -----------------------------
# Select numeric dtypes positively and treat everything else as categorical.
# Defining "numerical" as "not object" would also label bool, category or
# datetime columns as numerical, and would depend on string columns being
# stored with the `object` dtype.
num_cols = train_df.select_dtypes(include=["number"]).columns.tolist()
cat_cols = train_df.select_dtypes(exclude=["number"]).columns.tolist()

# The two lists must partition the feature set: every column gets exactly one role.
assert len(num_cols) + len(cat_cols) == train_df.shape[1], (
    "Every feature must be classified as numerical or categorical"
)

data_def = DataDefinition(
    numerical_columns=num_cols,
    categorical_columns=cat_cols,
    # no target/prediction here: feature drift only (unsupervised)
)

# The same definition is applied to both frames so they are interpreted identically.
eval_train = Dataset.from_pandas(train_df, data_definition=data_def)
eval_test  = Dataset.from_pandas(test_df,  data_definition=data_def)

print(f"DataDefinition: {len(num_cols)} num, {len(cat_cols)} cat")


# -----------------------------
# 5) Data drift report
# -----------------------------
report = Report([
    DataDriftPreset()
])

# Train is the reference distribution; test is the data being checked against it.
my_report = report.run(current_data=eval_test, reference_data=eval_train)

# To display the report inline in a notebook, end a cell with:
# my_report

# Create the destination folder first: save_html is not assumed to create
# missing parent directories, and failing here would waste the whole drift
# computation on a fresh checkout where `artifacts/` does not exist yet.
Path(OUTPUT_HTML).parent.mkdir(parents=True, exist_ok=True)

# OUTPUT_HTML is passed through unchanged as a plain string path.
my_report.save_html(OUTPUT_HTML)
print(f"✅ Rapport drift sauvegardé : {OUTPUT_HTML}")



Raw shapes: (307511, 122) (48744, 121)
Aligned shapes: (307511, 120) (48744, 120)
DataDefinition: 104 num, 16 cat
✅ Rapport drift sauvegardé : artifacts/data_drift_report.html
