In [1]:
# ======================
# 06_final_model_training.py
# ======================

import os
import joblib
import numpy as np
from scipy import sparse
from xgboost import XGBClassifier

# ======================
# File paths
# ======================
# The defaults below are the original hard-coded locations. Each one can be
# overridden through an environment variable so the script can run on another
# machine without editing the source.
OUTPUT_DIR = os.environ.get(
    "EXOPLANET_OUTPUT_DIR",
    r"C:\Users\vibho\Downloads\Engineering\exoplanet-ai\data\processed",
)
PIPELINE_FILE = os.environ.get(
    "EXOPLANET_PIPELINE_FILE",
    r"C:\Users\vibho\Downloads\Engineering\exoplanet-ai\src\preprocessing\feature_pipeline.pkl",
)

# Full dataset: feature matrix (.npz) and target vector (.npy), both expected
# inside OUTPUT_DIR.
X_file = os.path.join(OUTPUT_DIR, "X_processed.npz")
y_file = os.path.join(OUTPUT_DIR, "y_encoded.npy")

# Final model paths
FINAL_MODEL_FILE = os.path.join(OUTPUT_DIR, "XGBoost_final_model.pkl")
# NOTE(review): CLASS_MAPPING_FILE is defined here but nothing in the visible
# part of this script writes to it -- confirm whether it is still needed.
CLASS_MAPPING_FILE = os.path.join(OUTPUT_DIR, "class_mapping.npy")

# ======================
# Load full dataset
# ======================
# Read the feature matrix with SciPy's sparse .npz loader and the target
# array with NumPy, features first and targets second.
X, y = sparse.load_npz(X_file), np.load(y_file)

# Report both shapes so a mismatch between features and targets is visible
# before training starts.
shape_report = f"Full dataset shape: {X.shape}, Target shape: {y.shape}"
print(shape_report)

# ======================
# Train final XGBoost model
# ======================
# Fixed seed so that the row/column subsampling below is reproducible.
FINAL_RANDOM_STATE = 42

# `use_label_encoder` is deliberately not passed: the installed XGBoost
# reports it as an unused parameter and emits a warning on every fit.
final_model = XGBClassifier(
    n_estimators=500,
    learning_rate=0.05,
    max_depth=6,
    subsample=0.8,
    colsample_bytree=0.8,
    random_state=FINAL_RANDOM_STATE,
    n_jobs=-1,
    eval_metric="mlogloss",
)

# The model is fitted on the complete dataset; no validation split is held
# out in this script.
print("🔹 Training final XGBoost model on full dataset...")
final_model.fit(X, y)
print("✅ Final XGBoost model trained.")

# ======================
# Save final model
# ======================
# Serialize the fitted classifier to FINAL_MODEL_FILE with joblib, then echo
# the destination path.
joblib.dump(value=final_model, filename=FINAL_MODEL_FILE)
saved_message = f"Final model saved to: {FINAL_MODEL_FILE}"
print(saved_message)

# ======================
# Save class mapping
# ======================
# Load the fitted preprocessing pipeline and store a copy of it next to the
# model under the name "label_encoder.pkl".
#
# NOTE(review): despite the file name, this writes the *entire* pipeline
# object, not a standalone LabelEncoder, and no class mapping is extracted.
# Presumably the pipeline carries the encoder built in feature_engineer.py --
# confirm, and consider writing the encoder's classes to CLASS_MAPPING_FILE.
#
# NOTE(security): joblib.load unpickles arbitrary Python objects; only load
# PIPELINE_FILE from a trusted location.
pipeline = joblib.load(PIPELINE_FILE)

# joblib is already imported at the top of the script, so it is not
# re-imported here.
label_encoder_file = os.path.join(OUTPUT_DIR, "label_encoder.pkl")
joblib.dump(pipeline, label_encoder_file)  # Save pipeline with encoder info
print(f"Pipeline (with LabelEncoder) saved to: {label_encoder_file}")


Full dataset shape: (19761, 108807), Target shape: (19761,)
🔹 Training final XGBoost model on full dataset...


Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


✅ Final XGBoost model trained.
Final model saved to: C:\Users\vibho\Downloads\Engineering\exoplanet-ai\data\processed\XGBoost_final_model.pkl
Pipeline (with LabelEncoder) saved to: C:\Users\vibho\Downloads\Engineering\exoplanet-ai\data\processed\label_encoder.pkl
