# In [None]:
# ================================
# Insurance Claim Severity Prediction
# Full ML Pipeline in One File
# ================================

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.metrics import mean_absolute_error, mean_squared_error
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor



# =====================================================
# 1. Load Dataset
# =====================================================
def load_data(path="claims.csv"):
    """Read the claims CSV into a DataFrame, reporting progress on stdout.

    Args:
        path: Location handed straight to ``pandas.read_csv``; defaults to
            ``"claims.csv"``.

    Returns:
        The DataFrame produced by ``pandas.read_csv``.
    """
    print("Loading dataset...")
    claims = pd.read_csv(path)
    # Report the (rows, columns) tuple so the caller can sanity-check the load.
    loaded_shape = claims.shape
    print("Dataset loaded. Shape:", loaded_shape)
    return claims



# =====================================================
# 2. Preprocess & Split
# =====================================================
def preprocess_data(df):
    """Split ``df`` into features/target and build an unfitted preprocessor.

    The ``"claim_amount"`` column is the target; every other column is a
    candidate feature. Numeric features are routed to a ``StandardScaler``
    and categorical features to a ``OneHotEncoder`` that ignores categories
    unseen at fit time.

    Args:
        df: DataFrame containing a ``"claim_amount"`` column.

    Returns:
        A tuple ``(X, y, preprocess)`` where ``X`` is ``df`` without the
        target column, ``y`` is the target Series, and ``preprocess`` is the
        unfitted ``ColumnTransformer``.

    Raises:
        KeyError: If ``df`` has no ``"claim_amount"`` column.
    """
    print("Preparing features and target...")

    X = df.drop("claim_amount", axis=1)
    y = df["claim_amount"]

    # Select by dtype *family* rather than by exact width. Matching only
    # int64/float64 left int32/float32/uint8 columns out of both lists, and
    # ColumnTransformer's default remainder="drop" then discarded them
    # silently. pandas counts timedelta as a number, so it is excluded
    # explicitly: it is not a plain numeric feature to be scaled.
    num_cols = X.select_dtypes(include="number", exclude="timedelta").columns

    # bool columns (what read_csv yields for True/False data) and pandas
    # categoricals are categorical features too; matching only "object"
    # dropped them the same way.
    cat_cols = X.select_dtypes(include=["object", "category", "bool"]).columns

    # Columns of any remaining dtype (e.g. datetime) are still not assigned
    # to a transformer and are therefore not passed on to the model.
    preprocess = ColumnTransformer(
        transformers=[
            ("num", StandardScaler(), num_cols),
            ("cat", OneHotEncoder(handle_unknown="ignore"), cat_cols),
        ]
    )

    return X, y, preprocess



# =====================================================
# 3. Build Models
# =====================================================
def build_models(preprocess):
    """Build the random-forest and gradient-boosting regression pipelines.

    Each pipeline runs a preprocessing step followed by its regressor. Both
    regressors use ``random_state=42``; the random forest uses 300 trees.

    Args:
        preprocess: Preprocessing step (typically an unfitted
            ``ColumnTransformer``) used as the first stage of each pipeline.
            It is copied, not stored, so the object passed in is left
            untouched when the pipelines are fitted.

    Returns:
        A tuple ``(rf_model, gb_model)`` of unfitted ``Pipeline`` objects.
    """
    # Imported locally so this function does not depend on an extra
    # module-level import being present.
    from sklearn.base import clone

    # Pipeline.fit fits its steps in place. Sharing one preprocessor instance
    # between both pipelines meant fitting one pipeline overwrote the fitted
    # state the other relied on. Give each pipeline its own unfitted copy.
    # safe=False falls back to a deep copy for non-estimator steps such as
    # the "passthrough" string, which Pipeline also accepts.
    rf_model = Pipeline(steps=[
        ("preprocess", clone(preprocess, safe=False)),
        ("model", RandomForestRegressor(
            n_estimators=300,
            random_state=42
        ))
    ])

    gb_model = Pipeline(steps=[
        ("preprocess", clone(preprocess, safe=False)),
        ("model", GradientBoostingRegressor(random_state=42))
    ])

    return rf_model, gb_model



# =====================================================
# 4. Train and Evaluate Models
# =====================================================
def evaluate(model, X_test, y_test):
    preds = model.predict(X_test)
    mae