# Import Libraries

In [1]:
import os

import joblib
import numpy as np
import pandas as pd
from imblearn.pipeline import Pipeline
from sklearn.metrics import accuracy_score,confusion_matrix,classification_report,roc_auc_score,roc_curve
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from xgboost import XGBClassifier

# Load Dataset

In [2]:

# Location of the raw credit-card transactions CSV. The machine-specific
# default is kept so existing runs behave exactly as before; set the
# FRAUD_DATA_PATH environment variable to point the notebook at the file on
# any other machine without editing this cell.
DATA_PATH = os.environ.get(
    "FRAUD_DATA_PATH",
    r"D:\FinTech_Fraud_Detection_Project\data\raw\creditcard.csv",
)

# Read the whole CSV into memory and report (rows, columns) as a sanity check.
data = pd.read_csv(DATA_PATH)
print("Dataset Loaded:", data.shape)


Dataset Loaded: (284807, 31)


# Remove Duplicate Rows

In [3]:
# Discard rows that are exact repeats of another row, then confirm that the
# cleaned frame contains no remaining repeats.
data = data.drop_duplicates()
leftover_duplicates = data.duplicated().sum()

print("Shape of the Data: ", data.shape)
print("Duplicate Rows in Data: ", leftover_duplicates)

Shape of the Data:  (283726, 31)
Duplicate Rows in Data:  0


In [5]:
# Separate the label from the predictors: `y` is the "Class" column and `x`
# is every other column of the de-duplicated frame.
y = data["Class"]
x = data.drop(columns=["Class"])

# Train-Test Split

In [6]:
# Hold out 20% of the rows for evaluation. The split is stratified on `y` so
# both partitions keep (approximately) the same class proportions, and the
# fixed seed makes the partition repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    stratify=y,
    random_state=42,
)
print("Train/Test shapes:", x_train.shape, x_test.shape)


Train/Test shapes: (226980, 30) (56746, 30)


# XGBoost Model & Evaluation

In [7]:
# Two-step pipeline: standardise the features, then fit a gradient-boosted
# tree classifier. Keeping the scaler inside the pipeline means it is fitted
# on the training rows only and re-applied automatically at prediction time.
xgb_model = Pipeline(
    steps=[
        ("scaler", StandardScaler()),
        (
            "model",
            XGBClassifier(
                # Learning task and the metric XGBoost tracks while training.
                objective="binary:logistic",
                eval_metric="logloss",
                # Ensemble size and shrinkage.
                n_estimators=300,
                learning_rate=0.05,
                # Tree complexity and regularisation.
                max_depth=8,
                min_child_weight=3,
                gamma=0.1,
                reg_lambda=1.0,
                # Row subsampling per tree; the seed keeps it repeatable.
                subsample=0.8,
                random_state=42,
            ),
        ),
    ]
)

# Fit on the training partition; as the last expression of the cell, the
# fitted pipeline is also displayed.
xgb_model.fit(x_train, y_train)

In [8]:
# Probability of the positive class (Class == 1) for every held-out row;
# column 1 of predict_proba corresponds to the second class label.
y_proba = xgb_model.predict_proba(x_test)[:, 1]

# Convert probabilities to hard 0/1 labels with a custom cut-off rather than
# the default 0.5: a row is labelled positive when its probability is at
# least `threshold`.
threshold = 0.29
y_pred_thresh = (y_proba >= threshold).astype(int)

# Report the metrics obtained at this cut-off on the held-out partition.
print("Threshold:", threshold)
print("Accuracy", accuracy_score(y_test, y_pred_thresh))
print("\nConfusion Matrix:\n", confusion_matrix(y_test, y_pred_thresh))
print("\nClassification Report\n", classification_report(y_test, y_pred_thresh))



Threshold: 0.29
Accuracy 0.9995770627004547

Confusion Matrix:
 [[56649     2]
 [   22    73]]

Classification Report
               precision    recall  f1-score   support

           0       1.00      1.00      1.00     56651
           1       0.97      0.77      0.86        95

    accuracy                           1.00     56746
   macro avg       0.99      0.88      0.93     56746
weighted avg       1.00      1.00      1.00     56746



In [None]:
# Save Model

In [None]:
# Persist the fitted pipeline (scaler + classifier) to a file in the current
# working directory. Only the pipeline object is written: the 0.29 probability
# cut-off used in the evaluation cell is applied outside the pipeline and is
# therefore not stored in this file.
joblib.dump(xgb_model, filename="THE_xgb_Champion_model.pkl")
print("Model saved successfully!")