In [None]:
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import confusion_matrix, classification_report, roc_auc_score, roc_curve

In [None]:
# Load the transactions CSV into a DataFrame and preview the first rows.
csv_path = "/creditcard.csv"
df = pd.read_csv(csv_path)

df.head()


In [None]:
# Show the dataset dimensions as a (rows, columns) tuple.
df.shape


In [None]:
# List the column labels available in the loaded dataset.
df.columns


In [None]:
# Count how many rows carry each value of the `Class` label to expose the class imbalance.
df['Class'].value_counts()


In [None]:
# Percentage of rows in the positive class: assuming `Class` is a 0/1 label,
# its mean is the positive fraction, scaled here to percent.
fraud_percentage = 100 * df['Class'].mean()
fraud_percentage


In [None]:
# Sanity check: these imports raise ImportError if imbalanced-learn or xgboost
# is missing from the environment, so the message below prints only when both load.
import imblearn
import xgboost
print("Libraries installed correctly!")


In [None]:
# Import the SMOTE over-sampler; the confirmation prints only if the import succeeds.
from imblearn.over_sampling import SMOTE
print("SMOTE is ready!")


In [None]:
# Separate the label from the features: `y` is the Class column,
# `X` is every remaining column.
y = df['Class']
X = df.drop(columns=['Class'])

In [None]:
# Keep an unscaled copy of Amount so the raw and scaled values can be compared
# side by side; this helper column is dropped again before the train/test split.
X['Amount_original'] = X['Amount']


In [None]:
# Standardise the transaction amount in place (zero mean, unit variance).
# NOTE(review): the scaler is fitted on every row, i.e. before the train/test
# split, so its mean/std also reflect rows that later end up in the test set.
scaler = StandardScaler()
amount_frame = X[['Amount']]  # 2-D selection, as the scaler expects
X['Amount'] = scaler.fit_transform(amount_frame)


In [None]:
# Compare the raw amounts with their standardised counterparts for the first rows.
X[['Amount_original', 'Amount']].head()


In [None]:
# The raw-amount helper column was only needed for the comparison; remove it
# so it is not used as a model feature.
X = X.drop(columns=['Amount_original'])


In [None]:
# Hold out 20% of the rows for testing. Stratifying on y keeps the class ratio
# the same in both partitions; the fixed seed makes the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y,
    test_size=0.2,
    random_state=42,
    stratify=y
)

# Avoid preprocessing leakage: Amount was standardised earlier with statistics
# computed over every row, test rows included. Re-standardise it here with a
# scaler fitted on the training rows only. Standardisation is invariant to a
# prior affine rescaling, so the result equals scaling the raw amounts with the
# train-only mean and standard deviation.
X_train, X_test = X_train.copy(), X_test.copy()  # own the data before writing to it
amount_scaler = StandardScaler().fit(X_train[['Amount']])
X_train['Amount'] = amount_scaler.transform(X_train[['Amount']])
X_test['Amount'] = amount_scaler.transform(X_test[['Amount']])


In [None]:
# Report the partition shapes, then the positive-class rate in each partition
# (these should be nearly equal because the split was stratified).
train_shape, test_shape = X_train.shape, X_test.shape
print("Train size:", train_shape)
print("Test size:", test_shape)

train_fraud_rate = y_train.mean()
test_fraud_rate = y_test.mean()
print("Fraud rate in train:", train_fraud_rate)
print("Fraud rate in test:", test_fraud_rate)


In [None]:
from sklearn.ensemble import IsolationForest


In [None]:
# Unsupervised baseline: an Isolation Forest fitted on the training features
# only (labels are not used). `contamination` is the expected outlier share.
iso_params = {
    "n_estimators": 100,
    "contamination": 0.0017,
    "random_state": 42,
}
iso_forest = IsolationForest(**iso_params)

iso_forest.fit(X_train)


In [None]:
# IsolationForest.predict labels outliers as -1 and inliers as +1;
# remap to the dataset's convention (1 = flagged, 0 = normal).
iso_raw_labels = iso_forest.predict(X_test)

y_pred_iso = np.where(iso_raw_labels == -1, 1, 0)


In [None]:
# Confusion matrix and per-class precision/recall/F1 of the Isolation Forest
# predictions on the test split.
cm_iso = confusion_matrix(y_test, y_pred_iso)
report_iso = classification_report(y_test, y_pred_iso)

print("Confusion Matrix (Isolation Forest):", cm_iso, sep="\n")
print("\nClassification Report:", report_iso, sep="\n")


In [None]:
# decision_function is lower for more anomalous samples, so negate it to get
# a score that increases with "fraud-likeness", as roc_auc_score expects.
iso_decision = iso_forest.decision_function(X_test)
y_scores_iso = -iso_decision

roc_auc_iso = roc_auc_score(y_test, y_scores_iso)
print("ROC-AUC (Isolation Forest):", roc_auc_iso)


In [None]:
# ROC curve of the Isolation Forest anomaly scores on the test split
# (the thresholds returned by roc_curve are not needed for the plot).
fpr, tpr, _ = roc_curve(y_test, y_scores_iso)

plt.figure()
plt.plot(fpr, tpr)
plt.title("ROC Curve - Isolation Forest")
plt.xlabel("False Positive Rate")
plt.ylabel("True Positive Rate")
plt.show()


In [None]:
from xgboost import XGBClassifier


In [None]:
# Supervised model without resampling: the class imbalance is handled by
# weighting positives with the negative/positive count ratio of the training labels.
n_negative = (y_train == 0).sum()
n_positive = (y_train == 1).sum()

xgb_clf = XGBClassifier(
    n_estimators=200,
    max_depth=5,
    learning_rate=0.1,
    subsample=0.8,
    colsample_bytree=0.8,
    scale_pos_weight=n_negative / n_positive,
    eval_metric='logloss',
    random_state=42
)


In [None]:
# Train the class-weighted XGBoost model on the original (non-resampled) training split.
xgb_clf.fit(X_train, y_train)


In [None]:
# Evaluate the class-weighted XGBoost model on the test split:
# hard class predictions for the confusion matrix / report, and the
# probability of class 1 (second predict_proba column) for ROC-AUC.
y_proba_xgb = xgb_clf.predict_proba(X_test)[:, 1]
y_pred_xgb = xgb_clf.predict(X_test)

cm_xgb = confusion_matrix(y_test, y_pred_xgb)
report_xgb = classification_report(y_test, y_pred_xgb)
auc_xgb = roc_auc_score(y_test, y_proba_xgb)

print("Confusion Matrix (XGBoost بدون SMOTE):", cm_xgb, sep="\n")
print("\nClassification Report:", report_xgb, sep="\n")
print("\nROC-AUC:", auc_xgb)


In [None]:
from imblearn.over_sampling import SMOTE


In [None]:
# Over-sample the minority class with SMOTE. Only the training split is
# resampled; the test split is left untouched.
smote = SMOTE(random_state=42)
X_train_smote, y_train_smote = smote.fit_resample(X_train, y_train)

# Class counts before and after resampling.
counts_before = y_train.value_counts()
counts_after = y_train_smote.value_counts()
print("Avant SMOTE:", counts_before)
print("Après SMOTE:", counts_after)


In [None]:
# Second XGBoost model with the same hyper-parameters as the first one, minus
# scale_pos_weight: it will be trained on the SMOTE-resampled data instead.
xgb_smote_params = {
    "n_estimators": 200,
    "max_depth": 5,
    "learning_rate": 0.1,
    "subsample": 0.8,
    "colsample_bytree": 0.8,
    "eval_metric": 'logloss',
    "random_state": 42,
}
xgb_smote = XGBClassifier(**xgb_smote_params)


In [None]:
# Train the second XGBoost model on the SMOTE-resampled training data.
xgb_smote.fit(X_train_smote, y_train_smote)


In [None]:
# Evaluate the SMOTE-trained model on the (non-resampled) test split:
# hard class predictions plus the probability of class 1 for ROC-AUC.
y_proba_xgb_smote = xgb_smote.predict_proba(X_test)[:, 1]
y_pred_xgb_smote = xgb_smote.predict(X_test)

cm_xgb_smote = confusion_matrix(y_test, y_pred_xgb_smote)
report_xgb_smote = classification_report(y_test, y_pred_xgb_smote)
auc_xgb_smote = roc_auc_score(y_test, y_proba_xgb_smote)

print("Confusion Matrix (XGBoost + SMOTE):", cm_xgb_smote, sep="\n")
print("\nClassification Report:", report_xgb_smote, sep="\n")
print("\nROC-AUC:", auc_xgb_smote)


In [None]:
from sklearn.metrics import precision_recall_curve

# Precision and recall of the SMOTE-trained model on the test split at every candidate cut-off.
# Per the scikit-learn docs, `thresholds` is increasing and `precision`/`recall` each carry one
# extra trailing element with no matching threshold (hence the `[:-1]` slices in later cells).
precision, recall, thresholds = precision_recall_curve(y_test, y_proba_xgb_smote)


In [None]:
# Recall as a function of the decision threshold. The last recall entry is
# dropped so the array lines up one-to-one with `thresholds`.
recall_at_threshold = recall[:-1]

plt.figure()
plt.plot(thresholds, recall_at_threshold)
plt.title("Recall vs Threshold")
plt.xlabel("Threshold")
plt.ylabel("Recall")
plt.show()


In [None]:
# Minimum recall the tuned decision threshold must still achieve.
target_recall = 0.90

# precision_recall_curve returns `thresholds` in increasing order, with recall
# non-increasing as the threshold grows. The thresholds meeting the target are
# therefore a leading run of the array: its first element is the lowest cut-off
# (recall 1.0, nearly everything flagged), and its last element is the highest
# cut-off that still reaches the target, which is the one wanted here.
# NOTE(review): the threshold is tuned on the test split, so metrics reported
# with it are optimistic; a separate validation split would be cleaner.
meets_target = recall[:-1] >= target_recall
best_threshold = thresholds[meets_target][-1]
best_threshold


In [None]:
# Re-label the test split with the tuned cut-off instead of the default one.
flagged = y_proba_xgb_smote >= best_threshold
y_pred_opt = flagged.astype(int)

cm_opt = confusion_matrix(y_test, y_pred_opt)
report_opt = classification_report(y_test, y_pred_opt)
# ROC-AUC is computed from the probabilities, so the threshold does not affect it.
auc_opt = roc_auc_score(y_test, y_proba_xgb_smote)

print("Confusion Matrix (Threshold optimisé):", cm_opt, sep="\n")
print("\nClassification Report:", report_opt, sep="\n")
print("\nROC-AUC:", auc_opt)


In [None]:
# F1 score at every threshold. The trailing precision/recall entry is dropped
# to align with `thresholds`; the small epsilon prevents division by zero.
prec_at_thr, rec_at_thr = precision[:-1], recall[:-1]
f1_scores = 2 * (prec_at_thr * rec_at_thr) / (prec_at_thr + rec_at_thr + 1e-6)


In [None]:
# Among the thresholds whose recall still meets `target_recall`, keep the one
# with the highest F1 score. The recall floor is read from `target_recall`
# rather than a repeated literal so both threshold searches stay in agreement.
valid_idx = recall[:-1] >= target_recall
best_idx = np.argmax(f1_scores[valid_idx])  # position within the filtered arrays

# Overwrites the threshold chosen by the earlier recall-only search.
best_threshold = thresholds[valid_idx][best_idx]
best_threshold


In [None]:
# Re-label the test split with the F1-selected cut-off.
flagged = y_proba_xgb_smote >= best_threshold
y_pred_opt = flagged.astype(int)

cm_opt = confusion_matrix(y_test, y_pred_opt)
report_opt = classification_report(y_test, y_pred_opt)
# ROC-AUC is computed from the probabilities, so the threshold does not affect it.
auc_opt = roc_auc_score(y_test, y_proba_xgb_smote)

print("Confusion Matrix (Threshold optimisé intelligemment):", cm_opt, sep="\n")
print("\nClassification Report:", report_opt, sep="\n")
print("\nROC-AUC:", auc_opt)


In [None]:
from sklearn.metrics import roc_curve

# ROC curve of the SMOTE-trained XGBoost probabilities on the test split
# (the thresholds returned by roc_curve are not needed for the plot).
fpr, tpr, _ = roc_curve(y_test, y_proba_xgb_smote)

plt.figure()
plt.plot(fpr, tpr)
plt.title("ROC Curve - XGBoost + SMOTE")
plt.xlabel("False Positive Rate")
plt.ylabel("True Positive Rate")
plt.show()


In [None]:
# Unpack the 2x2 confusion matrix row by row: row 0 holds the actual negatives
# (tn, fp) and row 1 the actual positives (fn, tp).
cm_opt = confusion_matrix(y_test, y_pred_opt)
(tn, fp), (fn, tp) = cm_opt

total_actual_frauds = fn + tp
missed_frauds = fn

print(f"Number of missed frauds (False Negatives): {missed_frauds}")
print(f"Total actual fraudulent transactions: {total_actual_frauds}")


In [None]:
# Run this cell first
import os
import subprocess
from getpass import getpass

# Enter your GitHub username (surrounding whitespace is dropped)
username = input("Enter your GitHub username: ").strip()

# Enter your GitHub token (paste it); getpass keeps it out of the cell output
token = getpass("Enter your GitHub token: ")

# Set up git configuration.
# The values are passed as an argument list (no shell involved), so whatever was
# typed at the prompt is stored verbatim and can never be parsed as shell syntax,
# unlike interpolating it into a `!git ...` command line. check=True makes a
# failing git call raise instead of silently continuing to the success message.
subprocess.run(
    ["git", "config", "--global", "user.email", f"{username}@users.noreply.github.com"],
    check=True,
)
subprocess.run(["git", "config", "--global", "user.name", username], check=True)

# Optional: Save credentials for this session
os.environ['GITHUB_TOKEN'] = token
print("✓ GitHub credentials configured!")

In [7]:
# Set the global git author identity with fixed values.
# NOTE(review): this hard-codes a personal e-mail address in the notebook and replaces
# any identity written by an earlier `git config --global` call — confirm this is intended.
!git config --global user.name "You-org"
!git config --global user.email "iselmi537@gmail.com"


In [8]:
# Clone the project repository into the current working directory.
!git clone https://github.com/you-org/Fraud-Detection.git


Cloning into 'Fraud-Detection'...
remote: Enumerating objects: 3, done.
remote: Counting objects: 100% (3/3), done.
remote: Compressing objects: 100% (2/2), done.
remote: Total 3 (delta 0), reused 0 (delta 0), pack-reused 0 (from 0)
Receiving objects: 100% (3/3), done.


In [9]:
# Copy the notebook file into the cloned repository folder.
# NOTE(review): the recorded output of this cell is "cp: cannot stat ... No such file or directory",
# i.e. the source path did not exist when it was run — confirm the notebook's real location.
!cp /content/Fraud-Detection.ipynb /content/Fraud-Detection/


cp: cannot stat '/content/Fraud-Detection.ipynb': No such file or directory
