In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split,cross_val_score
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import confusion_matrix, classification_report, roc_curve, auc
import seaborn as sns
import matplotlib.pyplot as plt


In [None]:
# Location of the raw CSV. NOTE(review): this is an absolute, machine-specific
# path; anyone running the notebook elsewhere has to edit this one constant.
DATA_PATH='C:\\Users\\Vidya\\Documents\\Creditcard_fraud_detection\\creditcard_2023.csv'

df=pd.read_csv(DATA_PATH)

# A notebook cell only auto-renders its LAST expression, so a bare `df.head()`
# followed by another statement shows nothing. Render the preview explicitly
# (`display` is provided by the IPython/Jupyter kernel).
display(df.head())

# Column dtypes, non-null counts and memory usage.
df.info()


In [None]:

# Number of missing values in each column (isna is the canonical name of isnull).
df.isna().sum()


In [None]:
# Target vector: the 'Class' column.
y = df['Class']

# Feature matrix: every column except the row identifier and the target.
# errors='ignore' keeps the cell working when one of these columns is absent.
x = df.drop(columns=['id', 'Class'], errors='ignore')

In [None]:
# Sanity check: list the feature names that will be fed to the model.
print(list(x.columns))

In [None]:
# Hold out 20% of the rows for final evaluation.
# stratify=y keeps the class ratio of `y` identical in the train and test
# partitions, so evaluation metrics are not skewed by an unlucky random split.
# random_state pins the shuffle so the split is reproducible across runs.
x_train,x_test,y_train,y_test=train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)


In [None]:
# (rows, columns) of the training feature matrix.
x_train.shape

In [None]:
# (rows, columns) of the held-out test feature matrix.
x_test.shape

In [None]:
# Fit the scaler on the training split only, then apply the same fitted
# transformation to both splits so no test-set statistics leak into training.
scaler = StandardScaler().fit(x_train)
x_train_scaled = scaler.transform(x_train)
x_test_scaled = scaler.transform(x_test)

In [None]:
# Relative frequency of each class label in the training split.
train_class_share = pd.Series(y_train).value_counts(normalize=True)
print(train_class_share)

In [None]:
# Hyper-parameters gathered in one place so they are easy to read and tweak.
rf_params = {
    'n_estimators': 100,      # number of trees in the forest
    'max_depth': 10,          # cap on the depth of each tree
    'min_samples_split': 5,   # minimum samples needed to split a node
    'random_state': 42,       # fixed seed for reproducible forests
}
rf_model = RandomForestClassifier(**rf_params)

In [None]:
# 5-fold cross-validation on the (scaled) training split, scored with F1.
cv_scores = cross_val_score(
    estimator=rf_model,
    X=x_train_scaled,
    y=y_train,
    scoring='f1',
    cv=5,
)

# Per-fold scores followed by their mean.
print('\n Cross-validation F1 scores:', cv_scores)
print('Average F1 Score:', cv_scores.mean())


In [None]:
# Train the forest on the full scaled training split
# (the fitted estimator is returned and rendered as the cell output).
rf_model.fit(X=x_train_scaled, y=y_train)


In [None]:
# Hard class predictions for the held-out test split.
y_pred = rf_model.predict(X=x_test_scaled)

In [None]:
# Per-class precision / recall / F1 on the test split.
print(classification_report(y_true=y_test, y_pred=y_pred))

In [None]:
# Confusion matrix of test-set predictions, drawn as an annotated heatmap.
cm = confusion_matrix(y_true=y_test, y_pred=y_pred)

fig, ax = plt.subplots(figsize=(8, 6))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', ax=ax)  # fmt='d' -> integer counts
ax.set_title('Confusion_matrix')
ax.set_ylabel('True Label')
ax.set_xlabel('Predicted Label')
plt.show()


In [None]:
# Importance score per feature as learned by the fitted forest.
importance = rf_model.feature_importances_

# Pair each score with its column name, then rank from most to least important.
feature_imp = pd.DataFrame({'Feature': x.columns, 'Importance': importance})
feature_imp = feature_imp.sort_values(by='Importance', ascending=False)


In [None]:
# Top five features by importance.
feature_imp.head(n=5)

In [None]:
# Horizontal bar chart of every feature, in the ranked order of feature_imp.
fig, ax = plt.subplots(figsize=(10, 6))
sns.barplot(data=feature_imp, x='Importance', y='Feature', ax=ax)
ax.set_title('Feature importance Ranking')
ax.set_xlabel('Importance score')
fig.tight_layout()
plt.show()

In [None]:
y_pred_proba=rf_model.predict_proba(x_test_scaled)[:,1]
fpr,tpr,_=roc_curve(y_test,y_pred_proba)
roc_curve=auc(fpr,tpr)


In [None]:
plt.figure(figsize=(8,6))
plt.plot(fpr,tpr,color='darkorange', lw=2, label=f'ROC curve (AUC = {roc_curve:.2f})')
plt.plot([0,1],[0,1],color='navy',lw=2, linestyle='--')
plt.xlim([0.0,1.0])
plt.ylim([0.0,1.05])
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title("Receiver Operating Characteristics (ROC) Curve")
plt.legend(loc='lower right')
plt.show()

In [None]:
def check_fraud(transaction_dict, model, scaler):
    """
    Predict whether a transaction is Fraud or Legit.

    Parameters:
    - transaction_dict: dictionary mapping feature name -> value for ONE
      transaction. When the scaler exposes `feature_names_in_`, the keys may
      be given in any order; they are re-ordered to match the scaler.
    - model: trained classifier exposing `predict` and `predict_proba`
    - scaler: fitted scaler exposing `transform`

    Returns:
    - (result, probability): a human-readable label and the value found in
      column 1 of `predict_proba` for this transaction
      (assumes column 1 corresponds to the fraud class -- confirm against
      `model.classes_`).

    Raises:
    - ValueError: if the scaler exposes `feature_names_in_` and the keys of
      `transaction_dict` do not match those names exactly.
    """

    # Convert input dictionary to a single-row DataFrame
    transaction_df = pd.DataFrame([transaction_dict])

    # Align columns with the order the scaler was fitted on. Without this the
    # result depends on the insertion order of the caller's dict.
    expected = getattr(scaler, "feature_names_in_", None)
    if expected is not None:
        expected = list(expected)
        missing = [name for name in expected if name not in transaction_df.columns]
        unexpected = [name for name in transaction_df.columns if name not in expected]
        if missing or unexpected:
            raise ValueError(
                "transaction_dict keys do not match the features the scaler "
                f"was fitted on (missing: {missing}, unexpected: {unexpected})"
            )
        transaction_df = transaction_df[expected]

    # Scale the input using trained scaler
    transaction_scaled = scaler.transform(transaction_df)

    # Predict class and probability
    prediction = model.predict(transaction_scaled)[0]
    probability = model.predict_proba(transaction_scaled)[0][1]

    # Result interpretation. The emoji are written as escapes so the source
    # stays pure ASCII and cannot be corrupted by a wrong file encoding
    # (the previous literals were UTF-8 bytes mis-decoded as cp1252).
    if prediction == 1:
        result = "\U0001F6A8 Fraudulent Transaction"
    else:
        result = "\u2705 Legitimate Transaction"

    return result, probability


In [None]:
# Example transaction used to exercise check_fraud.
# Values for V1..V28, listed in order (one row per four features).
_v_values = [
    -1.359807, -0.072781, 2.536347, 1.378155,     # V1  - V4
    -0.338321, 0.462388, 0.239599, 0.098698,      # V5  - V8
    0.363787, 0.090794, -0.551600, -0.617801,     # V9  - V12
    -0.991390, -0.311169, 1.468177, -0.470401,    # V13 - V16
    0.207971, 0.025791, 0.403993, 0.251412,       # V17 - V20
    -0.018307, 0.277838, -0.110474, 0.066928,     # V21 - V24
    0.128539, -0.189115, 0.133558, -0.021053,     # V25 - V28
]

# Keys are 'V1' ... 'V28' followed by 'Amount', in that insertion order.
sample_transaction = {
    f'V{position}': value
    for position, value in enumerate(_v_values, start=1)
}
sample_transaction['Amount'] = 149.62


In [None]:
# Score the example transaction with the trained model and fitted scaler.
result, risk = check_fraud(
    transaction_dict=sample_transaction,
    model=rf_model,
    scaler=scaler,
)

# Label first, then the fraud probability rounded to two decimals.
print(result)
print(f"Fraud Probability: {risk:.2f}")
