## Library

In [1]:
import pandas as pd
import numpy as np
import os
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

## Dataset

In [2]:
# Build the dataset path with os.path.join instead of concatenating a
# hard-coded '/'-separated suffix, so the separators match the host platform.
DATA_PATH = os.path.join(os.getcwd(), 'dataset', 'creditcard.csv')

# Load the CSV found under ./dataset relative to the current working directory.
df = pd.read_csv(DATA_PATH)

# Show the last three rows as a quick check that the file loaded.
df.tail(3)

Unnamed: 0,Time,V1,V2,V3,V4,V5,V6,V7,V8,V9,...,V21,V22,V23,V24,V25,V26,V27,V28,Amount,Class
284804,172788.0,1.919565,-0.301254,-3.24964,-0.557828,2.630515,3.03126,-0.296827,0.708417,0.432454,...,0.232045,0.578229,-0.037501,0.640134,0.265745,-0.087371,0.004455,-0.026561,67.88,0
284805,172788.0,-0.24044,0.530483,0.70251,0.689799,-0.377961,0.623708,-0.68618,0.679145,0.392087,...,0.265245,0.800049,-0.163298,0.123205,-0.569159,0.546668,0.108821,0.104533,10.0,0
284806,172792.0,-0.533413,-0.189733,0.703337,-0.506271,-0.012546,-0.649617,1.577006,-0.41465,0.48618,...,0.261057,0.643078,0.376777,0.008797,-0.473649,-0.818267,-0.002415,0.013649,217.0,0


In [3]:
# Target vector is the 'Class' column; the feature matrix is every other column.
y = df['Class']
X = df.drop(columns='Class')

In [4]:
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# identical between runs.
# NOTE(review): the split is not stratified. The recorded reports in this
# notebook show 98 positive rows out of 56,962 test rows, so consider passing
# `stratify=y` to keep the class ratio equal in both partitions.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

## Modeling

In [5]:
def evaluate_model(model, X_train, X_test, y_train, y_test):
    """Fit ``model`` on the training split and score it on the test split.

    Parameters
    ----------
    model
        Object exposing ``fit(X, y)`` and ``predict(X)``. ``fit`` is called
        on this object directly.
    X_train, y_train
        Features and labels handed to ``model.fit``.
    X_test, y_test
        Features handed to ``model.predict`` and the labels the resulting
        predictions are compared against.

    Returns
    -------
    tuple
        ``(accuracy, conf_matrix, class_report)``: the results of
        ``accuracy_score``, ``confusion_matrix`` and
        ``classification_report``, each called with ``y_test`` and the
        predictions.
    """
    # fit & predict
    model.fit(X_train, y_train)
    predictions = model.predict(X_test)

    # accuracy, confusion matrix, classification report -- evaluated in the
    # order callers unpack them.
    metric_functions = (accuracy_score, confusion_matrix, classification_report)
    return tuple(metric(y_test, predictions) for metric in metric_functions)

In [6]:
# logistic regression model
def logistic_regression(X_train, X_test, y_train, y_test, random_state=42):
    """Train and evaluate a logistic regression classifier.

    The estimator uses the ``liblinear`` solver with ``max_iter=500`` and is
    seeded with ``random_state`` so repeated runs are reproducible.

    Parameters
    ----------
    X_train, X_test, y_train, y_test
        Train/test features and labels, forwarded unchanged to
        ``evaluate_model``.
    random_state
        Seed passed to ``LogisticRegression``. Defaults to 42, the same seed
        used for the train/test split in this notebook.

    Returns
    -------
    Whatever ``evaluate_model`` returns for the fitted estimator.
    """
    model = LogisticRegression(
        max_iter=500,
        solver='liblinear',
        random_state=random_state,
    )
    return evaluate_model(model, X_train, X_test, y_train, y_test)

# decision tree model
def decision_tree(X_train, X_test, y_train, y_test, random_state=42):
    """Train and evaluate a decision tree classifier.

    The tree is built with scikit-learn's default hyperparameters, except
    that it is seeded with ``random_state``; an unseeded tree can give
    different results on each run.

    Parameters
    ----------
    X_train, X_test, y_train, y_test
        Train/test features and labels, forwarded unchanged to
        ``evaluate_model``.
    random_state
        Seed passed to ``DecisionTreeClassifier``. Defaults to 42, the same
        seed used for the train/test split in this notebook.

    Returns
    -------
    Whatever ``evaluate_model`` returns for the fitted estimator.
    """
    model = DecisionTreeClassifier(random_state=random_state)
    return evaluate_model(model, X_train, X_test, y_train, y_test)

# random forest model
def random_forest(X_train, X_test, y_train, y_test, random_state=42):
    """Train and evaluate a random forest classifier.

    The forest is built with scikit-learn's default hyperparameters, except
    that it is seeded with ``random_state``; an unseeded forest gives
    different results on each run.

    Parameters
    ----------
    X_train, X_test, y_train, y_test
        Train/test features and labels, forwarded unchanged to
        ``evaluate_model``.
    random_state
        Seed passed to ``RandomForestClassifier``. Defaults to 42, the same
        seed used for the train/test split in this notebook.

    Returns
    -------
    Whatever ``evaluate_model`` returns for the fitted estimator.
    """
    model = RandomForestClassifier(random_state=random_state)
    return evaluate_model(model, X_train, X_test, y_train, y_test)

## Logistic Regression

In [7]:
# Fit the logistic regression on the training split and score it on the test split.
logistic_accuracy, logistic_conf_matrix, logistic_class_report = logistic_regression(
    X_train, X_test, y_train, y_test
)

# Emit the header and the three metrics, one per line.
print(
    "Logistic Regression Results:",
    f"Accuracy: {logistic_accuracy}",
    f"Confusion Matrix:\n{logistic_conf_matrix}",
    f"Classification Report:\n{logistic_class_report}",
    sep="\n",
)

Logistic Regression Results:
Accuracy: 0.9989993328885924
Confusion Matrix:
[[56853    11]
 [   46    52]]
Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00     56864
           1       0.83      0.53      0.65        98

    accuracy                           1.00     56962
   macro avg       0.91      0.77      0.82     56962
weighted avg       1.00      1.00      1.00     56962



## Decision Tree

In [8]:
# Fit the decision tree on the training split and score it on the test split.
tree_accuracy, tree_conf_matrix, tree_class_report = decision_tree(
    X_train, X_test, y_train, y_test
)

# Emit the header (preceded by a blank line) and the three metrics, one per line.
print(
    "\nDecision Tree Results:",
    f"Accuracy: {tree_accuracy}",
    f"Confusion Matrix:\n{tree_conf_matrix}",
    f"Classification Report:\n{tree_class_report}",
    sep="\n",
)


Decision Tree Results:
Accuracy: 0.9991397773954567
Confusion Matrix:
[[56835    29]
 [   20    78]]
Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00     56864
           1       0.73      0.80      0.76        98

    accuracy                           1.00     56962
   macro avg       0.86      0.90      0.88     56962
weighted avg       1.00      1.00      1.00     56962



## Random Forest

In [9]:
# Fit the random forest on the training split and score it on the test split.
forest_accuracy, forest_conf_matrix, forest_class_report = random_forest(
    X_train, X_test, y_train, y_test
)

# Emit the header (preceded by a blank line) and the three metrics, one per line.
print(
    "\nRandom Forest Results:",
    f"Accuracy: {forest_accuracy}",
    f"Confusion Matrix:\n{forest_conf_matrix}",
    f"Classification Report:\n{forest_class_report}",
    sep="\n",
)


Random Forest Results:
Accuracy: 0.9995962220427653
Confusion Matrix:
[[56863     1]
 [   22    76]]
Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00     56864
           1       0.99      0.78      0.87        98

    accuracy                           1.00     56962
   macro avg       0.99      0.89      0.93     56962
weighted avg       1.00      1.00      1.00     56962

