In [21]:
import pandas as pd 
from pathlib import Path
import numpy as np
import json
import boto3
import os
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, classification_report
from dotenv import load_dotenv
load_dotenv()

# Credentials read from the environment (populated by load_dotenv() above);
# presumably consumed by boto3 elsewhere in the notebook - usage is not visible here.
# The original lookup used the misspelled name "ACESS_KEY". Prefer the correctly
# spelled variable and fall back to the legacy spelling so existing .env files keep working.
ACCESS_KEY = os.getenv("ACCESS_KEY") or os.getenv("ACESS_KEY")
SECRET_ACCESS_KEY = os.getenv("SECRET_ACCESS_KEY")

# The 28 feature columns V1..V28 used by the model.
FEATURE_COLUMNS = [f"V{i}" for i in range(1, 29)]

# Load everything as text, then build the working frame in a single chain
# (no in-place mutation, so re-running the cell is idempotent):
#   * "id" becomes "transaction_id" and stays a string,
#   * "Amount" is dropped and therefore never used as a feature,
#   * V1..V28 are cast to float,
#   * "transaction_time" puts every ten consecutive ids in one bucket, starting at 1.
data_path = Path('.').resolve().parent.joinpath("data", "creditcard_2023.csv")
data = (
    pd.read_csv(data_path, dtype=str)
    .rename(columns={"id": "transaction_id"})
    .drop(columns=["Amount"])
    .astype({col_name: float for col_name in FEATURE_COLUMNS})
    .assign(transaction_time=lambda df: df["transaction_id"].astype("int64") // 10 + 1)
)

# Fixed seed so the 70/30 split is reproducible.
train, test = train_test_split(data, test_size=0.3, random_state=42)

# Fit one Gaussian per class on the training split.
# NOTE: the "sigma" entry holds the INVERSE covariance matrix (np.linalg.inv of
# the sample covariance), not the covariance itself; the key name is kept
# because downstream code reads it.
model_parameters = {}
for fraud_cls in [0, 1]:
    # "Class" was read as text, so compare against the string label.
    class_features = train.loc[train["Class"] == str(fraud_cls), FEATURE_COLUMNS]
    mu_g = class_features.mean().to_list()
    sigma_g = np.linalg.inv(class_features.cov().to_numpy())
    # Plain lists keep the parameters JSON-serialisable.
    model_parameters[fraud_cls] = {"mu": mu_g, "sigma": sigma_g.tolist()}

In [26]:
def classify_transaction(transaction_data, parameters=None):
    """Label a single transaction as fraud (1) or legitimate (0).

    For each class the score is the negative quadratic form
    ``-(x - mu) . sigma . (x - mu)``, where ``sigma`` is the matrix stored
    under the ``"sigma"`` key (the notebook stores the inverse covariance
    there). No log-determinant or class-prior term is added, so the rule
    picks the class with the smaller distance under its own matrix.

    Parameters
    ----------
    transaction_data : mapping-like row (e.g. a pandas Series)
        Must support selection by the list of labels ``V1`` .. ``V28``;
        values may be numbers or numeric strings. Other entries are ignored.
    parameters : dict, optional
        ``{0: {"mu": [...], "sigma": [[...]]}, 1: {...}}``. Defaults to the
        notebook-level ``model_parameters`` so existing callers are unchanged.

    Returns
    -------
    int
        ``1`` if the class-1 score is strictly greater than the class-0
        score, otherwise ``0`` (ties resolve to ``0``).
    """
    if parameters is None:
        # Backward-compatible default: fall back to the notebook-level model.
        parameters = model_parameters

    feature_columns = [f"V{i}" for i in range(1, 29)]
    x = np.asarray(transaction_data[feature_columns].astype(float))

    def _score(cls):
        # Negative quadratic form of the deviation from the class mean.
        mu = np.array(parameters[cls]["mu"])
        sigma = np.array(parameters[cls]["sigma"])
        deviation = x - mu
        return -deviation.dot(sigma).dot(deviation)

    return 1 if _score(1) > _score(0) else 0

# Score the held-out split one row at a time; each row is passed to the
# classifier as a Series and the resulting 0/1 label is stored alongside it.
predicted_labels = test.apply(classify_transaction, axis=1)
test["predictions"] = predicted_labels

In [32]:
# "Class" was loaded as text, so cast it to int to match the 0/1 predictions
# before computing per-class precision, recall and F1.
true_labels = test["Class"].astype(int)
report_text = classification_report(true_labels, test["predictions"])
print(report_text)

              precision    recall  f1-score   support

           0       0.91      0.99      0.95     85149
           1       0.99      0.91      0.95     85440

    accuracy                           0.95    170589
   macro avg       0.95      0.95      0.95    170589
weighted avg       0.95      0.95      0.95    170589



In [33]:
# Rows are the true classes and columns the predicted classes (scikit-learn
# convention); the string labels are cast to int to match the predictions.
true_labels = test["Class"].astype(int)
matrix = confusion_matrix(true_labels, test["predictions"])
print(matrix)

[[84198   951]
 [ 7858 77582]]


# Interpretação dos resultados
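O modelo bloqueou erroneamente 1,1% das transações honestas (951 falsos positivos em 85.149) e identificou corretamente 90,8% das transações fraudulentas (77.582 de 85.440); as 7.858 fraudes restantes (9,2%) não foram detectadas.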