In [2]:
import pandas as pd
import numpy as np

In [None]:
data = pd.read_csv('datos_financieros.csv')
print(data.head()) 

   step      type    amount     nameOrig  oldbalanceOrg  newbalanceOrig  \
0     1   PAYMENT   9839.64  C1231006815       170136.0       160296.36   
1     1   PAYMENT   1864.28  C1666544295        21249.0        19384.72   
2     1  TRANSFER    181.00  C1305486145          181.0            0.00   
3     1  CASH_OUT    181.00   C840083671          181.0            0.00   
4     1   PAYMENT  11668.14  C2048537720        41554.0        29885.86   

      nameDest  oldbalanceDest  newbalanceDest  isFraud  isFlaggedFraud  
0  M1979787155             0.0             0.0        0               0  
1  M2044282225             0.0             0.0        0               0  
2   C553264065             0.0             0.0        1               0  
3    C38997010         21182.0             0.0        1               0  
4  M1230701703             0.0             0.0        0               0  


In [6]:
print(data.isnull().sum())

step              0
type              0
amount            0
nameOrig          0
oldbalanceOrg     0
newbalanceOrig    0
nameDest          0
oldbalanceDest    0
newbalanceDest    0
isFraud           0
isFlaggedFraud    0
dtype: int64


In [7]:
print(data.type.value_counts())

type
CASH_OUT    2237500
PAYMENT     2151495
CASH_IN     1399284
TRANSFER     532909
DEBIT         41432
Name: count, dtype: int64


In [None]:
import plotly.express as px
type_counts = data["type"].value_counts().reset_index()
type_counts.columns = ["transaction", "quantity"]
figure = px.pie(
    type_counts,
    values="quantity",
    names="transaction",
    hole=0.5,
    title="Tipos de transacciones"
)
figure.show()

In [10]:
numeric_data = data.select_dtypes(include='number')
correlation = numeric_data.corr()
print(correlation["isFraud"].sort_values(ascending=False))

isFraud           1.000000
amount            0.076688
isFlaggedFraud    0.044109
step              0.031578
oldbalanceOrg     0.010154
newbalanceDest    0.000535
oldbalanceDest   -0.005885
newbalanceOrig   -0.008148
Name: isFraud, dtype: float64


In [11]:
print(data["type"].unique())

['PAYMENT' 'TRANSFER' 'CASH_OUT' 'DEBIT' 'CASH_IN']


In [12]:
data["type"] = data["type"].astype(str).str.upper().str.strip()

In [13]:
data["type"] = data["type"].map({"CASH_OUT": 1, "PAYMENT": 2, 
                                 "CASH_IN": 3, "TRANSFER": 4,
                                 "DEBIT": 5})

data["isFraud"] = data["isFraud"].map({0: "Sin fraude", 1: "Fraude"})
print(data.head())

   step  type    amount     nameOrig  oldbalanceOrg  newbalanceOrig  \
0     1     2   9839.64  C1231006815       170136.0       160296.36   
1     1     2   1864.28  C1666544295        21249.0        19384.72   
2     1     4    181.00  C1305486145          181.0            0.00   
3     1     1    181.00   C840083671          181.0            0.00   
4     1     2  11668.14  C2048537720        41554.0        29885.86   

      nameDest  oldbalanceDest  newbalanceDest     isFraud  isFlaggedFraud  
0  M1979787155             0.0             0.0  Sin fraude               0  
1  M2044282225             0.0             0.0  Sin fraude               0  
2   C553264065             0.0             0.0      Fraude               0  
3    C38997010         21182.0             0.0      Fraude               0  
4  M1230701703             0.0             0.0  Sin fraude               0  


In [14]:
print(data["type"].unique())

[2 4 1 5 3]


In [None]:
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier

x = np.array(data[["type", "amount", "oldbalanceOrg", "newbalanceOrig"]])
y = np.array(data[["isFraud"]])

xtrain, xtest, ytrain, ytest = train_test_split(x, y, test_size=0.10, random_state=42) 
model = DecisionTreeClassifier()
model.fit(xtrain, ytrain)
print(model.score(xtest, ytest))

0.9997391011878755


In [17]:
from sklearn.metrics import precision_score, recall_score, f1_score, confusion_matrix

ypred = model.predict(xtest)
precision = precision_score(ytest, ypred, pos_label="Fraude")
recall = recall_score(ytest, ypred, pos_label="Fraude")
f1 = f1_score(ytest, ypred, pos_label="Fraude")
conf_matrix = confusion_matrix(ytest, ypred, labels=["Sin fraude", "Fraude"])

In [18]:
print("Precisión:", round(precision, 2))
print("Recall:", round(recall, 2))
print("F1-score:", round(f1, 2))
print("Matriz de confusión:")
print(conf_matrix)

Precisión: 0.91
Recall: 0.89
F1-score: 0.9
Matriz de confusión:
[[635369     76]
 [    90    727]]
