In [1]:
!pip install prettytable



In [5]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score, confusion_matrix
from sklearn.preprocessing import StandardScaler, LabelEncoder
from prettytable import PrettyTable

# Load the dataset
df = pd.read_csv('/content/financial_transactions.csv')
# Data exploration
print("Dataset Overview:")
print(df.head())

# Preprocess the data
df.fillna(method='ffill', inplace=True)

# Encode categorical variables
label_encoder = LabelEncoder()
df['transaction_type'] = label_encoder.fit_transform(df['transaction_type'])

# Extract datetime features
df['transaction_time'] = pd.to_datetime(df['transaction_time'])
df['hour'] = df['transaction_time'].dt.hour
df['day'] = df['transaction_time'].dt.day
df['month'] = df['transaction_time'].dt.month
df['day_of_week'] = df['transaction_time'].dt.dayofweek
# Select features and target
features = ['amount', 'transaction_type', 'customer_id', 'hour', 'day', 'month', 'day_of_week']
X = df[features]
y = df['is_fraud']

# Normalize numerical features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Split the data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.3, random_state=42)

# Train a Random Forest classifier
rf_model = RandomForestClassifier(n_estimators=100, random_state=42)
rf_model.fit(X_train, y_train)

# Evaluate the model
y_pred = rf_model.predict(X_test)
y_prob = rf_model.predict_proba(X_test)[:, 1]
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)
roc_auc = roc_auc_score(y_test, y_prob)

# Display model performance metrics in a table
performance_table = PrettyTable()
performance_table.field_names = ["Metric", "Value"]
performance_table.add_row(["Accuracy", f"{accuracy:.2f}"])
performance_table.add_row(["Precision", f"{precision:.2f}"])
performance_table.add_row(["Recall", f"{recall:.2f}"])
performance_table.add_row(["F1-Score", f"{f1:.2f}"])
performance_table.add_row(["ROC AUC Score", f"{roc_auc:.2f}"])

print("Model Performance Metrics:")
print(performance_table)
# Display confusion matrix
conf_matrix = confusion_matrix(y_test, y_pred)
conf_matrix_table = PrettyTable()
conf_matrix_table.field_names = ["", "Predicted Non-Fraud", "Predicted Fraud"]
conf_matrix_table.add_row(["Actual Non-Fraud", conf_matrix[0, 0], conf_matrix[0, 1]])
conf_matrix_table.add_row(["Actual Fraud", conf_matrix[1, 0], conf_matrix[1, 1]])

print("Confusion Matrix:")
print(conf_matrix_table)

# Apply the model to new transactions
new_transactions = pd.read_csv('/content/financial_transactions.csv')
new_transactions['transaction_type'] = label_encoder.transform(new_transactions['transaction_type'])
new_transactions['transaction_time'] = pd.to_datetime(new_transactions['transaction_time'])
new_transactions['hour'] = new_transactions['transaction_time'].dt.hour
new_transactions['day'] = new_transactions['transaction_time'].dt.day
new_transactions['month'] = new_transactions['transaction_time'].dt.month
new_transactions['day_of_week'] = new_transactions['transaction_time'].dt.dayofweek

new_X = new_transactions[features]
new_X_scaled = scaler.transform(new_X)

# Predict fraud
new_transactions['is_fraud'] = rf_model.predict(new_X_scaled)
new_transactions['fraud_probability'] = rf_model.predict_proba(new_X_scaled)[:, 1]

# Display new transactions with predictions
print("New Transactions with Fraud Predictions:")
print(new_transactions.head().to_string(index=False))
# Save the results to a CSV file
new_transactions.to_csv('predicted_fraud_transactions.csv', index=False)

Dataset Overview:
   transaction_id  amount transaction_type  customer_id     transaction_time  \
0               1    1000         Purchase         1001  2023-01-01 10:00:00   
1               2    1500         Transfer         1002  2023-01-01 11:00:00   
2               3    2000         Purchase         1003  2023-01-01 12:00:00   
3               4    3000       Withdrawal         1004  2023-01-01 13:00:00   
4               5    5000         Transfer         1005  2023-01-01 14:00:00   

   is_fraud  
0         0  
1         1  
2         0  
3         0  
4         1  


  df.fillna(method='ffill', inplace=True)


Model Performance Metrics:
+---------------+-------+
|     Metric    | Value |
+---------------+-------+
|    Accuracy   |  0.93 |
|   Precision   |  0.83 |
|     Recall    |  1.00 |
|    F1-Score   |  0.91 |
| ROC AUC Score |  0.93 |
+---------------+-------+
Confusion Matrix:
+------------------+---------------------+-----------------+
|                  | Predicted Non-Fraud | Predicted Fraud |
+------------------+---------------------+-----------------+
| Actual Non-Fraud |          18         |        2        |
|   Actual Fraud   |          0          |        10       |
+------------------+---------------------+-----------------+
New Transactions with Fraud Predictions:
 transaction_id  amount  transaction_type  customer_id    transaction_time  is_fraud  hour  day  month  day_of_week  fraud_probability
              1    1000                 0         1001 2023-01-01 10:00:00         0    10    1      1            6               0.16
              2    1500                 1   