### Import necessary libraries

In [15]:
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import TensorDataset
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.tree import DecisionTreeClassifier
import os
import joblib
import warnings

import sys
sys.path.append('../scripts')  # Appending the path to access the scripts folder
from model_training import * 
from DL_models import *

In [16]:
# ignore warnings to hide from being displayed
warnings.filterwarnings('ignore')
warnings.filterwarnings("ignore", module="mlflow.sklearn")

#### Mlflow Autologging 

In [17]:
# Enable MLflow autologging 
mlflow.pytorch.autolog()
mlflow.sklearn.autolog(log_models=True)

### Load Data

In [18]:
# Load the cleaned datasets (from Task 1)
fraud_data = pd.read_csv('C:/Users/Administrator/Documents/kifiya/Week_8/clean_data/merged_data.csv')
creditcard_data = pd.read_csv('C:/Users/Administrator/Documents/kifiya/Week_8/clean_data/Preprocessed_Creditcard_Data.csv')

print('The Merged fraud data')
display(fraud_data.head())
print('credit data')
display(creditcard_data.head())


The Merged fraud data


Unnamed: 0,user_id,signup_time,purchase_time,purchase_value,device_id,sex,age,ip_address,class,lower_bound_ip_address,...,country_United States,country_Uruguay,country_Uzbekistan,country_Vanuatu,country_Venezuela,country_Viet Nam,country_Virgin Islands (U.S.),country_Yemen,country_Zambia,country_Zimbabwe
0,247547.0,2015-06-28 03:00:34,2015-08-09 03:57:29,47.0,KIXYSVCHIPQBR,0,30.0,16778860.0,0.0,16778240.0,...,False,False,False,False,False,False,False,False,False,False
1,220737.0,2015-01-28 14:21:11,2015-02-11 20:28:28,15.0,PKYOWQKWGJNJI,0,34.0,16842050.0,0.0,16809984.0,...,False,False,False,False,False,False,False,False,False,False
2,390400.0,2015-03-19 20:49:09,2015-04-11 23:41:23,44.0,LVCSXLISZHVUO,1,29.0,16843660.0,0.0,16843264.0,...,False,False,False,False,False,False,False,False,False,False
3,69592.0,2015-02-24 06:11:57,2015-05-23 16:40:14,55.0,UHAUHNXXUADJE,0,30.0,16938730.0,0.0,16924672.0,...,False,False,False,False,False,False,False,False,False,False
4,174987.0,2015-07-07 12:58:11,2015-11-03 04:04:30,51.0,XPGPMOHIDRMGE,0,37.0,16971980.0,0.0,16941056.0,...,False,False,False,False,False,False,False,False,False,False


credit data


Unnamed: 0,Time,V1,V2,V3,V4,V5,V6,V7,V8,V9,...,V23,V24,V25,V26,V27,V28,Amount,Class,time_in_days,Amount_scaled
0,0.0,-1.359807,-0.072781,2.536347,1.378155,-0.338321,0.462388,0.239599,0.098698,0.363787,...,-0.110474,0.066928,0.128539,-0.189115,0.133558,-0.021053,149.62,0,0.0,0.2442
1,0.0,1.191857,0.266151,0.16648,0.448154,0.060018,-0.082361,-0.078803,0.085102,-0.255425,...,0.101288,-0.339846,0.16717,0.125895,-0.008983,0.014724,2.69,0,0.0,-0.342584
2,1.0,-1.358354,-1.340163,1.773209,0.37978,-0.503198,1.800499,0.791461,0.247676,-1.514654,...,0.909412,-0.689281,-0.327642,-0.139097,-0.055353,-0.059752,378.66,0,1.2e-05,1.1589
3,1.0,-0.966272,-0.185226,1.792993,-0.863291,-0.010309,1.247203,0.237609,0.377436,-1.387024,...,-0.190321,-1.175575,0.647376,-0.221929,0.062723,0.061458,123.5,0,1.2e-05,0.139886
4,2.0,-1.158233,0.877737,1.548718,0.403034,-0.407193,0.095921,0.592941,-0.270533,0.817739,...,-0.137458,0.141267,-0.20601,0.502292,0.219422,0.215153,69.99,0,2.3e-05,-0.073813


## Feature engineering

### 1. For fraud data

In [19]:
# Create a new column 'is_night' to flag transactions occurring between midnight and 6 AM or after 10 PM
fraud_data['is_night'] = fraud_data['hour_of_day'].apply(lambda x: 1 if x < 6 or x > 22 else 0)

# Calculate the average purchase value for each user
user_avg_purchase = fraud_data.groupby('user_id')['purchase_value'].mean()

# Map the average purchase value to each user's transactions
fraud_data['user_avg_purchase'] = fraud_data['user_id'].map(user_avg_purchase)

# Calculate the absolute difference between the individual purchase value and the user's average purchase value
fraud_data['purchase_deviation'] = abs(fraud_data['purchase_value'] - fraud_data['user_avg_purchase'])

# Count the number of times each device ID appears in the dataset
device_count = fraud_data['device_id'].value_counts()

# Map the device usage count to each transaction
fraud_data['device_usage_count'] = fraud_data['device_id'].map(device_count)

# Identify devices used by multiple users
suspicious_devices = fraud_data['device_id'].value_counts()[fraud_data['device_id'].value_counts() > 1].index

# Flag transactions from suspicious devices
fraud_data['suspicious_device'] = fraud_data['device_id'].apply(lambda x: 1 if x in suspicious_devices else 0)

# Calculate the total number of transactions for each user
user_transaction_count = fraud_data.groupby('user_id')['transaction_count'].sum()

# Map the total transaction count to each user's transactions
fraud_data['total_transaction_count'] = fraud_data['user_id'].map(user_transaction_count)

# Calculate the average fraud rate for each device
fraud_rate_by_device = fraud_data.groupby('device_id')['class'].mean()

# Map the device fraud rate to each transaction
fraud_data['device_fraud_rate'] = fraud_data['device_id'].map(fraud_rate_by_device)

# Calculate the purchase value per transaction, adjusted to avoid division by zero
fraud_data['transaction_intensity'] = fraud_data['purchase_value'] / (fraud_data['transaction_count'] + 1)


# Drop unnecessary columns for training
fraud_data = fraud_data.drop(columns=['signup_time', 'purchase_time', 'user_id', 'device_id', 
                                     'ip_address', 'lower_bound_ip_address', 'upper_bound_ip_address'], errors='ignore')
print('The Merged fraud data')
display(fraud_data.head())


The Merged fraud data


Unnamed: 0,purchase_value,sex,age,class,transaction_count,hour_of_day,day_of_week,purchase_value_scaled,source_Direct,source_SEO,...,country_Zambia,country_Zimbabwe,is_night,user_avg_purchase,purchase_deviation,device_usage_count,suspicious_device,total_transaction_count,device_fraud_rate,transaction_intensity
0,47.0,0,30.0,0.0,1,3,6,0.549607,False,True,...,False,False,1,47.0,0.0,1,0,1,0.0,23.5
1,15.0,0,34.0,0.0,1,20,2,-1.197335,False,True,...,False,False,0,15.0,0.0,1,0,1,0.0,7.5
2,44.0,1,29.0,0.0,1,23,5,0.385831,False,False,...,False,False,1,44.0,0.0,2,1,1,0.0,22.0
3,55.0,0,30.0,0.0,1,16,5,0.986342,True,False,...,False,False,0,55.0,0.0,1,0,1,0.0,27.5
4,51.0,0,37.0,0.0,1,4,1,0.767974,False,True,...,False,False,1,51.0,0.0,1,0,1,0.0,25.5


### 2. For credit card data

In [20]:
# Convert seconds to hours
creditcard_data['hour_of_day'] = (creditcard_data['Time'] % 86400) // 3600  

# Identify weekends based on 'time_in_days'
creditcard_data['is_weekend'] = creditcard_data['time_in_days'].apply(lambda x: 1 if x % 7 in [5, 6] else 0)

# Calculate the number of transactions per day
creditcard_data['transactions_per_day'] = creditcard_data.groupby('time_in_days')['Class'].transform('count')

# Calculate the total amount spent per day
daily_amount_sum = creditcard_data.groupby('time_in_days')['Amount'].transform('sum')

# Calculate the ratio of each transaction's amount to the daily total
creditcard_data['amount_ratio'] = creditcard_data['Amount'] / (daily_amount_sum + 1)

# Apply log transformation to the 'Amount' column
creditcard_data['log_amount'] = np.log1p(creditcard_data['Amount'])

# Calculate the z-score for the 'Amount' column
mean_amount = creditcard_data['Amount'].mean()
std_amount = creditcard_data['Amount'].std()
creditcard_data['amount_deviation'] = (creditcard_data['Amount'] - mean_amount) / std_amount

# Calculate the Euclidean norm for all PCA components (V1 to V28)
creditcard_data['component_magnitude'] = (creditcard_data[[f'V{i}' for i in range(1, 29)]].pow(2).sum(axis=1)).pow(0.5)

print('credit data')
display(creditcard_data.head())


credit data


Unnamed: 0,Time,V1,V2,V3,V4,V5,V6,V7,V8,V9,...,Class,time_in_days,Amount_scaled,hour_of_day,is_weekend,transactions_per_day,amount_ratio,log_amount,amount_deviation,component_magnitude
0,0.0,-1.359807,-0.072781,2.536347,1.378155,-0.338321,0.462388,0.239599,0.098698,0.363787,...,0,0.0,0.2442,0.0,0,2,0.975931,5.01476,0.244199,3.911559
1,0.0,1.191857,0.266151,0.16648,0.448154,0.060018,-0.082361,-0.078803,0.085102,-0.255425,...,0,0.0,-0.342584,0.0,0,2,0.017546,1.305626,-0.342583,2.674524
2,1.0,-1.358354,-1.340163,1.773209,0.37978,-0.503198,1.800499,0.791461,0.247676,-1.514654,...,0,1.2e-05,1.1589,0.0,0,2,0.752564,5.939276,1.158898,6.080512
3,1.0,-0.966272,-0.185226,1.792993,-0.863291,-0.010309,1.247203,0.237609,0.377436,-1.387024,...,0,1.2e-05,0.139886,0.0,0,2,0.245449,4.824306,0.139886,4.284356
4,2.0,-1.158233,0.877737,1.548718,0.403034,-0.407193,0.095921,0.592941,-0.270533,0.817739,...,0,2.3e-05,-0.073813,0.0,0,2,0.93745,4.262539,-0.073813,3.565131


## Data Augmentation

In [21]:
# Specify the target column for fraud
target_column_fraud= 'class'
print("Fraud data:")
fraud_data = augment_and_merge_data(original_data=fraud_data, target_column=target_column_fraud)
# Specify the target column for credit
target_column_credit= 'Class'
print("Credit card data:")
creditcard_data = augment_and_merge_data(original_data=creditcard_data, target_column=target_column_credit)

# Display results
print("Augmented fraud data:")
display(fraud_data.head())

print("Augmented credit card data:")
display(creditcard_data.head())


Fraud data:
Applying SMOTE to generate synthetic samples...


2024/11/17 13:17:13 INFO mlflow.utils.autologging_utils: Created MLflow autologging run with ID '883ccf2a46a54e4ab435ccf9db3eccfc', which will track hyperparameters, performance metrics, model artifacts, and lineage information for the current sklearn workflow


Merging synthetic and original data...
Shuffling merged dataset...
Original data shape: (129146, 202)
Synthetic data shape: (104610, 202)
Augmented data shape: (233756, 202)
Data augmentation and merging completed.
Credit card data:
Applying SMOTE to generate synthetic samples...


2024/11/17 13:17:40 INFO mlflow.utils.autologging_utils: Created MLflow autologging run with ID '21473f5215c34a21bdf3adeb2a10053a', which will track hyperparameters, performance metrics, model artifacts, and lineage information for the current sklearn workflow


Merging synthetic and original data...
Shuffling merged dataset...
Original data shape: (283726, 40)
Synthetic data shape: (282780, 40)
Augmented data shape: (566506, 40)
Data augmentation and merging completed.
Augmented fraud data:


Unnamed: 0,purchase_value,sex,age,class,transaction_count,hour_of_day,day_of_week,purchase_value_scaled,source_Direct,source_SEO,...,country_Zambia,country_Zimbabwe,is_night,user_avg_purchase,purchase_deviation,device_usage_count,suspicious_device,total_transaction_count,device_fraud_rate,transaction_intensity
79787,28.0,1.0,40.0,0.0,1.0,12.0,2.0,-0.48764,False,False,...,False,False,0.0,28.0,0.0,1.0,0.0,1.0,0.0,14.0
10438,22.0,1.0,37.0,0.0,1.0,13.0,0.0,-0.815192,False,True,...,False,False,0.0,22.0,0.0,1.0,0.0,1.0,0.0,11.0
154853,67.0,1.0,41.0,1.0,1.0,13.0,1.0,1.641445,False,True,...,False,False,0.0,67.0,0.0,1.0,0.0,1.0,1.0,33.5
7323,56.0,1.0,23.0,0.0,1.0,10.0,2.0,1.040934,False,False,...,False,False,0.0,56.0,0.0,1.0,0.0,1.0,0.0,28.0
192673,20.0,0.0,29.0,1.0,1.0,18.0,3.0,-0.924375,False,True,...,False,False,0.0,20.0,0.0,12.0,1.0,1.0,0.916667,10.0


Augmented credit card data:


Unnamed: 0,Time,V1,V2,V3,V4,V5,V6,V7,V8,V9,...,Class,time_in_days,Amount_scaled,hour_of_day,is_weekend,transactions_per_day,amount_ratio,log_amount,amount_deviation,component_magnitude
554816,48422.549558,1.084606,0.690925,-0.843441,1.359081,0.168721,-0.474593,-0.332476,0.095933,-0.14736,...,1.0,0.560446,-0.349507,13.0,0.0,2.0,0.017088,0.670001,-0.349506,4.833821
19821,30629.0,-0.923736,1.108088,1.560499,-0.268841,0.075925,-0.230605,0.703012,0.024225,-0.071223,...,0.0,0.354502,-0.281482,8.0,0.0,3.0,0.127516,2.943913,-0.281481,3.22688
184234,126440.0,-1.431701,-1.397938,-1.991814,-2.281973,3.728501,2.114475,-0.799982,1.200862,-1.737078,...,0.0,1.463426,-0.349533,11.0,0.0,4.0,0.001544,0.667829,-0.349532,6.68975
503641,65366.690879,-3.101443,1.37393,-3.074656,3.390527,-5.66537,2.153805,-0.594211,-3.431825,-1.063649,...,1.0,0.756559,4.977794,17.969485,0.0,4.0,0.568129,7.192499,4.977785,14.123692
261118,160300.0,-1.330448,1.119757,-2.572612,1.622074,0.970933,-1.086979,0.505613,0.616914,-0.848251,...,0.0,1.855324,-0.184396,20.0,0.0,3.0,0.904619,3.768153,-0.184396,5.255461


## Prepare data for training

In [22]:
# Separate features and targets for Fraud Data
X_fraud = fraud_data.drop(columns=['class'])  # Feature set
y_fraud = fraud_data['class']  # Target

# Separate features and targets for Credit Card Data
X_credit = creditcard_data.drop(columns=['Class'])  # Feature set
y_credit = creditcard_data['Class']  # Target

# Apply SMOTE to balance the dataset
smote = SMOTE(random_state=42)
X_fraud, y_fraud = smote.fit_resample(X_fraud, y_fraud)
X_credit, y_credit = smote.fit_resample(X_credit, y_credit)

#for scikit-learn models
y_credit_sci=y_credit.values.ravel()
y_fraud_sci=y_fraud.values.ravel()
X_fraud_sci=X_fraud
X_credit_sci=X_credit



# Train-Test Split for both datasets
X_fraud_train, X_fraud_test, y_fraud_train, y_fraud_test = train_test_split(X_fraud, y_fraud, test_size=0.25, random_state=42)
X_credit_train, X_credit_test, y_credit_train, y_credit_test = train_test_split(X_credit, y_credit, test_size=0.25, random_state=42)

# train split for scikit_learn models
X_fraud_train_sci, X_fraud_test_sci, y_fraud_train_sci, y_fraud_test_sci = train_test_split(X_fraud_sci, y_fraud_sci, test_size=0.25, random_state=42)
X_credit_train_sci, X_credit_test_sci, y_credit_train_sci, y_credit_test_sci = train_test_split(X_credit_sci, y_credit_sci, test_size=0.25, random_state=42)

# Normalize the data (Standard Scaling)
scaler = StandardScaler()
X_fraud_train = scaler.fit_transform(X_fraud_train)
X_fraud_test = scaler.transform(X_fraud_test)
X_credit_train = scaler.fit_transform(X_credit_train)
X_credit_test = scaler.transform(X_credit_test)

# Convert the datasets into PyTorch tensors
X_fraud_train_tensor = torch.tensor(X_fraud_train, dtype=torch.float32)
y_fraud_train_tensor = torch.tensor(y_fraud_train.values, dtype=torch.float32)
X_fraud_test_tensor = torch.tensor(X_fraud_test, dtype=torch.float32)
y_fraud_test_tensor = torch.tensor(y_fraud_test.values, dtype=torch.float32)

X_credit_train_tensor = torch.tensor(X_credit_train, dtype=torch.float32)
y_credit_train_tensor = torch.tensor(y_credit_train.values, dtype=torch.float32)
X_credit_test_tensor = torch.tensor(X_credit_test, dtype=torch.float32)
y_credit_test_tensor = torch.tensor(y_credit_test.values, dtype=torch.float32)

# DataLoader for batching
batch_size = 32
train_loader_fraud = torch.utils.data.DataLoader(TensorDataset(X_fraud_train_tensor, y_fraud_train_tensor), batch_size=batch_size, shuffle=True)
test_loader_fraud = torch.utils.data.DataLoader(TensorDataset(X_fraud_test_tensor, y_fraud_test_tensor), batch_size=batch_size)

train_loader_credit = torch.utils.data.DataLoader(TensorDataset(X_credit_train_tensor, y_credit_train_tensor), batch_size=batch_size, shuffle=True)
test_loader_credit = torch.utils.data.DataLoader(TensorDataset(X_credit_test_tensor, y_credit_test_tensor), batch_size=batch_size)


## Initializing models

In [23]:
# Initialize models for both fraud and credit datasets
input_size_fraud = X_fraud_train.shape[1]
input_size_credit = X_credit_train.shape[1]

mlp_model_fraud = MLPModel(input_size_fraud)
cnn_model_fraud = CNNModel(input_size_fraud)
rnn_model_fraud = RNNModel(input_size_fraud)
lstm_model_fraud = LSTMModel(input_size_fraud)

mlp_model_credit = MLPModel(input_size_credit)
cnn_model_credit = CNNModel(input_size_credit)
rnn_model_credit = RNNModel(input_size_credit)
lstm_model_credit = LSTMModel(input_size_credit)

# LogisticRegression, RandomForest, GradientBoosting, DecisionTree
logistic_fraud = LogisticRegression(max_iter=1000)
rf_fraud = RandomForestClassifier(n_estimators=100)
gb_fraud = GradientBoostingClassifier(n_estimators=100)
dt_fraud = DecisionTreeClassifier()

logistic_credit = LogisticRegression(max_iter=1000)
rf_credit = RandomForestClassifier(n_estimators=100)
gb_credit = GradientBoostingClassifier(n_estimators=100)
dt_credit = DecisionTreeClassifier()

# Set loss function and optimizer
criterion = nn.BCELoss()

# Train and evaluate each model for both fraud and credit data
models = {
    "MLP_Fraud": mlp_model_fraud,
    "CNN_Fraud": cnn_model_fraud,
    "RNN_Fraud": rnn_model_fraud,
    "LSTM_Fraud": lstm_model_fraud,
    "MLP_Credit": mlp_model_credit,
    "CNN_Credit": cnn_model_credit,
    "RNN_Credit": rnn_model_credit,
    "LSTM_Credit": lstm_model_credit,
    "LogisticRegression_Fraud": logistic_fraud,
    "RandomForest_Fraud": rf_fraud,
    "GradientBoosting_Fraud": gb_fraud,
    "DecisionTree_Fraud": dt_fraud,
    "LogisticRegression_Credit": logistic_credit,
    "RandomForest_Credit": rf_credit,
    "GradientBoosting_Credit": gb_credit,
    "DecisionTree_Credit": dt_credit
}

### save directory trained models

In [24]:
# saving the models
save_folder = 'C:/Users/Administrator/Documents/kifiya/Week_8/saved_models'
os.makedirs(save_folder, exist_ok=True) 

        # Function to save PyTorch models
def save_pytorch_model(model, model_name):
    save_path = os.path.join(save_folder, f'{model_name}.pt')
    torch.save(model, save_path) 
    print(f'{model_name} saved at {save_path}')



# Function to save scikit-learn models
def save_sklearn_model(model, model_name):
    save_path = os.path.join(save_folder, f'{model_name}.joblib')
    joblib.dump(model, save_path)
    print(f'{model_name} saved at {save_path}')



## Train, Evaluate and Save models

In [25]:
# Training and evaluating each model with MLflow tracking
# Modify the existing model loop to include saving logic
for model_name, model in models.items():
    if isinstance(model, nn.Module):  # For PyTorch models
        optimizer = optim.Adam(model.parameters(), lr=0.001)
        print(f"\nTraining {model_name}...")
        train_model(model, train_loader_fraud if "Fraud" in model_name else train_loader_credit, optimizer, criterion)
        print(f"Evaluating {model_name}...")
        evaluate_model(model, test_loader_fraud if "Fraud" in model_name else test_loader_credit)
        save_pytorch_model(model, model_name)  # Save PyTorch model
        print('----------------------------------------------------------------------------------------------------------------------------------------------------')
    else:  # For scikit-learn models
        print(f"\nTraining {model_name}...")
        X_train = X_fraud_train_sci if "Fraud" in model_name else X_credit_train_sci
        y_train = y_fraud_train_sci if "Fraud" in model_name else y_credit_train_sci
        X_test = X_fraud_test_sci if "Fraud" in model_name else X_credit_test_sci
        y_test = y_fraud_test_sci if "Fraud" in model_name else y_credit_test_sci
        train_sklearn_model(model, X_train, y_train)
        print(f"Evaluating {model_name}...")
        evaluate_sklearn_model(model, X_test, y_test,model_name)
        save_sklearn_model(model, model_name)  # Save scikit-learn model
        print('-----------------------------------------------------------------------------------------------------------------------------------------------------')


Training MLP_Fraud...
Epoch [1/20], Loss: 0.0422
Epoch [2/20], Loss: 0.0285
Epoch [3/20], Loss: 0.0275
Epoch [4/20], Loss: 0.0269
Epoch [5/20], Loss: 0.0263
Epoch [6/20], Loss: 0.0261
Epoch [7/20], Loss: 0.0256
Epoch [8/20], Loss: 0.0253
Epoch [9/20], Loss: 0.0250
Epoch [10/20], Loss: 0.0247
Epoch [11/20], Loss: 0.0243
Epoch [12/20], Loss: 0.0240
Epoch [13/20], Loss: 0.0236
Epoch [14/20], Loss: 0.0233
Epoch [15/20], Loss: 0.0229
Epoch [16/20], Loss: 0.0226
Epoch [17/20], Loss: 0.0222
Epoch [18/20], Loss: 0.0217
Epoch [19/20], Loss: 0.0212
Epoch [20/20], Loss: 0.0207
Evaluating MLP_Fraud...
Accuracy: 0.9917
Precision: 0.9918
Recall: 0.9918
F1 Score: 0.9917
MLP_Fraud saved at C:/Users/Administrator/Documents/kifiya/Week_8/saved_models\MLP_Fraud.pt
----------------------------------------------------------------------------------------------------------------------------------------------------

Training CNN_Fraud...
Epoch [1/20], Loss: 0.0397
Epoch [2/20], Loss: 0.0286
Epoch [3/20], Los

2024/11/17 15:17:33 INFO mlflow.utils.autologging_utils: Created MLflow autologging run with ID '0854c6eab72f4a67866df585f354ab80', which will track hyperparameters, performance metrics, model artifacts, and lineage information for the current sklearn workflow


Accuracy: 0.9998
Precision: 0.9998
Recall: 0.9998
F1 Score: 0.9998
LSTM_Credit saved at C:/Users/Administrator/Documents/kifiya/Week_8/saved_models\LSTM_Credit.pt
----------------------------------------------------------------------------------------------------------------------------------------------------

Training LogisticRegression_Fraud...
Evaluating LogisticRegression_Fraud...


2024/11/17 15:20:47 INFO mlflow.utils.autologging_utils: Created MLflow autologging run with ID '8d44a35e9b9e4cc6bdb1b815062e453a', which will track hyperparameters, performance metrics, model artifacts, and lineage information for the current sklearn workflow


Accuracy: 0.9915
Precision: 0.9916
Recall: 0.9916
F1 Score: 0.9915
LogisticRegression_Fraud saved at C:/Users/Administrator/Documents/kifiya/Week_8/saved_models\LogisticRegression_Fraud.joblib
-----------------------------------------------------------------------------------------------------------------------------------------------------

Training RandomForest_Fraud...
Evaluating RandomForest_Fraud...


2024/11/17 15:24:01 INFO mlflow.utils.autologging_utils: Created MLflow autologging run with ID '049c07ac2ea6479e948cbd6bfabe6820', which will track hyperparameters, performance metrics, model artifacts, and lineage information for the current sklearn workflow


Accuracy: 0.9924
Precision: 0.9924
Recall: 0.9924
F1 Score: 0.9924
RandomForest_Fraud saved at C:/Users/Administrator/Documents/kifiya/Week_8/saved_models\RandomForest_Fraud.joblib
-----------------------------------------------------------------------------------------------------------------------------------------------------

Training GradientBoosting_Fraud...
Evaluating GradientBoosting_Fraud...


2024/11/17 15:27:47 INFO mlflow.utils.autologging_utils: Created MLflow autologging run with ID '052f7ddfcae54951b29406040062d4e9', which will track hyperparameters, performance metrics, model artifacts, and lineage information for the current sklearn workflow


Accuracy: 0.9913
Precision: 0.9914
Recall: 0.9913
F1 Score: 0.9913
GradientBoosting_Fraud saved at C:/Users/Administrator/Documents/kifiya/Week_8/saved_models\GradientBoosting_Fraud.joblib
-----------------------------------------------------------------------------------------------------------------------------------------------------

Training DecisionTree_Fraud...
Evaluating DecisionTree_Fraud...


2024/11/17 15:29:47 INFO mlflow.utils.autologging_utils: Created MLflow autologging run with ID '4869e01ad9284dd0886c7f46293306ab', which will track hyperparameters, performance metrics, model artifacts, and lineage information for the current sklearn workflow


Accuracy: 0.9923
Precision: 0.9923
Recall: 0.9923
F1 Score: 0.9923
DecisionTree_Fraud saved at C:/Users/Administrator/Documents/kifiya/Week_8/saved_models\DecisionTree_Fraud.joblib
-----------------------------------------------------------------------------------------------------------------------------------------------------

Training LogisticRegression_Credit...
Evaluating LogisticRegression_Credit...


2024/11/17 15:31:30 INFO mlflow.utils.autologging_utils: Created MLflow autologging run with ID '33c324edcb2d4a9abd8f6ca9079ac787', which will track hyperparameters, performance metrics, model artifacts, and lineage information for the current sklearn workflow


Accuracy: 0.9785
Precision: 0.9786
Recall: 0.9785
F1 Score: 0.9785
LogisticRegression_Credit saved at C:/Users/Administrator/Documents/kifiya/Week_8/saved_models\LogisticRegression_Credit.joblib
-----------------------------------------------------------------------------------------------------------------------------------------------------

Training RandomForest_Credit...
Evaluating RandomForest_Credit...


2024/11/17 15:42:18 INFO mlflow.utils.autologging_utils: Created MLflow autologging run with ID '28fb6fe66db24d8795f72f7fee9fa423', which will track hyperparameters, performance metrics, model artifacts, and lineage information for the current sklearn workflow


Accuracy: 0.9999
Precision: 0.9999
Recall: 0.9999
F1 Score: 0.9999
RandomForest_Credit saved at C:/Users/Administrator/Documents/kifiya/Week_8/saved_models\RandomForest_Credit.joblib
-----------------------------------------------------------------------------------------------------------------------------------------------------

Training GradientBoosting_Credit...
Evaluating GradientBoosting_Credit...


2024/11/17 16:02:50 INFO mlflow.utils.autologging_utils: Created MLflow autologging run with ID '191c4cc2f81b4052aa791ce86f4db8f0', which will track hyperparameters, performance metrics, model artifacts, and lineage information for the current sklearn workflow


Accuracy: 0.9870
Precision: 0.9871
Recall: 0.9871
F1 Score: 0.9870
GradientBoosting_Credit saved at C:/Users/Administrator/Documents/kifiya/Week_8/saved_models\GradientBoosting_Credit.joblib
-----------------------------------------------------------------------------------------------------------------------------------------------------

Training DecisionTree_Credit...
Evaluating DecisionTree_Credit...
Accuracy: 0.9985
Precision: 0.9985
Recall: 0.9985
F1 Score: 0.9985
DecisionTree_Credit saved at C:/Users/Administrator/Documents/kifiya/Week_8/saved_models\DecisionTree_Credit.joblib
-----------------------------------------------------------------------------------------------------------------------------------------------------
