In [1]:
# 1. Import Libraries
import pandas as pd
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
from sklearn.model_selection import GridSearchCV

In [None]:
# 2. Load Preprocessed Data
# Read the four NumPy arrays (train/test features, then train/test labels)
# from .npy files in the current working directory.
# NOTE(review): presumably these files are written by a separate
# preprocessing step -- confirm where they come from.
X_train, X_test, y_train, y_test = (
    np.load(npy_path)
    for npy_path in ('X_train.npy', 'X_test.npy', 'y_train.npy', 'y_test.npy')
)

In [3]:
# 3. Initialize Models
# Both estimators are seeded with random_state=42 so repeated runs of the
# notebook build the same models.
logreg_model = LogisticRegression(random_state=42)
# n_jobs=-1 lets the forest fit and predict on all available CPU cores.
# The trees themselves are still determined by random_state, so the fitted
# model and its predictions are unchanged -- only wall-clock time drops.
rf_model = RandomForestClassifier(random_state=42, n_jobs=-1)

In [4]:
# Logistic Regression
# Fit on the training split, then predict labels for the training and test
# features (in that order) so both sets of predictions are available for the
# metric cells.
logreg_model.fit(X_train, y_train)
logreg_train_pred, logreg_test_pred = (
    logreg_model.predict(split_features)
    for split_features in (X_train, X_test)
)

In [5]:
# Random Forest
# Fit on the training split, then run predict over the training features and
# the test features (in that order) to obtain both prediction arrays.
rf_model.fit(X_train, y_train)
rf_train_pred, rf_test_pred = map(rf_model.predict, (X_train, X_test))

In [6]:
# Logistic Regression Metrics
# Print a header, then one (label, value) line per metric: accuracy on both
# splits, followed by the classification report and confusion matrix for the
# test split only.
print("\nLogistic Regression Model Evaluation:")
for metric_label, metric_value in (
    ("Training Accuracy:", accuracy_score(y_train, logreg_train_pred)),
    ("Testing Accuracy:", accuracy_score(y_test, logreg_test_pred)),
    ("Classification Report (Test Data):\n", classification_report(y_test, logreg_test_pred)),
    ("Confusion Matrix (Test Data):\n", confusion_matrix(y_test, logreg_test_pred)),
):
    # print() joins the label and the value with a single space, which is why
    # multi-line values start one column in after the "\n" of their label.
    print(metric_label, metric_value)


Logistic Regression Model Evaluation:
Training Accuracy: 0.9598091025798849
Testing Accuracy: 0.9594639748166646
Classification Report (Test Data):
               precision    recall  f1-score   support

           0       0.94      0.98      0.96     56750
           1       0.98      0.94      0.96     56976

    accuracy                           0.96    113726
   macro avg       0.96      0.96      0.96    113726
weighted avg       0.96      0.96      0.96    113726

Confusion Matrix (Test Data):
 [[55727  1023]
 [ 3587 53389]]


In [7]:
# Random Forest Metrics
# Print a header, then one (label, value) line per metric: accuracy on both
# splits, followed by the classification report and confusion matrix for the
# test split only.
print("\nRandom Forest Model Evaluation:")
for metric_label, metric_value in (
    ("Training Accuracy:", accuracy_score(y_train, rf_train_pred)),
    ("Testing Accuracy:", accuracy_score(y_test, rf_test_pred)),
    ("Classification Report (Test Data):\n", classification_report(y_test, rf_test_pred)),
    ("Confusion Matrix (Test Data):\n", confusion_matrix(y_test, rf_test_pred)),
):
    # The report rounds to two decimals by default, so a near-perfect model
    # shows 1.00 everywhere; the confusion matrix carries the exact counts.
    print(metric_label, metric_value)


Random Forest Model Evaluation:
Training Accuracy: 1.0
Testing Accuracy: 0.9998856901675958
Classification Report (Test Data):
               precision    recall  f1-score   support

           0       1.00      1.00      1.00     56750
           1       1.00      1.00      1.00     56976

    accuracy                           1.00    113726
   macro avg       1.00      1.00      1.00    113726
weighted avg       1.00      1.00      1.00    113726

Confusion Matrix (Test Data):
 [[56737    13]
 [    0 56976]]


In [8]:
import joblib

In [9]:
# 2. Save the Trained Models
# Persist each fitted estimator with joblib, logistic regression first and
# random forest second, as .pkl files in the current working directory.
for fitted_model, pkl_path in (
    (logreg_model, 'logistic_regression_model.pkl'),
    (rf_model, 'random_forest_model.pkl'),
):
    joblib.dump(fitted_model, pkl_path)

print("✅ Models saved successfully!")

✅ Models saved successfully!


In [10]:
# Rebind both model names to the estimators read back from the .pkl files.
# joblib.load unpickles its input, so it should only be pointed at files
# from a trusted source.
logreg_model, rf_model = map(
    joblib.load,
    ('logistic_regression_model.pkl', 'random_forest_model.pkl'),
)

In [11]:
# 3. Compare Model Performances
# Predict on the test features with each model (logistic regression first,
# random forest second), then score each prediction array against y_test.
logreg_pred, rf_pred = (
    classifier.predict(X_test) for classifier in (logreg_model, rf_model)
)
logreg_acc, rf_acc = (
    accuracy_score(y_test, test_pred) for test_pred in (logreg_pred, rf_pred)
)

In [12]:
# One row per model: display name and its accuracy on the test split.
comparison_df = pd.DataFrame(
    [
        ('Logistic Regression', logreg_acc),
        ('Random Forest', rf_acc),
    ],
    columns=['Model', 'Testing Accuracy'],
)

In [13]:
# Emit the heading and the plain-text rendering of the table, one per line.
print("\n📊 Model Comparison Table:", comparison_df, sep="\n")


📊 Model Comparison Table:
                 Model  Testing Accuracy
0  Logistic Regression          0.959464
1        Random Forest          0.999886


In [14]:
# 4. Save the Comparison Table
# Write the table to CSV in the current working directory; index=False keeps
# the row index out of the file, so it holds only the table's own columns.
comparison_df.to_csv(path_or_buf='model_comparison_report.csv', index=False)
print("\n✅ Model Comparison Report saved as 'model_comparison_report.csv'!")


✅ Model Comparison Report saved as 'model_comparison_report.csv'!


In [3]:
"""
Project Structure:

your_project_folder/
├── data/
│   ├── your_dataset.csv
├── models/
│   ├── logistic_regression_model.pkl
│   ├── random_forest_model.pkl
├── notebooks/
│   ├── eda.ipynb
│   ├── data_preprocessing.ipynb
│   ├── model_training.ipynb
├── reports/
│   └── model_comparison_report.csv
├── README.md 
└── requirements.txt
"""

'\nProject Structure:\n\nyour_project_folder/\n├── data/\n│   ├── your_dataset.csv\n├── models/\n│   ├── logistic_regression_model.pkl\n│   ├── random_forest_model.pkl\n├── notebooks/\n│   ├── eda.ipynb\n│   ├── data_preprocessing.ipynb\n│   ├── model_training.ipynb\n├── reports/\n│   └── model_comparison_report.csv\n├── README.md \n└── requirements.txt\n'