In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV, RandomizedSearchCV
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report
from sklearn.preprocessing import StandardScaler
import warnings
import matplotlib.pyplot as plt
warnings.filterwarnings('ignore')

In [None]:
!gdown 1KzMOGTCqCX8kvdz-54TdsyuwS97S8N7f --output creditcard.csv

Downloading...
From (original): https://drive.google.com/uc?id=1KzMOGTCqCX8kvdz-54TdsyuwS97S8N7f
From (redirected): https://drive.google.com/uc?id=1KzMOGTCqCX8kvdz-54TdsyuwS97S8N7f&confirm=t&uuid=81378367-7024-4575-87f5-ba1b8cb6822d
To: /content/creditcard.csv
100% 151M/151M [00:01<00:00, 117MB/s]


In [None]:
# Load the dataset. The first CSV column has no header and holds 0, 1, 2, ...
# (it would otherwise appear as a regular column named 'Unnamed: 0'), so it is
# read as the DataFrame index instead of being treated as data.
data = pd.read_csv('creditcard.csv', index_col=0)
data.head()

Unnamed: 0,Time,V1,V2,V3,V4,V5,V6,V7,V8,V9,...,V21,V22,V23,V24,V25,V26,V27,V28,Amount,Class
0,0.0,-1.359807,-0.072781,2.536347,1.378155,-0.338321,0.462388,0.239599,0.098698,0.363787,...,-0.018307,0.277838,-0.110474,0.066928,0.128539,-0.189115,0.133558,-0.021053,149.62,0
1,0.0,1.191857,0.266151,0.16648,0.448154,0.060018,-0.082361,-0.078803,0.085102,-0.255425,...,-0.225775,-0.638672,0.101288,-0.339846,0.16717,0.125895,-0.008983,0.014724,2.69,0
2,1.0,-1.358354,-1.340163,1.773209,0.37978,-0.503198,1.800499,0.791461,0.247676,-1.514654,...,0.247998,0.771679,0.909412,-0.689281,-0.327642,-0.139097,-0.055353,-0.059752,378.66,0
3,1.0,-0.966272,-0.185226,1.792993,-0.863291,-0.010309,1.247203,0.237609,0.377436,-1.387024,...,-0.1083,0.005274,-0.190321,-1.175575,0.647376,-0.221929,0.062723,0.061458,123.5,0
4,2.0,-1.158233,0.877737,1.548718,0.403034,-0.407193,0.095921,0.592941,-0.270533,0.817739,...,-0.009431,0.798278,-0.137458,0.141267,-0.20601,0.502292,0.219422,0.215153,69.99,0


In [None]:
# Features and target.
# 'Unnamed: 0' is the row-counter column that appears when a CSV was written
# together with its index. It is not a measured attribute, so it is removed
# from the feature matrix when present (errors='ignore' makes this a no-op if
# the column does not exist).
X = data.drop(columns='Class').drop(columns='Unnamed: 0', errors='ignore')
y = data['Class']

# Split the data: 80/20, stratified on the target so both parts keep the same
# class ratio, with a fixed seed for a repeatable split.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Scale the features. The scaler is fitted on the training part only and then
# applied to the test part, so no test-set statistics leak into training.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)


In [None]:
# Initialize models.
# The train/test split and the randomized search are already pinned to seed 42;
# the estimators get the same seed so that repeated runs of the notebook select
# the same hyperparameters and report the same scores. SVC is left with its
# defaults.
models = {
    'Logistic Regression': LogisticRegression(random_state=42),
    'Random Forest': RandomForestClassifier(random_state=42),
    'SVM': SVC()
}


In [None]:
# Hyperparameter search spaces, keyed by the same display names used in `models`.
# Each inner mapping goes from an estimator parameter name to its candidate values.
param_grids = {
    'Logistic Regression': dict(
        C=[0.1, 1, 10],
        solver=['lbfgs', 'liblinear'],
        max_iter=[1000],
    ),
    'Random Forest': dict(
        n_estimators=[50, 100, 200],
        max_depth=[None, 10, 20],
        min_samples_split=[2, 5],
    ),
    'SVM': dict(
        C=[0.1, 1, 10],
        kernel=['rbf', 'linear'],
        gamma=['scale', 'auto'],
    ),
}

In [None]:
# Column-oriented accumulator for the comparison table: one independent, initially
# empty list per column. Each evaluated model appends one entry to every list.
results = {
    column: []
    for column in (
        'Model',
        'Accuracy',
        'Precision',
        'Recall',
        'F1-Score',
        'Best Parameters',
    )
}

In [None]:
# Function to evaluate model
def evaluate_model(model, X_test, y_test, average='binary'):
    """Score a fitted classifier on held-out data.

    Parameters
    ----------
    model
        Fitted estimator exposing ``predict``.
    X_test
        Feature matrix passed to ``model.predict``.
    y_test
        True labels for ``X_test``.
    average : str, default 'binary'
        Averaging mode forwarded to precision, recall and F1. With 'binary'
        the scores describe the positive class (label 1) only. The
        support-weighted average is almost entirely determined by the
        majority class on a heavily imbalanced target, so it ends up
        mirroring accuracy; pass ``average='weighted'`` to get that
        behaviour explicitly.

    Returns
    -------
    dict
        Keys 'Accuracy', 'Precision', 'Recall' and 'F1-Score' mapped to the
        corresponding scores. Undefined ratios are reported as 0
        (``zero_division=0``).
    """
    y_pred = model.predict(X_test)
    return {
        'Accuracy': accuracy_score(y_test, y_pred),
        'Precision': precision_score(y_test, y_pred, average=average, zero_division=0),
        'Recall': recall_score(y_test, y_pred, average=average, zero_division=0),
        'F1-Score': f1_score(y_test, y_pred, average=average, zero_division=0)
    }

In [None]:
# Train and evaluate models with GridSearchCV
print("Training models with GridSearchCV...")
for model_name, model in models.items():
    print(f"\n{model_name}:")
    result_label = model_name + ' (GridSearchCV)'

    # Exhaustive search over this model's grid with 5-fold cross-validation.
    grid_search = GridSearchCV(
        estimator=model,
        param_grid=param_grids[model_name],
        cv=5,
        # Select on the F1 of the positive class (label 1). The weighted
        # variant is dominated by the majority class, so it barely reacts to
        # how well the rare class is detected.
        scoring='f1',
        n_jobs=-1
    )

    grid_search.fit(X_train, y_train)

    # Evaluate best model on the held-out test split
    best_estimator = grid_search.best_estimator_
    metrics = evaluate_model(best_estimator, X_test, y_test)

    # Remove rows left behind by an earlier run of this cell so that
    # re-running it (e.g. after an interrupt) does not duplicate entries.
    while result_label in results['Model']:
        stale_position = results['Model'].index(result_label)
        for column_values in results.values():
            del column_values[stale_position]

    # Store results
    results['Model'].append(result_label)
    results['Accuracy'].append(metrics['Accuracy'])
    results['Precision'].append(metrics['Precision'])
    results['Recall'].append(metrics['Recall'])
    results['F1-Score'].append(metrics['F1-Score'])
    results['Best Parameters'].append(grid_search.best_params_)

    # Print results
    print(f"Best Parameters: {grid_search.best_params_}")
    print(f"Metrics: {metrics}")
    print(f"Classification Report:\n{classification_report(y_test, best_estimator.predict(X_test), zero_division=0)}")

Training models with GridSearchCV...

Logistic Regression:
Best Parameters: {'C': 1, 'max_iter': 1000, 'solver': 'liblinear'}
Metrics: {'Accuracy': 0.9991573329588147, 'Precision': 0.9990915062313275, 'Recall': 0.9991573329588147, 'F1-Score': 0.9991041423931677}
Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00     56864
           1       0.83      0.64      0.72        98

    accuracy                           1.00     56962
   macro avg       0.91      0.82      0.86     56962
weighted avg       1.00      1.00      1.00     56962


Random Forest:


KeyboardInterrupt: 

In [None]:
# Train and evaluate models with RandomizedSearchCV
print("\nTraining models with RandomizedSearchCV...")
for model_name, model in models.items():
    print(f"\n{model_name}:")
    result_label = model_name + ' (RandomizedSearchCV)'

    # Sample up to 8 candidates from this model's grid, 5-fold cross-validated,
    # with a fixed seed so the sampled candidates are repeatable.
    random_search = RandomizedSearchCV(
        estimator=model,
        param_distributions=param_grids[model_name],
        n_iter=8,
        cv=5,
        # Select on the F1 of the positive class (label 1). The weighted
        # variant is dominated by the majority class, so it barely reacts to
        # how well the rare class is detected.
        scoring='f1',
        n_jobs=-1,
        random_state=42
    )

    random_search.fit(X_train, y_train)

    # Evaluate best model on the held-out test split
    best_estimator = random_search.best_estimator_
    metrics = evaluate_model(best_estimator, X_test, y_test)

    # Remove rows left behind by an earlier run of this cell so that
    # re-running it (e.g. after an interrupt) does not duplicate entries.
    while result_label in results['Model']:
        stale_position = results['Model'].index(result_label)
        for column_values in results.values():
            del column_values[stale_position]

    # Store results
    results['Model'].append(result_label)
    results['Accuracy'].append(metrics['Accuracy'])
    results['Precision'].append(metrics['Precision'])
    results['Recall'].append(metrics['Recall'])
    results['F1-Score'].append(metrics['F1-Score'])
    results['Best Parameters'].append(random_search.best_params_)

    # Print results
    print(f"Best Parameters: {random_search.best_params_}")
    print(f"Metrics: {metrics}")
    print(f"Classification Report:\n{classification_report(y_test, best_estimator.predict(X_test), zero_division=0)}")


Training models with RandomizedSearchCV...

Logistic Regression:
Best Parameters: {'solver': 'liblinear', 'max_iter': 1000, 'C': 1}
Metrics: {'Accuracy': 0.9991573329588147, 'Precision': 0.9990915062313275, 'Recall': 0.9991573329588147, 'F1-Score': 0.9991041423931677}
Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00     56864
           1       0.83      0.64      0.72        98

    accuracy                           1.00     56962
   macro avg       0.91      0.82      0.86     56962
weighted avg       1.00      1.00      1.00     56962


Random Forest:


In [None]:
# Create results DataFrame
results_df = pd.DataFrame(results)

# Fail with a clear message when no search cell has recorded anything yet,
# instead of an opaque error from idxmax() on an empty column.
if results_df.empty:
    raise ValueError("No model results recorded; run a search cell before summarising.")

# Find best performing model (based on F1-score due to class imbalance).
# idxmax() returns an index *label*, so the row is fetched with the
# label-based .loc rather than the positional .iloc.
best_model_idx = results_df['F1-Score'].idxmax()
best_model = results_df.loc[best_model_idx]



In [None]:
# Report the full comparison table, then the details of the selected row.
print("\nSummary of Results:")
print(results_df)

print("\nBest Performing Model:")
print(f"Model: {best_model['Model']}")
# The four numeric scores share one format: label, colon, four decimals.
for metric_name in ('Accuracy', 'Precision', 'Recall', 'F1-Score'):
    print(f"{metric_name}: {best_model[metric_name]:.4f}")
print(f"Best Parameters: {best_model['Best Parameters']}")

In [None]:
# Bar chart of the F1-Score recorded for every evaluated model, drawn through
# the explicit Figure/Axes interface.
fig, ax = plt.subplots(figsize=(10, 6))
ax.bar(results_df['Model'], results_df['F1-Score'], color='lightcoral')

# Model names are long: tilt the tick labels and anchor them on their right end.
plt.setp(ax.get_xticklabels(), rotation=45, ha='right')

ax.set_xlabel('Model')
ax.set_ylabel('F1-Score')
ax.set_title('Model Performance Comparison (F1-Score)')

fig.tight_layout()
plt.show()