# Algorithm Investigation

##### Authors: Mateus and Mughees
##### Step 3: Investigate Algorithms

In [1]:
import pandas as pd

from sklearn.pipeline import Pipeline
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score


from creditcard_preparation import create_creditcard_pipeline, prepare_creditcard_data

In [2]:
# Functions to evaluate an algorithm

def evaluate_algo(algo, X_train, y_train, X_dev, y_dev):
    """Fit ``algo`` behind the credit card preprocessing pipeline and score it.

    A fresh preprocessing pipeline is obtained from
    ``create_creditcard_pipeline()`` on every call, chained with ``algo`` in a
    scikit-learn ``Pipeline``, fitted on the training split and then used to
    predict the dev split.

    Args:
        algo: Estimator placed as the final ``'algo'`` step of the pipeline.
        X_train: Training features passed to ``fit``.
        y_train: Training labels passed to ``fit``.
        X_dev: Dev features passed to ``predict``.
        y_dev: Dev labels the predictions are scored against.

    Returns:
        list: ``[accuracy, precision, recall, f1]`` computed on the dev split,
        each scorer being called with its default arguments.
    """
    # Preprocessing first, estimator last; step names match the original
    # ('preprocessor', 'algo') so downstream introspection keeps working.
    model = Pipeline(steps=[
        ('preprocessor', create_creditcard_pipeline()),
        ('algo', algo),
    ])
    model.fit(X_train, y_train)

    dev_predictions = model.predict(X_dev)

    # Order of scorers defines the order of the returned list.
    scorers = (accuracy_score, precision_score, recall_score, f1_score)
    return [scorer(y_dev, dev_predictions) for scorer in scorers]

# Function for LogisticRegression


def evaluate_lr(X_train, y_train, X_dev, y_dev):
    """Evaluate ``LogisticRegression(max_iter=1000, random_state=42)``.

    Prints a progress line, then delegates to ``evaluate_algo`` and returns
    its result unchanged.
    """
    print("Evaluating LogisticRegression...")
    estimator = LogisticRegression(max_iter=1000, random_state=42)
    scores = evaluate_algo(estimator, X_train, y_train, X_dev, y_dev)
    return scores

# Function for SVC


def evaluate_svc(X_train, y_train, X_dev, y_dev):
    """Evaluate ``SVC(random_state=42)``.

    Prints a progress line, then delegates to ``evaluate_algo`` and returns
    its result unchanged.
    """
    print("Evaluating SVC...")
    estimator = SVC(random_state=42)
    scores = evaluate_algo(estimator, X_train, y_train, X_dev, y_dev)
    return scores

# Function for KNeighborsClassifier


def evaluate_knn(X_train, y_train, X_dev, y_dev):
    """Evaluate a ``KNeighborsClassifier`` built with default arguments.

    Prints a progress line, then delegates to ``evaluate_algo`` and returns
    its result unchanged.
    """
    print("Evaluating KNeighborsClassifier...")
    estimator = KNeighborsClassifier()
    scores = evaluate_algo(estimator, X_train, y_train, X_dev, y_dev)
    return scores

# Function for DecisionTreeClassifier


def evaluate_dt(X_train, y_train, X_dev, y_dev):
    """Evaluate ``DecisionTreeClassifier(random_state=42)``.

    Prints a progress line, then delegates to ``evaluate_algo`` and returns
    its result unchanged.
    """
    print("Evaluating DecisionTreeClassifier...")
    estimator = DecisionTreeClassifier(random_state=42)
    scores = evaluate_algo(estimator, X_train, y_train, X_dev, y_dev)
    return scores

# Function for RandomForestClassifier


def evaluate_rf(X_train, y_train, X_dev, y_dev):
    """Evaluate ``RandomForestClassifier(random_state=42)``.

    Prints a progress line, then delegates to ``evaluate_algo`` and returns
    its result unchanged.
    """
    print("Evaluating RandomForestClassifier...")
    estimator = RandomForestClassifier(random_state=42)
    scores = evaluate_algo(estimator, X_train, y_train, X_dev, y_dev)
    return scores




# Split the credit card data into train / dev / test sets.
# NOTE(review): the two 1/10 ratios presumably size the dev and test splits —
# confirm against prepare_creditcard_data.
X_train, X_dev, X_test, y_train, y_dev, y_test = prepare_creditcard_data(
    ratios=(1 / 10, 1 / 10)
)

# Every evaluator takes the same four arguments: fit on train, score on dev.
fit_and_dev_args = (X_train, y_train, X_dev, y_dev)

# Evaluate each algorithm in turn (order determines the progress output).
lr_scores = evaluate_lr(*fit_and_dev_args)
svc_scores = evaluate_svc(*fit_and_dev_args)
knn_scores = evaluate_knn(*fit_and_dev_args)
dt_scores = evaluate_dt(*fit_and_dev_args)
rf_scores = evaluate_rf(*fit_and_dev_args)

# Collect the scores in one table: one row per algorithm, one column per metric.
scores_by_algorithm = {
    'LogisticRegression': lr_scores,
    'SVC': svc_scores,
    'KNeighborsClassifier': knn_scores,
    'DecisionTreeClassifier': dt_scores,
    'RandomForestClassifier': rf_scores,
}
scores_df = pd.DataFrame(
    list(scores_by_algorithm.values()),
    columns=['Accuracy', 'Precision', 'Recall', 'F1'],
    index=list(scores_by_algorithm),
)

print(scores_df)

Evaluating LogisticRegression...
Evaluating SVC...
Evaluating KNeighborsClassifier...
Evaluating DecisionTreeClassifier...
Evaluating RandomForestClassifier...
                        Accuracy  Precision    Recall        F1
LogisticRegression      0.964406   0.977698  0.950555  0.963936
SVC                     0.997186   0.996805  0.997575  0.997190
KNeighborsClassifier    0.998452   0.996917  1.000000  0.998456
DecisionTreeClassifier  0.997819   0.997297  0.998348  0.997822
RandomForestClassifier  0.999912   0.999824  1.000000  0.999912
