In [None]:
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn import svm
from sklearn import metrics
from sklearn.tree import DecisionTreeClassifier
import time

In [2]:
class BaseClassifier:
    """Shared data handling, timing and evaluation for the Iris classifiers.

    The constructor reads the CSV at ``data_path``, splits it into train and
    test partitions and separates the feature columns from the ``Species``
    label column. This base class never assigns ``self.model``; subclasses
    must set it to an estimator exposing ``fit`` and ``predict`` before
    ``train`` or ``predict`` is called.
    """

    # Columns used as model inputs; the label column is ``Species``.
    FEATURE_COLUMNS = [
        'SepalLengthCm', 'SepalWidthCm', 'PetalLengthCm', 'PetalWidthCm']

    def __init__(self, data_path):
        """Load the dataset from ``data_path`` and prepare train/test splits."""
        self.data = pd.read_csv(data_path)
        self.train_data, self.test_data = self.split_data()
        (self.train_features, self.train_labels,
         self.test_features, self.test_labels) = self.prepare_features()

    def split_data(self, test_size=0.3, random_state=None):
        """Split ``self.data`` into ``(train, test)`` frames.

        Args:
            test_size: Forwarded to ``train_test_split``.
            random_state: Forwarded to ``train_test_split``. ``None`` (the
                default) keeps the original behaviour of not passing a seed.
        """
        return train_test_split(
            self.data, test_size=test_size, random_state=random_state)

    def prepare_features(self):
        """Return ``(train_features, train_labels, test_features, test_labels)``."""
        train_features = self.train_data[self.FEATURE_COLUMNS]
        train_labels = self.train_data.Species
        test_features = self.test_data[self.FEATURE_COLUMNS]
        test_labels = self.test_data.Species
        return train_features, train_labels, test_features, test_labels

    def evaluate(self, predictions):
        """Score ``predictions`` against the held-out labels.

        Reads ``self.training_time`` and ``self.prediction_time``, which are
        only set by ``train()`` and ``predict()``; call both first.

        Returns:
            dict with keys ``'accuracy'`` (value returned by
            ``metrics.accuracy_score``), ``'train_time'`` and
            ``'predict_time'`` (milliseconds).
        """
        # accuracy_score's documented signature is (y_true, y_pred).
        accuracy = metrics.accuracy_score(self.test_labels, predictions)
        return {
            'accuracy': accuracy,
            'train_time': self.training_time,
            'predict_time': self.prediction_time,
        }

    def train(self):
        """Fit ``self.model`` on the training split and record the duration.

        The elapsed time is stored in ``self.training_time`` in milliseconds.
        """
        # perf_counter is monotonic and high-resolution; time.time() follows
        # the wall clock, which can be adjusted while the fit is running.
        start = time.perf_counter()
        self.model.fit(self.train_features, self.train_labels)
        self.training_time = (time.perf_counter() - start) * 1000

    def predict(self):
        """Predict labels for the test split and record the duration.

        The elapsed time is stored in ``self.prediction_time`` in milliseconds.

        Returns:
            Whatever ``self.model.predict`` returns for the test features.
        """
        start = time.perf_counter()
        predictions = self.model.predict(self.test_features)
        self.prediction_time = (time.perf_counter() - start) * 1000
        return predictions

In [3]:
class SVMClassifier(BaseClassifier):
    """Iris classifier backed by ``svm.SVC`` with default settings.

    ``train`` and ``predict`` are inherited from ``BaseClassifier``; they
    operate on ``self.model``, so this class only has to supply the estimator.
    """

    def __init__(self, data_path):
        """Load and split the data at ``data_path``, then attach the SVC model."""
        super().__init__(data_path)
        # Composition: Compose an SVM model inside the class
        self.model = svm.SVC()

In [4]:
class LogisticRegressionClassifier(BaseClassifier):
    """Iris classifier backed by ``LogisticRegression(max_iter=1000)``.

    ``train`` and ``predict`` are inherited from ``BaseClassifier``; they
    operate on ``self.model``, so this class only has to supply the estimator.
    """

    def __init__(self, data_path):
        """Load and split the data at ``data_path``, then attach the model."""
        super().__init__(data_path)
        # Composition: Compose a Logistic Regression model inside the class
        self.model = LogisticRegression(max_iter=1000)

In [5]:
class DecisionTreeClassifierStyle(BaseClassifier):
    """Iris classifier backed by ``DecisionTreeClassifier`` with default settings.

    ``train`` and ``predict`` are inherited from ``BaseClassifier``; they
    operate on ``self.model``, so this class only has to supply the estimator.
    """

    def __init__(self, data_path):
        """Load and split the data at ``data_path``, then attach the tree model."""
        super().__init__(data_path)
        # Composition: Compose a Decision Tree model inside the class
        self.model = DecisionTreeClassifier()

In [6]:
class PerformanceStats:
    """Running totals of accuracy and timings across repeated runs."""

    def __init__(self):
        """Start with zeroed totals and no recorded runs."""
        self.total_accuracy = 0
        self.total_train_time = 0
        self.total_predict_time = 0
        self.runs = 0

    def add_run(self, accuracy, train_time, predict_time):
        """Add one run's metrics to the totals and bump the run counter."""
        self.total_accuracy += accuracy
        self.total_train_time += train_time
        self.total_predict_time += predict_time
        self.runs += 1

    def get_averages(self):
        """Return the per-run means of everything recorded so far.

        Returns:
            dict with keys ``'accuracy'`` (mean accuracy multiplied by 100),
            ``'train_time'`` and ``'predict_time'`` (means of the values
            passed to ``add_run``).

        Raises:
            ZeroDivisionError: if no run has been recorded. The exception
                type matches what the plain division would raise, but the
                message says what to do about it.
        """
        if self.runs == 0:
            raise ZeroDivisionError(
                "no runs recorded; call add_run() before get_averages()")
        return {
            'accuracy': self.total_accuracy / self.runs * 100,
            'train_time': self.total_train_time / self.runs,
            'predict_time': self.total_predict_time / self.runs,
        }

In [7]:
def run_classification(num_runs=100, data_path="./Input_Data/Iris.csv", seed=42):
    """Benchmark each classifier over repeated random splits and print averages.

    For every classifier class, a fresh instance is built ``num_runs`` times
    (each construction reloads and re-splits the data), trained, used for
    prediction and evaluated. The mean training time, prediction time and
    accuracy are printed per classifier.

    Args:
        num_runs: Number of train/predict/evaluate repetitions per classifier.
        data_path: CSV file handed to each classifier's constructor.
        seed: Base seed for NumPy's global random state, or ``None`` to leave
            the global state untouched (results then vary between calls).

    Raises:
        ValueError: if ``num_runs`` is less than 1.
    """
    if num_runs < 1:
        raise ValueError("num_runs must be at least 1")

    classifier_classes = [
        SVMClassifier,
        LogisticRegressionClassifier,
        DecisionTreeClassifierStyle,
    ]

    for Classifier in classifier_classes:
        print(f"\n{Classifier.__name__}")
        stats = PerformanceStats()

        for run_index in range(num_runs):
            if seed is not None:
                # scikit-learn falls back to NumPy's global random state when
                # no random_state is passed, so seeding it here makes the
                # split reproducible. Using seed + run_index gives run i the
                # same starting state for every classifier.
                np.random.seed(seed + run_index)

            classifier = Classifier(data_path)
            classifier.train()
            predictions = classifier.predict()
            result = classifier.evaluate(predictions)

            stats.add_run(result['accuracy'],
                          result['train_time'],
                          result['predict_time'])

        averages = stats.get_averages()
        print(f"Average Training Time: {averages['train_time']:.3f} ms")
        print(f"Average Prediction Time: {averages['predict_time']:.3f} ms")
        print(f"Average Accuracy: {averages['accuracy']:.2f}%")
        print(f"(averaged over {num_runs} runs)")

In [8]:
# Benchmark every classifier and print its averaged timing and accuracy.
run_classification()


SVMClassifier
Average Training Time: 1.507 ms
Average Prediction Time: 0.924 ms
Average Accuracy: 95.93%
(averaged over 100 runs)

LogisticRegressionClassifier
Average Training Time: 38.234 ms
Average Prediction Time: 1.325 ms
Average Accuracy: 95.98%
(averaged over 100 runs)

DecisionTreeClassifierStyle
Average Training Time: 1.322 ms
Average Prediction Time: 0.695 ms
Average Accuracy: 94.87%
(averaged over 100 runs)
