In [1]:
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn import svm
from sklearn import metrics
from sklearn.tree import DecisionTreeClassifier
import time
import tracemalloc

In [2]:
class BaseClassifier:
    def __init__(self, data_path):
        self.data = pd.read_csv(data_path)
        self.train_data, self.test_data = self.split_data()
        self.train_features, self.train_labels, self.test_features, self.test_labels = self.prepare_features()

    def split_data(self, test_size=0.3):
        return train_test_split(self.data, test_size=test_size)

    def prepare_features(self):
        train_features = self.train_data[[
            'SepalLengthCm', 'SepalWidthCm', 'PetalLengthCm', 'PetalWidthCm']]
        train_labels = self.train_data.Species
        test_features = self.test_data[[
            'SepalLengthCm', 'SepalWidthCm', 'PetalLengthCm', 'PetalWidthCm']]
        test_labels = self.test_data.Species
        return train_features, train_labels, test_features, test_labels

    def evaluate(self, predictions, true_labels):
        accuracy = metrics.accuracy_score(predictions, true_labels)
        print(
            f'The accuracy of the {self.__class__.__name__} is: {accuracy * 100:.2f}%')
        return accuracy

    def time_method(self, method, *args, **kwargs):
        """Helper method to time any class method and return (result, duration_ms)"""
        start_time = time.time()
        result = method(*args, **kwargs)
        end_time = time.time()
        duration_ms = (end_time - start_time) * 1000  # Convert to milliseconds
        return result, duration_ms

In [3]:
class SVMClassifier(BaseClassifier):
    def __init__(self, data_path):
        super().__init__(data_path)
        # Composition: Compose an SVM model inside the class
        self.model = svm.SVC()  

    def train(self):
        tracemalloc.start()
        start_time = time.time()
        
        self.model.fit(self.train_features, self.train_labels)
        
        self.training_time = time.time() - start_time
        self.memory_usage = tracemalloc.get_traced_memory()[1]
        tracemalloc.stop()

    def predict(self):
        start_time = time.time()
        predictions = self.model.predict(self.test_features)
        self.prediction_time = time.time() - start_time
        return predictions

In [4]:
class LogisticRegressionClassifier(BaseClassifier):
    def __init__(self, data_path):
        super().__init__(data_path)
        # Composition: Compose a Logistic Regression model inside the class
        self.model = LogisticRegression()

    def train(self):
        tracemalloc.start()
        start_time = time.time()

        self.model.fit(self.train_features, self.train_labels)

        self.training_time = time.time() - start_time
        self.memory_usage = tracemalloc.get_traced_memory()[1]
        tracemalloc.stop()

    def predict(self):
        start_time = time.time()
        predictions = self.model.predict(self.test_features)
        self.prediction_time = time.time() - start_time
        return predictions

In [5]:
class DecisionTreeClassifierStyle(BaseClassifier):
    def __init__(self, data_path):
        super().__init__(data_path)
        # Composition: Compose a Decision Tree model inside the class
        self.model = DecisionTreeClassifier()

    def train(self):
        tracemalloc.start()
        start_time = time.time()

        self.model.fit(self.train_features, self.train_labels)

        self.training_time = time.time() - start_time
        self.memory_usage = tracemalloc.get_traced_memory()[1]
        tracemalloc.stop()

    def predict(self):
        start_time = time.time()
        predictions = self.model.predict(self.test_features)
        self.prediction_time = time.time() - start_time
        return predictions

In [6]:
class KNNClassifier(BaseClassifier):
    def __init__(self, data_path, n_neighbors=3):
        super().__init__(data_path)
        # Composition: Compose a KNN model inside the class
        self.model = KNeighborsClassifier(n_neighbors=n_neighbors)

    def train(self):
        tracemalloc.start()
        start_time = time.time()

        self.model.fit(self.train_features, self.train_labels)

        self.training_time = time.time() - start_time
        self.memory_usage = tracemalloc.get_traced_memory()[1]
        tracemalloc.stop()

    def predict(self):
        start_time = time.time()
        predictions = self.model.predict(self.test_features)
        self.prediction_time = time.time() - start_time
        return predictions

In [7]:
def run_classification():
    classifiers = [
        SVMClassifier("./Input_Data/Iris.csv"),
        LogisticRegressionClassifier("./Input_Data/Iris.csv"),
        DecisionTreeClassifierStyle("./Input_Data/Iris.csv"),
        KNNClassifier("./Input_Data/Iris.csv", n_neighbors=3)
    ]

    results = []

    for classifier in classifiers:
        print(f"\n{classifier.__class__.__name__}")

        # Time the training
        _, train_time = classifier.time_method(classifier.train)
        print(f"Training time: {train_time:.2f} ms")

        # Time the prediction
        predictions, predict_time = classifier.time_method(classifier.predict)
        print(f"Prediction time: {predict_time:.2f} ms")

        # Evaluate accuracy
        accuracy = classifier.evaluate(predictions, classifier.test_labels)

        # Store results
        results.append({
            'Classifier': classifier.__class__.__name__,
            'Accuracy': accuracy,
            'TrainTime_ms': train_time,
            'PredictTime_ms': predict_time,
            'TotalTime_ms': train_time + predict_time
        })

    # Print summary table
    print("\nSummary:")
    print(pd.DataFrame(results).to_string(index=False))

In [8]:
run_classification()


SVMClassifier
Training time: 15.36 ms
Prediction time: 3.18 ms
The accuracy of the SVMClassifier is: 86.67%

LogisticRegressionClassifier
Training time: 278.08 ms
Prediction time: 3.33 ms
The accuracy of the LogisticRegressionClassifier is: 95.56%

DecisionTreeClassifierStyle
Training time: 15.79 ms
Prediction time: 1.16 ms
The accuracy of the DecisionTreeClassifierStyle is: 93.33%

KNNClassifier
Training time: 22.01 ms
Prediction time: 16.85 ms
The accuracy of the KNNClassifier is: 97.78%

Summary:
                  Classifier  Accuracy  TrainTime_ms  PredictTime_ms  TotalTime_ms
               SVMClassifier  0.866667     15.362501        3.180504     18.543005
LogisticRegressionClassifier  0.955556    278.082848        3.334522    281.417370
 DecisionTreeClassifierStyle  0.933333     15.785456        1.162529     16.947985
               KNNClassifier  0.977778     22.010565       16.853809     38.864374


**Encapsulation**
  - encapsulate the model `train`, `predict`, and `evaluate` logic into a class
  - Ex: The `BaseClassifier` class encapsulates the data loading, splitting, and feature preparation logic. Each subclass (eg. `SVMClassifier`) encapsulates the training, prediction, and evaluation logic for a specific model

**Inheritance**:
  - Each subclass (eg. `SVMClassifier`, `LogisticRegressionClassifier`) inherits from the `BaseClassifier` class and implements the `train` and `predict` methods
  - Ex. The `SVMClassifier` class inherits the `split_data`, `prepare_features`, and `evaluate` methods from `BaseClassifier`

**Favor Composition over Inheritance**:
  - Makes the code more flexible and modular
  - Ex: The `SVMClassifier` class composes an `svm.SVC()` model instead of inheriting from it. Similarly, the `LogisticRegressionClassifier` composes a `LogisticRegression()` model.

**Maintainability**:
  - Ex: Adding a new classifier is as simple as creating a new subclass that composes the desired model and implements the `train` and `predict` methods

**DRY/KISS**:
  - Code is kept simple and straightforward, adhering to the KISS principle.
  - Ex: Instead of writing evaluation logic in each subclass, the `evaluate` method in `BaseClassifier` is reused by all subclasses.