In [1]:
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn import svm
from sklearn import metrics
from sklearn.tree import DecisionTreeClassifier
import time
import tracemalloc
import psutil

In [2]:
class BaseClassifier:
    def __init__(self, data_path):
        self.data = pd.read_csv(data_path)
        self.train_data, self.test_data = self.split_data()
        self.train_features, self.train_labels, self.test_features, self.test_labels = self.prepare_features()

    def split_data(self, test_size=0.3):
        return train_test_split(self.data, test_size=test_size)

    def prepare_features(self):
        train_features = self.train_data[[
            'SepalLengthCm', 'SepalWidthCm', 'PetalLengthCm', 'PetalWidthCm']]
        train_labels = self.train_data.Species
        test_features = self.test_data[[
            'SepalLengthCm', 'SepalWidthCm', 'PetalLengthCm', 'PetalWidthCm']]
        test_labels = self.test_data.Species
        return train_features, train_labels, test_features, test_labels

    def evaluate(self, predictions):
        accuracy = metrics.accuracy_score(predictions, self.test_labels)
        return {
            'accuracy': accuracy,
            'train_time': self.training_time,
            'predict_time': self.prediction_time,
            'memory': self.memory_usage
        }

    def time_method(self, method, *args, **kwargs):
        """Helper method to time any class method and return (result, duration_ms)"""
        start_time = time.time()
        result = method(*args, **kwargs)
        end_time = time.time()
        duration_ms = (end_time - start_time) * 1000  # Convert to milliseconds
        return result, duration_ms
    
    def train(self):
        process = psutil.Process()
        mem_before = process.memory_info().rss / 1024  # Resident Set Size in bytes

        start_time = time.time()
        self.model.fit(self.train_features, self.train_labels)
        train_time = (time.time() - start_time) * 1000  # ms

        mem_after = process.memory_info().rss / 1024
        self.memory_usage = (mem_after - mem_before)   # Convert to KB
        self.training_time = train_time
        
    def predict(self):
        start_time = time.time()
        predictions = self.model.predict(self.test_features)
        self.prediction_time = (time.time() - start_time) * 1000  # ms
        return predictions

In [3]:
class SVMClassifier(BaseClassifier):
    def __init__(self, data_path):
        super().__init__(data_path)
        # Composition: Compose an SVM model inside the class
        self.model = svm.SVC()  

    def train(self):
        tracemalloc.start()
        start_time = time.time()
        
        self.model.fit(self.train_features, self.train_labels)
        
        self.training_time = time.time() - start_time
        self.memory_usage = tracemalloc.get_traced_memory()[1]
        tracemalloc.stop()

    def predict(self):
        start_time = time.time()
        predictions = self.model.predict(self.test_features)
        self.prediction_time = time.time() - start_time
        return predictions

In [4]:
class LogisticRegressionClassifier(BaseClassifier):
    def __init__(self, data_path):
        super().__init__(data_path)
        # Composition: Compose a Logistic Regression model inside the class
        self.model = LogisticRegression()

    def train(self):
        tracemalloc.start()
        start_time = time.time()

        self.model.fit(self.train_features, self.train_labels)

        self.training_time = time.time() - start_time
        self.memory_usage = tracemalloc.get_traced_memory()[1]
        tracemalloc.stop()

    def predict(self):
        start_time = time.time()
        predictions = self.model.predict(self.test_features)
        self.prediction_time = time.time() - start_time
        return predictions

In [5]:
class DecisionTreeClassifierStyle(BaseClassifier):
    def __init__(self, data_path):
        super().__init__(data_path)
        # Composition: Compose a Decision Tree model inside the class
        self.model = DecisionTreeClassifier()

    def train(self):
        tracemalloc.start()
        start_time = time.time()

        self.model.fit(self.train_features, self.train_labels)

        self.training_time = time.time() - start_time
        self.memory_usage = tracemalloc.get_traced_memory()[1]
        tracemalloc.stop()

    def predict(self):
        start_time = time.time()
        predictions = self.model.predict(self.test_features)
        self.prediction_time = time.time() - start_time
        return predictions

In [6]:
class KNNClassifier(BaseClassifier):
    def __init__(self, data_path, n_neighbors=3):
        super().__init__(data_path)
        # Composition: Compose a KNN model inside the class
        self.model = KNeighborsClassifier(n_neighbors=n_neighbors)

    def train(self):
        tracemalloc.start()
        start_time = time.time()

        self.model.fit(self.train_features, self.train_labels)

        self.training_time = time.time() - start_time
        self.memory_usage = tracemalloc.get_traced_memory()[1]
        tracemalloc.stop()

    def predict(self):
        start_time = time.time()
        predictions = self.model.predict(self.test_features)
        self.prediction_time = time.time() - start_time
        return predictions

In [7]:
class PerformanceStats:
    def __init__(self):
        self.total_accuracy = 0
        self.total_train_time = 0
        self.total_predict_time = 0
        self.total_memory = 0
        self.runs = 0

    def add_run(self, accuracy, train_time, predict_time, memory):
        self.total_accuracy += accuracy
        self.total_train_time += train_time
        self.total_predict_time += predict_time
        self.total_memory += memory
        self.runs += 1

    def get_averages(self):
        return {
            'accuracy': self.total_accuracy / self.runs,
            'train_time': self.total_train_time / self.runs,
            'predict_time': self.total_predict_time / self.runs,
            'memory': self.total_memory / self.runs
        }

In [8]:
def run_classification():
    classifiers = [
        SVMClassifier("./Input_Data/Iris.csv"),
        LogisticRegressionClassifier("./Input_Data/Iris.csv"),
        DecisionTreeClassifierStyle("./Input_Data/Iris.csv"),
        KNNClassifier("./Input_Data/Iris.csv", n_neighbors=3)
    ]

    num_runs = 100

    for classifier in classifiers:
        print(f"\n{classifier.__class__.__name__}")
        stats = {
            'accuracy': [],
            'train_time': [],
            'predict_time': [],
            'memory': []
        }

        for _ in range(num_runs):
            # Force Python garbage collection
            import gc
            gc.collect()

            # Train and measure
            classifier.train()

            # Predict and evaluate
            predictions = classifier.predict()
            result = classifier.evaluate(predictions)

            # Store results
            for key in stats:
                stats[key].append(result[key])

        # Print averages
        print(f"Average Training time: {np.mean(stats['train_time']):.3f} ms")
        print(f"Average Memory usage: {np.mean(stats['memory']):.2f} KB")
        print(
            f"Average Prediction time: {np.mean(stats['predict_time']):.3f} ms")
        print(f"Average accuracy: {np.mean(stats['accuracy']) * 100:.2f}%")
        print(f"(Averaged over {num_runs} runs)")

In [9]:
run_classification()


SVMClassifier
Average Training time: 0.004 ms
Average Memory usage: 19154.33 KB
Average Prediction time: 0.001 ms
Average accuracy: 93.33%
(Averaged over 100 runs)

LogisticRegressionClassifier
Average Training time: 0.085 ms
Average Memory usage: 62999.96 KB
Average Prediction time: 0.002 ms
Average accuracy: 95.56%
(Averaged over 100 runs)

DecisionTreeClassifierStyle
Average Training time: 0.004 ms
Average Memory usage: 22380.36 KB
Average Prediction time: 0.001 ms
Average accuracy: 88.84%
(Averaged over 100 runs)

KNNClassifier
Average Training time: 0.004 ms
Average Memory usage: 17194.72 KB
Average Prediction time: 0.003 ms
Average accuracy: 97.78%
(Averaged over 100 runs)
