In [1]:
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from sklearn.naive_bayes import GaussianNB
from xgboost import XGBClassifier
from catboost import CatBoostClassifier
import time

def load_data(features_path, labels_path):
    """
    Read a feature array and its label array from disk.

    Both files are read with ``np.load``; the features file is read first.

    Args:
    features_path (str): Location of the saved features array.
    labels_path (str): Location of the saved labels array.

    Returns:
    tuple: ``(features, labels)`` exactly as returned by ``np.load``.
    """
    return np.load(features_path), np.load(labels_path)

def reshape_features(features):
    """
    Flatten every sample of a feature array into a single row.

    The first axis is kept as-is and all remaining axes are collapsed
    into one, e.g. a 4D array becomes a 2D array.

    Args:
    features (np.ndarray): Features array whose first axis indexes samples.

    Returns:
    np.ndarray: 2D array with one row per entry of the first axis.
    """
    n_samples = features.shape[0]
    return features.reshape((n_samples, -1))

def _score_predictions(y_true, y_pred):
    """Return accuracy, weighted precision/recall/F1 and the confusion matrix."""
    return {
        "accuracy": accuracy_score(y_true, y_pred),
        "precision": precision_score(y_true, y_pred, average='weighted'),
        "recall": recall_score(y_true, y_pred, average='weighted'),
        "f1": f1_score(y_true, y_pred, average='weighted'),
        "confusion_matrix": confusion_matrix(y_true, y_pred),
    }


def train_and_evaluate(classifiers, X_train, X_test, y_train, y_test):
    """
    Train classifiers and evaluate their performance.

    Each classifier is fitted in place on the training data, then scored
    on both the test split and the training split.

    Args:
    classifiers (dict): Mapping of display name -> classifier exposing
        ``fit(X, y)`` and ``predict(X)``.
    X_train (np.ndarray): Training features.
    X_test (np.ndarray): Testing features.
    y_train (np.ndarray): Training labels.
    y_test (np.ndarray): Testing labels.

    Returns:
    dict: For each classifier name, a dict with keys ``"test_metrics"`` and
        ``"train_metrics"`` (each holding ``accuracy``, ``precision``,
        ``recall``, ``f1`` and ``confusion_matrix``), plus ``"train_time"``
        and ``"predict_time"`` in seconds.
    """
    results = {}
    for name, clf in classifiers.items():
        # perf_counter is a monotonic, high-resolution clock, so the measured
        # durations cannot be skewed by system clock adjustments.
        fit_start = time.perf_counter()
        clf.fit(X_train, y_train)
        train_time = time.perf_counter() - fit_start

        # Only the test-set prediction is timed; the training-set prediction
        # below is used purely for scoring.
        predict_start = time.perf_counter()
        y_pred = clf.predict(X_test)
        predict_time = time.perf_counter() - predict_start

        results[name] = {
            "test_metrics": _score_predictions(y_test, y_pred),
            "train_metrics": _score_predictions(y_train, clf.predict(X_train)),
            "train_time": train_time,
            "predict_time": predict_time,
        }
    return results

def _print_metric_section(title, section):
    """Print one metrics section: title, four scores, and the confusion matrix."""
    print(title)
    print(f"Accuracy: {section['accuracy']:.2f}")
    print(f"Precision: {section['precision']:.2f}")
    print(f"Recall: {section['recall']:.2f}")
    print(f"F1 Score: {section['f1']:.2f}")
    print("Confusion Matrix:")
    print(section['confusion_matrix'])
    print("\n")


def main():
    """
    Load the extracted features, train every classifier and print its metrics.

    The features are flattened to 2D, split 70/30 into train/test sets with a
    fixed random seed, and passed to ``train_and_evaluate``. Test metrics,
    training metrics and timings are printed for each classifier.
    """
    # Paths to the features and labels files.
    # Raw strings keep the Windows backslashes literal; the plain-string form
    # relied on invalid escape sequences (e.g. "\S", "\M"), which are deprecated.
    features_path = r'D:\SEM-4\ML\CODES\Machine-Learning\Lab04\extracted_features.npy'
    labels_path = r'D:\SEM-4\ML\CODES\Machine-Learning\Lab04\labels.npy'

    # Load features and labels
    features, labels = load_data(features_path, labels_path)

    # Reshape features so each sample is a single row
    features = reshape_features(features)

    # Splitting the data into training and testing sets (30% held out for testing)
    X_train, X_test, y_train, y_test = train_test_split(features, labels, test_size=0.3, random_state=42)

    # Dictionary of classifiers, all with default hyperparameters
    # (CatBoost only has its training log silenced).
    classifiers = {
        "CatBoost": CatBoostClassifier(verbose=0),
        "XGBoost": XGBClassifier(),
        "SVM": SVC(),
        "Random Forest": RandomForestClassifier(),
        "AdaBoost": AdaBoostClassifier(),
        "Decision Tree": DecisionTreeClassifier(),
        "Naive Bayes": GaussianNB()
    }

    # Train and evaluate classifiers
    results = train_and_evaluate(classifiers, X_train, X_test, y_train, y_test)

    # Print results
    for name, metrics in results.items():
        print(f"{name} Performance Metrics:")
        _print_metric_section("Testing Metrics:", metrics['test_metrics'])
        _print_metric_section("Training Metrics:", metrics['train_metrics'])
        print(f"Training Time: {metrics['train_time']:.2f} seconds")
        print(f"Prediction Time: {metrics['predict_time']:.2f} seconds")
        print("\n")

# Entry point: run the whole pipeline when this cell/script is executed directly
# (the guard keeps main() from running if the code is imported as a module).
if __name__ == "__main__":
    main()

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


CatBoost Performance Metrics:
Testing Metrics:
Accuracy: 0.52
Precision: 0.53
Recall: 0.52
F1 Score: 0.52
Confusion Matrix:
[[ 108    5   54   10    2    6    3   65   66   26   56    6    1]
 [   7  513   18   49   56   21   13   60  109    8   61    4   17]
 [   9   18  936   16    7   31    5  148  196   73  180    6    3]
 [   4   51   30  326   34   18   19   56  134    7   60   11   10]
 [   1   67   10   30  418    6   14   36   62    0   26    1    4]
 [  15   30   94    8    5  474    2   39   76   56  100    0    2]
 [   1   26    8   46   14    5  149   43   60    2   30    3    3]
 [   9   20  165   25    7   19   15  901  109   21  286   12    7]
 [  16   61  226   56   21   32   12   93 1208   23   77    7   30]
 [  13    8  179   10    1   23    2   52   51  293  103    0    3]
 [  19   34  162   21   16   58    8  308  200   76 1123   11    9]
 [   7   26   32   47    8   17    2  112   79    6   95   43    3]
 [   0   13    4   24   11    6    6   10   62    2   11    

In [2]:
import numpy as np
from sklearn.decomposition import PCA

# Paths to the features and labels files.
# Raw strings keep the Windows backslashes literal; the plain-string form
# relied on invalid escape sequences (e.g. "\S", "\M"), which are deprecated.
features_path = r'D:\SEM-4\ML\CODES\Machine-Learning\Lab04\extracted_features.npy'
labels_path = r'D:\SEM-4\ML\CODES\Machine-Learning\Lab04\labels.npy'

# Load features and labels
features = np.load(features_path)
labels = np.load(labels_path)

# Reshape features from 4D (n_samples, height, width, channels) to 2D (n_samples, height*width*channels)
features = features.reshape(features.shape[0], -1)

# Applying PCA to capture 99% of the variance: a float n_components in (0, 1)
# tells PCA to keep as many components as needed to reach that variance ratio.
pca = PCA(n_components=0.99)
features_pca = pca.fit_transform(features)

# Saving the reduced features to a new file
reduced_features_path = r'D:\SEM-4\ML\CODES\Machine-Learning\Reduced Features\Features_reduced.npy'
np.save(reduced_features_path, features_pca)

# Optionally, save the labels if you need to keep them aligned with the reduced features for later use
reduced_labels_path = r'D:\SEM-4\ML\CODES\Machine-Learning\Reduced Features\Labels_reduced.npy'
np.save(reduced_labels_path, labels)

# Number of components selected
n_components = pca.n_components_
print(f"Number of principal components selected to explain at least 99% of the variance: {n_components}")

Number of principal components selected to explain at least 99% of the variance: 270


In [3]:
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from sklearn.naive_bayes import GaussianNB
from xgboost import XGBClassifier
from catboost import CatBoostClassifier
import time

def load_data(features_path, labels_path):
    """
    Read a feature array and its label array from disk.

    Both files are read with ``np.load``; the features file is read first.

    Args:
    features_path (str): Location of the saved features array.
    labels_path (str): Location of the saved labels array.

    Returns:
    tuple: ``(features, labels)`` exactly as returned by ``np.load``.
    """
    return np.load(features_path), np.load(labels_path)

def reshape_features(features):
    """
    Flatten every sample of a feature array into a single row.

    The first axis is kept as-is and all remaining axes are collapsed
    into one; an array that is already 2D keeps its shape.

    Args:
    features (np.ndarray): Features array whose first axis indexes samples.

    Returns:
    np.ndarray: 2D array with one row per entry of the first axis.
    """
    n_samples = features.shape[0]
    return features.reshape((n_samples, -1))

def _score_predictions(y_true, y_pred):
    """Return accuracy, weighted precision/recall/F1 and the confusion matrix."""
    return {
        "accuracy": accuracy_score(y_true, y_pred),
        "precision": precision_score(y_true, y_pred, average='weighted'),
        "recall": recall_score(y_true, y_pred, average='weighted'),
        "f1": f1_score(y_true, y_pred, average='weighted'),
        "confusion_matrix": confusion_matrix(y_true, y_pred),
    }


def train_and_evaluate(classifiers, X_train, X_test, y_train, y_test):
    """
    Train classifiers and evaluate their performance.

    Each classifier is fitted in place on the training data, then scored
    on both the test split and the training split.

    Args:
    classifiers (dict): Mapping of display name -> classifier exposing
        ``fit(X, y)`` and ``predict(X)``.
    X_train (np.ndarray): Training features.
    X_test (np.ndarray): Testing features.
    y_train (np.ndarray): Training labels.
    y_test (np.ndarray): Testing labels.

    Returns:
    dict: For each classifier name, a dict with keys ``"test_metrics"`` and
        ``"train_metrics"`` (each holding ``accuracy``, ``precision``,
        ``recall``, ``f1`` and ``confusion_matrix``), plus ``"train_time"``
        and ``"predict_time"`` in seconds.
    """
    results = {}
    for name, clf in classifiers.items():
        # perf_counter is a monotonic, high-resolution clock, so the measured
        # durations cannot be skewed by system clock adjustments.
        fit_start = time.perf_counter()
        clf.fit(X_train, y_train)
        train_time = time.perf_counter() - fit_start

        # Only the test-set prediction is timed; the training-set prediction
        # below is used purely for scoring.
        predict_start = time.perf_counter()
        y_pred = clf.predict(X_test)
        predict_time = time.perf_counter() - predict_start

        results[name] = {
            "test_metrics": _score_predictions(y_test, y_pred),
            "train_metrics": _score_predictions(y_train, clf.predict(X_train)),
            "train_time": train_time,
            "predict_time": predict_time,
        }
    return results

def _print_metric_section(title, section):
    """Print one metrics section: title, four scores, and the confusion matrix."""
    print(title)
    print(f"Accuracy: {section['accuracy']:.2f}")
    print(f"Precision: {section['precision']:.2f}")
    print(f"Recall: {section['recall']:.2f}")
    print(f"F1 Score: {section['f1']:.2f}")
    print("Confusion Matrix:")
    print(section['confusion_matrix'])
    print("\n")


def main():
    """
    Load the reduced features, train every classifier and print its metrics.

    The features are flattened to 2D, split 70/30 into train/test sets with a
    fixed random seed, and passed to ``train_and_evaluate``. Test metrics,
    training metrics and timings are printed for each classifier.
    """
    # Paths to the features and labels files.
    # Raw strings keep the Windows backslashes literal; the plain-string form
    # relied on invalid escape sequences (e.g. "\S", "\M"), which are deprecated.
    # NOTE(review): the PCA cell of this notebook writes its output under a
    # "Reduced Features" directory, while these paths read from "Reduced 512" --
    # confirm this is the intended dataset.
    features_path = r'D:\SEM-4\ML\CODES\Machine-Learning\Reduced 512\Features_reduced.npy'
    labels_path = r'D:\SEM-4\ML\CODES\Machine-Learning\Reduced 512\Labels_reduced.npy'

    # Load features and labels
    features, labels = load_data(features_path, labels_path)

    # Reshape features so each sample is a single row
    # (leaves the array unchanged if it is already 2D).
    features = reshape_features(features)

    # Splitting the data into training and testing sets (30% held out for testing)
    X_train, X_test, y_train, y_test = train_test_split(features, labels, test_size=0.3, random_state=42)

    # Dictionary of classifiers, all with default hyperparameters
    # (CatBoost only has its training log silenced).
    classifiers = {
        "CatBoost": CatBoostClassifier(verbose=0),
        "XGBoost": XGBClassifier(),
        "SVM": SVC(),
        "Random Forest": RandomForestClassifier(),
        "AdaBoost": AdaBoostClassifier(),
        "Decision Tree": DecisionTreeClassifier(),
        "Naive Bayes": GaussianNB()
    }

    # Train and evaluate classifiers
    results = train_and_evaluate(classifiers, X_train, X_test, y_train, y_test)

    # Print results
    for name, metrics in results.items():
        print(f"{name} Performance Metrics:")
        _print_metric_section("Testing Metrics:", metrics['test_metrics'])
        _print_metric_section("Training Metrics:", metrics['train_metrics'])
        print(f"Training Time: {metrics['train_time']:.2f} seconds")
        print(f"Prediction Time: {metrics['predict_time']:.2f} seconds")
        print("\n")

# Entry point: run the whole pipeline when this cell/script is executed directly
# (the guard keeps main() from running if the code is imported as a module).
if __name__ == "__main__":
    main()

CatBoost Performance Metrics:
Testing Metrics:
Accuracy: 0.50
Precision: 0.51
Recall: 0.50
F1 Score: 0.50
Confusion Matrix:
[[  99    6   55    9    2    6    5   68   68   31   55    3    1]
 [   2  504   20   43   70   33   15   54  108    4   76    0    7]
 [  10   18  880   12    9   32    6  149  224   63  218    4    3]
 [   2   49   30  312   34   22   14   56  166    4   52    9   10]
 [   1   78   12   34  393   10    8   42   71    0   21    1    4]
 [  16   46   94    7    2  477    2   47   75   48   84    2    1]
 [   3   35   15   44   12    5  137   36   62    3   35    1    2]
 [  12   17  138   30    3   39    6  903  123   24  293    5    3]
 [  11   65  235   70   24   22    9   92 1166   24   99   10   35]
 [  19   10  165    4    0   25    5   48   74  271  115    2    0]
 [  16   36  157   33   13   58    9  310  222   60 1119    9    3]
 [   4   22   34   45    9   16    5  116   86    7  107   23    3]
 [   0   19    7   25    7    8    8   14   74    2   16    