A2. Use cross-validation techniques (RandomizedSearchCV()) technique to tune the 
hyperparameters for your perceptron and MLP networks.

In [1]:
import numpy as np
from sklearn.linear_model import Perceptron
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import RandomizedSearchCV, train_test_split
from sklearn.metrics import accuracy_score

def load_data():
    """
    Load feature vectors and labels from files.
    """
    X = np.load(r"D:\SEM-4\ML\CODES\Machine-Learning\Lab04\extracted_features.npy")
    y = np.load(r"D:\SEM-4\ML\CODES\Machine-Learning\Lab04\labels.npy")
    return X, y

def split_data(X, y):
    """
    Split the dataset into training and testing sets.
    """
    return train_test_split(X, y, test_size=0.2, random_state=42)

def setup_search_params():
    """
    Setup the hyperparameter grids for both Perceptron and MLP.
    """
    param_grid_perceptron = {
        'max_iter': [1000, 3000, 5000],
        'eta0': [0.01, 0.1, 1.0],
        'penalty': [None, 'l2', 'l1', 'elasticnet']
    }

    param_grid_mlp = {
        'hidden_layer_sizes': [(50,), (100,), (50,50)],
        'activation': ['tanh', 'relu'],
        'max_iter': [200, 500, 1000],
        'learning_rate_init': [0.001, 0.01, 0.1]
    }

    return param_grid_perceptron, param_grid_mlp

def train_and_evaluate(model, X_train, y_train, X_test, y_test):
    """
    Train the model and evaluate it on both training and testing sets.
    """
    model.fit(X_train, y_train)
    y_train_pred = model.predict(X_train)
    y_test_pred = model.predict(X_test)
    train_accuracy = accuracy_score(y_train, y_train_pred)
    test_accuracy = accuracy_score(y_test, y_test_pred)
    return train_accuracy, test_accuracy

def main():
    """
    Main function to load data, create models, and perform training and evaluation.
    """
    X, y = load_data()
    X_train, X_test, y_train, y_test = split_data(X, y)
    param_grid_perceptron, param_grid_mlp = setup_search_params()

    # Perceptron with Randomized Search CV
    random_search_perceptron = RandomizedSearchCV(
        Perceptron(), param_distributions=param_grid_perceptron, 
        n_iter=10, scoring='accuracy', cv=5, verbose=1, random_state=42)
    train_accuracy_perceptron, test_accuracy_perceptron = train_and_evaluate(
        random_search_perceptron, X_train, y_train, X_test, y_test)
    print("Perceptron best parameters:", random_search_perceptron.best_params_)
    print("Perceptron training accuracy:", train_accuracy_perceptron)
    print("Perceptron test accuracy:", test_accuracy_perceptron)

    # MLP with Randomized Search CV
    random_search_mlp = RandomizedSearchCV(
        MLPClassifier(), param_distributions=param_grid_mlp, 
        n_iter=10, scoring='accuracy', cv=5, verbose=1, random_state=42)
    train_accuracy_mlp, test_accuracy_mlp = train_and_evaluate(
        random_search_mlp, X_train, y_train, X_test, y_test)
    print("MLP best parameters:", random_search_mlp.best_params_)
    print("MLP training accuracy:", train_accuracy_mlp)
    print("MLP test accuracy:", test_accuracy_mlp)

if __name__ == "__main__":
    main()


Fitting 5 folds for each of 10 candidates, totalling 50 fits
Perceptron best parameters: {'penalty': None, 'max_iter': 3000, 'eta0': 0.1}
Perceptron training accuracy: 0.3254705882352941
Perceptron test accuracy: 0.3115294117647059
Fitting 5 folds for each of 10 candidates, totalling 50 fits




MLP best parameters: {'max_iter': 1000, 'learning_rate_init': 0.01, 'hidden_layer_sizes': (50,), 'activation': 'relu'}
MLP training accuracy: 0.5925294117647059
MLP test accuracy: 0.5005882352941177


A3. Tabulate your results with various other classifiers such as Support Vector Machines, Decision 
Tree, RandomForest, CatBoost, AdaBoost, XGBoost, Naïve-Bayes. Tabulate your results for your 
problem using different performance metrics.

In [3]:
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from sklearn.naive_bayes import GaussianNB
from xgboost import XGBClassifier
from catboost import CatBoostClassifier
from tabulate import tabulate

# Function to load data
def load_data(features_path, labels_path):
    features = np.load(features_path)
    labels = np.load(labels_path)
    return features, labels

# Function to reshape the feature array
def reshape_features(features):
    # Flatten the feature arrays from (num_samples, dim1, dim2, channels) to (num_samples, features)
    return features.reshape(features.shape[0], -1)

# Function to train classifiers and evaluate them
def evaluate_classifier(X_train, X_test, y_train, y_test, classifier):
    classifier.fit(X_train, y_train)
    y_pred_train = classifier.predict(X_train)
    y_pred_test = classifier.predict(X_test)

    # Performance metrics for training data
    accuracy_train = accuracy_score(y_train, y_pred_train)
    precision_train = precision_score(y_train, y_pred_train, average='weighted')
    recall_train = recall_score(y_train, y_pred_train, average='weighted')
    f1_train = f1_score(y_train, y_pred_train, average='weighted')
    
    # Performance metrics for test data
    accuracy_test = accuracy_score(y_test, y_pred_test)
    precision_test = precision_score(y_test, y_pred_test, average='weighted')
    recall_test = recall_score(y_test, y_pred_test, average='weighted')
    f1_test = f1_score(y_test, y_pred_test, average='weighted')

    return (accuracy_train, precision_train, recall_train, f1_train,
            accuracy_test, precision_test, recall_test, f1_test)

def main():
    features_path = r"D:\SEM-4\ML\CODES\Machine-Learning\Lab04\extracted_features.npy"
    labels_path = r"D:\SEM-4\ML\CODES\Machine-Learning\Lab04\labels.npy"
    features, labels = load_data(features_path, labels_path)

    # Reshape features
    features = reshape_features(features)

    X_train, X_test, y_train, y_test = train_test_split(features, labels, test_size=0.3, random_state=42)
    
    classifiers = {
        "SVM": SVC(),
        "Decision Tree": DecisionTreeClassifier(),
        "Random Forest": RandomForestClassifier(),
        "AdaBoost": AdaBoostClassifier(),
        "XGBoost": XGBClassifier(),
        "CatBoost": CatBoostClassifier(verbose=0),  # verbose=0 to keep the output clean
        "Naive Bayes": GaussianNB()
    }

    results = []
    for name, clf in classifiers.items():
        metrics = evaluate_classifier(X_train, X_test, y_train, y_test, clf)
        results.append((name, *metrics))  # Corrected line

    # Sorting results by test accuracy for better presentation
    results.sort(key=lambda x: x[5], reverse=True)  # index 5 is test accuracy

    headers = ["Classifier", "Train Accuracy", "Train Precision", "Train Recall", "Train F1",
               "Test Accuracy", "Test Precision", "Test Recall", "Test F1"]
    print(tabulate(results, headers=headers))

if __name__ == "__main__":
    main()


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Classifier       Train Accuracy    Train Precision    Train Recall    Train F1    Test Accuracy    Test Precision    Test Recall    Test F1
-------------  ----------------  -----------------  --------------  ----------  ---------------  ----------------  -------------  ---------
CatBoost               0.837479           0.843398        0.837479    0.837454         0.523608          0.527773       0.523608   0.516043
XGBoost                0.996773           0.996774        0.996773    0.996773         0.506039          0.517401       0.506039   0.497544
Random Forest          0.999966           0.999966        0.999966    0.999966         0.447686          0.472379       0.447686   0.427746
SVM                    0.42               0.462421        0.42        0.395505         0.410275          0.440147       0.410275   0.384816
AdaBoost               0.283597           0.270916        0.283597    0.271529         0.28102           0.266179       0.28102    0.268127
Naive Bayes         