# Notebook containing the code to train and test an SVM classifier

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn import svm
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import LabelEncoder
from sklearn.impute import SimpleImputer
import numpy as np

def train_svm_classifier(data_path):
    """Train an RBF-kernel SVM on a CSV dataset and print its test accuracy.

    The CSV must contain a ``label`` column; every other column is used as a
    feature and is passed to the SVM as-is.  Labels are integer-encoded, the
    data is split 80/20 into train/test sets (fixed seed, so the split is
    reproducible), missing feature values are mean-imputed, and the accuracy
    on the held-out test set is printed.

    Args:
        data_path: Path (or any other input accepted by ``pandas.read_csv``)
            of the CSV file to load.

    Raises:
        KeyError: If the dataset has no ``label`` column.
    """
    data = pd.read_csv(data_path)
    if 'label' not in data.columns:
        raise KeyError("Expected a 'label' column in the dataset.")

    # Encode the labels as integers (a no-op mapping if they already are).
    y = LabelEncoder().fit_transform(data['label'])

    # Select features by dropping the label *by name*.  Slicing by position
    # (``iloc[:, 1:]``) silently leaks the label into the features whenever
    # 'label' is not the first column.
    X = data.drop(columns=['label'])

    has_missing = X.isnull().values.any()
    if has_missing:
        print("NaN values found. Imputing with mean values.")

    # Split BEFORE fitting any preprocessing so that no statistic computed
    # from the test rows can influence training.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42
    )

    if has_missing:
        # Column means are learned from the training split only and then
        # re-used to fill the gaps in the test split.
        imputer = SimpleImputer(missing_values=np.nan, strategy='mean')
        X_train = imputer.fit_transform(X_train)
        X_test = imputer.transform(X_test)

    # Create and train an SVM classifier with the RBF kernel
    clf = svm.SVC(kernel='rbf', gamma=0.5, C=1)
    clf.fit(X_train, y_train)

    # Evaluate on the held-out test set
    y_pred = clf.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    print(f'Accuracy: {accuracy:.2f}')

# Example usage: train on 'typedCSV.csv' (path is relative to the current
# working directory) and print the resulting test accuracy.
train_svm_classifier('typedCSV.csv')


NaN values found. Imputing with mean values.
Accuracy: 0.38


In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn import svm
from sklearn.metrics import accuracy_score, classification_report
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.impute import KNNImputer

def train_svm_classifier(data_path):
    """Tune, train and evaluate an SVM classifier on a CSV dataset.

    The CSV must contain a ``label`` column; every other column is used as a
    feature.  Labels are integer-encoded and the data is split 80/20 into
    train/test sets (fixed seed, so the split is reproducible).  Missing
    feature values are filled with a 5-nearest-neighbour imputer and the
    features are standardised; both transformers are fitted on the training
    split only.  A 3-fold grid search over ``C``, the kernel and (for the RBF
    kernel) ``gamma`` selects the model, which is then scored on the test
    split.  Accuracy and a classification report are printed.

    Args:
        data_path: Path (or any other input accepted by ``pandas.read_csv``)
            of the CSV file to load.

    Raises:
        KeyError: If the dataset has no ``label`` column.
    """
    data = pd.read_csv(data_path)
    if 'label' not in data.columns:
        raise KeyError("Expected a 'label' column in the dataset.")

    # Encode the labels as integers (a no-op mapping if they already are).
    y = LabelEncoder().fit_transform(data['label'])

    # Select features by dropping the label *by name*.  Slicing by position
    # (``iloc[:, 1:]``) silently leaks the label into the features whenever
    # 'label' is not the first column.
    X = data.drop(columns=['label'])

    has_missing = X.isnull().values.any()
    if has_missing:
        print("NaN values found. Imputing with KNN.")

    # Split BEFORE fitting any preprocessing so that no information from the
    # test rows (neighbours, means, variances) can influence training.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42
    )

    if has_missing:
        # Neighbours are searched among training rows only.
        imputer = KNNImputer(n_neighbors=5)
        X_train = imputer.fit_transform(X_train)
        X_test = imputer.transform(X_test)

    # Standardise using statistics of the training split only.
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)

    # Hyperparameter search space.  ``gamma`` has no effect on the linear
    # kernel, so it is only searched for RBF; a single flat grid would fit
    # every linear candidate three times for identical results.
    c_values = [0.1, 1, 10]
    param_grid = [
        {'kernel': ['linear'], 'C': c_values},
        {'kernel': ['rbf'], 'C': c_values, 'gamma': ['scale', 0.1, 1]},
    ]

    # Perform grid search for hyperparameter tuning
    grid_search = GridSearchCV(estimator=svm.SVC(), param_grid=param_grid, cv=3)
    grid_search.fit(X_train, y_train)

    # Best model, refit on the whole training split (GridSearchCV default)
    best_clf = grid_search.best_estimator_

    # Evaluate the accuracy and other metrics on the held-out test set
    y_pred = best_clf.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    print(f'Accuracy: {accuracy:.2f}')
    print("Classification Report:\n", classification_report(y_test, y_pred))

# Example usage: tune and train on 'typedCSV.csv' (path is relative to the
# current working directory), then print accuracy and a classification report.
train_svm_classifier('typedCSV.csv')


NaN values found. Imputing with KNN.
