In [2]:
import pandas as pd

# Load the uploaded file to check its content
file_path = 'Classification iris.xlsx'
data = pd.read_excel(file_path)

# Display the first few rows of the dataset to understand its structure
data.head()


Unnamed: 0,instance_id,sepal length,sepal width,petal length,petal width,class
0,1,5.1,3.5,1.4,0.2,Iris-setosa
1,2,4.9,3.0,1.4,0.2,Iris-setosa
2,3,4.7,3.2,1.3,0.2,Iris-setosa
3,4,4.6,3.1,1.5,0.2,Iris-setosa
4,5,5.0,3.6,1.4,0.2,Iris-setosa


In [11]:
train_set['class'].value_counts()

class
Iris-setosa        35
Iris-versicolor    35
Iris-virginica     35
Name: count, dtype: int64

In [13]:
# Split the data into training and test sets (70%-30% split) while maintaining the original order per class

# First, let's count the number of instances per class
class_counts = data['class'].value_counts()

# Calculate 70% of instances per class
train_percentage = 0.7
train_counts = (class_counts * train_percentage).astype(int)

# Create empty lists for training and test data
train_set = pd.DataFrame()
test_set = pd.DataFrame()

# Perform the split per class
for iris_class in class_counts.index:
    class_data = data[data['class'] == iris_class]
    train_data = class_data.iloc[:train_counts[iris_class]]
    test_data = class_data.iloc[train_counts[iris_class]:]
    
    train_set = pd.concat([train_set, train_data], ignore_index=True)
    test_set = pd.concat([test_set, test_data], ignore_index=True)

# Output the instance IDs of the split sets
train_instance_ids = train_set['instance_id'].tolist()
test_instance_ids = test_set['instance_id'].tolist()

print(len(train_instance_ids))
 
print(len(test_instance_ids))


105
45


In [4]:
from sklearn import svm
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score

# Prepare the feature set (X) and the labels (y)
X_train = train_set[['sepal length', 'sepal width', 'petal length', 'petal width']]
y_train = train_set['class']
X_test = test_set[['sepal length', 'sepal width', 'petal length', 'petal width']]
y_test = test_set['class']

# Convert the class labels to numeric format using LabelEncoder
label_encoder = LabelEncoder()
y_train_encoded = label_encoder.fit_transform(y_train)
y_test_encoded = label_encoder.transform(y_test)

# Train the SVM model with a linear kernel and a large C value to simulate a hard margin
svm_model = svm.SVC(kernel='linear', C=1e5)
svm_model.fit(X_train, y_train_encoded)

# Make predictions on both the training and testing data
y_train_pred = svm_model.predict(X_train)
y_test_pred = svm_model.predict(X_test)

# Calculate training and testing errors (misclassification rates)
train_error = 1 - accuracy_score(y_train_encoded, y_train_pred)
test_error = 1 - accuracy_score(y_test_encoded, y_test_pred)

# Get the weight vector, bias, and support vector indices
w = svm_model.coef_[0]
b = svm_model.intercept_[0]
support_vectors = svm_model.support_

# Organize and display results
train_error, test_error, w, b, support_vectors


(0.01904761904761909,
 0.0,
 array([ 0.00945021,  0.53765319, -0.82682922, -0.38216595]),
 np.float64(0.7740991775898646),
 array([ 23,  24,  42,  55,  57,  62,  68,  76,  96,  97,  99, 103],
       dtype=int32))

In [5]:
# Calculate errors for each class separately
class_names = label_encoder.classes_
class_results = {}

# Loop through each class (one-vs-rest strategy)
for i, class_name in enumerate(class_names):
    # Create a binary classification for the current class (vs rest)
    y_train_binary = (y_train_encoded == i).astype(int)
    y_test_binary = (y_test_encoded == i).astype(int)
    
    # Train SVM for this class (one-vs-rest)
    svm_model_binary = svm.SVC(kernel='linear', C=1e5)
    svm_model_binary.fit(X_train, y_train_binary)
    
    # Predictions for this class
    y_train_pred_binary = svm_model_binary.predict(X_train)
    y_test_pred_binary = svm_model_binary.predict(X_test)
    
    # Calculate training and testing errors for this class
    train_error_class = 1 - accuracy_score(y_train_binary, y_train_pred_binary)
    test_error_class = 1 - accuracy_score(y_test_binary, y_test_pred_binary)
    
    # Get weight vector, bias, and support vectors for this class
    w_class = svm_model_binary.coef_[0]
    b_class = svm_model_binary.intercept_[0]
    support_vectors_class = svm_model_binary.support_
    
    # Store results for the class
    class_results[class_name] = {
        'training_error': train_error_class,
        'testing_error': test_error_class,
        'w': w_class,
        'b': b_class,
        'support_vector_indices': support_vectors_class
    }

# Check which classes are linearly separable (testing error == 0)
linearly_separable_classes = [class_name for class_name, results in class_results.items() if results['testing_error'] == 0]

#lass_results, linearly_separable_classes
# Function to format and output results according to the required format
def print_svm_results(train_error, test_error, class_results, linearly_separable_classes):
    print(f"Q2.2.2 Calculation using Standard SVM Model:")
    print(f"total training error: {train_error:.3f}, total testing error: {test_error:.3f}\n")
    
    for class_name, results in class_results.items():
        print(f"class {class_name.lower()}:")
        print(f"training error: {results['training_error']:.3f}, testing error: {results['testing_error']:.3f},")
        print(f"w: [{', '.join(f'{w_value:.6f}' for w_value in results['w'])}], b: {results['b']:.4f},")
        print(f"support vector indices: [{', '.join(map(str, results['support_vector_indices']))}]\n")
    
    print(f"Linear separable classes: {', '.join(linearly_separable_classes)}")

# Print results in the required format
print_svm_results(train_error, test_error, class_results, linearly_separable_classes)


Q2.2.2 Calculation using Standard SVM Model:
total training error: 0.019, total testing error: 0.000

class iris-setosa:
training error: 0.000, testing error: 0.000,
w: [0.009733, 0.537779, -0.827351, -0.382043], b: 0.7735,
support vector indices: [42, 23, 24]

class iris-versicolor:
training error: 0.248, testing error: 0.289,
w: [1.848600, -4.502374, -1.104339, 0.321285], b: 5.6773,
support vector indices: [1, 2, 8, 9, 13, 25, 27, 34, 71, 72, 73, 77, 78, 80, 81, 86, 88, 89, 90, 91, 92, 93, 96, 99, 100, 102, 103, 104, 35, 36, 37, 39, 40, 41, 42, 43, 44, 46, 47, 48, 49, 50, 52, 55, 58, 59, 60, 61, 62, 63, 65, 66, 67, 68, 69]

class iris-virginica:
training error: 0.019, testing error: 0.000,
w: [-3.646504, -5.176364, 7.428526, 11.002416], b: -17.5704,
support vector indices: [55, 57, 62, 68, 96, 97, 99, 103]

Linear separable classes: Iris-setosa, Iris-virginica


In [6]:
from sklearn import svm
from sklearn.metrics import accuracy_score
import numpy as np

# Slack variable function
def compute_slack(X, y, w, b):
    f_x = np.dot(X, w) + b  # Compute f(X) = w.X + b
    slack = np.maximum(0, 1 - y * f_x)  # Compute slack variable as max(0, 1 - y * f(X))
    return slack

# Prepare the class results for each value of C
def train_svm_with_slack(C, X_train, y_train_encoded, X_test, y_test_encoded, class_names):
    class_results = {}
    models = {}

    for i, class_name in enumerate(class_names):
        # Create a binary classification for the current class (one-vs-rest)
        y_train_binary = (y_train_encoded == i).astype(int) * 2 - 1  # Convert to +1 and -1
        y_test_binary = (y_test_encoded == i).astype(int) * 2 - 1  # Convert to +1 and -1

        # Train SVM with linear kernel and slack (with given C)
        svm_model = svm.SVC(kernel='linear', C=C)
        svm_model.fit(X_train, y_train_binary)
        models[class_name] = svm_model

        # Predictions and errors
        y_train_pred = svm_model.predict(X_train)
        y_test_pred = svm_model.predict(X_test)

        train_error_class = 1 - accuracy_score(y_train_binary, y_train_pred)
        test_error_class = 1 - accuracy_score(y_test_binary, y_test_pred)

        # Get weight vector, bias, support vectors, and slack variables
        w_class = svm_model.coef_[0]
        b_class = svm_model.intercept_[0]
        support_vectors_class = svm_model.support_

        # Compute slack variables for support vectors
        slack = compute_slack(X_train.iloc[support_vectors_class], y_train_binary[support_vectors_class], w_class, b_class)

        # Store results for the class
        class_results[class_name] = {
            'training_error': train_error_class,
            'testing_error': test_error_class,
            'w': w_class,
            'b': b_class,
            'support_vector_indices': support_vectors_class,
            'slack_variable': slack
        }

    return class_results, models

# Function to format and print results for each C
def print_svm_slack_results(C, train_error, test_error, class_results):
    print(f"-------------------------------------------")
    print(f"C={C},")
    print(f"total training error: {train_error:.3f}, total testing error: {test_error:.3f}\n")
    
    for class_name, results in class_results.items():
        print(f"class {class_name.lower()}:")
        print(f"training error: {results['training_error']:.3f}, testing error: {results['testing_error']:.3f},")
        print(f"w: [{', '.join(f'{w_value:.6f}' for w_value in results['w'])}], b: {results['b']:.4f},")
        print(f"support vector indices: [{', '.join(map(str, results['support_vector_indices']))}]")
        print(f"slack variable: [{', '.join(f'{slack_value:.6f}' for slack_value in results['slack_variable'])}]\n")
    print(f"-------------------------------------------")

# Main loop for C values
C_values = [0.25 * t for t in range(1, 5)]
class_names = label_encoder.classes_

for C in C_values:
    # Train and test the SVM model for each C
    class_results, models = train_svm_with_slack(C, X_train, y_train_encoded, X_test, y_test_encoded, class_names)

    # Collect the predictions for each class
    y_train_pred = np.zeros((len(X_train), len(class_names)))
    y_test_pred = np.zeros((len(X_test), len(class_names)))

    for i, class_name in enumerate(class_names):
        y_train_pred[:, i] = models[class_name].predict(X_train)
        y_test_pred[:, i] = models[class_name].predict(X_test)

    # Take the class with the highest decision function (argmax) for final prediction
    y_train_final_pred = np.argmax(y_train_pred, axis=1)
    y_test_final_pred = np.argmax(y_test_pred, axis=1)

    # Calculate total training and testing errors
    total_train_error = 1 - accuracy_score(y_train_encoded, y_train_final_pred)
    total_test_error = 1 - accuracy_score(y_test_encoded, y_test_final_pred)

    # Print results for the current C
    print_svm_slack_results(C, total_train_error, total_test_error, class_results)


-------------------------------------------
C=0.25,
total training error: 0.371, total testing error: 0.356

class iris-setosa:
training error: 0.000, testing error: 0.000,
w: [-0.167742, 0.418548, -0.787097, -0.318548], b: 1.9413,
support vector indices: [42, 64, 23, 24]
slack variable: [0.207904, 0.000001, 0.030322, 0.000000]

class iris-versicolor:
training error: 0.333, testing error: 0.333,
w: [0.172386, -0.781574, -0.035647, -0.171586], b: 0.7823,
support vector indices: [1, 2, 3, 8, 9, 12, 13, 25, 29, 30, 34, 71, 72, 73, 75, 76, 77, 78, 81, 82, 83, 84, 86, 88, 89, 91, 92, 93, 95, 96, 97, 98, 99, 100, 102, 103, 104, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69]
slack variable: [0.198065, 0.010837, 0.064627, 0.190029, 0.133501, 0.197985, 0.122486, 0.208174, 0.000143, 0.095539, 0.133501, 0.164094, 0.090889, 0.093308, 0.152129, 0.220967, 0.240741, 0.467762, 0.260397, 0.053432, 0.289577, 0.0

In [17]:
from sklearn import svm
from sklearn.metrics import accuracy_score

# Function to train and test SVM with different kernel types
def train_svm_with_kernel(kernel_type, X_train, y_train_encoded, X_test, y_test_encoded, class_names, degree=None, gamma=None):
    class_results = {}

    for i, class_name in enumerate(class_names):
        # Create a binary classification for the current class (one-vs-rest)
        y_train_binary = (y_train_encoded == i).astype(int) * 2 - 1  # Convert to +1 and -1
        y_test_binary = (y_test_encoded == i).astype(int) * 2 - 1  # Convert to +1 and -1

        # Define the SVM model with the appropriate kernel and hyperparameters
        if kernel_type == 'poly':
            svm_model = svm.SVC(kernel=kernel_type, degree=degree, C=1e5)  # Polynomial kernel
        elif kernel_type == 'rbf':
            svm_model = svm.SVC(kernel=kernel_type, gamma=gamma, C=1e5)  # RBF kernel
        elif kernel_type == 'sigmoid':
            svm_model = svm.SVC(kernel=kernel_type, gamma=gamma, C=1e5)  # Sigmoidal kernel
        else:
            raise ValueError("Unknown kernel type")

        # Train the SVM
        svm_model.fit(X_train, y_train_binary)

        # Predictions and errors
        y_train_pred = svm_model.predict(X_train)
        y_test_pred = svm_model.predict(X_test)

        train_error_class = 1 - accuracy_score(y_train_binary, y_train_pred)
        test_error_class = 1 - accuracy_score(y_test_binary, y_test_pred)

        # Get support vectors
        support_vectors_class = svm_model.support_

        # Store results for the class
        class_results[class_name] = {
            'training_error': train_error_class,
            'testing_error': test_error_class,
            'support_vector_indices': support_vectors_class
        }

    return class_results

# Function to format and print results for each kernel
def print_svm_kernel_results(kernel_name, class_results):
    print(f"-------------------------------------------")
    print(f"({kernel_name}),")
    
    total_train_error = np.mean([results['training_error'] for results in class_results.values()])
    total_test_error = np.mean([results['testing_error'] for results in class_results.values()])
    
    print(f"total training error: {total_train_error:.3f}, total testing error: {total_test_error:.3f}\n")
    
    for class_name, results in class_results.items():
        print(f"class {class_name.lower()}:")
        print(f"training error: {results['training_error']:.3f}, testing error: {results['testing_error']:.3f},")
        print(f"support vector indices: [{', '.join(map(str, results['support_vector_indices']))}]\n")
    print(f"-------------------------------------------")


# Main experiment for different kernel functions
kernels = {
    '2nd-order Polynomial': {'kernel': 'poly', 'degree': 2},
    '3rd-order Polynomial': {'kernel': 'poly', 'degree': 3},
    'RBF with σ=1': {'kernel': 'rbf', 'gamma': 1},
    'Sigmoidal with σ=1': {'kernel': 'sigmoid', 'gamma': 1}
}

class_names = label_encoder.classes_

for kernel_name, params in kernels.items():
    # Train and test SVM with the specific kernel
    class_results = train_svm_with_kernel(
        kernel_type=params['kernel'],
        X_train=X_train,
        y_train_encoded=y_train_encoded,
        X_test=X_test,
        y_test_encoded=y_test_encoded,
        class_names=class_names,
        degree=params.get('degree'),
        gamma=params.get('gamma')
    )

    # Print the results for the current kernel
    print_svm_kernel_results(kernel_name, class_results)


-------------------------------------------
(2nd-order Polynomial),
total training error: 0.006, total testing error: 0.007

class iris-setosa:
training error: 0.000, testing error: 0.000,
support vector indices: [42, 23, 24]

class iris-versicolor:
training error: 0.010, testing error: 0.022,
support vector indices: [31, 76, 89, 97, 101, 103, 104, 55, 58, 62, 68]

class iris-virginica:
training error: 0.010, testing error: 0.000,
support vector indices: [32, 55, 57, 58, 68, 76, 89, 96, 97, 103]

-------------------------------------------
-------------------------------------------
(3rd-order Polynomial),
total training error: 0.003, total testing error: 0.015

class iris-setosa:
training error: 0.000, testing error: 0.000,
support vector indices: [42, 23, 24]

class iris-versicolor:
training error: 0.010, testing error: 0.044,
support vector indices: [23, 76, 89, 93, 103, 104, 55, 57, 58, 61, 68]

class iris-virginica:
training error: 0.000, testing error: 0.000,
support vector indic