In [187]:
import numpy as np
import pandas as pd

In [188]:
class LinearSVM:
    """Linear soft-margin SVM trained by per-sample subgradient descent.

    The model minimises the L2-regularised hinge loss. Labels must be encoded
    as -1 / +1.

    Parameters
    ----------
    learning_rate : float
        Base step size. Every per-sample step is additionally divided by the
        number of training samples (see ``fit``).
    epochs : int
        Number of full passes over the training data.
    lambda_param : float
        L2 regularisation strength applied to the weights (the bias is not
        regularised).

    Attributes
    ----------
    w : numpy.ndarray or None
        Weight vector of length ``n_features``; ``None`` until ``fit`` is called.
    b : float
        Bias term.
    """

    def __init__(self, learning_rate=0.001, epochs=1000, lambda_param=1e-4):
        self.w = None
        self.b = 0
        self.learning_rate = learning_rate
        self.epochs = epochs
        self.lambda_param = lambda_param

    def fit(self, X, y):
        """Train the model from scratch on ``X`` (n_samples, n_features) and ``y``.

        ``X`` and ``y`` may be any array-like (ndarray, DataFrame, Series, list);
        they are converted to NumPy arrays so that row iteration and positional
        label lookup behave the same for every input type.
        """
        X = np.asarray(X, dtype=float)
        y = np.asarray(y).ravel()
        n_samples, n_features = X.shape

        # Reset BOTH parameters so that calling fit() twice gives the same
        # model as calling it once (previously only w was reset).
        self.w = np.zeros(n_features)
        self.b = 0

        for _ in range(self.epochs):
            for x_i, y_i in zip(X, y):
                margin = y_i * (np.dot(x_i, self.w) + self.b)
                if margin >= 1:
                    # Correct side with enough margin: only the regulariser acts.
                    dw = self.lambda_param * self.w
                    db = 0
                else:
                    # Margin violated: add the hinge-loss subgradient.
                    dw = self.lambda_param * self.w - y_i * x_i
                    db = -y_i

                # Per-sample step, scaled down by the number of samples.
                self.w -= self.learning_rate * dw / n_samples
                self.b -= self.learning_rate * db / n_samples

    def predict(self, X):
        """Return predicted labels (-1.0 or 1.0) for the rows of ``X``.

        A point lying exactly on the decision boundary is assigned +1 so the
        result is always a valid label (``np.sign`` would give 0 there).
        """
        scores = np.dot(np.asarray(X, dtype=float), self.w) + self.b
        return np.where(scores >= 0, 1.0, -1.0)

    def calculate_accuracy(self, X, y):
        """Return the fraction of rows of ``X`` whose prediction equals ``y``."""
        predictions = self.predict(X)
        # ravel() avoids an (n, n) broadcast when y is passed as a column vector.
        return np.mean(predictions == np.asarray(y).ravel())

    def calculate_error_rate(self, X, y):
        """Return the misclassification rate on ``(X, y)`` as a percentage."""
        accuracy = self.calculate_accuracy(X, y)
        return (1 - accuracy) * 100

In [189]:
# Read the train / test splits from the working directory and preview the
# first rows of the training frame.
train, test = (
    pd.read_csv(csv_path)
    for csv_path in ('coding5_train.csv', 'coding5_test.csv')
)
train.head()

Unnamed: 0,V1,V2,V3,V4,V5,V6,V7,V8,V9,V10,...,V248,V249,V250,V251,V252,V253,V254,V255,V256,Y
0,-1,-1.0,-1.0,-0.813,-0.671,-0.809,-0.887,-0.671,-0.853,-1.0,...,-0.671,-0.033,0.761,0.762,0.126,-0.095,-0.671,-0.828,-1.0,5
1,-1,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-0.912,...,-0.257,0.48,0.966,0.408,-0.895,-1.0,-1.0,-1.0,-1.0,5
2,-1,-1.0,-1.0,-0.449,1.0,1.0,1.0,1.0,1.0,0.84,...,0.626,1.0,0.83,0.494,-0.343,-1.0,-1.0,-1.0,-1.0,5
3,-1,-1.0,-1.0,-1.0,-0.989,-0.185,0.626,1.0,0.672,0.372,...,0.765,0.372,-0.163,-0.968,-1.0,-1.0,-1.0,-1.0,-1.0,5
4,-1,-1.0,-0.999,-0.699,-0.684,-0.909,-1.0,-0.764,-0.684,-0.303,...,0.438,0.903,0.762,0.14,-0.716,-1.0,-1.0,-1.0,-1.0,5


In [190]:
# train.head() shows a leading 'Unnamed: 0' column holding 0, 1, 2, ... — a
# saved row index, not a feature. Drop it (from both frames, if present)
# so it is not fed to the classifiers as an input.
train_xy = train.drop(columns=['Unnamed: 0'], errors='ignore')
test_xy = test.drop(columns=['Unnamed: 0'], errors='ignore')

# The last column (Y) is the label; everything before it is a feature.
X_train = train_xy.iloc[:, :-1].values
y_train = train_xy.iloc[:, -1].values
X_test = test_xy.iloc[:, :-1].values
y_test = test_xy.iloc[:, -1].values

# Labels are the digits 5 and 6; encode them as -1 (for 5) and +1 (for 6),
# the convention the SVM classes expect.
y_train = np.where(y_train == 5, -1, 1)
y_test = np.where(y_test == 5, -1, 1)

In [191]:
# Train the subgradient-descent SVM with its default hyperparameters,
# spelled out here so the reader can see and tune them.
model = LinearSVM(learning_rate=0.001, epochs=1000, lambda_param=1e-4)
model.fit(X=X_train, y=y_train)

In [192]:
# NOTE: despite its name, y_pred holds the test error rate in percent
# (the return value of calculate_error_rate), not predicted labels.
y_pred = model.calculate_error_rate(X_test, y_test)

In [193]:
# Display the value computed by calculate_error_rate (a percentage).
y_pred

2.833333333333332

In [194]:
import numpy as np

class PegasosSVM:
    """Linear SVM trained with Pegasos-style stochastic subgradient descent.

    Each epoch visits the samples in a fresh random order and uses the
    decaying step size ``eta_t = 1 / (lambda_param * t)``, where ``t`` counts
    the updates performed so far. Labels must be encoded as -1 / +1.

    Parameters
    ----------
    lambda_param : float
        L2 regularisation strength applied to the weights (the bias is not
        regularised).
    epochs : int
        Number of full passes over the training data.
    random_state : int or None
        Seed for the per-epoch shuffling. ``None`` draws from NumPy's global
        random state.

    Attributes
    ----------
    w : numpy.ndarray or None
        Weight vector of length ``n_features``; ``None`` until ``fit`` is called.
    b : float
        Bias term.
    """

    def __init__(self, lambda_param=0.01, epochs=20, random_state=None):
        self.lambda_param = lambda_param
        self.epochs = epochs
        self.random_state = random_state
        self.w = None
        self.b = 0

    def fit(self, X, y):
        """Train the model from scratch on ``X`` (n_samples, n_features) and ``y``.

        ``X`` and ``y`` may be any array-like; they are converted to NumPy
        arrays so positional indexing works for DataFrame / Series input too.
        """
        # Use a private generator when a seed is given instead of reseeding
        # NumPy's global state (which would silently affect every later random
        # call in the notebook). RandomState(seed) yields the same permutation
        # stream that np.random.seed(seed) + np.random.permutation did.
        if self.random_state is None:
            rng = np.random
        else:
            rng = np.random.RandomState(self.random_state)

        X = np.asarray(X, dtype=float)
        y = np.asarray(y).ravel()
        n_samples, n_features = X.shape

        # Reset BOTH parameters so repeated fit() calls are independent
        # (previously only w was reset).
        self.w = np.zeros(n_features)
        self.b = 0

        t = 0  # Global update counter driving the step-size decay
        for _ in range(self.epochs):
            # Visit the samples in a new random order every epoch
            for i in rng.permutation(n_samples):
                t += 1
                eta_t = 1 / (t * self.lambda_param)  # Decaying learning rate
                y_i = y[i]
                x_i = X[i]

                if y_i * (np.dot(x_i, self.w) + self.b) < 1:
                    # Margin violated: regulariser plus hinge-loss subgradient
                    delta_w = self.lambda_param * self.w - y_i * x_i
                    delta_b = -y_i
                else:
                    # Margin satisfied: only the regulariser acts on w
                    delta_w = self.lambda_param * self.w
                    delta_b = 0

                self.w -= eta_t * delta_w
                self.b -= eta_t * delta_b  # No-op unless the margin was violated

    def predict(self, X):
        """Return predicted labels (-1.0 or 1.0) for the rows of ``X``.

        A point lying exactly on the decision boundary is assigned +1 so the
        result is always a valid label (``np.sign`` would give 0 there).
        """
        scores = np.dot(np.asarray(X, dtype=float), self.w) + self.b
        return np.where(scores >= 0, 1.0, -1.0)

    def calculate_accuracy(self, X, y):
        """Return the fraction of rows of ``X`` whose prediction equals ``y``."""
        predictions = self.predict(X)
        # ravel() avoids an (n, n) broadcast when y is passed as a column vector.
        return np.mean(predictions == np.asarray(y).ravel())

    def calculate_error_rate(self, X, y):
        """Return the misclassification rate on ``(X, y)`` as a percentage."""
        accuracy = self.calculate_accuracy(X, y)
        return (1 - accuracy) * 100




In [195]:
# Pegasos shuffles the samples randomly every epoch, so fix the seed to make
# the reported test error reproducible under Restart & Run All.
model = PegasosSVM(epochs=2000, random_state=42)
model.fit(X_train, y_train)
# NOTE: despite its name, y_pred holds the test error rate in percent.
y_pred = model.calculate_error_rate(X_test, y_test)
y_pred

8.166666666666666