# **OML Assignment 10**
### B20CS033

In [None]:
import numpy as np
import pandas as pd
from sklearn import preprocessing


### Q1. SVM - Primal, Dual

In [1]:
def linear_kernel(x1, x2):
    return np.dot(x1, x2)

def svm_primal(X, y, C=1.0, learning_rate=0.01, epochs=1000):
    m, n = X.shape
    theta = np.zeros(n)
    bias = 0

    for epoch in range(epochs):
        for i in range(m):
            if y[i] * (np.dot(theta, X[i]) + bias) >= 1:
                theta -= learning_rate * (2 * C * theta)
            else:
                theta -= learning_rate * (2 * C * theta - np.dot(X[i], y[i]))
                bias -= learning_rate * y[i]

    return theta, bias

In [2]:
def svm_dual(X, y, C=1.0, learning_rate=0.01, epochs=1000):
    m, n = X.shape
    alpha = np.zeros(m)

    for epoch in range(epochs):
        for i in range(m):
            gradient = 0
            for j in range(m):
                gradient += alpha[j] * y[i] * y[j] * linear_kernel(X[i], X[j])

            if 1 - gradient * y[i] >= 0:
                alpha[i] += learning_rate
            else:
                alpha[i] -= learning_rate

    # Compute theta and bias from alpha
    theta = np.sum(alpha[i] * y[i] * X[i] for i in range(m))
    bias = np.mean(y - np.dot(X, theta))

    return theta, bias

### 4ColumnDataset

In [6]:
df = pd.read_csv("4ColumnDataset(3).csv")
X = df.drop("target", axis=1).values
y = df["target"].values

In [10]:
theta_primal, bias_primal = svm_primal(X, y)
print(f"Theta for primal of SVM is {theta_primal}")

theta_dual, bias_dual = svm_dual(X, y)
print(f"Theta for dual of SVM is {theta_dual}")

Theta for primal of SVM is [ 0.51437567  0.15972682 -0.15840418 -0.2238032 ]
Theta for dual of SVM is [  -83.77069761 -2726.01963201 -3066.24031772  3332.02759515]


  theta = np.sum(alpha[i] * y[i] * X[i] for i in range(m))


### Generated_test

In [24]:
df = pd.read_csv("generated_test.csv", header=None)
df.drop(index=400, inplace=True)
X = df.drop(2, axis=1).values.astype(float)
y = df[2].values.astype(float)

In [25]:
theta_primal, bias_primal = svm_primal(X, y)
print(f"Theta for primal of SVM is {theta_primal}")

theta_dual, bias_dual = svm_dual(X, y)
print(f"Theta for dual of SVM is {theta_dual}")

Theta for primal of SVM is [0.46300757 0.40782706]
Theta for dual of SVM is [0.44479136 0.13081562]


  theta = np.sum(alpha[i] * y[i] * X[i] for i in range(m))


### Diabetes

In [28]:
df = pd.read_csv("diabetes.csv")
X = df.drop("Outcome", axis=1).values.astype(float)
y = df["Outcome"].values.astype(float)

In [29]:
theta_primal, bias_primal = svm_primal(X, y)
print(f"Theta for primal of SVM is {theta_primal}")

theta_dual, bias_dual = svm_dual(X, y)
print(f"Theta for dual of SVM is {theta_dual}")

Theta for primal of SVM is [0.00949135 1.32422952 0.76354653 0.01143894 0.08316805 0.37576435
 0.00341356 0.49501219]
Theta for dual of SVM is [ 0.6      0.42     1.88    -1.6      1.84    -0.216    3.04836  1.16   ]


  theta = np.sum(alpha[i] * y[i] * X[i] for i in range(m))


### Breast-Cancer

In [48]:
df = pd.read_csv("breast-cancer.csv")
df.drop("id", axis=1 , inplace=True)
X = df.drop("diagnosis", axis=1).values.astype(float)
y = df["diagnosis"].values
y[y=="M"] = 1
y[y=="B"] = -1

In [49]:
theta_primal, bias_primal = svm_primal(X, y)
print(f"Theta for primal of SVM is {theta_primal}")

theta_dual, bias_dual = svm_dual(X, y)
print(f"Theta for dual of SVM is {theta_dual}")

Theta for primal of SVM is [-5.33866598e-01 -1.24114342e+00 -3.23648524e+00 -4.91241673e-01
 -6.85742320e-03 -3.73092732e-03  3.76585941e-03  1.43393424e-03
 -1.26868111e-02 -5.11200283e-03 -5.53995719e-03 -8.71976713e-02
 -3.16286809e-02  1.07836869e+00 -7.56173396e-04 -9.65178330e-04
 -4.67439876e-04 -3.89108658e-04 -1.46438066e-03 -2.94335298e-04
 -5.22954717e-01 -1.55266673e+00 -3.07676379e+00  8.55294391e+00
 -1.00039884e-02 -3.44413901e-03  8.96681206e-03  2.81864793e-04
 -1.85796626e-02 -5.95208787e-03]
Theta for dual of SVM is [-1.44184447e+04 -2.75088002e+04 -8.75051420e+04 -8.74378540e+04
 -1.56105172e+02 -4.03646934e+01  1.03459679e+02  5.36561899e+01
 -2.93041386e+02 -1.17546440e+02 -1.19575094e+02 -2.27858466e+03
 -6.95032202e+02  2.01847282e+04 -1.39762606e+01 -2.03789762e+01
 -2.01989519e+01 -9.46048696e+00 -3.74139325e+01 -5.94147140e+00
 -1.41229888e+04 -3.51088108e+04 -8.53383748e+04  6.36851800e+04
 -2.03972307e+02 -2.58393242e+01  1.52947496e+02  3.35488666e+01
 -4.

  theta = np.sum(alpha[i] * y[i] * X[i] for i in range(m))
