In [23]:
import numpy as np

class SVM:

    def __init__(self,learningrate=0.01,n_iters=1000,lambda_value=0.2):

        self.learningrate = learningrate
        self.n_iters = n_iters
        self.lambda_value = lambda_value
        self.weight = None
        self.bias = None

    def fit(self,X,y):
        n_samples,n_features = X.shape

        self.weight = np.zeros(n_features)
        self.bias = 0

        ##gradient descent

        for _ in range(self.n_iters):

            #label encoding
            y_label = np.where(y<=0,-1,1)

            for index,x_i in enumerate(X):

                y_i = y_label[index]

                condition = True if y_i*(np.dot(x_i,self.weight)-self.bias)  >= 1 else False

                if(condition):
                    dw = 2*self.lambda_value*self.weight
                    db = 0
                else:
                    dw = (2*self.lambda_value*self.weight) - np.dot(x_i,y_i)
                    db = y_i

                self.weight = self.weight-(self.learningrate*dw)
                self.bias = self.bias - (self.learningrate*db)


    def predict(self,X):

        output = np.dot(X,self.weight)-self.bias    #output will be any number in +ve or _ve

        #roundoff
        predicted_labels = np.sign(output)

        y_pred = np.where(predicted_labels<=-1,0,1)  #reverting values to back as in dataset

        return y_pred



In [17]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score

data = pd.read_csv("/content/diabetes.csv")

In [18]:
data.describe()

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
count,768.0,768.0,768.0,768.0,768.0,768.0,768.0,768.0,768.0
mean,3.845052,120.894531,69.105469,20.536458,79.799479,31.992578,0.471876,33.240885,0.348958
std,3.369578,31.972618,19.355807,15.952218,115.244002,7.88416,0.331329,11.760232,0.476951
min,0.0,0.0,0.0,0.0,0.0,0.0,0.078,21.0,0.0
25%,1.0,99.0,62.0,0.0,0.0,27.3,0.24375,24.0,0.0
50%,3.0,117.0,72.0,23.0,30.5,32.0,0.3725,29.0,0.0
75%,6.0,140.25,80.0,32.0,127.25,36.6,0.62625,41.0,1.0
max,17.0,199.0,122.0,99.0,846.0,67.1,2.42,81.0,1.0


In [19]:
data['Outcome'].value_counts()  #0 -> non diab  #1->diabe

0    500
1    268
Name: Outcome, dtype: int64

In [20]:

X,y = data.drop(columns='Outcome',axis=1),data['Outcome']

scaler = StandardScaler()
scaler.fit(X)
X = scaler.transform(X)

X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.2,random_state=2)



In [35]:
svm = SVM(learningrate=0.001,n_iters=1000,lambda_value=0.1)
svm.fit(X_train,y_train)

#predict using test values
y_pred = svm.predict(X_test)

print(accuracy_score(y_test,y_pred))


0.7727272727272727


In [37]:
#evaluating for overfitting
#predict values for train set
y_tr = svm.predict(X_train)
print(accuracy_score(y_train,y_tr))

0.7801302931596091
