In [52]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

In [90]:
class SVM:
    def __init__(self, learning_rate=0.001, lambda_param=0.01, n_iters=1000):
        self.lr = learning_rate
        self.lambda_param = lambda_param
        self.n_iters = n_iters
        self.w = None
        self.b = None
        # init function is used to create a self object of the class to use across all the methods.

    def fit(self, X, y):
        n_samples, n_features = X.shape # this is the number of rows and columns in the X matrix 

        y_ = np.where(y <= 0, -1, 1)   # y_ = {-1, 1} 

        self.w = np.zeros(n_features, dtype=float) # this is the weight vector where the number of elements is equal to the number of features which is represented intially to 0.
        self.b = 0.0 # this is the bias which is intially set to 0.

        for _ in range (self.n_iters):
            # this is the training loop with n_iters number of iterations. which is defined in the init function.
            for idx, x_i in enumerate(X):
                # this is the enumerate function which is used to iterate over the X matrix.
                condition = y_[idx] * (np.dot(x_i, self.w) - self.b) >= 1 # this is the condition for the SVM algorithm. idx is the index of the row and x_i is the row itself.
                print(y_.dtype, x_i.dtype, self.w.dtype, self.b.dtype)
                # this condition is calculated with y_ with the index of the row and the dot product of the row and the weight vector minus the bias. which should be greater than or equal to 1.
                if condition:
                     # if the condition is true then the weight vector is updated with the learning rate and the lambda parameter multiplied by the weight vector.
                    self.w -= self.lr * (2 * self.lambda_param * self.w)  # L2 regularization to prevent overfitting
                else:
                    # if the condition is false then the weight vector is updated with the learning rate and the lambda parameter multiplied by the weight vector minus the dot product of the row and the y_ with the index of the row.
                    self.w -= self.lr * (2 * self.lambda_param * self.w - np.dot(x_i, y_[idx]))  # This moves the weight vector towards the correct classification to improve the current sample
                    self.b -= self.lr * y_[idx]  # This helps adjust the decision boundary to improve the current sample

    def predict(self, X):
        linear_output = np.dot(X, self.w) - self.b
        return np.sign(linear_output)

In [79]:
df = pd.read_csv('/Users/sibivishtan/Downloads/learning_DL/data/student.csv')
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10000 entries, 0 to 9999
Data columns (total 6 columns):
 #   Column                            Non-Null Count  Dtype  
---  ------                            --------------  -----  
 0   Hours Studied                     10000 non-null  int64  
 1   Previous Scores                   10000 non-null  int64  
 2   Extracurricular Activities        10000 non-null  object 
 3   Sleep Hours                       10000 non-null  int64  
 4   Sample Question Papers Practiced  10000 non-null  int64  
 5   Performance Index                 10000 non-null  float64
dtypes: float64(1), int64(4), object(1)
memory usage: 468.9+ KB


In [80]:
#lets convert extracirucular activites to binary no is 0 and yes is 1
df['Extracurricular Activities'] = df['Extracurricular Activities'].map({'No': 0, 'Yes': 1})


In [81]:
#performance index is converted to int from float
df['Performance Index'] = df['Performance Index'].astype(int)
df['Previous Scores'] = StandardScaler().fit_transform(df['Previous Scores'].values.reshape(-1,1))
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10000 entries, 0 to 9999
Data columns (total 6 columns):
 #   Column                            Non-Null Count  Dtype  
---  ------                            --------------  -----  
 0   Hours Studied                     10000 non-null  int64  
 1   Previous Scores                   10000 non-null  float64
 2   Extracurricular Activities        10000 non-null  int64  
 3   Sleep Hours                       10000 non-null  int64  
 4   Sample Question Papers Practiced  10000 non-null  int64  
 5   Performance Index                 10000 non-null  int64  
dtypes: float64(1), int64(5)
memory usage: 468.9 KB


In [85]:
#split test train data
X_train, X_test, y_train, y_test = train_test_split(df[['Previous Scores', 'Extracurricular Activities']], df['Performance Index'], test_size=0.2, random_state=42)
X_train = X_train.np.asarray(float)
y_train = y_train.astype(float)


In [91]:
#lets pass the data to the SVM model
clf = SVM() 
clf.fit(X_train, y_train)

UFuncTypeError: ufunc 'multiply' did not contain a loop with signature matching types (dtype('<U15'), dtype('<U32')) -> None

In [88]:
print(X_train.dtypes)
print(y_train.dtypes)


Previous Scores               float64
Extracurricular Activities    float64
dtype: object
float64


In [69]:
from sklearn.svm import SVC
clf = SVC()
clf.fit(X_train, y_train)

  if not hasattr(array, "sparse") and array.dtypes.apply(is_sparse).any():
  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):
  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):


In [71]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix

# Assume clf is your trained model and X_test and y_test are your test data
y_pred = clf.predict(X_test)

print("Accuracy:", accuracy_score(y_test, y_pred))
print("Precision:", precision_score(y_test, y_pred, average='macro'))
print("Recall:", recall_score(y_test, y_pred, average='macro'))
print("F1 Score:", f1_score(y_test, y_pred, average='macro'))
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))

  if not hasattr(array, "sparse") and array.dtypes.apply(is_sparse).any():
  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):


Accuracy: 0.04
Precision: 0.006643440393061271
Recall: 0.029056987171608647
F1 Score: 0.009786248180687023
Confusion Matrix:
 [[0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 ...
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]]


  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):
  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):
  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):
  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):
  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):
  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):
  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):
  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):
  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):
  _warn_prf(average, modifier, msg_start, len(result))
  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):
  if 