In [3]:
# ==========================
# Import & Load Dataset
# ==========================
import pandas as pd
import numpy as np
from sklearn.model_selection import KFold
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score

data = pd.read_csv("drug_200.csv")
data.head()

# Encode categorical features and target
for col in data.columns:
    if data[col].dtype == 'object':
        data[col] = LabelEncoder().fit_transform(data[col])

X = data.iloc[:,:-1].values
y = data.iloc[:,-1].values

kf = KFold(n_splits=5, shuffle=True, random_state=42)


In [None]:
# ==========================
# Q1: Logistic Regression (No Reg, L1, L2, Elastic Net)
# ==========================

def sigmoid(z):
    return 1 / (1 + np.exp(-z))

def logistic_train(X, y, lr=0.01, epochs=1000, reg='none', lam=0.1, alpha=0.5):
    X = np.c_[np.ones((X.shape[0],1)), X]
    w = np.zeros(X.shape[1])
    for _ in range(epochs):
        y_pred = sigmoid(X.dot(w))
        grad = X.T.dot(y_pred - y) / len(y)
        if reg == 'l1':        # Lasso
            grad += lam * np.sign(w)
        elif reg == 'l2':      # Ridge
            grad += lam * w
        elif reg == 'elastic': # Elastic Net
            grad += lam * (alpha * np.sign(w) + (1-alpha) * w)
        w -= lr * grad
    return w

def logistic_predict(X, w):
    X = np.c_[np.ones((X.shape[0],1)), X]
    return (sigmoid(X.dot(w)) >= 0.5).astype(int)

def cross_val_logistic(X, y, reg_type):
    accs = []
    for train, test in kf.split(X):
        X_train, X_test = X[train], X[test]
        y_train, y_test = y[train], y[test]
        w = logistic_train(X_train, y_train, lr=0.1, epochs=1000, reg=reg_type, lam=0.1)
        preds = logistic_predict(X_test, w)
        accs.append(accuracy_score(y_test, preds))
    return np.mean(accs)

for reg in ['none', 'l1', 'l2', 'elastic']:
    print(f"Logistic Regression ({reg.upper()}) Accuracy:", round(cross_val_logistic(X, y, reg),4))


Logistic Regression (NONE) Accuracy: 0.08
Logistic Regression (L1) Accuracy: 0.08
Logistic Regression (L2) Accuracy: 0.08
Logistic Regression (ELASTIC) Accuracy: 0.08


In [5]:
# ==========================
# Q2: K-Nearest Neighbors (K=1,3,5)
# ==========================

def knn_predict(X_train, y_train, X_test, k):
    preds = []
    for x in X_test:
        dist = np.sqrt(((X_train - x)**2).sum(axis=1))
        idx = np.argsort(dist)[:k]
        votes = np.bincount(y_train[idx])
        preds.append(np.argmax(votes))
    return np.array(preds)

def cross_val_knn(X, y, k):
    accs = []
    for train, test in kf.split(X):
        X_train, X_test = X[train], X[test]
        y_train, y_test = y[train], y[test]
        preds = knn_predict(X_train, y_train, X_test, k)
        accs.append(accuracy_score(y_test, preds))
    return np.mean(accs)

for k in [1,3,5]:
    print(f"KNN (k={k}) Accuracy:", round(cross_val_knn(X, y, k),4))


KNN (k=1) Accuracy: 0.735
KNN (k=3) Accuracy: 0.72
KNN (k=5) Accuracy: 0.655
