Q1)
(Based on Step-by-Step Implementation of Ridge Regression using Gradient
Descent Optimization)
Generate a dataset with atleast seven highly correlated columns and a target variable.
Implement Ridge Regression using Gradient Descent Optimization. Take different
values of learning rate (such as 0.0001,0.001,0.01,0.1,1,10) and regularization
parameter (10-15,10-10,10-5,10- 3,0,1,10,20). Choose the best parameters for which ridge
regression cost function is minimum and R2_score is maximum.

In [16]:

import numpy as np
import pandas as pd
from sklearn.metrics import r2_score


np.random.seed(42)
n = 200

a1 = np.random.rand(n)*10
a2 = a1*0.9 + np.random.randn(n)*0.1
a3 = a1*1.1 + np.random.randn(n)*0.2
a4 = a2*0.8 + np.random.randn(n)*0.2
a5 = a3*1.2 + np.random.randn(n)*0.3
a6 = (a4+a5)/2 + np.random.randn(n)*0.2
a7 = (a1+a3+a5)/3 + np.random.randn(n)*0.1


y = 3*a1 + 2*a2 - a3 + 0.5*a4 + 4*a5 - 2*a6 + a7 + np.random.randn(n)*2


df = pd.DataFrame({
    'a1':a1, 'a2':a2, 'a3':a3, 'a4':a4, 'a5':a5, 'a6':a6, 'a7':a7, 'y':y
})

X = df[['a1','a2','a3','a4','a5','a6','a7']].values.astype(np.float64)
Y = df['y'].values.reshape(-1,1).astype(np.float64)


X = (X - np.mean(X,axis=0))/np.std(X,axis=0)
X = np.c_[np.ones((X.shape[0],1)), X]



def ridge_cost(X,Y,w,lam):
    m = len(Y)
    preds = X.dot(w)
    err = preds - Y
    cost = (1/(2*m))*np.sum(err**2) + (lam/(2*m))*np.sum(w[1:]**2)
    return cost

def ridge_gradient_descent(X,Y,lr,lam,iterations=1000):
    m,n = X.shape
    w = np.zeros((n,1))
    last_cost = None

    for i in range(iterations):
        preds = X.dot(w)
        err = preds - Y
        grad = (1/m)*(X.T.dot(err)) + (lam/m)*np.r_[[[0]],w[1:]]
        w = w - lr*grad


        if np.any(np.isnan(w)) or np.any(np.isinf(w)):

            return None, np.inf
        last_cost = ridge_cost(X,Y,w,lam)
    return w, last_cost


learning_rates = [0.0001,0.001,0.01,0.1,1,10]
lambdas = [1e-15,1e-10,1e-5,1e-3,0,1,10,20]

best_r2 = -999
best_cost = 999999
best_lr = None
best_lam = None
best_w = None

for lr in learning_rates:
    for lam in lambdas:
        weights, final_cost = ridge_gradient_descent(X,Y,lr,lam,iterations=1000)


        if weights is None or np.isinf(final_cost):
            print(f"lr={lr}, lam={lam} -> diverged")
            continue

        preds = X.dot(weights)
        r2 = r2_score(Y, preds)

        print(f"lr={lr}, lam={lam}, cost={round(final_cost,5)}, r2={round(r2,5)}")

        if (r2 > best_r2) or (r2 == best_r2 and final_cost < best_cost):
            best_r2 = r2
            best_cost = final_cost
            best_lr = lr
            best_lam = lam
            best_w = weights

print("\n==========================")
print("Best Learning Rate :", best_lr)
print("Best Lambda (Reg)  :", best_lam)
print("Best R2 Score      :", round(best_r2,5))
print("Min Cost Function  :", round(best_cost,5))




lr=0.0001, lam=1e-15, cost=775.02358, r2=-1.48343
lr=0.0001, lam=1e-10, cost=775.02358, r2=-1.48343
lr=0.0001, lam=1e-05, cost=775.02358, r2=-1.48343
lr=0.0001, lam=0.001, cost=775.02367, r2=-1.48343
lr=0.0001, lam=0, cost=775.02358, r2=-1.48343
lr=0.0001, lam=1, cost=775.1138, r2=-1.48354
lr=0.0001, lam=10, cost=775.92333, r2=-1.48453
lr=0.0001, lam=20, cost=776.81786, r2=-1.48562
lr=0.001, lam=1e-15, cost=117.33338, r2=0.62403
lr=0.001, lam=1e-10, cost=117.33338, r2=0.62403
lr=0.001, lam=1e-05, cost=117.33338, r2=0.62403
lr=0.001, lam=0.001, cost=117.3336, r2=0.62403
lr=0.001, lam=0, cost=117.33338, r2=0.62403
lr=0.001, lam=1, cost=117.55515, r2=0.62402
lr=0.001, lam=10, cost=119.53693, r2=0.62396
lr=0.001, lam=20, cost=121.70935, r2=0.6238
lr=0.01, lam=1e-15, cost=2.39124, r2=0.99234
lr=0.01, lam=1e-10, cost=2.39124, r2=0.99234
lr=0.01, lam=1e-05, cost=2.39125, r2=0.99234
lr=0.01, lam=0.001, cost=2.39147, r2=0.99234
lr=0.01, lam=0, cost=2.39124, r2=0.99234
lr=0.01, lam=1, cost=2.614

  return ufunc.reduce(obj, axis, dtype, out, **passkwargs)
  cost = (1/(2*m))*np.sum(err**2) + (lam/(2*m))*np.sum(w[1:]**2)
  cost = (1/(2*m))*np.sum(err**2) + (lam/(2*m))*np.sum(w[1:]**2)


lr=1, lam=20 -> diverged
lr=10, lam=1e-15 -> diverged
lr=10, lam=1e-10 -> diverged
lr=10, lam=1e-05 -> diverged
lr=10, lam=0.001 -> diverged
lr=10, lam=0 -> diverged
lr=10, lam=1 -> diverged
lr=10, lam=10 -> diverged
lr=10, lam=20 -> diverged

Best Learning Rate : 0.1
Best Lambda (Reg)  : 0
Best R2 Score      : 0.99275
Min Cost Function  : 2.26382


Q2)
Load the Hitters dataset from the following link
https://drive.google.com/file/d/1qzCKF6JKKMB0p7ul_lLy8tdmRk3vE_bG/view?usp=sharing
(a) Pre-process the data (null values, noise, categorical to numerical encoding)
(b) Separate input and output features and perform scaling
(c) Fit a Linear, Ridge (use regularization parameter as 0.5748), and LASSO (use
regularization parameter as 0.5748) regression function on the dataset.
(d) Evaluate the performance of each trained model on test set. Which model
performs the best and Why?

Q3)
Cross Validation for Ridge and Lasso Regression
Explore Ridge Cross Validation (RidgeCV) and Lasso Cross Validation (LassoCV)
function of Python. Implement both on Boston House Prediction Dataset (load_boston
dataset from sklearn.datasets).

In [15]:

import numpy as np
import pandas as pd
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.linear_model import RidgeCV, LassoCV
from sklearn.metrics import r2_score

print("Loading California Housing dataset")
data = fetch_california_housing()

X = pd.DataFrame(data.data, columns=data.feature_names)
y = data.target

print("Dataset shape:", X.shape)
print("Columns:", list(X.columns))

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, random_state=42
)
print("Train/Test split completed.")

alphas_ridge = [0.001, 0.01, 0.1, 1, 5, 10, 50, 100]
ridge_model = RidgeCV(alphas=alphas_ridge, store_cv_values=True)
ridge_model.fit(X_train, y_train)

ridge_preds = ridge_model.predict(X_test)
ridge_r2 = r2_score(y_test, ridge_preds)

print("Ridge Regression Results")
print("Best Alpha (λ):", ridge_model.alpha_)
print("R2 Score:", round(ridge_r2, 4))

alphas_lasso = [0.0001, 0.001, 0.01, 0.1, 1, 10]
lasso_model = LassoCV(alphas=alphas_lasso, cv=5, max_iter=10000, random_state=42)
lasso_model.fit(X_train, y_train)

lasso_preds = lasso_model.predict(X_test)
lasso_r2 = r2_score(y_test, lasso_preds)

print("Lasso Regression Results")
print("Best Alpha (λ):", lasso_model.alpha_)
print("R2 Score:", round(lasso_r2, 4))

print(" Model Comparison")
if ridge_r2 > lasso_r2:
    print(f"Ridge performed better with R2 = {round(ridge_r2,4)}")
else:
    print(f"Lasso performed better with R2 = {round(lasso_r2,4)}")





Loading California Housing dataset
Dataset shape: (20640, 8)
Columns: ['MedInc', 'HouseAge', 'AveRooms', 'AveBedrms', 'Population', 'AveOccup', 'Latitude', 'Longitude']
Train/Test split completed.
Ridge Regression Results
Best Alpha (λ): 10.0
R2 Score: 0.5916




Lasso Regression Results
Best Alpha (λ): 0.0001
R2 Score: 0.5912
 Model Comparison
Ridge performed better with R2 = 0.5916


Q4)
Multiclass Logistic Regression: Implement Multiclass Logistic Regression (step-by step)
on Iris dataset using one vs. rest strategy?

In [17]:


import numpy as np
import pandas as pd
from sklearn.datasets import load_iris
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report


iris = load_iris()
X = iris.data
y = iris.target
labels = iris.target_names

print("Dataset shape:", X.shape)
print("Unique classes:", labels)


X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42)
print("Train/Test split done!")

model = LogisticRegression(multi_class='ovr', max_iter=200)
model.fit(X_train, y_train)

y_pred = model.predict(X_test)

acc = accuracy_score(y_test, y_pred)
cm = confusion_matrix(y_test, y_pred)
report = classification_report(y_test, y_pred, target_names=labels)

print("Model Evaluation")
print("Accuracy:", round(acc * 100, 2), "%")
print("\nConfusion Matrix:\n", cm)
print("\nClassification Report:\n", report)

print("\nModel coefficients (for each class):")
for i, cls in enumerate(labels):
    print(f"{cls} → Coefficients:", model.coef_[i])

sample = X_test[0].reshape(1, -1)
predicted_class = model.predict(sample)[0]
print("\nExample Prediction:")
print("Actual Class:", labels[y_test[0]])
print("Predicted Class:", labels[predicted_class])


Dataset shape: (150, 4)
Unique classes: ['setosa' 'versicolor' 'virginica']
Train/Test split done!
Model Evaluation
Accuracy: 97.37 %

Confusion Matrix:
 [[15  0  0]
 [ 0 10  1]
 [ 0  0 12]]

Classification Report:
               precision    recall  f1-score   support

      setosa       1.00      1.00      1.00        15
  versicolor       1.00      0.91      0.95        11
   virginica       0.92      1.00      0.96        12

    accuracy                           0.97        38
   macro avg       0.97      0.97      0.97        38
weighted avg       0.98      0.97      0.97        38


Model coefficients (for each class):
setosa → Coefficients: [-0.4150181   0.86740485 -2.18505099 -0.90552924]
versicolor → Coefficients: [-0.15342167 -2.09570192  0.54584218 -0.97607025]
virginica → Coefficients: [-0.37075602 -0.50440372  2.72707974  2.02082242]

Example Prediction:
Actual Class: versicolor
Predicted Class: versicolor


