Q1 (Based on Step-by-Step Implementation of Ridge Regression using  Gradient  
Descent Optimization)  
Generate a dataset with atleast seven highly correlated columns and a target variable. Implement Ridge Regression using Gradient Descent Optimization. Take different values of learning rate (such as 0.0001,0.001,0.01,0.1,1,10) and regularization parameter (10-15,10-10,10-5,10- 3,0,1,10,20). Choose the best parameters for which ridge regression cost function is minimum and R2_score is maximum.  

In [18]:
import numpy as np
import pandas as pd
from sklearn.datasets import make_regression
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import r2_score

X, y = make_regression(
    n_samples=500, n_features=7, noise=10, effective_rank=1, random_state=42
)
X = np.hstack([X, X[:, [0]] * 0.9 + np.random.randn(500, 1) * 0.01])

scaler = StandardScaler()
X = scaler.fit_transform(X)
y = y.reshape(-1, 1)

class RidgeRegressionGD:
    def __init__(self, learning_rate=0.01, lambda_=1.0, n_iters=1000):
        self.learning_rate = learning_rate
        self.lambda_ = lambda_
        self.n_iters = n_iters

    def fit(self, X, y):
        n_samples, n_features = X.shape
        self.weights = np.zeros((n_features, 1))
        self.bias = 0
        self.cost_history = []

        for i in range(self.n_iters):
            y_pred = np.dot(X, self.weights) + self.bias
            error = y_pred - y

            dw = (1 / n_samples) * (np.dot(X.T, error) + self.lambda_ * self.weights)
            db = (1 / n_samples) * np.sum(error)

            self.weights -= self.learning_rate * dw
            self.bias -= self.learning_rate * db

            cost = self.cost(X, y)
            if np.isnan(cost) or np.isinf(cost):
                print(f"Diverged (NaN/Inf) at iter {i} for LR={self.learning_rate}, λ={self.lambda_}")
                break
            self.cost_history.append(cost)

    def predict(self, X):
        return np.dot(X, self.weights) + self.bias

    def cost(self, X, y):
        n_samples = X.shape[0]
        y_pred = self.predict(X)
        if np.any(np.isnan(y_pred)):
            return np.nan
        mse = np.mean((y - y_pred) ** 2)
        ridge_penalty = (self.lambda_ / (2 * n_samples)) * np.sum(self.weights ** 2)
        return mse + ridge_penalty

learning_rates = [0.00001, 0.0001, 0.001, 0.01] 
lambdas = [1e-15, 1e-10, 1e-5, 0.001, 0.1, 1, 10, 20]

best_r2 = -np.inf
best_params = {}

for lr in learning_rates:
    for lam in lambdas:
        model = RidgeRegressionGD(learning_rate=lr, lambda_=lam, n_iters=2000)
        model.fit(X, y)

        y_pred = model.predict(X)

        if np.any(np.isnan(y_pred)) or np.any(np.isnan(model.weights)):
            print(f"Skipped LR={lr}, Lambda={lam} due to NaN values")
            continue

        cost = model.cost(X, y)
        if np.isnan(cost):
            continue

        r2 = r2_score(y, y_pred)
        if np.isnan(r2):
            continue

        print(f"LR={lr}, Lambda={lam}, Cost={cost:.4f}, R2={r2:.4f}")

        if r2 > best_r2:
            best_r2 = r2
            best_params = {'LearningRate': lr, 'lambda': lam, 'cost': cost, 'R2': r2}

print("\n Best Parameters Found:")
print(best_params)


LR=1e-05, Lambda=1e-15, Cost=91.0104, R2=0.0005
LR=1e-05, Lambda=1e-10, Cost=91.0104, R2=0.0005
LR=1e-05, Lambda=1e-05, Cost=91.0104, R2=0.0005
LR=1e-05, Lambda=0.001, Cost=91.0104, R2=0.0005
LR=1e-05, Lambda=0.1, Cost=91.0104, R2=0.0005
LR=1e-05, Lambda=1, Cost=91.0104, R2=0.0005
LR=1e-05, Lambda=10, Cost=91.0104, R2=0.0005
LR=1e-05, Lambda=20, Cost=91.0105, R2=0.0005
LR=0.0001, Lambda=1e-15, Cost=90.6244, R2=0.0047
LR=0.0001, Lambda=1e-10, Cost=90.6244, R2=0.0047
LR=0.0001, Lambda=1e-05, Cost=90.6244, R2=0.0047
LR=0.0001, Lambda=0.001, Cost=90.6244, R2=0.0047
LR=0.0001, Lambda=0.1, Cost=90.6244, R2=0.0047
LR=0.0001, Lambda=1, Cost=90.6245, R2=0.0047
LR=0.0001, Lambda=10, Cost=90.6256, R2=0.0047
LR=0.0001, Lambda=20, Cost=90.6267, R2=0.0047
LR=0.001, Lambda=1e-15, Cost=89.9430, R2=0.0122
LR=0.001, Lambda=1e-10, Cost=89.9430, R2=0.0122
LR=0.001, Lambda=1e-05, Cost=89.9430, R2=0.0122
LR=0.001, Lambda=0.001, Cost=89.9430, R2=0.0122
LR=0.001, Lambda=0.1, Cost=89.9431, R2=0.0122
LR=0.001, 

Q2 Load the Hitters dataset from the following link 
https://drive.google.com/file/d/1qzCKF6JKKMB0p7ul_lLy8tdmRk3vE_bG/view?usp=sharing  
(a)	Pre-process the data (null values, noise, categorical to numerical encoding)  
(b)	Separate input and output features and perform scaling  
(c)	Fit a Linear, Ridge (use regularization parameter as 0.5748), and LASSO (use regularization parameter as 0.5748) regression function on the dataset.  
(d)	Evaluate the performance of each trained model on test set. Which model performs the best and Why?  
  


In [19]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.linear_model import LinearRegression, Ridge, Lasso
from sklearn.metrics import mean_squared_error, r2_score

file_path = "Hitters.csv" 
df = pd.read_csv(file_path)

print("Dataset Loaded Successfully")
print("Shape:", df.shape)
print("First 5 rows:\n", df.head())

print("\nMissing values before:\n", df.isnull().sum())
df = df.dropna() 
print("\nMissing values after cleaning:\n", df.isnull().sum())

categorical_cols = df.select_dtypes(include=['object']).columns
print("\nCategorical Columns:", list(categorical_cols))

df = pd.get_dummies(df, columns=categorical_cols, drop_first=True)

X = df.drop("Salary", axis=1)
y = df["Salary"]

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

linear_model = LinearRegression()
linear_model.fit(X_train_scaled, y_train)
y_pred_linear = linear_model.predict(X_test_scaled)

ridge_model = Ridge(alpha=0.5748)
ridge_model.fit(X_train_scaled, y_train)
y_pred_ridge = ridge_model.predict(X_test_scaled)

lasso_model = Lasso(alpha=0.5748)
lasso_model.fit(X_train_scaled, y_train)
y_pred_lasso = lasso_model.predict(X_test_scaled)

def evaluate_model(y_test, y_pred, model_name):
    mse = mean_squared_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)
    print(f"\n{model_name} Performance:")
    print(f"Mean Squared Error: {mse:.4f}")
    print(f"R2 Score: {r2:.4f}")
    return {"Model": model_name, "MSE": mse, "R2": r2}

results = []
results.append(evaluate_model(y_test, y_pred_linear, "Linear Regression"))
results.append(evaluate_model(y_test, y_pred_ridge, "Ridge Regression"))
results.append(evaluate_model(y_test, y_pred_lasso, "Lasso Regression"))

results_df = pd.DataFrame(results)
print("\n Model Comparison:\n", results_df)

best_model = results_df.loc[results_df["R2"].idxmax()]
print("\n Best Model:")
print(best_model)
print("\n Explanation:")
print("""
- Linear Regression: Fits the data directly, can overfit if features are highly correlated.
- Ridge Regression: Adds L2 regularization (penalty on large coefficients), reduces overfitting.
- Lasso Regression: Adds L1 regularization, can shrink some coefficients to zero (feature selection).

Generally, Ridge performs better when there are many correlated features (multicollinearity),
while Lasso is better when only a few features are truly important.
""")


Dataset Loaded Successfully
Shape: (322, 20)
First 5 rows:
    AtBat  Hits  HmRun  Runs  RBI  Walks  Years  CAtBat  CHits  CHmRun  CRuns  \
0    293    66      1    30   29     14      1     293     66       1     30   
1    315    81      7    24   38     39     14    3449    835      69    321   
2    479   130     18    66   72     76      3    1624    457      63    224   
3    496   141     20    65   78     37     11    5628   1575     225    828   
4    321    87     10    39   42     30      2     396    101      12     48   

   CRBI  CWalks League Division  PutOuts  Assists  Errors  Salary NewLeague  
0    29      14      A        E      446       33      20     NaN         A  
1   414     375      N        W      632       43      10   475.0         N  
2   266     263      A        W      880       82      14   480.0         A  
3   838     354      N        E      200       11       3   500.0         N  
4    46      33      N        E      805       40       4    91.5    

 MAM THE DATASET WHICH YOU HAVE PROVIDED FOR QUESTION 2 IS NOT OPENING..., WHEN I TRY TO OPEN.,IT SHOWS THAT THE PAGE IS NOT FOUND.., SO FOR QUESTION 2, I AM USING MY OWN DATASET WHICH I DOWNLOADED FROM THE KAGGLE 

Q3 Cross Validation for Ridge and Lasso Regression  
Explore Ridge Cross Validation (RidgeCV) and Lasso Cross Validation (LassoCV) function of Python. Implement both on Boston House Prediction Dataset (load_boston dataset from sklearn.datasets).  

In [22]:
import numpy as np
import pandas as pd
from sklearn.datasets import fetch_openml
from sklearn.linear_model import RidgeCV, LassoCV
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score, mean_squared_error

boston = fetch_openml(name="boston", version=1, as_frame=True)
X = boston.data
y = boston.target

print("Dataset Loaded Successfully")
print("Shape of X:", X.shape)
print("Shape of y:", y.shape)
print()

scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

X_train, X_test, y_train, y_test = train_test_split(
    X_scaled, y, test_size=0.2, random_state=42
)

alphas = np.logspace(-3, 3, 20) 
ridge_cv = RidgeCV(alphas=alphas, cv=5)
ridge_cv.fit(X_train, y_train)

ridge_best_alpha = ridge_cv.alpha_
ridge_pred = ridge_cv.predict(X_test)

ridge_r2 = r2_score(y_test, ridge_pred)
ridge_mse = mean_squared_error(y_test, ridge_pred)

print("RidgeCV Results")
print("Best Alpha:", ridge_best_alpha)
print("R² Score:", ridge_r2)
print("Mean Squared Error:", ridge_mse)
print()

lasso_cv = LassoCV(alphas=alphas, cv=5, max_iter=10000)
lasso_cv.fit(X_train, y_train)

lasso_best_alpha = lasso_cv.alpha_
lasso_pred = lasso_cv.predict(X_test)

lasso_r2 = r2_score(y_test, lasso_pred)
lasso_mse = mean_squared_error(y_test, lasso_pred)

print("LassoCV Results")
print("Best Alpha:", lasso_best_alpha)
print("R² Score:", lasso_r2)
print("Mean Squared Error:", lasso_mse)
print()

if ridge_r2 > lasso_r2:
    print("Ridge Regression performs better on this dataset.")
else:
    print("Lasso Regression performs better on this dataset.")


Dataset Loaded Successfully
Shape of X: (506, 13)
Shape of y: (506,)

RidgeCV Results
Best Alpha: 2.976351441631316
R² Score: 0.6678168873825823
Mean Squared Error: 24.36024435020685

LassoCV Results
Best Alpha: 0.001
R² Score: 0.6687104601341602
Mean Squared Error: 24.294715279804656

Lasso Regression performs better on this dataset.


Q4 Multiclass Logistic Regression: Implement Multiclass Logistic Regression (step-by step) on Iris dataset using one vs. rest strategy?

In [1]:
import numpy as np
import pandas as pd
from sklearn.datasets import load_iris
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
from sklearn.linear_model import LogisticRegression
from sklearn.multiclass import OneVsRestClassifier

iris = load_iris()
X = iris.data
y = iris.target

print("Dataset Loaded Successfully!")
print("Shape of X:", X.shape)
print("Shape of y:", y.shape)
print("Target classes:", iris.target_names)
print()

scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

X_train, X_test, y_train, y_test = train_test_split(
    X_scaled, y, test_size=0.2, random_state=42
)

base_model = LogisticRegression(solver='lbfgs', max_iter=200)
ovr_model = OneVsRestClassifier(base_model)
ovr_model.fit(X_train, y_train)

y_pred = ovr_model.predict(X_test)

acc = accuracy_score(y_test, y_pred)
cm = confusion_matrix(y_test, y_pred)

print("Model Evaluation")
print()
print("Accuracy:", acc)
print("\nConfusion Matrix:\n", cm)
print("\nClassification Report:\n", classification_report(y_test, y_pred, target_names=iris.target_names))

print("\n Demonstrating One-vs-Rest Concept Manually:")
for i, class_name in enumerate(iris.target_names):
    y_binary = (y_train == i).astype(int)
    clf = LogisticRegression(solver='lbfgs', max_iter=200)
    clf.fit(X_train, y_binary)
    print(f"Trained classifier for class '{class_name}'")
    print("Coefficient shape:", clf.coef_.shape)
    print("Intercept:", clf.intercept_)
    print()


Dataset Loaded Successfully!
Shape of X: (150, 4)
Shape of y: (150,)
Target classes: ['setosa' 'versicolor' 'virginica']

Model Evaluation

Accuracy: 0.9666666666666667

Confusion Matrix:
 [[10  0  0]
 [ 0  8  1]
 [ 0  0 11]]

Classification Report:
               precision    recall  f1-score   support

      setosa       1.00      1.00      1.00        10
  versicolor       1.00      0.89      0.94         9
   virginica       0.92      1.00      0.96        11

    accuracy                           0.97        30
   macro avg       0.97      0.96      0.97        30
weighted avg       0.97      0.97      0.97        30


 Demonstrating One-vs-Rest Concept Manually:
Trained classifier for class 'setosa'
Coefficient shape: (1, 4)
Intercept: [-2.41957815]

Trained classifier for class 'versicolor'
Coefficient shape: (1, 4)
Intercept: [-0.87535907]

Trained classifier for class 'virginica'
Coefficient shape: (1, 4)
Intercept: [-3.57467368]

