## **Linear Regression, Ridge Regression & Lasso Regression**

In [1]:
# Importing necessary libraries.
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.linear_model import LinearRegression, Ridge, Lasso
from sklearn.metrics import r2_score

In [2]:
# Reading the csv file into dataframe DF. Then printing the first five rows.
DF = pd.read_csv('/content/Student_Marks.csv')
DF.head()

Unnamed: 0,number_courses,time_study,Marks
0,3,4.508,19.202
1,4,0.096,7.734
2,4,3.133,13.811
3,6,7.909,53.018
4,8,7.811,55.299


In [3]:
# Checking the DF for null values and datatypes.
DF.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 100 entries, 0 to 99
Data columns (total 3 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   number_courses  100 non-null    int64  
 1   time_study      100 non-null    float64
 2   Marks           100 non-null    float64
dtypes: float64(2), int64(1)
memory usage: 2.5 KB


In [4]:
# Printing the min, max, mean, median(50%), Q1(25%), Q3(75%), standard deviation of each feature.
DF.describe()

Unnamed: 0,number_courses,time_study,Marks
count,100.0,100.0,100.0
mean,5.29,4.07714,24.41769
std,1.799523,2.372914,14.326199
min,3.0,0.096,5.609
25%,4.0,2.0585,12.633
50%,5.0,4.022,20.0595
75%,7.0,6.17925,36.67625
max,8.0,7.957,55.299


In [5]:
# Printing the pearson correlation coefficient between each feature.
DF.corr()

Unnamed: 0,number_courses,time_study,Marks
number_courses,1.0,0.204844,0.417335
time_study,0.204844,1.0,0.942254
Marks,0.417335,0.942254,1.0


In [6]:
# Scaling the data using MinMaxScaler. Then splitting the data into training and testing sets.
scaler = MinMaxScaler()
scaled_DF = scaler.fit_transform(DF)
DF = pd.DataFrame(scaled_DF, columns=DF.columns)

X = DF.iloc[:, 0:2]
y = DF.iloc[:, -1]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [7]:
# Linear Regression
lr = LinearRegression()
lr.fit(X_train, y_train)
y_pred_lr = lr.predict(X_test)
r2_lr = r2_score(y_test, y_pred_lr)
print(f'Linear Regression R² score: {r2_lr}')

Linear Regression R² score: 0.9459936100591213


In [8]:
# Ridge Regression with GridSearchCV
ridge = Ridge()
param_grid_ridge = {'alpha': [0.01, 0.1, 1.0, 10, 100]}
ridge_grid = GridSearchCV(ridge, param_grid_ridge, cv=5, scoring='r2')
ridge_grid.fit(X_train, y_train)

best_ridge = ridge_grid.best_estimator_
y_pred_ridge = best_ridge.predict(X_test)
r2_ridge = r2_score(y_test, y_pred_ridge)
print(f'Best Ridge Regression R² score: {r2_ridge} with alpha: {best_ridge.alpha}')

Best Ridge Regression R² score: 0.9424068989987331 with alpha: 0.1


In [10]:
# Lasso Regression with GridSearchCV
lasso = Lasso()
param_grid_lasso = {'alpha': [0.01, 0.1, 1.0, 10, 100]}
lasso_grid = GridSearchCV(lasso, param_grid_lasso, cv=5, scoring='r2')
lasso_grid.fit(X_train, y_train)

best_lasso = lasso_grid.best_estimator_
y_pred_lasso = best_lasso.predict(X_test)
r2_lasso = r2_score(y_test, y_pred_lasso)
print(f'Best Lasso Regression R² score: {r2_lasso} with alpha: {best_lasso.alpha}')

Best Lasso Regression R² score: 0.8889228459489505 with alpha: 0.01
