In [40]:
from sklearn.metrics import r2_score, mean_absolute_error
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import PolynomialFeatures
from sklearn.preprocessing import OneHotEncoder
from sklearn.metrics import mean_squared_error
from sklearn.linear_model import Ridge
import pandas as pd
import numpy as np
import os

# Work from the folder that holds the CSV files; the relative file names
# passed to pd.read_csv (here and in the Boston cell) resolve against it.
os.chdir(r"D:\Datasets")

# Load the housing data and dummy-encode its categorical columns.
# drop_first=True drops the first level of each encoded column (k-1 dummies
# for k levels), which avoids perfectly collinear indicator columns.
df = pd.read_csv("Housing.csv")
dum_df = pd.get_dummies(df, drop_first=True)

# 'price' is the regression target; every remaining column is a feature.
y = dum_df['price']
X = dum_df.drop(columns=['price'])

# Hold out 30% of the rows for testing; the fixed seed makes the split
# reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=24
)

# Candidate regularization strengths: 0, 0.25, ..., 9.75.
# NOTE: alpha=0 makes Ridge equivalent to ordinary least squares; scikit-learn
# advises against alpha=0 with Ridge for numerical reasons (LinearRegression
# is the recommended estimator for that case).
alpha = np.arange(0, 10, 0.25)
scores = []

# Fit one Ridge model per alpha on the training split and score it on the
# test split.
for i in alpha:
    ridge = Ridge(alpha=i)
    ridge.fit(X_train, y_train)
    y_pred = ridge.predict(X_test)

    # Compute R^2 once and reuse it for both the log line and the score list.
    score = r2_score(y_test, y_pred)
    print('Alpha: ', i)
    print(f"r2 squared: {score}")
    scores.append(score)

# Report the alpha with the highest test-split R^2
# (np.argmax returns the first maximum when several alphas tie).
print("----------------------------------------------")
i_max = np.argmax(scores)
print("Best alpha: ", alpha[i_max])
print("Best score: ", scores[i_max])

# Summary: this cell trains a Ridge Regression model for each regularization
# strength (alpha) and picks the alpha with the best R-squared score on the
# held-out test split. Regularization helps limit overfitting.
# Caveat: because alpha is chosen using the test split itself, "Best score"
# is an optimistic estimate of performance on truly unseen data; selecting
# alpha by cross-validation on the training split would avoid that.

Alpha:  0.0
r2 squared: 0.6246856191453722)
Alpha:  0.25
r2 squared: 0.6245764372818154)
Alpha:  0.5
r2 squared: 0.6244646032856002)
Alpha:  0.75
r2 squared: 0.6243503431206507)
Alpha:  1.0
r2 squared: 0.6242338599592155)
Alpha:  1.25
r2 squared: 0.6241153366520427)
Alpha:  1.5
r2 squared: 0.6239949379104519)
Alpha:  1.75
r2 squared: 0.6238728122366688)
Alpha:  2.0
r2 squared: 0.6237490936338193)
Alpha:  2.25
r2 squared: 0.6236239031227466)
Alpha:  2.5
r2 squared: 0.6234973500892027)
Alpha:  2.75
r2 squared: 0.6233695334818521)
Alpha:  3.0
r2 squared: 0.623240542878878)
Alpha:  3.25
r2 squared: 0.6231104594386885)
Alpha:  3.5
r2 squared: 0.6229793567482498)
Alpha:  3.75
r2 squared: 0.6228473015808715)
Alpha:  4.0
r2 squared: 0.6227143545738014)
Alpha:  4.25
r2 squared: 0.6225805708347099)
Alpha:  4.5
r2 squared: 0.622446000485037)
Alpha:  4.75
r2 squared: 0.6223106891472239)
Alpha:  5.0
r2 squared: 0.6221746783819945)
Alpha:  5.25
r2 squared: 0.622038006081148)
Alpha:  5.5
r2 squared: 

"In summary, this code trains a Ridge Regression model with different regularization parameters (alpha) and chooses the alpha that \nleads to the best performance on unseen data (based on R-squared score). This helps prevent overfitting and improve the model's generalizability."

In [41]:
# Another Ridge regression example, this time on the Boston housing dataset.
# "Boston.csv" is a relative path, so this cell relies on the working
# directory set by os.chdir in the earlier cell.
boston = pd.read_csv("Boston.csv")

# 'medv' is the regression target; every remaining column is a feature.
x = boston.drop(['medv'], axis=1)
y = boston['medv']

# Hold out 30% of the rows for testing; the fixed seed makes the split
# reproducible across runs.
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.3, random_state=24)

# Candidate regularization strengths: 0, 0.25, ..., 9.75.
# NOTE: alpha=0 makes Ridge equivalent to ordinary least squares; scikit-learn
# advises against alpha=0 with Ridge for numerical reasons.
alpha = np.arange(0, 10, 0.25)
scores = []

# Fit one Ridge model per alpha on the training split and score it on the
# test split.
for i in alpha:
    ridge = Ridge(alpha=i)
    ridge.fit(x_train, y_train)
    y_pred = ridge.predict(x_test)

    # Compute R^2 once and reuse it for both the log line and the score list.
    score = r2_score(y_test, y_pred)
    print('Alpha: ', i)
    print(f"r2 squared: {score}")
    scores.append(score)

# Report the alpha with the highest test-split R^2
# (np.argmax returns the first maximum when several alphas tie).
# Caveat: alpha is chosen using the test split itself, so "Best score" is an
# optimistic estimate of performance on truly unseen data.
print("----------------------------------------------")
i_max = np.argmax(scores)
print("Best alpha: ", alpha[i_max])
print("Best score: ", scores[i_max])

Alpha:  0.0
r2 squared: 0.7133431144123458)
Alpha:  0.25
r2 squared: 0.7097105849145189)
Alpha:  0.5
r2 squared: 0.7069833171421402)
Alpha:  0.75
r2 squared: 0.7048924168951105)
Alpha:  1.0
r2 squared: 0.7032547843448866)
Alpha:  1.25
r2 squared: 0.7019478104266106)
Alpha:  1.5
r2 squared: 0.7008879641000546)
Alpha:  1.75
r2 squared: 0.7000169325521792)
Alpha:  2.0
r2 squared: 0.6992929955083689)
Alpha:  2.25
r2 squared: 0.6986856166001973)
Alpha:  2.5
r2 squared: 0.6981719819420982)
Alpha:  2.75
r2 squared: 0.6977347319733422)
Alpha:  3.0
r2 squared: 0.6973604399458246)
Alpha:  3.25
r2 squared: 0.6970385684455562)
Alpha:  3.5
r2 squared: 0.6967607389881809)
Alpha:  3.75
r2 squared: 0.6965202111053879)
Alpha:  4.0
r2 squared: 0.6963115044411509)
Alpha:  4.25
r2 squared: 0.6961301202927865)
Alpha:  4.5
r2 squared: 0.6959723334875078)
Alpha:  4.75
r2 squared: 0.6958350347890854)
Alpha:  5.0
r2 squared: 0.6957156101316802)
Alpha:  5.25
r2 squared: 0.69561184705186)
Alpha:  5.5
r2 squared: