In [2]:
import pandas as pd 
import numpy as np 
from sklearn.datasets import fetch_california_housing
import seaborn as sns
import matplotlib.pyplot as plt

In [3]:
cal_housing = fetch_california_housing()
df = pd.DataFrame(data = cal_housing.data , columns= cal_housing.feature_names)
df['target'] = cal_housing.target
df.head()

Unnamed: 0,MedInc,HouseAge,AveRooms,AveBedrms,Population,AveOccup,Latitude,Longitude,target
0,8.3252,41.0,6.984127,1.02381,322.0,2.555556,37.88,-122.23,4.526
1,8.3014,21.0,6.238137,0.97188,2401.0,2.109842,37.86,-122.22,3.585
2,7.2574,52.0,8.288136,1.073446,496.0,2.80226,37.85,-122.24,3.521
3,5.6431,52.0,5.817352,1.073059,558.0,2.547945,37.85,-122.25,3.413
4,3.8462,52.0,6.281853,1.081081,565.0,2.181467,37.85,-122.25,3.422


In [4]:
x = df.drop('target' , axis = 1)
y = df['target']

In [5]:
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer

num_transformer = Pipeline(steps = [
    ('scaler' , StandardScaler())
])

preprocessor = ColumnTransformer(
    transformers=[
        ('num' ,num_transformer , x.columns )
    ]
)

preprocessed_data = preprocessor.fit_transform(x)
col_names = [col.replace('num__' , '').replace('num__' , '') for col in preprocessor.get_feature_names_out()]
x = pd.DataFrame(data = preprocessed_data , columns= col_names)
x.head(3)

Unnamed: 0,MedInc,HouseAge,AveRooms,AveBedrms,Population,AveOccup,Latitude,Longitude
0,2.344766,0.982143,0.628559,-0.153758,-0.974429,-0.049597,1.052548,-1.327835
1,2.332238,-0.607019,0.327041,-0.263336,0.861439,-0.092512,1.043185,-1.322844
2,1.782699,1.856182,1.15562,-0.049016,-0.820777,-0.025843,1.038503,-1.332827


In [6]:
from sklearn.model_selection import train_test_split

def data_splitter(features : pd.DataFrame , target : pd.Series):
    
    try:
        
        if x.ndim!=2 and y.ndim!=1:
            raise ValueError('X must be 2 Dimensional and Y must be 1 Dimensional')
            
        x_train , x_test , y_train , y_test = train_test_split(features , target , test_size= 20 , random_state= 42)
        return x_train , x_test , y_train , y_test
    
    except Exception as e:
        print(f"An error occured {e}")

x_train , x_test , y_train , y_test = data_splitter( x, y)

In [7]:
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error , mean_absolute_error

def model_training(x_train , x_test , y_train , y_test):
    
    model = LinearRegression()
    model.fit(x_train , y_train)
    y_pred = model.predict(x_test)
    
    mse = mean_squared_error(y_pred , y_test)
    mae = mean_absolute_error(y_pred , y_test)
    
    return mse , mae

mse , mae = model_training(x_train , x_test , y_train , y_test)
print("Mean squared error : " , mse)
print("Mean absolute error : " , mae)

Mean squared error :  0.5830668749012882
Mean absolute error :  0.5880006847476305


In [10]:
from sklearn.linear_model import Lasso , Ridge , ElasticNet

lasso_model = Lasso(alpha = 0.1 , random_state= 42)
lasso_model.fit(x_train , y_train)

ypred_lasso = lasso_model.predict(x_test)

mse_lasso = mean_squared_error(ypred_lasso , y_test)
print(f'Lasso regression MSE : {mse_lasso}')

Lasso regression MSE : 0.8183152375223262


In [11]:
ridge_model = Ridge(alpha = 0.1 , random_state=42)
ridge_model.fit(x_train , y_train)

ypred_ridge = ridge_model.predict(x_test)
mse_ridge = mean_squared_error(ypred_ridge , y_test)
print(f'Ridge Regression MSE : {mse_ridge}')

Ridge Regression MSE : 0.5830734013151717


In [12]:
e_model = ElasticNet(alpha=0.1 , l1_ratio=0.5 , random_state= 42)
e_model.fit(x_train , y_train)

ypred_e = e_model.predict(x_test)
mse_elasticnet = mean_squared_error(ypred_e , y_test)
print(f"Elastic net regression MSE : {mse_elasticnet}")

Elastic net regression MSE : 0.7874990493979253


In [14]:
from sklearn.model_selection import GridSearchCV

# Define the hyperparameter grid for Lasso regression
lasso_grid = {'alpha': np.logspace(-4, 0, 50)}

# Perform grid search for Lasso regression
lasso_grid_search = GridSearchCV(Lasso(random_state=42), lasso_grid, cv=5)
lasso_grid_search.fit(x_train, y_train)

# Print the best parameters and the best score for Lasso regression
print("Lasso Regression Best Parameters: ", lasso_grid_search.best_params_)
print("Lasso Regression Best Score: ", lasso_grid_search.best_score_)

Lasso Regression Best Parameters:  {'alpha': np.float64(0.004291934260128779)}
Lasso Regression Best Score:  0.6024811566833712


In [19]:
ridge_grid = {'alpha': np.logspace(-4, 0, 50)}

# Perform grid search for Ridge regression
ridge_grid_search = GridSearchCV(Ridge(random_state=42), ridge_grid, cv=4)
ridge_grid_search.fit(x_train, y_train)

# Print the best parameters and the best score for Ridge regression
print("Ridge Regression Best Parameters: ", ridge_grid_search.best_params_)
print("Ridge Regression Best Score: ", ridge_grid_search.best_score_)

Ridge Regression Best Parameters:  {'alpha': np.float64(1.0)}
Ridge Regression Best Score:  0.6016013995988703
