In [115]:
# Import necessary libraries
from sklearn.linear_model import LinearRegression,Ridge,Lasso, ElasticNet
from sklearn.model_selection import KFold, cross_val_score,GridSearchCV #(cross_val_score ) is a function
from sklearn.metrics import r2_score, mean_absolute_error
from sklearn.preprocessing import PolynomialFeatures
from sklearn.preprocessing import OneHotEncoder
from sklearn.metrics import mean_squared_error
import pandas as pd
import numpy as np
import warnings
import os

# Ignore warnings
warnings.simplefilter("ignore")

# Change directory to the dataset location
os.chdir(r"D:\Datasets")

# Read the CSV data
df = pd.read_csv("Housing.csv")

# Encode categorical features using one-hot encoding
dum_df = pd.get_dummies(df, drop_first=True)

# Separate features (X) and target variable (y)
x = dum_df.drop(['price'], axis=1)
y = dum_df['price']

# Define a KFold object with 5 folds
kfold = KFold(n_splits=5, shuffle=True,random_state=24)


In [114]:
# Create a Linear Regression model
lr = LinearRegression()

# Perform K-fold cross-validation on the Linear Regression model
results = cross_val_score(lr, x, y, cv=kfold)

# Print the average R-squared score
print("Average r-square: ", results.mean())

Average r-square:  0.6570808836039296


In [116]:
# Create a Ridge Regression model
ridge = Ridge()

# Perform K-fold cross-validation on the Ridge Regression model
results = cross_val_score(ridge, x, y, cv=kfold)

# Print the average R-squared score
print("Average r-square: ", results.mean())

Average r-square:  0.6573979310546161


In [117]:
# Create a Lasso Regression model
lasso = Lasso()

# Perform K-fold cross-validation on the Lasso Regression model
results = cross_val_score(lasso, x, y, cv=kfold)

# Print the average R-squared score
print("Average r-square: ", results.mean())

Average r-square:  0.6570821806267306


In [118]:
# Create an Elastic Net Regression model
el = ElasticNet()

# Perform K-fold cross-validation on the Elastic Net Regression model
results = cross_val_score(el, x, y, cv=kfold)

# Print the average R-squared score
print("Average r-square: ", results.mean())

Average r-square:  0.5674905101968157


In [119]:
# Define a range of alpha values to try for Ridge regression
alphas = np.arange(0, 10, 0.25)

# Initialize a list to store the scores
scores = []

# Iterate through the range of alpha values
for i in alphas:
    # Create a Ridge Regression model instance with the current alpha value
    ridge = Ridge(alpha=i)
    
    # Perform cross-validation on the Ridge Regression model
    results = cross_val_score(ridge, x, y, cv=kfold)
    
    # Append the mean of the cross-validation scores to the scores list
    scores.append(results.mean())

# Find the index of the best alpha value
i_max = np.argmax(scores)

# Print the best alpha value and the corresponding best score
print("Best alpha: ", alphas[i_max])
print("Best score: ", scores[i_max])

Best alpha:  4.0
Best score:  0.6577381120548891


In [120]:
# Ignore warnings
import warnings
warnings.simplefilter("ignore")

# Define a range of alpha values to try for Elastic Net regression
alphas = np.arange(0, 10, 0.25)

# Initialize a list to store the scores
scores = []

# Iterate through the range of alpha values
for i in alphas:
    # Create an Elastic Net Regression model instance with the current alpha value
    el = ElasticNet(alpha=i)
    
    # Perform cross-validation on the Elastic Net Regression model
    results = cross_val_score(el, x, y, cv=kfold)
    
    # Calculate the mean of the cross-validation scores
    score = results.mean()
    
    # Append the score to the scores list
    scores.append(score)

# Find the index of the best alpha value
i_max = np.argmax(scores)

# Print the best alpha value and the corresponding best score
print("Best alpha: ", alphas[i_max])
print("Best score: ", scores[i_max])

Best alpha:  0.0
Best score:  0.6570808836039296


In [121]:
# Ignore warnings
import warnings
warnings.simplefilter("ignore")

# Define a range of alpha values to try for Lasso regression
alphas = np.arange(0, 10, 0.25)

# Initialize a list to store the scores
scores = []

# Iterate through the range of alpha values
for i in alphas:
    # Create a Lasso Regression model instance with the current alpha value
    lasso = Lasso(alpha=i)
    
    # Perform cross-validation on the Lasso Regression model
    results = cross_val_score(lasso, x, y, cv=kfold)
    
    # Calculate the mean of the cross-validation scores
    score = results.mean()
    
    # Append the score to the scores list
    scores.append(score)

# Find the index of the best alpha value
i_max = np.argmax(scores)

# Print the best alpha value and the corresponding best score
print("Best alpha: ", alphas[i_max])
print("Best score: ", scores[i_max])

Best alpha:  9.5
Best score:  0.657087435238261


In [111]:
# Define a range of alpha values to try for Elastic Net regression
alphas = np.arange(0, 10, 0.25)

# Define a range of l1_ratio values to try for Elastic Net regression
l1 = np.linspace(0, 1, 11)

# Initialize a list to store the scores
scores = []

# Iterate through the ranges of alpha and l1_ratio
for i in alphas:
    for j in l1:
        # Create an Elastic Net Regression model instance with the current alpha and l1_ratio
        e1 = ElasticNet(alpha=i, l1_ratio=j)
        
        # Perform cross-validation on the Elastic Net Regression model
        results = cross_val_score(e1, x, y, cv=kfold)
        
        # Calculate the mean R-squared score from the cross-validation results
        r2 = results.mean()
        
        # Append the alpha, l1_ratio, and R-squared score to the scores list
        scores.append([i, j, r2])
        
# Convert the scores list to a pandas DataFrame
pd_scores = pd.DataFrame(scores, columns=['alpha', 'l1', 'r2'])

# Sort the DataFrame by the R-squared score in descending order and select the first row
best_model = pd_scores.sort_values(by='r2', ascending=False).iloc[0]

# Print the best alpha, l1_ratio, and R-squared score
print("Best alpha: ", best_model[0])
print("Best l1_ratio: ", best_model[1])
print("Best R-squared score: ", best_model[2])

Best Parameters: {'alpha': 4.25}
Best Score: -243700493.29235244
Best alpha:  0.0
Best l1_ratio:  0.0
Best R-squared score:  0.6066065590942402


In [113]:
# Read the CSV data
df = pd.read_csv("Housing.csv")

# Encode categorical features using one-hot encoding
dum_df = pd.get_dummies(df, drop_first=True)

# Separate features (X) and target variable (y)
x = dum_df.drop(['price'], axis=1)
y = dum_df['price']

# Define the parameter grid for the grid search
alphas = np.arange(0, 10, 0.25)
params = {'alpha': alphas}

# Create a Ridge Regression model
ridge = Ridge()

# Perform grid search cross-validation
gcv_r = GridSearchCV(ridge, param_grid=params, cv=kfold, scoring='neg_mean_squared_error')
gcv_r.fit(x, y)

# Print the best parameters and the best score
print("Best Parameters:", gcv_r.best_params_)
print("Best Score:", gcv_r.best_score_)

Best Parameters: {'alpha': 4.25}
Best Score: -243700493.29235244


In [125]:
# Ignore warnings
import warnings
warnings.simplefilter("ignore")

# Read the CSV data
df = pd.read_csv("Housing.csv")

# Encode categorical features using one-hot encoding
dum_df = pd.get_dummies(df, drop_first=True)

# Separate features (X) and target variable (y)
x = dum_df.drop(['price'], axis=1)
y = dum_df['price']

# Create a Lasso Regression model
lasso = Lasso()

# Define the parameter grid for the grid search
params = {'alpha': np.linspace(0.001, 1, 20)}

# Perform grid search cross-validation
gcv_l = GridSearchCV(lasso, param_grid=params, cv=kfold)
gcv_l.fit(x, y)

# Print the best parameters and the best score
print("Best Parameters:", gcv_l.best_params_)
print("Best Score:", gcv_l.best_score_)

Best Parameters: {'alpha': 1.0}
Best Score: 0.6570821806267306


In [126]:
# Ignore warnings
import warnings
warnings.simplefilter("ignore")

# Read the CSV data
df = pd.read_csv("Housing.csv")

# Encode categorical features using one-hot encoding
dum_df = pd.get_dummies(df, drop_first=True)

# Separate features (X) and target variable (y)
x = dum_df.drop(['price'], axis=1)
y = dum_df['price']

# Define the parameter grid for the grid search
alphas = np.arange(0, 10, 0.25)
params = {'alpha': alphas}

# Create an Elastic Net Regression model
el = ElasticNet()

# Perform grid search cross-validation
gcv_e = GridSearchCV(el, param_grid=params, cv=kfold)
gcv_e.fit(x, y)

# Print the best parameters and the best score
print("Best Parameters:", gcv_e.best_params_)
print("Best Score:", gcv_e.best_score_)

# Save the grid search results to a CSV file
pd_cv = pd.DataFrame(gcv_e.cv_results_)
pd_cv.to_csv("GridSearch_Results.csv", index=False)

Best Parameters: {'alpha': 0.0}
Best Score: 0.6570808836039296


In [127]:
# Ignore warnings
import warnings
warnings.simplefilter("ignore")

# Change directory to the dataset location
os.chdir(r"D:\Datasets")

# Read the CSV data
conc = pd.read_csv("Concrete_Data.csv")

# Separate features (X) and target variable (y)
x = conc.drop(['Strength'], axis=1)
y = conc['Strength']

# Define the parameter grid for the grid search
params = {
    'alpha': np.linspace(0.001, 1, 20),
    'l1_ratio': np.linspace(0, 1, 11)
}

# Create an Elastic Net Regression model
el = ElasticNet()

# Perform grid search cross-validation
gcv_e = GridSearchCV(el, param_grid=params, cv=kfold)
gcv_e.fit(x, y)

# Print the best parameters and the best score
print("Best Parameters:", gcv_e.best_params_)
print("Best Score:", gcv_e.best_score_)

# Save the grid search results to a CSV file
pd_cv = pd.DataFrame(gcv_e.cv_results_)
pd_cv.to_csv("GridSearch_Results.csv", index=False)

Best Parameters: {'alpha': 0.05357894736842105, 'l1_ratio': 0.0}
Best Score: 0.6066066097074024


In [128]:
# Ignore warnings
import warnings
warnings.simplefilter("ignore")

# Change directory to the dataset location
os.chdir(r"D:\Datasets")

# Read the CSV data
conc = pd.read_csv("Concrete_Data.csv")

# Separate features (X) and target variable (y)
x = conc.drop(['Strength'], axis=1)
y = conc['Strength']

# Define the parameter grid for the grid search
params = {
    'alpha': np.linspace(0.001, 10, 20)
}

# Create a Ridge Regression model
ridge = Ridge()

# Perform grid search cross-validation
gcv_r = GridSearchCV(ridge, param_grid=params, cv=kfold)
gcv_r.fit(x, y)

# Print the best parameters and the best score
print("Best Parameters:", gcv_r.best_params_)
print("Best Score:", gcv_r.best_score_)

# Save the grid search results to a CSV file
pd_cv = pd.DataFrame(gcv_r.cv_results_)
pd_cv.to_csv("GridSearch_Results.csv", index=False)

Best Parameters: {'alpha': 10.0}
Best Score: 0.6066065800792033


In [110]:
# Ignore warnings
import warnings
warnings.simplefilter("ignore")

# Change directory to the dataset location
os.chdir(r"D:\Datasets")

# Read the CSV data
conc = pd.read_csv("Concrete_Data.csv")

# Separate features (X) and target variable (y)
x = conc.drop(['Strength'], axis=1)
y = conc['Strength']

# Create a Lasso Regression model
lasso = Lasso()

gcv_l = GridSearchCV(lasso,param_grid=params,cv=kfold)
gcv_l.fit(x,y)

# Print the best parameters and the best score
print("Best Parameters:", gcv_l.best_params_)
print("Best Score:", gcv_l.best_score_)

# Save the grid search results to a CSV file
pd_cv = pd.DataFrame(gcv_l.cv_results_)
pd_cv.to_csv("GridSearch_Results.csv", index=False)


Best Parameters: {'alpha': 0.001, 'max_iter': 1000}
Best Score: 0.6066064857714588


In [109]:
# Inferencing
# Get the best model from the GridSearchCV object
best_model = gcv_r.best_estimator_

# Load the test data
tst = pd.read_csv("testConcrete.csv")

# Make predictions using the best model
pred_str = best_model.predict(tst)

# Print the predicted values
print(pred_str)

[68.73876931 31.78900645 19.49352047 46.84974452 58.04413155 16.09636391
 50.35237168 80.31308633 31.13698396 41.57074962 43.44491821 60.77326762
 52.3709778  15.38705846]
