In [47]:
# Import necessary libraries
from sklearn.linear_model import Ridge, Lasso, ElasticNet
from sklearn.metrics import r2_score, mean_absolute_error
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import KFold
import pandas as pd
import numpy as np
import warnings
import os

In [48]:
# Silence every warning for the rest of the session.
# NOTE(review): this is a blanket filter, so it also hides any warnings
# emitted while the models below are being fitted.
warnings.simplefilter("ignore")

# Directory that holds the CSV files. It defaults to the original location
# but can be redirected with the MOHS_DATA_DIR environment variable, so the
# notebook is not tied to one machine's drive layout.
DATA_DIR = os.environ.get("MOHS_DATA_DIR", r"D:\Datasets")

# Later cells open their files by bare name ("test.csv",
# "sample_submission.csv", ...), so the working directory must be DATA_DIR.
os.chdir(DATA_DIR)

# Load the training data into "mohs"; the first CSV column becomes the index.
mohs = pd.read_csv("train.csv", index_col=0)

# Features "X" are every column except 'Hardness'; the target "y" is 'Hardness'.
X = mohs.drop(columns='Hardness')
y = mohs['Hardness']

In [49]:
# Shuffled 5-fold splitter with a fixed seed; the same object is handed to
# every grid search so all models are scored on identical folds.
kfold = KFold(n_splits=5, shuffle=True, random_state=24)

# Candidate regularisation strengths: 0, 0.25, ..., 9.75.
alphas = np.arange(0, 10, 0.25)
params = dict(alpha=alphas)

# Ridge regression, tuned over the alpha grid with the estimator's default
# scoring.
ridge = Ridge()
gcv_r = GridSearchCV(estimator=ridge, param_grid=params, cv=kfold)
gcv_r.fit(X, y)

# Report the winning alpha and its mean cross-validated score, one per line.
print(gcv_r.best_params_, gcv_r.best_score_, sep="\n")

{'alpha': 0.25}
0.24214792962324613


In [50]:
# Define the Lasso regression model
lasso = Lasso()

# Give Lasso its own grid instead of reading the shared `params` name.
# A later cell rebinds `params` to a grid that also contains 'l1_ratio',
# which Lasso does not accept, so re-running this cell after that one would
# fail. The values are the same as before: 0, 0.25, ..., 9.75.
# NOTE(review): the grid includes alpha=0. scikit-learn advises against
# alpha=0 with Lasso (use LinearRegression for the unpenalised fit); consider
# starting the grid at a small positive value.
lasso_params = {'alpha': np.arange(0, 10, 0.25)}

# Perform grid search for Lasso regression on the same folds as the other models
gcv_l = GridSearchCV(lasso, param_grid=lasso_params, cv=kfold)
gcv_l.fit(X, y)

# Print the best hyperparameters and the best score for Lasso regression
print(gcv_l.best_params_)
print(gcv_l.best_score_)


{'alpha': 0.0}
0.24214703163526088


In [51]:
# Search space for Elastic Net: penalty strength alpha in 0, 0.25, ..., 9.75
# and L1/L2 mixing ratio from 0 up to (but not including) 1 in steps of 0.1.
alphas = np.arange(0, 10, 0.25)
l1 = np.arange(0, 1, 0.1)
params = dict(alpha=alphas, l1_ratio=l1)

# Elastic Net regression, tuned over every (alpha, l1_ratio) pair using the
# shared cross-validation splitter.
el = ElasticNet()
gcv_el = GridSearchCV(estimator=el, param_grid=params, cv=kfold)
gcv_el.fit(X, y)

# Report the winning combination and its mean cross-validated score,
# one per line.
print(gcv_el.best_params_, gcv_el.best_score_, sep="\n")

{'alpha': 0.0, 'l1_ratio': 0.0}
0.24214703163526088


In [52]:
# Inference
# Take the best estimator found by the Ridge grid search as the final model.
best_model = gcv_r.best_estimator_

# Read the test features (first CSV column becomes the index) and the
# sample submission template.
test = pd.read_csv("test.csv", index_col=0)
submit = pd.read_csv("sample_submission.csv")

# Predict hardness for every test row.
pred_hard = best_model.predict(test)

# Put the predictions into the template's 'Hardness' column.
# NOTE(review): values are attached by row position, which assumes
# sample_submission.csv lists its rows in the same order as test.csv - verify.
submit = submit.assign(Hardness=pred_hard)

# Write the submission file without the DataFrame index.
submit.to_csv("sbt_rg_19_jun.csv", index=False)