In [3]:
from sklearn.model_selection import GridSearchCV

In [4]:
import pandas as pd

from sklearn.datasets import fetch_california_housing
# Load the California housing dataset (returned as a Bunch object).
california = fetch_california_housing()

# One frame holding every predictor column plus the target under "Value".
df = pd.DataFrame(
    data=california.data,
    columns=california.feature_names,
).assign(Value=california.target)

# Separate the regression target from the predictor columns.
target = df["Value"]
features = df.drop(columns="Value")

In [5]:
from sklearn.linear_model import Ridge
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score
from sklearn.metrics import mean_squared_error

# Hold out a test partition; the fixed random_state keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    features,
    target,
    random_state=3000,
)

# Baseline: ridge regression with a hand-picked regularisation strength.
model = Ridge(alpha=10)
model.fit(X_train, y_train)

# Compare R-squared on the data the model was fit on and on the held-out data.
print(
    "R-squared value for training set: ",
    r2_score(y_train, model.predict(X_train)),
)
print(
    "R-squared value for testing set: ",
    r2_score(y_test, model.predict(X_test)),
)


R-squared value for training set:  0.6095096727794165
R-squared value for testing set:  0.5957098932871041


In [7]:
# Candidate regularisation strengths for the Ridge grid search.
param_grid = {
    'alpha': [0.001, 0.01, 0.1, 1, 10, 100],
}

In [8]:
from sklearn.linear_model import Ridge
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score
from sklearn.metrics import mean_squared_error

#import GridSearchCV and fit GridSearchCV
from sklearn.model_selection import GridSearchCV
# Hold out a test partition first; cross-validation only runs on the training part.
X_train, X_test, y_train, y_test = train_test_split(
    features,
    target,
    random_state=3000,
)

# Search the candidates in param_grid for Ridge, scoring each with cv=5.
grid_search = GridSearchCV(estimator=Ridge(), param_grid=param_grid, cv=5)
grid_search.fit(X_train, y_train)

# Outcome of the search: winning parameters and their cross-validation score.
print("Best parameters: ", grid_search.best_params_)
print("Best cross-validation score: ", grid_search.best_score_)

# Performance of the best found parameters on the held-out test set --
# this is the number to report when evaluating the model.
print("Test set score: ", grid_search.score(X_test, y_test))

Best parameters:  {'alpha': 0.001}
Best cross-validation score:  0.6072051741818467
Test set score:  0.5954462604953146


In [9]:
# Candidate tree depths and minimum split sizes for the decision-tree grid search.
param_grid = {
    "max_depth": [1, 10, 100],
    "min_samples_split": [2, 10, 100],
}

In [10]:
from sklearn.tree import DecisionTreeRegressor
from sklearn.model_selection import train_test_split


from sklearn.model_selection import GridSearchCV
# Grid-search a decision-tree regressor over the candidates in param_grid,
# scoring each combination with cv=5.
# random_state is pinned on the tree: scikit-learn permutes the features at
# every split and breaks ties between equally good splits at random, so
# without a seed the scores and even the selected parameters can differ from
# one run of this cell to the next.
grid_search = GridSearchCV(
    DecisionTreeRegressor(random_state=3000),
    param_grid,
    cv=5,
)

#split data into training and testing sets (fixed seed keeps the split repeatable)
X_train, X_test, y_train, y_test = train_test_split(features, target, random_state=3000)

#fit the grid search object on the training data (CV will be performed on this)
grid_search.fit(X=X_train, y=y_train)

#this is the best mean cross-validation score found during the search
print("Best cross-validation score: ", grid_search.best_score_)

#result of grid search
print("Best parameters: ", grid_search.best_params_)

#the performance of the best found parameters on the test set
print("Test set score: ", grid_search.score(X_test, y_test))

Best cross-validation score:  0.7041358008022387
Best parameters:  {'max_depth': 100, 'min_samples_split': 100}
Test set score:  0.6958656467897454
