In [None]:
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import Ridge
from sklearn.linear_model import Lasso
from sklearn.neighbors import KNeighborsRegressor
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error
import pandas as pd
import numpy as np
import time as time
from sklearn.model_selection import GridSearchCV

#### Load Preprocessed Datasets

In [None]:
# Single place to point the notebook at the preprocessed CSV files.
# NOTE(review): this is a machine-specific absolute path; adjust it (or make it
# relative to the repository) before running the notebook on another machine.
DATA_DIR = "/Users/ukannika/work/personal/machine-learning/datasets"


def load_housing_csv(file_name):
    """Read one header-less, comma-separated CSV from DATA_DIR.

    Parameters
    ----------
    file_name : str
        File name (not a full path) of the CSV inside DATA_DIR.

    Returns
    -------
    pandas.DataFrame
        The file contents with default integer column labels (header=None).
    """
    return pd.read_csv(DATA_DIR + "/" + file_name, sep=",", header=None)


# Training features: one file per imputation strategy, all paired with the same labels.
housing_features_simple_imputer = load_housing_csv("housing_features_simple_imputer.csv")
housing_features_iterative_imputer = load_housing_csv("housing_features_iterative_imputer.csv")
housing_features_knn_imputer = load_housing_csv("housing_features_knn_imputer.csv")
housing_labels = load_housing_csv("housing_labels.csv")

# Held-out test split.
test_housing_features = load_housing_csv("test_housing_features.csv")
test_housing_labels = load_housing_csv("test_housing_labels.csv")

# Print the shapes as a quick sanity check that the row/column counts line up.
print("housing_features_simple_imputer %s " % (housing_features_simple_imputer.shape,))
print("housing_features_iterative_imputer %s " % (housing_features_iterative_imputer.shape,))
print("housing_features_knn_imputer %s " % (housing_features_knn_imputer.shape,))
print("housing_labels %s " % (housing_labels.shape,))


print("test_housing_features %s " % (test_housing_features.shape,))
print("test_housing_labels %s " % (test_housing_labels.shape,))

#### Linear Regression

In linear regression, the target value is expected to be a linear combination of the features.

y(W, X) = XW + $\epsilon$

Closed form solution for W

W = $(X^TX)^{-1}X^TY$

**Cost Function** <br>
*MSE(Mean Squared Error)* <br>
*MAE(Mean Absolute Error)*


In [None]:
# Fit ordinary least squares on the simple-imputer training features and
# predict on the held-out test features.
linear_regression = LinearRegression().fit(housing_features_simple_imputer, housing_labels)
housing_predictions = linear_regression.predict(test_housing_features)

# Calculate Error
mse = mean_squared_error(y_true=test_housing_labels, y_pred=housing_predictions)
# RMSE is derived from the MSE instead of passing `squared=False`, which recent
# scikit-learn releases no longer accept.
# assumes a single target column, where sqrt(MSE) is exactly the RMSE — TODO confirm
rmse = np.sqrt(mse)
mae = mean_absolute_error(y_true=test_housing_labels, y_pred=housing_predictions)

print("MSE : %0.3f " % mse)
print("RMSE : %0.3f " % rmse)
# Report the MAE itself (the RMSE used to be printed under this label by mistake).
print("MAE : %0.3f " % mae)

In [None]:
# Same ordinary-least-squares fit as before, but trained on the
# iterative-imputer features; evaluated on the same held-out test set.
linear_regression = LinearRegression().fit(housing_features_iterative_imputer, housing_labels)
housing_predictions = linear_regression.predict(test_housing_features)

# Calculate Error
mse = mean_squared_error(y_true=test_housing_labels, y_pred=housing_predictions)
# RMSE is derived from the MSE instead of passing `squared=False`, which recent
# scikit-learn releases no longer accept.
# assumes a single target column, where sqrt(MSE) is exactly the RMSE — TODO confirm
rmse = np.sqrt(mse)
mae = mean_absolute_error(y_true=test_housing_labels, y_pred=housing_predictions)

print("MSE : %0.3f " % mse)
print("RMSE : %0.3f " % rmse)
# Report the MAE itself (the RMSE used to be printed under this label by mistake).
print("MAE : %0.3f " % mae)

In [None]:
# Display the learned coefficients of the most recently fitted LinearRegression
# (when the notebook is run top to bottom, that is the model trained on the
# iterative-imputer features).
linear_regression.coef_

#### Regularization L1 and L2

L1 => Lasso (Sparsity) <br>
L2 => Ridge (shrinks weights towards zero)

In [None]:
# For this example, we may not see any improvement from using Lasso/Ridge regression.
# Tune hyperparameter alpha
ridge = Ridge(alpha=0.3, max_iter=10000).fit(housing_features_simple_imputer, housing_labels)
housing_predictions = ridge.predict(test_housing_features)

# Calculate Error
mse = mean_squared_error(y_true=test_housing_labels, y_pred=housing_predictions)
# RMSE is derived from the MSE instead of passing `squared=False`, which recent
# scikit-learn releases no longer accept.
# assumes a single target column, where sqrt(MSE) is exactly the RMSE — TODO confirm
rmse = np.sqrt(mse)
mae = mean_absolute_error(y_true=test_housing_labels, y_pred=housing_predictions)

print("MSE : %0.3f " % mse)
print("RMSE : %0.3f " % rmse)
# Report the MAE itself (the RMSE used to be printed under this label by mistake).
print("MAE : %0.3f " % mae)

In [None]:
# L1-regularised linear model trained on the simple-imputer features; the large
# max_iter gives the solver plenty of room to converge.
lasso = Lasso(alpha=0.3, max_iter=50000).fit(housing_features_simple_imputer, housing_labels)
housing_predictions = lasso.predict(test_housing_features)

# Calculate Error
mse = mean_squared_error(y_true=test_housing_labels, y_pred=housing_predictions)
# RMSE is derived from the MSE instead of passing `squared=False`, which recent
# scikit-learn releases no longer accept.
# assumes a single target column, where sqrt(MSE) is exactly the RMSE — TODO confirm
rmse = np.sqrt(mse)
mae = mean_absolute_error(y_true=test_housing_labels, y_pred=housing_predictions)

print("MSE : %0.3f " % mse)
print("RMSE : %0.3f " % rmse)
# Report the MAE itself (the RMSE used to be printed under this label by mistake).
print("MAE : %0.3f " % mae)

In [None]:
# construct the set of hyperparameters to tune
print("Tuning hyperparameters via grid search")
params = {"alpha": np.arange(0.1, 2.0, 0.2)}
# `ridge` (fitted in an earlier cell) only serves as the estimator template;
# the grid search overrides its alpha with each candidate value.
grid = GridSearchCV(ridge, params)
start = time.time()
grid.fit(housing_features_simple_imputer, housing_labels)

# evaluate the best grid searched model on the testing data
print("Grid search took {:.2f} seconds".format(time.time() - start))
# No `scoring` was given, so grid.score() uses the estimator's default scorer,
# which for a regressor is the R^2 coefficient of determination, not a
# classification accuracy. R^2 is at most 1 and can be negative, so it is
# reported as a plain score rather than as a percentage.
test_r2 = grid.score(test_housing_features, test_housing_labels)
acc = test_r2  # old name kept so any later cell that reads `acc` still works

print("Grid search R^2 on test set: {:.4f}".format(test_r2))
print("Grid search best parameters: {}".format(grid.best_params_))

In [None]:
# Peek at the first entry of `housing_predictions`. When the notebook is run top
# to bottom this still holds the Lasso predictions: the grid-search cell does not
# reassign it.
housing_predictions[:1]

#### K-NN Regression

In [None]:
# Base estimator for the search; n_neighbors, leaf_size and p are overridden by
# the grid below, the remaining arguments stay fixed.
knn_regressor = KNeighborsRegressor(n_neighbors=5, leaf_size=30, p=2, metric='minkowski',
                                    weights='uniform', algorithm='ball_tree')

print("Tuning hyperparameters via grid search")
# NOTE(review): leaf_size is documented as affecting tree build/query speed and
# memory rather than the predictions, so searching over it mostly multiplies the
# run time — consider dropping it from the grid.
params = {"n_neighbors": np.arange(5, 15, 2), "leaf_size": np.arange(10, 30, 2), "p" : [1, 2]}
grid = GridSearchCV(knn_regressor, params)
start = time.time()
grid.fit(housing_features_simple_imputer, housing_labels)

# evaluate the best grid searched model on the testing data
print("Grid search took {:.2f} seconds".format(time.time() - start))
# No `scoring` was given, so grid.score() uses the estimator's default scorer,
# which for a regressor is the R^2 coefficient of determination, not a
# classification accuracy. R^2 is at most 1 and can be negative, so it is
# reported as a plain score rather than as a percentage.
test_r2 = grid.score(test_housing_features, test_housing_labels)
acc = test_r2  # old name kept so any later cell that reads `acc` still works

print("Grid search R^2 on test set: {:.4f}".format(test_r2))
print("Grid search best parameters: {}".format(grid.best_params_))

In [None]:
# Refit K-NN with fixed hyperparameters on the simple-imputer features.
# presumably n_neighbors=9 / leaf_size=26 / p=2 were copied from a grid-search run — verify
knn_regressor = KNeighborsRegressor(n_neighbors=9, leaf_size=26, p=2, metric='minkowski',
                                    weights='uniform', algorithm='ball_tree').fit(housing_features_simple_imputer, housing_labels)

housing_predictions = knn_regressor.predict(test_housing_features)

# Calculate Error
mse = mean_squared_error(y_true=test_housing_labels, y_pred=housing_predictions)
# RMSE is derived from the MSE instead of passing `squared=False`, which recent
# scikit-learn releases no longer accept.
# assumes a single target column, where sqrt(MSE) is exactly the RMSE — TODO confirm
rmse = np.sqrt(mse)
mae = mean_absolute_error(y_true=test_housing_labels, y_pred=housing_predictions)

print("MSE : %0.3f " % mse)
print("RMSE : %0.3f " % rmse)
# Report the MAE itself (the RMSE used to be printed under this label by mistake).
print("MAE : %0.3f " % mae)