In [11]:
from sklearn.model_selection import RandomizedSearchCV, cross_val_score
from sklearn.linear_model import SGDRegressor
from scipy.stats import uniform, randint
import pandas as pd
import numpy as np

In [6]:
# Load the preprocessed training table and split it into target and features.
# NOTE(review): the train file name uses an underscore ('train_processed') while
# the test file uses a hyphen ('test-processed') — confirm both paths exist.
df = pd.read_csv('data/train_processed.csv')
y_train = df['SalePrice']                   # regression target
X_train = df.drop(columns=['SalePrice'])    # every remaining column is a feature

# Test table: same preprocessing pipeline presumably applied — TODO confirm.
X_test = pd.read_csv('data/test-processed.csv')

In [7]:
# Search space handed to RandomizedSearchCV.
# Lists are sampled uniformly; scipy frozen distributions are sampled via rvs().
# scipy's uniform(loc, scale) covers [loc, loc + scale], so alpha is drawn from
# [0.0001, 0.1001] and l1_ratio from [0, 1].
params = dict(
    loss=['squared_error', 'huber', 'epsilon_insensitive', 'squared_epsilon_insensitive'],
    penalty=['l2', 'l1', 'elasticnet', None],
    alpha=uniform(loc=0.0001, scale=0.1),
    l1_ratio=uniform(loc=0, scale=1),
)

In [8]:
# Fixed seed so that the sampled hyper-parameter candidates and the SGD data
# shuffling — and therefore the selected best_estimator_ — are identical on
# every Restart & Run All.
RANDOM_STATE = 42

sgd = SGDRegressor(random_state=RANDOM_STATE)

# Randomized hyper-parameter search over `params`.
random_search = RandomizedSearchCV(
    estimator=sgd,
    param_distributions=params,
    n_iter=50,                          # number of candidates drawn from `params`
    scoring='neg_mean_squared_error',   # negated MSE: larger (closer to 0) is better
    cv=5,                               # 5-fold cross-validation per candidate
    random_state=RANDOM_STATE,          # seeds sampling from lists and distributions
)
random_search.fit(X_train, y_train)

In [12]:
# Take the winning configuration from the search and re-score it with 10-fold
# cross-validation on the training data.
best_model = random_search.best_estimator_
neg_mse_per_fold = cross_val_score(
    estimator=best_model,
    X=X_train,
    y=y_train,
    scoring='neg_mean_squared_error',
    cv=10,
)
# The scorer returns negated MSE; flip the sign and take the root to get one
# RMSE value per fold.
scores = np.sqrt(np.negative(neg_mse_per_fold))

In [None]:
# Predict on the test table and write an Id-indexed, single-column CSV.
# NOTE(review): X_test is passed to predict() with its 'Id' column included —
# this assumes the model was fitted on the same set of columns; confirm.
test_predictions = best_model.predict(X_test)
predictions = pd.DataFrame(
    {'Id': X_test['Id'], 'SalePrice': test_predictions}
).set_index('Id')
# to_csv does not create missing directories; 'data/predictions/' must exist.
predictions.to_csv('data/predictions/sgd.csv')