In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV, RandomizedSearchCV
from sklearn.metrics import mean_squared_error
from sklearn.linear_model import Ridge
from sklearn.ensemble import GradientBoostingRegressor, RandomForestRegressor
from sklearn.svm import SVR
from sklearn.neural_network import MLPRegressor
from statsmodels.tsa.arima.model import ARIMA
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
import matplotlib.pyplot as plt




In [2]:
# Load the dataset
file_path = 'household_power_consumption.txt'  
data = pd.read_csv(file_path, delimiter=';', parse_dates={'Datetime': ['Date', 'Time']}, infer_datetime_format=True, low_memory=False, na_values=['nan','?'])

  data = pd.read_csv(file_path, delimiter=';', parse_dates={'Datetime': ['Date', 'Time']}, infer_datetime_format=True, low_memory=False, na_values=['nan','?'])
  data = pd.read_csv(file_path, delimiter=';', parse_dates={'Datetime': ['Date', 'Time']}, infer_datetime_format=True, low_memory=False, na_values=['nan','?'])


In [4]:
# Handling missing values and data preparation
data.replace('?', np.nan, inplace=True)
data = data.dropna()
cols = ['Global_active_power', 'Global_reactive_power', 'Voltage', 'Global_intensity', 'Sub_metering_1', 'Sub_metering_2', 'Sub_metering_3']
data[cols] = data[cols].apply(pd.to_numeric, errors='coerce', axis=1)
data.set_index('Datetime', inplace=True)
data['hour'] = data.index.hour
data['day_of_week'] = data.index.dayofweek
data['month'] = data.index.month

KeyError: "None of ['Datetime'] are in the columns"

In [5]:
# Feature and target variables
X = data[['hour', 'day_of_week', 'month']]
y = data['Global_active_power']

In [6]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [None]:
ridge_model = Ridge()
ridge_params = {'alpha': [1e-3, 1e-2, 1e-1, 1, 10, 100]}
ridge_search = GridSearchCV(ridge_model, ridge_params, cv=3, scoring='neg_mean_squared_error')
ridge_search.fit(X_train, y_train)
best_ridge = ridge_search.best_estimator_
ridge_predictions = best_ridge.predict(X_test)
ridge_rmse = np.sqrt(mean_squared_error(y_test, ridge_predictions))
print(f"Ridge - Best Params: {ridge_search.best_params_}, RMSE: {ridge_rmse}")


In [None]:
gb_model = GradientBoostingRegressor()
gb_params = {'n_estimators': [100, 200], 'learning_rate': [0.01, 0.1], 'max_depth': [3, 4]}
gb_search = GridSearchCV(gb_model, gb_params, cv=3, scoring='neg_mean_squared_error')
gb_search.fit(X_train, y_train)
best_gb = gb_search.best_estimator_
gb_predictions = best_gb.predict(X_test)
gb_rmse = np.sqrt(mean_squared_error(y_test, gb_predictions))
print(f"GradientBoosting - Best Params: {gb_search.best_params_}, RMSE: {gb_rmse}")

In [None]:
rf_model = RandomForestRegressor()
rf_params = {'n_estimators': [100, 200], 'max_features': ['auto', 'sqrt'], 'max_depth': [5, 10]}
rf_search = RandomizedSearchCV(rf_model, rf_params, n_iter=10, cv=3, scoring='neg_mean_squared_error', n_jobs=-1, random_state=42)
rf_search.fit(X_train, y_train)
best_rf = rf_search.best_estimator_
rf_predictions = best_rf.predict(X_test)
rf_rmse = np.sqrt(mean_squared_error(y_test, rf_predictions))
print(f"RandomForest - Best Params: {rf_search.best_params_}, RMSE: {rf_rmse}")


In [None]:
svr_model = SVR()
svr_params = {'C': [0.1, 1], 'gamma': [1, 0.1], 'kernel': ['rbf', 'poly']}
svr_search = RandomizedSearchCV(svr_model, svr_params, n_iter=10, cv=3, scoring='neg_mean_squared_error', n_jobs=-1, random_state=42)
svr_search.fit(X_train, y_train)
best_svr = svr_search.best_estimator_
svr_predictions = best_svr.predict(X_test)
svr_rmse = np.sqrt(mean_squared_error(y_test, svr_predictions))
print(f"SVR - Best Params: {svr_search.best_params_}, RMSE: {svr_rmse}")


In [None]:
mlp_model = MLPRegressor()
mlp_params = {'hidden_layer_sizes': [(50,), (100,)], 'activation': ['relu', 'tanh'], 'solver': ['adam'], 'alpha': [0.0001, 0.05]}
mlp_search = RandomizedSearchCV(mlp_model, mlp_params, n_iter=10, cv=3, scoring='neg_mean_squared_error', n_jobs=-1, random_state=42)
mlp_search.fit(X_train, y_train)
best_mlp = mlp_search.best_estimator_
mlp_predictions = best_mlp.predict(X_test)
mlp_rmse = np.sqrt(mean_squared_error(y_test, mlp_predictions))
print(f"MLPRegressor - Best Params: {mlp_search.best_params_}, RMSE: {mlp_rmse}")


In [None]:
# ARIMA Model
arima_model = ARIMA(y_train, order=(5,1,0))
arima_model_fit = arima_model.fit()
arima_predictions = arima_model_fit.forecast(steps=len(y_test))
arima_rmse = np.sqrt(mean_squared_error(y_test, arima_predictions))
print(f"ARIMA RMSE: {arima_rmse}")

In [None]:
# LSTM Model
lstm_model = Sequential()
lstm_model.add(LSTM(50, activation='relu', input_shape=(X_train.shape[1], 1)))
lstm_model.add(Dense(1))
lstm_model.compile(optimizer='adam', loss='mse')
lstm_model.fit(X_train, y_train, epochs=50, batch_size=72, validation_data=(X_test, y_test), verbose=2)
lstm_predictions = lstm_model.predict(X_test).flatten()
lstm_rmse = np.sqrt(mean_squared_error(y_test, lstm_predictions))
print(f"LSTM RMSE: {lstm_rmse}")

In [7]:
from sklearn.model_selection import RandomizedSearchCV

# RandomForest
rf_params = {
    'n_estimators': [100, 200, 300],
    'max_features': ['auto', 'sqrt'],
    'max_depth': [10, 15, 20],
    'min_samples_split': [2, 4, 6],
    'min_samples_leaf': [1, 2, 3]
}
rf_model = RandomForestRegressor()
rf_search = RandomizedSearchCV(rf_model, rf_params, n_iter=50, cv=3, scoring='neg_mean_squared_error', n_jobs=-1, random_state=42)
rf_search.fit(X_train, y_train)
rf_best = rf_search.best_estimator_
rf_predictions = rf_best.predict(X_test)
rf_rmse = np.sqrt(mean_squared_error(y_test, rf_predictions))
print(f"RandomForest - Best Params: {rf_search.best_params_}, RMSE: {rf_rmse}")

# GradientBoosting
gb_params = {
    'n_estimators': [100, 200, 300],
    'learning_rate': [0.01, 0.05, 0.1],
    'max_depth': [3, 4, 5],
    'subsample': [0.8, 0.9, 1.0]
}
gb_model = GradientBoostingRegressor()
gb_search = RandomizedSearchCV(gb_model, gb_params, n_iter=50, cv=3, scoring='neg_mean_squared_error', n_jobs=-1, random_state=42)
gb_search.fit(X_train, y_train)
gb_best = gb_search.best_estimator_
gb_predictions = gb_best.predict(X_test)
gb_rmse = np.sqrt(mean_squared_error(y_test, gb_predictions))
print(f"GradientBoosting - Best Params: {gb_search.best_params_}, RMSE: {gb_rmse}")


69 fits failed out of a total of 150.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
13 fits failed with the following error:
Traceback (most recent call last):
  File "C:\Users\yatish\anaconda3\Lib\site-packages\sklearn\model_selection\_validation.py", line 732, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\yatish\anaconda3\Lib\site-packages\sklearn\base.py", line 1144, in wrapper
    estimator._validate_params()
  File "C:\Users\yatish\anaconda3\Lib\site-packages\sklearn\base.py", line 637, in _validate_params
    validate_parameter_constraints(
  File "C:\Users\yatish\anaconda3\Lib\site-packages\sklearn\utils\_param_validation.py", line 95, in validate_parameter_constraints
    raise InvalidPara

RandomForest - Best Params: {'n_estimators': 300, 'min_samples_split': 6, 'min_samples_leaf': 1, 'max_features': 'sqrt', 'max_depth': 20}, RMSE: 0.8727657225945454
GradientBoosting - Best Params: {'subsample': 1.0, 'n_estimators': 300, 'max_depth': 5, 'learning_rate': 0.1}, RMSE: 0.8770743603644845


In [8]:
ridge_model = Ridge()
ridge_params = {'alpha': np.logspace(-3, 3, 50)}
ridge_search = GridSearchCV(ridge_model, ridge_params, cv=3, scoring='neg_mean_squared_error')
ridge_search.fit(X_train, y_train)
ridge_best = ridge_search.best_estimator_
ridge_predictions = ridge_best.predict(X_test)
ridge_rmse = np.sqrt(mean_squared_error(y_test, ridge_predictions))
print(f"Ridge - Best Params: {ridge_search.best_params_}, RMSE: {ridge_rmse}")


Ridge - Best Params: {'alpha': 1000.0}, RMSE: 1.0134337668675093


In [9]:
from sklearn.preprocessing import PolynomialFeatures

poly = PolynomialFeatures(degree=2, interaction_only=True)
X_train_poly = poly.fit_transform(X_train)
X_test_poly = poly.transform(X_test)


In [10]:
from sklearn.ensemble import StackingRegressor

estimators = [
    ('rf', RandomForestRegressor(n_estimators=300, min_samples_split=6, min_samples_leaf=1, max_features='sqrt', max_depth=20)),
    ('gb', GradientBoostingRegressor(subsample=1.0, n_estimators=300, max_depth=5, learning_rate=0.1))
]
stack_reg = StackingRegressor(estimators=estimators, final_estimator=Ridge(alpha=1000.0))
stack_reg.fit(X_train_poly, y_train)
stack_predictions = stack_reg.predict(X_test_poly)
stack_rmse = np.sqrt(mean_squared_error(y_test, stack_predictions))
print(f"Stacking RMSE: {stack_rmse}")


Stacking RMSE: 0.8727982928004514


In [11]:
from sklearn.model_selection import cross_val_score

scores = cross_val_score(stack_reg, X_train_poly, y_train, scoring='neg_mean_squared_error', cv=5)
cv_rmse = np.sqrt(-scores.mean())
print(f"Cross-Validated RMSE: {cv_rmse}")


Cross-Validated RMSE: 0.8704581764037169


In [12]:
from sklearn.feature_selection import RFE

# Assuming rf_best is your best RandomForest model
selector = RFE(rf_best, n_features_to_select=5, step=1)
selector = selector.fit(X_train, y_train)
X_train_rfe = selector.transform(X_train)
X_test_rfe = selector.transform(X_test)

rf_best.fit(X_train_rfe, y_train)
predictions_rfe = rf_best.predict(X_test_rfe)
rmse_rfe = np.sqrt(mean_squared_error(y_test, predictions_rfe))
print(f"RMSE after RFE: {rmse_rfe}")


RMSE after RFE: 0.8727656117086091


In [17]:
pip install xgboost

Collecting xgboost
  Obtaining dependency information for xgboost from https://files.pythonhosted.org/packages/bc/43/242432efc3f60052a4a534dc4926b21e236ab4ec8d4920c593da3f65c65d/xgboost-2.0.2-py3-none-win_amd64.whl.metadata
  Downloading xgboost-2.0.2-py3-none-win_amd64.whl.metadata (2.0 kB)
Downloading xgboost-2.0.2-py3-none-win_amd64.whl (99.8 MB)
   ---------------------------------------- 0.0/99.8 MB ? eta -:--:--
   ---------------------------------------- 0.0/99.8 MB 1.4 MB/s eta 0:01:14
   ---------------------------------------- 0.4/99.8 MB 6.6 MB/s eta 0:00:16
   - -------------------------------------- 4.0/99.8 MB 31.7 MB/s eta 0:00:04
   --- ------------------------------------ 9.5/99.8 MB 55.2 MB/s eta 0:00:02
   ----- ---------------------------------- 14.9/99.8 MB 110.0 MB/s eta 0:00:01
   -------- ------------------------------- 20.6/99.8 MB 108.8 MB/s eta 0:00:01
   ---------- ----------------------------- 26.2/99.8 MB 108.8 MB/s eta 0:00:01
   ------------ ------------

In [18]:
pip install catboost

Collecting catboost
  Obtaining dependency information for catboost from https://files.pythonhosted.org/packages/e2/63/379617e3d982e8a66c9d66ebf4621d3357c7c18ad356473c335bffd5aba6/catboost-1.2.2-cp311-cp311-win_amd64.whl.metadata
  Downloading catboost-1.2.2-cp311-cp311-win_amd64.whl.metadata (1.2 kB)
Collecting graphviz (from catboost)
  Downloading graphviz-0.20.1-py3-none-any.whl (47 kB)
     ---------------------------------------- 0.0/47.0 kB ? eta -:--:--
     --------------------------------- ---- 41.0/47.0 kB 653.6 kB/s eta 0:00:01
     -------------------------------------- 47.0/47.0 kB 782.9 kB/s eta 0:00:00
Downloading catboost-1.2.2-cp311-cp311-win_amd64.whl (101.0 MB)
   ---------------------------------------- 0.0/101.0 MB ? eta -:--:--
   ---------------------------------------- 0.2/101.0 MB 7.3 MB/s eta 0:00:14
   - -------------------------------------- 2.6/101.0 MB 27.6 MB/s eta 0:00:04
   -- ------------------------------------- 7.3/101.0 MB 51.9 MB/s eta 0:00:02
   

In [19]:
from xgboost import XGBRegressor

xgb_model = XGBRegressor()
xgb_params = {
    'n_estimators': [100, 200, 300],
    'learning_rate': [0.01, 0.1, 0.2],
    'max_depth': [3, 4, 5]
}
xgb_search = RandomizedSearchCV(xgb_model, xgb_params, n_iter=10, cv=3, scoring='neg_mean_squared_error', n_jobs=-1, random_state=42)
xgb_search.fit(X_train, y_train)
xgb_best = xgb_search.best_estimator_
xgb_predictions = xgb_best.predict(X_test)
xgb_rmse = np.sqrt(mean_squared_error(y_test, xgb_predictions))
print(f"XGBoost - Best Params: {xgb_search.best_params_}, RMSE: {xgb_rmse}")


XGBoost - Best Params: {'n_estimators': 300, 'max_depth': 5, 'learning_rate': 0.1}, RMSE: 0.8771406367450417


In [20]:
from catboost import CatBoostRegressor

cat_model = CatBoostRegressor()
cat_params = {
    'iterations': [100, 200, 300],
    'learning_rate': [0.01, 0.1, 0.2],
    'depth': [4, 6, 8]
}
cat_search = RandomizedSearchCV(cat_model, cat_params, n_iter=10, cv=3, scoring='neg_mean_squared_error', n_jobs=-1, random_state=42)
cat_search.fit(X_train, y_train)
cat_best = cat_search.best_estimator_
cat_predictions = cat_best.predict(X_test)
cat_rmse = np.sqrt(mean_squared_error(y_test, cat_predictions))
print(f"CatBoost - Best Params: {cat_search.best_params_}, RMSE: {cat_rmse}")


0:	learn: 1.0152617	total: 180ms	remaining: 53.9s
1:	learn: 0.9861950	total: 214ms	remaining: 31.9s
2:	learn: 0.9662492	total: 245ms	remaining: 24.3s
3:	learn: 0.9521952	total: 277ms	remaining: 20.5s
4:	learn: 0.9418248	total: 310ms	remaining: 18.3s
5:	learn: 0.9313649	total: 342ms	remaining: 16.8s
6:	learn: 0.9238614	total: 374ms	remaining: 15.7s
7:	learn: 0.9182535	total: 408ms	remaining: 14.9s
8:	learn: 0.9136505	total: 442ms	remaining: 14.3s
9:	learn: 0.9098384	total: 474ms	remaining: 13.8s
10:	learn: 0.9071500	total: 506ms	remaining: 13.3s
11:	learn: 0.9044216	total: 541ms	remaining: 13s
12:	learn: 0.9024653	total: 575ms	remaining: 12.7s
13:	learn: 0.9010250	total: 610ms	remaining: 12.5s
14:	learn: 0.8993137	total: 645ms	remaining: 12.3s
15:	learn: 0.8977772	total: 680ms	remaining: 12.1s
16:	learn: 0.8966459	total: 712ms	remaining: 11.9s
17:	learn: 0.8956482	total: 745ms	remaining: 11.7s
18:	learn: 0.8947469	total: 777ms	remaining: 11.5s
19:	learn: 0.8938529	total: 812ms	remaining

In [22]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout

dnn_model = Sequential()
dnn_model.add(Dense(128, activation='relu', input_dim=X_train.shape[1]))
dnn_model.add(Dropout(0.2))
dnn_model.add(Dense(64, activation='relu'))
dnn_model.add(Dense(1))
dnn_model.compile(optimizer='adam', loss='mse')
dnn_model.fit(X_train, y_train, epochs=100, batch_size=32, validation_data=(X_test, y_test), verbose=2)
dnn_predictions = dnn_model.predict(X_test).flatten()
dnn_rmse = np.sqrt(mean_squared_error(y_test, dnn_predictions))
print(f"DNN RMSE: {dnn_rmse}")




Epoch 1/100

51232/51232 - 39s - loss: 0.8914 - val_loss: 0.8439 - 39s/epoch - 761us/step
Epoch 2/100
51232/51232 - 39s - loss: 0.8401 - val_loss: 0.8178 - 39s/epoch - 767us/step
Epoch 3/100
51232/51232 - 40s - loss: 0.8287 - val_loss: 0.8208 - 40s/epoch - 772us/step
Epoch 4/100
51232/51232 - 39s - loss: 0.8236 - val_loss: 0.8083 - 39s/epoch - 760us/step
Epoch 5/100
51232/51232 - 39s - loss: 0.8194 - val_loss: 0.8109 - 39s/epoch - 756us/step
Epoch 6/100
51232/51232 - 39s - loss: 0.8177 - val_loss: 0.8155 - 39s/epoch - 767us/step
Epoch 7/100
51232/51232 - 39s - loss: 0.8166 - val_loss: 0.8129 - 39s/epoch - 764us/step
Epoch 8/100
51232/51232 - 38s - loss: 0.8146 - val_loss: 0.8235 - 38s/epoch - 746us/step
Epoch 9/100
51232/51232 - 38s - loss: 0.8130 - val_loss: 0.8194 - 38s/epoch - 744us/step
Epoch 10/100
51232/51232 - 39s - loss: 0.8119 - val_loss: 0.8214 - 39s/epoch - 763us/step
Epoch 11/100
51232/51232 - 39s - loss: 0.8110 - val_loss: 0.8038 - 39s/epoch - 770us/step
Epoch 12/100
512

In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, RandomizedSearchCV, GridSearchCV
from sklearn.metrics import mean_squared_error
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.linear_model import Ridge
from sklearn.neural_network import MLPRegressor
from sklearn.svm import SVR
from xgboost import XGBRegressor

# Load and preprocess the dataset
data = pd.read_csv('household_power_consumption.txt', delimiter=';', na_values=['nan', '?'])
data['Datetime'] = pd.to_datetime(data['Date'] + ' ' + data['Time'], dayfirst=True)
data.set_index('Datetime', inplace=True)
data.drop(['Date', 'Time'], axis=1, inplace=True)
data.dropna(inplace=True)
sampled_data = data.sample(frac=0.1, random_state=42)

# Feature engineering
sampled_data['hour'] = sampled_data.index.hour
sampled_data['day_of_week'] = sampled_data.index.dayofweek
sampled_data['month'] = sampled_data.index.month
X_sample = sampled_data[['hour', 'day_of_week', 'month']]
y_sample = sampled_data['Global_active_power']

# Split the data
X_train_sample, X_test_sample, y_train_sample, y_test_sample = train_test_split(X_sample, y_sample, test_size=0.2, random_state=42)

# Model training and evaluation
models = {
    'RandomForest': (RandomForestRegressor(), {'n_estimators': [100, 200], 'max_features': ['auto', 'sqrt'], 'max_depth': [5, 10]}),
    'GradientBoosting': (GradientBoostingRegressor(), {'n_estimators': [100, 200], 'learning_rate': [0.01, 0.1], 'max_depth': [3, 4]}),
    'Ridge': (Ridge(), {'alpha': [1e-3, 1e-2, 1e-1, 1, 10, 100]}),
    'SVR': (SVR(), {'C': [0.1, 1], 'gamma': [1, 0.1], 'kernel': ['rbf', 'poly']}),
    'MLPRegressor': (MLPRegressor(), {'hidden_layer_sizes': [(50,), (100,)], 'activation': ['relu', 'tanh'], 'solver': ['adam'], 'alpha': [0.0001, 0.05]}),
    'XGBoost': (XGBRegressor(), {'n_estimators': [100, 200, 300], 'learning_rate': [0.01, 0.1, 0.2], 'max_depth': [3, 4, 5]})
}

for model_name, (model, params) in models.items():
    if model_name in ['RandomForest', 'SVR', 'MLPRegressor', 'XGBoost']:
        search = RandomizedSearchCV(model, params, n_iter=10, cv=3, scoring='neg_mean_squared_error', n_jobs=-1, random_state=42)
    else:
        search = GridSearchCV(model, params, cv=3, scoring='neg_mean_squared_error')
    
    search.fit(X_train_sample, y_train_sample)
    best_model = search.best_estimator_
    predictions = best_model.predict(X_test_sample)
    rmse = np.sqrt(mean_squared_error(y_test_sample, predictions))
    print(f"{model_name} - Best Params: {search.best_params_}, RMSE: {rmse}")

12 fits failed out of a total of 24.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
9 fits failed with the following error:
Traceback (most recent call last):
  File "C:\Users\yatish\anaconda3\Lib\site-packages\sklearn\model_selection\_validation.py", line 732, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\yatish\anaconda3\Lib\site-packages\sklearn\base.py", line 1144, in wrapper
    estimator._validate_params()
  File "C:\Users\yatish\anaconda3\Lib\site-packages\sklearn\base.py", line 637, in _validate_params
    validate_parameter_constraints(
  File "C:\Users\yatish\anaconda3\Lib\site-packages\sklearn\utils\_param_validation.py", line 95, in validate_parameter_constraints
    raise InvalidParame

RandomForest - Best Params: {'n_estimators': 200, 'max_features': 'sqrt', 'max_depth': 10}, RMSE: 0.8901836707859893
GradientBoosting - Best Params: {'learning_rate': 0.1, 'max_depth': 4, 'n_estimators': 200}, RMSE: 0.891527470007352
Ridge - Best Params: {'alpha': 100}, RMSE: 1.0182476148908415




In [None]:
from sklearn.preprocessing import PolynomialFeatures

poly = PolynomialFeatures(degree=2, interaction_only=True)
X_train_poly = poly.fit_transform(X_train)
X_test_poly = poly.transform(X_test)

In [None]:
# RandomForest
rf_model = RandomForestRegressor()
rf_params = {'n_estimators': [300, 400], 'max_depth': [20, 25], 'min_samples_split': [4, 6], 'min_samples_leaf': [1, 2]}
rf_search = GridSearchCV(rf_model, rf_params, cv=3, scoring='neg_mean_squared_error')
rf_search.fit(X_train_poly, y_train)
rf_best = rf_search.best_estimator_
rf_predictions = rf_best.predict(X_test_poly)
rf_rmse = np.sqrt(mean_squared_error(y_test, rf_predictions))
print(f"RandomForest RMSE: {rf_rmse}")

In [None]:
# GradientBoosting
gb_model = GradientBoostingRegressor()
gb_params = {'n_estimators': [300, 400], 'learning_rate': [0.05, 0.1], 'max_depth': [5, 6]}
gb_search = GridSearchCV(gb_model, gb_params, cv=3, scoring='neg_mean_squared_error')
gb_search.fit(X_train_poly, y_train)
gb_best = gb_search.best_estimator_
gb_predictions = gb_best.predict(X_test_poly)
gb_rmse = np.sqrt(mean_squared_error(y_test, gb_predictions))
print(f"GradientBoosting RMSE: {gb_rmse}")

In [None]:
# Ridge
ridge_model = Ridge()
ridge_params = {'alpha': [1000, 10000]}
ridge_search = GridSearchCV(ridge_model, ridge_params, cv=3, scoring='neg_mean_squared_error')
ridge_search.fit(X_train_poly, y_train)
ridge_best = ridge_search.best_estimator_
ridge_predictions = ridge_best.predict(X_test_poly)
ridge_rmse = np.sqrt(mean_squared_error(y_test, ridge_predictions))
print(f"Ridge RMSE: {ridge_rmse}")

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout

dnn_model = Sequential()
dnn_model.add(Dense(128, activation='relu', input_dim=X_train_poly.shape[1]))
dnn_model.add(Dropout(0.2))
dnn_model.add(Dense(64, activation='relu'))
dnn_model.add(Dense(1))
dnn_model.compile(optimizer='adam', loss='mean_squared_error')
dnn_model.fit(X_train_poly, y_train, epochs=100, batch_size=32, validation_split=0.2)

dnn_predictions = dnn_model.predict(X_test_poly).flatten()
dnn_rmse = np.sqrt(mean_squared_error(y_test, dnn_predictions))
print(f"DNN RMSE: {dnn_rmse}")