In [17]:
from itertools import product
import pickle

import numpy as np

from algorithms.linear import LinearRegression
from algorithms.preprocessing import load_and_preprocess_dataset
import config

In [18]:
# Directory that receives the ridge grid-search logs (one pickle per
# hyperparameter combination is written there by the cross-validation cell).
ridge_logs_dir = config.LOGS_DIR / 'ridge'
# parents=True also creates config.LOGS_DIR when it is missing, so the notebook
# runs from a fresh checkout instead of raising FileNotFoundError.
ridge_logs_dir.mkdir(parents=True, exist_ok=True)

In [19]:
# Unpack the four values returned by the project's preprocessing helper.
# Presumably (X, y) is the training split and (X_test, y_test) the held-out
# split -- confirm against algorithms.preprocessing.
X, y, X_test, y_test = load_and_preprocess_dataset()

In [20]:
# Coerce all four objects to NumPy arrays in one pass, so that the index-array
# selection used by the cross-validation loop (X[train_indices], ...) applies.
X, y, X_test, y_test = map(np.asarray, (X, y, X_test, y_test))

In [21]:
# Grid of values passed as the first constructor argument of LinearRegression.
# Presumably the gradient-descent learning rate -- confirm in algorithms.linear.
alphas = [1e-2, 5e-3, 1e-3, 5e-4]

In [22]:
# Grid of values passed as the second constructor argument of LinearRegression.
# Presumably the L2 (ridge) penalty strength, given the 'ridge' log directory,
# with 0 meaning no regularisation -- confirm in algorithms.linear.
lambdas = [1e-2, 1e-3, 1e-4, 0]

In [23]:
# Grid of values forwarded to model.fit(batch_size=...) during the search.
batch_sizes = [32, 64]

In [24]:
# K-fold cross-validated grid search over (alpha, lambda, batch_size).
# For each combination, one pickle is written to ridge_logs_dir; it contains
# the list of per-fold values returned by model.fit().
N_FOLDS = 5
CV_SEED = 42  # fixes the fold assignment so a re-run validates on the same rows

# Dedicated seeded generator: the shuffle no longer depends on global RNG state.
# NOTE(review): randomness inside LinearRegression.fit (if any) is not
# controlled from here.
indices = np.random.RandomState(CV_SEED).permutation(len(y))

# np.array_split spreads the len(y) % N_FOLDS leftover rows over the first
# folds, so every row is validated exactly once. Slicing with a fixed
# `len // N_FOLDS` size would leave the tail permanently in the training set.
folds = np.array_split(indices, N_FOLDS)

# The splits do not depend on the hyperparameters, so build them once instead
# of recomputing them for every combination.
cv_splits = [
    (np.concatenate(folds[:k] + folds[k + 1:]), folds[k])
    for k in range(N_FOLDS)
]

scores = []  # not filled in this cell; kept in case later cells reference it
for alpha, lambda_, batch_size in product(alphas, lambdas, batch_sizes):
    model = LinearRegression(alpha, lambda_)
    fold_scores = []
    for train_indices, valid_indices in cv_splits:
        X_train, y_train = X[train_indices], y[train_indices]
        X_valid, y_valid = X[valid_indices], y[valid_indices]

        # The same model object is re-fitted on every fold with cold_start=True;
        # presumably that resets the weights so folds stay independent --
        # confirm in algorithms.linear.
        history = model.fit(X_train, y_train, X_valid, y_valid, epochs=500,
                            batch_size=batch_size, cold_start=True,
                            patience=50, min_delta=1e-3)
        fold_scores.append(history)

    log_path = ridge_logs_dir / f'alpha_{alpha}-lambda_{lambda_}-batch_size_{batch_size}.pkl'
    with open(log_path, 'wb') as f:
        pickle.dump(fold_scores, f)

alpha=0.01, lambda=0.01, batch_size=32:  13%|█▎        | 63/500 [00:05<00:39, 11.14it/s, train_mse=0.1425724, valid_mse=0.1305304]
alpha=0.01, lambda=0.01, batch_size=32:  17%|█▋        | 83/500 [00:07<00:36, 11.34it/s, train_mse=0.1418094, valid_mse=0.1465204]
alpha=0.01, lambda=0.01, batch_size=32: 100%|██████████| 500/500 [00:47<00:00, 10.48it/s, train_mse=0.1434628, valid_mse=0.1246781]
alpha=0.01, lambda=0.01, batch_size=32:  12%|█▏        | 59/500 [00:05<00:44,  9.91it/s, train_mse=0.1374599, valid_mse=0.1491288]
alpha=0.01, lambda=0.01, batch_size=32: 100%|██████████| 500/500 [00:40<00:00, 12.30it/s, train_mse=0.1338729, valid_mse=0.1607328]
alpha=0.01, lambda=0.01, batch_size=64:  15%|█▍        | 73/500 [00:03<00:18, 22.86it/s, train_mse=0.1455581, valid_mse=0.1319622]
alpha=0.01, lambda=0.01, batch_size=64:  16%|█▌        | 80/500 [00:03<00:18, 22.85it/s, train_mse=0.1464755, valid_mse=0.1546311]
alpha=0.01, lambda=0.01, batch_size=64: 100%|██████████| 500/500 [00:22<00:00, 22