In [1]:
import matplotlib.pyplot as plt
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error
from sklearn.model_selection import cross_validate
import tqdm
import random

In [2]:
from hyperopt.pyll import scope as ho_scope
from hyperopt import fmin, tpe, hp

### Prepare data

In [3]:
from common import get_data, get_freq_data, signal_cyclic_shift, generate_multi_signal

In [4]:
X_origin, y_origin = get_data(data_path='./data/na62_11_pulses.txt')


In [5]:
from sklearn.utils import shuffle

def prepare_data(X_origin, y_origin, alpha_range, data_size=1000, to_print=False):    
    X = []
    y = []
    for i in range(data_size):
        alpha = random.choice(alpha_range)
#         tau = random.choice(tau_range)
        signal = generate_multi_signal(X_origin, y_origin, alpha)
        
        
        X.append(signal['multi_impulse'])
        y.append(signal['multi_ref_time']) 
   
    X = np.array(X)
    y = np.array(y)
    
    if to_print:
        print("X shape:", X.shape)
        print("y shape:", y.shape)
    
    X, y = shuffle(X, y)

#     plt.scatter(alpha_values, tau_values)
#     plt.show()
    
    return X, y

In [6]:
# alpha_range = np.array([np.around(10**i, decimals=4) for i in np.arange(0, 3.1, 0.01)])
alpha_range = np.array([np.around(10**i, decimals=4) for i in np.arange(-3, 3.1, 0.1)])
# tau_range = np.arange(-25, 25, 1)

In [7]:
X, y = prepare_data(X_origin, y_origin, alpha_range, data_size=5000, to_print=True)

X shape: (5000, 1024)
y shape: (5000,)


### Hyperopt

In [8]:
from sklearn.pipeline import Pipeline
from sklearn.linear_model import HuberRegressor
from sklearn import linear_model
from sklearn import tree
from sklearn import svm
from sklearn.linear_model import PassiveAggressiveRegressor
import xgboost as xgb

In [9]:
freq_values = [5, 10, 15, 25, 50, 60, 75, 85, 100]

In [10]:
def print_results(iter_num=200):
    global X_freq
    for freq in freq_values:
        X_freq = get_freq_data(X, freq=freq)

        best = fmin(
            fn=f,  # "Loss" function to minimize
            space=space,  # Hyperparameter space
            algo=tpe.suggest,  # Tree-structured Parzen Estimator (TPE)
            max_evals=iter_num  # Perform 100 trials
        )

        print('-----------------------------------------------------')
        print("Freq:", freq)
        print("X_freq shape:", X_freq.shape)
        print("Found minimum after %d trials:" %(iter_num))
        print(best)
        print('-----------------------------------------------------')

##### Huber

In [11]:
def f(space):
    huber_reg = HuberRegressor(epsilon=space['epsilon'], max_iter=space['max_iter'], alpha=space['alpha'])
    scores = cross_validate(huber_reg, X_freq, y, scoring='neg_mean_absolute_error', cv=5)
    return -scores['test_score'].mean()
    
space = {
    'epsilon':  hp.loguniform('epsilon', low=np.log(1.1), high=np.log(10)),
    'max_iter': ho_scope.int(hp.quniform('max_iter', low=100, high=500, q=10)),
    'alpha':  hp.loguniform('alpha', low=np.log(0.0001), high=np.log(0.01)),
}

global X_freq
print_results()

100%|██████████| 200/200 [43:55<00:00, 13.18s/it, best loss: 1.5919258545441557]
-----------------------------------------------------
Freq: 5
X_freq shape: (5000, 205)
Found minimum after 200 trials:
{'alpha': 0.00912142074579725, 'epsilon': 1.1081592088422072, 'max_iter': 470.0}
-----------------------------------------------------
 52%|█████▎    | 105/200 [12:06<11:14,  7.10s/it, best loss: 1.5993742393185015]

  squared_loss = weighted_loss / sigma

  2. / sigma * safe_sparse_dot(weighted_non_outliers, X_non_outliers))

  2. / sigma * safe_sparse_dot(weighted_non_outliers, X_non_outliers))

  grad[-2] = -2. * np.sum(weighted_non_outliers) / sigma



100%|██████████| 200/200 [25:22<00:00,  7.61s/it, best loss: 1.599253984928063] 
-----------------------------------------------------
Freq: 10
X_freq shape: (5000, 103)
Found minimum after 200 trials:
{'alpha': 0.006858818174233596, 'epsilon': 1.1863802454513954, 'max_iter': 460.0}
-----------------------------------------------------
  1%|          | 2/200 [00:11<19:18,  5.85s/it, best loss: 1.6365269538197758]

  squared_loss = weighted_loss / sigma

  2. / sigma * safe_sparse_dot(weighted_non_outliers, X_non_outliers))

  2. / sigma * safe_sparse_dot(weighted_non_outliers, X_non_outliers))

  grad[-2] = -2. * np.sum(weighted_non_outliers) / sigma

  squared_loss = weighted_loss / sigma

  2. / sigma * safe_sparse_dot(weighted_non_outliers, X_non_outliers))

  2. / sigma * safe_sparse_dot(weighted_non_outliers, X_non_outliers))

  grad[-2] = -2. * np.sum(weighted_non_outliers) / sigma

  squared_loss = weighted_loss / sigma

  2. / sigma * safe_sparse_dot(weighted_non_outliers, X_non_outliers))

  2. / sigma * safe_sparse_dot(weighted_non_outliers, X_non_outliers))

  grad[-2] = -2. * np.sum(weighted_non_outliers) / sigma



 85%|████████▌ | 170/200 [18:41<03:39,  7.32s/it, best loss: 1.621131216712768]

  squared_loss = weighted_loss / sigma

  2. / sigma * safe_sparse_dot(weighted_non_outliers, X_non_outliers))

  2. / sigma * safe_sparse_dot(weighted_non_outliers, X_non_outliers))

  grad[-2] = -2. * np.sum(weighted_non_outliers) / sigma



100%|██████████| 200/200 [21:37<00:00,  6.49s/it, best loss: 1.6210347373794078]
-----------------------------------------------------
Freq: 15
X_freq shape: (5000, 68)
Found minimum after 200 trials:
{'alpha': 0.004011205523676483, 'epsilon': 1.21303198141006, 'max_iter': 460.0}
-----------------------------------------------------
  1%|          | 2/200 [00:06<09:22,  2.84s/it, best loss: 2.0974814324088804]

  squared_loss = weighted_loss / sigma

  2. / sigma * safe_sparse_dot(weighted_non_outliers, X_non_outliers))

  2. / sigma * safe_sparse_dot(weighted_non_outliers, X_non_outliers))

  grad[-2] = -2. * np.sum(weighted_non_outliers) / sigma

  squared_loss = weighted_loss / sigma

  2. / sigma * safe_sparse_dot(weighted_non_outliers, X_non_outliers))

  2. / sigma * safe_sparse_dot(weighted_non_outliers, X_non_outliers))

  grad[-2] = -2. * np.sum(weighted_non_outliers) / sigma

  squared_loss = weighted_loss / sigma

  2. / sigma * safe_sparse_dot(weighted_non_outliers, X_non_outliers))

  2. / sigma * safe_sparse_dot(weighted_non_outliers, X_non_outliers))

  grad[-2] = -2. * np.sum(weighted_non_outliers) / sigma

  squared_loss = weighted_loss / sigma

  2. / sigma * safe_sparse_dot(weighted_non_outliers, X_non_outliers))

  2. / sigma * safe_sparse_dot(weighted_non_outliers, X_non_outliers))

  grad[-2] = -2. * np.sum(weighted_non_outliers) / sigma



  3%|▎         | 6/200 [00:13<06:29,  2.01s/it, best loss: 2.0974814324088804]

  squared_loss = weighted_loss / sigma

  2. / sigma * safe_sparse_dot(weighted_non_outliers, X_non_outliers))

  2. / sigma * safe_sparse_dot(weighted_non_outliers, X_non_outliers))

  grad[-2] = -2. * np.sum(weighted_non_outliers) / sigma

  squared_loss = weighted_loss / sigma

  2. / sigma * safe_sparse_dot(weighted_non_outliers, X_non_outliers))

  2. / sigma * safe_sparse_dot(weighted_non_outliers, X_non_outliers))

  grad[-2] = -2. * np.sum(weighted_non_outliers) / sigma

  squared_loss = weighted_loss / sigma

  2. / sigma * safe_sparse_dot(weighted_non_outliers, X_non_outliers))

  2. / sigma * safe_sparse_dot(weighted_non_outliers, X_non_outliers))

  grad[-2] = -2. * np.sum(weighted_non_outliers) / sigma

  squared_loss = weighted_loss / sigma

  2. / sigma * safe_sparse_dot(weighted_non_outliers, X_non_outliers))

  2. / sigma * safe_sparse_dot(weighted_non_outliers, X_non_outliers))

  grad[-2] = -2. * np.sum(weighted_non_outliers) / sigma

  squared_loss = weighted_loss /

  4%|▍         | 8/200 [00:17<06:32,  2.04s/it, best loss: 2.0974814324088804]

  squared_loss = weighted_loss / sigma

  2. / sigma * safe_sparse_dot(weighted_non_outliers, X_non_outliers))

  2. / sigma * safe_sparse_dot(weighted_non_outliers, X_non_outliers))

  grad[-2] = -2. * np.sum(weighted_non_outliers) / sigma

  squared_loss = weighted_loss / sigma

  2. / sigma * safe_sparse_dot(weighted_non_outliers, X_non_outliers))

  2. / sigma * safe_sparse_dot(weighted_non_outliers, X_non_outliers))

  grad[-2] = -2. * np.sum(weighted_non_outliers) / sigma

  squared_loss = weighted_loss / sigma

  2. / sigma * safe_sparse_dot(weighted_non_outliers, X_non_outliers))

  2. / sigma * safe_sparse_dot(weighted_non_outliers, X_non_outliers))

  grad[-2] = -2. * np.sum(weighted_non_outliers) / sigma



  8%|▊         | 16/200 [00:38<07:52,  2.57s/it, best loss: 2.0938887950381124]

  squared_loss = weighted_loss / sigma

  2. / sigma * safe_sparse_dot(weighted_non_outliers, X_non_outliers))

  2. / sigma * safe_sparse_dot(weighted_non_outliers, X_non_outliers))

  grad[-2] = -2. * np.sum(weighted_non_outliers) / sigma

  squared_loss = weighted_loss / sigma

  2. / sigma * safe_sparse_dot(weighted_non_outliers, X_non_outliers))

  2. / sigma * safe_sparse_dot(weighted_non_outliers, X_non_outliers))

  grad[-2] = -2. * np.sum(weighted_non_outliers) / sigma

  squared_loss = weighted_loss / sigma

  2. / sigma * safe_sparse_dot(weighted_non_outliers, X_non_outliers))

  2. / sigma * safe_sparse_dot(weighted_non_outliers, X_non_outliers))

  grad[-2] = -2. * np.sum(weighted_non_outliers) / sigma

  squared_loss = weighted_loss / sigma

  2. / sigma * safe_sparse_dot(weighted_non_outliers, X_non_outliers))

  2. / sigma * safe_sparse_dot(weighted_non_outliers, X_non_outliers))

  grad[-2] = -2. * np.sum(weighted_non_outliers) / sigma



 20%|██        | 40/200 [02:06<08:03,  3.02s/it, best loss: 2.090855692910952] 

  squared_loss = weighted_loss / sigma

  2. / sigma * safe_sparse_dot(weighted_non_outliers, X_non_outliers))

  2. / sigma * safe_sparse_dot(weighted_non_outliers, X_non_outliers))

  grad[-2] = -2. * np.sum(weighted_non_outliers) / sigma

  squared_loss = weighted_loss / sigma

  2. / sigma * safe_sparse_dot(weighted_non_outliers, X_non_outliers))

  2. / sigma * safe_sparse_dot(weighted_non_outliers, X_non_outliers))

  grad[-2] = -2. * np.sum(weighted_non_outliers) / sigma

  squared_loss = weighted_loss / sigma

  2. / sigma * safe_sparse_dot(weighted_non_outliers, X_non_outliers))

  2. / sigma * safe_sparse_dot(weighted_non_outliers, X_non_outliers))

  grad[-2] = -2. * np.sum(weighted_non_outliers) / sigma

  squared_loss = weighted_loss / sigma

  2. / sigma * safe_sparse_dot(weighted_non_outliers, X_non_outliers))

  2. / sigma * safe_sparse_dot(weighted_non_outliers, X_non_outliers))

  grad[-2] = -2. * np.sum(weighted_non_outliers) / sigma



 59%|█████▉    | 118/200 [06:36<03:15,  2.38s/it, best loss: 2.0907924806558893]

  squared_loss = weighted_loss / sigma

  2. / sigma * safe_sparse_dot(weighted_non_outliers, X_non_outliers))

  2. / sigma * safe_sparse_dot(weighted_non_outliers, X_non_outliers))

  grad[-2] = -2. * np.sum(weighted_non_outliers) / sigma

  squared_loss = weighted_loss / sigma

  2. / sigma * safe_sparse_dot(weighted_non_outliers, X_non_outliers))

  2. / sigma * safe_sparse_dot(weighted_non_outliers, X_non_outliers))

  grad[-2] = -2. * np.sum(weighted_non_outliers) / sigma



100%|██████████| 200/200 [11:34<00:00,  3.47s/it, best loss: 2.0906323822573647]
-----------------------------------------------------
Freq: 25
X_freq shape: (5000, 41)
Found minimum after 200 trials:
{'alpha': 0.0003341593215463922, 'epsilon': 1.2140869644274337, 'max_iter': 480.0}
-----------------------------------------------------
  2%|▏         | 3/200 [00:03<03:34,  1.09s/it, best loss: 3.31045718364381]

  squared_loss = weighted_loss / sigma

  2. / sigma * safe_sparse_dot(weighted_non_outliers, X_non_outliers))

  2. / sigma * safe_sparse_dot(weighted_non_outliers, X_non_outliers))

  grad[-2] = -2. * np.sum(weighted_non_outliers) / sigma

  squared_loss = weighted_loss / sigma

  2. / sigma * safe_sparse_dot(weighted_non_outliers, X_non_outliers))

  2. / sigma * safe_sparse_dot(weighted_non_outliers, X_non_outliers))

  grad[-2] = -2. * np.sum(weighted_non_outliers) / sigma

  squared_loss = weighted_loss / sigma

  2. / sigma * safe_sparse_dot(weighted_non_outliers, X_non_outliers))

  2. / sigma * safe_sparse_dot(weighted_non_outliers, X_non_outliers))

  grad[-2] = -2. * np.sum(weighted_non_outliers) / sigma

  squared_loss = weighted_loss / sigma

  2. / sigma * safe_sparse_dot(weighted_non_outliers, X_non_outliers))

  2. / sigma * safe_sparse_dot(weighted_non_outliers, X_non_outliers))

  grad[-2] = -2. * np.sum(weighted_non_outliers) / sigma



  3%|▎         | 6/200 [00:08<04:31,  1.40s/it, best loss: 3.31045718364381]

  squared_loss = weighted_loss / sigma

  2. / sigma * safe_sparse_dot(weighted_non_outliers, X_non_outliers))

  2. / sigma * safe_sparse_dot(weighted_non_outliers, X_non_outliers))

  grad[-2] = -2. * np.sum(weighted_non_outliers) / sigma

  squared_loss = weighted_loss / sigma

  2. / sigma * safe_sparse_dot(weighted_non_outliers, X_non_outliers))

  2. / sigma * safe_sparse_dot(weighted_non_outliers, X_non_outliers))

  grad[-2] = -2. * np.sum(weighted_non_outliers) / sigma



  6%|▌         | 12/200 [00:16<03:59,  1.27s/it, best loss: 3.3009152711818026]

  squared_loss = weighted_loss / sigma

  2. / sigma * safe_sparse_dot(weighted_non_outliers, X_non_outliers))

  2. / sigma * safe_sparse_dot(weighted_non_outliers, X_non_outliers))

  grad[-2] = -2. * np.sum(weighted_non_outliers) / sigma

  squared_loss = weighted_loss / sigma

  2. / sigma * safe_sparse_dot(weighted_non_outliers, X_non_outliers))

  2. / sigma * safe_sparse_dot(weighted_non_outliers, X_non_outliers))

  grad[-2] = -2. * np.sum(weighted_non_outliers) / sigma

  squared_loss = weighted_loss / sigma

  2. / sigma * safe_sparse_dot(weighted_non_outliers, X_non_outliers))

  2. / sigma * safe_sparse_dot(weighted_non_outliers, X_non_outliers))

  grad[-2] = -2. * np.sum(weighted_non_outliers) / sigma

  squared_loss = weighted_loss / sigma

  2. / sigma * safe_sparse_dot(weighted_non_outliers, X_non_outliers))

  2. / sigma * safe_sparse_dot(weighted_non_outliers, X_non_outliers))

  grad[-2] = -2. * np.sum(weighted_non_outliers) / sigma

  squared_loss = weighted_loss /

  7%|▋         | 14/200 [00:19<04:31,  1.46s/it, best loss: 3.3009152711818026]

  squared_loss = weighted_loss / sigma

  2. / sigma * safe_sparse_dot(weighted_non_outliers, X_non_outliers))

  2. / sigma * safe_sparse_dot(weighted_non_outliers, X_non_outliers))

  grad[-2] = -2. * np.sum(weighted_non_outliers) / sigma

  squared_loss = weighted_loss / sigma

  2. / sigma * safe_sparse_dot(weighted_non_outliers, X_non_outliers))

  2. / sigma * safe_sparse_dot(weighted_non_outliers, X_non_outliers))

  grad[-2] = -2. * np.sum(weighted_non_outliers) / sigma

  squared_loss = weighted_loss / sigma

  2. / sigma * safe_sparse_dot(weighted_non_outliers, X_non_outliers))

  2. / sigma * safe_sparse_dot(weighted_non_outliers, X_non_outliers))

  grad[-2] = -2. * np.sum(weighted_non_outliers) / sigma



 10%|█         | 21/200 [00:36<07:15,  2.43s/it, best loss: 3.2979658143378487]

  squared_loss = weighted_loss / sigma

  2. / sigma * safe_sparse_dot(weighted_non_outliers, X_non_outliers))

  2. / sigma * safe_sparse_dot(weighted_non_outliers, X_non_outliers))

  grad[-2] = -2. * np.sum(weighted_non_outliers) / sigma

  squared_loss = weighted_loss / sigma

  2. / sigma * safe_sparse_dot(weighted_non_outliers, X_non_outliers))

  2. / sigma * safe_sparse_dot(weighted_non_outliers, X_non_outliers))

  grad[-2] = -2. * np.sum(weighted_non_outliers) / sigma

  squared_loss = weighted_loss / sigma

  2. / sigma * safe_sparse_dot(weighted_non_outliers, X_non_outliers))

  2. / sigma * safe_sparse_dot(weighted_non_outliers, X_non_outliers))

  grad[-2] = -2. * np.sum(weighted_non_outliers) / sigma

  squared_loss = weighted_loss / sigma

  2. / sigma * safe_sparse_dot(weighted_non_outliers, X_non_outliers))

  2. / sigma * safe_sparse_dot(weighted_non_outliers, X_non_outliers))

  grad[-2] = -2. * np.sum(weighted_non_outliers) / sigma

  squared_loss = weighted_loss /

 22%|██▏       | 43/200 [01:35<07:39,  2.93s/it, best loss: 3.295103929833368] 

  squared_loss = weighted_loss / sigma

  2. / sigma * safe_sparse_dot(weighted_non_outliers, X_non_outliers))

  2. / sigma * safe_sparse_dot(weighted_non_outliers, X_non_outliers))

  grad[-2] = -2. * np.sum(weighted_non_outliers) / sigma

  squared_loss = weighted_loss / sigma

  2. / sigma * safe_sparse_dot(weighted_non_outliers, X_non_outliers))

  2. / sigma * safe_sparse_dot(weighted_non_outliers, X_non_outliers))

  grad[-2] = -2. * np.sum(weighted_non_outliers) / sigma

  squared_loss = weighted_loss / sigma

  2. / sigma * safe_sparse_dot(weighted_non_outliers, X_non_outliers))

  2. / sigma * safe_sparse_dot(weighted_non_outliers, X_non_outliers))

  grad[-2] = -2. * np.sum(weighted_non_outliers) / sigma

  squared_loss = weighted_loss / sigma

  2. / sigma * safe_sparse_dot(weighted_non_outliers, X_non_outliers))

  2. / sigma * safe_sparse_dot(weighted_non_outliers, X_non_outliers))

  grad[-2] = -2. * np.sum(weighted_non_outliers) / sigma

  squared_loss = weighted_loss /

 23%|██▎       | 46/200 [01:42<06:35,  2.57s/it, best loss: 3.295103929833368]

  squared_loss = weighted_loss / sigma

  2. / sigma * safe_sparse_dot(weighted_non_outliers, X_non_outliers))

  2. / sigma * safe_sparse_dot(weighted_non_outliers, X_non_outliers))

  grad[-2] = -2. * np.sum(weighted_non_outliers) / sigma

  squared_loss = weighted_loss / sigma

  2. / sigma * safe_sparse_dot(weighted_non_outliers, X_non_outliers))

  2. / sigma * safe_sparse_dot(weighted_non_outliers, X_non_outliers))

  grad[-2] = -2. * np.sum(weighted_non_outliers) / sigma



 28%|██▊       | 56/200 [02:03<04:59,  2.08s/it, best loss: 3.295103929833368]

  squared_loss = weighted_loss / sigma

  2. / sigma * safe_sparse_dot(weighted_non_outliers, X_non_outliers))

  2. / sigma * safe_sparse_dot(weighted_non_outliers, X_non_outliers))

  grad[-2] = -2. * np.sum(weighted_non_outliers) / sigma



 48%|████▊     | 95/200 [04:08<05:20,  3.05s/it, best loss: 3.295103929833368]

  squared_loss = weighted_loss / sigma

  2. / sigma * safe_sparse_dot(weighted_non_outliers, X_non_outliers))

  2. / sigma * safe_sparse_dot(weighted_non_outliers, X_non_outliers))

  grad[-2] = -2. * np.sum(weighted_non_outliers) / sigma

  squared_loss = weighted_loss / sigma

  2. / sigma * safe_sparse_dot(weighted_non_outliers, X_non_outliers))

  2. / sigma * safe_sparse_dot(weighted_non_outliers, X_non_outliers))

  grad[-2] = -2. * np.sum(weighted_non_outliers) / sigma

  squared_loss = weighted_loss / sigma

  2. / sigma * safe_sparse_dot(weighted_non_outliers, X_non_outliers))

  2. / sigma * safe_sparse_dot(weighted_non_outliers, X_non_outliers))

  grad[-2] = -2. * np.sum(weighted_non_outliers) / sigma

  squared_loss = weighted_loss / sigma

  2. / sigma * safe_sparse_dot(weighted_non_outliers, X_non_outliers))

  2. / sigma * safe_sparse_dot(weighted_non_outliers, X_non_outliers))

  grad[-2] = -2. * np.sum(weighted_non_outliers) / sigma

  squared_loss = weighted_loss /

 50%|█████     | 101/200 [04:29<05:19,  3.23s/it, best loss: 3.295103929833368]

  squared_loss = weighted_loss / sigma

  2. / sigma * safe_sparse_dot(weighted_non_outliers, X_non_outliers))

  2. / sigma * safe_sparse_dot(weighted_non_outliers, X_non_outliers))

  grad[-2] = -2. * np.sum(weighted_non_outliers) / sigma

  squared_loss = weighted_loss / sigma

  2. / sigma * safe_sparse_dot(weighted_non_outliers, X_non_outliers))

  2. / sigma * safe_sparse_dot(weighted_non_outliers, X_non_outliers))

  grad[-2] = -2. * np.sum(weighted_non_outliers) / sigma



 54%|█████▎    | 107/200 [04:45<04:31,  2.92s/it, best loss: 3.295103929833368]

  squared_loss = weighted_loss / sigma

  2. / sigma * safe_sparse_dot(weighted_non_outliers, X_non_outliers))

  2. / sigma * safe_sparse_dot(weighted_non_outliers, X_non_outliers))

  grad[-2] = -2. * np.sum(weighted_non_outliers) / sigma



 64%|██████▍   | 129/200 [05:59<03:29,  2.94s/it, best loss: 3.295103929833368]

  squared_loss = weighted_loss / sigma

  2. / sigma * safe_sparse_dot(weighted_non_outliers, X_non_outliers))

  2. / sigma * safe_sparse_dot(weighted_non_outliers, X_non_outliers))

  grad[-2] = -2. * np.sum(weighted_non_outliers) / sigma

  squared_loss = weighted_loss / sigma

  2. / sigma * safe_sparse_dot(weighted_non_outliers, X_non_outliers))

  2. / sigma * safe_sparse_dot(weighted_non_outliers, X_non_outliers))

  grad[-2] = -2. * np.sum(weighted_non_outliers) / sigma

  squared_loss = weighted_loss / sigma

  2. / sigma * safe_sparse_dot(weighted_non_outliers, X_non_outliers))

  2. / sigma * safe_sparse_dot(weighted_non_outliers, X_non_outliers))

  grad[-2] = -2. * np.sum(weighted_non_outliers) / sigma

  squared_loss = weighted_loss / sigma

  2. / sigma * safe_sparse_dot(weighted_non_outliers, X_non_outliers))

  2. / sigma * safe_sparse_dot(weighted_non_outliers, X_non_outliers))

  grad[-2] = -2. * np.sum(weighted_non_outliers) / sigma



 66%|██████▋   | 133/200 [06:14<04:07,  3.69s/it, best loss: 3.295103929833368]

  squared_loss = weighted_loss / sigma

  2. / sigma * safe_sparse_dot(weighted_non_outliers, X_non_outliers))

  2. / sigma * safe_sparse_dot(weighted_non_outliers, X_non_outliers))

  grad[-2] = -2. * np.sum(weighted_non_outliers) / sigma

  squared_loss = weighted_loss / sigma

  2. / sigma * safe_sparse_dot(weighted_non_outliers, X_non_outliers))

  2. / sigma * safe_sparse_dot(weighted_non_outliers, X_non_outliers))

  grad[-2] = -2. * np.sum(weighted_non_outliers) / sigma

  squared_loss = weighted_loss / sigma

  2. / sigma * safe_sparse_dot(weighted_non_outliers, X_non_outliers))

  2. / sigma * safe_sparse_dot(weighted_non_outliers, X_non_outliers))

  grad[-2] = -2. * np.sum(weighted_non_outliers) / sigma



 72%|███████▏  | 144/200 [06:45<02:08,  2.30s/it, best loss: 3.295103929833368]

  squared_loss = weighted_loss / sigma

  2. / sigma * safe_sparse_dot(weighted_non_outliers, X_non_outliers))

  2. / sigma * safe_sparse_dot(weighted_non_outliers, X_non_outliers))

  grad[-2] = -2. * np.sum(weighted_non_outliers) / sigma

  squared_loss = weighted_loss / sigma

  2. / sigma * safe_sparse_dot(weighted_non_outliers, X_non_outliers))

  2. / sigma * safe_sparse_dot(weighted_non_outliers, X_non_outliers))

  grad[-2] = -2. * np.sum(weighted_non_outliers) / sigma

  squared_loss = weighted_loss / sigma

  2. / sigma * safe_sparse_dot(weighted_non_outliers, X_non_outliers))

  2. / sigma * safe_sparse_dot(weighted_non_outliers, X_non_outliers))

  grad[-2] = -2. * np.sum(weighted_non_outliers) / sigma

  squared_loss = weighted_loss / sigma

  2. / sigma * safe_sparse_dot(weighted_non_outliers, X_non_outliers))

  2. / sigma * safe_sparse_dot(weighted_non_outliers, X_non_outliers))

  grad[-2] = -2. * np.sum(weighted_non_outliers) / sigma

  squared_loss = weighted_loss /

 91%|█████████ | 182/200 [09:00<01:03,  3.53s/it, best loss: 3.295103929833368]

  squared_loss = weighted_loss / sigma

  2. / sigma * safe_sparse_dot(weighted_non_outliers, X_non_outliers))

  2. / sigma * safe_sparse_dot(weighted_non_outliers, X_non_outliers))

  grad[-2] = -2. * np.sum(weighted_non_outliers) / sigma



 98%|█████████▊| 195/200 [09:44<00:17,  3.59s/it, best loss: 3.295103929833368]

  squared_loss = weighted_loss / sigma

  2. / sigma * safe_sparse_dot(weighted_non_outliers, X_non_outliers))

  2. / sigma * safe_sparse_dot(weighted_non_outliers, X_non_outliers))

  grad[-2] = -2. * np.sum(weighted_non_outliers) / sigma

  squared_loss = weighted_loss / sigma

  2. / sigma * safe_sparse_dot(weighted_non_outliers, X_non_outliers))

  2. / sigma * safe_sparse_dot(weighted_non_outliers, X_non_outliers))

  grad[-2] = -2. * np.sum(weighted_non_outliers) / sigma

  squared_loss = weighted_loss / sigma

  2. / sigma * safe_sparse_dot(weighted_non_outliers, X_non_outliers))

  2. / sigma * safe_sparse_dot(weighted_non_outliers, X_non_outliers))

  grad[-2] = -2. * np.sum(weighted_non_outliers) / sigma

  squared_loss = weighted_loss / sigma

  2. / sigma * safe_sparse_dot(weighted_non_outliers, X_non_outliers))

  2. / sigma * safe_sparse_dot(weighted_non_outliers, X_non_outliers))

  grad[-2] = -2. * np.sum(weighted_non_outliers) / sigma

  squared_loss = weighted_loss /

100%|██████████| 200/200 [10:02<00:00,  3.01s/it, best loss: 3.295103929833368]
-----------------------------------------------------
Freq: 50
X_freq shape: (5000, 20)
Found minimum after 200 trials:
{'alpha': 0.0005559508846019253, 'epsilon': 1.2527473622212195, 'max_iter': 310.0}
-----------------------------------------------------
  4%|▍         | 8/200 [00:22<10:16,  3.21s/it, best loss: 4.262333412924745]

  squared_loss = weighted_loss / sigma

  2. / sigma * safe_sparse_dot(weighted_non_outliers, X_non_outliers))

  2. / sigma * safe_sparse_dot(weighted_non_outliers, X_non_outliers))

  grad[-2] = -2. * np.sum(weighted_non_outliers) / sigma



 23%|██▎       | 46/200 [02:05<06:05,  2.37s/it, best loss: 4.258200016549723]

  squared_loss = weighted_loss / sigma

  2. / sigma * safe_sparse_dot(weighted_non_outliers, X_non_outliers))

  2. / sigma * safe_sparse_dot(weighted_non_outliers, X_non_outliers))

  grad[-2] = -2. * np.sum(weighted_non_outliers) / sigma

  squared_loss = weighted_loss / sigma

  2. / sigma * safe_sparse_dot(weighted_non_outliers, X_non_outliers))

  2. / sigma * safe_sparse_dot(weighted_non_outliers, X_non_outliers))

  grad[-2] = -2. * np.sum(weighted_non_outliers) / sigma

  squared_loss = weighted_loss / sigma

  2. / sigma * safe_sparse_dot(weighted_non_outliers, X_non_outliers))

  2. / sigma * safe_sparse_dot(weighted_non_outliers, X_non_outliers))

  grad[-2] = -2. * np.sum(weighted_non_outliers) / sigma

  squared_loss = weighted_loss / sigma

  2. / sigma * safe_sparse_dot(weighted_non_outliers, X_non_outliers))

  2. / sigma * safe_sparse_dot(weighted_non_outliers, X_non_outliers))

  grad[-2] = -2. * np.sum(weighted_non_outliers) / sigma

  squared_loss = weighted_loss /

 48%|████▊     | 95/200 [04:59<07:49,  4.47s/it, best loss: 4.257802073856]   

  squared_loss = weighted_loss / sigma

  2. / sigma * safe_sparse_dot(weighted_non_outliers, X_non_outliers))

  2. / sigma * safe_sparse_dot(weighted_non_outliers, X_non_outliers))

  grad[-2] = -2. * np.sum(weighted_non_outliers) / sigma

  squared_loss = weighted_loss / sigma

  2. / sigma * safe_sparse_dot(weighted_non_outliers, X_non_outliers))

  2. / sigma * safe_sparse_dot(weighted_non_outliers, X_non_outliers))

  grad[-2] = -2. * np.sum(weighted_non_outliers) / sigma

  squared_loss = weighted_loss / sigma

  2. / sigma * safe_sparse_dot(weighted_non_outliers, X_non_outliers))

  2. / sigma * safe_sparse_dot(weighted_non_outliers, X_non_outliers))

  grad[-2] = -2. * np.sum(weighted_non_outliers) / sigma

  squared_loss = weighted_loss / sigma

  2. / sigma * safe_sparse_dot(weighted_non_outliers, X_non_outliers))

  2. / sigma * safe_sparse_dot(weighted_non_outliers, X_non_outliers))

  grad[-2] = -2. * np.sum(weighted_non_outliers) / sigma

  squared_loss = weighted_loss /

 69%|██████▉   | 138/200 [07:34<04:28,  4.34s/it, best loss: 4.257802073856]

  squared_loss = weighted_loss / sigma

  2. / sigma * safe_sparse_dot(weighted_non_outliers, X_non_outliers))

  2. / sigma * safe_sparse_dot(weighted_non_outliers, X_non_outliers))

  grad[-2] = -2. * np.sum(weighted_non_outliers) / sigma

  squared_loss = weighted_loss / sigma

  2. / sigma * safe_sparse_dot(weighted_non_outliers, X_non_outliers))

  2. / sigma * safe_sparse_dot(weighted_non_outliers, X_non_outliers))

  grad[-2] = -2. * np.sum(weighted_non_outliers) / sigma

  squared_loss = weighted_loss / sigma

  2. / sigma * safe_sparse_dot(weighted_non_outliers, X_non_outliers))

  2. / sigma * safe_sparse_dot(weighted_non_outliers, X_non_outliers))

  grad[-2] = -2. * np.sum(weighted_non_outliers) / sigma

  squared_loss = weighted_loss / sigma

  2. / sigma * safe_sparse_dot(weighted_non_outliers, X_non_outliers))

  2. / sigma * safe_sparse_dot(weighted_non_outliers, X_non_outliers))

  grad[-2] = -2. * np.sum(weighted_non_outliers) / sigma

  squared_loss = weighted_loss /

100%|██████████| 200/200 [11:01<00:00,  3.31s/it, best loss: 4.257802073856]
-----------------------------------------------------
Freq: 60
X_freq shape: (5000, 17)
Found minimum after 200 trials:
{'alpha': 0.00019544278281848515, 'epsilon': 1.1025490955229473, 'max_iter': 450.0}
-----------------------------------------------------
 16%|█▋        | 33/200 [01:10<05:31,  1.99s/it, best loss: 4.868996406691613]

  squared_loss = weighted_loss / sigma

  2. / sigma * safe_sparse_dot(weighted_non_outliers, X_non_outliers))

  2. / sigma * safe_sparse_dot(weighted_non_outliers, X_non_outliers))

  grad[-2] = -2. * np.sum(weighted_non_outliers) / sigma

  squared_loss = weighted_loss / sigma

  2. / sigma * safe_sparse_dot(weighted_non_outliers, X_non_outliers))

  2. / sigma * safe_sparse_dot(weighted_non_outliers, X_non_outliers))

  grad[-2] = -2. * np.sum(weighted_non_outliers) / sigma

  squared_loss = weighted_loss / sigma

  2. / sigma * safe_sparse_dot(weighted_non_outliers, X_non_outliers))

  2. / sigma * safe_sparse_dot(weighted_non_outliers, X_non_outliers))

  grad[-2] = -2. * np.sum(weighted_non_outliers) / sigma

  squared_loss = weighted_loss / sigma

  2. / sigma * safe_sparse_dot(weighted_non_outliers, X_non_outliers))

  2. / sigma * safe_sparse_dot(weighted_non_outliers, X_non_outliers))

  grad[-2] = -2. * np.sum(weighted_non_outliers) / sigma

  squared_loss = weighted_loss /

100%|██████████| 200/200 [07:31<00:00,  2.26s/it, best loss: 4.868513377565006]
-----------------------------------------------------
Freq: 75
X_freq shape: (5000, 14)
Found minimum after 200 trials:
{'alpha': 0.0022190692404681816, 'epsilon': 1.2933594993283217, 'max_iter': 370.0}
-----------------------------------------------------
100%|██████████| 200/200 [08:57<00:00,  2.69s/it, best loss: 5.005521380102399]
-----------------------------------------------------
Freq: 85
X_freq shape: (5000, 13)
Found minimum after 200 trials:
{'alpha': 0.00010798563644535371, 'epsilon': 1.1001365722700842, 'max_iter': 430.0}
-----------------------------------------------------
100%|██████████| 200/200 [06:13<00:00,  1.87s/it, best loss: 5.739722107203372]
-----------------------------------------------------
Freq: 100
X_freq shape: (5000, 10)
Found minimum after 200 trials:
{'alpha': 0.0005142384664916086, 'epsilon': 1.1475522051981037, 'max_iter': 250.0}
-----------------------------------------

#### Ridge

In [12]:
def f(space):
    ridge_reg = linear_model.Ridge(solver=space['solver'], max_iter=space['max_iter'], alpha=space['alpha'])
    scores = cross_validate(ridge_reg, X_freq, y, scoring='neg_mean_absolute_error', cv=5)
    return -scores['test_score'].mean()
    
space = {
    'solver': hp.choice('solver', ['auto', 'svd', 'cholesky', 'lsqr', 'sparse_cg', 'sag', 'saga']),
    'max_iter': ho_scope.int(hp.quniform('max_iter', low=1000, high=5000, q=100)),
    'alpha':  hp.loguniform('alpha', low=np.log(0.0001), high=np.log(1)),
}

print_results()

100%|██████████| 200/200 [07:17<00:00,  2.19s/it, best loss: 1.6426919770132027]
-----------------------------------------------------
Freq: 5
X_freq shape: (5000, 205)
Found minimum after 200 trials:
{'alpha': 0.9996376002877478, 'max_iter': 3800.0, 'solver': 4}
-----------------------------------------------------
100%|██████████| 200/200 [06:29<00:00,  1.95s/it, best loss: 1.6430855049007653]
-----------------------------------------------------
Freq: 10
X_freq shape: (5000, 103)
Found minimum after 200 trials:
{'alpha': 0.9996724005595059, 'max_iter': 3000.0, 'solver': 6}
-----------------------------------------------------
100%|██████████| 200/200 [01:24<00:00,  2.37it/s, best loss: 1.6649564081613932]
-----------------------------------------------------
Freq: 15
X_freq shape: (5000, 68)
Found minimum after 200 trials:
{'alpha': 0.7221950563325338, 'max_iter': 2700.0, 'solver': 3}
-----------------------------------------------------
100%|██████████| 200/200 [05:47<00:00,  1.74s

#### Lasso

In [13]:
import warnings
warnings.filterwarnings("ignore")

In [14]:
def f(space):
    lasso_reg = linear_model.Lasso(max_iter=space['max_iter'], alpha=space['alpha'], normalize=space['normalize'])
    scores = cross_validate(lasso_reg, X_freq, y, scoring='neg_mean_absolute_error', cv=5)
    return -scores['test_score'].mean()
    
space = {
    'normalize': hp.choice('normalize', [True, False]),
    'max_iter': ho_scope.int(hp.quniform('max_iter', low=1000, high=5000, q=100)),
    'alpha':  hp.loguniform('alpha', low=np.log(0.0001), high=np.log(1)),
}

print_results()

100%|██████████| 200/200 [08:31<00:00,  2.56s/it, best loss: 1.6288753482626426]
-----------------------------------------------------
Freq: 5
X_freq shape: (5000, 205)
Found minimum after 200 trials:
{'alpha': 0.0030460061414800186, 'max_iter': 2900.0, 'normalize': 0}
-----------------------------------------------------
100%|██████████| 200/200 [00:43<00:00,  4.59it/s, best loss: 1.629510835905562]
-----------------------------------------------------
Freq: 10
X_freq shape: (5000, 103)
Found minimum after 200 trials:
{'alpha': 0.0028671464051386562, 'max_iter': 2300.0, 'normalize': 0}
-----------------------------------------------------
100%|██████████| 200/200 [00:28<00:00,  7.09it/s, best loss: 1.656825685703484]
-----------------------------------------------------
Freq: 15
X_freq shape: (5000, 68)
Found minimum after 200 trials:
{'alpha': 0.0011438088930735243, 'max_iter': 3300.0, 'normalize': 0}
-----------------------------------------------------
100%|██████████| 200/200 [00:

#### Decision tree

In [15]:
def f(space):
    dt_reg = tree.DecisionTreeRegressor(max_depth=space['max_depth'], min_samples_split=space['min_samples_split'],
                                       min_samples_leaf=space['min_samples_leaf'], min_weight_fraction_leaf=
                                        space['min_weight_fraction_leaf'], max_features=space['max_features'])
    scores = cross_validate(dt_reg, X_freq, y, scoring='neg_mean_absolute_error', cv=5)
    return -scores['test_score'].mean()
    
space = {
    'max_depth':  ho_scope.int(hp.quniform('max_iter', low=4, high=100, q=2)),
    'min_samples_split': ho_scope.int(hp.quniform('min_samples_split', low=2, high=10, q=1)),
    'min_samples_leaf':  ho_scope.int(hp.quniform('min_samples_leaf', low=1, high=10, q=1)),
    'min_weight_fraction_leaf': hp.uniform('min_weight_fraction_leaf', 0, 0.5),
    'max_features': hp.choice('max_features', ['auto', 'sqrt', 'log2'])
}

print_results()

100%|██████████| 200/200 [02:49<00:00,  1.18it/s, best loss: 0.833199216349206] 
-----------------------------------------------------
Freq: 5
X_freq shape: (5000, 205)
Found minimum after 200 trials:
{'max_features': 0, 'max_iter': 34.0, 'min_samples_leaf': 5.0, 'min_samples_split': 6.0, 'min_weight_fraction_leaf': 0.0006328265882642114}
-----------------------------------------------------
100%|██████████| 200/200 [01:37<00:00,  2.06it/s, best loss: 0.7914712000000002]
-----------------------------------------------------
Freq: 10
X_freq shape: (5000, 103)
Found minimum after 200 trials:
{'max_features': 0, 'max_iter': 56.0, 'min_samples_leaf': 1.0, 'min_samples_split': 2.0, 'min_weight_fraction_leaf': 0.00012299118822591242}
-----------------------------------------------------
100%|██████████| 200/200 [01:04<00:00,  3.12it/s, best loss: 0.9789725666666668]
-----------------------------------------------------
Freq: 15
X_freq shape: (5000, 68)
Found minimum after 200 trials:
{'max_f

#### Passive aggresive

In [16]:
def f(space):
    pa_reg = PassiveAggressiveRegressor(max_iter=space['max_iter'], tol=space['tol'], 
                                       C = space['C'])
    scores = cross_validate(pa_reg, X_freq, y, scoring='neg_mean_absolute_error', cv=5)
    return -scores['test_score'].mean()
    
space = {
    'max_iter': ho_scope.int(hp.quniform('max_iter', low=1000, high=5000, q=100)),
    'tol': hp.loguniform('tol', low=np.log(0.000001), high=np.log(0.001)),
    'verbose': ho_scope.int(hp.quniform('verbose', low=1, high=100, q=2)),
    'C':  hp.loguniform('c', low=np.log(0.0001), high=np.log(10)),
}

print_results()

100%|██████████| 200/200 [06:24<00:00,  1.92s/it, best loss: 1.8025222457288126]
-----------------------------------------------------
Freq: 5
X_freq shape: (5000, 205)
Found minimum after 200 trials:
{'c': 0.06150486923618484, 'max_iter': 1100.0, 'tol': 0.0002940125570467737, 'verbose': 62.0}
-----------------------------------------------------
100%|██████████| 200/200 [04:39<00:00,  1.40s/it, best loss: 1.6786784907816734]
-----------------------------------------------------
Freq: 10
X_freq shape: (5000, 103)
Found minimum after 200 trials:
{'c': 0.08123880390241131, 'max_iter': 3900.0, 'tol': 1.4188516665807768e-05, 'verbose': 54.0}
-----------------------------------------------------
100%|██████████| 200/200 [04:28<00:00,  1.34s/it, best loss: 1.6727711345764547]
-----------------------------------------------------
Freq: 15
X_freq shape: (5000, 68)
Found minimum after 200 trials:
{'c': 0.04527430471433716, 'max_iter': 1600.0, 'tol': 2.2193316877624533e-06, 'verbose': 98.0}
----

#### Xgboost

In [17]:
def f(space):
    xgb_reg = xgb.XGBRegressor(objective="reg:linear", booster=space['booster'], eta=space['eta'], 
                               gamma=space['gamma'], max_depth=space['max_depth'], reg_lambda=space['lambda'],
                               alpha=space['alpha'], verbosity=0)
    scores = cross_validate(xgb_reg, X_freq, y, scoring='neg_mean_absolute_error', cv=5)
    return -scores['test_score'].mean()
    
space = {
    'booster': hp.choice('booster', ['gbtree', 'gblinear', 'dart']),
    'eta': hp.loguniform('eta', low=np.log(0.001), high=np.log(1)),
    'gamma': hp.loguniform('gamma', low=np.log(0.001), high=np.log(100)),
    'max_depth': ho_scope.int(hp.quniform('max_depth', low=5, high=50, q=2)),
    'lambda': hp.loguniform('lambda', low=np.log(0.001), high=np.log(10)),
    'alpha':  hp.loguniform('alpha', low=np.log(0.001), high=np.log(10)),
}

print_results(iter_num=100)

100%|██████████| 100/100 [2:41:46<00:00, 97.06s/it, best loss: 0.5684488951660158]  
-----------------------------------------------------
Freq: 5
X_freq shape: (5000, 205)
Found minimum after 100 trials:
{'alpha': 2.22456831562574, 'booster': 0, 'eta': 0.3973668207576586, 'gamma': 0.001076016879970481, 'lambda': 0.003034526844710481, 'max_depth': 30.0}
-----------------------------------------------------
100%|██████████| 100/100 [1:23:55<00:00, 50.35s/it, best loss: 0.6310507911132813]
-----------------------------------------------------
Freq: 10
X_freq shape: (5000, 103)
Found minimum after 100 trials:
{'alpha': 0.0015324479870338621, 'booster': 0, 'eta': 0.3926055790235545, 'gamma': 0.005251357110207934, 'lambda': 0.0019836344587500485, 'max_depth': 18.0}
-----------------------------------------------------
100%|██████████| 100/100 [1:03:18<00:00, 37.98s/it, best loss: 0.7613856952392577]
-----------------------------------------------------
Freq: 15
X_freq shape: (5000, 68)
Foun