In [1]:
import matplotlib.pyplot as plt
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error
from sklearn.model_selection import cross_validate
import tqdm
import random

In [2]:
from hyperopt.pyll import scope as ho_scope
from hyperopt import fmin, tpe, hp

### Prepare data

In [3]:
from common import get_data, get_freq_data, signal_cyclic_shift, generate_multi_signal

In [4]:
X_origin, y_origin = get_data()

In [5]:
from sklearn.utils import shuffle

def prepare_data(X_origin, y_origin, tau_range, alpha_range, data_size=1000, to_print=False):    
    X = []
    y = []
    alpha_values = []
    tau_values = []
    for i in range(data_size):
        alpha = random.choice(alpha_range)
        tau = random.choice(tau_range)
        signal = generate_multi_signal(X_origin, y_origin, tau, alpha)['multi_impulse']
        
        X.append(signal)
        y.append(np.log10(alpha))
        
        alpha_values.append(alpha)
        tau_values.append(tau)
   
    X = np.array(X)
    y = np.array(y)
    
    if to_print:
        print("X shape:", X.shape)
        print("y shape:", y.shape)
    
    X, y = shuffle(X, y)

#     plt.scatter(alpha_values, tau_values)
#     plt.show()
    
    return X, y

In [6]:
# alpha_range = np.array([np.around(10**i, decimals=4) for i in np.arange(0, 3.1, 0.01)])
# alpha_range = np.arange(1, 1000, 0.1)
alpha_range = np.array([10**i for i in np.arange(0, 3, 0.05)])
tau_range = np.arange(-25, 25, 1)

In [7]:
X, y = prepare_data(X_origin, y_origin, tau_range, alpha_range, data_size=500, to_print=True)

X shape: (500, 1024)
y shape: (500,)


### Hyperopt

In [8]:
from sklearn.pipeline import Pipeline
from sklearn.linear_model import HuberRegressor
from sklearn import linear_model
from sklearn import tree
from sklearn import svm
from sklearn.linear_model import PassiveAggressiveRegressor
import xgboost as xgb
from catboost import CatBoostRegressor

In [9]:
freq_values = [1, 5, 10, 15, 25, 50, 60, 75, 85, 100]

In [10]:
def print_results(iter_num=200):
    global X_freq
    for freq in freq_values:
        X_freq = get_freq_data(X, freq=freq)

        best = fmin(
            fn=f,  # "Loss" function to minimize
            space=space,  # Hyperparameter space
            algo=tpe.suggest,  # Tree-structured Parzen Estimator (TPE)
            max_evals=iter_num  # Perform 100 trials
        )

        print('-----------------------------------------------------')
        print("Freq:", freq)
        print("X_freq shape:", X_freq.shape)
        print("Found minimum after %d trials:" %(iter_num))
        print(best)
        print('-----------------------------------------------------')

##### Huber

In [11]:
def f(space):
    huber_reg = HuberRegressor(epsilon=space['epsilon'], max_iter=space['max_iter'], alpha=space['alpha'])
    scores = cross_validate(huber_reg, X_freq, y, scoring='neg_mean_absolute_error', cv=5)
    return -scores['test_score'].mean()
    
space = {
    'epsilon':  hp.loguniform('epsilon', low=np.log(1.1), high=np.log(10)),
    'max_iter': ho_scope.int(hp.quniform('max_iter', low=100, high=500, q=10)),
    'alpha':  hp.loguniform('alpha', low=np.log(0.0001), high=np.log(0.01)),
}

global X_freq
print_results()

100%|██████████| 200/200 [09:39<00:00,  2.90s/it, best loss: 0.6001469249942125]
-----------------------------------------------------
Freq: 1
X_freq shape: (500, 1024)
Found minimum after 200 trials:
{'alpha': 0.0008862233649182545, 'epsilon': 3.522881623454345, 'max_iter': 130.0}
-----------------------------------------------------
100%|██████████| 200/200 [04:22<00:00,  1.31s/it, best loss: 0.5928062870246442]
-----------------------------------------------------
Freq: 5
X_freq shape: (500, 204)
Found minimum after 200 trials:
{'alpha': 0.009814954503151637, 'epsilon': 2.8822182873563125, 'max_iter': 100.0}
-----------------------------------------------------
100%|██████████| 200/200 [04:02<00:00,  1.21s/it, best loss: 0.5939124798484652]
-----------------------------------------------------
Freq: 10
X_freq shape: (500, 102)
Found minimum after 200 trials:
{'alpha': 0.009944063913603498, 'epsilon': 3.7685773623301952, 'max_iter': 310.0}
--------------------------------------------

#### Ridge

In [12]:
def f(space):
    ridge_reg = linear_model.Ridge(solver=space['solver'], max_iter=space['max_iter'], alpha=space['alpha'])
    scores = cross_validate(ridge_reg, X_freq, y, scoring='neg_mean_absolute_error', cv=5)
    return -scores['test_score'].mean()
    
space = {
    'solver': hp.choice('solver', ['auto', 'svd', 'cholesky', 'lsqr', 'sparse_cg', 'sag', 'saga']),
    'max_iter': ho_scope.int(hp.quniform('max_iter', low=1000, high=5000, q=100)),
    'alpha':  hp.loguniform('alpha', low=np.log(0.0001), high=np.log(1)),
}

global X_freq
print_results()

100%|██████████| 200/200 [07:06<00:00,  2.13s/it, best loss: 0.5932825667494368]
-----------------------------------------------------
Freq: 1
X_freq shape: (500, 1024)
Found minimum after 200 trials:
{'alpha': 0.17055124503981708, 'max_iter': 4000.0, 'solver': 3}
-----------------------------------------------------
100%|██████████| 200/200 [01:14<00:00,  2.67it/s, best loss: 0.5916921617802513]
-----------------------------------------------------
Freq: 5
X_freq shape: (500, 204)
Found minimum after 200 trials:
{'alpha': 0.035595843222584585, 'max_iter': 1600.0, 'solver': 3}
-----------------------------------------------------
100%|██████████| 200/200 [01:43<00:00,  1.93it/s, best loss: 0.5928987250176619]
-----------------------------------------------------
Freq: 10
X_freq shape: (500, 102)
Found minimum after 200 trials:
{'alpha': 0.013167811954036572, 'max_iter': 3500.0, 'solver': 6}
-----------------------------------------------------
100%|██████████| 200/200 [01:08<00:00,  2.

#### Lasso

In [13]:
import warnings
warnings.filterwarnings("ignore")

In [14]:
def f(space):
    lasso_reg = linear_model.Lasso(max_iter=space['max_iter'], alpha=space['alpha'], normalize=space['normalize'])
    scores = cross_validate(lasso_reg, X_freq, y, scoring='neg_mean_absolute_error', cv=5)
    return -scores['test_score'].mean()
    
space = {
    'normalize': hp.choice('normalize', [True, False]),
    'max_iter': ho_scope.int(hp.quniform('max_iter', low=1000, high=5000, q=100)),
    'alpha':  hp.loguniform('alpha', low=np.log(0.0001), high=np.log(1)),
}

global X_freq
print_results()

100%|██████████| 200/200 [06:18<00:00,  1.89s/it, best loss: 0.5925920299258463]
-----------------------------------------------------
Freq: 1
X_freq shape: (500, 1024)
Found minimum after 200 trials:
{'alpha': 0.00026565972749035015, 'max_iter': 5000.0, 'normalize': 1}
-----------------------------------------------------
100%|██████████| 200/200 [02:07<00:00,  1.57it/s, best loss: 0.5927638511218996]
-----------------------------------------------------
Freq: 5
X_freq shape: (500, 204)
Found minimum after 200 trials:
{'alpha': 0.000323089195263108, 'max_iter': 4700.0, 'normalize': 1}
-----------------------------------------------------
100%|██████████| 200/200 [01:08<00:00,  2.94it/s, best loss: 0.5938024216629183]
-----------------------------------------------------
Freq: 10
X_freq shape: (500, 102)
Found minimum after 200 trials:
{'alpha': 0.00033959845544862986, 'max_iter': 2100.0, 'normalize': 1}
-----------------------------------------------------
100%|██████████| 200/200 [00

#### Decision tree

In [15]:
def f(space):
    dt_reg = tree.DecisionTreeRegressor(max_depth=space['max_depth'], min_samples_split=space['min_samples_split'],
                                       min_samples_leaf=space['min_samples_leaf'], min_weight_fraction_leaf=
                                        space['min_weight_fraction_leaf'], max_features=space['max_features'])
    scores = cross_validate(dt_reg, X_freq, y, scoring='neg_mean_absolute_error', cv=5)
    return -scores['test_score'].mean()
    
space = {
    'max_depth':  ho_scope.int(hp.quniform('max_iter', low=4, high=100, q=2)),
    'min_samples_split': ho_scope.int(hp.quniform('min_samples_split', low=2, high=10, q=1)),
    'min_samples_leaf':  ho_scope.int(hp.quniform('min_samples_leaf', low=1, high=10, q=1)),
    'min_weight_fraction_leaf': hp.uniform('min_weight_fraction_leaf', 0, 0.5),
    'max_features': hp.choice('max_features', ['auto', 'sqrt', 'log2'])
}

global X_freq
print_results()

100%|██████████| 200/200 [00:29<00:00,  6.80it/s, best loss: 0.6976033616734909]
-----------------------------------------------------
Freq: 1
X_freq shape: (500, 1024)
Found minimum after 200 trials:
{'max_features': 0, 'max_iter': 38.0, 'min_samples_leaf': 5.0, 'min_samples_split': 10.0, 'min_weight_fraction_leaf': 0.32584947432607825}
-----------------------------------------------------
100%|██████████| 200/200 [00:12<00:00, 16.27it/s, best loss: 0.6974829980289179]
-----------------------------------------------------
Freq: 5
X_freq shape: (500, 204)
Found minimum after 200 trials:
{'max_features': 0, 'max_iter': 62.0, 'min_samples_leaf': 7.0, 'min_samples_split': 7.0, 'min_weight_fraction_leaf': 0.34579871962115427}
-----------------------------------------------------
100%|██████████| 200/200 [00:09<00:00, 21.15it/s, best loss: 0.6974829980289179]
-----------------------------------------------------
Freq: 10
X_freq shape: (500, 102)
Found minimum after 200 trials:
{'max_feature

#### Passive aggresive

In [16]:
def f(space):
    pa_reg = PassiveAggressiveRegressor(max_iter=space['max_iter'], tol=space['max_iter'], 
                                       C = space['C'])
    scores = cross_validate(pa_reg, X_freq, y, scoring='neg_mean_absolute_error', cv=5)
    return -scores['test_score'].mean()
    
space = {
    'max_iter': ho_scope.int(hp.quniform('max_iter', low=1000, high=5000, q=100)),
    'tol': hp.loguniform('tol', low=np.log(0.000001), high=np.log(0.001)),
    'verbose': ho_scope.int(hp.quniform('verbose', low=1, high=100, q=2)),
    'C':  hp.loguniform('c', low=np.log(0.0001), high=np.log(10)),
}

global X_freq
print_results()

100%|██████████| 200/200 [00:20<00:00,  9.94it/s, best loss: 0.7059047889381511]
-----------------------------------------------------
Freq: 1
X_freq shape: (500, 1024)
Found minimum after 200 trials:
{'c': 0.0005118513041847099, 'max_iter': 3400.0, 'tol': 5.468543150798296e-06, 'verbose': 78.0}
-----------------------------------------------------
100%|██████████| 200/200 [00:10<00:00, 19.99it/s, best loss: 0.7055752831433362]
-----------------------------------------------------
Freq: 5
X_freq shape: (500, 204)
Found minimum after 200 trials:
{'c': 0.001123685263491527, 'max_iter': 2100.0, 'tol': 2.872641304840765e-06, 'verbose': 82.0}
-----------------------------------------------------
100%|██████████| 200/200 [00:08<00:00, 23.77it/s, best loss: 0.7051924669603904]
-----------------------------------------------------
Freq: 10
X_freq shape: (500, 102)
Found minimum after 200 trials:
{'c': 0.0026257015811204975, 'max_iter': 4400.0, 'tol': 1.8183423455195677e-06, 'verbose': 98.0}
--

#### Xgboost

In [None]:
def f(space):
    xgb_reg = xgb.XGBRegressor(objective="reg:linear", booster=space['booster'], eta=space['eta'], 
                               gamma=space['gamma'], max_depth=space['max_depth'], reg_lambda=space['lambda'],
                               alpha=space['alpha'], verbosity=0)
    scores = cross_validate(xgb_reg, X_freq, y, scoring='neg_mean_absolute_error', cv=5)
    return -scores['test_score'].mean()
    
space = {
    'booster': hp.choice('booster', ['gbtree', 'gblinear', 'dart']),
    'eta': hp.loguniform('eta', low=np.log(0.001), high=np.log(1)),
    'gamma': hp.loguniform('gamma', low=np.log(0.001), high=np.log(100)),
    'max_depth': ho_scope.int(hp.quniform('max_depth', low=5, high=50, q=2)),
    'lambda': hp.loguniform('lambda', low=np.log(0.001), high=np.log(10)),
    'alpha':  hp.loguniform('alpha', low=np.log(0.001), high=np.log(10)),
}

global X_freq
print_results(iter_num=100)

 65%|██████▌   | 65/100 [45:38<25:30, 43.74s/it, best loss: 0.6649719057321549]  