In [1]:
import matplotlib.pyplot as plt
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.model_selection import cross_validate
import tqdm
import random

### Preprocessing data

In [2]:
from common import get_data, get_freq_data, signal_cyclic_shift, generate_multi_signal

In [4]:
X_origin, y_origin = get_data(data_path='./data/na62_11_pulses.txt')

In [8]:
def prepare_data(X_origin, y_origin, alpha_range, data_size=1000, to_print=False):
    pos_size = int(data_size/2)
    neg_size = data_size - pos_size
    
    X = []
    y = []
    for i in range(data_size):
        alpha = random.choice(alpha_range)
        
        if i < pos_size:
            X.append(random.choice(X_one_signal))
#             X.append(generate_multi_signal(X_origin, y_origin, tau, alpha)['first_impulse'])

            y.append(1)
        else:
            X.append(generate_multi_signal(X_origin, y_origin, alpha)['multi_impulse'])
            y.append(0)
   
    X = np.array(X)
    y = np.array(y)
   
    if to_print:
#         print("X positive shape:", X_positive.shape)
#         print("y positive shape:", y_positive.shape)
#         print("X negative shape:", X_negative.shape)
#         print("y negative shape:", y_negative.shape)
        print("X shape:", X.shape)
        print("y shape:", y.shape)
    
    X, y = shuffle(X, y)

    return X, y

In [9]:
alpha_range = np.array([np.around(10**i, decimals=4) for i in np.arange(-3, 3.1, 0.1)])
# tau_range = np.arange(-25, 25, 1)
X, y = prepare_data(X_origin, y_origin, alpha_range, data_size=5000, to_print=True)

X shape: (5000, 1024)
y shape: (5000,)


NameError: name 'shuffle' is not defined

In [None]:
plt.plot(X[0])

In [None]:
len(X)

In [None]:
import random
i = random.choice(range(len(X)))
plt.title(str(y[i]))
plt.plot(X[i])
plt.show()

### Models

In [None]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
from sklearn.linear_model import PassiveAggressiveClassifier
from sklearn.neural_network import MLPClassifier

In [None]:
freq_values = [1, 5, 10, 15, 25, 50, 60, 75, 85, 100]

In [None]:
def print_results(iter_num=200):
    global X_freq
    for freq in freq_values:
        X_freq = get_freq_data(X, freq=freq)

        best = fmin(
            fn=f,  # "Loss" function to minimize
            space=space,  # Hyperparameter space
            algo=tpe.suggest,  # Tree-structured Parzen Estimator (TPE)
            max_evals=iter_num  # Perform 100 trials
        )

        print('-----------------------------------------------------')
        print("Freq:", freq)
        print("X_freq shape:", X_freq.shape)
        print("Found minimum after %d trials:" %(iter_num))
        print(best)
        print('-----------------------------------------------------')

### Hyperopt

In [None]:
from hyperopt.pyll import scope as ho_scope
from hyperopt import fmin, tpe, hp

###### KNeighborsClassifier

In [None]:
def f(space):
        model = KNeighborsClassifier(n_neighbors=space['n_neighbors'], weights=space['weights'],\
                                     algorithm=space['algorithm'], leaf_size=space['leaf_size'], p=space['p'])
        scores = cross_validate(model, X_freq, y, scoring='accuracy', cv=5)
        return -scores['test_score'].mean()

space = {
        'n_neighbors': ho_scope.int(hp.quniform('n_neighbors', low=2, high=10, q=1)),
        'weights':  hp.choice('weights', ['uniform', 'distance']),
        'algorithm': hp.choice('algorithm', ['auto', 'ball_tree', 'kd_tree', 'brute']),
        'leaf_size': ho_scope.int(hp.quniform('leaf_size', low=4, high=60, q=2)),
        'p': hp.choice('p', [1, 2])
}
    
global X_freq
print_results(iter_num=50)

##### DecisionTreeClassifier

In [None]:
def f(space):
    model = DecisionTreeClassifier(max_depth=space['max_depth'], max_features=space['max_features'],\
                                  criterion=space['criterion'], min_samples_split=space['min_samples_split'],\
                                  min_samples_leaf=space['min_samples_leaf'], min_weight_fraction_leaf = space['min_weight_fraction_leaf'])
    scores = cross_validate(model, X_freq, y, scoring='accuracy', cv=5)
    return -scores['test_score'].mean()
    
space = {
    'max_depth': hp.choice('max_depth', range(1,20)),
    'max_features': hp.choice('max_features', ['auto', 'sqrt', 'log2']),
    'criterion': hp.choice('criterion', ["gini", "entropy"]),
    'min_samples_split': ho_scope.int(hp.quniform('min_samples_split', low=2, high=10, q=1)),
    'min_samples_leaf':  ho_scope.int(hp.quniform('min_samples_leaf', low=1, high=10, q=1)),
    'min_weight_fraction_leaf': hp.uniform('min_weight_fraction_leaf', 0, 0.5),
}

global X_freq
print_results()

##### RandomForest

In [None]:
import warnings
warnings.filterwarnings("ignore")

In [None]:
def f(space):
    model = RandomForestClassifier(max_depth=space['max_depth'], max_features=space['max_features'],\
                                  criterion=space['criterion'], min_samples_split=space['min_samples_split'],\
                                  min_samples_leaf=space['min_samples_leaf'], min_weight_fraction_leaf = space['min_weight_fraction_leaf'],\
                                  verbose=0)
    scores = cross_validate(model, X_freq, y, scoring='accuracy', cv=5)
    return -scores['test_score'].mean()
    
space = {
    'max_depth': hp.choice('max_depth', range(1,20)),
    'max_features': hp.choice('max_features', ['auto', 'sqrt', 'log2']),
    'criterion': hp.choice('criterion', ["gini", "entropy"]),
    'min_samples_split': ho_scope.int(hp.quniform('min_samples_split', low=2, high=10, q=1)),
    'min_samples_leaf':  ho_scope.int(hp.quniform('min_samples_leaf', low=1, high=10, q=1)),
    'min_weight_fraction_leaf': hp.uniform('min_weight_fraction_leaf', 0, 0.5),
}

global X_freq
print_results()

##### GaussianNB

In [17]:
def f(space):
    model = GaussianNB(var_smoothing=space['var_smoothing'])
    scores = cross_validate(model, X_freq, y, scoring='accuracy', cv=5)
    return -scores['test_score'].mean()
    
space = {
    'var_smoothing': hp.loguniform('var_smoothing', low=np.log(1e-10), high=np.log(1e-7))
}

global X_freq
print_results()

100%|██████████| 200/200 [01:21<00:00,  2.47it/s, best loss: -0.5478000000000001]
-----------------------------------------------------
Freq: 1
X_freq shape: (5000, 1024)
Found minimum after 200 trials:
{'var_smoothing': 3.3508990415658856e-08}
-----------------------------------------------------
100%|██████████| 200/200 [00:11<00:00, 17.17it/s, best loss: -0.5466]
-----------------------------------------------------
Freq: 5
X_freq shape: (5000, 204)
Found minimum after 200 trials:
{'var_smoothing': 9.475946253489403e-08}
-----------------------------------------------------
100%|██████████| 200/200 [00:06<00:00, 29.01it/s, best loss: -0.5450000000000002]
-----------------------------------------------------
Freq: 10
X_freq shape: (5000, 102)
Found minimum after 200 trials:
{'var_smoothing': 3.048234025106439e-08}
-----------------------------------------------------
100%|██████████| 200/200 [00:05<00:00, 35.34it/s, best loss: -0.5422]
------------------------------------------------

##### SVC

In [None]:
def f(space):
    model = SVC(C=space['C'], kernel=space['kernel'], degree=space['degree'], gamma=space['gamma'],\
               shrinking=space['shrinking'])
    scores = cross_validate(model, X_freq, y, scoring='accuracy', cv=5)
    return -scores['test_score'].mean()
    
space = {
    'C': hp.loguniform('C', low=np.log(0.01), high=np.log(1)),
    'kernel': hp.choice('kernel', ['linear', 'poly', 'rbf', 'sigmoid']),
    'degree':  ho_scope.int(hp.quniform('degree', low=2, high=5, q=1)),
    'gamma':  hp.loguniform('gamma', low=np.log(0.001), high=np.log(100)),
    'shrinking': hp.choice('shrinking', [True, False])
}

global X_freq
print_results(iter_num=50)

 20%|██        | 10/50 [1:11:07<3:06:05, 279.13s/it, best loss: -0.62]           

##### PassiveAggressiveClassifier

In [13]:
def f(space):
    model = PassiveAggressiveClassifier(C=space['C'], fit_intercept=space['fit_intercept'])
    scores = cross_validate(model, X_freq, y, scoring='accuracy', cv=5)
    return -scores['test_score'].mean()
    
space = {
    'C': hp.loguniform('C', low=np.log(0.01), high=np.log(1)),
    'fit_intercept': hp.choice('fit_intercept', [True, False]),
}

global X_freq
print_results()

100%|██████████| 200/200 [02:30<00:00,  1.33it/s, best loss: -0.5146]
-----------------------------------------------------
Freq: 1
X_freq shape: (5000, 1024)
Found minimum after 200 trials:
{'C': 0.042531592126115486, 'fit_intercept': 0}
-----------------------------------------------------
100%|██████████| 200/200 [00:39<00:00,  5.11it/s, best loss: -0.5426]           
-----------------------------------------------------
Freq: 5
X_freq shape: (5000, 204)
Found minimum after 200 trials:
{'C': 0.07157846241623136, 'fit_intercept': 0}
-----------------------------------------------------
100%|██████████| 200/200 [00:41<00:00,  4.80it/s, best loss: -0.5527999999999998]
-----------------------------------------------------
Freq: 10
X_freq shape: (5000, 102)
Found minimum after 200 trials:
{'C': 0.1591828678137072, 'fit_intercept': 0}
-----------------------------------------------------
100%|██████████| 200/200 [00:19<00:00, 10.13it/s, best loss: -0.5484]           
---------------------

#####  MLPClassifier

In [None]:
def f(space):
    model = MLPClassifier(hidden_layer_sizes=(space['first_size'], space['second_size']), 
                          activation=space['activation'], batch_size=16,
                         max_iter=space['max_iter'], learning_rate=space['learning_rate'],\
                         learning_rate_init=space['learning_rate_init'])
    scores = cross_validate(model, X_freq, y, scoring='accuracy', cv=5)
    return -scores['test_score'].mean()
    
space = {
    'first_size':  ho_scope.int(hp.quniform('first_size', low=50, high=100, q=10)),
    'second_size':  ho_scope.int(hp.quniform('second_size', low=10, high=40, q=5)),
    'activation': hp.choice('activation', ['identity', 'logistic', 'tanh', 'relu']),
    'learning_rate': hp.choice('learning_rate', ['constant', 'invscaling', 'adaptive']),
    'learning_rate_init': hp.loguniform('learning_rate_init', low=np.log(0.00001), high=np.log(0.1)),
    'max_iter': ho_scope.int(hp.quniform('max_iter', low=300, high=500, q=25))
}

global X_freq
print_results(iter_num=5)

100%|██████████| 5/5 [23:37<00:00, 283.51s/it, best loss: -0.5252000000000001]
-----------------------------------------------------
Freq: 1
X_freq shape: (5000, 1024)
Found minimum after 5 trials:
{'activation': 3, 'first_size': 80.0, 'learning_rate': 2, 'learning_rate_init': 1.6528996498760195e-05, 'max_iter': 300.0, 'second_size': 10.0}
-----------------------------------------------------
100%|██████████| 5/5 [02:45<00:00, 33.16s/it, best loss: -0.5]
-----------------------------------------------------
Freq: 5
X_freq shape: (5000, 204)
Found minimum after 5 trials:
{'activation': 1, 'first_size': 60.0, 'learning_rate': 0, 'learning_rate_init': 0.04216946992168108, 'max_iter': 375.0, 'second_size': 10.0}
-----------------------------------------------------
  0%|          | 0/5 [00:00<?, ?it/s, best loss: ?]