In [44]:
import numpy as np
from sklearn.svm import SVC
from sklearn.model_selection import RandomizedSearchCV
import scipy

In [45]:
import ipynb.fs.full.TrainTest as TrainTest

p = 30 # want this to be high
w = 4  # want this to be low
cw = 9 # want this to be high
data = np.load('./data/' + 'p' + str(p) + '_w' + str(w) + '_cw' + str(cw) + '.npy')

x_train, y_train, x_test, y_test = TrainTest.get_train_test(data, 0.8)

In [46]:
svm = SVC()

param_options = {
    'C' : scipy.stats.expon(scale=100),
    'kernel' : ['linear', 'poly', 'rbf', 'sigmoid'],
    'degree' : [2, 3, 4, 5],
    'gamma' : scipy.stats.expon(scale=.1),
    'coef0' : [0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0],
    'probability' : [True, False],
    'shrinking' : [True, False],
    'tol' : [1e-3, 1e-4],
#     'cache_size' : [],
    'class_weight' : [None, 'balanced'],
#     'verbose' : [],
#     'max_iter' : [],
#     'decision_function_shape' : []
#     'random_state' : []
}

svm_random = RandomizedSearchCV(estimator = svm, param_distributions = param_options, 
    n_iter = 100, cv = 3, verbose=1, random_state=42, n_jobs = -1)

svm_random.fit(x_train, y_train)

Fitting 3 folds for each of 100 candidates, totalling 300 fits


[Parallel(n_jobs=-1)]: Done  53 tasks      | elapsed:    3.3s
[Parallel(n_jobs=-1)]: Done 233 tasks      | elapsed:   20.6s
[Parallel(n_jobs=-1)]: Done 300 out of 300 | elapsed:   36.7s finished


RandomizedSearchCV(cv=3, error_score='raise',
          estimator=SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma='auto', kernel='rbf',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False),
          fit_params=None, iid=True, n_iter=100, n_jobs=-1,
          param_distributions={'probability': [True, False], 'shrinking': [True, False], 'degree': [2, 3, 4, 5], 'coef0': [0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0], 'C': <scipy.stats._distn_infrastructure.rv_frozen object at 0x7f3148448eb8>, 'gamma': <scipy.stats._distn_infrastructure.rv_frozen object at 0x7f3148448748>, 'kernel': ['linear', 'poly', 'rbf', 'sigmoid'], 'class_weight': [None, 'balanced'], 'tol': [0.001, 0.0001]},
          pre_dispatch='2*n_jobs', random_state=42, refit=True,
          return_train_score='warn', scoring=None, verbose=1)

In [47]:
best = svm_random.best_params_
best

{'C': 47.234776158326284,
 'class_weight': None,
 'coef0': 0.1,
 'degree': 5,
 'gamma': 0.42189593008315573,
 'kernel': 'rbf',
 'probability': True,
 'shrinking': False,
 'tol': 0.001}

In [48]:
from sklearn.metrics import accuracy_score, precision_score, f1_score

svm = SVC(**best)

avg_acc = 0
avg_prec = 0
avg_f1 = 0
num = 20
for i in range(num):
    svm.fit(x_train, y_train)
    
    y_pred = svm.predict(x_test)
    
    avg_acc += accuracy_score(y_test, y_pred)
    avg_prec += precision_score(y_test, y_pred)
    avg_f1 += f1_score(y_test, y_pred)

avg_acc /= num
avg_prec /= num
avg_f1 /= num

print('Average accuracy:  ' + str(avg_acc))
print('Average precision: ' + str(avg_prec))
print('Average F1 Score:  ' + str(avg_f1))

Average accuracy:  0.6734177215189874
Average precision: 0.7468354430379749
Average F1 Score:  0.6465753424657537
