In [1]:
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn import svm, metrics

In [2]:
# Read the three input CSVs in one pass; each name is reused by later cells.
tors, slp_pcs, h5_pcs = (
    pd.read_csv(csv_path)
    for csv_path in (
        './tordays_chasealley.csv',
        './slp_pcs_latemay_1981-2018.csv',
        './500mb_pcs_latemay_1981-2018.csv',
    )
)

# Inner join keeps only (date, mode) pairs present in both PC tables; any
# other column name shared by the two tables gets a '_slp' / '_h5' suffix.
all_pcs = pd.merge(
    slp_pcs,
    h5_pcs,
    how='inner',
    on=['date', 'mode'],
    suffixes=['_slp', '_h5'],
)

In [3]:
# Attach the `tor` column from `tors` to every PC row of the same date
# (dates missing from either table are dropped), then store it as an integer.
tor_pcs = (
    pd.merge(all_pcs, tors, how='inner', on='date')
    .assign(tor=lambda merged: merged['tor'].astype(int))
)

In [4]:
# Preview the first five rows of the merged long-format table.
tor_pcs.head(n=5)

Unnamed: 0,date,mode,pcs_slp,pcs_h5,tor
0,1985-05-17,0,-2875.503,636.5202,0
1,1985-05-17,1,-8066.7505,27.6029,0
2,1985-05-17,2,837.58417,-215.67738,0
3,1985-05-17,3,-1029.7639,-231.10164,0
4,1985-05-17,4,-214.46526,-188.53261,0


In [24]:
def svm_classify(nmodes_slp, nmodes_h5, kernel, random_state=None):
    """Fit an SVM on the leading SLP and 500 mb PCs and predict a held-out set.

    One sample is built per date from the module-level ``tor_pcs`` frame
    (long format: one row per date and mode). The features are modes
    ``0 .. nmodes_slp - 1`` of ``pcs_slp`` followed by modes
    ``0 .. nmodes_h5 - 1`` of ``pcs_h5``; the target is that date's ``tor``
    value. Samples are keyed on ``date`` rather than on row position, so the
    result does not depend on how ``tor_pcs`` happens to be ordered.

    Parameters
    ----------
    nmodes_slp : int
        Number of leading ``pcs_slp`` modes to use as features (>= 1).
    nmodes_h5 : int
        Number of leading ``pcs_h5`` modes to use as features (>= 1).
    kernel : str or callable
        Passed straight through to ``svm.SVC(kernel=...)``.
    random_state : int, RandomState instance or None, optional
        Forwarded to ``train_test_split``. Pass an int to make the split,
        and therefore the reported metrics, reproducible. The default
        ``None`` keeps the original behaviour of a different split per call.

    Returns
    -------
    classifier : sklearn.svm.SVC
        The fitted classifier.
    y_test : numpy.ndarray
        True labels of the held-out samples.
    y_pred : numpy.ndarray
        Predicted labels for the held-out samples.

    Raises
    ------
    ValueError
        If a mode count is < 1, if ``tor_pcs`` holds duplicate (date, mode)
        rows, or if any date is missing one of the requested modes.
    """
    if nmodes_slp < 1 or nmodes_h5 < 1:
        raise ValueError('nmodes_slp and nmodes_h5 must both be >= 1')

    def _leading_modes(column, nmodes):
        # Long -> wide, one row per date and one column per mode. pivot()
        # raises on duplicate (date, mode) pairs instead of silently
        # shifting values into the wrong sample, which a bare reshape would do.
        wide = tor_pcs.pivot(index='date', columns='mode', values=column)
        wide = wide.loc[:, wide.columns < nmodes]
        if wide.shape[1] != nmodes or wide.isnull().values.any():
            raise ValueError(
                'every date must provide {} modes below {} for {!r}'.format(
                    nmodes, nmodes, column))
        return wide

    slp_feats = _leading_modes('pcs_slp', nmodes_slp)
    h5_feats = _leading_modes('pcs_h5', nmodes_h5)
    # Both pivots come from the same frame and index column, so their rows
    # are in the same (sorted-by-date) order and can be stacked side by side.
    features = np.hstack([slp_feats.values, h5_feats.values])

    # One label per date, taken from the mode-0 row and aligned to the
    # feature rows by date.
    labels = tor_pcs.loc[tor_pcs['mode'] == 0].set_index('date')['tor']
    targets = labels.reindex(slp_feats.index).values
    assert features.shape[0] == targets.shape[0]

    X_train, X_test, y_train, y_test = train_test_split(
        features, targets, random_state=random_state)
    print('Training data and target sizes: \n{}, {}'.format(X_train.shape,y_train.shape))
    print('Test data and target sizes: \n{}, {}'.format(X_test.shape,y_test.shape))

    # NOTE(review): the PCs are passed to SVC unscaled. scikit-learn's SVM
    # guide recommends standardising inputs, so consider a scaler fitted on
    # the training split only.
    classifier = svm.SVC(kernel=kernel)
    classifier.fit(X_train, y_train)

    y_pred = classifier.predict(X_test)
    return classifier, y_test, y_pred

In [49]:
# 4 SLP modes + 2 500 mb modes -> 6 features per sample, sigmoid kernel.
classifier, y_test, y_pred = svm_classify(
    nmodes_slp=4,
    nmodes_h5=2,
    kernel='sigmoid',
)

Training data and target sizes: 
(382, 6), (382,)
Test data and target sizes: 
(128, 6), (128,)




In [50]:
# Per-class precision / recall / F1 on the held-out samples.
report_text = metrics.classification_report(y_true=y_test, y_pred=y_pred)
print(report_text)

              precision    recall  f1-score   support

           0       0.76      0.63      0.69        89
           1       0.39      0.54      0.45        39

   micro avg       0.60      0.60      0.60       128
   macro avg       0.57      0.58      0.57       128
weighted avg       0.64      0.60      0.62       128



In [51]:
# Display the raw predicted labels next to the true labels.
(y_pred, y_test)

(array([1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, 0,
        0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0, 0, 1, 0,
        0, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 0, 1, 0, 1, 0, 1,
        0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1,
        1, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 1, 1, 0]),
 array([0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 1, 0, 0, 0,
        0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0,
        1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0,
        0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0,
        1, 0, 1, 0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 0, 0, 1, 1,
        1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0]))

In [52]:
# Rows are true classes, columns are predicted classes.
conf_mat = metrics.confusion_matrix(y_true=y_test, y_pred=y_pred)
print("Confusion matrix:\n%s" % conf_mat)

Confusion matrix:
[[56 33]
 [18 21]]
