In [1]:
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn import svm, metrics

In [40]:
# Daily labels table: `tor` is stored as an integer and every date is moved
# forward by one day.
# NOTE(review): the one-day shift presumably lines the labels up with the dates
# used in the PC tables -- confirm against how the CSV was produced.
tors = pd.read_csv('./tordays_chasealley.csv', parse_dates=['date'])
tors = tors.assign(
    tor=tors['tor'].astype(int),
    date=tors['date'] + pd.Timedelta('1 day'),
)

# The SLP and 500 mb `pcs` tables are read the same way, SLP first.
slp_pcs, h5_pcs = (
    pd.read_csv(csv_path, parse_dates=['date'])
    for csv_path in ('./slp_pcs_latemay_1981-2018.csv',
                     './500mb_pcs_latemay_1981-2018.csv')
)

# Inner joins: first pair the two fields on (date, mode) -- the shared `pcs`
# column becomes `pcs_slp` / `pcs_h5` -- then attach the labels by date.
all_pcs = pd.merge(
    slp_pcs,
    h5_pcs,
    on=['date', 'mode'],
    how='inner',
    suffixes=['_slp', '_h5'],
)
tor_pcs = pd.merge(all_pcs, tors, on='date', how='inner')

In [41]:
# Attach the labels to the combined PC table by date (inner join) and keep the
# `tor` column as integers.
tor_pcs = pd.merge(all_pcs, tors, on='date', how='inner').astype({'tor': int})

In [42]:
# Preview the first rows of the merged PC/label table.
tor_pcs.head()

Unnamed: 0,date,mode,pcs_slp,pcs_h5,tor
0,1985-05-17,0,-2875.503,636.5201,0
1,1985-05-17,1,-8066.7505,27.602953,0
2,1985-05-17,2,837.58417,-215.67729,0
3,1985-05-17,3,-1029.7639,-231.10178,0
4,1985-05-17,4,-214.46526,-188.53247,0


In [43]:
def svm_classify(nmodes, kernel, random_state=None, feature_cols=('pcs_slp',), data=None):
    """Fit an SVM on the leading PC modes and predict the `tor` label on a held-out split.

    The input table is in long format (one row per sample and mode). For every
    column in ``feature_cols`` the rows with ``mode < nmodes`` are folded into
    one row of ``nmodes`` values per sample; the label of a sample is the
    ``tor`` value on its mode-0 row.

    Parameters
    ----------
    nmodes : int
        Number of leading modes (``mode`` values ``0 .. nmodes-1``) used per
        feature column. Must be at least 1.
    kernel : str or callable
        Passed unchanged to ``svm.SVC(kernel=...)``.
    random_state : int, RandomState instance or None, optional
        Forwarded to ``train_test_split``. The default ``None`` keeps the
        original unseeded split; pass an int to get the same split every run.
    feature_cols : str or sequence of str, optional
        Column(s) to use as predictors. Defaults to ``('pcs_slp',)``; pass
        ``('pcs_slp', 'pcs_h5')`` to stack both fields side by side.
    data : pandas.DataFrame, optional
        Table with ``mode``, ``tor`` and the feature columns. Defaults to the
        notebook-level ``tor_pcs``.

    Returns
    -------
    classifier : svm.SVC
        The fitted classifier.
    y_test : numpy.ndarray
        True labels of the held-out samples.
    y_pred : numpy.ndarray
        Predicted labels for the held-out samples.

    Raises
    ------
    ValueError
        If ``nmodes`` is smaller than 1, ``feature_cols`` is empty, or the
        selected rows are not laid out as consecutive runs of modes
        ``0 .. nmodes-1``.
    """
    if nmodes < 1:
        raise ValueError('nmodes must be at least 1, got {}'.format(nmodes))
    if isinstance(feature_cols, str):
        # A bare column name would otherwise be iterated character by character.
        feature_cols = (feature_cols,)
    if len(feature_cols) == 0:
        raise ValueError('feature_cols must name at least one column')

    frame = tor_pcs if data is None else data
    leading = frame[frame['mode'] < nmodes]

    # reshape(-1, nmodes) only yields one sample per row when each sample's
    # modes 0..nmodes-1 sit on consecutive rows in ascending order. Any other
    # layout would silently mix values from different samples, so check it.
    mode_layout = leading['mode'].values
    layout_ok = (
        mode_layout.size % nmodes == 0
        and np.array_equal(mode_layout,
                           np.tile(np.arange(nmodes), mode_layout.size // nmodes))
    )
    if not layout_ok:
        raise ValueError(
            'rows with mode < {0} must form consecutive runs of modes 0..{1} '
            '(one run per sample); sort the table or check for missing modes'
            .format(nmodes, nmodes - 1)
        )

    features = np.hstack(
        [leading[col].values.reshape(-1, nmodes) for col in feature_cols]
    )
    targets = frame.loc[frame['mode'] == 0, 'tor'].values
    assert features.shape[0] == targets.shape[0]

    X_train, X_test, y_train, y_test = train_test_split(
        features, targets, random_state=random_state
    )
    print('Training data and target sizes: \n{}, {}'.format(X_train.shape,y_train.shape))
    print('Test data and target sizes: \n{}, {}'.format(X_test.shape,y_test.shape))

    classifier = svm.SVC(kernel=kernel)
    classifier.fit(X_train, y_train)

    y_pred = classifier.predict(X_test)
    return classifier, y_test, y_pred

In [44]:
# svm_classify splits the data with train_test_split and no explicit seed, so
# the split is drawn from NumPy's global RNG. Seed it here so that re-running
# this cell reproduces the same train/test split and therefore the same scores.
np.random.seed(42)
classifier, y_test, y_pred = svm_classify(2, 'sigmoid')

Training data and target sizes: 
(408, 2), (408,)
Test data and target sizes: 
(136, 2), (136,)




In [45]:
# Per-class precision, recall, F1 and support for the held-out predictions.
print(metrics.classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       0.67      0.69      0.68        89
           1       0.38      0.36      0.37        47

   micro avg       0.57      0.57      0.57       136
   macro avg       0.52      0.52      0.52       136
weighted avg       0.57      0.57      0.57       136



In [46]:
# Display the raw predicted labels followed by the true held-out labels.
y_pred, y_test

(array([0, 1, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0,
        1, 1, 1, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 1, 0, 1, 0, 0, 0, 1, 1, 1,
        1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0,
        0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0,
        0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0,
        1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 1,
        0, 0, 0, 0]),
 array([1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0,
        1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1,
        0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1,
        0, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0,
        0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0,
        1, 0, 1, 0]))

In [47]:
# Confusion matrix for the held-out split: rows are true labels, columns are
# predicted labels (y_test is passed first, y_pred second).
print("Confusion matrix:\n%s" % metrics.confusion_matrix(y_test, y_pred))

Confusion matrix:
[[61 28]
 [30 17]]
