In [1]:
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn import svm, metrics

In [53]:
tors = pd.read_csv('./tordays_chasealley.csv', parse_dates=['date'])
tors['tor'] = tors['tor'].astype(int)
tors['date'] = tors['date'] + pd.Timedelta('1 day')

slp_pcs = pd.read_csv('./slp_pcs_latemay_1981-2018.csv', parse_dates=['date'])
h5_pcs = pd.read_csv('./500mb_pcs_latemay_1981-2018.csv', parse_dates=['date'])

all_pcs = slp_pcs.merge(h5_pcs, on=['date', 'mode'], how='inner', suffixes=['_slp', '_h5'])
tor_pcs = all_pcs.merge(tors, on='date', how='inner')

In [105]:
tor_pcs.tail()

Unnamed: 0,date,mode,pcs_slp,pcs_h5,tor
5435,2018-06-01,5,-801.4222,-131.11832,1
5436,2018-06-01,6,486.9441,-39.723503,1
5437,2018-06-01,7,-1266.7305,-207.7968,1
5438,2018-06-01,8,1628.1063,299.853,1
5439,2018-06-01,9,339.37573,145.77031,1


In [56]:
def svm_classify(nmodes_slp, nmodes_h5, kernel):
    slp_feats = tor_pcs[tor_pcs['mode'] < nmodes_slp]['pcs_slp'].values.reshape(-1, nmodes_slp)
    h5_feats = tor_pcs[tor_pcs['mode'] < nmodes_h5]['pcs_h5'].values.reshape(-1, nmodes_h5)
    features = np.hstack([slp_feats, h5_feats])
    targets = tor_pcs[tor_pcs['mode'] == 0].tor.values
    assert features.shape[0] == targets.shape[0]
    
    X_train, X_test, y_train, y_test = train_test_split(features, targets)
    print('Training data and target sizes: \n{}, {}'.format(X_train.shape,y_train.shape))
    print('Test data and target sizes: \n{}, {}'.format(X_test.shape,y_test.shape))
    
    classifier = svm.SVC(kernel=kernel)
    classifier.fit(X_train, y_train)
    
    y_pred = classifier.predict(X_test)
    return classifier, y_test, y_pred

In [101]:
classifier, y_test, y_pred = svm_classify(2, 2, 'sigmoid')

Training data and target sizes: 
(408, 4), (408,)
Test data and target sizes: 
(136, 4), (136,)




In [102]:
print(metrics.classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       0.61      0.70      0.65        83
           1       0.39      0.30      0.34        53

   micro avg       0.54      0.54      0.54       136
   macro avg       0.50      0.50      0.50       136
weighted avg       0.52      0.54      0.53       136



In [103]:
y_pred, y_test

(array([0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1,
        0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0,
        0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 1, 1, 0, 0, 0, 1, 0, 1, 0, 0, 1,
        0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1,
        0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 0, 0,
        0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0,
        1, 1, 1, 1]),
 array([1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1,
        1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1,
        0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 1, 0, 0, 1, 0,
        0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1,
        0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0,
        1, 1, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 1,
        0, 1, 1, 0]))

In [104]:
print("Confusion matrix:\n%s" % metrics.confusion_matrix(y_test, y_pred))

Confusion matrix:
[[58 25]
 [37 16]]
