In [1]:
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn import svm, metrics

In [2]:
# Load the daily cluster table and add a binary `tor` flag that is 1 on
# days whose `max_cluster` is at least 30 and 0 otherwise.
tor_clusts = (
    pd.read_csv('./cluster_db3.csv', parse_dates=['date'])
      .assign(tor=lambda clusts: (clusts.max_cluster >= 30).astype(int))
)

In [3]:
# Preview the first five rows of the labelled cluster table.
tor_clusts.head(n=5)

Unnamed: 0,date,max_cluster,tor
0,1996-04-01,0,0
1,1996-04-02,0,0
2,1996-04-03,0,0
3,1996-04-04,0,0
4,1996-04-05,0,0


In [4]:
# Shape of the subset of days that meet the `max_cluster >= 30` threshold
# and fall on or after the 16th of May (i.e. May 16-31).
meets_threshold = tor_clusts.max_cluster >= 30
in_late_may = (tor_clusts.date.dt.month == 5) & (tor_clusts.date.dt.day >= 16)
tor_clusts.loc[meets_threshold & in_late_may].shape

(58, 3)

In [5]:
# Load the SLP and 500 mb principal-component tables. Each table's `date`
# is moved back by one day before merging.
# NOTE(review): the reason for the one-day shift is not visible here
# (presumably a timestamp convention in the PC files) -- confirm.
slp_pcs = (
    pd.read_csv('./slp_pcs_latemay_1981-2018.csv', parse_dates=['date'])
      .assign(date=lambda pcs: pcs['date'] - pd.Timedelta('1 day'))
)
h5_pcs = (
    pd.read_csv('./500mb_pcs_latemay_1981-2018.csv', parse_dates=['date'])
      .assign(date=lambda pcs: pcs['date'] - pd.Timedelta('1 day'))
)

# One row per (date, mode) with both fields' PCs side by side, then attach
# the per-date cluster columns. Inner joins keep only dates present in
# every input.
all_pcs = pd.merge(slp_pcs, h5_pcs, how='inner', on=['date', 'mode'], suffixes=('_slp', '_h5'))
tor_pcs = pd.merge(all_pcs, tor_clusts, how='inner', on='date')

In [6]:
# Preview the last five rows of the merged PC / cluster table.
tor_pcs.tail(n=5)

Unnamed: 0,date,mode,pcs_slp,pcs_h5,max_cluster,tor
3515,2017-05-31,5,-384.43347,-210.68015,0,0
3516,2017-05-31,6,163.8923,-250.17175,0,0
3517,2017-05-31,7,-843.6362,-62.726982,0,0
3518,2017-05-31,8,43.86753,153.7155,0,0
3519,2017-05-31,9,-380.9856,245.53929,0,0


In [208]:
# Build a scaled copy of `tor_pcs`: each PC column is divided by its own
# standard deviation, computed over all rows (every mode pooled together).
# The columns are not mean-centred. `tor_pcs` itself is left untouched.
unscaled_slp_pcs, unscaled_h5_pcs = tor_pcs['pcs_slp'], tor_pcs['pcs_h5']
tor_pcs_scaled = tor_pcs.assign(
    pcs_slp=unscaled_slp_pcs.div(unscaled_slp_pcs.std()),
    pcs_h5=unscaled_h5_pcs.div(unscaled_h5_pcs.std()),
)

In [209]:
# Preview the last five rows of the scaled table.
tor_pcs_scaled.tail(n=5)

Unnamed: 0,date,mode,pcs_slp,pcs_h5,max_cluster,tor
3515,2017-05-31,5,-0.178443,-0.670099,0,0
3516,2017-05-31,6,0.076074,-0.795708,0,0
3517,2017-05-31,7,-0.391591,-0.199512,0,0
3518,2017-05-31,8,0.020362,0.488915,0,0
3519,2017-05-31,9,-0.176842,0.780974,0,0


In [463]:
def _pc_feature_table(frame, column, modes):
    """Pivot one PC column into a date-indexed table with one column per mode.

    An integer `modes` means modes ``0 .. modes - 1``. Returns ``None`` when
    no modes are requested so the caller can leave that field out entirely.

    Raises
    ------
    ValueError
        If a requested mode does not occur in `frame`, or (raised by
        ``DataFrame.pivot``) if a (date, mode) pair occurs more than once.
    """
    if isinstance(modes, (int, np.integer)):
        modes = range(0, modes)
    modes = list(modes)
    if not modes:
        return None

    picked = frame[frame['mode'].isin(modes)]
    absent = sorted(set(modes) - set(picked['mode']))
    if absent:
        raise ValueError('Requested modes {} are not present for {}'.format(absent, column))

    # Pivoting by (date, mode) builds the feature matrix from the labels
    # themselves rather than from the physical row order of the frame.
    return picked.pivot(index='date', columns='mode', values=column)


def svm_classify(modes_slp, modes_h5, kernel, tol=1e-3, test_size=0.25, random_state=None):
    """Train and evaluate an SVM on selected SLP / 500 mb PC modes.

    Features are read from the module-level `tor_pcs_scaled` frame and the
    binary targets from the `tor` column of the module-level `tor_pcs`
    frame (its ``mode == 0`` rows, one per date). Both are aligned by date.

    Parameters
    ----------
    modes_slp, modes_h5 : int or iterable of int
        Modes whose `pcs_slp` / `pcs_h5` values become features. An integer
        ``k`` selects modes ``0 .. k - 1``; ``0`` or an empty iterable
        leaves that field out.
    kernel : str
        Kernel name passed to ``svm.SVC``.
    tol : float, optional
        Stopping tolerance passed to ``svm.SVC``.
    test_size : float or int, optional
        Passed to ``train_test_split``.
    random_state : int or None, optional
        Seed passed to ``train_test_split``. The default ``None`` keeps the
        original unseeded (non-reproducible) shuffle.

    Returns
    -------
    classifier : svm.SVC
        The fitted classifier.
    y_test : numpy.ndarray
        True labels of the held-out samples.
    y_pred : numpy.ndarray
        Predicted labels for the held-out samples.

    Raises
    ------
    ValueError
        If no modes are selected at all, a requested mode is missing, or the
        features / targets cannot be aligned on a complete set of dates.
    """
    tables = [
        _pc_feature_table(tor_pcs_scaled, 'pcs_slp', modes_slp),
        _pc_feature_table(tor_pcs_scaled, 'pcs_h5', modes_h5),
    ]
    tables = [table for table in tables if table is not None]
    if not tables:
        raise ValueError('At least one SLP or 500 mb mode must be selected.')

    # Column-wise concat aligns on the date index: SLP columns first, then
    # 500 mb columns, matching the original hstack order.
    feature_table = pd.concat(tables, axis=1)
    if feature_table.isna().values.any():
        raise ValueError('Selected modes do not cover the same set of dates.')

    # Look the targets up by date instead of trusting that both frames
    # happen to share the same row order and length.
    labels = tor_pcs[tor_pcs['mode'] == 0].set_index('date')['tor']
    aligned = labels.reindex(feature_table.index)
    if aligned.isna().any():
        raise ValueError('Some feature dates have no target label.')

    features = feature_table.values
    targets = aligned.values.astype(labels.dtype)

    X_train, X_test, y_train, y_test = train_test_split(
        features, targets, test_size=test_size, random_state=random_state)
    print('Training data and target sizes: \n{}, {}'.format(X_train.shape,y_train.shape))
    print('Test data and target sizes: \n{}, {}'.format(X_test.shape,y_test.shape))

    classifier = svm.SVC(kernel=kernel, tol=tol, gamma='scale')
    classifier.fit(X_train, y_train)

    y_pred = classifier.predict(X_test)
    return classifier, y_test, y_pred

In [496]:
# Fit a sigmoid-kernel SVM on SLP mode 1 plus 500 mb modes 0-3, holding out
# 40% of the samples, then print the evaluation summary.
classifier, y_test, y_pred = svm_classify(
    modes_slp=[1], modes_h5=4, kernel='sigmoid', test_size=0.4)

report_text = metrics.classification_report(y_test, y_pred)
confusion = metrics.confusion_matrix(y_test, y_pred)

print(report_text + '\n')
print("Confusion matrix:\n%s" % confusion + '\n')
print('Predictions:\n%s' % y_pred)
print('Actuals:\n%s' % y_test)

Training data and target sizes: 
(211, 5), (211,)
Test data and target sizes: 
(141, 5), (141,)
              precision    recall  f1-score   support

           0       0.85      0.95      0.90       117
           1       0.40      0.17      0.24        24

   micro avg       0.82      0.82      0.82       141
   macro avg       0.62      0.56      0.57       141
weighted avg       0.77      0.82      0.78       141


Confusion matrix:
[[111   6]
 [ 20   4]]

Predictions:
[0 0 0 0 1 0 0 0 0 0 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 1 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 1 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 1 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
Actuals:
[1 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1 0 0 0 0 0 1 0 0 1 0 0 1 0 0 0 0 0 1 0 1
 0 1 0 0 0 0 1 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 1 0 0 0 0
 0 0 0 0 0 1 0 0 0 0 0 1 0 0 1 1 0 0 0 1 0 0 0 1 0 0 0 0 0 1 1 0 1 0 0 1 0
