In [18]:
import xarray as xr
# import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn import svm, metrics

In [19]:
ds_tors = xr.open_dataset('./tordays.nc')
ds_nontors = xr.open_dataset('./nontordays.nc')

In [20]:
ds_tors.hgt.shape, ds_nontors.hgt.shape

((140, 12, 25), (404, 12, 25))

In [21]:
tor_days = len(ds_tors['time'])
tor_targets = np.ones(tor_days)

nontor_days = len(ds_nontors['time'])
nontor_targets = np.zeros(nontor_days)

In [22]:
tor_targets.shape, nontor_targets.shape

((140,), (404,))

In [23]:
tor_arr = ds_tors['hgt'].values.reshape((tor_days, -1))
nontor_arr = ds_nontors['hgt'].values.reshape((nontor_days, -1))

In [24]:
tor_arr.shape, nontor_arr.shape

((140, 300), (404, 300))

In [25]:
data = np.concatenate([tor_arr, nontor_arr])
targets = np.concatenate([tor_targets, nontor_targets])

In [35]:
data

array([[ 127.5332   ,  126.7334   ,  122.833496 , ...,   38.5      ,
          34.966797 ,   29.333496 ],
       [ -90.1001   ,  -71.299805 ,  -54.933105 , ...,  -31.566895 ,
         -21.700195 ,  -11.033203 ],
       [ -59.966797 ,  -57.1333   ,  -60.466797 , ...,  -10.399902 ,
          -8.700195 ,   -6.7001953],
       ...,
       [  96.8999   ,  128.7002   ,  143.0669   , ...,   31.433105 ,
          27.299805 ,   15.966797 ],
       [-118.0332   , -121.566895 , -122.3999   , ...,   29.6333   ,
          25.899902 ,   22.166504 ],
       [ -82.8999   ,  -98.1001   , -105.566895 , ...,   22.600098 ,
          16.1333   ,    5.533203 ]], dtype=float32)

In [26]:
data.shape, targets.shape

((544, 300), (544,))

In [27]:
X_train, X_test, y_train, y_test = train_test_split(data, targets)

In [28]:
print('Training data and target sizes: \n{}, {}'.format(X_train.shape,y_train.shape))
print('Test data and target sizes: \n{}, {}'.format(X_test.shape,y_test.shape))

Training data and target sizes: 
(408, 300), (408,)
Test data and target sizes: 
(136, 300), (136,)


In [29]:
# help(svm.SVC)

In [30]:
classifier = svm.SVC(kernel='linear')
classifier.fit(X_train, y_train)

SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma='auto_deprecated',
  kernel='linear', max_iter=-1, probability=False, random_state=None,
  shrinking=True, tol=0.001, verbose=False)

In [31]:
y_pred = classifier.predict(X_test)

In [32]:
print("Classification report for classifier %s:\n%s\n"
      % (classifier, metrics.classification_report(y_test, y_pred)))

Classification report for classifier SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma='auto_deprecated',
  kernel='linear', max_iter=-1, probability=False, random_state=None,
  shrinking=True, tol=0.001, verbose=False):
              precision    recall  f1-score   support

         0.0       0.76      0.83      0.79        98
         1.0       0.43      0.34      0.38        38

   micro avg       0.69      0.69      0.69       136
   macro avg       0.60      0.58      0.59       136
weighted avg       0.67      0.69      0.68       136




In [33]:
y_pred, y_test

(array([0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 1.,
        0., 0., 1., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 1., 0., 1., 0.,
        0., 0., 0., 0., 0., 1., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 1., 0., 1., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0.,
        1., 0., 0., 1., 1., 0., 0., 1., 0., 0., 0., 1., 0., 0., 0., 0., 1.,
        0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 1., 1., 1.,
        0., 0., 1., 0., 0., 0., 0., 0., 1., 1., 1., 1., 0., 0., 0., 1., 0.]),
 array([0., 0., 1., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 1., 0., 0., 0.,
        0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 1., 1., 0.,
        1., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 1., 0.,
        0., 0., 0., 0., 1., 0., 0., 0., 0., 1., 1., 0., 1., 0., 0., 1., 0.,
        1., 0., 0., 0., 0., 0., 1., 0., 1., 1., 1., 1., 0., 1., 1., 0., 0.,
        0.

In [34]:
print("Confusion matrix:\n%s" % metrics.confusion_matrix(y_test, y_pred))

Confusion matrix:
[[81 17]
 [25 13]]
