This notebook uses `sktime`


Classification examples: compare features from a trained CNN, from a CNN with random (untrained) weights, and from the raw time series.
+ Justification: the TimeNet paper suggests that generic time-series feature extractors exist.


+ Deep-learning time series classifiers (dl-4-tsc): https://github.com/hfawaz/dl-4-tsc/tree/master/classifiers
+ `pyts` loader for the BasicMotions dataset: https://pyts.readthedocs.io/en/stable/generated/pyts.datasets.load_basic_motions.html

In [12]:
import sys

import matplotlib.pyplot as plt
import json

import dysts
from dysts.flows import *
from dysts.base import *
from dysts.utils import *
from dysts.analysis import *


# try:
#     from private.lyap import lyap_r, lyap_e, corr_dim
# except:
#     from nolds import lyap_r, lyap_e, corr_dim

%matplotlib inline
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [24]:
## use basic motions dataset
from sktime.datasets import load_basic_motions
from sktime.utils.data_processing import from_nested_to_2d_array


def load_basic_motions_dim0(split):
    """
    Load one split of the BasicMotions dataset, keeping only the "dim_0" channel.

    Args:
        split (str): Partition name forwarded to `load_basic_motions`
            (the notebook uses "train" and "test").

    Returns:
        tuple: `(X, y)` where `X` is the array built from the "dim_0" column
            after being passed through `standardize_ts`, and `y` is the list of
            "class_val" labels.
    """
    # Load the split once and reuse the frame for both the series and the labels
    # (previously every split was read from disk twice).
    frame = load_basic_motions(split=split)
    series = np.array([np.array(item) for item in frame["dim_0"]])
    # `standardize_ts` and `np` are expected to come from the earlier imports
    # (presumably the `dysts` star imports) — TODO confirm
    return standardize_ts(series), frame["class_val"].to_list()


X_train, y_train = load_basic_motions_dim0("train")
X_test, y_test = load_basic_motions_dim0("test")

In [17]:
# ?sktime.datasets.load_UCR_UEA_dataset

In [14]:
# List the names exposed by `sktime.datasets` to see which dataset loaders exist
import sktime
dir(sktime.datasets)

['__all__',
 '__builtins__',
 '__cached__',
 '__doc__',
 '__file__',
 '__loader__',
 '__name__',
 '__package__',
 '__path__',
 '__spec__',
 'base',
 'load_UCR_UEA_dataset',
 'load_airline',
 'load_arrow_head',
 'load_basic_motions',
 'load_gunpoint',
 'load_italy_power_demand',
 'load_longley',
 'load_lynx',
 'load_osuleaf',
 'load_shampoo_sales',
 'load_uschange']

In [6]:
## Pre-processing: nothing, PCA, rockets, trained rockets


## Featurize: tsfresh, random convolutions, pre-trained AE
## try selecting chaotic systems with the closest power spectrum
from sktime.transformations.panel.rocket import Rocket
# Instantiate the ROCKET transformer with its default settings; it is not fitted in this cell.
# NOTE(review): the name `model` is rebound to a RidgeClassifierCV in a later cell.
model = Rocket() 

In [10]:
import torch
from torch import nn

## Benchmarks: 
# tsfresh handcrafted features
# ROCKET: Random convolutional transforms followed by an aggregation operation
# What classifier to use? Just a random forest?

In [9]:
# chunk_size = 30

import torch
from torch import nn

class Autoencoder(nn.Module):
    """
    A simple 1D convolutional autoencoder for time series.

    Inputs are batches of single-channel series with shape (batch, 1, length).
    The encoder compresses each series into an 8-channel code; the decoder maps
    the code back to a single channel through a final Tanh, so reconstructed
    values lie within [-1, 1].

    The output length is determined by the layer arithmetic and need not equal
    the input length: a length-28 input is reconstructed at length 28, and a
    length-30 input also comes back at length 28.

    The shape comments below assume an input of shape (b, 1, 28).
    """
    def __init__(self):
        super().__init__()

        # Note: ELU's first positional argument is `alpha`, not `inplace`, so the
        # ReLU-style `nn.ELU(True)` only ever meant alpha=1, which is the default.
        self.encoder = nn.Sequential(
            nn.Conv1d(1, 16, 3, stride=3, padding=1),  # b, 16, 10
            nn.ELU(),
            nn.MaxPool1d(2, stride=2),  # b, 16, 5
            # Must be Conv1d: a Conv2d here cannot consume the (b, 16, 5)
            # activations produced by the 1D layers above.
            nn.Conv1d(16, 8, 3, stride=2, padding=1),  # b, 8, 3
            nn.ELU(),
            nn.MaxPool1d(2, stride=1)  # b, 8, 2
        )
        self.decoder = nn.Sequential(
            nn.ConvTranspose1d(8, 16, 3, stride=2),  # b, 16, 5
            nn.ELU(),
            nn.ConvTranspose1d(16, 8, 5, stride=3, padding=1),  # b, 8, 15
            nn.ELU(),
            nn.ConvTranspose1d(8, 1, 2, stride=2, padding=1),  # b, 1, 28
            nn.Tanh()
        )

    def forward(self, x):
        """
        Encode and then reconstruct a batch of series.

        Args:
            x (torch.Tensor): Input of shape (batch, 1, length).

        Returns:
            torch.Tensor: Reconstruction of shape (batch, 1, length_out), with
                values within [-1, 1].
        """
        x = self.encoder(x)
        x = self.decoder(x)
        return x

In [None]:
## Batchwise training classifier
## Baseline is usually 1-NN with DTW
## But really need an NN where we can pre-train
## ROCKET vs trained convolutional?



## Or, unsupervised learning on a bunch of chaotic systems
## then train a classifier on the encoder outputs for new data

Classifier: logistic regression trained with Adam, or ridge regression



In [54]:
# Print the docstring of the UCR/UEA loader to check its arguments
help(sktime.datasets.load_UCR_UEA_dataset)

Help on function load_UCR_UEA_dataset in module sktime.datasets.base:

load_UCR_UEA_dataset(name, split=None, return_X_y=False, extract_path=None)
    Load dataset from UCR UEA time series classification repository. Downloads and
    extracts dataset if not already downloaded.
    
    Parameters
    ----------
    name : str
        Name of data set
    split: None or str{"train", "test"}, optional (default=None)
        Whether to load the train or test partition of the problem. By
        default it loads both.
    return_X_y: bool, optional (default=False)
        If True, returns (features, target) separately instead of a single
        dataframe with columns for
        features and the target.
    extract_path : str, optional (default=None)
        Default extract path is `sktime/datasets/data/`
    
    Returns
    -------
    X: pandas DataFrame with m rows and c columns
        The time series data for the problem with m cases and c dimensions
    y: numpy array
        The c

In [56]:
# Load the AbnormalHeartbeat training split, requesting ./resources as the extract path.
# NOTE(review): the recorded output of this cell is a FileNotFoundError for a file under
# the installed sktime/datasets/data directory — confirm whether `extract_path` takes effect.
X_train, y_train = sktime.datasets.load_UCR_UEA_dataset("AbnormalHeartbeat", split="train", return_X_y=True, extract_path="./resources")

FileNotFoundError: [Errno 2] No such file or directory: '/Users/williamgilpin/miniconda3/envs/dysts/lib/python3.8/site-packages/sktime/datasets/data/AbnormalHeartbeat/AbnormalHeartbeat_TRAIN.ts'

In [48]:
from sktime.transformations.panel.tsfresh import TSFreshFeatureExtractor

# NOTE(review): this BasicMotions pair is immediately overwritten by the
# AbnormalHeartbeat load below; keep whichever dataset the experiment targets.
X_train, y_train = load_basic_motions(split="train", return_X_y=True)
X_test, y_test = load_basic_motions(split="test", return_X_y=True)

X_train, y_train = sktime.datasets.load_UCR_UEA_dataset("AbnormalHeartbeat", split="train", return_X_y=True)
X_test, y_test = sktime.datasets.load_UCR_UEA_dataset("AbnormalHeartbeat", split="test", return_X_y=True)

## Featurize: fit the extractor on the training split only, then apply the
## already-fitted transformer to the test split (no re-fitting on test data)
transformer = TSFreshFeatureExtractor()
X_train_featurized = transformer.fit_transform(X_train)
X_test_featurized = transformer.transform(X_test)


from sklearn.linear_model import RidgeClassifierCV

## Ridge classifier with the regularization strength chosen by cross-validation
## over 10 log-spaced values between 1e-3 and 1e3
# NOTE(review): `normalize` was deprecated in scikit-learn 1.0 and removed in 1.2,
# so this call needs an older scikit-learn release.
model = RidgeClassifierCV(alphas = np.logspace(-3, 3, 10), normalize = True)
model.fit(X_train_featurized, y_train)

## Mean accuracy on the held-out split
model.score(X_test_featurized, y_test)

FileNotFoundError: [Errno 2] No such file or directory: '/Users/williamgilpin/miniconda3/envs/dysts/lib/python3.8/site-packages/sktime/datasets/data/AbnormalHeartbeat/AbnormalHeartbeat_TRAIN.ts'

In [38]:
# NOTE(review): `extracted_features` is not defined in any cell shown in this notebook;
# this assignment appears to rely on leftover kernel state — confirm where it comes from.
X_train_featurized = extracted_features

1.0

In [44]:
# Show the labels the current `model` predicts for the featurized test split
model.predict(X_test_featurized)

array(['standing', 'standing', 'standing', 'standing', 'standing',
       'standing', 'standing', 'standing', 'standing', 'standing',
       'running', 'running', 'running', 'running', 'running', 'running',
       'running', 'running', 'running', 'running', 'walking', 'walking',
       'walking', 'walking', 'walking', 'walking', 'walking', 'walking',
       'walking', 'walking', 'badminton', 'badminton', 'badminton',
       'badminton', 'badminton', 'badminton', 'badminton', 'badminton',
       'badminton', 'badminton'], dtype='<U9')

In [45]:
# Display the test labels
y_test

array(['standing', 'standing', 'standing', 'standing', 'standing',
       'standing', 'standing', 'standing', 'standing', 'standing',
       'running', 'running', 'running', 'running', 'running', 'running',
       'running', 'running', 'running', 'running', 'walking', 'walking',
       'walking', 'walking', 'walking', 'walking', 'walking', 'walking',
       'walking', 'walking', 'badminton', 'badminton', 'badminton',
       'badminton', 'badminton', 'badminton', 'badminton', 'badminton',
       'badminton', 'badminton'], dtype='<U9')