# AVI-DYS Time Series Classification

## Import Libraries

In [72]:
import os
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import cross_validate
from sktime.classification.hybrid import HIVECOTEV2
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix
from sklearn.metrics import RocCurveDisplay

## Set User specific parameters

In [73]:
IDs = ['001', '003', '004', '005', '115', '116', '117', '118', '120', '121',
        '122', '123', '125', '126', '127', '128', '129', '130', '131', '132',
        '133', '135', '138', '151', '152', '301', '303', '304', '305', '306',
        '307', '308', '309']

data_path = "C:\\Users\\zacha\\Data\\AVI-DYS\\Results\\Post-Processing\\5-Concatenated"

## Format the DIS Scores

In [74]:
# Initialize an output DataFrame for the DIS scores (y)
cols = [
       'Dystonia-Duration', 'Dystonia-Amplitude',
       'Choreoathetosis-Duration', 'Choreoathetosis-Amplitude',
        ]

iterables = [IDs, ["11", "12"]]

index = pd.MultiIndex.from_product(iterables, names=["ID", "Side Code"])

y = pd.DataFrame(index=index, columns=cols)
y.reset_index(inplace=True)

In [75]:
# Put the DIS scores into a tidy format
df = pd.read_excel("C:\\Users\\zacha\\Repos\\AVI-DYS\\DIS-Included.xlsx")

for idx, i in enumerate(IDs):
    y['Dystonia-Duration'][idx*2] = df['Dystonia-Right-Duration-11'][idx]
    y['Dystonia-Duration'][idx*2+1] = df['Dystonia-Left-Duration-12'][idx]
    y['Dystonia-Amplitude'][idx*2] = df['Dystonia-Right-Amplitude-11'][idx]
    y['Dystonia-Amplitude'][idx*2+1] = df['Dystonia-Left-Amplitude-12'][idx]
    y['Choreoathetosis-Duration'][idx*2] = df['Choreoathetosis-Right-Duration-11'][idx]
    y['Choreoathetosis-Duration'][idx*2+1] = df['Choreoathetosis-Left-Duration-12'][idx]
    y['Choreoathetosis-Amplitude'][idx*2] = df['Choreoathetosis-Right-Amplitude-11'][idx]
    y['Choreoathetosis-Amplitude'][idx*2+1] = df['Choreoathetosis-Left-Amplitude-12'][idx]

## Create the 3D numpy array

In [76]:
# Initialize an output DataFrame for the Data (X)
cols = ['HLX_x', 'HLX_y', 'D1M_x', 'D1M_y', 'D3M_x', 'D3M_y', 'ANK_x',
       'ANK_y', 'KNE_x', 'KNE_y', 'KNE_ANK_length',
       'KNE_ANK_orientation', 'ANK_D3M_length', 'ANK_D3M_orientation',
       'ANK_D1M_length', 'ANK_D1M_orientation', 'D1M_HLX_length',
       'D1M_HLX_orientation']

df = pd.DataFrame(columns=cols)

In [77]:
# Flatten the DataFrame
for i in IDs:
   
    right_data = pd.read_csv(os.path.join(data_path,f'{i}-right-data.csv'))
    left_data = pd.read_csv(os.path.join(data_path,f'{i}-left-data.csv'))    
    
    right_flat = pd.DataFrame(index=[f'{i}-right'])
    for c in right_data.columns:
        right_flat[c] = [right_data[c].to_numpy()]  

    left_flat = pd.DataFrame(index=[f'{i}-left'])
    for c in left_data.columns:
        left_flat[c] = [left_data[c].to_numpy()]    
    
    df = pd.concat([df, right_flat, left_flat,])

# Extract the values from the DataFrame
data = df.values

# Create an empty 3D NumPy array
num_rows, num_columns = df.shape
num_elements = df.iloc[0, 0].shape[0]
X = np.empty((num_rows, num_columns, num_elements))

# Populate the 3D array with the values from the DataFrame
for i in range(num_rows):
    for j in range(num_columns):
        X[i, j] = data[i, j]

## HIVE-COTE v2.0

### Fit models using 6 fold cross-validation

#### Predict the Dystonia Duration Scores

In [78]:
Dystonia_Duration_y = y['Dystonia-Duration'].astype(str).to_numpy()

hc2 =HIVECOTEV2(
      stc_params=None,
      drcif_params=None,
      arsenal_params=None,
      tde_params=None,
      time_limit_in_minutes=1,
      save_component_probas=False,
      verbose=1,
      n_jobs=-1,
      random_state=42,
)

np.random.seed(seed=42)
Dystonia_Duration_results = cross_validate(hc2, X, y=Dystonia_Duration_y, cv=6, n_jobs=-1, verbose=1)

[Parallel(n_jobs=-1)]: Using backend LokyBackend with 32 concurrent workers.
[Parallel(n_jobs=-1)]: Done   6 out of   6 | elapsed:  6.1min remaining:    0.0s
[Parallel(n_jobs=-1)]: Done   6 out of   6 | elapsed:  6.1min finished


In [79]:
Dystonia_Duration_results['test_score']

array([0.81818182, 0.90909091, 0.81818182, 0.81818182, 0.72727273,
       0.72727273])

In [80]:
print("%0.2f accuracy with a standard deviation of %0.2f" % (Dystonia_Duration_results['test_score'].mean(), Dystonia_Duration_results['test_score'].std()))

0.80 accuracy with a standard deviation of 0.06


#### Predict the Dystonia Amplitude Scores

In [81]:
Dystonia_Amplitude_y = y['Dystonia-Amplitude'].astype(str).to_numpy()

hc2 =HIVECOTEV2(
      stc_params=None,
      drcif_params=None,
      arsenal_params=None,
      tde_params=None,
      time_limit_in_minutes=1,
      save_component_probas=False,
      verbose=1,
      n_jobs=-1,
      random_state=42,
)

np.random.seed(seed=42)
Dystonia_Amplitude_results = cross_validate(hc2, X, y=Dystonia_Amplitude_y, cv=6, n_jobs=-1, verbose=1)

[Parallel(n_jobs=-1)]: Using backend LokyBackend with 32 concurrent workers.
[Parallel(n_jobs=-1)]: Done   6 out of   6 | elapsed:  6.4min remaining:    0.0s
[Parallel(n_jobs=-1)]: Done   6 out of   6 | elapsed:  6.4min finished


In [82]:
Dystonia_Amplitude_results['test_score']

array([0.54545455, 0.63636364, 0.63636364, 0.63636364, 0.72727273,
       0.63636364])

In [83]:
print("%0.2f accuracy with a standard deviation of %0.2f" % (Dystonia_Amplitude_results['test_score'].mean(), Dystonia_Amplitude_results['test_score'].std()))

0.64 accuracy with a standard deviation of 0.05


#### Predict the Choreoathetosis Duration Scores

In [84]:
Choreoathetosis_Duration_y = y['Choreoathetosis-Duration'].astype(str).to_numpy()

hc2 =HIVECOTEV2(
      stc_params=None,
      drcif_params=None,
      arsenal_params=None,
      tde_params=None,
      time_limit_in_minutes=1,
      save_component_probas=False,
      verbose=1,
      n_jobs=-1,
      random_state=42,
)

np.random.seed(seed=42)
Choreoathetosis_Duration_results = cross_validate(hc2, X, y=Choreoathetosis_Duration_y, cv=6, n_jobs=-1, verbose=1)

[Parallel(n_jobs=-1)]: Using backend LokyBackend with 32 concurrent workers.
[Parallel(n_jobs=-1)]: Done   6 out of   6 | elapsed:  6.7min remaining:    0.0s
[Parallel(n_jobs=-1)]: Done   6 out of   6 | elapsed:  6.7min finished


In [85]:
Choreoathetosis_Duration_results['test_score']

array([0.54545455, 0.36363636, 0.36363636, 0.09090909, 0.09090909,
       0.09090909])

In [86]:
print("%0.2f accuracy with a standard deviation of %0.2f" % (Choreoathetosis_Duration_results['test_score'].mean(), Choreoathetosis_Duration_results['test_score'].std()))

0.26 accuracy with a standard deviation of 0.18


#### Predict the Choreoathetosis Amplitude Scores

In [87]:
Choreoathetosis_Amplitude_y = y['Choreoathetosis-Amplitude'].astype(str).to_numpy()

hc2 =HIVECOTEV2(
      stc_params=None,
      drcif_params=None,
      arsenal_params=None,
      tde_params=None,
      time_limit_in_minutes=1,
      save_component_probas=False,
      verbose=1,
      n_jobs=-1,
      random_state=42,
)

np.random.seed(seed=42)
Choreoathetosis_Amplitude_results = cross_validate(hc2, X, y=Choreoathetosis_Amplitude_y, cv=6, n_jobs=-1, verbose=1)

[Parallel(n_jobs=-1)]: Using backend LokyBackend with 32 concurrent workers.
[Parallel(n_jobs=-1)]: Done   6 out of   6 | elapsed:  6.7min remaining:    0.0s
[Parallel(n_jobs=-1)]: Done   6 out of   6 | elapsed:  6.7min finished


In [88]:
Dystonia_Amplitude_results['test_score']

array([0.54545455, 0.63636364, 0.63636364, 0.63636364, 0.72727273,
       0.63636364])

In [89]:
print("%0.2f accuracy with a standard deviation of %0.2f" % (Dystonia_Amplitude_results['test_score'].mean(), Dystonia_Amplitude_results['test_score'].std()))

0.64 accuracy with a standard deviation of 0.05
