# AVI-DYS Time Series Classification

## Import Libraries

In [1]:
import os
import numpy as np
import pandas as pd
from sklearn.model_selection import cross_val_predict
from sklearn.model_selection import StratifiedKFold
from sktime.classification.hybrid import HIVECOTEV2
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report

## Set user-specific parameters

In [2]:
# Participant identifiers, kept as zero-padded strings because they are used
# verbatim when building the per-participant CSV file names.
IDs = (
    "001 003 004 005 115 116 117 118 120 121 "
    "122 123 125 126 127 128 129 130 131 132 "
    "133 135 138 151 152 301 303 304 305 306 "
    "307 308 309"
).split()

# Folder that holds the '<ID>-right-data.csv' / '<ID>-left-data.csv' files
data_path = "C:\\Users\\zacha\\Data\\AVI-DYS\\Results\\Post-Processing\\5-Concatenated"

## Format the DIS Scores

In [3]:
# Build the (still empty) score table Y: one row per participant and side code,
# one column per DIS score to be predicted.
cols = ['Dystonia-Duration',
        'Dystonia-Amplitude',
        'Choreoathetosis-Duration',
        'Choreoathetosis-Amplitude']

# Every ID is paired with both side codes, in the order "11" then "12"
iterables = [IDs, ["11", "12"]]
index = pd.MultiIndex.from_product(iterables, names=["ID", "Side Code"])

# reset_index() turns the two index levels into ordinary "ID" / "Side Code"
# columns and leaves a plain 0..N-1 row index behind.
Y = pd.DataFrame(index=index, columns=cols).reset_index()

In [4]:
# Put the DIS scores into a tidy format
df = pd.read_excel("C:\\Users\\zacha\\Repos\\AVI-DYS\\DIS-Included.xlsx")

# Column of Y -> (spreadsheet column for the right side, code "11",
#                 spreadsheet column for the left side, code "12")
score_columns = {
    'Dystonia-Duration': ('Dystonia-Right-Duration-11',
                          'Dystonia-Left-Duration-12'),
    'Dystonia-Amplitude': ('Dystonia-Right-Amplitude-11',
                           'Dystonia-Left-Amplitude-12'),
    'Choreoathetosis-Duration': ('Choreoathetosis-Right-Duration-11',
                                 'Choreoathetosis-Left-Duration-12'),
    'Choreoathetosis-Amplitude': ('Choreoathetosis-Right-Amplitude-11',
                                  'Choreoathetosis-Left-Amplitude-12'),
}

# Y holds two consecutive rows per participant: row 2*idx is side code "11"
# and row 2*idx + 1 is side code "12".
# NOTE(review): spreadsheet row idx is taken to belong to IDs[idx]; this
# assumes the sheet is ordered exactly like IDs - TODO confirm.
for idx in range(len(IDs)):
    for target, (right_col, left_col) in score_columns.items():
        # Write through Y.loc rather than Y[col][row]: chained assignment may
        # modify a temporary copy and leave Y itself unchanged.
        Y.loc[idx * 2, target] = df[right_col][idx]
        Y.loc[idx * 2 + 1, target] = df[left_col][idx]

## Create the 3D numpy array

In [5]:
# Initialize an (empty) output DataFrame for the data (X).
# Column names come in two families:
#   '<name>_x' / '<name>_y'                  for each entry of `landmarks`
#   '<name>_length' / '<name>_orientation'   for each entry of `segments`
landmarks = ('HLX', 'D1M', 'D3M', 'ANK', 'KNE')
segments = ('KNE_ANK', 'ANK_D3M', 'ANK_D1M', 'D1M_HLX')

cols = [f'{name}_{axis}' for name in landmarks for axis in ('x', 'y')]
cols += [f'{name}_{feature}'
         for name in segments
         for feature in ('length', 'orientation')]

df = pd.DataFrame(columns=cols)

In [6]:
# Flatten the data: one DataFrame row per participant/side and one cell per
# CSV column, each cell holding that column's full series as a 1D array.
flat_rows = []
for i in IDs:
    # Right before left, i.e. the same order in which the score table Y lists
    # side codes "11" and "12" for each participant.
    for side in ('right', 'left'):
        side_data = pd.read_csv(os.path.join(data_path, f'{i}-{side}-data.csv'))

        side_flat = pd.DataFrame(index=[f'{i}-{side}'])
        for c in side_data.columns:
            side_flat[c] = [side_data[c].to_numpy()]

        flat_rows.append(side_flat)

# Concatenate once instead of inside the loop; growing a DataFrame row by row
# copies everything accumulated so far on every iteration.
df = pd.concat([df, *flat_rows])

# Extract the values from the DataFrame
data = df.values

# Create an empty 3D NumPy array: (rows of df, columns of df, series length).
# The series length is taken from the first cell and enforced for all others.
num_rows, num_columns = df.shape
num_elements = df.iloc[0, 0].shape[0]
X = np.empty((num_rows, num_columns, num_elements))

# Populate the 3D array with the values from the DataFrame
for row_pos, row_label in enumerate(df.index):
    for col_pos, col_name in enumerate(df.columns):
        series = np.asarray(data[row_pos, col_pos])
        # A column missing from one CSV shows up here as a scalar NaN, which a
        # plain assignment would silently broadcast over the whole channel.
        if series.shape != (num_elements,):
            raise ValueError(
                f"{row_label} / {col_name}: expected a series of length "
                f"{num_elements}, got shape {series.shape}"
            )
        X[row_pos, col_pos] = series

## HIVE-COTE v2.0

### Fit models using 6-fold cross-validation

#### Predict the Dystonia Duration Scores

In [8]:
# Target vector: the Dystonia-Duration score of every row of Y, cast to str so
# the scores are handled as class labels rather than numbers.
y = Y['Dystonia-Duration'].astype(str).to_numpy()

# NOTE(review): Y has two rows per participant (side codes "11" and "12") and
# StratifiedKFold may place them in different folds - confirm this is intended,
# otherwise consider a grouped splitter keyed on Y['ID'].
cv = StratifiedKFold(n_splits=6)

hc2 = HIVECOTEV2(
    stc_params=None,
    drcif_params=None,
    arsenal_params=None,
    tde_params=None,
    # NOTE(review): a wall-clock limit presumably makes the fitted ensemble
    # depend on machine speed, so results may vary despite random_state - confirm.
    time_limit_in_minutes=1,
    save_component_probas=False,
    verbose=0,
    # n_jobs=-1 is deliberately left disabled
    random_state=42,
)

# Out-of-fold prediction for every sample
y_pred = cross_val_predict(hc2, X, y, cv=cv)

Dystonia_Duration_confusion_matrix = confusion_matrix(y, y_pred)
print(Dystonia_Duration_confusion_matrix)

# Display names are paired with the sorted unique labels, so exactly five
# distinct score values must occur in y / y_pred.
target_names = ['DIS-0', 'DIS-1', 'DIS-2', 'DIS-3', 'DIS-4']
# zero_division=0 reports 0.0 for a class that is never predicted, which is what
# the default does too, but without emitting an UndefinedMetricWarning.
Dystonia_Duration_classification_report = classification_report(
    y, y_pred, target_names=target_names, zero_division=0
)
print(Dystonia_Duration_classification_report)



[[ 8  0  0  0  3]
 [ 1  0  0  0  0]
 [ 0  0  0  1  2]
 [ 1  0  0  1  4]
 [ 2  1  1  8 33]]
              precision    recall  f1-score   support

       DIS-0       0.67      0.73      0.70        11
       DIS-1       0.00      0.00      0.00         1
       DIS-2       0.00      0.00      0.00         3
       DIS-3       0.10      0.17      0.12         6
       DIS-4       0.79      0.73      0.76        45

    accuracy                           0.64        66
   macro avg       0.31      0.33      0.32        66
weighted avg       0.66      0.64      0.64        66



#### Predict the Dystonia Amplitude Scores

In [9]:
# Target vector: the Dystonia-Amplitude score of every row of Y, cast to str so
# the scores are handled as class labels rather than numbers.
y = Y['Dystonia-Amplitude'].astype(str).to_numpy()

# NOTE(review): Y has two rows per participant (side codes "11" and "12") and
# StratifiedKFold may place them in different folds - confirm this is intended,
# otherwise consider a grouped splitter keyed on Y['ID'].
cv = StratifiedKFold(n_splits=6)

hc2 = HIVECOTEV2(
    stc_params=None,
    drcif_params=None,
    arsenal_params=None,
    tde_params=None,
    # NOTE(review): a wall-clock limit presumably makes the fitted ensemble
    # depend on machine speed, so results may vary despite random_state - confirm.
    time_limit_in_minutes=1,
    save_component_probas=False,
    verbose=0,
    # n_jobs=-1 is deliberately left disabled
    random_state=42,
)

# Out-of-fold prediction for every sample
y_pred = cross_val_predict(hc2, X, y, cv=cv)

Dystonia_Amplitude_confusion_matrix = confusion_matrix(y, y_pred)
print(Dystonia_Amplitude_confusion_matrix)

# Display names are paired with the sorted unique labels, so exactly five
# distinct score values must occur in y / y_pred.
target_names = ['DIS-0', 'DIS-1', 'DIS-2', 'DIS-3', 'DIS-4']
# Some classes are never predicted here, which makes their precision undefined.
# zero_division=0 reports 0.0 for them (same value as the default) without the
# UndefinedMetricWarning the default emits.
Dystonia_Amplitude_classification_report = classification_report(
    y, y_pred, target_names=target_names, zero_division=0
)
print(Dystonia_Amplitude_classification_report)



[[ 8  0  2  0  1]
 [ 0  0  0  0  1]
 [ 2  0  2  1  4]
 [ 0  0  0  2  8]
 [ 5  0  2  3 25]]
              precision    recall  f1-score   support

       DIS-0       0.53      0.73      0.62        11
       DIS-1       0.00      0.00      0.00         1
       DIS-2       0.33      0.22      0.27         9
       DIS-3       0.33      0.20      0.25        10
       DIS-4       0.64      0.71      0.68        35

    accuracy                           0.56        66
   macro avg       0.37      0.37      0.36        66
weighted avg       0.52      0.56      0.54        66



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


#### Predict the Choreoathetosis Duration Scores

In [10]:
# Target vector: the Choreoathetosis-Duration score of every row of Y, cast to
# str so the scores are handled as class labels rather than numbers.
y = Y['Choreoathetosis-Duration'].astype(str).to_numpy()

# NOTE(review): Y has two rows per participant (side codes "11" and "12") and
# StratifiedKFold may place them in different folds - confirm this is intended,
# otherwise consider a grouped splitter keyed on Y['ID'].
cv = StratifiedKFold(n_splits=6)

hc2 = HIVECOTEV2(
    stc_params=None,
    drcif_params=None,
    arsenal_params=None,
    tde_params=None,
    # NOTE(review): a wall-clock limit presumably makes the fitted ensemble
    # depend on machine speed, so results may vary despite random_state - confirm.
    time_limit_in_minutes=1,
    save_component_probas=False,
    verbose=0,
    # n_jobs=-1 is deliberately left disabled
    random_state=42,
)

# Out-of-fold prediction for every sample
y_pred = cross_val_predict(hc2, X, y, cv=cv)

# The result variables carry the name of the target scored in this cell
# (Duration); they were previously stored under "Amplitude" names.
Choreoathetosis_Duration_confusion_matrix = confusion_matrix(y, y_pred)
print(Choreoathetosis_Duration_confusion_matrix)

# Display names are paired with the sorted unique labels, so exactly five
# distinct score values must occur in y / y_pred.
target_names = ['DIS-0', 'DIS-1', 'DIS-2', 'DIS-3', 'DIS-4']
# zero_division=0 reports 0.0 for a class that is never predicted, which is what
# the default does too, but without emitting an UndefinedMetricWarning.
Choreoathetosis_Duration_classification_report = classification_report(
    y, y_pred, target_names=target_names, zero_division=0
)
print(Choreoathetosis_Duration_classification_report)

[[5 5 4 1 3]
 [5 7 3 0 2]
 [5 5 3 0 3]
 [1 1 2 3 0]
 [2 4 1 1 0]]
              precision    recall  f1-score   support

       DIS-0       0.28      0.28      0.28        18
       DIS-1       0.32      0.41      0.36        17
       DIS-2       0.23      0.19      0.21        16
       DIS-3       0.60      0.43      0.50         7
       DIS-4       0.00      0.00      0.00         8

    accuracy                           0.27        66
   macro avg       0.29      0.26      0.27        66
weighted avg       0.28      0.27      0.27        66



#### Predict the Choreoathetosis Amplitude Scores

In [11]:
# Target vector: the Choreoathetosis-Amplitude score of every row of Y, cast to
# str so the scores are handled as class labels rather than numbers.
y = Y['Choreoathetosis-Amplitude'].astype(str).to_numpy()

# NOTE(review): Y has two rows per participant (side codes "11" and "12") and
# StratifiedKFold may place them in different folds - confirm this is intended,
# otherwise consider a grouped splitter keyed on Y['ID'].
cv = StratifiedKFold(n_splits=6)

hc2 = HIVECOTEV2(
    stc_params=None,
    drcif_params=None,
    arsenal_params=None,
    tde_params=None,
    # NOTE(review): a wall-clock limit presumably makes the fitted ensemble
    # depend on machine speed, so results may vary despite random_state - confirm.
    time_limit_in_minutes=1,
    save_component_probas=False,
    verbose=0,
    # n_jobs=-1 is deliberately left disabled
    random_state=42,
)

# Out-of-fold prediction for every sample
y_pred = cross_val_predict(hc2, X, y, cv=cv)

# The result variables carry the name of the target scored in this cell
# (Amplitude); they were previously stored under "Duration" names.
Choreoathetosis_Amplitude_confusion_matrix = confusion_matrix(y, y_pred)
print(Choreoathetosis_Amplitude_confusion_matrix)

# Display names are paired with the sorted unique labels, so exactly five
# distinct score values must occur in y / y_pred.
target_names = ['DIS-0', 'DIS-1', 'DIS-2', 'DIS-3', 'DIS-4']
# zero_division=0 reports 0.0 for a class that is never predicted, which is what
# the default does too, but without emitting an UndefinedMetricWarning.
Choreoathetosis_Amplitude_classification_report = classification_report(
    y, y_pred, target_names=target_names, zero_division=0
)
print(Choreoathetosis_Amplitude_classification_report)



[[9 5 4 0 0]
 [5 3 6 0 1]
 [6 2 7 5 1]
 [0 2 6 1 0]
 [1 0 1 1 0]]
              precision    recall  f1-score   support

       DIS-0       0.43      0.50      0.46        18
       DIS-1       0.25      0.20      0.22        15
       DIS-2       0.29      0.33      0.31        21
       DIS-3       0.14      0.11      0.12         9
       DIS-4       0.00      0.00      0.00         3

    accuracy                           0.30        66
   macro avg       0.22      0.23      0.22        66
weighted avg       0.29      0.30      0.29        66

