In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split

import torch

%matplotlib inline

In [None]:
# Run on the GPU when PyTorch can see a CUDA device; otherwise use the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

In [None]:
# Both files share one layout: no header row, space-separated values, with the
# class label in the first column followed by 140 signal samples.
ecg_columns = ['target'] + list(range(0, 140))
read_options = dict(
    sep=' ',
    skipinitialspace=True,  # collapse runs of spaces between values
    header=None,
    names=ecg_columns,
)

train_df = pd.read_csv('data/ECG5000_TRAIN.txt', **read_options)
test_df = pd.read_csv('data/ECG5000_TEST.txt', **read_options)


In [None]:
# Stack the provided train and test files into a single frame.
# `DataFrame.append` was deprecated in pandas 1.4 and removed in 2.0, so use
# `pd.concat`; like `append`, it keeps each frame's original row labels
# (ignore_index defaults to False).
df_all = pd.concat([train_df, test_df])

# Exploration

In [None]:
# Display names for the classes; position i is used as the title for the rows
# whose `target` equals i + 1 in the per-class plot.
class_names = [
    'Normal',
    'R on T PVC',
    'PVC',
    'SP',
    'UB',
]

In [None]:
# Number of rows per class label (displayed as the cell output).
df_all.target.value_counts()

In [None]:
# Reshape wide -> long: one row per (target, timestamp, value) so seaborn can
# aggregate the signals per time step.
signal_columns = list(range(0, 140))
melted_df = pd.melt(
    df_all,
    id_vars=['target'],
    value_vars=signal_columns,
    var_name='timestamp',
)

In [None]:
# One panel per class on a 2x3 grid with a shared y axis.
fig, axs = plt.subplots(nrows=2, ncols=3, sharey=True, figsize=(20, 8))

# Targets are 1-based, so the n-th class name pairs with target == n.
# zip stops after the last class name, leaving the sixth axis untouched.
for label, (ax, class_name) in enumerate(zip(axs.flat, class_names), start=1):
    class_rows = melted_df[melted_df.target == label]
    sns.lineplot(x="timestamp", y="value", data=class_rows, ax=ax)
    ax.set_title(class_name)

# Remove the unused last panel of the grid.
fig.delaxes(axs.flat[-1])
plt.tight_layout()

In [None]:
# Two stacked panels with a shared y axis: targets 1-2 on top, targets above 2
# underneath.
fig, axs = plt.subplots(nrows=2, ncols=1, sharey=True, figsize=(10, 6))
normal_ax, anomaly_ax = axs.flat

normal_rows = melted_df[melted_df.target <= 2]
anomaly_rows = melted_df[melted_df.target > 2]

sns.lineplot(x="timestamp", y="value", data=normal_rows, ax=normal_ax)
normal_ax.set_title('normal')

sns.lineplot(x="timestamp", y="value", data=anomaly_rows, ax=anomaly_ax)
anomaly_ax.set_title('Anomaly')

plt.tight_layout()

# Data preparation

In [None]:
# Shuffle all rows with a fixed seed so the row order is repeatable.
df_all = df_all.sample(frac=1.0, random_state=42)

# Targets 1 and 2 are treated as the "normal" group here; everything above 2
# is the anomaly group. The label column is dropped so only the signal remains.
normal_mask = df_all.target <= 2
anomaly_mask = df_all.target > 2
normal_df = df_all[normal_mask].drop(columns='target')
anomaly_df = df_all[anomaly_mask].drop(columns='target')

In [None]:
# Split the normal rows: 85% for training, then divide the remaining 15%
# evenly into validation and test sets.
# A fixed random_state (the same seed as the shuffle of df_all) makes the
# splits identical on every Restart & Run All; without it each run trains and
# evaluates on different rows.
train_df, test_df = train_test_split(normal_df, train_size=0.85, random_state=42)
val_df, test_df = train_test_split(test_df, test_size=0.5, random_state=42)
print(f'Train size: {len(train_df)}')
print(f'Validation size: {len(val_df)}')
print(f'Test size: {len(test_df)}')

# Plain nested lists of floats (one inner list per row), ready for tensor
# conversion.
train_sequences = train_df.astype('float64').values.tolist()
val_sequences = val_df.astype('float64').values.tolist()
test_sequences = test_df.astype('float64').values.tolist()


In [None]:
def create_tensors(sequences):
    """Convert each sequence into a tensor with an added dimension at index 1.

    Args:
        sequences: Iterable of sequences accepted by ``torch.tensor``
            (e.g. lists of floats).

    Returns:
        A list with one tensor per input sequence; a flat sequence of
        length ``n`` becomes a tensor of shape ``(n, 1)``.
    """
    tensors = []
    for seq in sequences:
        column = torch.tensor(seq).unsqueeze(1)
        tensors.append(column)
    return tensors