# Notebook for Preparing Data for LSTM Model

This notebook prepares beat-level time-series sequences for use in an LSTM model. It loads the processed single-beat segments and their labels, builds fixed-length beat sequences grouped and ordered by record, and saves the resulting arrays.

## Setup

In [1]:
%load_ext autoreload
%autoreload 2

import os
import numpy as np
import pandas as pd
import sys
# Extend the import search path with the parent directory before the
# `scripts.*` imports below. The entry is relative, so it is resolved against
# the kernel's current working directory -- presumably the notebook's own
# folder (TODO confirm).
sys.path.append("../")
from scripts.utils import get_project_path
from scripts.extract import build_beat_sequences

## Load Processed Data

In [2]:
# Beat segments array (.npy) produced by the processing step.
segments_path = get_project_path("data", "processed", "beat_segments.npy")
segments = np.load(segments_path)

# Beat label table (.csv) that accompanies the segments.
labels_path = get_project_path("data", "processed", "beats_dataset.csv")
labels_df = pd.read_csv(labels_path)

In [3]:
# Show the first five rows as a quick sanity check of the label table.
labels_df.head(n=5)

Unnamed: 0,record,label,rr_interval,sample_index
0,100,N,0.163889,77
1,100,N,0.813889,370
2,100,N,0.811111,662
3,100,N,0.788889,946
4,100,N,0.791667,1231


## Ensure Alignment of Labels and Segments

In [4]:
# Labels and segments are paired purely by position: row i of labels_df is
# assumed to describe segments[i] (TODO confirm against the extraction step).
# Trimming to the shorter length only equalises the counts -- it cannot repair
# a shifted pairing -- so report any mismatch instead of truncating silently.
n_labels, n_segments = len(labels_df), len(segments)
n_common = min(n_labels, n_segments)

if n_labels != n_segments:
    print(
        f"WARNING: {n_labels} label rows vs {n_segments} segments; "
        f"truncating both to the first {n_common}"
    )

# .copy() detaches the (possibly sliced) frame from the one read from disk.
labels_df = labels_df.iloc[:n_common].copy()
segments = segments[:n_common]

In [5]:
# Print both lengths side by side; they should match after the trimming step.
for name, collection in (("labels_df", labels_df), ("segments", segments)):
    print(f"length of {name}: {len(collection)}")

length of labels_df: 10734
length of segments: 10734


## Construct Sequences

In [6]:
# Number of consecutive beats passed as `sequence_length` to the builder.
SEQUENCE_LENGTH = 5

# The sequence-building logic lives in scripts.extract.build_beat_sequences.
# NOTE(review): the recorded run yields 160 sequences from 10734 beats at
# sequence_length=5 -- confirm the builder is expected to discard that many.
X_seq, y_seq = build_beat_sequences(
    segments=segments,
    labels_df=labels_df,
    sequence_length=SEQUENCE_LENGTH,
)

print("Sequence Shape:", X_seq.shape)
print("Label Shape:", y_seq.shape)

# np.unique(..., return_counts=True) -> (distinct labels, count of each).
label_summary = np.unique(y_seq, return_counts=True)
print("Example Labels:", label_summary)

Sequence Shape: (160, 5, 432)
Label Shape: (160,)
Example Labels: (array(['A', 'N'], dtype='<U1'), array([ 13, 147], dtype=int64))


## Save Sequences for LSTM

In [7]:
# Persist both arrays into the processed-data directory, inputs first and
# labels second.
save_path = get_project_path("data", "processed")
for file_name, array in (
    ("LSTM_X_sequences.npy", X_seq),
    ("LSTM_y_labels.npy", y_seq),
):
    np.save(os.path.join(save_path, file_name), array)
print("LSTM-ready sequences saved")

LSTM-ready sequences saved
