### Code for the Dataset

In [1]:
import numpy as np
import pandas as pd
import random

#Number of patients
npatients = 1000  
#Length of time-series data
nts = 100  

#Setting the seed for reproducibility
seed = 42
np.random.seed(seed)
random.seed(seed)

def generateTS(npatients, nts, tabData):
    TSdata = []

    for i in range(npatients):
        patient = tabData.iloc[i]

        #Base of the ECG signal which is adjusted by the heart disease presence
        time = np.linspace(0, 2 * np.pi, nts)
        ecgbase = np.sin(time)
        noise = np.random.normal(0, 0.1, nts)
        ecg = ecgbase + noise

        #Adding irregularity for heart disease
        if patient['HeartDisease'] == 1:
            ecg += 0.2 * np.sin(2 * time)  

        #Determining Heart rate and respiration rate based on age and health status
        hrBase = 70 - 0.2 * (patient['Age'] - 30)
        hr = hrBase + 5 * np.sin(0.5 * time) + noise

        rrBase = 16 - 0.1 * (patient['Age'] - 30)
        rr = rrBase + 2 * np.sin(0.3 * time) + noise

        #Combining them into one array
        patientData = np.stack([ecg, hr, rr], axis=1)
        TSdata.append(patientData)

    return np.array(TSdata)

def generateTD(npatients):
    data = {
        'Age': np.random.randint(30, 90, npatients),
        'Sex': np.random.choice(['M', 'F'], npatients),
        'ChestPainType': np.random.choice(['TA', 'ATA', 'NAP', 'ASY'], npatients),
        'RestingBP': np.random.normal(120, 15, npatients),
        'Cholesterol': np.random.normal(200, 30, npatients),
        'FastingBS': np.random.choice([0, 1], npatients),
        'RestingECG': np.random.choice(['Normal', 'ST', 'LVH'], npatients),
        'MaxHR': np.random.randint(60, 202, npatients),
        'ExerciseAngina': np.random.choice(['Y', 'N'], npatients),
        'Oldpeak': np.random.uniform(0.0, 6.0, npatients),
        'ST_Slope': np.random.choice(['Up', 'Flat', 'Down'], npatients),
    }

    #HeartDisease is calculated depending on critical features
    hdprob = (
        0.1 * (data['Age'] - 30) / 60 +
        0.2 * (data['Sex'] == 'M') +
        0.3 * (np.isin(data['ChestPainType'], ['ASY'])) +
        0.2 * (data['RestingBP'] > 140) +
        0.3 * (data['Cholesterol'] > 240) +
        0.4 * (data['FastingBS'] == 1) +
        0.3 * (data['RestingECG'] == 'ST') +
        0.2 * (data['MaxHR'] < 100) +
        0.4 * (data['ExerciseAngina'] == 'Y') +
        0.3 * (data['Oldpeak'] > 2.5) +
        0.3 * (data['ST_Slope'] == 'Flat')
    )
    #Making sure that the probability is btw 0 and 1
    hdprob = np.clip(hdprob, 0, 1)  
    data['HeartDisease'] = np.random.binomial(1, hdprob)

    return pd.DataFrame(data)

#Generating tabular data first
tabData = generateTD(npatients)

#Generating time-series data based on tabular data
TSdata = generateTS(npatients, nts, tabData)

#Saving the data as .npy and .csv file respectively
np.save('time_series_data.npy', TSdata)
tabData.to_csv('tabular_data.csv', index=False)

print("Time-series data shape:", TSdata.shape)
print("Tabular data shape:", tabData.shape)

# Example: Access a patient's time-series and tabular data
pidx = 0
print("Time-series data for patient 0:", TSdata[pidx])
print("Tabular data for patient 0:", tabData.iloc[pidx])

Time-series data shape: (1000, 100, 3)
Tabular data shape: (1000, 12)
Time-series data for patient 0: [[ 3.32858279e-03  6.24033286e+01  1.22033286e+01]
 [ 1.86571807e-01  6.26564691e+01  1.23359070e+01]
 [ 2.20393315e-01  6.27606909e+01  1.23197127e+01]
 [ 3.08465745e-01  6.29201622e+01  1.23590596e+01]
 [ 4.70607150e-01  6.31551821e+01  1.24743923e+01]
 [ 5.91272417e-01  6.33506644e+01  1.25507695e+01]
 [ 7.34037430e-01  6.35706154e+01  1.26523420e+01]
 [ 5.27122835e-01  6.34436513e+01  1.24078695e+01]
 [ 9.03082563e-01  6.39026807e+01  1.27504034e+01]
 [ 6.12889922e-01  6.36989855e+01  1.24313671e+01]
 [ 9.77413118e-01  6.41536920e+01  1.27720272e+01]
 [ 7.76353174e-01  6.40467047e+01  1.25524274e+01]
 [ 8.82958500e-01  6.42514183e+01  1.26460996e+01]
 [ 9.35820258e-01  6.44065103e+01  1.26918571e+01]
 [ 8.85209879e-01  6.44622775e+01  1.26401307e+01]
 [ 1.01224798e+00  6.46998045e+01  1.27721370e+01]
 [ 9.47390374e-01  6.47494499e+01  1.27183644e+01]
 [ 9.24472992e-01  6.48448926e+

### Baseline Model

In [2]:
import pandas as pd
import numpy as np
import torch
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report

import random

#Setting seed for reproducibility
seed = 42
torch.manual_seed(seed)
np.random.seed(seed)
random.seed(seed)

#Loading the tabular data
data = pd.read_csv('tabular_data.csv')

#Preprocessing Columns in Dataset
features = data.drop(columns=['HeartDisease'])
target = data['HeartDisease']

#Checking dataset for categorical columns
catcolumns = features.select_dtypes(include=['object']).columns

#Converting categorical columns to numeric using Label Encoding
for col in catcolumns:
    le = LabelEncoder()
    features[col] = le.fit_transform(features[col])

#Splitting the dataset into train and test sets
Xtrain, Xtest, ytrain, ytest = train_test_split(features, target, test_size=0.2, random_state=seed)

#Scaling the features or simply using standardization
scaler = StandardScaler()
Xtrain = scaler.fit_transform(Xtrain)
Xtest = scaler.transform(Xtest)

#Initializing the Logistic Regression model
basemodel = LogisticRegression(class_weight='balanced', random_state=seed)

# Train the model on the training data
basemodel.fit(Xtrain, ytrain)

# Make predictions on the test data
ypred = basemodel.predict(Xtest)

# Evaluate the model
print("Baseline Model Evaluation:")
print(classification_report(ytest, ypred))

Baseline Model Evaluation:
              precision    recall  f1-score   support

           0       0.39      0.81      0.53        26
           1       0.97      0.81      0.88       174

    accuracy                           0.81       200
   macro avg       0.68      0.81      0.70       200
weighted avg       0.89      0.81      0.83       200

