In [18]:
import os
import numpy as np
import pandas as pd
import glob

from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from scipy.fft import rfft
from scipy.stats import skew, kurtosis
from sklearn.model_selection import train_test_split
from imblearn.over_sampling import SMOTE

from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split

import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, MaxPooling1D, Flatten, Dense, Dropout
from tensorflow.keras.optimizers import Adam

import kerastuner as kt
from tensorflow.keras import layers
from tensorflow.keras.metrics import Precision, Recall
from sklearn.metrics import f1_score, precision_score, recall_score, accuracy_score

import warnings
warnings.simplefilter(action="ignore")

pd.set_option('display.max_columns', None)
pd.set_option('display.width', None)
pd.set_option('display.max_rows',100)
pd.set_option('display.float_format', lambda x: '%.3f' % x)

In [19]:
path_conditions = 'data\depresjon\condition'
conditions = glob.glob(path_conditions + "/*.csv")

path_controls = 'data\depresjon\control'
controls = glob.glob(path_controls + "/*.csv")

df_scores = pd.read_csv("data\depresjon\scores.csv")

# Initialize an empty list to store DataFrames
all_list = []

# Loop through the list of csv files
for filename in conditions:
    # Read the current CSV file
    df = pd.read_csv(filename)
    
    # Optionally, extract patient_name or other identifiers from filename if needed
    # For example, if the filename contains the patient_name:
    name = os.path.splitext(os.path.basename(filename))[0]  # Adjust based on your file naming convention and operating system
    df['patient_name'] = name  # Add patient_name as a new column
    df['label'] = 1
    # Append the DataFrame to the list
    all_list.append(df)

for filename in controls:
    # Read the current CSV file
    df = pd.read_csv(filename)
    
    # Optionally, extract patient_name or other identifiers from filename if needed
    # For example, if the filename contains the patient_name:
    name = os.path.splitext(os.path.basename(filename))[0]  # Adjust based on your file naming convention and operating system
    df['patient_name'] = name  # Add patient_name as a new column
    df['label'] = 0
    # Append the DataFrame to the list
    all_list.append(df)


# Combine all DataFrames in the list into a single DataFrame
combined_df = pd.concat(all_list, ignore_index=True)
combined_df

Unnamed: 0,timestamp,date,activity,patient_name,label
0,2003-05-07 12:00:00,2003-05-07,0,condition_1,1
1,2003-05-07 12:01:00,2003-05-07,143,condition_1,1
2,2003-05-07 12:02:00,2003-05-07,0,condition_1,1
3,2003-05-07 12:03:00,2003-05-07,20,condition_1,1
4,2003-05-07 12:04:00,2003-05-07,166,condition_1,1
...,...,...,...,...,...
1571701,2003-12-01 12:53:00,2003-12-01,7,control_9,0
1571702,2003-12-01 12:54:00,2003-12-01,7,control_9,0
1571703,2003-12-01 12:55:00,2003-12-01,5,control_9,0
1571704,2003-12-01 12:56:00,2003-12-01,5,control_9,0


In [20]:
# Clean and preprocess the dataset
# Ensuring no NaN/infinite values in the dataset
combined_df.fillna(0, inplace=True)
combined_df.replace([np.inf, -np.inf], 0, inplace=True)

# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(combined_df, combined_df['label'], test_size=0.2, random_state=42)

# Function to segment the data with 48-hour segments
def segment_data(df, segment_size_minutes=2880):
    segments = []
    labels = []
    for name in df['patient_name'].unique():
        patient_data = df[df['patient_name'] == name]
        for start_pos in range(0, len(patient_data) - segment_size_minutes + 1, segment_size_minutes):
            segment = patient_data['activity'].iloc[start_pos:start_pos + segment_size_minutes].values
            label = patient_data['label'].iloc[start_pos]  # Assuming label is constant within each segment
            segments.append(segment)
            labels.append(label)
    return np.array(segments), np.array(labels)

# Segment the training and testing data separately
X_train_segments, y_train_segments = segment_data(X_train)
X_test_segments, y_test_segments = segment_data(X_test)

# Normalize the data after segmentation
scaler = StandardScaler()
X_train_normalized = scaler.fit_transform(X_train_segments)
X_test_normalized = scaler.transform(X_test_segments)

# Adjusting the shape for CNN input
X_train_processed = X_train_normalized.reshape((-1, X_train_normalized.shape[1], 1))
X_test_processed = X_test_normalized.reshape((-1, X_test_normalized.shape[1], 1))


In [21]:
# Apply SMOTE for addressing class imbalance
# Flatten the X_train_processed array
X_train_flattened = X_train_processed.reshape(X_train_processed.shape[0], -1)
smote = SMOTE()
X_train_balanced, y_train_balanced = smote.fit_resample(X_train_flattened, y_train_segments)


In [22]:
def model_builder(hp):
    model = Sequential()
    model.add(Conv1D(filters=hp.Int('filters1', min_value=16, max_value=64, step=16),
                     kernel_size=hp.Choice('kernel_size1', values=[3, 5]),
                     activation='relu',
                     input_shape=(X_train_processed.shape[1], 1)))
    model.add(MaxPooling1D(pool_size=2))
    model.add(Dropout(rate=hp.Float('dropout1', min_value=0.0, max_value=0.5, step=0.1)))
    
    # Additional convolutional layer
    model.add(Conv1D(filters=hp.Int('filters2', min_value=16, max_value=64, step=16),
                     kernel_size=hp.Choice('kernel_size2', values=[3, 5]),
                     activation='relu'))
    model.add(MaxPooling1D(pool_size=2))
    model.add(Dropout(rate=hp.Float('dropout2', min_value=0.0, max_value=0.5, step=0.1)))
    
    # Additional convolutional layer
    model.add(Conv1D(filters=hp.Int('filters3', min_value=16, max_value=64, step=16),
                     kernel_size=hp.Choice('kernel_size3', values=[3, 5]),
                     activation='relu'))
    model.add(MaxPooling1D(pool_size=2))
    model.add(Dropout(rate=hp.Float('dropout3', min_value=0.0, max_value=0.5, step=0.1)))
    
    model.add(Flatten())
    model.add(Dense(units=hp.Int('units', min_value=32, max_value=128, step=32), activation='relu'))
    model.add(Dense(1, activation='sigmoid'))
    
    # Include gradient clipping in the optimizer
    optimizer = Adam(
        hp.Choice('learning_rate', values=[1e-2, 1e-3, 1e-4]),
        clipnorm=hp.Choice('clipnorm', values=[0.5, 1.0, 1.5])
    )
    
    model.compile(optimizer=optimizer,
                  loss='binary_crossentropy',
                  metrics=['accuracy', Precision(), Recall()])
    
    return model

# Initialize the tuner
tuner = kt.Hyperband(model_builder,
                     objective='val_accuracy',
                     max_epochs=5,
                     directory='my_dir',
                     project_name='intro_to_kt')

# Create a callback to stop training early after reaching a certain value for the validation loss
stop_early = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=3)

# Execute the hyperparameter search
tuner.search(X_train_balanced, y_train_balanced, epochs=5, validation_split=0.2, callbacks=[stop_early])

# Retrieve the best model
best_hps = tuner.get_best_hyperparameters(num_trials=1)[0]
model = tuner.hypermodel.build(best_hps)

# Train the best model
history = model.fit(X_train_balanced, y_train_balanced, epochs=5, validation_split=0.2)

# Predict probabilities on the test set
y_pred_prob = model.predict(X_test_processed)

# Convert probabilities to binary predictions based on a threshold
threshold = 0.5  # You can adjust this threshold if needed
y_pred = (y_pred_prob > threshold).astype(int)

# Calculate evaluation metrics
accuracy = accuracy_score(y_test_segments, y_pred)
precision = precision_score(y_test_segments, y_pred)
recall = recall_score(y_test_segments, y_pred)
f1 = f1_score(y_test_segments, y_pred)
conf_matrix = confusion_matrix(y_test_segments, y_pred)

print("Accuracy:", accuracy)
print("Precision:", precision)
print("Recall:", recall)
print("F1 Score:", f1)
print("Confusion Matrix:\n", conf_matrix)

Reloading Tuner from my_dir\intro_to_kt\tuner0.json
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Accuracy: 0.6172839506172839
Precision: 0.41935483870967744
Recall: 0.5
F1 Score: 0.45614035087719296
Confusion Matrix:
 [[37 18]
 [13 13]]
