In [1]:
from pathlib import Path
import pandas as pd
import numpy as np
import librosa

# Base folder with 300_P, 301_P, ...
base_path = Path(r"C:\Users\DELL\Desktop\Conversational-Health-Analytics-\Dataset\extracted_folders")

# Get participant folder names like ['300_P', '301_P', ...]
participant_folders = sorted(
    [f for f in base_path.iterdir() if f.is_dir() and f.name.endswith("_P")]
)

print("Number of participants:", len(participant_folders))
print("Example folders:", [f.name for f in participant_folders[:5]])

Number of participants: 189
Example folders: ['300_P', '301_P', '302_P', '303_P', '304_P']


In [2]:
from pathlib import Path
import pandas as pd

pid = 300
folder = base_path / f"{pid}_P"
transcript_path = folder / f"{pid}_TRANSCRIPT.csv"

df_t = pd.read_csv(transcript_path, sep="\t")  # IMPORTANT
print("Columns in transcript file:", df_t.columns.tolist())
df_t.head()

Columns in transcript file: ['start_time', 'stop_time', 'speaker', 'value']


Unnamed: 0,start_time,stop_time,speaker,value
0,36.588,39.668,Ellie,hi i'm ellie thanks for coming in today
1,39.888,43.378,Ellie,i was created to talk to people in a safe and ...
2,43.728,48.498,Ellie,think of me as a friend i don't judge i can't ...
3,49.188,52.388,Ellie,i'm here to learn about people and would love ...
4,52.658,58.958,Ellie,i'll ask a few questions to get us started and...


In [3]:
import librosa
import numpy as np
import pandas as pd

def load_audio_and_clean(pid, sr_target=None):
    """
    Loads original audio AND cleaned (no Ellie) audio.
    Prints both durations.
    Returns: y_full, y_clean, sr
    """
    folder = base_path / f"{pid}_P"
    audio_path = folder / f"{pid}_AUDIO.wav"
    transcript_path = folder / f"{pid}_TRANSCRIPT.csv"

    # Load original audio
    y_full, sr = librosa.load(audio_path, sr=sr_target)
    full_duration = len(y_full) / sr

    # Load transcript (tab-separated)
    df_t = pd.read_csv(transcript_path, sep="\t")
    
    # Ensure numeric times
    df_t["start_time"] = pd.to_numeric(df_t["start_time"], errors="coerce")
    df_t["stop_time"]  = pd.to_numeric(df_t["stop_time"],  errors="coerce")

    keep_segments = []

    for _, row in df_t.iterrows():
        speaker = str(row["speaker"]).lower()

        # Keep only participant speech
        if "ellie" in speaker:
            continue

        start_sec = float(row["start_time"])
        end_sec   = float(row["stop_time"])
        if np.isnan(start_sec) or np.isnan(end_sec):
            continue

        start_idx = int(start_sec * sr)
        end_idx   = int(end_sec * sr)

        # Clip to audio boundaries
        start_idx = max(0, min(start_idx, len(y_full)))
        end_idx   = max(0, min(end_idx, len(y_full)))

        if end_idx > start_idx:
            keep_segments.append(y_full[start_idx:end_idx])

    # Concatenate kept segments
    if keep_segments:
        y_clean = np.concatenate(keep_segments)
    else:
        y_clean = y_full  # fallback

    clean_duration = len(y_clean) / sr

    # Print both lengths
    print(f"Participant {pid}:")
    print(f" - Original audio length: {full_duration:.2f} seconds")
    print(f" - Cleaned audio length (no Ellie): {clean_duration:.2f} seconds")

    return y_full, y_clean, sr




### Summarize Feature Matrix.

In [4]:
import numpy as np
import pandas as pd

def summarize_feature_matrix(mat, prefix):
    """
    mat: 2D numpy array (n_features x n_frames)
    prefix: string for column names, e.g. 'mfcc', 'delta', 'delta2'
    
    Returns: pandas.Series with stats for each row:
             {prefix}_{i}_mean, _std, _min, _max
    """
    stats = {}
    
    n_features = mat.shape[0]
    for i in range(n_features):
        row = mat[i, :]
        row = row[np.isfinite(row)]  # just in case

        if row.size == 0:
            mean = std = rmin = rmax = np.nan
        else:
            mean = row.mean()
            std  = row.std()
            rmin = row.min()
            rmax = row.max()
        
        stats[f"{prefix}_{i+1}_mean"] = mean
        stats[f"{prefix}_{i+1}_std"]  = std
        stats[f"{prefix}_{i+1}_min"]  = rmin
        stats[f"{prefix}_{i+1}_max"]  = rmax

    return pd.Series(stats)

In [6]:
### LogMel Spectogram Features.

In [7]:
import librosa
import numpy as np
import pandas as pd

def extract_logmel_family_features(y, sr, n_mels=64, prefix="logmel"):
    """
    Extract Log-Mel Spectrogram + Delta + Delta-Delta
    Then summarize each into statistical features.

    Returns:
        pandas.Series (one row of features)
    """

    feats = {}

    # -------------------------------------------------
    # 1. Mel-Spectrogram (power)
    # -------------------------------------------------
    mel = librosa.feature.melspectrogram(
        y=y,
        sr=sr,
        n_mels=n_mels,
        power=2.0     # energy spectrogram
    )

    # Convert to log-mel
    logmel = librosa.power_to_db(mel, ref=np.max)

    mel_stats = summarize_feature_matrix(logmel, prefix=f"{prefix}")
    feats.update(mel_stats.to_dict())

    # -------------------------------------------------
    # 2. Delta (1st-order derivative)
    # -------------------------------------------------
    mel_delta = librosa.feature.delta(logmel)
    mel_delta_stats = summarize_feature_matrix(mel_delta, prefix=f"{prefix}_delta")
    feats.update(mel_delta_stats.to_dict())

    # -------------------------------------------------
    # 3. Delta-Delta (2nd-order derivative)
    # -------------------------------------------------
    mel_delta2 = librosa.feature.delta(logmel, order=2)
    mel_delta2_stats = summarize_feature_matrix(mel_delta2, prefix=f"{prefix}_delta2")
    feats.update(mel_delta2_stats.to_dict())

    return pd.Series(feats)


In [8]:
pid = 300
y_full, y_clean, sr = load_audio_and_clean(pid)

logmel_series = extract_logmel_family_features(y_clean, sr)
logmel_series["Participant_ID"] = pid

df_logmel = logmel_series.to_frame().T

print("Log-Mel DF shape:", df_logmel.shape)
display(df_logmel)


Participant 300:
 - Original audio length: 648.50 seconds
 - Cleaned audio length (no Ellie): 155.76 seconds
Log-Mel DF shape: (1, 769)


Unnamed: 0,logmel_1_mean,logmel_1_std,logmel_1_min,logmel_1_max,logmel_2_mean,logmel_2_std,logmel_2_min,logmel_2_max,logmel_3_mean,logmel_3_std,...,logmel_delta2_62_max,logmel_delta2_63_mean,logmel_delta2_63_std,logmel_delta2_63_min,logmel_delta2_63_max,logmel_delta2_64_mean,logmel_delta2_64_std,logmel_delta2_64_min,logmel_delta2_64_max,Participant_ID
0,-39.851742,5.740917,-50.290749,-10.951934,-36.666473,9.424625,-59.953186,-1.421984,-32.704754,10.618219,...,2.475754,9.5e-05,0.306427,-2.576145,2.543875,1.9e-05,0.286238,-2.665952,2.558672,300.0


In [10]:
from pathlib import Path
import pandas as pd

# ---- 1. Get all participant IDs ----
participant_ids = sorted([
    int(f.name.split("_")[0])
    for f in base_path.iterdir()
    if f.is_dir() and f.name.endswith("_P")
])

print("Total participants detected:", len(participant_ids))
print("First 10 participants:", participant_ids[:10])

Total participants detected: 189
First 10 participants: [300, 301, 302, 303, 304, 305, 306, 307, 308, 309]


In [12]:
# ---- 2. Prepare lists for each DF ----
logmel_rows     = []
# ---- 3. Loop over participants ----
for pid in participant_ids:
    print(f"\nProcessing Participant: {pid}")

    # Load full + cleaned audio
    y_full, y_clean, sr = load_audio_and_clean(pid)

    # 1) MFCC Family
    s_logmel = extract_logmel_family_features(y_clean, sr)
    s_logmel["Participant_ID"] = pid
    logmel_rows.append(s_logmel)


Processing Participant: 300
Participant 300:
 - Original audio length: 648.50 seconds
 - Cleaned audio length (no Ellie): 155.76 seconds

Processing Participant: 301
Participant 301:
 - Original audio length: 823.90 seconds
 - Cleaned audio length (no Ellie): 475.44 seconds

Processing Participant: 302
Participant 302:
 - Original audio length: 758.80 seconds
 - Cleaned audio length (no Ellie): 208.93 seconds

Processing Participant: 303
Participant 303:
 - Original audio length: 985.30 seconds
 - Cleaned audio length (no Ellie): 642.93 seconds

Processing Participant: 304
Participant 304:
 - Original audio length: 792.60 seconds
 - Cleaned audio length (no Ellie): 362.60 seconds

Processing Participant: 305
Participant 305:
 - Original audio length: 1704.00 seconds
 - Cleaned audio length (no Ellie): 1118.49 seconds

Processing Participant: 306
Participant 306:
 - Original audio length: 858.10 seconds
 - Cleaned audio length (no Ellie): 509.37 seconds

Processing Participant: 307
Par

In [13]:
import pandas as pd
from pathlib import Path

root = Path(r"C:\Users\DELL\Desktop\Conversational-Health-Analytics-")
dataset_folder = root / "Dataset"

DF_TRAIN = pd.read_csv(dataset_folder / "train_split_Depression_AVEC2017.csv")
DF_DEV   = pd.read_csv(dataset_folder / "dev_split_Depression_AVEC2017.csv")
DF_TEST  = pd.read_csv(dataset_folder / "full_test_split.csv")

In [14]:
df_logmel_all = pd.DataFrame(logmel_rows)

In [21]:
# Keep only needed label columns
label_cols = ["Participant_ID", "PHQ8_Score", "PHQ8_Binary", "Gender"]

df_train_lbl = DF_TRAIN[label_cols]
df_dev_lbl   = DF_DEV[label_cols]
df_test_lbl  = DF_TEST[label_cols]

df_labels = (
    pd.concat([df_train_lbl, df_dev_lbl, df_test_lbl], axis=0)
      .drop_duplicates(subset="Participant_ID")
      .reset_index(drop=True)
)

# Merge MFCC features with labels
logmel_with_labels = df_logmel_all.merge(df_labels, on="Participant_ID", how="inner")
print("MFCC+Labels shape:", logmel_with_labels.shape)
logmel_with_labels.head(2)

MFCC+Labels shape: (189, 772)


Unnamed: 0,logmel_1_mean,logmel_1_std,logmel_1_min,logmel_1_max,logmel_2_mean,logmel_2_std,logmel_2_min,logmel_2_max,logmel_3_mean,logmel_3_std,logmel_3_min,logmel_3_max,logmel_4_mean,logmel_4_std,logmel_4_min,logmel_4_max,logmel_5_mean,logmel_5_std,logmel_5_min,logmel_5_max,logmel_6_mean,logmel_6_std,logmel_6_min,logmel_6_max,logmel_7_mean,logmel_7_std,logmel_7_min,logmel_7_max,logmel_8_mean,logmel_8_std,logmel_8_min,logmel_8_max,logmel_9_mean,logmel_9_std,logmel_9_min,logmel_9_max,logmel_10_mean,logmel_10_std,logmel_10_min,logmel_10_max,logmel_11_mean,logmel_11_std,logmel_11_min,logmel_11_max,logmel_12_mean,logmel_12_std,logmel_12_min,logmel_12_max,logmel_13_mean,logmel_13_std,logmel_13_min,logmel_13_max,logmel_14_mean,logmel_14_std,logmel_14_min,logmel_14_max,logmel_15_mean,logmel_15_std,logmel_15_min,logmel_15_max,logmel_16_mean,logmel_16_std,logmel_16_min,logmel_16_max,logmel_17_mean,logmel_17_std,logmel_17_min,logmel_17_max,logmel_18_mean,logmel_18_std,logmel_18_min,logmel_18_max,logmel_19_mean,logmel_19_std,logmel_19_min,logmel_19_max,logmel_20_mean,logmel_20_std,logmel_20_min,logmel_20_max,logmel_21_mean,logmel_21_std,logmel_21_min,logmel_21_max,logmel_22_mean,logmel_22_std,logmel_22_min,logmel_22_max,logmel_23_mean,logmel_23_std,logmel_23_min,logmel_23_max,logmel_24_mean,logmel_24_std,logmel_24_min,logmel_24_max,logmel_25_mean,logmel_25_std,logmel_25_min,logmel_25_max,logmel_26_mean,logmel_26_std,logmel_26_min,logmel_26_max,logmel_27_mean,logmel_27_std,logmel_27_min,logmel_27_max,logmel_28_mean,logmel_28_std,logmel_28_min,logmel_28_max,logmel_29_mean,logmel_29_std,logmel_29_min,logmel_29_max,logmel_30_mean,logmel_30_std,logmel_30_min,logmel_30_max,logmel_31_mean,logmel_31_std,logmel_31_min,logmel_31_max,logmel_32_mean,logmel_32_std,logmel_32_min,logmel_32_max,logmel_33_mean,logmel_33_std,logmel_33_min,logmel_33_max,logmel_34_mean,logmel_34_std,logmel_34_min,logmel_34_max,logmel_35_mean,logmel_35_std,logmel_35_min,logmel_35_max,logmel_36_mean,logmel_36_std,logmel_36_min,logmel_36_max,logmel_37_mean,logmel_37_std,logmel_37_min,logmel_37_max,logmel_38_mean,logmel_38_std,logmel_38_min,logmel_38_max,logmel_39_mean,logmel_39_std,logmel_39_min,logmel_39_max,logmel_40_mean,logmel_40_std,logmel_40_min,logmel_40_max,logmel_41_mean,logmel_41_std,logmel_41_min,logmel_41_max,logmel_42_mean,logmel_42_std,logmel_42_min,logmel_42_max,logmel_43_mean,logmel_43_std,logmel_43_min,logmel_43_max,logmel_44_mean,logmel_44_std,logmel_44_min,logmel_44_max,logmel_45_mean,logmel_45_std,logmel_45_min,logmel_45_max,logmel_46_mean,logmel_46_std,logmel_46_min,logmel_46_max,logmel_47_mean,logmel_47_std,logmel_47_min,logmel_47_max,logmel_48_mean,logmel_48_std,logmel_48_min,logmel_48_max,logmel_49_mean,logmel_49_std,logmel_49_min,logmel_49_max,logmel_50_mean,logmel_50_std,logmel_50_min,logmel_50_max,logmel_51_mean,logmel_51_std,logmel_51_min,logmel_51_max,logmel_52_mean,logmel_52_std,logmel_52_min,logmel_52_max,logmel_53_mean,logmel_53_std,logmel_53_min,logmel_53_max,logmel_54_mean,logmel_54_std,logmel_54_min,logmel_54_max,logmel_55_mean,logmel_55_std,logmel_55_min,logmel_55_max,logmel_56_mean,logmel_56_std,logmel_56_min,logmel_56_max,logmel_57_mean,logmel_57_std,logmel_57_min,logmel_57_max,logmel_58_mean,logmel_58_std,logmel_58_min,logmel_58_max,logmel_59_mean,logmel_59_std,logmel_59_min,logmel_59_max,logmel_60_mean,logmel_60_std,logmel_60_min,logmel_60_max,logmel_61_mean,logmel_61_std,logmel_61_min,logmel_61_max,logmel_62_mean,logmel_62_std,logmel_62_min,logmel_62_max,logmel_63_mean,logmel_63_std,logmel_63_min,logmel_63_max,logmel_64_mean,logmel_64_std,logmel_64_min,logmel_64_max,logmel_delta_1_mean,logmel_delta_1_std,logmel_delta_1_min,logmel_delta_1_max,logmel_delta_2_mean,logmel_delta_2_std,logmel_delta_2_min,logmel_delta_2_max,logmel_delta_3_mean,logmel_delta_3_std,logmel_delta_3_min,logmel_delta_3_max,logmel_delta_4_mean,logmel_delta_4_std,logmel_delta_4_min,logmel_delta_4_max,logmel_delta_5_mean,logmel_delta_5_std,logmel_delta_5_min,logmel_delta_5_max,logmel_delta_6_mean,logmel_delta_6_std,logmel_delta_6_min,logmel_delta_6_max,logmel_delta_7_mean,logmel_delta_7_std,logmel_delta_7_min,logmel_delta_7_max,logmel_delta_8_mean,logmel_delta_8_std,logmel_delta_8_min,logmel_delta_8_max,logmel_delta_9_mean,logmel_delta_9_std,logmel_delta_9_min,logmel_delta_9_max,logmel_delta_10_mean,logmel_delta_10_std,logmel_delta_10_min,logmel_delta_10_max,logmel_delta_11_mean,logmel_delta_11_std,logmel_delta_11_min,logmel_delta_11_max,logmel_delta_12_mean,logmel_delta_12_std,logmel_delta_12_min,logmel_delta_12_max,logmel_delta_13_mean,logmel_delta_13_std,logmel_delta_13_min,logmel_delta_13_max,logmel_delta_14_mean,logmel_delta_14_std,logmel_delta_14_min,logmel_delta_14_max,logmel_delta_15_mean,logmel_delta_15_std,logmel_delta_15_min,logmel_delta_15_max,logmel_delta_16_mean,logmel_delta_16_std,logmel_delta_16_min,logmel_delta_16_max,logmel_delta_17_mean,logmel_delta_17_std,logmel_delta_17_min,logmel_delta_17_max,logmel_delta_18_mean,logmel_delta_18_std,logmel_delta_18_min,logmel_delta_18_max,logmel_delta_19_mean,logmel_delta_19_std,logmel_delta_19_min,logmel_delta_19_max,logmel_delta_20_mean,logmel_delta_20_std,logmel_delta_20_min,logmel_delta_20_max,logmel_delta_21_mean,logmel_delta_21_std,logmel_delta_21_min,logmel_delta_21_max,logmel_delta_22_mean,logmel_delta_22_std,logmel_delta_22_min,logmel_delta_22_max,logmel_delta_23_mean,logmel_delta_23_std,logmel_delta_23_min,logmel_delta_23_max,logmel_delta_24_mean,logmel_delta_24_std,logmel_delta_24_min,logmel_delta_24_max,logmel_delta_25_mean,logmel_delta_25_std,logmel_delta_25_min,logmel_delta_25_max,logmel_delta_26_mean,logmel_delta_26_std,logmel_delta_26_min,logmel_delta_26_max,logmel_delta_27_mean,logmel_delta_27_std,logmel_delta_27_min,logmel_delta_27_max,logmel_delta_28_mean,logmel_delta_28_std,logmel_delta_28_min,logmel_delta_28_max,logmel_delta_29_mean,logmel_delta_29_std,logmel_delta_29_min,logmel_delta_29_max,logmel_delta_30_mean,logmel_delta_30_std,logmel_delta_30_min,logmel_delta_30_max,logmel_delta_31_mean,logmel_delta_31_std,logmel_delta_31_min,logmel_delta_31_max,logmel_delta_32_mean,logmel_delta_32_std,logmel_delta_32_min,logmel_delta_32_max,logmel_delta_33_mean,logmel_delta_33_std,logmel_delta_33_min,logmel_delta_33_max,logmel_delta_34_mean,logmel_delta_34_std,logmel_delta_34_min,logmel_delta_34_max,logmel_delta_35_mean,logmel_delta_35_std,logmel_delta_35_min,logmel_delta_35_max,logmel_delta_36_mean,logmel_delta_36_std,logmel_delta_36_min,logmel_delta_36_max,logmel_delta_37_mean,logmel_delta_37_std,logmel_delta_37_min,logmel_delta_37_max,logmel_delta_38_mean,logmel_delta_38_std,logmel_delta_38_min,logmel_delta_38_max,logmel_delta_39_mean,logmel_delta_39_std,logmel_delta_39_min,logmel_delta_39_max,logmel_delta_40_mean,logmel_delta_40_std,logmel_delta_40_min,logmel_delta_40_max,logmel_delta_41_mean,logmel_delta_41_std,logmel_delta_41_min,logmel_delta_41_max,logmel_delta_42_mean,logmel_delta_42_std,logmel_delta_42_min,logmel_delta_42_max,logmel_delta_43_mean,logmel_delta_43_std,logmel_delta_43_min,logmel_delta_43_max,logmel_delta_44_mean,logmel_delta_44_std,logmel_delta_44_min,logmel_delta_44_max,logmel_delta_45_mean,logmel_delta_45_std,logmel_delta_45_min,logmel_delta_45_max,logmel_delta_46_mean,logmel_delta_46_std,logmel_delta_46_min,logmel_delta_46_max,logmel_delta_47_mean,logmel_delta_47_std,logmel_delta_47_min,logmel_delta_47_max,logmel_delta_48_mean,logmel_delta_48_std,logmel_delta_48_min,logmel_delta_48_max,logmel_delta_49_mean,logmel_delta_49_std,logmel_delta_49_min,logmel_delta_49_max,logmel_delta_50_mean,logmel_delta_50_std,logmel_delta_50_min,logmel_delta_50_max,logmel_delta_51_mean,logmel_delta_51_std,logmel_delta_51_min,logmel_delta_51_max,logmel_delta_52_mean,logmel_delta_52_std,logmel_delta_52_min,logmel_delta_52_max,logmel_delta_53_mean,logmel_delta_53_std,logmel_delta_53_min,logmel_delta_53_max,logmel_delta_54_mean,logmel_delta_54_std,logmel_delta_54_min,logmel_delta_54_max,logmel_delta_55_mean,logmel_delta_55_std,logmel_delta_55_min,logmel_delta_55_max,logmel_delta_56_mean,logmel_delta_56_std,logmel_delta_56_min,logmel_delta_56_max,logmel_delta_57_mean,logmel_delta_57_std,logmel_delta_57_min,logmel_delta_57_max,logmel_delta_58_mean,logmel_delta_58_std,logmel_delta_58_min,logmel_delta_58_max,logmel_delta_59_mean,logmel_delta_59_std,logmel_delta_59_min,logmel_delta_59_max,logmel_delta_60_mean,logmel_delta_60_std,logmel_delta_60_min,logmel_delta_60_max,logmel_delta_61_mean,logmel_delta_61_std,logmel_delta_61_min,logmel_delta_61_max,logmel_delta_62_mean,logmel_delta_62_std,logmel_delta_62_min,logmel_delta_62_max,logmel_delta_63_mean,logmel_delta_63_std,logmel_delta_63_min,logmel_delta_63_max,logmel_delta_64_mean,logmel_delta_64_std,logmel_delta_64_min,logmel_delta_64_max,logmel_delta2_1_mean,logmel_delta2_1_std,logmel_delta2_1_min,logmel_delta2_1_max,logmel_delta2_2_mean,logmel_delta2_2_std,logmel_delta2_2_min,logmel_delta2_2_max,logmel_delta2_3_mean,logmel_delta2_3_std,logmel_delta2_3_min,logmel_delta2_3_max,logmel_delta2_4_mean,logmel_delta2_4_std,logmel_delta2_4_min,logmel_delta2_4_max,logmel_delta2_5_mean,logmel_delta2_5_std,logmel_delta2_5_min,logmel_delta2_5_max,logmel_delta2_6_mean,logmel_delta2_6_std,logmel_delta2_6_min,logmel_delta2_6_max,logmel_delta2_7_mean,logmel_delta2_7_std,logmel_delta2_7_min,logmel_delta2_7_max,logmel_delta2_8_mean,logmel_delta2_8_std,logmel_delta2_8_min,logmel_delta2_8_max,logmel_delta2_9_mean,logmel_delta2_9_std,logmel_delta2_9_min,logmel_delta2_9_max,logmel_delta2_10_mean,logmel_delta2_10_std,logmel_delta2_10_min,logmel_delta2_10_max,logmel_delta2_11_mean,logmel_delta2_11_std,logmel_delta2_11_min,logmel_delta2_11_max,logmel_delta2_12_mean,logmel_delta2_12_std,logmel_delta2_12_min,logmel_delta2_12_max,logmel_delta2_13_mean,logmel_delta2_13_std,logmel_delta2_13_min,logmel_delta2_13_max,logmel_delta2_14_mean,logmel_delta2_14_std,logmel_delta2_14_min,logmel_delta2_14_max,logmel_delta2_15_mean,logmel_delta2_15_std,logmel_delta2_15_min,logmel_delta2_15_max,logmel_delta2_16_mean,logmel_delta2_16_std,logmel_delta2_16_min,logmel_delta2_16_max,logmel_delta2_17_mean,logmel_delta2_17_std,logmel_delta2_17_min,logmel_delta2_17_max,logmel_delta2_18_mean,logmel_delta2_18_std,logmel_delta2_18_min,logmel_delta2_18_max,logmel_delta2_19_mean,logmel_delta2_19_std,logmel_delta2_19_min,logmel_delta2_19_max,logmel_delta2_20_mean,logmel_delta2_20_std,logmel_delta2_20_min,logmel_delta2_20_max,logmel_delta2_21_mean,logmel_delta2_21_std,logmel_delta2_21_min,logmel_delta2_21_max,logmel_delta2_22_mean,logmel_delta2_22_std,logmel_delta2_22_min,logmel_delta2_22_max,logmel_delta2_23_mean,logmel_delta2_23_std,logmel_delta2_23_min,logmel_delta2_23_max,logmel_delta2_24_mean,logmel_delta2_24_std,logmel_delta2_24_min,logmel_delta2_24_max,logmel_delta2_25_mean,logmel_delta2_25_std,logmel_delta2_25_min,logmel_delta2_25_max,logmel_delta2_26_mean,logmel_delta2_26_std,logmel_delta2_26_min,logmel_delta2_26_max,logmel_delta2_27_mean,logmel_delta2_27_std,logmel_delta2_27_min,logmel_delta2_27_max,logmel_delta2_28_mean,logmel_delta2_28_std,logmel_delta2_28_min,logmel_delta2_28_max,logmel_delta2_29_mean,logmel_delta2_29_std,logmel_delta2_29_min,logmel_delta2_29_max,logmel_delta2_30_mean,logmel_delta2_30_std,logmel_delta2_30_min,logmel_delta2_30_max,logmel_delta2_31_mean,logmel_delta2_31_std,logmel_delta2_31_min,logmel_delta2_31_max,logmel_delta2_32_mean,logmel_delta2_32_std,logmel_delta2_32_min,logmel_delta2_32_max,logmel_delta2_33_mean,logmel_delta2_33_std,logmel_delta2_33_min,logmel_delta2_33_max,logmel_delta2_34_mean,logmel_delta2_34_std,logmel_delta2_34_min,logmel_delta2_34_max,logmel_delta2_35_mean,logmel_delta2_35_std,logmel_delta2_35_min,logmel_delta2_35_max,logmel_delta2_36_mean,logmel_delta2_36_std,logmel_delta2_36_min,logmel_delta2_36_max,logmel_delta2_37_mean,logmel_delta2_37_std,logmel_delta2_37_min,logmel_delta2_37_max,logmel_delta2_38_mean,logmel_delta2_38_std,logmel_delta2_38_min,logmel_delta2_38_max,logmel_delta2_39_mean,logmel_delta2_39_std,logmel_delta2_39_min,logmel_delta2_39_max,logmel_delta2_40_mean,logmel_delta2_40_std,logmel_delta2_40_min,logmel_delta2_40_max,logmel_delta2_41_mean,logmel_delta2_41_std,logmel_delta2_41_min,logmel_delta2_41_max,logmel_delta2_42_mean,logmel_delta2_42_std,logmel_delta2_42_min,logmel_delta2_42_max,logmel_delta2_43_mean,logmel_delta2_43_std,logmel_delta2_43_min,logmel_delta2_43_max,logmel_delta2_44_mean,logmel_delta2_44_std,logmel_delta2_44_min,logmel_delta2_44_max,logmel_delta2_45_mean,logmel_delta2_45_std,logmel_delta2_45_min,logmel_delta2_45_max,logmel_delta2_46_mean,logmel_delta2_46_std,logmel_delta2_46_min,logmel_delta2_46_max,logmel_delta2_47_mean,logmel_delta2_47_std,logmel_delta2_47_min,logmel_delta2_47_max,logmel_delta2_48_mean,logmel_delta2_48_std,logmel_delta2_48_min,logmel_delta2_48_max,logmel_delta2_49_mean,logmel_delta2_49_std,logmel_delta2_49_min,logmel_delta2_49_max,logmel_delta2_50_mean,logmel_delta2_50_std,logmel_delta2_50_min,logmel_delta2_50_max,logmel_delta2_51_mean,logmel_delta2_51_std,logmel_delta2_51_min,logmel_delta2_51_max,logmel_delta2_52_mean,logmel_delta2_52_std,logmel_delta2_52_min,logmel_delta2_52_max,logmel_delta2_53_mean,logmel_delta2_53_std,logmel_delta2_53_min,logmel_delta2_53_max,logmel_delta2_54_mean,logmel_delta2_54_std,logmel_delta2_54_min,logmel_delta2_54_max,logmel_delta2_55_mean,logmel_delta2_55_std,logmel_delta2_55_min,logmel_delta2_55_max,logmel_delta2_56_mean,logmel_delta2_56_std,logmel_delta2_56_min,logmel_delta2_56_max,logmel_delta2_57_mean,logmel_delta2_57_std,logmel_delta2_57_min,logmel_delta2_57_max,logmel_delta2_58_mean,logmel_delta2_58_std,logmel_delta2_58_min,logmel_delta2_58_max,logmel_delta2_59_mean,logmel_delta2_59_std,logmel_delta2_59_min,logmel_delta2_59_max,logmel_delta2_60_mean,logmel_delta2_60_std,logmel_delta2_60_min,logmel_delta2_60_max,logmel_delta2_61_mean,logmel_delta2_61_std,logmel_delta2_61_min,logmel_delta2_61_max,logmel_delta2_62_mean,logmel_delta2_62_std,logmel_delta2_62_min,logmel_delta2_62_max,logmel_delta2_63_mean,logmel_delta2_63_std,logmel_delta2_63_min,logmel_delta2_63_max,logmel_delta2_64_mean,logmel_delta2_64_std,logmel_delta2_64_min,logmel_delta2_64_max,Participant_ID,PHQ8_Score,PHQ8_Binary,Gender
0,-39.851742,5.740917,-50.290749,-10.951934,-36.666473,9.424625,-59.953186,-1.421984,-32.704754,10.618219,-62.922821,0.0,-38.444298,11.59189,-58.93544,-8.010373,-36.046009,10.111912,-56.211853,-5.029753,-39.541901,10.223858,-58.508698,-10.306637,-42.335514,10.076996,-60.252258,-9.60392,-41.445229,10.693617,-63.525314,-5.730236,-43.900616,11.855015,-67.004128,-11.83223,-42.292408,11.916592,-66.646904,-9.608103,-41.670689,12.271562,-68.370148,-12.0534,-42.511345,12.572989,-66.612236,-10.799599,-42.740101,12.980083,-67.882423,-9.160012,-45.265316,12.594672,-67.811661,-7.733182,-47.281605,11.568984,-68.004677,-11.218313,-48.010006,12.18061,-71.816017,-11.783464,-47.378807,12.451192,-68.53788,-11.24302,-46.011837,11.511155,-67.631721,-9.824169,-49.009212,11.873863,-69.166443,-13.946668,-50.276794,11.378053,-69.881828,-12.796806,-50.927895,10.84037,-69.346687,-12.481324,-52.884293,12.025846,-72.168762,-14.247681,-53.218559,11.982041,-73.972351,-13.800469,-54.23177,11.837197,-71.576859,-12.470656,-53.495396,11.753271,-73.183128,-13.771776,-53.680553,12.22415,-73.298363,-15.113614,-52.546642,11.813158,-72.904457,-14.655323,-52.808094,11.954452,-72.943039,-13.835618,-52.31179,11.870535,-73.768715,-14.588394,-52.963558,12.075062,-74.416367,-15.850573,-52.874767,11.815126,-73.040245,-16.444122,-53.565666,11.943626,-74.042282,-15.904652,-54.37471,11.296093,-71.887871,-16.77697,-54.550739,10.918699,-71.44416,-15.308524,-54.984581,10.772046,-71.352737,-15.457122,-56.24448,10.918089,-73.01564,-15.980874,-56.773766,11.010494,-73.683762,-16.820917,-57.273552,11.115426,-74.542061,-16.137234,-57.509197,11.113318,-75.323387,-17.903858,-57.050003,10.618951,-71.593445,-17.023657,-57.510437,10.572404,-71.803375,-17.253759,-58.951881,10.849917,-73.921524,-18.085953,-60.176651,10.739304,-74.026939,-19.195347,-60.826412,10.755725,-75.944672,-18.62545,-61.175663,10.812799,-75.968025,-19.956989,-61.384243,10.738929,-76.493614,-20.275364,-60.993401,10.665506,-77.195732,-20.189339,-60.535217,10.655331,-75.906479,-19.903004,-60.520363,10.622469,-76.39798,-20.415352,-61.342403,10.526973,-78.149544,-21.180485,-62.204151,10.467043,-77.389656,-21.620869,-62.564285,10.434353,-76.813248,-21.745163,-62.878166,10.45971,-76.082756,-22.319384,-62.978508,10.357654,-76.456497,-22.547447,-63.112072,10.343971,-76.297241,-22.966291,-63.486366,10.304195,-75.965256,-23.189812,-63.596149,10.184913,-76.479034,-24.083778,-63.82444,10.082624,-76.345062,-25.050465,-64.186752,9.993088,-77.040321,-24.264339,-65.032982,9.917229,-77.983513,-26.151478,-66.297325,9.824694,-78.763115,-27.643932,-68.13102,9.741109,-80.0,-30.459169,-70.547966,9.589478,-80.0,-32.264328,-73.406082,9.107372,-80.0,-36.631458,0.000655,0.562481,-3.408246,3.87416,-0.002138,1.157037,-7.427499,4.393394,-0.002196,1.563738,-7.614388,6.485934,-0.004703,1.774328,-5.270807,6.553293,-0.005857,1.485678,-6.564243,6.117229,-0.003163,1.479937,-6.133159,6.00471,-0.000574,1.438803,-6.227189,5.358456,-0.001174,1.57352,-6.421951,5.685359,-0.005834,1.789931,-6.477557,7.043877,-0.007305,1.88595,-7.837217,6.395647,-0.004754,1.993862,-7.865154,6.964901,-0.0019,2.117957,-7.358836,7.410278,-0.002284,2.179384,-7.329955,8.069315,-0.004289,2.040538,-7.026336,8.059722,-0.005131,1.773072,-6.830982,7.075618,-0.004875,1.877056,-7.175396,7.275594,-0.001989,2.057817,-6.860421,7.416581,-0.001125,1.807707,-6.915091,7.225881,-0.002079,1.813398,-6.688712,7.002263,-0.002742,1.645075,-6.821152,6.407463,-0.002437,1.434233,-6.015069,6.505145,-0.002017,1.662543,-6.999271,6.295094,-0.001723,1.638962,-7.154384,6.782696,-0.001144,1.588885,-6.266981,6.293672,-0.001879,1.601522,-6.473611,6.402759,-0.002838,1.656332,-6.594781,6.486707,-0.002557,1.613482,-6.88164,6.627349,-0.003058,1.730491,-6.631011,6.73987,-0.003688,1.703441,-6.295053,7.082738,-0.004345,1.78008,-6.835969,7.010891,-0.00427,1.744458,-7.451605,6.658376,-0.003556,1.762355,-7.571341,6.864086,-0.00229,1.570089,-7.577459,6.656305,-0.000622,1.477519,-7.149912,6.90031,-0.000888,1.368096,-7.237137,6.845982,-0.000873,1.308669,-7.479065,6.268087,-0.001115,1.283352,-7.625987,6.27949,-0.002286,1.285248,-7.541911,6.190585,-0.002239,1.295134,-7.116451,6.531188,-0.002559,1.247369,-6.744134,6.399222,-0.002165,1.205066,-7.381474,6.430018,-0.001336,1.163207,-7.706937,6.391137,-0.001866,1.025413,-7.720008,6.195965,-0.001647,0.960673,-7.996519,6.27416,-0.001929,0.950841,-7.780514,5.819147,-0.001602,0.944503,-7.459291,5.938414,-0.001852,1.017366,-7.343104,6.055673,-0.002281,1.091199,-7.407117,5.832691,-0.002916,1.084247,-7.566036,6.007954,-0.002862,0.959336,-7.554816,6.03597,-0.002173,0.867211,-7.191154,5.946761,-0.002546,0.833454,-7.250513,5.929493,-0.001645,0.804061,-7.320018,6.10933,-0.001797,0.76969,-7.407846,5.547441,-0.001684,0.72048,-7.564429,5.553703,-0.001845,0.687834,-7.141067,5.753775,-0.001488,0.661736,-7.060959,5.318386,-0.001804,0.659297,-7.28023,5.303634,-0.001864,0.651912,-7.263845,5.314815,-0.001654,0.65076,-6.781481,5.343369,-0.001644,0.648088,-7.009334,5.261159,-0.001806,0.629735,-6.902781,5.289543,-0.002112,0.632005,-6.657485,5.10376,-0.001284,0.596797,-6.718698,4.89314,-0.00011,0.345677,-1.843542,1.729448,0.000704,0.595834,-2.975336,3.231094,0.000254,0.822058,-3.895775,4.39553,-0.002048,0.927267,-3.755056,4.226347,-0.002085,0.789995,-3.390566,3.323174,-0.002107,0.772569,-3.253763,2.809123,-0.000481,0.778983,-3.47996,4.298919,-0.000584,0.822063,-4.269476,4.351273,-0.002123,0.969448,-4.377288,3.603624,-0.002748,1.033088,-4.718294,4.196088,-0.002992,1.094983,-3.798604,5.761508,-0.001739,1.167877,-3.964405,6.079978,-0.001491,1.175475,-4.539234,4.157409,-0.00278,1.13108,-4.918217,4.16978,-0.003961,0.994498,-4.181981,5.647631,-0.004254,1.032044,-4.464016,5.314181,-0.003117,1.153215,-4.551128,4.369111,-0.001953,1.021063,-4.360654,4.504696,-0.00221,1.038282,-4.27176,4.529875,-0.001608,0.912826,-4.141519,4.504786,-0.001514,0.772611,-3.73266,3.843079,-0.001881,0.924066,-4.083868,4.409594,-0.001645,0.922713,-3.821781,5.324937,-0.002001,0.860745,-3.768911,4.391576,-0.001882,0.876277,-4.218463,3.854547,-0.002384,0.937947,-4.044465,4.788952,-0.00106,0.907104,-4.36791,4.761883,-0.001932,0.956815,-4.308661,3.945053,-0.003133,0.998583,-4.470064,3.904717,-0.003992,1.015091,-4.156464,3.759477,-0.002545,1.012325,-4.333559,4.07477,-0.00206,1.040694,-4.49176,4.460404,-0.001324,0.907613,-4.267463,3.624063,-0.000582,0.856946,-3.860828,3.675834,-0.000415,0.773285,-3.399029,3.413612,-0.000649,0.723855,-3.404227,3.679519,-0.000875,0.720833,-3.054933,3.524524,-0.000522,0.730972,-3.188881,3.893355,-0.000462,0.726457,-3.336474,4.051969,-0.00107,0.681175,-3.604669,3.904557,-0.001592,0.660269,-3.564255,3.93566,-0.0014,0.614451,-3.490843,3.397852,-0.001059,0.535764,-3.087711,3.517388,-0.000478,0.500531,-3.435765,3.61588,-0.000594,0.494188,-3.432452,3.305724,-0.000622,0.495186,-3.028486,3.050538,-0.000729,0.532017,-3.028147,3.187119,-0.001136,0.584926,-3.227058,3.181402,-0.001997,0.592864,-3.323748,3.677542,-0.000607,0.510747,-3.053588,3.885958,5.2e-05,0.458996,-3.340174,3.436489,-0.000264,0.431766,-3.310402,2.767556,-0.000199,0.406353,-3.22843,2.753009,0.000252,0.387721,-3.225262,2.993035,-0.000246,0.355671,-3.018779,3.07025,-0.000276,0.335354,-2.694158,2.694717,-0.000164,0.321524,-2.855458,2.891176,-0.000346,0.319489,-3.021807,2.88331,5.2e-05,0.306826,-2.962321,2.778399,1e-05,0.303709,-2.503587,2.453456,0.000122,0.306797,-2.745233,2.475453,-0.000183,0.299928,-2.774734,2.475754,9.5e-05,0.306427,-2.576145,2.543875,1.9e-05,0.286238,-2.665952,2.558672,300.0,2,0,1
1,-34.786011,3.358032,-44.879646,-23.219515,-29.060829,10.515779,-53.240997,-14.231997,-32.236298,9.939849,-58.014923,-7.619808,-30.143028,11.013476,-59.824051,-5.542061,-32.030411,10.867658,-56.220779,-5.682781,-36.031937,10.141639,-57.505447,-5.210167,-37.368904,10.451889,-60.692589,-6.297174,-37.039143,10.711411,-62.189766,-6.307545,-37.597626,11.782638,-63.909447,-4.699326,-37.692524,12.777701,-66.315414,-0.675177,-37.929832,12.641417,-66.395905,-5.825067,-38.256271,12.925496,-65.244629,-3.566361,-37.969402,13.204153,-65.99659,-3.508892,-39.766624,13.379935,-66.539505,-3.100595,-40.931011,13.47966,-68.71022,-2.193323,-41.629654,13.781569,-68.089371,0.0,-42.090237,13.090995,-68.711235,-0.828356,-43.468948,11.691388,-65.727646,-3.700083,-45.156689,11.324534,-66.018082,-3.624866,-46.497578,11.385719,-67.354744,-6.117212,-47.572041,11.342668,-70.620949,-7.512813,-49.335587,11.387427,-70.608612,-12.044534,-49.333294,11.282046,-70.263908,-12.796953,-49.398602,12.154994,-70.495506,-12.835116,-48.366116,12.351986,-70.989159,-9.560171,-48.93634,12.332754,-70.712029,-9.880013,-49.056435,12.477395,-73.122635,-10.610985,-49.910172,12.301474,-73.008713,-14.392643,-50.314922,11.809654,-71.761665,-14.649114,-51.125488,11.640377,-70.723656,-13.61887,-51.515751,11.153984,-72.092598,-17.473976,-51.772354,11.038113,-72.343536,-16.458107,-51.399204,10.185336,-70.009857,-15.131104,-51.75573,9.359445,-67.789101,-17.118204,-52.509068,9.556059,-70.676666,-16.595606,-53.855598,10.069683,-71.238205,-17.776957,-54.560001,10.134842,-71.749725,-20.10844,-55.250168,9.829446,-73.015129,-20.77285,-55.239037,9.026427,-70.633072,-21.527912,-55.028103,8.123954,-67.954964,-25.719372,-55.313007,8.616389,-69.742401,-24.426548,-56.088478,9.774688,-72.257614,-23.926105,-56.851929,9.395409,-72.176926,-21.414106,-58.241673,8.69665,-72.668411,-24.123726,-59.603909,8.208038,-73.148247,-30.235405,-60.435101,7.899898,-73.828934,-30.819963,-61.111877,8.243114,-74.451881,-25.614748,-61.265808,8.455508,-74.329124,-29.84235,-61.883801,8.366471,-74.622589,-30.562502,-62.607018,8.094605,-74.396042,-28.083597,-62.897217,8.021015,-74.431267,-30.542015,-63.521706,7.652587,-74.534409,-36.178806,-64.279633,6.722586,-73.844406,-35.815147,-65.958603,6.139832,-75.076149,-34.297066,-67.651169,5.122226,-75.780716,-34.722275,-67.525017,5.003294,-74.979637,-37.456642,-67.591476,5.315038,-74.065735,-36.723312,-67.53672,6.115058,-74.879196,-35.283356,-67.601959,6.731968,-74.53862,-33.450523,-68.407349,7.770634,-75.533417,-32.078804,-68.802116,8.560555,-77.04406,-30.55541,-69.85675,8.831872,-78.790161,-31.02492,-71.458382,8.791592,-80.0,-32.326187,-74.190773,8.507262,-80.0,-35.67342,-0.000228,0.547863,-2.556505,1.832141,-2.9e-05,1.75698,-4.81427,5.003709,-0.000415,1.66094,-5.925524,5.781195,-0.000512,1.862585,-6.024328,6.029046,-0.000302,1.793467,-5.278584,6.392132,-7e-05,1.704347,-5.363217,6.345536,-8.4e-05,1.774413,-5.572687,6.05115,-0.000395,1.855842,-5.847002,6.267033,-0.000236,2.049184,-6.466393,6.834742,-0.000694,2.209919,-6.793139,7.700038,-0.000594,2.213601,-6.573767,7.592107,-0.000374,2.291636,-6.831878,7.5797,-0.000381,2.374723,-6.616634,7.406949,-0.000229,2.428038,-7.055273,8.396403,0.000373,2.444839,-7.388039,8.32082,0.000959,2.507377,-7.034614,8.305781,0.000795,2.375421,-7.24679,7.78644,0.000932,2.129525,-6.75857,7.27807,0.000469,2.089232,-7.142992,7.870632,0.000207,2.070817,-6.892043,6.910765,-0.000405,2.083852,-6.622849,7.011225,-0.000345,2.09766,-6.675929,7.551085,-0.000474,2.074203,-6.550871,7.372426,-0.000357,2.221152,-6.615071,7.274904,-0.000229,2.23789,-6.537192,7.80313,-4.8e-05,2.220926,-6.222318,7.482855,0.000405,2.24175,-6.7307,7.288789,0.000639,2.240006,-6.95596,7.603156,0.000928,2.178485,-6.886145,7.335704,0.000854,2.150915,-6.337079,7.314126,0.000625,2.070173,-6.049308,7.040512,0.000866,2.028838,-6.318143,6.908805,0.000426,1.858191,-6.044658,6.170248,0.000265,1.703333,-5.638628,6.22979,0.000479,1.726096,-5.745324,6.755592,5.4e-05,1.821396,-5.911152,6.842749,-9.3e-05,1.840721,-6.364254,6.596788,0.000115,1.782385,-5.714539,6.363852,0.000147,1.636923,-5.455584,5.899785,0.000125,1.473573,-5.415421,5.104062,-0.000116,1.555818,-5.692486,5.770504,0.000358,1.761601,-5.939577,5.76361,0.000558,1.697675,-5.665002,6.085232,0.000325,1.570385,-5.482865,5.937726,0.000253,1.48076,-5.228698,5.579383,0.000185,1.428916,-5.150427,5.403829,0.000481,1.492057,-5.156378,6.056252,0.000475,1.538289,-5.237777,5.711278,0.000176,1.527723,-5.368091,5.434238,-8.1e-05,1.494664,-5.21835,5.515513,-6.7e-05,1.489729,-5.07567,5.318966,-9.8e-05,1.415119,-4.657671,4.828682,0.000144,1.260744,-4.418991,5.145064,7.2e-05,1.199044,-4.577263,5.329707,0.000205,1.01628,-4.557378,5.827478,0.000214,0.996999,-4.803331,5.10147,8.1e-05,1.062774,-5.125795,5.091107,0.000255,1.232479,-5.266501,5.797585,0.000161,1.367156,-5.708364,5.893638,0.000148,1.579173,-5.972969,6.483659,0.000215,1.734211,-6.216993,6.186902,7.2e-05,1.774907,-6.139166,6.597427,7.8e-05,1.740914,-5.999963,6.633176,5.6e-05,1.693182,-6.466531,6.65544,-0.000101,0.394958,-1.43269,1.38154,-0.001039,1.060117,-3.584485,3.756902,-0.000968,0.999497,-3.94193,3.818823,-0.000661,1.091842,-3.746035,3.870853,-0.000788,1.064669,-3.711049,3.971898,-0.000782,1.02984,-4.092535,3.676883,-0.000673,1.088073,-4.017005,4.318094,-0.000486,1.133479,-3.951298,4.004588,-0.000795,1.237488,-4.329955,4.369985,-0.001063,1.344791,-4.6419,5.033251,-0.001177,1.337909,-4.537592,4.647406,-0.001463,1.37567,-4.517756,5.162681,-0.001249,1.417925,-4.592708,5.099078,-0.001391,1.465734,-5.555953,5.111919,-0.001372,1.469957,-5.216434,4.723258,-0.001719,1.503461,-5.188988,4.803594,-0.001087,1.439228,-4.948042,4.742201,-0.001065,1.305017,-5.015997,4.531786,-0.000803,1.274912,-5.279198,4.069356,-0.000989,1.273132,-4.597051,4.375212,-0.001401,1.275513,-4.972398,4.261891,-0.001327,1.259231,-4.963903,4.662827,-0.001137,1.263639,-4.759346,4.290592,-0.001171,1.333262,-4.457909,4.381674,-0.001236,1.355035,-4.81438,4.427106,-0.001234,1.330216,-4.412353,4.642101,-0.001293,1.3431,-4.833606,4.633005,-0.001422,1.362715,-4.869596,4.560936,-0.000907,1.341451,-4.695038,4.386358,-0.000891,1.3449,-5.022954,4.836495,-0.000859,1.306571,-5.194457,4.529933,-0.000546,1.282336,-4.823397,4.260787,-0.000466,1.188221,-4.423777,4.050364,-0.000449,1.111436,-4.166761,3.7741,-0.000246,1.124503,-4.290976,3.802511,-0.000422,1.170747,-4.417428,4.094736,-0.00045,1.155017,-4.846234,4.354733,-0.00035,1.104621,-4.446986,3.87128,-0.000508,1.017771,-4.038434,3.801999,-0.000416,0.929988,-3.85168,3.456408,-0.000446,1.004248,-4.45926,3.916385,-0.000335,1.127811,-4.570722,4.010119,-0.000458,1.082036,-4.23334,3.70134,-0.000453,0.996795,-4.119242,3.638494,-0.000398,0.945697,-3.904773,3.602951,-0.00039,0.913312,-3.486021,3.554817,-0.000308,0.946981,-3.894907,3.592723,-0.000169,0.977045,-3.858791,3.592449,-0.000228,0.958078,-3.820132,3.575255,-0.000391,0.908841,-3.790657,3.245145,-0.000322,0.900029,-3.534322,3.377987,-0.000125,0.862609,-3.603699,3.156713,-0.000167,0.773314,-3.36165,3.10901,-0.000141,0.730576,-3.753596,3.315866,-0.000248,0.621822,-3.724786,3.102611,-0.000242,0.610294,-3.478741,2.882083,-0.000105,0.656715,-3.544268,2.965258,2.4e-05,0.76656,-3.985547,3.011336,8.5e-05,0.855687,-4.212993,3.461694,5.9e-05,0.981123,-4.566537,3.839639,4.1e-05,1.06948,-4.733699,3.998524,-1.2e-05,1.094735,-4.561327,4.156236,-3e-05,1.073933,-4.847902,4.199106,8e-06,1.054937,-4.998971,4.148556,301.0,3,0,1


In [22]:
train_ids = DF_TRAIN["Participant_ID"].unique()
dev_ids   = DF_DEV["Participant_ID"].unique()
test_ids  = DF_TEST["Participant_ID"].unique()

train_df = logmel_with_labels[logmel_with_labels["Participant_ID"].isin(train_ids)]
val_df   = logmel_with_labels[logmel_with_labels["Participant_ID"].isin(dev_ids)]
test_df  = logmel_with_labels[logmel_with_labels["Participant_ID"].isin(test_ids)]

print("Train:", train_df.shape, "Val:", val_df.shape, "Test:", test_df.shape)

Train: (107, 772) Val: (35, 772) Test: (47, 772)


In [23]:
exclude_cols = ["Participant_ID", "PHQ8_Score", "PHQ8_Binary", "Gender"]

feature_cols = [c for c in logmel_with_labels.columns if c not in exclude_cols]

X_train = train_df[feature_cols]
y_train = train_df["PHQ8_Score"]

X_val   = val_df[feature_cols]
y_val   = val_df["PHQ8_Score"]

X_test  = test_df[feature_cols]
y_test  = test_df["PHQ8_Score"]

print("Number of MFCC features:", len(feature_cols))

Number of MFCC features: 768


### Use Random Forest to Find the Most Important Features.

In [24]:
from sklearn.ensemble import RandomForestRegressor
import numpy as np

rf_for_fs = RandomForestRegressor(
    n_estimators=500,
    random_state=28,
    n_jobs=-1
)

rf_for_fs.fit(X_train, y_train)

importances = rf_for_fs.feature_importances_

In [44]:
top_k = 40  # or 50, or 30 â€” your choice

idx_sorted = np.argsort(importances)[::-1]  # descending
top_idx = idx_sorted[:top_k]

top_features = [feature_cols[i] for i in top_idx]

print("Top Logmel features (names):")
for f in top_features:
    print(f)

Top Logmel features (names):
logmel_43_std
logmel_delta2_5_min
logmel_delta_50_max
logmel_delta_27_std
logmel_delta_18_min
logmel_delta2_63_mean
logmel_delta2_26_std
logmel_7_mean
logmel_delta2_17_min
logmel_delta_39_min
logmel_delta_43_std
logmel_delta_4_std
logmel_delta_51_max
logmel_delta_23_mean
logmel_delta_49_max
logmel_delta2_2_min
logmel_delta2_25_max
logmel_delta_52_mean
logmel_42_std
logmel_delta_45_max
logmel_1_mean
logmel_delta2_49_std
logmel_delta_51_mean
logmel_delta_52_max
logmel_delta_27_min
logmel_delta_26_std
logmel_delta2_1_min
logmel_51_max
logmel_delta_60_mean
logmel_delta2_27_std
logmel_delta_24_std
logmel_delta_22_mean
logmel_delta2_4_std
logmel_delta2_7_min
logmel_delta2_11_max
logmel_delta_11_mean
logmel_delta_45_mean
logmel_52_max
logmel_2_mean
logmel_delta2_3_min


### Build reduced MFCC DataFrames using only selected features

In [33]:
# Reduced feature matrices
X_train_red = X_train[top_features].copy()
X_val_red   = X_val[top_features].copy()
X_test_red  = X_test[top_features].copy()

print("Reduced train shape:", X_train_red.shape)
print("Reduced val shape:", X_val_red.shape)
print("Reduced test shape:", X_test_red.shape)

Reduced train shape: (107, 40)
Reduced val shape: (35, 40)
Reduced test shape: (47, 40)


In [34]:
data_folder = root / "data"

logmel_reduced_all = logmel_with_labels[["Participant_ID", "PHQ8_Score"] + top_features]
logmel_reduced_all.to_csv(data_folder / "LIBROSA_LOGMEL_TOP40.csv", index=False)

print("Saved reduced MFCC feature set at:", data_folder / "LIBROSA_LOGMEL_TOP40.csv")

Saved reduced MFCC feature set at: C:\Users\DELL\Desktop\Conversational-Health-Analytics-\data\LIBROSA_LOGMEL_TOP40.csv


In [35]:
import pandas as pd
from pathlib import Path

data_path = Path(r"C:\Users\DELL\Desktop\Conversational-Health-Analytics-\data")

df_logmel_all = pd.read_csv(data_path / "LIBROSA_LOGMEL_TOP40.csv")
print(df_logmel_all.shape)
df_logmel_all.head()


(189, 42)


Unnamed: 0,Participant_ID,PHQ8_Score,logmel_43_std,logmel_delta2_5_min,logmel_delta_50_max,logmel_delta_27_std,logmel_delta_18_min,logmel_delta2_63_mean,logmel_delta2_26_std,logmel_7_mean,logmel_delta2_17_min,logmel_delta_39_min,logmel_delta_43_std,logmel_delta_4_std,logmel_delta_51_max,logmel_delta_23_mean,logmel_delta_49_max,logmel_delta2_2_min,logmel_delta2_25_max,logmel_delta_52_mean,logmel_42_std,logmel_delta_45_max,logmel_1_mean,logmel_delta2_49_std,logmel_delta_51_mean,logmel_delta_52_max,logmel_delta_27_min,logmel_delta_26_std,logmel_delta2_1_min,logmel_51_max,logmel_delta_60_mean,logmel_delta2_27_std,logmel_delta_24_std,logmel_delta_22_mean,logmel_delta2_4_std,logmel_delta2_7_min,logmel_delta2_11_max,logmel_delta_11_mean,logmel_delta_45_mean,logmel_52_max,logmel_2_mean,logmel_delta2_3_min
0,300.0,2,10.739304,-3.390566,6.03597,1.613482,-6.915091,9.477621e-05,0.937947,-42.335514,-4.551128,-7.116451,1.025413,1.774328,5.946761,-0.001723,6.007954,-2.975336,3.854547,-0.002546,10.849917,5.819147,-39.851742,0.592864,-0.002173,5.929493,-6.88164,1.656332,-1.843542,-21.620869,-0.001654,0.907104,1.588885,-0.002017,0.927267,-3.47996,5.761508,-0.004754,-0.001929,-21.745163,-36.666473,-3.895775
1,301.0,3,9.395409,-3.711049,5.515513,2.24175,-6.75857,-2.955596e-05,1.330216,-37.368904,-4.948042,-5.455584,1.697675,1.862585,5.318966,-0.000474,5.434238,-3.584485,4.427106,-9.8e-05,9.774688,5.579383,-34.786011,0.958078,-6.7e-05,4.828682,-6.7307,2.220926,-1.43269,-30.542015,0.000148,1.3431,2.221152,-0.000345,1.091842,-4.017005,4.647406,-0.000594,0.000253,-36.178806,-29.060829,-3.94193
2,302.0,4,5.783973,-3.374838,4.380237,1.681334,-5.304066,-0.0007131106,1.019084,-32.149399,-3.263655,-4.337917,1.031796,1.442157,4.33159,-0.003016,4.877388,-2.74017,3.888376,-0.001019,6.057352,4.227523,-26.297705,0.635646,-0.001046,4.160639,-5.12351,1.686172,-1.945076,-33.668652,-0.000683,1.037769,1.753835,-0.004311,0.878476,-3.209999,3.398974,-0.002828,-0.001782,-34.625015,-20.675009,-3.446138
3,303.0,0,8.490192,-3.621092,5.452006,2.10539,-7.10478,0.0002284568,1.305185,-39.742962,-4.837091,-6.417822,1.554623,1.787458,5.292192,-0.001742,5.712991,-2.230157,4.562909,-0.000424,8.184835,6.293627,-37.777328,0.779309,-0.000493,5.600675,-7.099382,2.164683,-2.539845,-32.798985,-0.00013,1.28823,2.169303,-0.001437,0.972501,-3.872371,4.57728,-0.001383,-0.000674,-32.301853,-44.292381,-3.12105
4,304.0,6,7.878151,-3.339752,4.832123,1.909255,-7.139767,2.419532e-10,1.134411,-47.634342,-5.684505,-5.69874,1.347867,1.569708,4.635792,0.000206,6.025497,-3.549773,4.249362,0.000207,7.97619,5.967069,-41.200623,0.69952,0.00053,4.726497,-6.308327,1.95024,-3.170179,-40.946724,0.000392,1.104353,1.936945,0.00047,0.846326,-4.459381,3.868308,0.000127,0.00038,-42.157715,-47.938351,-3.846102


### Feature Engineering.

### Handle Missing, Infinite, and Constant Features

In [49]:
import numpy as np

# Replace inf with NaN
X_train_red.replace([np.inf, -np.inf], np.nan, inplace=True)
X_val_red.replace([np.inf, -np.inf], np.nan, inplace=True)
X_test_red.replace([np.inf, -np.inf], np.nan, inplace=True)

# Fill NaN with column mean
X_train_red = X_train_red.fillna(X_train_red.mean())
X_val_red   = X_val_red.fillna(X_train_red.mean())
X_test_red  = X_test_red.fillna(X_train_red.mean())

# Remove constant columns
constant_cols = X_train_red.columns[X_train_red.nunique() == 1]
print("Removed constant features:", constant_cols.tolist())

X_train_red.drop(columns=constant_cols, inplace=True)
X_val_red.drop(columns=constant_cols, inplace=True)
X_test_red.drop(columns=constant_cols, inplace=True)


Removed constant features: []


### Outlier Detection & Removal (MFCC ranges vary widely).


In [50]:
Q1 = X_train_red.quantile(0.25)
Q3 = X_train_red.quantile(0.75)
IQR = Q3 - Q1

# Keep only non-outliers
mask = ~((X_train_red < (Q1 - 1.5 * IQR)) | (X_train_red > (Q3 + 1.5 * IQR))).any(axis=1)

X_train_red = X_train_red[mask]
y_train = y_train[mask]

### Feature Scaling.

In [51]:
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()

X_train_scaled = scaler.fit_transform(X_train_red)
X_val_scaled   = scaler.transform(X_val_red)
X_test_scaled  = scaler.transform(X_test_red)


### Train and Evaluate the Three Best Regression Models.

### Evaluation Function

In [52]:
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import numpy as np

def evaluate_model(model, X, y, name="Model"):
    preds = model.predict(X)
    mae  = mean_absolute_error(y, preds)
    mse  = mean_squared_error(y, preds)
    rmse = np.sqrt(mse)
    r2   = r2_score(y, preds)

    print(f"\n{name} Validation Performance")
    print("---------------------------------------")
    print("MAE :", mae)
    print("MSE :", mse)
    print("RMSE:", rmse)
    print("RÂ²  :", r2)

    return [mae, mse, rmse, r2]

### Train XGBoost (Best Model for AVEC-type datasets).

In [53]:
from xgboost import XGBRegressor

xgb = XGBRegressor(
    n_estimators=600,
    learning_rate=0.03,
    max_depth=4,
    subsample=0.9,
    colsample_bytree=0.9,
    reg_lambda=1.0,
    reg_alpha=0.0,
    random_state=28
)

xgb.fit(X_train_scaled, y_train)
xgb_results = evaluate_model(xgb, X_val_scaled, y_val, "XGBoost")


XGBoost Validation Performance
---------------------------------------
MAE : 6.062353610992432
MSE : 59.09469223022461
RMSE: 7.687307215808707
RÂ²  : -0.4007546901702881


### Train LightGBM (Fast and Excellent for MFCC features).

In [54]:
from lightgbm import LGBMRegressor

lgb = LGBMRegressor(
    n_estimators=800,
    learning_rate=0.03,
    num_leaves=50,
    subsample=0.9,
    colsample_bytree=0.9,
    random_state=28
)

lgb.fit(X_train_scaled, y_train)
lgb_results = evaluate_model(lgb, X_val_scaled, y_val, "LightGBM")

[LightGBM] [Info] Total Bins 0
[LightGBM] [Info] Number of data points in the train set: 28, number of used features: 0
[LightGBM] [Info] Start training from score 5.607143

LightGBM Validation Performance
---------------------------------------
MAE : 5.474489795918368
MSE : 45.505357142857136
RMSE: 6.745765867776403
RÂ²  : -0.07863898026315774




### Train Random Forest.

In [55]:
from sklearn.ensemble import RandomForestRegressor

rf = RandomForestRegressor(
    n_estimators=600,
    random_state=28
)

rf.fit(X_train_red, y_train)    # no scaling needed
rf_results = evaluate_model(rf, X_val_red, y_val, "Random Forest")


Random Forest Validation Performance
---------------------------------------
MAE : 5.8020476190476185
MSE : 49.94087738095238
RMSE: 7.066885974809016
RÂ²  : -0.18377660200593415
