### Set up paths and Participant list

In [18]:
from pathlib import Path
import pandas as pd
import numpy as np
import librosa

# Root directory holding one sub-folder per participant (300_P, 301_P, ...).
# NOTE(review): this is a machine-specific absolute path; it must be adjusted
# when the notebook is run on another machine.
base_path = Path(r"C:\Users\DELL\Desktop\Conversational-Health-Analytics-\Dataset\extracted_folders")

# Keep only directories whose name ends in "_P", then order them by path.
participant_folders = []
for entry in base_path.iterdir():
    if entry.is_dir() and entry.name.endswith("_P"):
        participant_folders.append(entry)
participant_folders.sort()

print("Number of participants:", len(participant_folders))
print("Example folders:", [p.name for p in participant_folders[:5]])


Number of participants: 189
Example folders: ['300_P', '301_P', '302_P', '303_P', '304_P']


In [19]:
from pathlib import Path
import pandas as pd

pid = 300
folder = base_path.joinpath(f"{pid}_P")
transcript_path = folder.joinpath(f"{pid}_TRANSCRIPT.csv")

# The transcript is tab-delimited even though its extension is .csv, so the
# separator has to be passed explicitly.
df_t = pd.read_csv(transcript_path, sep="\t")
print("Columns in transcript file:", list(df_t.columns))
df_t.head()



Columns in transcript file: ['start_time', 'stop_time', 'speaker', 'value']


Unnamed: 0,start_time,stop_time,speaker,value
0,36.588,39.668,Ellie,hi i'm ellie thanks for coming in today
1,39.888,43.378,Ellie,i was created to talk to people in a safe and ...
2,43.728,48.498,Ellie,think of me as a friend i don't judge i can't ...
3,49.188,52.388,Ellie,i'm here to learn about people and would love ...
4,52.658,58.958,Ellie,i'll ask a few questions to get us started and...


In [20]:
import librosa
import numpy as np
import pandas as pd

def load_audio_and_clean(pid, sr_target=None):
    """
    Load a participant's recording and build a copy without Ellie's turns.

    The audio is read from ``{pid}_P/{pid}_AUDIO.wav`` and the tab-separated
    transcript from ``{pid}_P/{pid}_TRANSCRIPT.csv``, both located under the
    notebook-level ``base_path``. Every transcript row whose speaker does not
    contain "ellie" (case-insensitive) contributes the samples between its
    ``start_time`` and ``stop_time``; those slices are concatenated in
    transcript order. Both durations are printed.

    Parameters
    ----------
    pid : int or str
        Participant identifier used to build the folder and file names.
    sr_target : int or None, optional
        Forwarded to ``librosa.load`` as ``sr``. Per the librosa
        documentation, ``None`` keeps the file's native sampling rate.

    Returns
    -------
    y_full : np.ndarray
        The audio exactly as loaded by ``librosa.load``.
    y_clean : np.ndarray
        Concatenation of the kept (non-Ellie) segments.
    sr : int
        Sampling rate returned by ``librosa.load``.

    Warns
    -----
    RuntimeWarning
        If the transcript yields no usable non-Ellie segment. In that case
        ``y_clean`` falls back to ``y_full`` (which still contains Ellie), so
        the caller is told explicitly instead of getting mislabeled audio
        silently.
    """
    import warnings

    folder = base_path / f"{pid}_P"
    audio_path = folder / f"{pid}_AUDIO.wav"
    transcript_path = folder / f"{pid}_TRANSCRIPT.csv"

    # Load original audio
    y_full, sr = librosa.load(audio_path, sr=sr_target)
    n_samples = len(y_full)
    full_duration = n_samples / sr

    # Load transcript (tab-separated)
    df_t = pd.read_csv(transcript_path, sep="\t")

    # Unparseable timestamps become NaN and are skipped below
    starts = pd.to_numeric(df_t["start_time"], errors="coerce")
    stops = pd.to_numeric(df_t["stop_time"], errors="coerce")

    keep_segments = []
    for speaker, start_sec, end_sec in zip(df_t["speaker"], starts, stops):
        # Keep only participant speech
        if "ellie" in str(speaker).lower():
            continue
        if np.isnan(start_sec) or np.isnan(end_sec):
            continue

        # Seconds -> sample index, clipped to the audio boundaries
        start_idx = max(0, min(int(start_sec * sr), n_samples))
        end_idx = max(0, min(int(end_sec * sr), n_samples))

        # Empty or inverted intervals contribute nothing
        if end_idx > start_idx:
            keep_segments.append(y_full[start_idx:end_idx])

    if keep_segments:
        y_clean = np.concatenate(keep_segments)
    else:
        # Fallback keeps the pipeline running, but the result is NOT Ellie-free.
        warnings.warn(
            f"Participant {pid}: no usable participant segments found in the "
            "transcript; returning the full audio (Ellie included) as y_clean.",
            RuntimeWarning,
            stacklevel=2,
        )
        y_clean = y_full

    clean_duration = len(y_clean) / sr

    # Report both durations
    print(f"Participant {pid}:")
    print(f" - Original audio length: {full_duration:.2f} seconds")
    print(f" - Cleaned audio length (no Ellie): {clean_duration:.2f} seconds")

    return y_full, y_clean, sr




In [21]:
# Run the loader on a single participant; the call prints the original and
# the cleaned (non-Ellie) durations and returns both signals plus the rate.
pid = 300
y_full, y_clean, sr = load_audio_and_clean(pid)


Participant 300:
 - Original audio length: 648.50 seconds
 - Cleaned audio length (no Ellie): 155.76 seconds


### Summarize a feature matrix.

In [22]:
import numpy as np
import pandas as pd

def summarize_feature_matrix(mat, prefix):
    """
    Collapse a feature matrix into per-row summary statistics.

    mat: 2D numpy array laid out as (n_features x n_frames)
    prefix: label used to build the output names, e.g. 'mfcc', 'delta'

    Returns: pandas.Series holding, for every row i (1-based), the entries
             {prefix}_{i}_mean, {prefix}_{i}_std, {prefix}_{i}_min and
             {prefix}_{i}_max, in that order. Non-finite values (NaN/inf)
             are ignored; a row with no finite value yields NaN for all four.
    """
    stat_names = ("mean", "std", "min", "max")
    summary = {}

    for idx in range(1, mat.shape[0] + 1):
        values = mat[idx - 1, :]
        finite = values[np.isfinite(values)]  # drop NaN / +-inf before reducing

        if finite.size:
            computed = (finite.mean(), finite.std(), finite.min(), finite.max())
        else:
            computed = (np.nan, np.nan, np.nan, np.nan)

        for stat_name, stat_value in zip(stat_names, computed):
            summary[f"{prefix}_{idx}_{stat_name}"] = stat_value

    return pd.Series(summary)


### Extract MFCC + Delta + Delta-Delta from one audio signal.

In [23]:
import librosa

def extract_mfcc_family_features(y, sr, n_mfcc=20, prefix="mfcc"):
    """
    Summarise MFCCs together with their first- and second-order deltas.

    The MFCC matrix is computed with ``n_mfcc`` coefficients; the delta and
    delta-delta matrices are derived from it. Each matrix is reduced with
    ``summarize_feature_matrix`` under the labels ``{prefix}``,
    ``{prefix}_delta`` and ``{prefix}_delta2`` and the results are merged, in
    that order, into one Series meant for the MFCC DataFrame.
    """
    mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=n_mfcc)

    # (label, matrix) pairs in the order their statistics appear in the output
    families = (
        (prefix, mfcc),
        (f"{prefix}_delta", librosa.feature.delta(mfcc)),
        (f"{prefix}_delta2", librosa.feature.delta(mfcc, order=2)),
    )

    merged = {}
    for label, matrix in families:
        merged.update(summarize_feature_matrix(matrix, prefix=label).to_dict())

    return pd.Series(merged)



### Chroma STFT.

In [24]:
def extract_chroma_features(y, sr, prefix="chroma"):
    """
    Summarise the chroma STFT matrix (12-dim) to mean/std/min/max per row.

    Kept as its own function so the result can feed a dedicated CHROMA
    dataframe. Output names follow ``{prefix}_{i}_{stat}``.
    """
    chroma_matrix = librosa.feature.chroma_stft(y=y, sr=sr)
    summary = summarize_feature_matrix(chroma_matrix, prefix=prefix)
    return pd.Series(summary.to_dict())


### Spectral Contrast.

In [25]:
def extract_contrast_features(y, sr, prefix="contrast"):
    """
    Summarise the spectral contrast matrix to mean/std/min/max per row.

    Kept as its own function so the result can feed a dedicated CONTRAST
    dataframe. Output names follow ``{prefix}_{i}_{stat}``.
    """
    contrast_matrix = librosa.feature.spectral_contrast(y=y, sr=sr)
    summary = summarize_feature_matrix(contrast_matrix, prefix=prefix)
    return pd.Series(summary.to_dict())


### Tonnetz.

In [26]:
def extract_tonnetz_features(y, sr, prefix="tonnetz"):
    """
    Summarise tonnetz features to mean/std/min/max per row.

    The tonnetz matrix is computed on the harmonic component of ``y`` (as
    returned by ``librosa.effects.harmonic``), not on the raw signal. Kept as
    its own function so the result can feed a dedicated TONNETZ dataframe.
    Output names follow ``{prefix}_{i}_{stat}``.
    """
    harmonic_part = librosa.effects.harmonic(y)
    tonnetz_matrix = librosa.feature.tonnetz(y=harmonic_part, sr=sr)
    summary = summarize_feature_matrix(tonnetz_matrix, prefix=prefix)
    return pd.Series(summary.to_dict())


### Build 4 Separate DataFrames for One Participant.

In [27]:
pid = 300
y_full, y_clean, sr = load_audio_and_clean(pid)

# Each extractor returns an all-float Series, so writing the ID into it stores
# 300 as 300.0. Once the Series has been turned into a one-row frame, the ID
# column is cast back to int so the identifier is not kept as a float.

# 1) MFCC family
mfcc_series = extract_mfcc_family_features(y_clean, sr)
mfcc_series["Participant_ID"] = pid
df_mfcc = mfcc_series.to_frame().T.astype({"Participant_ID": int})

# 2) Chroma
chroma_series = extract_chroma_features(y_clean, sr)
chroma_series["Participant_ID"] = pid
df_chroma = chroma_series.to_frame().T.astype({"Participant_ID": int})

# 3) Spectral Contrast
contrast_series = extract_contrast_features(y_clean, sr)
contrast_series["Participant_ID"] = pid
df_contrast = contrast_series.to_frame().T.astype({"Participant_ID": int})

# 4) Tonnetz
tonnetz_series = extract_tonnetz_features(y_clean, sr)
tonnetz_series["Participant_ID"] = pid
df_tonnetz = tonnetz_series.to_frame().T.astype({"Participant_ID": int})

print("MFCC DF shape:", df_mfcc.shape)
print("Chroma DF shape:", df_chroma.shape)
print("Contrast DF shape:", df_contrast.shape)
print("Tonnetz DF shape:", df_tonnetz.shape)


Participant 300:
 - Original audio length: 648.50 seconds
 - Cleaned audio length (no Ellie): 155.76 seconds
MFCC DF shape: (1, 241)
Chroma DF shape: (1, 49)
Contrast DF shape: (1, 29)
Tonnetz DF shape: (1, 25)


In [28]:
# Show every column of the wide one-row frames instead of a truncated view.
pd.set_option("display.max_columns", None)
pd.set_option("display.width", 2000)

# Banner + rich display for each feature family, in a fixed order.
for banner, frame in (
    ("===== MFCC FAMILY FEATURES =====", df_mfcc),
    ("===== CHROMA FEATURES =====", df_chroma),
    ("===== CONTRAST FEATURES =====", df_contrast),
    ("===== TONNETZ FEATURES =====", df_tonnetz),
):
    print(banner)
    display(frame)


===== MFCC FAMILY FEATURES =====


Unnamed: 0,mfcc_1_mean,mfcc_1_std,mfcc_1_min,mfcc_1_max,mfcc_2_mean,mfcc_2_std,mfcc_2_min,mfcc_2_max,mfcc_3_mean,mfcc_3_std,mfcc_3_min,mfcc_3_max,mfcc_4_mean,mfcc_4_std,mfcc_4_min,mfcc_4_max,mfcc_5_mean,mfcc_5_std,mfcc_5_min,mfcc_5_max,mfcc_6_mean,mfcc_6_std,mfcc_6_min,mfcc_6_max,mfcc_7_mean,mfcc_7_std,mfcc_7_min,mfcc_7_max,mfcc_8_mean,mfcc_8_std,mfcc_8_min,mfcc_8_max,mfcc_9_mean,mfcc_9_std,mfcc_9_min,mfcc_9_max,mfcc_10_mean,mfcc_10_std,mfcc_10_min,mfcc_10_max,mfcc_11_mean,mfcc_11_std,mfcc_11_min,mfcc_11_max,mfcc_12_mean,mfcc_12_std,mfcc_12_min,mfcc_12_max,mfcc_13_mean,mfcc_13_std,mfcc_13_min,mfcc_13_max,mfcc_14_mean,mfcc_14_std,mfcc_14_min,mfcc_14_max,mfcc_15_mean,mfcc_15_std,mfcc_15_min,mfcc_15_max,mfcc_16_mean,mfcc_16_std,mfcc_16_min,mfcc_16_max,mfcc_17_mean,mfcc_17_std,mfcc_17_min,mfcc_17_max,mfcc_18_mean,mfcc_18_std,mfcc_18_min,mfcc_18_max,mfcc_19_mean,mfcc_19_std,mfcc_19_min,mfcc_19_max,mfcc_20_mean,mfcc_20_std,mfcc_20_min,mfcc_20_max,mfcc_delta_1_mean,mfcc_delta_1_std,mfcc_delta_1_min,mfcc_delta_1_max,mfcc_delta_2_mean,mfcc_delta_2_std,mfcc_delta_2_min,mfcc_delta_2_max,mfcc_delta_3_mean,mfcc_delta_3_std,mfcc_delta_3_min,mfcc_delta_3_max,mfcc_delta_4_mean,mfcc_delta_4_std,mfcc_delta_4_min,mfcc_delta_4_max,mfcc_delta_5_mean,mfcc_delta_5_std,mfcc_delta_5_min,mfcc_delta_5_max,mfcc_delta_6_mean,mfcc_delta_6_std,mfcc_delta_6_min,mfcc_delta_6_max,mfcc_delta_7_mean,mfcc_delta_7_std,mfcc_delta_7_min,mfcc_delta_7_max,mfcc_delta_8_mean,mfcc_delta_8_std,mfcc_delta_8_min,mfcc_delta_8_max,mfcc_delta_9_mean,mfcc_delta_9_std,mfcc_delta_9_min,mfcc_delta_9_max,mfcc_delta_10_mean,mfcc_delta_10_std,mfcc_delta_10_min,mfcc_delta_10_max,mfcc_delta_11_mean,mfcc_delta_11_std,mfcc_delta_11_min,mfcc_delta_11_max,mfcc_delta_12_mean,mfcc_delta_12_std,mfcc_delta_12_min,mfcc_delta_12_max,mfcc_delta_13_mean,mfcc_delta_13_std,mfcc_delta_13_min,mfcc_delta_13_max,mfcc_delta_14_mean,mfcc_delta_14_std,mfcc_delta_14_min,mfcc_delta_14_max,mfcc_delta_15_mean,mfcc_delta_15_std,mfcc_delta_15_min,mfcc
_delta_15_max,mfcc_delta_16_mean,mfcc_delta_16_std,mfcc_delta_16_min,mfcc_delta_16_max,mfcc_delta_17_mean,mfcc_delta_17_std,mfcc_delta_17_min,mfcc_delta_17_max,mfcc_delta_18_mean,mfcc_delta_18_std,mfcc_delta_18_min,mfcc_delta_18_max,mfcc_delta_19_mean,mfcc_delta_19_std,mfcc_delta_19_min,mfcc_delta_19_max,mfcc_delta_20_mean,mfcc_delta_20_std,mfcc_delta_20_min,mfcc_delta_20_max,mfcc_delta2_1_mean,mfcc_delta2_1_std,mfcc_delta2_1_min,mfcc_delta2_1_max,mfcc_delta2_2_mean,mfcc_delta2_2_std,mfcc_delta2_2_min,mfcc_delta2_2_max,mfcc_delta2_3_mean,mfcc_delta2_3_std,mfcc_delta2_3_min,mfcc_delta2_3_max,mfcc_delta2_4_mean,mfcc_delta2_4_std,mfcc_delta2_4_min,mfcc_delta2_4_max,mfcc_delta2_5_mean,mfcc_delta2_5_std,mfcc_delta2_5_min,mfcc_delta2_5_max,mfcc_delta2_6_mean,mfcc_delta2_6_std,mfcc_delta2_6_min,mfcc_delta2_6_max,mfcc_delta2_7_mean,mfcc_delta2_7_std,mfcc_delta2_7_min,mfcc_delta2_7_max,mfcc_delta2_8_mean,mfcc_delta2_8_std,mfcc_delta2_8_min,mfcc_delta2_8_max,mfcc_delta2_9_mean,mfcc_delta2_9_std,mfcc_delta2_9_min,mfcc_delta2_9_max,mfcc_delta2_10_mean,mfcc_delta2_10_std,mfcc_delta2_10_min,mfcc_delta2_10_max,mfcc_delta2_11_mean,mfcc_delta2_11_std,mfcc_delta2_11_min,mfcc_delta2_11_max,mfcc_delta2_12_mean,mfcc_delta2_12_std,mfcc_delta2_12_min,mfcc_delta2_12_max,mfcc_delta2_13_mean,mfcc_delta2_13_std,mfcc_delta2_13_min,mfcc_delta2_13_max,mfcc_delta2_14_mean,mfcc_delta2_14_std,mfcc_delta2_14_min,mfcc_delta2_14_max,mfcc_delta2_15_mean,mfcc_delta2_15_std,mfcc_delta2_15_min,mfcc_delta2_15_max,mfcc_delta2_16_mean,mfcc_delta2_16_std,mfcc_delta2_16_min,mfcc_delta2_16_max,mfcc_delta2_17_mean,mfcc_delta2_17_std,mfcc_delta2_17_min,mfcc_delta2_17_max,mfcc_delta2_18_mean,mfcc_delta2_18_std,mfcc_delta2_18_min,mfcc_delta2_18_max,mfcc_delta2_19_mean,mfcc_delta2_19_std,mfcc_delta2_19_min,mfcc_delta2_19_max,mfcc_delta2_20_mean,mfcc_delta2_20_std,mfcc_delta2_20_min,mfcc_delta2_20_max,Participant_ID
0,-367.103943,104.416664,-511.09317,32.688351,89.892448,31.626925,-3.024082,197.098373,8.258336,21.220268,-81.608917,56.842575,18.156752,18.080063,-57.43103,90.965736,2.469162,10.754705,-57.881775,36.133606,4.391455,12.240224,-45.975403,38.772568,-7.739872,11.127036,-50.669243,40.041779,5.470604,8.625935,-42.73859,37.789291,1.013182,8.72834,-27.174694,34.742599,5.52652,7.316226,-22.8832,30.891502,-2.394045,5.705299,-22.073643,30.32077,0.7187,6.095616,-23.608685,25.149139,-3.429722,6.169363,-36.860439,18.958786,4.274493,5.210022,-15.121805,27.252699,-2.215292,5.36946,-28.735039,15.865007,-1.38062,6.011368,-27.278543,18.785343,-3.082834,5.429797,-24.935108,17.390759,-4.246538,5.91087,-29.329124,12.371996,-4.319562,5.579246,-24.288597,14.448802,0.37007,5.067324,-26.361507,16.899147,-0.025395,10.325793,-77.146019,66.340828,-0.004618,5.029216,-22.40242,26.036928,0.000992,3.993233,-16.212385,14.604114,0.000809,3.346514,-16.107817,15.237745,0.002499,1.94497,-9.269785,7.98996,0.000562,2.120987,-10.096308,8.417168,0.005406,1.982087,-7.675164,9.111294,0.004306,1.510606,-5.611754,7.045578,0.000487,1.578276,-7.139418,5.895853,-0.001936,1.306455,-4.941306,5.442559,0.000313,1.009625,-4.090022,4.813454,0.002403,1.114011,-5.023258,4.997054,0.001698,1.077078,-4.956304,4.473161,-0.00284,0.978085,-3.225807,3.944576,0.001733,0.954511,-4.638453,3.562509,0.002575,1.106814,-4.881056,3.671689,0.000971,0.938154,-3.258886,4.095674,-8.4e-05,1.040326,-4.246813,4.671335,0.002143,1.000417,-3.832006,3.673248,0.003034,0.854607,-3.570876,3.468308,-0.013102,5.015399,-32.715481,40.115246,-0.006175,2.910322,-13.637662,17.930847,0.00507,2.456839,-11.165504,12.616557,0.003915,1.953153,-11.358303,11.572164,0.003618,1.180413,-5.081996,6.799324,0.001535,1.230826,-5.650796,5.559774,0.00198,1.222255,-5.654696,6.053223,0.002406,0.921281,-5.256922,3.935163,-0.001134,0.990804,-4.475405,3.892595,-0.002441,0.816577,-3.074981,3.205309,0.000529,0.676066,-2.830192,2.9498,0.002196,0.740465,-3.262192,3.375813,0.001577
,0.709518,-3.698368,4.297448,-0.000477,0.644831,-2.423341,2.862016,0.001269,0.626811,-3.142368,3.011565,0.00191,0.706779,-2.650153,3.847953,-0.001088,0.604608,-2.50126,2.969941,-3.9e-05,0.657573,-2.348132,2.730083,0.000162,0.644345,-2.408664,2.651933,0.001466,0.594561,-2.256882,2.556742,300.0


===== CHROMA FEATURES =====


Unnamed: 0,chroma_1_mean,chroma_1_std,chroma_1_min,chroma_1_max,chroma_2_mean,chroma_2_std,chroma_2_min,chroma_2_max,chroma_3_mean,chroma_3_std,chroma_3_min,chroma_3_max,chroma_4_mean,chroma_4_std,chroma_4_min,chroma_4_max,chroma_5_mean,chroma_5_std,chroma_5_min,chroma_5_max,chroma_6_mean,chroma_6_std,chroma_6_min,chroma_6_max,chroma_7_mean,chroma_7_std,chroma_7_min,chroma_7_max,chroma_8_mean,chroma_8_std,chroma_8_min,chroma_8_max,chroma_9_mean,chroma_9_std,chroma_9_min,chroma_9_max,chroma_10_mean,chroma_10_std,chroma_10_min,chroma_10_max,chroma_11_mean,chroma_11_std,chroma_11_min,chroma_11_max,chroma_12_mean,chroma_12_std,chroma_12_min,chroma_12_max,Participant_ID
0,0.445551,0.309529,0.00063,1.0,0.336944,0.296302,0.000701,1.0,0.309652,0.28996,0.000641,1.0,0.30947,0.287874,0.001677,1.0,0.282452,0.274421,0.000372,1.0,0.279052,0.272335,0.001188,1.0,0.31413,0.296715,0.001565,1.0,0.278915,0.263483,0.000475,1.0,0.322481,0.283665,0.000811,1.0,0.387524,0.327419,0.001216,1.0,0.427376,0.30097,0.000635,1.0,0.576527,0.396186,0.000798,1.0,300.0


===== CONTRAST FEATURES =====


Unnamed: 0,contrast_1_mean,contrast_1_std,contrast_1_min,contrast_1_max,contrast_2_mean,contrast_2_std,contrast_2_min,contrast_2_max,contrast_3_mean,contrast_3_std,contrast_3_min,contrast_3_max,contrast_4_mean,contrast_4_std,contrast_4_min,contrast_4_max,contrast_5_mean,contrast_5_std,contrast_5_min,contrast_5_max,contrast_6_mean,contrast_6_std,contrast_6_min,contrast_6_max,contrast_7_mean,contrast_7_std,contrast_7_min,contrast_7_max,Participant_ID
0,19.465624,4.452102,6.87511,39.70262,17.570055,5.312681,4.956128,39.687036,19.005525,5.835758,4.875051,49.298412,17.759906,4.325428,5.703931,36.884918,16.759227,3.511109,8.072721,31.726313,14.868642,2.153695,10.546043,27.923844,15.862005,1.570053,5.088359,24.131423,300.0


===== TONNETZ FEATURES =====


Unnamed: 0,tonnetz_1_mean,tonnetz_1_std,tonnetz_1_min,tonnetz_1_max,tonnetz_2_mean,tonnetz_2_std,tonnetz_2_min,tonnetz_2_max,tonnetz_3_mean,tonnetz_3_std,tonnetz_3_min,tonnetz_3_max,tonnetz_4_mean,tonnetz_4_std,tonnetz_4_min,tonnetz_4_max,tonnetz_5_mean,tonnetz_5_std,tonnetz_5_min,tonnetz_5_max,tonnetz_6_mean,tonnetz_6_std,tonnetz_6_min,tonnetz_6_max,Participant_ID
0,0.000235,0.127191,-0.692052,0.659175,-0.071041,0.151684,-0.59418,0.568096,0.078814,0.221283,-0.668396,0.71959,-0.050953,0.192614,-0.727401,0.601966,-0.029105,0.072407,-0.286013,0.250741,-0.028097,0.068588,-0.195878,0.243666,300.0


In [29]:
from pathlib import Path
import pandas as pd

# ---- 1. Get all participant IDs ----
# A participant folder is a directory named "<id>_P"; the text before the
# first underscore is parsed as the integer ID.
participant_ids = []
for entry in base_path.iterdir():
    if not entry.is_dir() or not entry.name.endswith("_P"):
        continue
    participant_ids.append(int(entry.name.partition("_")[0]))
participant_ids.sort()

print("Total participants detected:", len(participant_ids))
print("First 10 participants:", participant_ids[:10])

Total participants detected: 189
First 10 participants: [300, 301, 302, 303, 304, 305, 306, 307, 308, 309]


In [30]:
# ---- 2. Prepare lists for each DF ----
# One independent, initially empty accumulator per feature family; each will
# receive one per-participant Series.
mfcc_rows, chroma_rows, contrast_rows, tonnetz_rows = [], [], [], []

In [None]:
# ---- 3. Loop over participants ----
row_lists = (mfcc_rows, chroma_rows, contrast_rows, tonnetz_rows)

# IDs already stored in each list. Re-running this cell (for example after an
# interruption) then resumes where it stopped instead of appending duplicate
# rows for participants that were already processed.
stored_ids = [{int(row["Participant_ID"]) for row in rows} for rows in row_lists]

for pid in participant_ids:
    if all(pid in ids for ids in stored_ids):
        continue

    print(f"\nProcessing Participant: {pid}")

    # Load full + cleaned audio
    y_full, y_clean, sr = load_audio_and_clean(pid)

    # Compute all four feature families BEFORE touching the lists: if any
    # extractor fails, nothing has been appended for this participant and the
    # four lists stay aligned row-for-row.
    s_mfcc = extract_mfcc_family_features(y_clean, sr)      # 1) MFCC family
    s_chroma = extract_chroma_features(y_clean, sr)         # 2) Chroma
    s_contrast = extract_contrast_features(y_clean, sr)     # 3) Spectral contrast
    s_tonnetz = extract_tonnetz_features(y_clean, sr)       # 4) Tonnetz

    for rows, ids, series in zip(
        row_lists, stored_ids, (s_mfcc, s_chroma, s_contrast, s_tonnetz)
    ):
        series["Participant_ID"] = pid
        # A list may already hold this participant if an earlier run stopped
        # part-way through; only fill in the lists that are missing it.
        if pid not in ids:
            rows.append(series)
            ids.add(pid)


Processing Participant: 300
Participant 300:
 - Original audio length: 648.50 seconds
 - Cleaned audio length (no Ellie): 155.76 seconds

Processing Participant: 301
Participant 301:
 - Original audio length: 823.90 seconds
 - Cleaned audio length (no Ellie): 475.44 seconds

Processing Participant: 302
Participant 302:
 - Original audio length: 758.80 seconds
 - Cleaned audio length (no Ellie): 208.93 seconds

Processing Participant: 303
Participant 303:
 - Original audio length: 985.30 seconds
 - Cleaned audio length (no Ellie): 642.93 seconds

Processing Participant: 304
Participant 304:
 - Original audio length: 792.60 seconds
 - Cleaned audio length (no Ellie): 362.60 seconds

Processing Participant: 305
Participant 305:
 - Original audio length: 1704.00 seconds
 - Cleaned audio length (no Ellie): 1118.49 seconds

Processing Participant: 306
Participant 306:
 - Original audio length: 858.10 seconds
 - Cleaned audio length (no Ellie): 509.37 seconds

Processing Participant: 307
Par

In [None]:
# NOTE(review): this appears to be a leftover smoke-test cell unrelated to the
# feature pipeline — consider removing it.
print("Hello World!!!")