In [2]:
import os
import nibabel as nib
import pandas as pd
import numpy as np
from nilearn.input_data import NiftiLabelsMasker
from nilearn import datasets
from tqdm import tqdm  # progress bar

# Build one functional-connectivity feature vector per trial.
#
# For every run: extract ROI-averaged time series with a labels masker, cut
# them into per-trial windows using the events table, correlate the ROI
# signals within each window, and keep the strict upper triangle of that
# correlation matrix as the trial's feature vector. The genre column of the
# events table is used as the label.
#
# Results: X_all (trials x ROI pairs) and y_all (genre labels); X_all is also
# written to "X_all.npy".

# Set up
base_path = "."
subject = "sub-005"
n_runs = 6
t_r = 2.0  # repetition time (2 seconds)
n_rois = 100  # atlas size requested below; also fixes the feature length
# Number of unique ROI pairs = entries strictly above the diagonal of an
# n_rois x n_rois matrix (4950 for 100 ROIs).
n_features = n_rois * (n_rois - 1) // 2

# Load atlas & prepare masker
atlas = datasets.fetch_atlas_schaefer_2018(n_rois=n_rois, resolution_mm=2)
masker = NiftiLabelsMasker(labels_img=atlas.maps, standardize=True, t_r=t_r)

# Prepare result lists
X_all = []
y_all = []
n_too_short = 0  # trials with fewer than 2 volumes in their window
n_nan = 0        # trials whose correlation matrix contained NaN

# Loop through all runs
for run in tqdm(range(1, n_runs + 1)):
    # Zero-pad to two digits so run numbers >= 10 give "run-10", not "run-010".
    run_str = f"run-{run:02d}"
    bold_file = f"{base_path}/{subject}_task-Test_{run_str}_bold.nii"
    events_file = f"{base_path}/{subject}_task-Test_{run_str}_events.tsv"

    # Check if both files exist
    print(f"Looking for: {bold_file}")
    print(f"Looking for: {events_file}")

    if not os.path.exists(bold_file) or not os.path.exists(events_file):
        print(f"Missing: {run_str}")
        continue

    # Load data
    func_img = nib.load(bold_file)
    events = pd.read_csv(events_file, sep="\t")
    roi_ts = masker.fit_transform(func_img)  # shape: [timepoints, ROIs]

    # Every trial of a run shares the same ROI count, so validate it once per
    # run. A different count would give feature vectors of a different length
    # that cannot be stacked with the others.
    if roi_ts.shape[1] != n_rois:
        print(
            f"⚠️ {run_str}: expected {n_rois} ROI signals, "
            f"got {roi_ts.shape[1]}; skipping run."
        )
        continue

    # Segment into trials
    for _, row in events.iterrows():
        onset = row['onset']
        duration = row['duration']
        # Labels may be wrapped in single or double quotes in the TSV.
        genre = row['genre'].strip("'").strip('"')

        # Convert seconds to volume indices; int() truncates toward zero.
        start_vol = int(onset / t_r)
        end_vol = int((onset + duration) / t_r)
        trial_ts = roi_ts[start_vol:end_vol, :]

        if trial_ts.shape[0] < 2:
            n_too_short += 1
            continue  # a correlation needs at least two time points

        # Compute connectivity (corrcoef treats rows as variables -> ROIs)
        conn_matrix = np.corrcoef(trial_ts.T)

        # Flatten upper triangle (k=1 excludes the diagonal of ones)
        flat = conn_matrix[np.triu_indices_from(conn_matrix, k=1)]

        # A NaN appears when some ROI signal is constant within the window;
        # drop such trials explicitly instead of hiding them in a bare except.
        if np.isnan(flat).any():
            n_nan += 1
            continue

        X_all.append(flat)
        y_all.append(genre)


# Convert to numpy arrays. The reshape keeps X_all two-dimensional even when
# no trial survived, so the shape report below cannot raise IndexError.
X_all = np.asarray(X_all).reshape(-1, n_features)
y_all = np.array(y_all)

print("✅ All runs processed!")
print("Total trials:", X_all.shape[0])
print("Feature shape per trial:", X_all.shape[1])
print("Unique genres:", np.unique(y_all))
if n_too_short or n_nan:
    print(f"⚠️ Skipped trials: {n_too_short} too short, {n_nan} with NaN correlations.")

# 💾 Save X_all for later visualization use
np.save("X_all.npy", X_all)

  0%|                                                     | 0/6 [00:00<?, ?it/s]

Looking for: ./sub-005_task-Test_run-01_bold.nii
Looking for: ./sub-005_task-Test_run-01_events.tsv


 17%|███████▌                                     | 1/6 [00:24<02:03, 24.64s/it]

Looking for: ./sub-005_task-Test_run-02_bold.nii
Looking for: ./sub-005_task-Test_run-02_events.tsv


 33%|███████████████                              | 2/6 [00:48<01:37, 24.39s/it]

Looking for: ./sub-005_task-Test_run-03_bold.nii
Looking for: ./sub-005_task-Test_run-03_events.tsv


 50%|██████████████████████▌                      | 3/6 [01:12<01:12, 24.25s/it]

Looking for: ./sub-005_task-Test_run-04_bold.nii
Looking for: ./sub-005_task-Test_run-04_events.tsv


 67%|██████████████████████████████               | 4/6 [01:37<00:48, 24.42s/it]

Looking for: ./sub-005_task-Test_run-05_bold.nii
Looking for: ./sub-005_task-Test_run-05_events.tsv


 83%|█████████████████████████████████████▌       | 5/6 [02:02<00:24, 24.51s/it]

Looking for: ./sub-005_task-Test_run-06_bold.nii
Looking for: ./sub-005_task-Test_run-06_events.tsv


100%|█████████████████████████████████████████████| 6/6 [02:21<00:00, 23.58s/it]

✅ All runs processed!
Total trials: 164
Feature shape per trial: 4950
Unique genres: ['blues' 'classical' 'country' 'disco' 'hiphop' 'jazz' 'metal' 'pop'
 'reggae' 'rock']





In [3]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import cross_val_score

# Fit a random forest on the per-trial feature vectors and score it with
# 3-fold cross-validation; the fixed seed makes the forest reproducible.
forest_params = {"n_estimators": 100, "random_state": 42}
clf = RandomForestClassifier(**forest_params)
scores = cross_val_score(estimator=clf, X=X_all, y=y_all, cv=3)

# Report the score averaged over the folds.
mean_accuracy = scores.mean()
print("🎯 Mean cross-validated accuracy:", mean_accuracy)

🎯 Mean cross-validated accuracy: 0.13389450056116722
