In [None]:
import openneuro as on

# Download the entire MUSIN-G dataset (OpenNeuro accession ds003774).
# The target is the directory that the MUSIN-G loading cell uses as its BIDS
# root (<working dir>/datasets/musin_g_data), so a fresh "Restart & Run All"
# reads exactly the files fetched here instead of a manually moved copy.
on.download(dataset='ds003774', target_dir='./datasets/musin_g_data')


👋 Hello! This is openneuro-py 2025.1.0. Great to see you! 🤗

   👉 Please report problems 🤯 and bugs 🪲 at
      https://github.com/hoechenberger/openneuro-py/issues

🌍 Preparing to download ds003774 …


📁 Traversing directories for ds003774 : 0 entities [00:00, ? entities/s]

📥 Retrieving up to 1840 files (5 concurrent downloads). 
✅ Finished downloading ds003774.
 
🧠 Please enjoy your brains.
 


🔄 Request timed out while downloading /home/zmrocze/studia/uwr/magisterka/musin_g_data/sub-005/ses-08/eeg/sub-005_ses-08_task-MusicListening_run-8_eeg.set, retrying in 0.5 sec …
🔄 Request timed out while downloading /home/zmrocze/studia/uwr/magisterka/musin_g_data/sub-005/ses-08/eeg/sub-005_ses-08_task-MusicListening_run-8_electrodes.tsv, retrying in 0.5 sec …
🔄 Request timed out while downloading /home/zmrocze/studia/uwr/magisterka/musin_g_data/sub-005/ses-08/eeg/sub-005_ses-08_task-MusicListening_run-8_events.json, retrying in 0.5 sec …
🔄 Request timed out while downloading /home/zmrocze/studia/uwr/magisterka/musin_g_data/sub-005/ses-09/eeg/sub-005_ses-09_task-MusicListening_run-9_channels.tsv, retrying in 0.5 sec …
🔄 Request timed out while downloading /home/zmrocze/studia/uwr/magisterka/musin_g_data/sub-005/ses-08/eeg/sub-005_ses-08_task-MusicListening_run-8_events.json, retrying in 0.5 sec …
🔄 Request timed out while downloading /home/zmrocze/studia/uwr/magisterka/musin_g_data/sub

In [5]:
%load_ext autoreload
%autoreload 2

from load_nmed_t import NMEDTLoader, main

main("./datasets/nmed-t")

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
=== NMED-T Dataset Summary ===
Dataset: Naturalistic Music EEG Dataset - Tempo (NMED-T)
Participants: 20
Songs: 10
Electrodes (cleaned): 125
Sampling rate (cleaned): 125 Hz

=== Loading Cleaned EEG Data ===
Loaded song 1: First Fires - Shape: (125, 34795, 20)
Loaded song 1: First Fires - Shape: (125, 34795, 20)
Loaded song 2: Oino - Shape: (125, 33891, 20)
Loaded song 2: Oino - Shape: (125, 33891, 20)
Loaded song 3: Tiptoes - Shape: (125, 34469, 20)
Song 1: First Fires by Bonobo
  Tempo: 55.97 BPM
  EEG shape: (125, 34795, 20) - (electrodes=125, time_samples, participants=20)
  Sampling rate: 125 Hz
Song 2: Oino by LA Priest
  Tempo: 69.44 BPM
  EEG shape: (125, 33891, 20) - (electrodes=125, time_samples, participants=20)
  Sampling rate: 125 Hz
Song 3: Tiptoes by Daedelus
  Tempo: 74.26 BPM
  EEG shape: (125, 34469, 20) - (electrodes=125, time_samples, participants=20)
  Sampling rate: 125 Hz

=== 

In [10]:
# Comprehensive MUSIN-G Dataset Loader Based on Research
# Dataset described in: Miyapuram et al. (2022), Data in Brief, doi:10.1016/j.dib.2022.108663

import mne
import mne_bids
from mne_bids import BIDSPath, read_raw_bids, get_entity_vals
import pandas as pd
import numpy as np
from pathlib import Path
import warnings
warnings.filterwarnings('ignore')

print("🎵 MUSIN-G Dataset: Complete Import and Description")
print("=" * 70)
print("Based on: Miyapuram et al. (2022) - Electroencephalography (EEG) dataset")
print("during naturalistic music listening comprising different genres")
print("🔗 https://openneuro.org/datasets/ds003774")
print()

# Dataset paths
# Built from the current working directory instead of a hard-coded home
# directory so the notebook runs from any checkout. The value is still an
# absolute path string, as before.
bids_root = str(Path.cwd() / 'datasets' / 'musin_g_data')

# EXPERIMENTAL PROTOCOL (from research)
print("🧪 EXPERIMENTAL PROTOCOL:")
print("• Participants: 20 Indian participants (16 male, 4 female, avg age 25.3±3.38)")
print("• EEG System: 128-channel Hydrocel Geodesic Sensor Net (HCGSN)")
print("• Recording: NetStation 5.4, referenced to Cz, 250Hz sampling")
print("• Location: Indian Institute of Technology Gandhinagar, India")
print()

print("📋 EXPERIMENTAL PROCEDURE:")
print("1. Single beep → Participant closes eyes")
print("2. Song plays (~2 minutes) → EEG recorded with eyes closed")
print("3. Double beep → Participant opens eyes")
print("4. Ratings collected: Familiarity & Enjoyment (1-5 scale)")
print("   - 1 = Most familiar/enjoyable, 5 = Least familiar/enjoyable")
print("5. Repeat for all 12 songs")
print()

# Load behavioral data
# The ratings table sits in the dataset's `stimuli` folder and is parsed as
# tab-separated text.
behavioral_file = Path(bids_root).joinpath('stimuli', 'Behavioural_data')
behavioral_data = pd.read_csv(behavioral_file, sep='\t')

# NOTE(review): the saved output of this cell reports "nan-nan" / "nan±nan" for
# Enjoyment, i.e. that column holds no usable values as parsed — confirm the
# file's column layout before relying on the enjoyment ratings.
_enjoy = behavioral_data['Enjoyment']
_familiar = behavioral_data['Familiarity']

print("📊 BEHAVIORAL DATA SUMMARY:")
print(f"• Total ratings: {len(behavioral_data)} (20 subjects × 12 songs)")
print(f"• Enjoyment range: {_enjoy.min()}-{_enjoy.max()}")
print(f"• Familiarity range: {_familiar.min()}-{_familiar.max()}")
print(f"• Mean enjoyment: {_enjoy.mean():.2f}±{_enjoy.std():.2f}")
print(f"• Mean familiarity: {_familiar.mean():.2f}±{_familiar.std():.2f}")
print()

# Enhanced song information with research context
# One row per stimulus, in session order; the dict key is the 1-based position.
# `tempo` is None for the one track (song 4) that has no tempo value here.
# NOTE(review): `duration` presumably is in seconds and `tempo` in BPM — confirm.
_SONG_FIELDS = ("name", "artist", "genre", "duration", "tempo", "characteristics")
_SONG_ROWS = [
    ("Trip to the lonely planet", "Mark Alow", "Deep House", 125, 121.95, "Electronic, Western"),
    ("Sail", "Awolnation", "Indie", 114, 119, "Rock, English lyrics"),
    ("Concept 15", "Kodomo", "Electronics", 132, 161, "Experimental electronic"),
    ("Aurore", "Claire David", "New Age", 111, None, "Ambient, meditative"),
    ("Proof", "Idiotape", "Electronic Dance", 124, 123, "Danceable electronic"),
    ("Glider", "Tycho", "Ambient", 100, 126, "Atmospheric, instrumental"),
    ("Raag Bihag", "B.Sivaramakrishna Rao", "Hindustani Classical", 116, 70, "Indian classical, traditional"),
    ("Albela sajan", "Ismail Darbar", "Indian Semi-Classical", 121, 194, "Indian, Hindi lyrics"),
    ("Mor Bani Thanghat Kare", "Sanjay Leela Bhansali", "Indian Folk", 126, 117, "Indian folk, Gujarati lyrics"),
    ("Fallin", "Dr. SaxLove", "Soft Jazz", 129, 197, "Jazz, instrumental"),
    ("Master of Running", "Rickeyabo", "Goth Rock", 113, 120, "Dark, alternative rock"),
    ("JB", "Nobody.one", "Progressive Instrumental Rock", 117, 146, "Complex, instrumental"),
]

# Maps song/session number (int, 1..12) -> dict with the fields listed above.
songs_info_enhanced = {
    song_id: dict(zip(_SONG_FIELDS, row))
    for song_id, row in enumerate(_SONG_ROWS, start=1)
}

# Create comprehensive dataset class
class CompleteMUSINGDataset:
    """In-memory index of MUSIN-G EEG recordings with song info and ratings.

    Attributes:
        bids_root: Root directory of the BIDS dataset.
        subjects: Subject entity values found under ``bids_root``.
        sessions: Session entity values found under ``bids_root``.
        songs_info: Mapping ``int(session) -> dict`` with at least the keys
            'name', 'artist', 'genre', 'tempo' and 'characteristics'.
        behavioral_data: DataFrame with the columns 'Subject', 'Song_ID',
            'Enjoyment' and 'Familiarity'.
        all_data: ``{subject: {session: entry}}`` cache filled by
            ``load_complete_dataset``; only successfully loaded sessions
            get an entry.
        load_errors: ``{subject: {session: error message}}`` for sessions
            whose load raised, recorded regardless of ``verbose``.
    """

    def __init__(self, bids_root, songs_info=None, ratings=None):
        """Discover subjects/sessions under ``bids_root`` and attach metadata.

        Args:
            bids_root: Root directory of the BIDS dataset.
            songs_info: Optional song table; defaults to the notebook-level
                ``songs_info_enhanced``.
            ratings: Optional behavioural ratings DataFrame; defaults to the
                notebook-level ``behavioral_data``.
        """
        self.bids_root = bids_root
        self.subjects = get_entity_vals(bids_root, 'subject')
        self.sessions = get_entity_vals(bids_root, 'session')
        # Fall back to the notebook globals only when nothing is passed, so
        # existing one-argument construction keeps working.
        self.songs_info = songs_info_enhanced if songs_info is None else songs_info
        self.behavioral_data = behavioral_data if ratings is None else ratings
        self.all_data = {}
        self.load_errors = {}

    def load_complete_dataset(self, max_subjects=None, verbose=True):
        """Load EEG data and behavioral ratings for all subjects and sessions.

        Args:
            max_subjects: Load only the first ``max_subjects`` subjects;
                ``None`` loads every subject, ``0`` loads none.
            verbose: Print one line per subject/session (success or failure).

        Returns:
            ``self.all_data`` — the loader's cumulative cache (the same dict
            object on every call). It also contains subjects loaded by
            earlier calls, not only those loaded by this one.
        """
        # `is None` rather than truthiness: max_subjects=0 must load nothing,
        # not silently fall through to "load everything".
        if max_subjects is None:
            subjects_to_load = self.subjects
        else:
            subjects_to_load = self.subjects[:max_subjects]

        print(f"🔄 Loading complete MUSIN-G dataset...")
        print(f"📦 Subjects to load: {len(subjects_to_load)} of {len(self.subjects)}")
        print()

        for i, subject in enumerate(subjects_to_load, 1):
            if verbose:
                print(f"Subject {subject} ({i}/{len(subjects_to_load)}):")

            # Reset this subject so a re-load never mixes old and new results.
            self.all_data[subject] = {}
            self.load_errors.pop(subject, None)

            for session in self.sessions:
                # Load EEG data - Fix run number format (remove zero padding)
                bids_path = BIDSPath(
                    subject=subject,
                    session=session,
                    task='MusicListening',
                    run=int(session),  # Use integer instead of zero-padded string
                    datatype='eeg',
                    root=self.bids_root
                )

                try:
                    entry = self._load_session(bids_path, subject, session)
                except Exception as e:
                    # Best-effort loading: one unreadable file must not abort
                    # the run, but the failure is always recorded so it is
                    # still discoverable when verbose=False.
                    error_msg = str(e)
                    self.load_errors.setdefault(subject, {})[session] = error_msg
                    if verbose:
                        self._report_load_error(session, error_msg)
                    continue

                # Store complete information
                self.all_data[subject][session] = entry

                if verbose:
                    song_info = entry['song_info']
                    print(f"  ✓ Song {session}: {song_info['name']} ({song_info['genre']}) | "
                          f"Enjoy: {entry['enjoyment']}/5, Familiar: {entry['familiarity']}/5")

            if verbose:
                print()

        return self.all_data

    def _load_session(self, bids_path, subject, session):
        """Read one recording and bundle it with its song info and ratings.

        Raises whatever ``read_raw_bids`` or the metadata lookups raise; the
        caller decides how to handle the failure.
        """
        raw = read_raw_bids(bids_path, verbose=False)

        # Get song info
        song_info = self.songs_info[int(session)]

        # Get behavioral ratings for this subject-song combination
        # NOTE(review): the saved notebook output shows "Enjoy: None/5,
        # Familiar: None/5" for every song, i.e. this filter matched no row —
        # confirm the 'Subject'/'Song_ID' column names and value encoding.
        ratings = self.behavioral_data[
            (self.behavioral_data['Subject'] == int(subject)) &
            (self.behavioral_data['Song_ID'] == int(session))
        ]

        if not ratings.empty:
            enjoyment = ratings['Enjoyment'].iloc[0]
            familiarity = ratings['Familiarity'].iloc[0]
        else:
            enjoyment, familiarity = None, None

        # Enhanced metadata - store in info['temp'] to avoid MNE restrictions
        raw.info['description'] = f"Song {session}: {song_info['name']} - {song_info['artist']}"

        # Use temp storage for custom metadata (MNE-compatible way)
        if 'temp' not in raw.info:
            raw.info['temp'] = {}

        raw.info['temp']['song_metadata'] = {
            'genre': song_info['genre'],
            'tempo': song_info['tempo'],
            'characteristics': song_info['characteristics'],
            'enjoyment_rating': enjoyment,
            'familiarity_rating': familiarity,
            'song_name': song_info['name'],
            'artist': song_info['artist']
        }

        return {
            'raw': raw,
            'song_info': song_info,
            'enjoyment': enjoyment,
            'familiarity': familiarity,
            'file_path': str(bids_path.fpath)
        }

    def _report_load_error(self, session, error_msg):
        """Print a one-line description of a failed session load."""
        if "File does not exist" in error_msg:
            # Extract suggestions from error message
            lines = error_msg.split('\n')
            if len(lines) > 2 and "Did you mean" in lines[1]:
                print(f"  ✗ Song {session}: File naming issue - trying run number without zero padding")
            else:
                print(f"  ✗ Song {session}: File not found")
        else:
            print(f"  ✗ Song {session}: {error_msg[:100]}...")

    def get_dataset_statistics(self):
        """Print load counts and per-genre mean ratings.

        Prints a hint and returns early when nothing has been loaded yet.
        Always returns ``None``.
        """
        if not self.all_data:
            print("No data loaded. Call load_complete_dataset() first.")
            return

        # Count successful loads
        successful_loads = sum(
            1
            for subject_sessions in self.all_data.values()
            for entry in subject_sessions.values()
            if 'raw' in entry
        )
        # Every subject present in the cache was attempted for every session.
        # Failed sessions leave no cache entry, so counting cache entries as
        # the denominator would always report 100%.
        attempted_files = len(self.all_data) * len(self.sessions)

        print(f"📈 DATASET STATISTICS:")
        print(f"• Total expected files: {len(self.subjects)} × {len(self.sessions)} = {len(self.subjects) * len(self.sessions)}")
        print(f"• Successfully loaded: {successful_loads}/{attempted_files}")
        if attempted_files:
            print(f"• Success rate: {successful_loads/attempted_files*100:.1f}%")
        else:
            # No sessions known: avoid dividing by zero.
            print("• Success rate: n/a (no files attempted)")

        # Behavioral statistics by genre
        print(f"\n🎭 GENRE-WISE BEHAVIORAL STATISTICS:")
        for session_num, song_info in self.songs_info.items():
            genre_ratings = self.behavioral_data[self.behavioral_data['Song_ID'] == session_num]
            if not genre_ratings.empty:
                mean_enjoy = genre_ratings['Enjoyment'].mean()
                mean_familiar = genre_ratings['Familiarity'].mean()
                print(f"  {song_info['genre']:25} | Enjoy: {mean_enjoy:.1f}/5 | Familiar: {mean_familiar:.1f}/5")

    def get_subject_data(self, subject_id):
        """Return the ``{session: entry}`` dict of one subject, or ``{}`` if unknown."""
        return self.all_data.get(subject_id, {})

    def get_song_data_across_subjects(self, session_id):
        """Return ``{subject: entry}`` for every subject that has ``session_id`` loaded."""
        return {
            subject: subject_sessions[session_id]
            for subject, subject_sessions in self.all_data.items()
            if session_id in subject_sessions
        }

    def get_song_metadata(self, raw_object):
        """Return the song metadata dict stored in ``raw_object.info['temp']``, or ``None``."""
        if 'temp' in raw_object.info and 'song_metadata' in raw_object.info['temp']:
            return raw_object.info['temp']['song_metadata']
        return None

# Initialize and demonstrate
# Build the loader on the BIDS root, then immediately load every subject and
# print the statistics; the "Use: ..." lines are usage hints for later cells.
print("🚀 Initializing Complete MUSIN-G Dataset Loader...")
complete_dataset = CompleteMUSINGDataset(bids_root)

print(
    "\n🎯 Dataset ready for complete loading!",
    "• Use: complete_dataset.load_complete_dataset(max_subjects=5) for demo",
    "• Use: complete_dataset.load_complete_dataset() for full dataset",
    sep="\n",
)

# Full load: returns the loader's nested {subject: {session: entry}} cache.
musin_data = complete_dataset.load_complete_dataset()
complete_dataset.get_dataset_statistics()
print("• Use: complete_dataset.get_dataset_statistics() for analysis")

🎵 MUSIN-G Dataset: Complete Import and Description
Based on: Miyapuram et al. (2022) - Electroencephalography (EEG) dataset
during naturalistic music listening comprising different genres
🔗 https://openneuro.org/datasets/ds003774

🧪 EXPERIMENTAL PROTOCOL:
• Participants: 20 Indian participants (16 male, 4 female, avg age 25.3±3.38)
• EEG System: 128-channel Hydrocel Geodesic Sensor Net (HCGSN)
• Recording: NetStation 5.4, referenced to Cz, 250Hz sampling
• Location: Indian Institute of Technology Gandhinagar, India

📋 EXPERIMENTAL PROCEDURE:
1. Single beep → Participant closes eyes
2. Song plays (~2 minutes) → EEG recorded with eyes closed
3. Double beep → Participant opens eyes
4. Ratings collected: Familiarity & Enjoyment (1-5 scale)
   - 1 = Most familiar/enjoyable, 5 = Least familiar/enjoyable
5. Repeat for all 12 songs

📊 BEHAVIORAL DATA SUMMARY:
• Total ratings: 240 (20 subjects × 12 songs)
• Enjoyment range: nan-nan
• Familiarity range: 1-5
• Mean enjoyment: nan±nan
• Mean famil

In [14]:
# Iterate over all subjects and sessions in complete_dataset and collect the
# per-recording song name and ratings into one tidy table. Previously the loop
# only assigned the values to temporaries and discarded them.
rating_records = []
for subject_id, subject_data in complete_dataset.all_data.items():
    for session_id, session_data in subject_data.items():
        song_name = session_data['song_info']['name']
        enjoyment = session_data['enjoyment']
        familiarity = session_data['familiarity']
        rating_records.append({
            'subject': subject_id,
            'session': session_id,
            'song': song_name,
            'enjoyment': enjoyment,
            'familiarity': familiarity,
        })

# Explicit columns keep the frame well-formed even when nothing was loaded.
ratings_overview = pd.DataFrame(
    rating_records,
    columns=['subject', 'session', 'song', 'enjoyment', 'familiarity'],
)
ratings_overview.head()

In [11]:
# Demonstration: Load subset and perform analyses
print("🔍 DEMONSTRATION: Loading subset of MUSIN-G dataset")
print("=" * 60)

# Load first 3 subjects as demonstration (to avoid overwhelming output)
N_DEMO_SUBJECTS = 3
complete_dataset.load_complete_dataset(max_subjects=N_DEMO_SUBJECTS, verbose=True)

# load_complete_dataset() returns the loader's cumulative cache, which still
# holds every subject loaded by earlier cells. Keep only the demo subjects so
# the analyses below really are restricted to them.
demo_data = {
    subject: complete_dataset.get_subject_data(subject)
    for subject in complete_dataset.subjects[:N_DEMO_SUBJECTS]
}

# Get dataset statistics
complete_dataset.get_dataset_statistics()

print("\n" + "="*60)
print("📊 EXAMPLE ANALYSES")
print("="*60)

# Example 1: Genre preference analysis
# Collect the non-missing ratings per genre over the demo subjects, then print
# the mean of each genre that has at least one enjoyment rating.
print("\n1️⃣ GENRE PREFERENCE ANALYSIS (Demo subjects):")
genre_enjoyment = {}
genre_familiarity = {}

for subject, subject_sessions in demo_data.items():
    for session, data_point in subject_sessions.items():
        genre = data_point['song_info']['genre']
        enjoyment = data_point['enjoyment']
        familiarity = data_point['familiarity']

        # Both dicts get the genre key on first sight, even if no rating follows.
        enjoy_scores = genre_enjoyment.setdefault(genre, [])
        familiar_scores = genre_familiarity.setdefault(genre, [])

        if enjoyment is not None:
            enjoy_scores.append(enjoyment)
        if familiarity is not None:
            familiar_scores.append(familiarity)

for genre, enjoy_scores in genre_enjoyment.items():
    if not enjoy_scores:
        continue
    mean_enjoy = np.mean(enjoy_scores)
    mean_familiar = np.mean(genre_familiarity[genre])
    print(f"  {genre:25} | Enjoy: {mean_enjoy:.1f}/5 | Familiar: {mean_familiar:.1f}/5")

# Example 2: EEG data quality check
# For the first two demo subjects and three sample sessions, print duration,
# channel count, sampling frequency and a crude count of high-variance channels.
print("\n2️⃣ EEG DATA QUALITY CHECK:")
for subject in list(demo_data)[:2]:  # Check first 2 subjects
    print(f"\nSubject {subject}:")
    subject_sessions = demo_data[subject]
    for session in ('01', '07', '12'):  # Sample: Electronic, Classical, Rock
        if session not in subject_sessions:
            continue

        entry = subject_sessions[session]
        raw = entry['raw']
        song_name = entry['song_info']['name']

        # Basic quality metrics
        duration = raw.times[-1]
        n_channels = raw.info['nchan']
        sfreq = raw.info['sfreq']

        # Check for bad channels (simple amplitude check): a channel counts as
        # an outlier when its standard deviation exceeds 3x the median one.
        data = raw.get_data()
        channel_std = data.std(axis=1)
        std_threshold = 3 * np.median(channel_std)
        outlier_channels = np.sum(channel_std > std_threshold)

        print(f"  Song {session} ({song_name[:20]}...)")
        print(f"    Duration: {duration:.1f}s | Channels: {n_channels} | Freq: {sfreq}Hz")
        print(f"    Potential bad channels: {outlier_channels}")

# Example 3: Cross-cultural music analysis setup
# Group the stimulus genres into three broad categories and list, per
# category, the songs whose genre falls into it.
print("\n3️⃣ CROSS-CULTURAL MUSIC CATEGORIES:")
categories = {
    'Western_Electronic': ['Deep House', 'Electronics', 'Electronic Dance', 'Ambient'],
    'Western_Traditional': ['Indie', 'New Age', 'Soft Jazz', 'Goth Rock', 'Progressive Instrumental Rock'],
    'Indian_Traditional': ['Hindustani Classical', 'Indian Semi-Classical', 'Indian Folk']
}

for category in categories:
    genres = categories[category]
    matching_songs = []
    for song in songs_info_enhanced.values():
        if song['genre'] in genres:
            matching_songs.append(song)

    print(f"  {category}: {len(matching_songs)} songs")
    for song in matching_songs:
        print(f"    - {song['name']} ({song['genre']})")

# Closing banner: list the analyses this loader enables and show, as printed
# pseudo-code, how to reach the loaded data from later cells.
_analysis_ideas = [
    "\n🎯 READY FOR ADVANCED ANALYSES:",
    "• Genre classification from EEG",
    "• Cross-cultural music perception",
    "• Individual vs. group music preferences",
    "• Temporal dynamics of music listening",
    "• Familiarity vs. enjoyment neural correlates",
]
print("\n".join(_analysis_ideas))

_next_steps = [
    "\n💡 NEXT STEPS:",
    "# Load complete dataset:",
    "# all_data = complete_dataset.load_complete_dataset()",
    "# ",
    "# Access specific data:",
    "# subject_001_data = complete_dataset.get_subject_data('001')",
    "# classical_song_data = complete_dataset.get_song_data_across_subjects('07')",
    "# ",
    "# Access song metadata from Raw object:",
    "# raw = subject_001_data['01']['raw']",
    "# metadata = complete_dataset.get_song_metadata(raw)",
    "# print(metadata['genre'], metadata['enjoyment_rating'])",
]
print("\n".join(_next_steps))

🔍 DEMONSTRATION: Loading subset of MUSIN-G dataset
🔄 Loading complete MUSIN-G dataset...
📦 Subjects to load: 3 of 20

Subject 001 (1/3):
  ✓ Song 01: Trip to the lonely planet (Deep House) | Enjoy: None/5, Familiar: None/5
  ✓ Song 01: Trip to the lonely planet (Deep House) | Enjoy: None/5, Familiar: None/5
  ✓ Song 02: Sail (Indie) | Enjoy: None/5, Familiar: None/5
  ✓ Song 02: Sail (Indie) | Enjoy: None/5, Familiar: None/5
  ✓ Song 03: Concept 15 (Electronics) | Enjoy: None/5, Familiar: None/5
  ✓ Song 03: Concept 15 (Electronics) | Enjoy: None/5, Familiar: None/5
  ✓ Song 04: Aurore (New Age) | Enjoy: None/5, Familiar: None/5
  ✓ Song 05: Proof (Electronic Dance) | Enjoy: None/5, Familiar: None/5
  ✓ Song 04: Aurore (New Age) | Enjoy: None/5, Familiar: None/5
  ✓ Song 05: Proof (Electronic Dance) | Enjoy: None/5, Familiar: None/5
  ✓ Song 06: Glider (Ambient) | Enjoy: None/5, Familiar: None/5
  ✓ Song 07: Raag Bihag (Hindustani Classical) | Enjoy: None/5, Familiar: None/5
  ✓ Song 0