In [None]:
import mne
%pip install mne_bids
%pip install --upgrade mne_bids

from mne_bids import BIDSPath, print_dir_tree, write_raw_bids, make_dataset_description, update_sidecar_json

In [None]:
#Allows us to work with file paths
import os.path as op
#Makes file path handling simpler
from pathlib import Path

In [None]:
#Change the file path to your data's location
data_dir = Path(r"C:\N8_internship_code\second_tester_data\source_data")
print_dir_tree(data_dir, max_depth=1)

In [None]:
children = [child for child in data_dir.iterdir()]
children

In [None]:
#Change this to match the file number
dir_number = 0
files = [file for file in children[dir_number].iterdir()]

In [None]:
file_path = files[0]
file_path

NameError: name 'files' is not defined

In [None]:
data = mne.io.read_raw_eeglab(file_path)

In [None]:
data

In [None]:
from datetime import datetime, timezone
from dateutil.relativedelta import relativedelta
from mne.transforms import Transform

data.info["device_info"] = {
    "type": "Emotiv Epoc X",
    "model": "12-channel EEG",
    "serial": 33456423,
    "site": "yes"
    }



# Generates the (approx) birthdate of the participant based on the measurement date and age
data.set_meas_date(datetime(2015, 6, 7, tzinfo= timezone.utc))
recording_date = data.info["meas_date"]
Birthdate = recording_date - relativedelta(years=30)


data.info["subject_info"] = {
    "id": 1,
    "his_id": "sub-001",
    "last_name": "Doe",
    "first_name": "John",
    "middle_name": "A",
    "birthday": Birthdate,
    "sex": 2,
    "hand": 1,
    "weight": 70.0,
    "height": 175.0,
}

data.info["line_freq"] = 50

data.info["bads"] = ["C3", "F3" ]

data.info["description"] = "a resting state dataset"

data.info["dev_head_t"] = Transform("meg", "head")

data.info["experimenter"] = "John Doe"

data.info["helium_info"] = {
    "he_level_raw": 20,
    "helium_level": 12,
    "orig_file_guid": "1234567890abcdef",
    "meas_date": (datetime(2015, 6, 7, tzinfo= timezone.utc))
    }


In [None]:
data

In [None]:
bids_root = op.join(data_dir.parent, "bids_example")
bids_root

In [None]:
mne.channels.get_builtin_montages()

In [None]:
builtin_montages = mne.channels.get_builtin_montages(descriptions=True)
for montage_name, montage_description in builtin_montages:
    print(f"{montage_name}: {montage_description}")

In [None]:
import mne

my_montage = mne.channels.make_standard_montage("biosemi64")

# Printing montage information
print(my_montage)

# Viualising montage in 2D
my_montage.plot()

# Visualising montage in 3D
fig = my_montage.plot(kind="3d", show=False)  # 3D
fig = fig.gca().view_init(azim=70, elev=15)  # set view angle

In [None]:
data.set_montage(my_montage, match_case=True, match_alias=False, on_missing='ignore', verbose=None)

In [None]:
#Edit this information ot match your data
subject_id = "001"
task = "rest"

bids_path = BIDSPath(subject=subject_id, task=task, root=bids_root)
write_raw_bids(data, bids_path, overwrite=True, allow_preload=True, format="EEGLAB")

In [None]:
#Edit this information ot match your data
subject_id = "001"
task = "rest"

bids_path = BIDSPath(subject=subject_id, task=task, root=bids_root)
write_raw_bids(data, bids_path, overwrite=True, allow_preload=True, format="EEGLAB")

In [None]:
#creating a dataset description JSON file (separate to other data entry types) [for people who want a better description of their dataset than previously provided] 
# #{will overwrite any existing dataset_description.json file in the root of the BIDS directory}
make_dataset_description(
    path=bids_root,
    name="EEGManyLabs Resting State Study", 
    hed_version="1",
    dataset_type='raw',
    data_license="CCO",
    authors=["Ariana Williams", "Daniel Brady"],
    acknowledgements=None,
    how_to_acknowledge="Cite (Williams et al., 2025) when using this dataset",
    funding=["The NHS", "The Uk government"],
    ethics_approvals="Ethical approval was granted by the University of Leeds School of Psychology Ethics committee (12345 2025)",
    references_and_links="https://mne.tools/mne-bids/stable/whats_new_previous_releases.html",
    doi="doi:https://doi.org/10.1016/j.tins.2017.02.004",
    generated_by=[
        {
            "Name": "MNE-BIDS",
            "Version": "0.14",
            "Description": "Used to convert MEG data into BIDS format."
        },
        {
            "Name": "MNE-Python",
            "Version": "1.6.1",
            "Description": "Used for MEG preprocessing and analysis."
        }
    ],
    source_datasets=[
        {
            "URL": "https://example.com/source_dataset",
            "DOI": "10.1234/example.doi",
        }],
            overwrite=True,
            verbose=True)

----

# SEPARATE

### iterating code
removed all visualisations, should be able to be run in one go

The following block of code is intended to be run in one go. It uses the same code as explained earlier in the pipeline, but has no visualisations and has some added code to allow it to format all of your participant data into BIDS formatting automatically, with minimal input.

You should edit:
- The file path for the 'data_dir' variable -> the file path for the folder containing your entire dataset (in single quotation marks)
- The file path in the df variable -> the file path for your excel spreadsheet (in single quotation marks)
- The inputs for each of the 'raw.info' items (excluding subject_info) -> by editing the second set of double quotation marks or swapping out the integer (either after the colon or equals symbol)

Due to the differences in inputs for the 'subject_info' entry between participants, in order to automatically input the data, we must use a different method. This entails a spreadsheet, in which you must input entries for all of the subject_info variables (in columns) for every participant (one per row) in order (starting with the lowest participant id). The process also requires a 'for' loop, which will input (preset or spreadsheet) information to the 'info' variable and create a BIDS formatted dataset for each participant's files.

The required variables are:
- his_id - The string subject identifier
- last_name - The participant's last name
- first_name - The participant's first name
- middle_name - The participants middle name
- birthday - The participant's birthday (YYYY/M/D)
- sex - The biological sex of the participant (0 = unknown, 1 = male, 2 = female)
- hand - Whether the participant is right handed (1), left handed (2) or ambidextrous (3)
- weight - Weight in kilograms
- height - Height in meters

A sample version of an excel sheet formatted this way can be downloaded [here]. 

this should be done for each of the tasks present (e.g. one run through for 'rest' and one for 'video')

most of these columns will map directly onto the inputs for subject_info, however, we have chosen to collect 'age' instead of 'birthday', as it is more commonly generated, and requires an additional section of code to transform it into 'birthday', the accepted format for this 'info' section. If your datasets use 'birthday' instead of 'age', you must simply change the column name, along with the in-code input (in single quotation marks) for the age variable. You may also change the variable name but this is not necessary; the variable name must also be changed in the input section for the 'subject_info' variable, "birthday".


any variables that you have no information for can be labelled n/a (not really true)

In [None]:

# Imports
import mne
%pip install mne_bids
%pip install --upgrade mne_bids

from mne_bids import BIDSPath, write_raw_bids
import os.path as op
from pathlib import Path

import pandas as pd

from datetime import datetime, timezone
from dateutil.relativedelta import relativedelta
from mne.transforms import Transform

# Setting overall file location [edit this]
data_dir = Path(r'c:\N8_internship_code\Motor_Imaging_Dataset')

# Specifying the files
children = [child for child in data_dir.iterdir()]
dir_number = 5
files = [file for file in children[dir_number].iterdir()]
file_path = files[1]

# Reading the files
raw = mne.io.read_raw_edf(file_path)

# Load your Excel data into a dataframe [edit this]
df = pd.read_excel(r'c:\N8_internship_code\N8 tester sheet.xlsx')

# Making a list of (only) participant folders from the 'location' file path
folders = [p_folder for p_folder in data_dir.iterdir() if p_folder.is_dir()]


# Looping through every participant folder and its index in the folders list, using enumerate
for index, participant_folder in enumerate(folders):

    # Setting contents of excel columns (at the current index row) to variables
    his_id = df.loc[index, 'his_id']    
    last_name = df.loc[index, 'last_name']
    first_name = df.loc[index, 'first_name']
    middle_name = df.loc[index, 'middle_name']
    age = df.loc[index, 'age']
    sex = df.loc[index, 'sex']
    hand = df.loc[index, 'hand']
    weight = df.loc[index, 'weight']
    height = df.loc[index, 'height']

    # Sets the variable 'years' to 'age' from the imported excel sheet
    years = int(age)

    # Generates the (approximate) birthdate of the participant based on the measurement date and age
    raw.set_meas_date(datetime(2015, 6, 7, tzinfo= timezone.utc))
    recording_date = raw.info["meas_date"]
    birthdate = recording_date - relativedelta(years=years)

    # Setting the participant's metadata using the excel sheet (different for every participant)
    raw.info["subject_info"] = {
        "id": index,
        "his_id": str(his_id),
        "last_name": str(last_name),
        "first_name": first_name,
        "middle_name": middle_name,
        "birthday": birthdate,
        "sex": int(sex),
        "hand": int(hand),
        "weight": weight,
        "height": height,
    }
    # Setting the dataset's metadata manually (inputs will often be the same for all participants)
    raw.info["device_info"] = {
        "type": "EEG",
        "model": "12-channel EEG",
        "serial": 33456423
        }

    raw.info["line_freq"] = 50

    raw.info["description"] = "a resting state dataset"

    raw.info["dev_head_t"] = Transform("meg", "head")

    raw.info["experimenter"] = "John Doe"

    my_montage = mne.channels.make_standard_montage("biosemi64")
    

    # Creating the output file location for the dataset
    bids_root = op.join(data_dir.parent, "iterate_test")
    
    # Collecting the file name (also participant id) from the file
    file_name = participant_folder.name

    # Setting participant id, task name should be inputted manually
    subject_id = file_name
    task = "task1"

    # Writing the BIDS dataset
    bids_path = BIDSPath(subject=subject_id, task=task, root=bids_root)
    write_raw_bids(raw, bids_path, overwrite=True, allow_preload=True, format="EDF")


Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.
Extracting EDF parameters from c:\N8_internship_code\Motor_Imaging_Dataset\S001\S001R01.edf...
EDF file detected
Setting channel info structure...
Creating raw.info structure...
Writing 'c:\N8_internship_code\iterate_test\participants.tsv'...
Writing 'c:\N8_internship_code\iterate_test\participants.json'...
The provided raw data contains annotations, but you did not pass an "event_id" mapping from annotation descriptions to event codes. We will generate arbitrary event codes. To specify custom event codes, please pass "event_id".
Used Annotations descriptions: ['T0']
Writing 'c:\N8_internship_code\iterate_test\sub-S001\eeg\sub-S001_task-task1_events.tsv'...
Writing 'c:\N8_internship_code\iterate_test\sub-S001\eeg\sub-S001_task-task1_events.json'...
Writing 'c:\N8_internship_code\iterate_test\dataset_description.json'...
Writing 'c:\N8_internship_code\itera

  write_raw_bids(raw, bids_path, overwrite=True, allow_preload=True, format="EDF")
  write_raw_bids(raw, bids_path, overwrite=True, allow_preload=True, format="EDF")
  write_raw_bids(raw, bids_path, overwrite=True, allow_preload=True, format="EDF")
  write_raw_bids(raw, bids_path, overwrite=True, allow_preload=True, format="EDF")
  write_raw_bids(raw, bids_path, overwrite=True, allow_preload=True, format="EDF")


TypeError: subject_info["last_name"] must be an instance of str, got <class 'float'> instead.

In [1]:
print(birthdate)

NameError: name 'birthdate' is not defined

In [None]:
data.info["bads"] = ["", ""]