In [1]:
# CREATED: 17-NOV-2022
# LAST EDIT: 12-APR-2023
# AUTHOR: DUANE RINEHART, MBA (drinehart@ucsd.edu)

# IMPLEMENTS CONVERSION OF EXTRACELLULAR ELECTROPHYSIOLOGY DATASETS TO NEURODATA WITHOUT BORDERS (NWB) FORMAT
# REQUIRED BY GRANT: DATA MUST BE UPLOADED IN AN OPEN FORMAT TO A PUBLICLY ACCESSIBLE REPOSITORY

import os, math, time, pynwb
from pathlib import Path, PurePath, PureWindowsPath
from datetime import datetime, timedelta
from dateutil.tz import tzlocal
import pandas as pd
import pandas as pd

from ConvertIntanToNWB import convert_to_nwb

#################################################################
# APP CONSTANTS
excel_file = 'input_ephys.xlsx'
base_path = Path('/mnt/e/temp/Devor-gdrive/')
output_path = Path('/mnt/e/temp/Devor-gdrive/output/')
infile = Path(base_path, excel_file)
debug = True
experimenter = 'Sang Heon Lee , Shadi Dayeh'
institution = 'UC San Diego'
#################################################################

def load_data():
    '''Used for meta-data loading'''
    lstNWBFields = ['session_id',
                    'subject_id',
                    'age',
                    'subject_description',
                    'genotype',
                    'sex',
                    'species',
                    'subject_weight',
                    'subject_strain',
                    'date_of_birth(YYYY-MM-DD)',
                    'session_description',
                    'src_folder_directory',
                    'stimulus_notes_include',
                    'stimulus_notes_paradigm',
                    'stimulus_notes_direct_electrical_stimulation',
                    'stimulus_notes_direct_electrical_stimulation_paradigm',
                    'pharmacology_notes_anesthetized_during_recording',
                    'pharmacology',
                    'electrode_device_name',
                    'electrode_recordings',
                    'electrode_recordings_type',
                    'electrode_recordings_contact_material',
                    'electrode_recordings_substrate',
                    'electrode_recordings_system'
                    'electrode_recordings_location',
                    'electrode_filtering'
                    'identifier'] #headers I need

    lstExtractionFields = pd.read_excel(infile, sheet_name="auto", usecols=lstNWBFields) #just extract columns/fields I need
    return lstExtractionFields

def get_measurements_data(src_folder_directory, electrode_recordings_file, electrode_device_name, electrode_recordings_type, electrode_recordings_contact_material, electrode_recordings_substrate, electrode_recordings_system, electrode_recordings_location):
    '''Used for electrode measurements table processing'''

    base_directory = PureWindowsPath(src_folder_directory).parts[:-1] #remove last part of path
    input_filename = Path(output_path, *base_directory, electrode_recordings_file)
    input_map = pd.read_excel(input_filename)

    electrode_mappings = list(zip(input_map['device_channel'], input_map['intan_channel']))
    return electrode_mappings


def get_subject(age, subject_description, genotype, sex, species, subject_id, subject_weight, date_of_birth, subject_strain):
    '''Used for meta-data '''
    if isinstance(age, str) != True:
        try:
            subject_age = "P" + str(int(age)) + "D" #ISO 8601 Duration format - assumes 'days'
        except:
            subject_age = "P0D" #generic

    dob = date_of_birth.to_pydatetime() #convert pandas timestamp to python datetime format
    if isinstance(dob.year, int) and isinstance(dob.month, int) and isinstance(dob.day, int) == True:
        date_of_birth = datetime(dob.year, dob.month, dob.day, tzinfo=tzlocal())
    else:
        date_of_birth = None

    subject = pynwb.file.Subject(age=subject_age,
                             description=subject_description,
                             genotype=str(genotype),
                             sex=sex,
                             species=species,
                             subject_id=subject_id,
                             weight=subject_weight,
                             date_of_birth=date_of_birth,
                             strain=subject_strain
                            )
    return subject


def main():
    lstRecords = load_data().to_dict('records') #creates list of dictionaries

    for cnt, dataset in enumerate(lstRecords):
        print(f"PROCESSING DATASET #{cnt+1}")
        print(f"\tsession_id: {dataset['session_id']}")

        age = dataset['age']
        subject_description = dataset['subject_description']
        genotype = dataset['genotype']
        if dataset['sex'] == 'Male':
            sex = 'M'
        elif dataset['sex'] == 'Female':
            sex = 'F'
        else:
            sex = 'U' #unknown
        species = dataset['species']
        subject_id = dataset['subject_id']
        subject_weight = dataset['subject_weight']
        date_of_birth = dataset['date_of_birth(YYYY-MM-DD)']
        subject_strain = dataset['subject_strain']

        #CONCATENATE STIMULUS NOTES
        stimulus_notes = 'NA'
        if dataset['stimulus_notes_include'] == 1: #1 (include) or 2 (do not include)
            stimulus_notes = "Stimulus paradigm: " + str(dataset['stimulus_notes_paradigm']) + "; "
            if dataset['stimulus_notes_direct_electrical_stimulation'] == 1:
                stimulus_notes += "Direct electrical stimulation paradigm: " + str(dataset['stimulus_notes_direct_electrical_stimulation_paradigm']) + "; "
        ##################################################################################
        # CREATE EXPERIMENTAL SUBJECT OBJECT
        subject = get_subject(age,
                              subject_description,
                              genotype,
                              sex,
                              species,
                              subject_id,
                              subject_weight,
                              date_of_birth,
                              subject_strain)
        ##################################################################################

        ##################################################################################
        output_filename = None
        session_id = dataset['session_id']
        filename = Path(session_id) #wrong extension; replace with 'nwb'
        output_filename = filename.with_suffix('.nwb')
        dest_path = str(PurePath('/mnt/e/temp/Devor-gdrive/output/', output_filename)) #path must be string for Itan converter
        print(f'\tDESTINATION FILE: {dest_path}')

        src_path_supplement = PureWindowsPath(dataset['src_folder_directory'])

        input_filename = str(PurePath('/mnt/e/temp/Devor-gdrive/', src_path_supplement, session_id))
        print(f'\tINPUT FILE: {input_filename}')
        ##################################################################################

        ##################################################################################
        # CREATE/CONVERT ELECTRODES TABLE(S) OBJECT
        electrode_recordings_file = 'electrode_mappings.xlsx'
        electrode_mappings = get_measurements_data(dataset['src_folder_directory'], electrode_recordings_file, dataset['electrode_device_name'], dataset['electrode_recordings_type'], dataset['electrode_recordings_contact_material'], dataset['electrode_recordings_substrate'], dataset['electrode_recordings_system'], dataset['electrode_recordings_location'])

        electrode_recordings_description = 'Type: ' + str(dataset['electrode_recordings_type']) + '; Contact material: ' + str(dataset['electrode_recordings_contact_material']) + '; Substrate: ' + str(dataset['electrode_recordings_substrate'])

        electrode_headers = {'electrode_device_name': dataset['electrode_device_name'], 'electrode_recordings_description': electrode_recordings_description, 'electrode_recordings_system': dataset['electrode_recordings_system'], 'electrode_recordings_location': dataset['electrode_recordings_location'], 'electrode_filtering': dataset['electrode_filtering']}

        ##################################################################################

        ##################################################################################
        # PROCESS META-DATA
        session_description = dataset['session_description']
        pharmacology = None #NEED DESTINATION FIELD FOR NWB FILE
        if dataset['pharmacology_notes_anesthetized_during_recording'] == 1:
            pharmacology = dataset['pharmacology']

        surgery = None #NEED DESTINATION FIELD FOR NWB FILE
        manual_start_time = None #NEED DESTINATION FIELD FOR NWB FILE
        exp_identifier = dataset['identifier']
        ##################################################################################

        if os.path.isfile(dest_path) != True: #file conversion completed
            convert_to_nwb(intan_filename=input_filename,
                       nwb_filename=dest_path,
                       session_description=session_description,
                       blocks_per_chunk=1000,
                       use_compression=True,
                       compression_level=4,
                       lowpass_description='Unknown lowpass filtering process',
                       highpass_description='Unknown lowpass filtering process',
                       merge_files=False,
                       subject=subject,
                       surgery=surgery,
                       stimulus_notes=stimulus_notes,
                       pharmacology=pharmacology,
                       manual_start_time=manual_start_time,
                       exp_identifier=exp_identifier,
                       electrode_mappings=electrode_mappings,
                       experimenter=experimenter,
                       institution=institution,
                       electrode_headers=electrode_headers)
        ##################################################################################


if __name__ == "__main__":
    main()

PROCESSING DATASET #1
	session_id: san4_run06_optogen_N30_400au_5ms_ISI5s_centerofarray_awake_191107_150118.rhd
	DESTINATION FILE: /mnt/e/temp/Devor-gdrive/output/san4_run06_optogen_N30_400au_5ms_ISI5s_centerofarray_awake_191107_150118.nwb
	INPUT FILE: /mnt/e/temp/Devor-gdrive/20191017_Chronic implant_SL2701_San4/20191107_San4_day21/ephys/san4_run06_optogen_N30_400au_5ms_ISI5s_centerofarray_awake_191107_150118.rhd
PROCESSING DATASET #2
	session_id: san4_run05_optogen_N30_400au_5ms_ISI5s_centerofarray_awake_191113_154743.rhd
	DESTINATION FILE: /mnt/e/temp/Devor-gdrive/output/san4_run05_optogen_N30_400au_5ms_ISI5s_centerofarray_awake_191113_154743.nwb
	INPUT FILE: /mnt/e/temp/Devor-gdrive/20191017_Chronic implant_SL2701_San4/20191113_San4_day27/ephys/san4_run05_optogen_N30_400au_5ms_ISI5s_centerofarray_awake_191113_154743.rhd
PROCESSING DATASET #3
	session_id: san4_run06_optogen_N30_400au_5ms_ISI5s_centerofarray_awake_191127_114535.rhd
	DESTINATION FILE: /mnt/e/temp/Devor-gdrive/output/s