An initial attempt at NWB conversion of Neuralynx data, following the "manual" conversion described in https://pynwb.readthedocs.io/en/stable/tutorials/domain/ecephys.html .  Unlike the example(s) there, I (Yarik) was trying to identify the levels of data and metadata to consider, and also to store them across multiple .nwb files.

In [1]:
import numpy as np
import pynwb
from datetime import datetime
from dateutil.tz import tzlocal
from pynwb import NWBFile

import neo

In [9]:
# Path to the recording session directory that the rest of the notebook converts
session_data = "../data/BiconditionalOdor/M040-2020-04-28-CDOD11"

In [3]:
# Metadata shared by every recording coming from the lab
lab_metadata = {
    "lab": "MVDMLab",
    "institution": "Dartmouth College",
    "keywords": ["DANDI Pilot"],  # arbitrary, so let's promote!
}
# Metadata specific to this experiment
experiment_metadata = {
    # Let's see if nwb can swallow such a record ;)
    "experimenter": "Jimmie Gmaz <jim.gmaz@gmail.com>",
    "experiment_description": "Contextual odor discrimination task",
}


In [10]:
# Create a neo reader over the Neuralynx session directory
reader = neo.io.NeuralynxIO(dirname=session_data) # TODO: newer neo versions should also accept keep_original_times=True here
# Parse the headers, then print the reader summary (blocks, segments, channels)
reader.parse_header()
print(reader)

NeuralynxIO: /Users/jimmiegmaz/Desktop/M040-2020-04-28-CDOD11
nb_block: 1
nb_segment:  [1]
signal_channels: []
unit_channels: [chTT2#27#0, chTT2#28#0, chTT2#11#0, chTT2#12#0, chTT4#31#0, chTT4#15#0, chTT4#9#0, chTT4#25#0]
event_channels: [Events event_id=11 ttl=0, Events event_id=11 ttl=1, Events event_id=11 ttl=2, Events event_id=11 ttl=4 ... Events event_id=11 ttl=48 Events event_id=11 ttl=64 Events event_id=11 ttl=96 Events event_id=19 ttl=0]



In [11]:
import os.path as op
import re

# Session directories are named <subject_id>-<YYYY-MM-DD>-<task><day_of_recording>,
# e.g. M040-2020-04-28-CDOD11.  normpath() drops any trailing slash so that
# basename() still returns the directory name instead of an empty string.
session_dirname = op.basename(op.normpath(session_data))
filename_match = re.match(
    r'(?P<subject_id>[A-Za-z0-9]*)-(?P<date>20..-..-..)-(?P<task>[A-Za-z]*)(?P<day_of_recording>[0-9]*)$',
    session_dirname)
# re.match() returns None on failure, so check explicitly and fail with a
# clear message rather than an AttributeError on .groupdict().
if filename_match is None:
    raise ValueError(
        "Session directory name %r does not match the expected "
        "<subject_id>-<YYYY-MM-DD>-<task><day_of_recording> pattern" % session_dirname)
filename_metadata = filename_match.groupdict()
# Show the parsed fields (notebook cell display)
filename_metadata

{'date': '2020-04-28',
 'day_of_recording': '11',
 'subject_id': 'M040',
 'task': 'CDOD'}

In [12]:
import time

In [38]:
# Those timestamps are sub-second and not what we would want to use as the "session time"
# time.gmtime(reader.get_event_timestamps()[0][0])
# TODO: figure out where in this 

In [None]:
# TODO: figure out what units those timestamps are in.

In [13]:
# Scan the session's ExpKeys file (a MATLAB .m script) for the metadata
# needed in the NWB file.

# ExpKeys file lives in the session directory: <subject_id>_<YYYY_MM_DD>_keys.m
keys_name = op.join(
    session_data,
    filename_metadata['subject_id'] + '_' + filename_metadata['date'].replace('-', '_') + '_keys.m')

# read session ExpKeys
with open(keys_name, 'rt') as keys_file:
    exp_keys = keys_file.read()

# list of metadata to extract
metadata_list = ['ExpKeys.species', 'ExpKeys.hemisphere', 'ExpKeys.weight', 'ExpKeys.probeDepth', 'ExpKeys.target']

# initialize metadata dictionary; fields never found in the file stay None
metadata_keys = dict.fromkeys(metadata_list)

# The value is whatever sits between the first '=' and the following ';' on a line
value_pattern = re.compile(r'(?<==)(.*?)(?=;)')
# Drop quoting, brackets and whitespace around the value, but keep '.' and '-'
# so that decimal or negative numbers (e.g. 4.2, -1.5) are not mangled
junk_pattern = re.compile(r'[^A-Za-z0-9.\-]+')

# extract metadata
for item in exp_keys.splitlines():
    for field in metadata_list:
        if field not in item:
            continue
        value_match = value_pattern.search(item)
        if value_match is None:
            # the field is mentioned without a "= value;" assignment
            # (e.g. inside a comment) - nothing to extract from this line
            continue
        metadata_keys[field] = junk_pattern.sub('', value_match.group(0).strip())
        print(metadata_keys[field])

# TODO: add surgery details to ExpKeys, including AP and ML coordinates, change probeDepth to mm,
# add filtering, individual tetrode depth, tetrode referencing

Mouse
left
vStr
4200



In [18]:
# Metadata which is likely to come from data files and "promotion" metadata records

# Most likely many could be parsed from the filenames which are likely to encode some of it
# So "heuristical" converter could establish metadata harvesting from the filenames

#
# Session specific
session_metadata = dict(
    session_id="%(subject_id)s-%(date)s" % filename_metadata,
    session_description="Extracellular ephys recording in the left hemisphere of the nucleus accumbens",  # args[0] in nwbfile
    # args[2] in nwbfile.  The recording date is encoded in the session directory
    # name; the time of day is not, so midnight local time is used instead of
    # the moment the conversion happens to run.
    # TODO: take the exact start time from the recording headers
    session_start_time=datetime.strptime(filename_metadata['date'], '%Y-%m-%d').replace(tzinfo=tzlocal()),
)
subject_metadata = dict(
    subject_id=filename_metadata['subject_id'],
    weight=metadata_keys['ExpKeys.weight'],
    age="TODO",  # duplicate with session_start_time and date_of_birth but why not?
    species=metadata_keys['ExpKeys.species'],
    sex="female",
#     hemisphere=metadata_keys['ExpKeys.hemisphere'],
#     depth=metadata_keys['ExpKeys.probeDepth'],
#     region=metadata_keys['ExpKeys.target'],
    date_of_birth=datetime.now(tzlocal()), # TEMP placeholder, NOT the real date of birth: TODO
)
surgery_metadata = dict(
    surgery="Headbar on xx/xx/2020, craniotomy over right hemisphere on xx/xx/2020, craniotomy over left hemisphere on xx/xx/2020. All surgeries performed by JG."
)
# Actually probably only "identifier" should be file specific, the rest common across files
# we would like to produce: separate for .ncs, .ntt, behavioral metadata, etc
file_metadata = dict(
    source_script="somescript-not-clear-whyneeds to be not empty if file_name is provided",
    source_script_file_name="TODO", # __file__,
)

# common filename prefix - let's mimic DANDI filenaming convention right away
filename_prefix = "sub-{subject_id}_ses-{session_id}".format(**subject_metadata, **session_metadata)
# the rest will be specific to the corresponding file. E.g. we will have separate
#  - `_probe-<name>_ecephys.nwb` (from each .ncs) - continuous data from each tetrode. probably chunked and compressed
#  - `_???_ecephys.nwb` (from each .ntt) - spike detected windowed data.
#  - `_behav.mpg` + `_behav.nwb` - video recording and metadata (including those .png?) for behavior component within experiment recording session
# Pretty much we need to establish a framework where EVERY file present would be
# provided

In [19]:
# Show the assembled subject metadata (notebook cell display)
subject_metadata

{'age': 'TODO',
 'date_of_birth': datetime.datetime(2020, 8, 19, 7, 50, 52, 264817, tzinfo=tzlocal()),
 'sex': 'female',
 'species': 'Mouse',
 'subject_id': 'M040',
 'weight': ''}

The code below would need to follow a common pattern:
- create a new NWBFile with the common metadata,
- populate it with the relevant data and metadata,
- save it.

In [20]:
# One such NWBFile is to be created per output file and then filled with the
# data corresponding to that file; the suffix tells the files apart.
filename_suffix = "TODO"

# args[1] in nwbfile, may be just UUID? not sure why user has to provide it really
nwb_identifier = "%s_%s" % (filename_prefix, filename_suffix)
subject = pynwb.file.Subject(**subject_metadata)

nwbfile = NWBFile(
    identifier=nwb_identifier,
    subject=subject,
    **lab_metadata,
    **experiment_metadata,
    **session_metadata,
    **surgery_metadata,
    **file_metadata,
)

In [21]:
# Show the identifier that was assigned to the NWB file
print(nwbfile.identifier)

sub-M040_ses-M040-2020-04-28_TODO


In [None]:
# add electrode metadata
# create probe device
device = nwbfile.create_device(name='silicon probe', description='A4x2-tet-5mm-150-200-121', manufacturer='NeuroNexus')

# Unit channel names look like "chTT2#27#0": the tetrode id follows "TT" and
# the channel id sits between the first two '#' marks.  Compile once, not per channel.
tetrode_pattern = re.compile(r'(?<=TT)(.*?)(?=#)')
channel_pattern = re.compile(r'(?<=#)(.*?)(?=#)')

# The names end in a third '#'-separated field (apparently a unit id - TODO confirm),
# so the same tetrode/channel pair may be listed more than once.  Remember what
# was already added so that each electrode gets exactly one row in the table.
added_electrodes = set()

# for each channel on the probe
for chl in reader.header['unit_channels']:

    # get tetrode id
    tetrode = tetrode_pattern.search(chl[0]).group(0)
    electrode_name = 'tetrode' + tetrode

    # get channel id
    channel = channel_pattern.search(chl[0]).group(0)

    if electrode_name not in nwbfile.electrode_groups:  # make tetrode if does not exist

        description = electrode_name
        location = metadata_keys['ExpKeys.hemisphere'] + ' ' + metadata_keys['ExpKeys.target'] + ' ' + \
            '(' + metadata_keys['ExpKeys.probeDepth'] + ' um)'

        electrode_group = nwbfile.create_electrode_group(electrode_name,
                                                         description=description,
                                                         location=location,
                                                         device=device)

    # skip a tetrode/channel pair that already has its electrode row
    if (electrode_name, channel) in added_electrodes:
        continue
    added_electrodes.add((electrode_name, channel))

    # add channel to tetrode
    nwbfile.add_electrode(id=int(channel),
                          x=-1.2, y=float(metadata_keys['ExpKeys.probeDepth']), z=-1.5,
                          location=metadata_keys['ExpKeys.target'], filtering='none',
                          imp=0.0, group=nwbfile.electrode_groups[electrode_name])

In [None]:
# Add (placeholder) electrode data - adapted from the pynwb ecephys example
from pynwb.ecephys import ElectricalSeries

# Table region pointing at rows 0 and 2 of the electrodes table
electrode_table_region = nwbfile.create_electrode_table_region(
    region=[0, 2],
    description='the first and third electrodes',
)

# Reproducible random samples: data_len rows, one column per selected electrode
rate = 10.0
np.random.seed(1234)
data_len = 1000
ephys_data = np.random.rand(data_len * 2).reshape(data_len, 2)
ephys_timestamps = np.arange(data_len) / rate

ephys_ts = ElectricalSeries(
    name='test_ephys_data',
    data=ephys_data,
    electrodes=electrode_table_region,
    # Alternatively, could specify starting_time and rate as follows
    # starting_time=ephys_timestamps[0],
    # rate=rate,
    timestamps=ephys_timestamps,
    resolution=0.001,
    comments="This data was randomly generated with numpy, using 1234 as the seed",
    description="Random numbers generated with numpy.random.rand",
)
nwbfile.add_acquisition(ephys_ts)

# Two example units, both assigned to electrode table row 0
for unit_id in (1, 2):
    nwbfile.add_unit(id=unit_id, electrodes=[0])

In [114]:
# Write the assembled NWBFile to disk
from pynwb import NWBHDF5IO

# TODO: I think we should right away use dandi-cli provided API to create the filename based on metadata
# in the NWBFile
output_path = 'ecephys_example.nwb'
with NWBHDF5IO(output_path, mode='w') as io:
    io.write(nwbfile)