An initial attempt at NWB conversion of NeuraLynx data, following the "manual" conversion described in https://pynwb.readthedocs.io/en/stable/tutorials/domain/ecephys.html . Unlike the example(s) there, I (Yarik) was trying to identify the levels of data and metadata to consider, and also to store them across multiple .nwb files.

In [10]:
import numpy as np
import pynwb
from datetime import datetime
from dateutil.tz import tzlocal
from pynwb import NWBFile

import neo

In [21]:
# Path to the NeuraLynx session directory to convert; it is handed to the Neo
# reader as `dirname`, and its basename is parsed for session metadata.
session_data = '../data/BiconditionalOdor/M040-2020-04-28-CDOD11'

In [2]:
# Metadata shared by every recording produced in the lab
lab_metadata = {
    "lab": "MVDMLab",
    "institution": "Dartmouth College",
    # free-form keywords, so use them to advertise the DANDI pilot
    "keywords": ["DANDI Pilot"],
}
# Metadata specific to this experiment
experiment_metadata = {
    # checking whether NWB accepts a "Name <email>" style record
    "experimenter": "Jimmie Gmaz <jim.gmaz@gmail.com>",
    "experiment_description": "Whatever Jimmi did and why",
}


In [15]:
# Create a Neo reader over the whole session directory, parse its header,
# and print the reader's summary of what it found.
reader = neo.io.NeuralynxIO(dirname=session_data) # TODO: newer version should support: , keep_original_times=True)
reader.parse_header()
print(reader)

NeuralynxIO: ../data/BiconditionalOdor/M040-2020-04-28-CDOD11/
nb_block: 1
nb_segment:  [7]
signal_channels: [CSC1, CSC3, CSC8]
unit_channels: [chTT1#26#0, chTT1#10#0, chTT1#29#0, chTT1#30#0]
event_channels: [Events event_id=11 ttl=0, Events event_id=11 ttl=1, Events event_id=11 ttl=2, Events event_id=11 ttl=4 ... Events event_id=11 ttl=48 Events event_id=11 ttl=64 Events event_id=11 ttl=96 Events event_id=19 ttl=0]



In [30]:
import os.path as op
import re

# Session directories are named <subject_id>-<YYYY-MM-DD>-<task><day_of_recording>,
# e.g. "M040-2020-04-28-CDOD11".  normpath() strips any trailing slash so that
# basename() does not come back empty.
session_name = op.basename(op.normpath(session_data))
filename_match = re.match(
    r'(?P<subject_id>[A-Za-z0-9]*)-(?P<date>20..-..-..)-(?P<task>[A-Za-z]*)(?P<day_of_recording>[0-9]*)$',
    session_name)
# re.match() returns None when the name does not follow the convention; fail with
# an explicit message instead of an AttributeError on `None.groupdict()`.
if filename_match is None:
    raise ValueError(
        "Session directory name %r does not match the expected "
        "<subject_id>-<YYYY-MM-DD>-<task><day_of_recording> pattern" % session_name)
filename_metadata = filename_match.groupdict()
filename_metadata

{'subject_id': 'M040',
 'date': '2020-04-28',
 'task': 'CDOD',
 'day_of_recording': '11'}

In [36]:
import time

In [38]:
# Those timestamps are sub-second values, not the ones we would want to use as the "session time"
# time.gmtime(reader.get_event_timestamps()[0][0])
# TODO: figure out where in this data the actual session time is recorded

In [None]:
# TODO: figure out what those timestamps are in.

In [31]:
# Metadata that will most likely be harvested from the data files themselves,
# plus the "promotion" metadata records.
#
# A lot of it is probably encoded in the file names, so a "heuristic" converter
# could set up metadata harvesting from the file names.

# Session-specific metadata
session_metadata = {
    "session_id": "{subject_id}-{date}".format(**filename_metadata),
    # args[0] in nwbfile
    "session_description": "Extracellular ephys recording in .... TODO ... ",
    # TEMP placeholder; args[2] in nwbfile, TODO: must be a real datetime
    "session_start_time": datetime.now(tzlocal()),
}
# Subject-specific metadata; everything marked TODO is still a placeholder
subject_metadata = {
    "subject_id": "TODO",
    "weight": "TODO",
    # redundant given session_start_time and date_of_birth, but recorded anyway
    "age": "TODO",
    "species": "Mus musculus",
    "sex": "TODO",
    # TEMP placeholder: TODO
    "date_of_birth": datetime.now(tzlocal()),
}
# Free-text surgery record (still a placeholder)
surgery_metadata = {
    "surgery": "TODO: Narrative description about surgery/surgeries, including date(s) and who performed surgery.",
}
# Probably only the "identifier" needs to be file specific; the rest should be
# common across the files we would like to produce (separate ones for .ncs,
# .ntt, behavioral metadata, etc.).
file_metadata = {
    "source_script": "somescript-not-clear-whyneeds to be not empty if file_name is provided",
    "source_script_file_name": "TODO",  # __file__,
}

# Filename prefix shared by all output files, mimicking the DANDI file naming
# convention from the start.
filename_prefix = f"sub-{subject_metadata['subject_id']}_ses-{session_metadata['session_id']}"
# The remainder of each name is specific to the corresponding file, e.g. separate
#  - `_probe-<name>_ecephys.nwb` (from each .ncs) - continuous data from each tetrode, probably chunked and compressed
#  - `_???_ecephys.nwb` (from each .ntt) - spike-detected windowed data
#  - `_behav.mpg` + `_behav.nwb` - video recording and metadata (including those .png?) for the behavior component of the recording session
# Essentially we need to establish a framework in which EVERY file present
# would be provided.

In [4]:
# Display the assembled subject metadata as the cell output
subject_metadata

{'subject_id': 'TODO',
 'weight': 'TODO',
 'age': 'TODO',
 'species': 'Mus musculus',
 'sex': 'TODO',
 'date_of_birth': datetime.datetime(2020, 6, 22, 16, 39, 41, 154839, tzinfo=tzlocal())}

The code below would need to follow a common pattern:
- create a new NWBFile with common metadata,
- populate with relevant data and metadata
- save

In [5]:
# An NWBFile like this one is meant to be created for each separate output file
# and then filled with the corresponding data; the metadata dicts are shared.
filename_suffix = "TODO"
# args[1] in nwbfile; might just be a UUID -- unclear why the user has to provide it
nwbfile_identifier = f"{filename_prefix}_{filename_suffix}"
subject = pynwb.file.Subject(**subject_metadata)
nwbfile = NWBFile(
    identifier=nwbfile_identifier,
    subject=subject,
    **lab_metadata,
    **experiment_metadata,
    **session_metadata,
    **surgery_metadata,
    **file_metadata,
)

In [6]:
# Show the identifier that was composed from filename_prefix and filename_suffix
print(nwbfile.identifier)

sub-TODO_ses-TODO_TODO


In [7]:
# Copied from the example and not yet tuned to the data at hand.
# We will most likely want a Neo-based "sweep and convert" approach, while
# making sure no piece of data or file is forgotten.
device = nwbfile.create_device(name='trodes_rig123')

electrode_name = 'tetrode1'
description = "an example tetrode"
location = "somewhere in the hippocampus"

# Collect the keyword arguments first, then create the group on the NWB file
electrode_group_kwargs = {
    "description": description,
    "location": location,
    "device": device,
}
electrode_group = nwbfile.create_electrode_group(electrode_name, **electrode_group_kwargs)

In [8]:
#
# Write the generated NWBFile to disk
from pynwb import NWBHDF5IO

# TODO: we should probably use the API provided by dandi-cli right away to
# derive the filename from the metadata stored in the NWBFile
output_path = 'ecephys_example.nwb'
with NWBHDF5IO(output_path, 'w') as nwb_io:
    nwb_io.write(nwbfile)