## Notebook for spike sorting from SGL data using Kilosort
Uses:
    - intan2kwik (https://github.com/zekearneodo/intan2kwik/blob/master/README.md)
    - mountainlab suite (https://github.com/flatironinstitute/mountainlab-js)
    - mountainsort https://github.com/flatironinstitute/mountainsort_examples/blob/master/README.md
    - mountainsort examples https://github.com/flatironinstitute/mountainsort_examples/blob/master/README.md

In [1]:
import socket
import os
import glob
import json
import shutil 
from typing import Union
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import subprocess
from datetime import timedelta
from importlib import reload

# pipeline imports
from pipefinch.h5tools.kwik import kutil
from pipefinch.pipeline import probes
from pipefinch.pipeline import sglxutil as sglu

import logging

# Setup the logger: the root logger accepts everything from DEBUG up,
# the console handler only shows INFO and above.
logger = logging.getLogger()
logger.setLevel(logging.DEBUG)
formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')

# Re-running this cell must not stack one more console handler on the root
# logger (every extra handler makes each message print one more time), so the
# handler is tagged with a name and reused when it is already attached.
CONSOLE_HANDLER_NAME = 'notebook_console'
ch = next((h for h in logger.handlers if h.get_name() == CONSOLE_HANDLER_NAME), None)
if ch is None:
    ch = logging.StreamHandler()
    ch.set_name(CONSOLE_HANDLER_NAME)
    logger.addHandler(ch)
ch.setLevel(logging.INFO)
ch.setFormatter(formatter)

logger.info('Logger set')
logger.info('Hostname {}'.format(socket.gethostname()))

2019-07-28 14:10:12,411 - root - INFO - Logger set
2019-07-28 14:10:12,412 - root - INFO - Hostname zpikezorter


### Session parameters and raw files

In [2]:
from pipefinch.neural.sort import kilo

In [21]:
from pipefinch.pipeline import filestructure as et
reload(et)

# sess_par = {'bird': 'p14r14',
#            'sess': '2019-02-14_2250_01'}
# sess_par = {'bird': 'g3v3',
#            'sess': '110A2490R3800V_audiostimgood_g0',
#            'probe': 'probe_0', # probe to sort ('probe_0', 'probe_1') (to lookup in the rig_par which port to extract)
#            'sort': 1}

# Session to process. 'probe' names the probe to sort ('probe_0', 'probe_1');
# it is used to look up in the rig_par which port to extract.
sess_par = {
    'bird': 'g4r4',
    'sess': '20190711_03_tipref',
    'probe': 'probe_0',
    'sort': 1,
}

# Folder/file layout of the experiment for this bird, session and sort index
exp_struct = et.get_exp_struct(sess_par['bird'],
                               sess_par['sess'],
                               sess_par['sort'])

# Sorting parameter sets
sort_params = {'adjacency_radius': -1, 'detect_threshold': 2, 'freq_min': 600}
ds_params = {'detect_sign': -1}
ks_params = {'use_gpu': 1, 'auto_merge': 1, 'filt_per_chan': 4}

# Shortcuts to the folders used throughout the notebook
kwik_folder, ksort_folder, raw_folder = (
    exp_struct['folders'][folder_key] for folder_key in ('kwik', 'ksort', 'raw')
)

In [22]:
exp_struct

{'folders': {'raw': '/mnt/microdrive/birds/g4r4/Ephys/raw/20190711_03_tipref',
  'kwik': '/data/experiment/microdrive/g4r4/Ephys/kwik/20190711_03_tipref',
  'msort': '/data/experiment/microdrive/g4r4/Ephys/msort/20190711_03_tipref',
  'ksort': '/data/experiment/microdrive/g4r4/Ephys/ksort/20190711_03_tipref'},
 'files': {'par': '/data/experiment/microdrive/g4r4/Ephys/ksort/20190711_03_tipref/params.json',
  'set': '/mnt/microdrive/birds/g4r4/Ephys/raw/20190711_03_tipref/settings.isf',
  'rig': '/mnt/microdrive/birds/g4r4/Ephys/raw/20190711_03_tipref/rig.json',
  'kwd': '/data/experiment/microdrive/g4r4/Ephys/kwik/20190711_03_tipref/streams.kwd',
  'kwik': '/data/experiment/microdrive/g4r4/Ephys/kwik/20190711_03_tipref/sort_1/spikes.kwik',
  'kwe': '/data/experiment/microdrive/g4r4/Ephys/kwik/20190711_03_tipref/events.kwe',
  'mda_raw': '/data/experiment/microdrive/g4r4/Ephys/msort/20190711_03_tipref/raw.mda',
  'bin_raw': '/data/experiment/microdrive/g4r4/Ephys/ksort/20190711_03_tipref

#### get the spikeglx folder/file structure

In [23]:
# Raw session folder as seen from zpike.
# (on lookfar the folder was set by hand instead:
#  sess_folder = '/Users/zeke/experiment/stimsim/Ephys/2019-05-27_stimsim_0000_00_g0')
sess_folder = exp_struct['folders']['raw']

# Everything that sits directly inside the session folder
session_entries_pattern = os.path.join(sess_folder, '*')
all_probe_folders = glob.glob(session_entries_pattern)

# spikeglx folder structure of the session; the folder dict is shown as the cell output
sgl_folder, sgl_pd = sglu.sgl_file_struct(sess_folder)
sgl_folder

{'nidq': '/mnt/microdrive/birds/g4r4/Ephys/raw/20190711_03_tipref', 'imec': {}}

##### get the AP files for one imec probe

The imec file.

Here's the meaning of some of the metadata https://github.com/JaneliaSciComp/JRCLUST/wiki/.meta-file. In particular, there is an explanation of what channels in the probe are used and where they are located in the block. More detailed meta here https://github.com/billkarsh/SpikeGLX/blob/master/Markdown/Metadata.md.


In [6]:
# Probe index parsed from the session parameter, e.g. 'probe_0' -> 0
probe_id = int(sess_par['probe'].split('_')[-1])

# Fail with an explicit message when the session has no data for that probe
# (e.g. when sgl_folder['imec'] is empty) instead of a bare lookup error.
try:
    probe_data_folder = sgl_folder['imec'][probe_id]
except (KeyError, IndexError) as lookup_error:
    raise KeyError('No imec folder found for probe {} (available: {})'.format(
        probe_id, list(sgl_folder['imec']))) from lookup_error

# glob returns its matches in arbitrary order; sort them so that the file
# picked with [0] is the same on every run.
ap_meta_files = sorted(glob.glob(os.path.join(probe_data_folder, '*.ap.meta')))
if not ap_meta_files:
    raise FileNotFoundError('No *.ap.meta file found in {}'.format(probe_data_folder))

ap_meta_files[0]

'/mnt/microdrive/birds/g4r4/Ephys/raw/20190712_01_extref_g0/20190712_01_extref_g0_imec0/20190712_01_extref_g0_t0.imec0.ap.meta'

#### read a file and its meta

In [7]:
# Use the first AP meta file found for the selected probe
imec_meta_file_path = ap_meta_files[0]
# these should come from the .meta file
imec_meta_dict = sglu.get_imec_meta(imec_meta_file_path)

# imec0 is indexed below with the keys 'meta' and 'neural' (and later 'only_neural')
imec0 = sglu.get_imec_data(imec_meta_file_path)
# channel count is taken from the meta dict, sampling rate from the loaded data's own meta
n_chan = imec_meta_dict['nsavedchans'] #nSavedChans in meta file
# NOTE(review): the inline value below is 30000.533148 while the recorded ks_params
# output shows s_f 30000.0 - confirm which rate get_imec_data reports
s_f = imec0['meta']['s_f'] #30000.533148 #imSampleRate in meta file



In [8]:
# get rid of the sync channel
# (keeps only the first 384 rows of imec0['neural'])
imec0['only_neural'] = imec0['neural'][:384, :]
imec0['only_neural'].shape
# NOTE(review): this rebinds n_chan (read from 'nsavedchans' in the previous cell) to the
# row count of the sliced array. The recorded ks_params output further down shows
# n_chan 385, so confirm which count matches the .bin file that is actually sorted.
n_chan = imec0['only_neural'].shape[0]

### Load the rig parameters and get the probe file, behavior triggers, etc
 - Get the rig par file
 - Get the aux channels
 - Detect onset of wav files

In [9]:
# do it

## Scripts for sorting with Kilosort
Steps involved:
 - Make binary file with selected recs, chans
 - Set kilosort parameters
 - Make kilosort chanmap
 - Make kilosort scripts and phy parameters file (for manual curation)
 - Run the kilosort scripts (via matlab)
 - Expose the paths for manual curation
 - After curation, make the kwik file with sorted data
 - Cleanup and move metadata to permanent locations

### prep the files with their nice formats, locations and names


In [8]:
from pipefinch.neural.sort.kilo import core as ksc

In [9]:
# Pick up any edits made to the kilosort core module
reload(ksc)

# Kilosort settings for this session; the sampling rate and channel count
# are the values obtained in the cells above.
ks_params = {
    'kilo_version': 2,
    'use_gpu': 1,
    'auto_merge': 1,
    'filt_per_chan': 4,
    's_f': s_f,
    'n_chan': n_chan,
}

In [11]:
exp_struct['folders']['ksort']

'/data/experiment/microdrive/g4r4/Ephys/ksort/20190712_01_extref_g0'

In [12]:
# Reload the pipeline modules, then get the dict of sort file paths and the output
# folder for the session's ksort folder.
# NOTE(review): these three lines are repeated verbatim at the top of the file-prep cell below.
reload(ksc)
reload(probes)
file_paths, out_folder = ksc.make_paths(exp_struct['folders']['ksort'])

In [13]:
import shutil

In [14]:
file_paths

{'bin': '/data/experiment/microdrive/g4r4/Ephys/ksort/20190712_01_extref_g0/raw.bin',
 'params': '/data/experiment/microdrive/g4r4/Ephys/ksort/20190712_01_extref_g0/params.json',
 'prb': '/data/experiment/microdrive/g4r4/Ephys/ksort/20190712_01_extref_g0/chanMap.mat',
 'rez': '/data/experiment/microdrive/g4r4/Ephys/ksort/20190712_01_extref_g0/rez2.mat',
 'mat_log': '/data/experiment/microdrive/g4r4/Ephys/ksort/20190712_01_extref_g0/kilosort_mat.log',
 'phy_par': '/data/experiment/microdrive/g4r4/Ephys/ksort/20190712_01_extref_g0/params.py'}

In [16]:
reload(ksc)
reload(probes)
file_paths, out_folder = ksc.make_paths(exp_struct['folders']['ksort'])
os.makedirs(out_folder, exist_ok=True)

# make the probe file
# for now, just copy the default channel map shipped with Kilosort2.
# The location of the Kilosort2 repository can be overridden with the
# KILOSORT2_PATH environment variable; the default is the path used on zpikezorter.
kilosort_repo = os.environ.get('KILOSORT2_PATH', '/home/ezequiel/repos/Kilosort2')
chan_map_source = os.path.join(kilosort_repo, 'configFiles', 'neuropixPhase3B2_kilosortChanMap.mat')
if not os.path.isfile(chan_map_source):
    raise FileNotFoundError(
        'Kilosort channel map not found at {} (set KILOSORT2_PATH to the Kilosort2 repository)'.format(chan_map_source))
shutil.copyfile(chan_map_source, file_paths['prb'])

# copy the binary file as the 'raw' binary file for the sorting
# this has to be done either way because /data partition is faster
# todo: concatenate them or something, from a kwd
#logger.info('copying raw file into {}'.format(file_paths['bin']))
#imec0['only_neural'].tofile(file_paths['bin'])

#shutil.copyfile(sglu.get_data_meta_path(imec_meta_file_path)[0], file_paths['bin'])

# parameters to pass to the kilosort scripts, other than the defaults
ks_params.update({'s_f': s_f, # required,
                  'n_chan': n_chan, # total number of chans in the .bin file,
                  'dtype_name': imec0['neural'].dtype.name
            })
# write the kilosort scripts and the phy parameters file into the sort folder
ksc.make_kilo_scripts(exp_struct['folders']['ksort'], ks_params)
phy_pars = ksc.make_phy_par_file(ks_params, file_paths)

2019-07-28 14:12:38,656 - pipefinch.neural.sort.kilo.core - INFO - Written kilo script /data/experiment/microdrive/g4r4/Ephys/ksort/20190712_01_extref_g0/master.m
2019-07-28 14:12:38,659 - pipefinch.neural.sort.kilo.core - INFO - Written kilo script /data/experiment/microdrive/g4r4/Ephys/ksort/20190712_01_extref_g0/config.m
2019-07-28 14:12:38,661 - pipefinch.neural.sort.kilo.core - INFO - Written kilo script /data/experiment/microdrive/g4r4/Ephys/ksort/20190712_01_extref_g0/run_master.m
2019-07-28 14:12:38,663 - pipefinch.neural.sort.kilo.core - INFO - Written phy parameters file /data/experiment/microdrive/g4r4/Ephys/ksort/20190712_01_extref_g0/params.py
2019-07-28 14:12:38,666 - pipefinch.neural.sort.kilo.core - INFO - Written ksort parameters file /data/experiment/microdrive/g4r4/Ephys/ksort/20190712_01_extref_g0/params.json


In [17]:
file_paths

{'bin': '/data/experiment/microdrive/g4r4/Ephys/ksort/20190712_01_extref_g0/raw.bin',
 'params': '/data/experiment/microdrive/g4r4/Ephys/ksort/20190712_01_extref_g0/params.json',
 'prb': '/data/experiment/microdrive/g4r4/Ephys/ksort/20190712_01_extref_g0/chanMap.mat',
 'rez': '/data/experiment/microdrive/g4r4/Ephys/ksort/20190712_01_extref_g0/rez2.mat',
 'mat_log': '/data/experiment/microdrive/g4r4/Ephys/ksort/20190712_01_extref_g0/kilosort_mat.log',
 'phy_par': '/data/experiment/microdrive/g4r4/Ephys/ksort/20190712_01_extref_g0/params.py'}

In [18]:
ks_params

{'kilo_version': 2,
 'use_gpu': 1,
 'auto_merge': 1,
 'filt_per_chan': 4,
 's_f': 30000.0,
 'n_chan': 385,
 'dtype_name': 'int16'}

In [19]:
# Run the sort using the paths prepared above. The log output of this cell reports
# kilosort being run on matlab, with its output going to file_paths['mat_log'].
reload(ksc)
sort_result, sort_return_value = ksc.do_the_sort(file_paths)

2019-07-28 14:12:50,784 - pipefinch.neural.sort.kilo.core - INFO - Running kilosort on matlab
2019-07-28 14:12:50,786 - pipefinch.neural.sort.kilo.core - INFO - Sort folder is /data/experiment/microdrive/g4r4/Ephys/ksort/20190712_01_extref_g0
2019-07-28 14:12:50,787 - pipefinch.neural.sort.kilo.core - INFO - output to /data/experiment/microdrive/g4r4/Ephys/ksort/20190712_01_extref_g0/kilosort_mat.log


In [None]:
sort_return_value

In [20]:
sort_result



In [26]:
file_paths

{'bin': '/data/experiment/microdrive/p14r14/Ephys/ksort/2019-02-15_3125_01/raw.bin',
 'params': '/data/experiment/microdrive/p14r14/Ephys/ksort/2019-02-15_3125_01/params.json',
 'prb': '/data/experiment/microdrive/p14r14/Ephys/ksort/2019-02-15_3125_01/chanMap.mat',
 'rez': '/data/experiment/microdrive/p14r14/Ephys/ksort/2019-02-15_3125_01/rez2.mat',
 'mat_log': '/data/experiment/microdrive/p14r14/Ephys/ksort/2019-02-15_3125_01/kilosort_mat.log',
 'phy_par': '/data/experiment/microdrive/p14r14/Ephys/ksort/2019-02-15_3125_01/params.py'}

## Command for viewing:
 - open up terminal with the environment msort
 - go to the ss_data folder for the session
 - run the command: qt-mountainview --raw raw.mda --filt sort_out/filt.mda.prv --pre sort_out/pre.mda.prv --samplerate=20000 --firings sort_out/firings.mda --cluster_metrics sort_out/cluster_metrics.json

# After manual curation
 - save the curated spikes in the sort_out/firings_curated.mda
 - come back to the notebook and run the cells below

In [28]:
from pipefinch.h5tools.kwik import kwikfunctions as kwkf
reload(kwkf)
reload(et)

# Create the kwik file of the session from the kilosort output folder
# (arguments: kwd file path, kwik file path, ksort folder).
# NOTE(review): `selection_rec_list` is not assigned in any cell visible in this
# notebook, so on a fresh kernel this call raises NameError - define the list of
# recordings contained in the binary file before running this cell.
kwkf.kilo_to_kwik(exp_struct['files']['kwd'],
                 exp_struct['files']['kwik'],
                 exp_struct['folders']['ksort'],
                 rec_in_binary=selection_rec_list)


2019-05-21 15:42:14,162 - pipefinch.h5tools.kwik.kwikfunctions - INFO - Creating kwik file /data/experiment/microdrive/p14r14/Ephys/kwik/2019-02-15_3125_01/sort_1/spikes.kwik from kilosort folder /data/experiment/microdrive/p14r14/Ephys/ksort/2019-02-15_3125_01
2019-05-21 15:42:14,164 - pipefinch.h5tools.kwik.kwikfunctions - INFO - Found clu file, will attempt to unpack manual sorted data from kilosort
2019-05-21 15:42:14,171 - pipefinch.h5tools.kwik.kwikfunctions - INFO - Making spike tables
2019-05-21 15:42:14,361 - pipefinch.h5tools.kwik.kwikfunctions - INFO - Making rec tables (make_rec_groups)
2019-05-21 15:42:14,365 - pipefinch.h5tools.kwik.kwikfunctions - INFO - Making cluster group tables
2019-05-21 15:42:14,366 - pipefinch.h5tools.kwik.kwikfunctions - INFO - found cluster tags file in /data/experiment/microdrive/p14r14/Ephys/ksort/2019-02-15_3125_01/cluster_group.tsv
2019-05-21 15:42:14,368 - pipefinch.h5tools.kwik.kwikfunctions - INFO - Done


In [29]:
# Check that the manual-curation cluster labels exist for the session being
# processed. The path is derived from exp_struct rather than hard-coded, so the
# check always refers to the current session's ksort folder.
os.path.isfile(os.path.join(exp_struct['folders']['ksort'], 'cluster_group.tsv'))

True

In [31]:
# Extract the waveforms of every unit in the sorted kwik file.
# Modules are reloaded first so that edits made to them are picked up.
reload(kwkf)

from pipefinch.neural import units
reload(units)

kwik_file_path = exp_struct['files']['kwik']
kwd_file_path = exp_struct['files']['kwd']
units.get_all_unit_waveforms(kwik_file_path, kwd_file_path)

2019-05-21 15:42:28,359 - pipefinch.neural.units - INFO - About to get all waveforms for 3 units in file /data/experiment/microdrive/p14r14/Ephys/kwik/2019-02-15_3125_01/sort_1/spikes.kwik


HBox(children=(IntProgress(value=0, max=3), HTML(value='')))




0

In [None]:
def msort_cleanup(exp_struct: dict):
    """Delete the intermediate raw .mda file of a session.

    Args:
        exp_struct: experiment structure dict; only
            exp_struct['files']['mda_raw'] (path of the file to delete) is read.

    Raises:
        KeyError: if exp_struct has no ['files']['mda_raw'] entry.

    A file that is already gone is logged and skipped, so the cleanup can be
    run more than once without failing.
    """
    mda_raw_path = exp_struct['files']['mda_raw']
    logger.info('removing intermediate msort mda file {}'.format(mda_raw_path))
    try:
        os.remove(mda_raw_path)
    except FileNotFoundError:
        # nothing left to clean up; keep the cell re-runnable
        logger.info('msort mda file {} not found, nothing to remove'.format(mda_raw_path))

def msort_tmp_clean():
    """Log the absolute path of the mountainsort temporary directory.

    The directory is read from the ML_TEMPORARY_DIRECTORY environment
    variable (KeyError if it is not set). Only a log message is emitted;
    no file or folder is removed here.
    """
    ml_tmp_location = os.environ['ML_TEMPORARY_DIRECTORY']
    logger.info('Cleaning up msort temp dir {}'.format(os.path.abspath(ml_tmp_location)))
    
#msort_tmp_clean()
msort_cleanup(exp_struct)

In [165]:
 exp_struct['files']

{'par': '/media/zinch/Windows/experiment/p14r14/ephys/msort/2019-02-13_1750_01/params.json',
 'set': '/mnt/zuperfinchjr/Data/p14r14/ephys/raw/2019-02-13_1750_01/settings.isf',
 'kwd': '/media/zinch/Windows/experiment/p14r14/ephys/kwik/2019-02-13_1750_01/streams.kwd',
 'kwik': '/media/zinch/Windows/experiment/p14r14/ephys/kwik/2019-02-13_1750_01/spikes.kwik',
 'kwe': '/media/zinch/Windows/experiment/p14r14/ephys/kwik/2019-02-13_1750_01/events.kwe',
 'mda_raw': '/media/zinch/Windows/experiment/p14r14/ephys/msort/2019-02-13_1750_01/raw.mda'}