## Notebook for spike sorting from .kwd data using the Pipeline of mountainsort (linux channel has pipeline)
Uses:
    - intan2kwik (https://github.com/zekearneodo/intan2kwik/blob/master/README.md)
    - mountainlab suite (https://github.com/flatironinstitute/mountainlab-js)
    - mountainsort https://github.com/flatironinstitute/mountainsort_examples/blob/master/README.md
    - mountainsort examples https://github.com/flatironinstitute/mountainsort_examples/blob/master/README.md

In [1]:
import socket
import os
import glob
import json
from typing import Union
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import subprocess
from datetime import timedelta
from importlib import reload

# pipeline imports
from pipefinch.neural.convert import intan
from pipefinch.neural.sort.mountain import core as msc
from pipefinch.h5tools.kwik import kutil
from pipefinch.pipeline import probes


from pipefinch.h5tools.kwik import kwdfunctions as kwdf

from intan2kwik import kwd

#mountainsort imports (for sorting)
#import mountainlab_pytools.mlproc as mlp

import logging

# Setup the logger
# The root logger lets DEBUG and above through; the console handler below
# only shows INFO and above.
logger = logging.getLogger()
logger.setLevel(logging.DEBUG)
formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')

# Re-running this cell must not stack one more console handler on the root
# logger (every message would then be printed once per run of the cell), so
# the handler installed by a previous run is removed before adding a new one.
console_handler_name = 'notebook_console'
for stale_handler in [h for h in logger.handlers
                      if h.get_name() == console_handler_name]:
    logger.removeHandler(stale_handler)

ch = logging.StreamHandler()
ch.set_name(console_handler_name)
ch.setLevel(logging.INFO)
ch.setFormatter(formatter)
logger.addHandler(ch)

logger.info('Logger set')
logger.info('Hostname {}'.format(socket.gethostname()))

2019-07-15 12:58:21,353 - root - INFO - Logger set
2019-07-15 12:58:21,355 - root - INFO - Hostname zpikezorter


### Session parameters and raw files

In [2]:
from pipefinch.neural.sort import kilo

In [3]:
from pipefinch.pipeline import filestructure as et
# re-import the modules so that edits to their source are picked up without
# restarting the kernel
reload(et)
reload(kwd)

# Session to process. 'bird', 'sess' and 'sort' are handed to
# et.get_exp_struct below; 'probe' is used later to look up the port in the
# rig parameters.
# sess_par = {'bird': 'p14r14',
#            'sess': '2019-02-14_2250_01'}
sess_par = {'bird': 'acute_hvc_ra_stim_g19o19',
           'sess': '500a_2500l_3400v_acute_g19o19',
           'probe': 'probe_0', # probe to sort ('probe_0', 'probe_1') (to lookup in the rig_par which port to extract)
           'sort': 1}

# dict with the 'folders' and 'files' paths of the session (displayed in the next cell)
exp_struct = et.get_exp_struct(sess_par['bird'], sess_par['sess'], sess_par['sort'])

# NOTE(review): sort_params and ds_params are not referenced again in the
# visible part of this notebook -- presumably leftovers from the mountainsort
# version; confirm before removing.
sort_params = {'adjacency_radius': -1,
              'detect_threshold': 2,
              'freq_min': 600}

ds_params = {'detect_sign': -1}

# NOTE: ks_params is redefined with more entries in the Kilosort section below
ks_params = {'use_gpu': 1,
            'auto_merge': 1,
            'filt_per_chan': 4,
            }

# convenient paths
kwik_folder = exp_struct['folders']['kwik']
msort_folder = exp_struct['folders']['msort']
raw_folder = exp_struct['folders']['raw']
kwd_path = exp_struct['files']['kwd']
# NOTE: bin_path points at the 'mda_raw' entry here; it is reassigned to the
# 'bin_raw' entry in the cell that writes the binary file
bin_path = exp_struct['files']['mda_raw']

In [4]:
exp_struct

{'folders': {'raw': '/mnt/microdrive/birds/acute_hvc_ra_stim_g19o19/Ephys/raw/500a_2500l_3400v_acute_g19o19',
  'kwik': '/data/experiment/microdrive/acute_hvc_ra_stim_g19o19/Ephys/kwik/500a_2500l_3400v_acute_g19o19',
  'msort': '/data/experiment/microdrive/acute_hvc_ra_stim_g19o19/Ephys/msort/500a_2500l_3400v_acute_g19o19',
  'ksort': '/data/experiment/microdrive/acute_hvc_ra_stim_g19o19/Ephys/ksort/500a_2500l_3400v_acute_g19o19'},
 'files': {'par': '/data/experiment/microdrive/acute_hvc_ra_stim_g19o19/Ephys/ksort/500a_2500l_3400v_acute_g19o19/params.json',
  'set': '/mnt/microdrive/birds/acute_hvc_ra_stim_g19o19/Ephys/raw/500a_2500l_3400v_acute_g19o19/settings.isf',
  'rig': '/mnt/microdrive/birds/acute_hvc_ra_stim_g19o19/Ephys/raw/500a_2500l_3400v_acute_g19o19/rig.json',
  'kwd': '/data/experiment/microdrive/acute_hvc_ra_stim_g19o19/Ephys/kwik/500a_2500l_3400v_acute_g19o19/streams.kwd',
  'kwik': '/data/experiment/microdrive/acute_hvc_ra_stim_g19o19/Ephys/kwik/500a_2500l_3400v_acute_

### convert the whole session to a .kwd file
Conversion sends every .rhd file in the folder to a rec in the .kwd file (experiment.kwd in the session ss folder)
All of the files and all of the channels are converted; filtering and subselection of sub-epochs and channels occurs later.
The .kwd is raw data, only in a friendlier format.


#### Make a file for the session for the first time

In [5]:
reload(kwd)
## Convert the whole session to a kwd file
# make sure the destination folder exists before writing the .kwd
os.makedirs(kwik_folder, exist_ok=True)
# NOTE: called with overwrite=False. In the recorded run below this call ended
# with "RuntimeError: Kwd file already exists"; when that happens
# first_intan_hdr and sess_pd are not assigned (see the NameError on
# sess_pd.head() further down). Use the update cell instead for a session
# that already has a .kwd file.
first_intan_hdr, sess_pd = kwd.intan_to_kwd(raw_folder, kwd_path, overwrite=False, notempfile=True)



RuntimeError: Kwd file already exists

In [6]:
from intan2kwik.core.file import util as fu
import datetime
import time
reload(fu)

<module 'intan2kwik.core.file.util' from '/home/ezequiel/repos/intan2kwik/intan2kwik/core/file/util.py'>

In [7]:
sess_pd.head()

NameError: name 'sess_pd' is not defined

#### Update a session with subsequently recorded rhd files

In [8]:
reload(kwdf)
_, nu_pd, _ = kwdf.update_kwd(kwd_path, raw_folder)

2019-07-15 12:58:36,021 - pipefinch.h5tools.kwik.kwdfunctions - INFO - updating kwd file /data/experiment/microdrive/acute_hvc_ra_stim_g19o19/Ephys/kwik/500a_2500l_3400v_acute_g19o19/streams.kwd from folder /mnt/microdrive/birds/acute_hvc_ra_stim_g19o19/Ephys/raw/500a_2500l_3400v_acute_g19o19
2019-07-15 12:58:36,096 - pipefinch.h5tools.kwik.kwdfunctions - INFO - No new files to add to the file


## Make .bin file with a set of recordings in a session
 - pick all in port A
 - get all rec within a time range


In [9]:
# get the session meta
pd_meta = kwdf.get_all_rec_meta(kwd_path)
pd_meta.head()


Unnamed: 0,bit_depth,name,sample_rate,start_sample,start_time,channel_bit_volts,channel_names,channels_sample_rate,is_multiSampleRate_data,valid_samples,samples_count
0,16,0,20000.0,0,2019-06-07 18:21:47,"[0.195, 0.195, 0.195, 0.195, 0.195, 0.195, 0.1...","[A-000, A-001, A-002, A-003, A-004, A-005, A-0...","[20000.0, 20000.0, 20000.0, 20000.0, 20000.0, ...",0,"[8391600.0, 8391600.0, 8391600.0, 8391600.0, 8...",8391600


### pick a time interval of the recordings

In [10]:
def select_time_span(meta_pd, start, span_minutes):
    """Pick the rows of meta_pd whose 'start_time' falls within a time window.

    Arguments:
        meta_pd -- pandas DataFrame with a 'start_time' column
        start -- beginning of the window (comparable with the 'start_time' values)
        span_minutes -- length of the window, in minutes

    Returns:
        the rows of meta_pd with start <= start_time <= start + span_minutes
        (Series.between is used with its default, inclusive bounds)
    """
    window = timedelta(minutes=span_minutes)
    in_window = meta_pd['start_time'].between(start, start + window)
    return meta_pd.loc[in_window]

pd_meta_selection = select_time_span(pd_meta, pd_meta['start_time'][0], 30)
pd_meta_selection.shape

(1, 11)

In [21]:
# for instance
pd_meta_selection.head()

Unnamed: 0,bit_depth,name,sample_rate,start_sample,start_time,channel_bit_volts,channel_names,channels_sample_rate,is_multiSampleRate_data,valid_samples,samples_count
0,16,0,20000.0,0,2019-06-07 18:21:47,"[0.195, 0.195, 0.195, 0.195, 0.195, 0.195, 0.1...","[A-000, A-001, A-002, A-003, A-004, A-005, A-0...","[20000.0, 20000.0, 20000.0, 20000.0, 20000.0, ...",0,"[8391600.0, 8391600.0, 8391600.0, 8391600.0, 8...",8391600


In [24]:
# load the rig parameters
reload(kwdf)
rig_par_file = exp_struct['files']['rig']
with open(rig_par_file, 'r') as fp:
    rig_par = json.load(fp)

# get the probe and the port where the probe was connected
selected_probe = sess_par['probe']
# drop any '-' at the ends of the stored port entry (e.g. 'A-' -> 'A');
# the separator is added back when building the channel filter below
probe_port = rig_par['chan']['port'][selected_probe].strip('-')

# get the channel indices of the probe's port
# the filter is the port letter followed by '-' (e.g. 'A-')
wanted_chans = np.array([probe_port + '-'])  # all ephys channels

# presumably the names of the channels in the selected recs that match the
# filter -- see kwdf.get_all_chan_names
chan_list = kwdf.get_all_chan_names(pd_meta_selection, chan_filt=wanted_chans)

#all_rec_list = kutil.get_rec_list(exp_struct['files']['kwd'])
# sorted, unique rec names of the time selection
selection_rec_list = np.unique(pd_meta_selection['name'])

# channel names of the first selected rec, and the positions of the wanted
# channels within them
rec_chans = pd_meta.loc[pd_meta['name'] == selection_rec_list[0], 'channel_names'].values
rec_chans_idx = kwdf.find_chan_names_idx(rec_chans[0], chan_list)

# make the raw binary file (written to the 'bin_raw' path with header='bin',
# inside the ksort folder created here if missing)
# NOTE(review): bin_path holds the same 'bin_raw' entry that is passed
# explicitly to kwd_to_binary below
bin_path = exp_struct['files']['bin_raw']
os.makedirs(exp_struct['folders']['ksort'], exist_ok=True)
bin_file = kwdf.kwd_to_binary(exp_struct['files']['kwd'],
                              exp_struct['files']['bin_raw'],
                              chan_list=chan_list,
                              rec_list=selection_rec_list, 
                              hi_pass=None,
                              header='bin')


2019-07-14 23:42:36,109 - pipefinch.h5tools.kwik.kwdfunctions - INFO - Writing kwd_file /data/experiment/microdrive/acute_hvc_ra_stim_g19o19/Ephys/kwik/500a_2500l_3400v_acute_g19o19/streams.kwd to binary
2019-07-14 23:42:36,117 - pipefinch.h5tools.kwik.kwdfunctions - INFO - Channels to extract: ['A-000' 'A-001' 'A-002' 'A-003' 'A-004' 'A-005' 'A-006' 'A-007' 'A-008'
 'A-009' 'A-011' 'A-012' 'A-013' 'A-014' 'A-015' 'A-016' 'A-017' 'A-018'
 'A-019' 'A-020' 'A-021' 'A-022' 'A-023' 'A-024' 'A-025' 'A-026' 'A-027'
 'A-028' 'A-029' 'A-030' 'A-031' 'A-032' 'A-033' 'A-034' 'A-035' 'A-036'
 'A-037' 'A-038' 'A-039' 'A-040' 'A-041' 'A-042' 'A-043' 'A-044' 'A-045'
 'A-046' 'A-047' 'A-048' 'A-049' 'A-050' 'A-051' 'A-052' 'A-053' 'A-054'
 'A-055' 'A-056' 'A-057' 'A-058' 'A-059' 'A-060' 'A-061' 'A-062' 'A-063']
2019-07-14 23:42:36,118 - pipefinch.h5tools.kwik.kwdfunctions - INFO - Will go through recs [0]
2019-07-14 23:42:36,119 - pipefinch.h5tools.kwik.kwdfunctions - INFO - Creating binary file /dat

HBox(children=(IntProgress(value=0, description='raw.bin', max=1, style=ProgressStyle(description_width='initi…

2019-07-14 23:42:46,241 - pipefinch.h5tools.kwik.kwdfunctions - INFO - 528670800 elements written





In [25]:
rig_par

{'chan': {'port': {'probe_0': 'A-'}},
 'probe': {'probe_0': {'model': 'ASSY-236-H5',
   'serial': '4534',
   'headstage': 'cambridge64'}}}

In [26]:
chan_list.size

63

## Scripts for sorting with Kilosort
Steps involved:
 - Make binary file with selected recs, chans
 - Set kilosort parameters
 - Make kilosort chanmap
 - Make kilosort scripts and phy parameters file (for manual curation)
 - Run the kilosort scripts (via matlab)
 - Expose the paths for manual curation
 - After curation, make the kwik file with sorted data
 - Cleanup and move metadata to permanent locations

### prep the files with their nice formats, locations and names


In [27]:
from pipefinch.neural.sort.kilo import core as ksc

In [28]:
reload(ksc)
ks_params = {'kilo_version': 1,
             'use_gpu': 1,
            'auto_merge': 1,
            'filt_per_chan': 4,
            's_f': kwdf.get_sampling_rate(pd_meta, selection_rec_list[0]),
            'n_chan': chan_list.size}

In [29]:
reload(kwdf)

<module 'pipefinch.h5tools.kwik.kwdfunctions' from '/home/ezequiel/repos/pipefinch/pipefinch/h5tools/kwik/kwdfunctions.py'>

In [22]:
exp_struct['folders']['ksort']

'/data/experiment/microdrive/p14r14/Ephys/ksort/2019-02-15_3125_01'

In [23]:
reload(ksc)
reload(probes)
file_paths, out_folder = ksc.make_paths(exp_struct['folders']['ksort'])
#os.makedirs(exp_struct['folders']['msort'], exist_ok=True)

# make the probe file
# The reference rec for the channel layout and the sampling rate is the first
# rec of the current selection, the same one used when the binary file and
# ks_params were made. A hard-coded rec 0 is only right when rec 0 happens to
# be part of the selection.
ref_rec = selection_rec_list[0]
rec_chans = pd_meta.loc[pd_meta['name'] == ref_rec, 'channel_names'].values
rec_chans_idx = kwdf.find_chan_names_idx(rec_chans[0], chan_list)
probe = rig_par['probe'][selected_probe]['model']
headstage = rig_par['probe'][selected_probe]['headstage']
# channel indices relative to the first channel of the probe's port
probe_chans = rec_chans_idx - np.min(rec_chans_idx)

probe_dict = probes.make_map(probe, probe_chans, return_dict=True)
kilo_chan_map = ksc.make_kilo_chanmap(probe_dict, file_paths['prb'])

# parameters to pass to the kilosort scripts, other than the defaults
ks_params.update({'s_f': int(kwdf.get_sampling_rate(pd_meta, ref_rec)), # required,
                  'n_chan': probe_chans.size, # total number of chans in the .bin file,
                  'dtype_name': kwdf.get_data_type(exp_struct['files']['kwd']).name
            })
ksc.make_kilo_scripts(exp_struct['folders']['ksort'], ks_params)
phy_pars = ksc.make_phy_par_file(ks_params, file_paths)

2019-05-21 15:35:00,391 - pipefinch.neural.sort.kilo.core - INFO - Written kilo script /data/experiment/microdrive/p14r14/Ephys/ksort/2019-02-15_3125_01/master.m
2019-05-21 15:35:00,393 - pipefinch.neural.sort.kilo.core - INFO - Written kilo script /data/experiment/microdrive/p14r14/Ephys/ksort/2019-02-15_3125_01/config.m
2019-05-21 15:35:00,394 - pipefinch.neural.sort.kilo.core - INFO - Written kilo script /data/experiment/microdrive/p14r14/Ephys/ksort/2019-02-15_3125_01/run_master.m
2019-05-21 15:35:00,395 - pipefinch.neural.sort.kilo.core - INFO - Written phy parameters file /data/experiment/microdrive/p14r14/Ephys/ksort/2019-02-15_3125_01/params.py


In [24]:
file_paths

{'bin': '/data/experiment/microdrive/p14r14/Ephys/ksort/2019-02-15_3125_01/raw.bin',
 'params': '/data/experiment/microdrive/p14r14/Ephys/ksort/2019-02-15_3125_01/params.json',
 'prb': '/data/experiment/microdrive/p14r14/Ephys/ksort/2019-02-15_3125_01/chanMap.mat',
 'rez': '/data/experiment/microdrive/p14r14/Ephys/ksort/2019-02-15_3125_01/rez2.mat',
 'mat_log': '/data/experiment/microdrive/p14r14/Ephys/ksort/2019-02-15_3125_01/kilosort_mat.log',
 'phy_par': '/data/experiment/microdrive/p14r14/Ephys/ksort/2019-02-15_3125_01/params.py'}

In [25]:
reload(ksc)
sort_result, sort_return_value = ksc.do_the_sort(file_paths)

2019-05-21 15:35:03,545 - pipefinch.neural.sort.kilo.core - INFO - Running kilosort on matlab
2019-05-21 15:35:03,546 - pipefinch.neural.sort.kilo.core - INFO - Sort folder is /data/experiment/microdrive/p14r14/Ephys/ksort/2019-02-15_3125_01
2019-05-21 15:35:03,547 - pipefinch.neural.sort.kilo.core - INFO - output to /data/experiment/microdrive/p14r14/Ephys/ksort/2019-02-15_3125_01/kilosort_mat.log


In [25]:
sort_return_value

0

In [26]:
sort_result

"\n                            < M A T L A B (R) >\n                  Copyright 1984-2019 The MathWorks, Inc.\n              R2019a Update 1 (9.6.0.1099231) 64-bit (glnxa64)\n                               April 12, 2019\n\n \nTo get started, type doc.\nFor product information, visit www.mathworks.com.\n \n\n.                         kilosort_mat.log          spike_templates.npy       \n..                        master.m                  spike_times.npy           \namplitudes.npy            params.py                 temp_wh.dat               \nchanMap.mat               pc_feature_ind.npy        template_feature_ind.npy  \nchannel_map.npy           pc_features.npy           template_features.npy     \nchannel_positions.npy     raw.bin                   templates.npy             \ncluster_Amplitude.tsv     rez.mat                   templates_ind.npy         \ncluster_ContamPct.tsv     run_master.m              whitening_mat.npy         \ncluster_KSLabel.tsv       similar_templates.npy   

In [26]:
file_paths

{'bin': '/data/experiment/microdrive/p14r14/Ephys/ksort/2019-02-15_3125_01/raw.bin',
 'params': '/data/experiment/microdrive/p14r14/Ephys/ksort/2019-02-15_3125_01/params.json',
 'prb': '/data/experiment/microdrive/p14r14/Ephys/ksort/2019-02-15_3125_01/chanMap.mat',
 'rez': '/data/experiment/microdrive/p14r14/Ephys/ksort/2019-02-15_3125_01/rez2.mat',
 'mat_log': '/data/experiment/microdrive/p14r14/Ephys/ksort/2019-02-15_3125_01/kilosort_mat.log',
 'phy_par': '/data/experiment/microdrive/p14r14/Ephys/ksort/2019-02-15_3125_01/params.py'}

## Command for viewing:
 - open up terminal with the environment msort
 - go to the ss_data folder for the session
 - run the command: qt-mountainview --raw raw.mda --filt sort_out/filt.mda.prv --pre sort_out/pre.mda.prv --samplerate=20000 --firings sort_out/firings.mda --cluster_metrics sort_out/cluster_metrics.json

# After manual curation
 - save the curated spikes in the sort_out/firings_curated.mda
 - come back to the notebook and run 

In [28]:
from pipefinch.h5tools.kwik import kwikfunctions as kwkf
reload(kwkf)
reload(et)

kwkf.kilo_to_kwik(exp_struct['files']['kwd'],
                 exp_struct['files']['kwik'],
                 exp_struct['folders']['ksort'],
                 rec_in_binary=selection_rec_list)


2019-05-21 15:42:14,162 - pipefinch.h5tools.kwik.kwikfunctions - INFO - Creating kwik file /data/experiment/microdrive/p14r14/Ephys/kwik/2019-02-15_3125_01/sort_1/spikes.kwik from kilosort folder /data/experiment/microdrive/p14r14/Ephys/ksort/2019-02-15_3125_01
2019-05-21 15:42:14,164 - pipefinch.h5tools.kwik.kwikfunctions - INFO - Found clu file, will attempt to unpack manual sorted data from kilosort
2019-05-21 15:42:14,171 - pipefinch.h5tools.kwik.kwikfunctions - INFO - Making spike tables
2019-05-21 15:42:14,361 - pipefinch.h5tools.kwik.kwikfunctions - INFO - Making rec tables (make_rec_groups)
2019-05-21 15:42:14,365 - pipefinch.h5tools.kwik.kwikfunctions - INFO - Making cluster group tables
2019-05-21 15:42:14,366 - pipefinch.h5tools.kwik.kwikfunctions - INFO - found cluster tags file in /data/experiment/microdrive/p14r14/Ephys/ksort/2019-02-15_3125_01/cluster_group.tsv
2019-05-21 15:42:14,368 - pipefinch.h5tools.kwik.kwikfunctions - INFO - Done


In [29]:
os.path.isfile('/data/experiment/microdrive/p14r14/Ephys/ksort/2019-02-15_3125_02/cluster_group.tsv')

True

In [31]:
### extract all unit waveforms
reload(kwkf)

from pipefinch.neural import units
reload(units)
units.get_all_unit_waveforms(exp_struct['files']['kwik'], exp_struct['files']['kwd'])

2019-05-21 15:42:28,359 - pipefinch.neural.units - INFO - About to get all waveforms for 3 units in file /data/experiment/microdrive/p14r14/Ephys/kwik/2019-02-15_3125_01/sort_1/spikes.kwik


HBox(children=(IntProgress(value=0, max=3), HTML(value='')))




0

In [None]:
def msort_cleanup(exp_struct: dict):
    """Delete the intermediate raw .mda file of a session.

    Arguments:
        exp_struct {dict} -- experiment structure; only
            exp_struct['files']['mda_raw'] is read.

    The removal is logged first; any error raised by os.remove (for instance
    when the file is not there) is propagated to the caller.
    """
    stale_mda = exp_struct['files']['mda_raw']
    notice = 'removing intermediate msort mda file {}'.format(stale_mda)
    logger.info(notice)
    os.remove(stale_mda)

def msort_tmp_clean():
    """Log the location of the msort temporary directory.

    The directory is read from the ML_TEMPORARY_DIRECTORY environment variable
    (KeyError if it is not set) and made absolute.
    NOTE: as written, nothing is deleted here; only the log message is emitted.
    """
    configured_dir = os.environ['ML_TEMPORARY_DIRECTORY']
    resolved_dir = os.path.abspath(configured_dir)
    logger.info('Cleaning up msort temp dir {}'.format(resolved_dir))
    
#msort_tmp_clean()
msort_cleanup(exp_struct)

In [165]:
 exp_struct['files']

{'par': '/media/zinch/Windows/experiment/p14r14/ephys/msort/2019-02-13_1750_01/params.json',
 'set': '/mnt/zuperfinchjr/Data/p14r14/ephys/raw/2019-02-13_1750_01/settings.isf',
 'kwd': '/media/zinch/Windows/experiment/p14r14/ephys/kwik/2019-02-13_1750_01/streams.kwd',
 'kwik': '/media/zinch/Windows/experiment/p14r14/ephys/kwik/2019-02-13_1750_01/spikes.kwik',
 'kwe': '/media/zinch/Windows/experiment/p14r14/ephys/kwik/2019-02-13_1750_01/events.kwe',
 'mda_raw': '/media/zinch/Windows/experiment/p14r14/ephys/msort/2019-02-13_1750_01/raw.mda'}