## Notebook for spike sorting from .kwd data using the Pipeline of mountainsort (linux channel has pipeline)
Uses:
    - intan2kwik (https://github.com/zekearneodo/intan2kwik/blob/master/README.md)
    - mountainlab suite(https://github.com/flatironinstitute/mountainlab-js)
    - mountainsort https://github.com/flatironinstitute/mountainsort_examples/blob/master/README.md
    - mountainsort examples https://github.com/flatironinstitute/mountainsort_examples/blob/master/README.md

In [1]:
import socket
import os
import glob
import json
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import subprocess
from datetime import timedelta
from importlib import reload

# pipeline imports
from pipefinch.neural.convert import intan
from pipefinch.neural.sort.mountain import core as msc
from pipefinch.h5tools.kwik import kutil
from pipefinch.pipeline import probes


from pipefinch.h5tools.kwik import kwdfunctions as kwdf

from intan2kwik import kwd

#mountainsort imports (for sorting)
#import mountainlab_pytools.mlproc as mlp

import logging

# Setup the logger
logger = logging.getLogger()
logger.setLevel(logging.DEBUG)
formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')

ch = logging.StreamHandler()
ch.setLevel(logging.INFO)
ch.setFormatter(formatter)
logger.addHandler(ch)
        
logger.info('Logger set')
logger.info('Hostname {}'.format(socket.gethostname()))

2019-03-16 17:33:37,702 - root - INFO - Logger set
2019-03-16 17:33:37,703 - root - INFO - Hostname zebra


### Session parameters and raw files

In [9]:
from pipefinch.pipeline import filestructure as et
reload(et)
reload(kwd)

sess_par = {'bird': 'p1j1',
           'sess': '2019-02-27_1800_02',
           'probe': 'probe_0', # probe to sort ('probe_0', 'probe_1') (to lookup in the rig_par which port to extract)
            'sort': '0' # sort version
           }

exp_struct = et.get_exp_struct(sess_par['bird'], sess_par['sess'], sess_par['sort'])

# mountainsort parameters
sort_params = {'adjacency_radius': -1,
              'detect_threshold': 2,
              'freq_min': 600}

# differetn mountainsort parameters
ds_params = {'detect_sign': -1}


# convenient paths
kwik_folder = exp_struct['folders']['kwik']
msort_folder = exp_struct['folders']['msort']
raw_folder = exp_struct['folders']['raw']
kwd_path = exp_struct['files']['kwd']
bin_path = exp_struct['files']['mda_raw']

In [10]:
exp_struct

{'folders': {'raw': '/mnt/zuperfinch/microdrive/birds/p1j1/Ephys/raw/2019-02-27_1800_02',
  'kwik': '/data/experiment/microdrive/p1j1/Ephys/kwik/2019-02-27_1800_02',
  'msort': '/mnt/scratch/experiment/p1j1/Ephys/msort/2019-02-27_1800_02'},
 'files': {'par': '/mnt/scratch/experiment/p1j1/Ephys/msort/2019-02-27_1800_02/params.json',
  'set': '/mnt/zuperfinch/microdrive/birds/p1j1/Ephys/raw/2019-02-27_1800_02/settings.isf',
  'rig': '/mnt/zuperfinch/microdrive/birds/p1j1/Ephys/raw/2019-02-27_1800_02/rig.json',
  'kwd': '/data/experiment/microdrive/p1j1/Ephys/kwik/2019-02-27_1800_02/streams.kwd',
  'kwik': '/data/experiment/microdrive/p1j1/Ephys/kwik/2019-02-27_1800_02/sort_0/spikes.kwik',
  'kwe': '/data/experiment/microdrive/p1j1/Ephys/kwik/2019-02-27_1800_02/events.kwe',
  'mda_raw': '/mnt/scratch/experiment/p1j1/Ephys/msort/2019-02-27_1800_02/raw.mda'}}

### convert the whole session to a .kwd file
Conversion sends every .rhd file in the folder to a rec in the .kwd file (experiment.kwd in the session ss folder)
All of the files and all of the channels are converted; filtering and subselection of sub-epochs and channels occurs later.
The .kwd is raw data, only in a friendlier format.


#### Make a file for the session for the first time

In [11]:
reload(kwd)
## Convert the whole session to a kwd file
os.makedirs(kwik_folder, exist_ok=True)
first_intan_hdr, sess_pd = kwd.intan_to_kwd(raw_folder, kwd_path)



RuntimeError: Kwd file already exists

#### Update a session with subsequently recorded rhd files

In [12]:
reload(kwdf)
_, nu_pd, _ = kwdf.update_kwd(kwd_path, raw_folder)

2019-03-16 17:41:44,428 - pipefinch.h5tools.kwik.kwdfunctions - INFO - updating kwd file /data/experiment/microdrive/p1j1/Ephys/kwik/2019-02-27_1800_02/streams.kwd from folder /mnt/zuperfinch/microdrive/birds/p1j1/Ephys/raw/2019-02-27_1800_02
2019-03-16 17:41:46,180 - pipefinch.h5tools.kwik.kwdfunctions - INFO - No new files to add to the file


## Make .mda file with a set of recordings in a session
 - pick all in port A
 - get all rec within a time range


In [13]:
# get the session meta
pd_meta = kwdf.get_all_rec_meta(kwd_path)
pd_meta.head(1)


Unnamed: 0,bit_depth,name,sample_rate,start_sample,start_time,channel_bit_volts,channel_names,channels_sample_rate,dig_channel_names,is_multiSampleRate_data,valid_samples,samples_count
0,16,0,30000.0,0,2019-02-27 10:24:26,"[0.195, 0.195, 0.195, 0.195, 0.195, 0.195, 0.1...","[A-000, A-001, A-002, A-003, A-004, A-005, A-0...","[30000.0, 30000.0, 30000.0, 30000.0, 30000.0, ...","[DIN-00, DIN-01]",0,"[1800000.0, 1800000.0, 1800000.0, 1800000.0, 1...",1800000


### pick a time interval of the recordings

In [14]:
def select_time_span(meta_pd, start, span_minutes):
    end = start + timedelta(minutes=span_minutes)
    pd_selection = meta_pd.loc[meta_pd['start_time'].between(start, end)]
    return pd_selection

pd_meta_selection = select_time_span(pd_meta, pd_meta['start_time'][0], 240)

In [15]:
# for instance
pd_meta_selection.head(2)

Unnamed: 0,bit_depth,name,sample_rate,start_sample,start_time,channel_bit_volts,channel_names,channels_sample_rate,dig_channel_names,is_multiSampleRate_data,valid_samples,samples_count
0,16,0,30000.0,0,2019-02-27 10:24:26,"[0.195, 0.195, 0.195, 0.195, 0.195, 0.195, 0.1...","[A-000, A-001, A-002, A-003, A-004, A-005, A-0...","[30000.0, 30000.0, 30000.0, 30000.0, 30000.0, ...","[DIN-00, DIN-01]",0,"[1800000.0, 1800000.0, 1800000.0, 1800000.0, 1...",1800000
1,16,1,30000.0,0,2019-02-27 10:24:56,"[0.195, 0.195, 0.195, 0.195, 0.195, 0.195, 0.1...","[A-000, A-001, A-002, A-003, A-004, A-005, A-0...","[30000.0, 30000.0, 30000.0, 30000.0, 30000.0, ...","[DIN-00, DIN-01]",0,"[1520640.0, 1520640.0, 1520640.0, 1520640.0, 1...",1520640


In [17]:
# load the rig parameters
rig_par_file = exp_struct['files']['rig']
with open(rig_par_file, 'r') as fp:
    rig_par = json.load(fp)

# get the probe and the port where the probe was connected
selected_probe = sess_par['probe']
probe_port = rig_par['chan']['port'][selected_probe].strip('-')

# get the channel indices of the probe's port
wanted_chans = np.array([probe_port + '-'])  # all ephys channels

chan_list = kwdf.get_all_chan_names(pd_meta_selection, chan_filt=wanted_chans)

#all_rec_list = kutil.get_rec_list(exp_struct['files']['kwd'])
selection_rec_list = np.unique(pd_meta_selection['name'])

rec_chans = pd_meta.loc[pd_meta['name'] == selection_rec_list[0], 'channel_names'].values
rec_chans_idx = kwdf.find_chan_names_idx(rec_chans[0], chan_list)

# make the mda binary file
bin_path = exp_struct['files']['mda_raw']
os.makedirs(exp_struct['folders']['msort'], exist_ok=True)
bin_file = kwdf.kwd_to_binary(exp_struct['files']['kwd'],
                              exp_struct['files']['mda_raw'],
                              chan_list=chan_list,
                              rec_list=selection_rec_list, header='mda')


2019-03-16 17:42:08,493 - pipefinch.h5tools.kwik.kwdfunctions - INFO - Writing kwd_file /data/experiment/microdrive/p1j1/Ephys/kwik/2019-02-27_1800_02/streams.kwd to binary
2019-03-16 17:42:09,255 - pipefinch.h5tools.kwik.kwdfunctions - INFO - Channels to extract: ['A-000' 'A-001' 'A-002' 'A-003' 'A-004' 'A-005' 'A-006' 'A-007' 'A-008'
 'A-009' 'A-010' 'A-011' 'A-012' 'A-013' 'A-014' 'A-015' 'A-016' 'A-017'
 'A-018' 'A-019' 'A-020' 'A-021' 'A-022' 'A-023' 'A-024' 'A-025' 'A-026'
 'A-027' 'A-028' 'A-029' 'A-030' 'A-031']
2019-03-16 17:42:09,259 - pipefinch.h5tools.kwik.kwdfunctions - INFO - Will go through recs [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47
 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71
 72 73 74 75 76 77 78]
2019-03-16 17:42:09,264 - pipefinch.h5tools.kwik.kwdfunctions - INFO - Creating binary file /mnt/scratch/experiment/p1j1/Ephys/msort/2019-02

HBox(children=(IntProgress(value=0, description='raw.mda', max=79, style=ProgressStyle(description_width='init…

KeyboardInterrupt: 

## Scripts for sorting

### prep the files with their nice formats, locations and names


In [20]:
reload(msc)
file_paths, out_folder = msc.make_paths(exp_struct['folders']['msort'])
os.makedirs(exp_struct['folders']['msort'], exist_ok=True)

# make the probe file
rec_chans = pd_meta.loc[pd_meta['name']==0, 'channel_names'].values
rec_chans_idx = kwdf.find_chan_names_idx(rec_chans[0], chan_list)
probe = rig_par['probe'][selected_probe]['model']
headstage = rig_par['probe'][selected_probe]['headstage']
probe_chans = rec_chans_idx - np.min(rec_chans_idx)
# try to make a probe. If it is not possible, force adjacency_radius to -1.
try:
    probe_geom = probes.make_map(probe, probe_chans)
    np.savetxt(file_paths['geom'], probe_geom, delimiter=',')
except KeyError as err:
    logger.warning('Probe could not be made, probe or headstage not found {} in probes.py. Will sort with no geometry'.format(err))
    sort_params['adjacency_radius'] = -1

# parameters to pass to the msort scripts
ds_params.update({'samplerate': int(kwdf.get_sampling_rate(pd_meta, 0)), # required
            })


with open(file_paths['params'], 'w') as fp:
    json.dump(ds_params, fp)
    logger.info('Created session par files {}'.format(file_paths['params']))


2019-03-16 17:42:44,373 - root - INFO - Created session par files /mnt/scratch/experiment/p1j1/Ephys/msort/2019-02-27_1800_02/params.json


In [19]:
file_paths

{'mda': '/mnt/scratch/experiment/p1j1/Ephys/msort/2019-02-27_1800_02/raw.mda',
 'params': '/mnt/scratch/experiment/p1j1/Ephys/msort/2019-02-27_1800_02/params.json',
 'geom': '/mnt/scratch/experiment/p1j1/Ephys/msort/2019-02-27_1800_02/geom.csv',
 'filt': '/mnt/scratch/experiment/p1j1/Ephys/msort/2019-02-27_1800_02/sort_out/filt.mda.prv',
 'pre': '/mnt/scratch/experiment/p1j1/Ephys/msort/2019-02-27_1800_02/sort_out/pre.mda.prv',
 'firings': '/mnt/scratch/experiment/p1j1/Ephys/msort/2019-02-27_1800_02/sort_out/firings.mda',
 'firings_curated': '/mnt/scratch/experiment/p1j1/Ephys/msort/2019-02-27_1800_02/sort_out/firings_curated.mda',
 'cluster_metrics': '/mnt/scratch/experiment/p1j1/Ephys/msort/2019-02-27_1800_02/sort_out/cluster_metrics.json',
 'cluster_metrics_curated': '/mnt/scratch/experiment/p1j1/Ephys/msort/2019-02-27_1800_02/sort_out/cluster_metrics_curated.json'}

In [21]:
# test mountainsort method for reading sorting parameters
msc.read_dataset_params(exp_struct['folders']['msort'])

{'detect_sign': -1, 'samplerate': 30000}

In [13]:
output_dir = os.path.join(exp_struct['folders']['msort'], 'sort_out');
msc.sort_dataset(file_paths=file_paths, **sort_params);

2019-03-14 21:17:53,919 - pipefinch.sort.mountain.comre - INFO - Bandpass filter


RUNNING: ml-run-process ephys.bandpass_filter --inputs timeseries:/mnt/scratch/experiment/p1j1/Ephys/msort/2019-02-27_1800_02/raw.mda --parameters freq_max:6000 freq_min:600 samplerate:30000 --outputs timeseries_out:/mnt/scratch/experiment/p1j1/Ephys/msort/2019-02-27_1800_02/sort_out/filt.mda.prv


2019-03-14 21:23:58,048 - pipefinch.sort.mountain.comre - INFO - Whitening


[34m[ Getting processor spec... ][0m
[34m[ Checking inputs and substituting prvs ... ][0m
[34m[ Computing process signature ... ][0m
[34mProcess signature: 7027675613927afd829dc6ab039157fa2276af97[0m
[34m[ Checking outputs... ][0m
[34m{"timeseries_out":"/mnt/scratch/experiment/p1j1/Ephys/msort/2019-02-27_1800_02/sort_out/filt.mda.prv"}[0m
[34mProcessing ouput - /mnt/scratch/experiment/p1j1/Ephys/msort/2019-02-27_1800_02/sort_out/filt.mda.prv[0m
[34mfalse[0m
[34m{"timeseries_out":"/mnt/scratch/experiment/mountainlab-tmp/output_7027675613927afd829dc6ab039157fa2276af97_timeseries_out.mda"}[0m
[34m[ Checking process cache ... ][0m
[34m[ Creating temporary directory ... ][0m
[34m[ Creating links to input files... ][0m
[34m[ Preparing temporary outputs... ][0m
[34mProcessing ouput - /mnt/scratch/experiment/mountainlab-tmp/output_7027675613927afd829dc6ab039157fa2276af97_timeseries_out.mda[0m
[34mfalse[0m
[34m[ Initializing process ... ][0m
[34m[ Running ... ] 

2019-03-14 21:29:37,750 - pipefinch.sort.mountain.comre - INFO - Sorting


[34m[ Getting processor spec... ][0m
[34m[ Checking inputs and substituting prvs ... ][0m
[34m[ Computing process signature ... ][0m
[34mProcess signature: 2d5cea94ae9d3be13ad3a0a8eb4d422ef2c28a99[0m
[34m[ Checking outputs... ][0m
[34m{"timeseries_out":"/mnt/scratch/experiment/p1j1/Ephys/msort/2019-02-27_1800_02/sort_out/pre.mda.prv"}[0m
[34mProcessing ouput - /mnt/scratch/experiment/p1j1/Ephys/msort/2019-02-27_1800_02/sort_out/pre.mda.prv[0m
[34mfalse[0m
[34m{"timeseries_out":"/mnt/scratch/experiment/mountainlab-tmp/output_2d5cea94ae9d3be13ad3a0a8eb4d422ef2c28a99_timeseries_out.mda"}[0m
[34m[ Checking process cache ... ][0m
[34m[ Creating temporary directory ... ][0m
[34m[ Creating links to input files... ][0m
[34m[ Preparing temporary outputs... ][0m
[34mProcessing ouput - /mnt/scratch/experiment/mountainlab-tmp/output_2d5cea94ae9d3be13ad3a0a8eb4d422ef2c28a99_timeseries_out.mda[0m
[34mfalse[0m
[34m[ Initializing process ... ][0m
[34m[ Running ... ] /h

2019-03-14 23:33:50,547 - pipefinch.sort.mountain.comre - INFO - Getting cluster metrics


[34m[ Getting processor spec... ][0m
[34m[ Checking inputs and substituting prvs ... ][0m
[34m[ Computing process signature ... ][0m
[34mProcess signature: 6bfe9b8f025947748a21e149efa9241bb1038bb6[0m
[34m[ Checking outputs... ][0m
[34m{"firings_out":"/mnt/scratch/experiment/p1j1/Ephys/msort/2019-02-27_1800_02/sort_out/firings.mda"}[0m
[34mProcessing ouput - /mnt/scratch/experiment/p1j1/Ephys/msort/2019-02-27_1800_02/sort_out/firings.mda[0m
[34mfalse[0m
[34m{"firings_out":"/mnt/scratch/experiment/p1j1/Ephys/msort/2019-02-27_1800_02/sort_out/firings.mda"}[0m
[34m[ Checking process cache ... ][0m
[34m[ Creating temporary directory ... ][0m
[34m[ Creating links to input files... ][0m
[34m[ Preparing temporary outputs... ][0m
[34mProcessing ouput - /mnt/scratch/experiment/p1j1/Ephys/msort/2019-02-27_1800_02/sort_out/firings.mda[0m
[34mfalse[0m
[34m[ Initializing process ... ][0m
[34m[ Running ... ] /home/zeke/anaconda3/envs/mountain/bin/python3 /home/zeke/an

2019-03-14 23:47:54,039 - pipefinch.sort.mountain.comre - INFO - Automatically curating


[34m[ Getting processor spec... ][0m
[34m[ Checking inputs and substituting prvs ... ][0m
[34m[ Computing process signature ... ][0m
[34mProcess signature: c7118020c1654244ac20cca49759d9ded3328015[0m
[34m[ Checking outputs... ][0m
[34m{"metrics_out":"/mnt/scratch/experiment/p1j1/Ephys/msort/2019-02-27_1800_02/sort_out/cluster_metrics.json"}[0m
[34mProcessing ouput - /mnt/scratch/experiment/p1j1/Ephys/msort/2019-02-27_1800_02/sort_out/cluster_metrics.json[0m
[34mfalse[0m
[34m{"metrics_out":"/mnt/scratch/experiment/p1j1/Ephys/msort/2019-02-27_1800_02/sort_out/cluster_metrics.json"}[0m
[34m[ Checking process cache ... ][0m
[34m[ Creating temporary directory ... ][0m
[34m[ Creating links to input files... ][0m
[34m[ Preparing temporary outputs... ][0m
[34mProcessing ouput - /mnt/scratch/experiment/p1j1/Ephys/msort/2019-02-27_1800_02/sort_out/cluster_metrics.json[0m
[34mfalse[0m
[34m[ Initializing process ... ][0m
[34m[ Running ... ] /home/zeke/anaconda3/env

In [15]:
file_paths

{'mda': '/mnt/scratch/experiment/p1j1/Ephys/msort/2019-02-27_1800_02/raw.mda',
 'params': '/mnt/scratch/experiment/p1j1/Ephys/msort/2019-02-27_1800_02/params.json',
 'geom': '/mnt/scratch/experiment/p1j1/Ephys/msort/2019-02-27_1800_02/geom.csv',
 'filt': '/mnt/scratch/experiment/p1j1/Ephys/msort/2019-02-27_1800_02/sort_out/filt.mda.prv',
 'pre': '/mnt/scratch/experiment/p1j1/Ephys/msort/2019-02-27_1800_02/sort_out/pre.mda.prv',
 'firings': '/mnt/scratch/experiment/p1j1/Ephys/msort/2019-02-27_1800_02/sort_out/firings.mda',
 'firings_curated': '/mnt/scratch/experiment/p1j1/Ephys/msort/2019-02-27_1800_02/sort_out/firings_curated.mda',
 'cluster_metrics': '/mnt/scratch/experiment/p1j1/Ephys/msort/2019-02-27_1800_02/sort_out/cluster_metrics.json',
 'cluster_metrics_curated': '/mnt/scratch/experiment/p1j1/Ephys/msort/2019-02-27_1800_02/sort_out/cluster_metrics_curated.json'}

## Command for viewing:
 - open up terminal with the environment msort
 - go go the ss_data folder for the session
 - run the command: qt-mountainview --raw raw.mda --filt sort_out/filt.mda.prv --pre sort_out/pre.mda.prv --samplerate=20000 --firings sort_out/firings.mda --cluster_metrics sort_out/cluster_metrics.json

# After manual curation
 - save the curated spikes in the sort_out/firings_curated.mda
 - come back to the notebook and run 

In [19]:
from pipefinch.h5tools.kwik import kwikfunctions as kwkf
reload(kwkf)
reload(et)
firings_to_save = 'firings_curated' # 'curated' or 'firings' for default_output


metrics_to_save = 'cluster_metrics_curated' if firings_to_save == 'firings_curated' else 'cluster_metrics'
kwkf.mda_to_kwik(exp_struct['files']['kwd'],
                 exp_struct['files']['kwik'],
                 file_paths[firings_to_save],
                file_paths[metrics_to_save])


2019-03-16 17:31:02,105 - pipefinch.h5tools.kwik.kwikfunctions - INFO - Creating kwik file /data/experiment/microdrive/p1j1/Ephys/kwik/2019-02-27_1800_02/spikes.kwik from firings /mnt/scratch/experiment/p1j1/Ephys/msort/2019-02-27_1800_02/sort_out/firings_curated.mda


<pipefinch.h5tools.kwik.kwikfunctions.MdaKwikWriter at 0x7f1621f0cc50>

In [17]:
### extract all unit waveforms
from pipefinch.neural import units
reload(units)
units.get_all_unit_waveforms(exp_struct['files']['kwik'], exp_struct['files']['kwd'], port=probe_port)

2019-03-15 13:00:32,100 - pipefinch.neural.units - INFO - About to get all waveforms for 44 units in file /data/experiment/microdrive/p1j1/Ephys/kwik/2019-02-27_1800_02/spikes.kwik


HBox(children=(IntProgress(value=0, max=44), HTML(value='')))




0

In [None]:
### cleanup msort files (msort folder and temp folder)

In [18]:
et.msort_cleanup(exp_struct)
msc.clean_tmp_dir()

2019-03-16 16:54:51,178 - pipefinch.pipeline.filestructure - INFO - removing (if exists) msort mda file /mnt/scratch/experiment/p1j1/Ephys/msort/2019-02-27_1800_02/raw.mda 
2019-03-16 16:54:51,794 - pipefinch.sort.mountain.comre - INFO - Cleaning up msort temp dir /mnt/scratch/experiment/mountainlab-tmp


In [165]:
 exp_struct['files']

{'par': '/media/zinch/Windows/experiment/p14r14/ephys/msort/2019-02-13_1750_01/params.json',
 'set': '/mnt/zuperfinchjr/Data/p14r14/ephys/raw/2019-02-13_1750_01/settings.isf',
 'kwd': '/media/zinch/Windows/experiment/p14r14/ephys/kwik/2019-02-13_1750_01/streams.kwd',
 'kwik': '/media/zinch/Windows/experiment/p14r14/ephys/kwik/2019-02-13_1750_01/spikes.kwik',
 'kwe': '/media/zinch/Windows/experiment/p14r14/ephys/kwik/2019-02-13_1750_01/events.kwe',
 'mda_raw': '/media/zinch/Windows/experiment/p14r14/ephys/msort/2019-02-13_1750_01/raw.mda'}