## Notebook for spike sorting from SGL data using Kilosort; neatly merging two epochs

Uses:

    - intan2kwik (https://github.com/zekearneodo/intan2kwik/blob/master/README.md)

What it does:

    - Creates a merged `_all` epoch within the session, containing all the epochs in the session
        - Replicates the sglx structure with this _all session
        - Merges all binary files into the appropriate binary files
        - Copies the first metadata file as the new meta file corresponding to each binary file (note that some metadata will be WRONG!)
            - fileCreateTime, fileName, fileSizeBytes, fileTimeSecs, firstSample are those of the first epoch. All else should be OK
            - The original files are tracked in the /application_data group of each kwd file
            

In [1]:
import socket
import os
import glob
import json
import shutil 
from typing import Union
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import subprocess
from datetime import timedelta
from importlib import reload
import shutil

# pipeline imports
from pipefinch.h5tools.kwik import kutil
from pipefinch.pipeline import probes
from pipefinch.pipeline import sglxutil as sglu
from pipefinch.neural.sort import kilo

from pipefinch.pipeline import filestructure as et

import logging

# Setup the logger.
# The root logger outlives the cell, so re-running this cell used to stack one
# more StreamHandler on it each time and every message was printed repeatedly.
# The handler is named so that a previous instance can be found and replaced.
logger = logging.getLogger()
logger.setLevel(logging.DEBUG)
formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')

ch = logging.StreamHandler()
ch.set_name('notebook_stream')
ch.setLevel(logging.INFO)
ch.setFormatter(formatter)
# drop the handler left behind by an earlier run of this cell, if any
for stale_handler in [h for h in logger.handlers if h.get_name() == ch.get_name()]:
    logger.removeHandler(stale_handler)
logger.addHandler(ch)

logger.info('Logger set')
logger.info('Hostname {}'.format(socket.gethostname()))

2019-11-16 19:59:24,643 - root - INFO - Logger set
2019-11-16 19:59:24,644 - root - INFO - Hostname zpikezorter


### Session parameters and raw files

In [2]:
reload(et)
bird = 'g4r4'
all_bird_sess = et.list_sessions(bird)
all_bird_sess

['20190711_01',
 '20190711_02',
 '20190711_03_tipref',
 '20190711_04',
 '20190712_01',
 '20190712_02',
 '20190712_03',
 '20190712_04',
 '20190712_05',
 '20190711_4800_01_g0',
 '20190713_01',
 '20190723_02',
 '20190712_01_extref_g0',
 '20190714_05',
 '20190714_06',
 '20190715_01',
 '20190715_02',
 '20190716_01',
 '20190716_02',
 '20190717_01',
 '20190717_02',
 '20190718_01',
 '20190718_02',
 '20190719_01',
 '20190719_02',
 '20190720_01',
 '20190721_01',
 '20190722_01',
 '20190722_02',
 '20190722_03',
 '20190722_04',
 '20190723_01',
 '20190723_03',
 '20190724_01',
 '20190713_02',
 '20190713_03',
 '20190713_04',
 '20190724_02',
 '20190714_01',
 '20190714_03',
 '20190714_04',
 '20190724_03',
 '20190725_01',
 '20190725_02',
 '20190725_03',
 '20190726_01',
 '20190726_02',
 '20190711_04_extref',
 '20190714_02']

In [28]:
reload(et)

# Which recording to work on.
sess_par = dict(
    bird=bird,
    sess='20190713_01',
    probe='probe_0',  # probe to sort ('probe_0', 'probe_1'); looked up in the rig_par to find the port to extract
    sort=0,
    epoch=None,  # subfolder in the neuropix data
)

# Folder/file layout of the session for this sort.
exp_struct = et.get_exp_struct(sess_par['bird'], sess_par['sess'], sess_par['sort'])

# Sorting parameters.
sort_params = dict(adjacency_radius=-1, detect_threshold=2, freq_min=600)

ds_params = dict(detect_sign=-1)

ks_params = dict(use_gpu=1, auto_merge=1, filt_per_chan=4)

# Default parameters for visualization.
viz_par = dict(
    evt_name='motif',
    evt_signal='trig_perceptron',
    evt_edge=1,
    pre_ms=-500,
    post_ms=300,
    pre_samples=0,
    post_samples=0,
    span=0,
)

# Shortcuts to the folders used most often.
exp_folders = exp_struct['folders']
kwik_folder = exp_folders['kwik']
ksort_folder = exp_folders['ksort']
raw_folder = exp_folders['raw']

In [29]:
exp_struct

{'folders': {'bird': '/mnt/microdrive/birds/g4r4',
  'raw': '/mnt/microdrive/birds/g4r4/Ephys/raw/20190713_01',
  'kwik': '/data/experiment/microdrive/g4r4/Ephys/kwik/20190713_01',
  'msort': '/data/experiment/microdrive/g4r4/Ephys/msort/20190713_01',
  'ksort': '/data/experiment/microdrive/g4r4/Ephys/ksort/20190713_01'},
 'files': {'par': '/data/experiment/microdrive/g4r4/Ephys/ksort/20190713_01/params.json',
  'set': '/mnt/microdrive/birds/g4r4/Ephys/raw/20190713_01/settings.isf',
  'rig': '/mnt/microdrive/birds/g4r4/Ephys/raw/20190713_01/rig.json',
  'kwd': '/data/experiment/microdrive/g4r4/Ephys/kwik/20190713_01/stream.kwd',
  'kwik': '/data/experiment/microdrive/g4r4/Ephys/kwik/20190713_01/sort_0/spikes.kwik',
  'kwe': '/data/experiment/microdrive/g4r4/Ephys/kwik/20190713_01/events.kwe',
  'mda_raw': '/data/experiment/microdrive/g4r4/Ephys/msort/20190713_01/raw.mda',
  'bin_raw': '/data/experiment/microdrive/g4r4/Ephys/ksort/20190713_01/raw.bin'}}

### Pick an epoch and make the file structures of the merge

In [30]:
sess_epochs = sglu.list_sgl_epochs(sess_par)
sess_epochs

['20190713_01_1210_undir_g0',
 '20190713_01_1210_undir_g1',
 '20190713_01_1355_dir_g0']

In [31]:
sess_par

{'bird': 'g4r4',
 'sess': '20190713_01',
 'probe': 'probe_0',
 'sort': 0,
 'epoch': None}

### Merge binaries and meta files

Test merge_raw of two raw folders

In [7]:
src_f = os.path.abspath('/mnt/microdrive/birds/g4r4/Ephys/raw/20190715_02/20190715_02_dir_g0/20190715_02_dir_g0_imec0')
dst_f = os.path.abspath('/mnt/microdrive/birds/g4r4/Ephys/raw/20190715_02/20190715_02_all/20190715_02_all_imec0')

#m_f = sglu.merge_raw_sgl(src_f, dst_f)

In [32]:
# Merge every recorded epoch of the session into a single epoch.
reload(sglu)
# Leave out epochs whose name contains '_all': those are the products of
# earlier merges (e.g. '<sess>_alles'), not original recordings.
epochs_to_merge = [epoch_name for epoch_name in sglu.list_sgl_epochs(sess_par)
                   if '_all' not in epoch_name]
logger.info('epochs to merge {}'.format(epochs_to_merge))

# Alternative kept for reference: rebuild the struct of an existing merge.
#exp_struct = sglu.sgl_struct(sess_par, new_epoch)
exp_struct, epoch_structs, epoch_meta, new_epoch = sglu.merge_epochs(sess_par, epochs_to_merge)
# Alternative kept for reference: merge only the first two epochs.
#merge_epochs(sess_par, sglu.list_sgl_epochs(sess_par)[:2])

2019-11-18 13:12:33,026 - root - INFO - epochs to merge ['20190713_01_1210_undir_g0', '20190713_01_1210_undir_g1', '20190713_01_1355_dir_g0']
2019-11-18 13:12:33,028 - pipefinch.pipeline.sglxutil - INFO - Will merge epochs ['20190713_01_1210_undir_g0', '20190713_01_1210_undir_g1', '20190713_01_1355_dir_g0']
2019-11-18 13:12:33,039 - pipefinch.pipeline.sglxutil - INFO - SGL folder struct {'nidq': '/mnt/microdrive/birds/g4r4/Ephys/raw/20190713_01/20190713_01_alles', 'imec': {}}
2019-11-18 13:12:33,040 - pipefinch.pipeline.sglxutil - INFO - Creating merged raw /mnt/microdrive/birds/g4r4/Ephys/raw/20190713_01/20190713_01_alles
2019-11-18 13:12:33,041 - pipefinch.pipeline.sglxutil - INFO - Will cleanup destination folder first: /mnt/microdrive/birds/g4r4/Ephys/raw/20190713_01/20190713_01_alles
2019-11-18 13:12:33,142 - pipefinch.pipeline.sglxutil - INFO - * Adding epoch /mnt/microdrive/birds/g4r4/Ephys/raw/20190713_01/20190713_01_1210_undir_g0
2019-11-18 13:12:33,143 - pipefinch.pipeline.sg

In [33]:
df = os.path.abspath('/mnt/microdrive/birds/g4r4/Ephys/raw/20190715_02/20190715_02_alles_imec')
nb = os.path.split(df)[-1]
nb.find('imec')

18

# Per-epoch summary of the nidq metadata returned by the merge.
# np.float / np.int were plain aliases of the builtins and were removed in
# NumPy 1.24, so the builtins are used directly (same result).
ni_pd = pd.DataFrame([x['nidq_meta'] for x in epoch_meta])
ni_pd['filetimesecs'] = ni_pd['filetimesecs'].apply(float)
ni_pd['samples'] = ni_pd['filetimesecs'] * ni_pd['s_f']
ni_pd['samples'] = ni_pd['samples'].apply(int)
# displayed sorted by creation time; ni_pd itself keeps its original order
ni_pd.sort_values('filecreatetime')


In [34]:
exp_struct

{'folders': {'bird': '/mnt/microdrive/birds/g4r4/20190713_01_alles',
  'raw': '/mnt/microdrive/birds/g4r4/Ephys/raw/20190713_01/20190713_01_alles',
  'kwik': '/data/experiment/microdrive/g4r4/Ephys/kwik/20190713_01/20190713_01_alles',
  'msort': '/data/experiment/microdrive/g4r4/Ephys/msort/20190713_01/20190713_01_alles',
  'ksort': '/data/experiment/microdrive/g4r4/Ephys/ksort/20190713_01/20190713_01_alles'},
 'files': {'par': '/data/experiment/microdrive/g4r4/Ephys/ksort/20190713_01/20190713_01_alles/params.json',
  'set': '/mnt/microdrive/birds/g4r4/Ephys/raw/20190713_01/settings.isf',
  'rig': '/mnt/microdrive/birds/g4r4/Ephys/raw/20190713_01/rig.json',
  'kwd': '/data/experiment/microdrive/g4r4/Ephys/kwik/20190713_01/20190713_01_alles/stream.kwd',
  'kwik': '/data/experiment/microdrive/g4r4/Ephys/kwik/20190713_01/20190713_01_alles/sort_0/spikes.kwik',
  'kwe': '/data/experiment/microdrive/g4r4/Ephys/kwik/20190713_01/20190713_01_alles/events.kwe',
  'mda_raw': '/data/experiment/mic

#### Make kwd of the merged epochs

In [35]:
import h5py
from pipefinch.h5tools.core import tables
def make_merged_kwd(merged_exp_struct: dict, epoch_meta_list: list, overwrite=False):
    """Make the kwd file of a merged session and store the epoch break points in it.

    The nidq metadata of every merged epoch is gathered in a DataFrame, the kwd
    file is created from the nidq meta file found in the merged raw folder, and
    four tables describing the breaks between epochs are inserted under
    /recordings/0/application_data of that kwd file:
    'breaks_sample', 'breaks_tstart', 'breaks_file' and 'breaks_file_samples'.

    Args:
        merged_exp_struct: experiment struct of the merged epoch; the entries
            ['files']['kwd'] and ['folders']['raw'] are read.
        epoch_meta_list: one dict per merged epoch, each holding a 'nidq_meta'
            dict with (at least) 'filetimesecs', 's_f', 'filecreatetime' and
            'meta_file_path'.
            NOTE(review): the rows are used in the order given, which is assumed
            to be the order in which the binaries were concatenated - confirm
            against sglu.merge_epochs.
        overwrite: passed on to sglu.sgl_to_kwd.

    Returns:
        (merged_nidq_dict, ni_pd): whatever sglu.sgl_to_kwd returned, and the
        DataFrame of per-epoch metadata with the added 'samples' and 'starts'
        columns.

    Raises:
        ValueError: if epoch_meta_list is empty.
        FileNotFoundError: if no .meta file is found in the merged nidq folder.
    """
    if len(epoch_meta_list) == 0:
        raise ValueError('No epoch metadata to merge')

    kwd_path = merged_exp_struct['files']['kwd']
    logger.info('merging nidq data of {} epochs onto kwd file {}'.format(len(epoch_meta_list), kwd_path))

    ## get the metas for all the nidq in the epoch folders
    ni_pd = pd.DataFrame([x['nidq_meta'] for x in epoch_meta_list])
    # np.float / np.int were removed in NumPy 1.24; use the builtin / a sized dtype
    ni_pd['filetimesecs'] = ni_pd['filetimesecs'].apply(float)
    ni_pd['samples'] = ni_pd['filetimesecs'] * ni_pd['s_f']

    # The former ni_pd.sort_values('filecreatetime') discarded its result, so the
    # rows were never reordered. The input order is kept on purpose (it has to
    # match the concatenation order), but a non-chronological list is reported.
    if not ni_pd['filecreatetime'].is_monotonic_increasing:
        logger.warning('Epoch metas are not in filecreatetime order; break points follow the order given')
    logger.info('Epochs to merge {}'.format(ni_pd['meta_file_path']))

    # signal the breaking points within the concatenated stream:
    # epoch i starts after ALL the samples of epochs 0..i-1, i.e. at the running
    # sum of the previous file lengths (a plain shift(1) is only right for the
    # first two epochs).
    samples_per_file = ni_pd['samples'].to_numpy(dtype=np.int64)
    ni_pd['starts'] = np.concatenate(([0], np.cumsum(samples_per_file)[:-1]))

    ## make the kwd with sgl_to_kwd(meta_path, dest_file_path, rec=0, include_blocks=['adc', 'dig_in'], overwrite=False) -> dict
    meta_folder = sglu.sgl_file_struct(merged_exp_struct['folders']['raw'])[0]['nidq']
    # sorted so that the pick is deterministic if more than one meta file is there
    nidq_meta_files = sorted(glob.glob(os.path.join(meta_folder, '*.meta')))
    if not nidq_meta_files:
        raise FileNotFoundError('No .meta file found in merged nidq folder {}'.format(meta_folder))
    nidq_meta_path = nidq_meta_files[0]
    logger.info('nidq meta path {}'.format(nidq_meta_path))
    merged_nidq_dict = sglu.sgl_to_kwd(nidq_meta_path, kwd_path, overwrite=overwrite)

    ## edit the kwd and add the metadata on the breaks to /application_data tables
    vlen_str = h5py.special_dtype(vlen=str)
    with h5py.File(kwd_path, 'r+') as kwd_file:
        app_data_group = kwd_file['/recordings/0/application_data']

        tables.insert_table(app_data_group, ni_pd['starts'].to_numpy(dtype=np.int64), 'breaks_sample')
        tables.insert_table(app_data_group, ni_pd['filecreatetime'].to_numpy(), 'breaks_tstart', force_dtype=vlen_str)
        tables.insert_table(app_data_group, ni_pd['meta_file_path'].to_numpy(), 'breaks_file', force_dtype=vlen_str)
        tables.insert_table(app_data_group, samples_per_file, 'breaks_file_samples')

    return merged_nidq_dict, ni_pd

ni_meta_dict, ni_meta_pd = make_merged_kwd(exp_struct, epoch_meta, overwrite=True)

2019-11-18 14:33:47,779 - root - INFO - merging nidq data of 3 epochs onto kwd file /data/experiment/microdrive/g4r4/Ephys/kwik/20190713_01/20190713_01_alles/stream.kwd
2019-11-18 14:33:47,792 - root - INFO - Epochs to merge 0    /mnt/microdrive/birds/g4r4/Ephys/raw/20190713_...
1    /mnt/microdrive/birds/g4r4/Ephys/raw/20190713_...
2    /mnt/microdrive/birds/g4r4/Ephys/raw/20190713_...
Name: meta_file_path, dtype: object
2019-11-18 14:33:47,798 - root - INFO - nidq meta path /mnt/microdrive/birds/g4r4/Ephys/raw/20190713_01/20190713_01_alles/20190713_01_alles_t0.nidq.meta
2019-11-18 14:33:47,799 - pipefinch.pipeline.sglxutil - INFO - dest file: /data/experiment/microdrive/g4r4/Ephys/kwik/20190713_01/20190713_01_alles/stream.kwd
2019-11-18 14:33:47,800 - pipefinch.pipeline.sglxutil - INFO - meta file /mnt/microdrive/birds/g4r4/Ephys/raw/20190713_01/20190713_01_alles/20190713_01_alles_t0.nidq.meta
2019-11-18 14:34:23,389 - pipefinch.pipeline.sglxutil - INFO - block adc
2019-11-18 14:34:2

In [36]:
exp_struct

{'folders': {'bird': '/mnt/microdrive/birds/g4r4/20190713_01_alles',
  'raw': '/mnt/microdrive/birds/g4r4/Ephys/raw/20190713_01/20190713_01_alles',
  'kwik': '/data/experiment/microdrive/g4r4/Ephys/kwik/20190713_01/20190713_01_alles',
  'msort': '/data/experiment/microdrive/g4r4/Ephys/msort/20190713_01/20190713_01_alles',
  'ksort': '/data/experiment/microdrive/g4r4/Ephys/ksort/20190713_01/20190713_01_alles'},
 'files': {'par': '/data/experiment/microdrive/g4r4/Ephys/ksort/20190713_01/20190713_01_alles/params.json',
  'set': '/mnt/microdrive/birds/g4r4/Ephys/raw/20190713_01/settings.isf',
  'rig': '/mnt/microdrive/birds/g4r4/Ephys/raw/20190713_01/rig.json',
  'kwd': '/data/experiment/microdrive/g4r4/Ephys/kwik/20190713_01/20190713_01_alles/stream.kwd',
  'kwik': '/data/experiment/microdrive/g4r4/Ephys/kwik/20190713_01/20190713_01_alles/sort_0/spikes.kwik',
  'kwe': '/data/experiment/microdrive/g4r4/Ephys/kwik/20190713_01/20190713_01_alles/events.kwe',
  'mda_raw': '/data/experiment/mic

### Do Sort
# epoch comes from above, but can always be taken from here by re-doing the exp_struct.
# typically, the merged session will be sess_alles

In [37]:
all_epoch = sess_par['sess'] + '_alles'
exp_struct = sglu.sgl_struct(sess_par, all_epoch)

In [38]:
sgl_folder, sgl_pd = sglu.sgl_file_struct(exp_struct['folders']['raw'])
sgl_folder

{'nidq': '/mnt/microdrive/birds/g4r4/Ephys/raw/20190713_01/20190713_01_alles',
 'imec': {0: '/mnt/microdrive/birds/g4r4/Ephys/raw/20190713_01/20190713_01_alles/20190713_01_alles_imec0'}}

##### get the AP files for one imec probe

The imec file.

Here's the meaning of some of the metadata https://github.com/JaneliaSciComp/JRCLUST/wiki/.meta-file. In particular, there is an explanation of what channels in the probe are used and where they are located in the block. More detailed meta here https://github.com/billkarsh/SpikeGLX/blob/master/Markdown/Metadata.md.


In [39]:
probe_id = int(sess_par['probe'].split('_')[-1])

probe_data_folder = sgl_folder['imec'][probe_id]
probe_data_folder
ap_meta_files = glob.glob(os.path.join(probe_data_folder, '*.ap.meta'))

ap_meta_files[0]

'/mnt/microdrive/birds/g4r4/Ephys/raw/20190713_01/20190713_01_alles/20190713_01_alles_imec0/20190713_01_alles_t0.imec0.ap.meta'

#### read a file and its meta

In [40]:
imec_meta_file_path = ap_meta_files[0]
# these should come from the .meta file
imec_meta_dict = sglu.get_imec_meta(imec_meta_file_path)

imec0 = sglu.get_imec_data(imec_meta_file_path)
n_chan = imec_meta_dict['nsavedchans'] #nSavedChans in meta file
s_f = imec0['meta']['s_f'] #30000.533148 #imSampleRate in meta file



In [41]:
s_f

30000.0

### Load the rig parameters and get the probe file, behavior triggers, etc
 - Get the rig par file
 - Get the aux channels
 - Detect onset of wav files

In [42]:
rig_par = et.get_rig_par(exp_struct)

## Scripts for sorting with Kilosort
Steps involved:
 - Make binary file with selected recs, chans
 - Set kilosort parameters
 - Make kilosort chanmap
 - Make kilosort scripts and phy parameters file (for manual curation)
 - Run the kilosort scripts (via matlab)
 - Expose the paths for manual curation
 - After curation, make the kwik file with sorted data
 - Cleanup and move metadata to permanent locations

### prep the files with their nice formats, locations and names


In [43]:
from pipefinch.neural.sort.kilo import core as ksc

In [44]:
reload(ksc)
ks_params = {'kilo_version': 2,
             'use_gpu': 1,
            'auto_merge': 1,
            'filt_per_chan': 4,
            's_f': int(s_f),
            'n_chan': n_chan,
            'spkTh': -4,
            'minFR': 1/100,
            }

In [45]:
exp_struct['folders']['ksort']

'/data/experiment/microdrive/g4r4/Ephys/ksort/20190713_01/20190713_01_alles'

In [46]:
reload(ksc)
reload(probes)
file_paths, out_folder = ksc.make_paths(exp_struct['folders']['ksort'])

In [47]:
file_paths

{'bin': '/data/experiment/microdrive/g4r4/Ephys/ksort/20190713_01/20190713_01_alles/raw.bin',
 'params': '/data/experiment/microdrive/g4r4/Ephys/ksort/20190713_01/20190713_01_alles/params.json',
 'prb': '/data/experiment/microdrive/g4r4/Ephys/ksort/20190713_01/20190713_01_alles/chanMap.mat',
 'rez': '/data/experiment/microdrive/g4r4/Ephys/ksort/20190713_01/20190713_01_alles/rez2.mat',
 'mat_log': '/data/experiment/microdrive/g4r4/Ephys/ksort/20190713_01/20190713_01_alles/kilosort_mat.log',
 'phy_par': '/data/experiment/microdrive/g4r4/Ephys/ksort/20190713_01/20190713_01_alles/params.py'}

In [48]:
reload(ksc)
reload(probes)
file_paths, out_folder = ksc.make_paths(exp_struct['folders']['ksort'])
os.makedirs(out_folder, exist_ok=True)

# Probe file: for now the idea is just to copy the default channel map over.
#shutil.copyfile('/home/ezequiel/repos/Kilosort2/configFiles/chanMap_phase3b_allconnect.mat', file_paths['prb'])

# Raw binary for the sort: the recording is copied into the ksort folder.
# This has to be done either way because the /data partition is faster.
# todo: concatenate them or something, from a kwd
#logger.info('copying raw file into {}'.format(file_paths['bin']))
#imec0['only_neural'].tofile(file_paths['bin'])

#shutil.copyfile(sglu.get_data_meta_path(imec_meta_file_path)[0], file_paths['bin'])

# Parameters to pass to the kilosort scripts, other than the defaults.
ks_params.update(
    s_f=s_f,  # required
    n_chan=n_chan,  # total number of chans in the .bin file
    dtype_name=imec0['neural'].dtype.name,
)
ksort_folder_path = exp_struct['folders']['ksort']
ksc.make_kilo_scripts(ksort_folder_path, ks_params)
phy_pars = ksc.make_phy_par_file(ks_params, file_paths)

2019-11-18 14:34:43,631 - pipefinch.neural.sort.kilo.core - INFO - Written kilo script /data/experiment/microdrive/g4r4/Ephys/ksort/20190713_01/20190713_01_alles/master.m
2019-11-18 14:34:43,632 - pipefinch.neural.sort.kilo.core - INFO - Written kilo script /data/experiment/microdrive/g4r4/Ephys/ksort/20190713_01/20190713_01_alles/config.m
2019-11-18 14:34:43,633 - pipefinch.neural.sort.kilo.core - INFO - Written kilo script /data/experiment/microdrive/g4r4/Ephys/ksort/20190713_01/20190713_01_alles/run_master.m
2019-11-18 14:34:43,634 - pipefinch.neural.sort.kilo.core - INFO - Written phy parameters file /data/experiment/microdrive/g4r4/Ephys/ksort/20190713_01/20190713_01_alles/params.py
2019-11-18 14:34:43,635 - pipefinch.neural.sort.kilo.core - INFO - Written ksort parameters file /data/experiment/microdrive/g4r4/Ephys/ksort/20190713_01/20190713_01_alles/params.json


In [49]:
ks_params

{'kilo_version': 2,
 'use_gpu': 1,
 'auto_merge': 1,
 'filt_per_chan': 4,
 's_f': 30000.0,
 'n_chan': 385,
 'spkTh': -4,
 'minFR': 0.01,
 'dtype_name': 'int16'}

In [50]:
file_paths

{'bin': '/data/experiment/microdrive/g4r4/Ephys/ksort/20190713_01/20190713_01_alles/raw.bin',
 'params': '/data/experiment/microdrive/g4r4/Ephys/ksort/20190713_01/20190713_01_alles/params.json',
 'prb': '/data/experiment/microdrive/g4r4/Ephys/ksort/20190713_01/20190713_01_alles/chanMap.mat',
 'rez': '/data/experiment/microdrive/g4r4/Ephys/ksort/20190713_01/20190713_01_alles/rez2.mat',
 'mat_log': '/data/experiment/microdrive/g4r4/Ephys/ksort/20190713_01/20190713_01_alles/kilosort_mat.log',
 'phy_par': '/data/experiment/microdrive/g4r4/Ephys/ksort/20190713_01/20190713_01_alles/params.py'}

In [51]:
reload(ksc)
sort_result, sort_return_value = ksc.do_the_sort(file_paths)

2019-11-18 14:34:43,652 - pipefinch.neural.sort.kilo.core - INFO - Running kilosort on matlab
2019-11-18 14:34:43,653 - pipefinch.neural.sort.kilo.core - INFO - Sort folder is /data/experiment/microdrive/g4r4/Ephys/ksort/20190713_01/20190713_01_alles
2019-11-18 14:34:43,654 - pipefinch.neural.sort.kilo.core - INFO - output to /data/experiment/microdrive/g4r4/Ephys/ksort/20190713_01/20190713_01_alles/kilosort_mat.log


CalledProcessError: Command '['matlab', '-nodesktop', '-nosplash', '-noawt', '-r "cd(\'/data/experiment/microdrive/g4r4/Ephys/ksort/20190713_01/20190713_01_alles\'); dir; run_master"', '-logfile /data/experiment/microdrive/g4r4/Ephys/ksort/20190713_01/20190713_01_alles/kilosort_mat.log']' died with <Signals.SIGKILL: 9>.

In [25]:
sort_return_value

0

In [26]:
sort_result

"\n                            < M A T L A B (R) >\n                  Copyright 1984-2019 The MathWorks, Inc.\n              R2019a Update 1 (9.6.0.1099231) 64-bit (glnxa64)\n                               April 12, 2019\n\n \nTo get started, type doc.\nFor product information, visit www.mathworks.com.\n \n\n.                         kilosort_mat.log          spike_templates.npy       \n..                        master.m                  spike_times.npy           \n.phy                      params.json               temp_wh.dat               \namplitudes.npy            params.py                 template_feature_ind.npy  \nchanMap.mat               pc_feature_ind.npy        template_features.npy     \nchannel_map.npy           pc_features.npy           templates.npy             \nchannel_positions.npy     phy.log                   templates_ind.npy         \ncluster_Amplitude.tsv     raw.bin                   whitening_mat.npy         \ncluster_ContamPct.tsv     rez.mat                 

In [23]:
file_paths

{'bin': '/data/experiment/microdrive/g4r4/Ephys/ksort/20190715_02/20190715_02_alles/raw.bin',
 'params': '/data/experiment/microdrive/g4r4/Ephys/ksort/20190715_02/20190715_02_alles/params.json',
 'prb': '/data/experiment/microdrive/g4r4/Ephys/ksort/20190715_02/20190715_02_alles/chanMap.mat',
 'rez': '/data/experiment/microdrive/g4r4/Ephys/ksort/20190715_02/20190715_02_alles/rez2.mat',
 'mat_log': '/data/experiment/microdrive/g4r4/Ephys/ksort/20190715_02/20190715_02_alles/kilosort_mat.log',
 'phy_par': '/data/experiment/microdrive/g4r4/Ephys/ksort/20190715_02/20190715_02_alles/params.py'}

## Command for viewing:
 - open up terminal with the environment phy or phy2
 - go to the ss_data folder for the session
 - run the command: phy template-gui params.py

# After manual curation
 - save the curated spikes
 - come back to the notebook and run the cell below

In [27]:
from pipefinch.h5tools.kwik import kwikfunctions as kwkff
reload(kwkff)
reload(et)

kwkff.kilo_to_kwik(exp_struct['files']['kwd'],
                 exp_struct['files']['kwik'],
                 exp_struct['folders']['ksort'],
                 #rec_in_binary=selection_rec_list,
                 raw_format='sgl')

#sglu.all_sgl_to_kwd(sess_par, include_blocks=['adc', 'dig_in'], overwrite=True)

2019-11-16 21:32:49,809 - pipefinch.h5tools.kwik.kwikfunctions - INFO - Creating kwik file /data/experiment/microdrive/g4r4/Ephys/kwik/20190715_02/20190715_02_alles/sort_0/spikes.kwik from kilosort folder /data/experiment/microdrive/g4r4/Ephys/ksort/20190715_02/20190715_02_alles
2019-11-16 21:32:49,810 - pipefinch.h5tools.kwik.kwikfunctions - INFO - Found clu file, will attempt to unpack manual sorted data from kilosort
2019-11-16 21:32:49,813 - pipefinch.h5tools.kwik.kwikfunctions - INFO - 30000.0
2019-11-16 21:32:50,085 - pipefinch.h5tools.kwik.kwikfunctions - INFO - Making spike tables
2019-11-16 21:32:50,426 - pipefinch.h5tools.kwik.kwikfunctions - INFO - 30000.0
2019-11-16 21:32:50,575 - pipefinch.h5tools.kwik.kwikfunctions - INFO - Making rec tables (make_rec_groups)
2019-11-16 21:32:50,667 - pipefinch.h5tools.kwik.kwikfunctions - INFO - Making cluster group tables
2019-11-16 21:32:50,668 - pipefinch.h5tools.kwik.kwikfunctions - INFO - found cluster tags file in /data/experiment/

In [25]:
exp_struct['folders']['ksort']

'/data/experiment/microdrive/g4r4/Ephys/ksort/20190715_02/20190715_02_alles'

In [26]:
os.path.isfile('/data/experiment/microdrive/p14r14/Ephys/ksort/2019-02-15_3125_02/cluster_group.tsv')

True

### Generate the kwd file with the adc streams and dig inputs (for events, like perceptron)

In [27]:
#reload(sglu)
# this needs to run only once for the session. It goes through all the epochs
#sglu.all_sgl_to_kwd(sess_par, include_blocks=['adc', 'dig_in'], overwrite=True)

2019-11-15 20:17:30,853 - pipefinch.pipeline.sglxutil - INFO - will process to kwd all epochs in session folder skipping trouble sessions /mnt/microdrive/birds/g4r4/Ephys/raw/20190715_02
2019-11-15 20:17:30,854 - pipefinch.pipeline.sglxutil - INFO - found 7 epoch subfolders
2019-11-15 20:17:30,855 - pipefinch.pipeline.sglxutil - INFO - epoch folder /mnt/microdrive/birds/g4r4/Ephys/raw/20190715_02/20190715_02_00-undir_g0
2019-11-15 20:17:30,862 - pipefinch.pipeline.sglxutil - INFO - Will create a new kwd file and overwrite the old one
2019-11-15 20:17:30,863 - pipefinch.pipeline.sglxutil - INFO - dest file: /data/experiment/microdrive/g4r4/Ephys/kwik/20190715_02/20190715_02_00-undir_g0/stream.kwd
2019-11-15 20:17:30,868 - pipefinch.pipeline.sglxutil - INFO - meta file /mnt/microdrive/birds/g4r4/Ephys/raw/20190715_02/20190715_02_00-undir_g0/20190715_02_undir_g0_t0.nidq.meta
2019-11-15 20:17:39,387 - pipefinch.pipeline.sglxutil - INFO - block adc
2019-11-15 20:17:39,390 - pipefinch.pipeli

['/mnt/microdrive/birds/g4r4/Ephys/raw/20190715_02/20190715_02_all',
 '/mnt/microdrive/birds/g4r4/Ephys/raw/20190715_02/20190715_02_alle',
 '/mnt/microdrive/birds/g4r4/Ephys/raw/20190715_02/20190715_02_alles']

# These won't work in ksort yet

In [31]:
### extract all unit waveforms
# kwikfunctions is imported in this notebook under the alias kwkff; the former
# reload(kwkf) referred to a name that is never defined here (NameError).
reload(kwkff)

from pipefinch.neural import units
reload(units)
units.get_all_unit_waveforms(exp_struct['files']['kwik'], exp_struct['files']['kwd'])

2019-05-21 15:42:28,359 - pipefinch.neural.units - INFO - About to get all waveforms for 3 units in file /data/experiment/microdrive/p14r14/Ephys/kwik/2019-02-15_3125_01/sort_1/spikes.kwik


HBox(children=(IntProgress(value=0, max=3), HTML(value='')))




0

In [None]:
def msort_cleanup(exp_struct: dict):
    """Remove the intermediate msort raw .mda file of a session.

    Args:
        exp_struct: experiment struct; the path is read from
            exp_struct['files']['mda_raw'].

    A file that is already gone is reported with a warning instead of raising,
    so the cleanup can be run more than once.
    """
    # remove the mda files and try to cleanup the msort temp location
    mda_raw_path = exp_struct['files']['mda_raw']
    logger.info('removing intermediate msort mda file {}'.format(mda_raw_path))
    try:
        os.remove(mda_raw_path)
    except FileNotFoundError:
        # nothing left to delete (e.g. the cleanup already ran)
        logger.warning('msort mda file {} not found, nothing to remove'.format(mda_raw_path))

def ksort_cleanup(exp_struct: dict):
    """Placeholder for the cleanup of the kilosort intermediate files.

    The definition previously had no body (only a truncated comment), which
    made the whole cell fail to parse. Which files should be removed was never
    specified, so nothing is deleted here.

    Args:
        exp_struct: experiment struct of the session (currently unused).

    Raises:
        NotImplementedError: always.
    """
    # remove the ... (TODO: decide which ksort intermediate files are safe to delete)
    raise NotImplementedError('ksort_cleanup is not implemented yet')
def msort_tmp_clean():
    """Log the msort temporary directory named by ML_TEMPORARY_DIRECTORY.

    Only the log message is emitted; no file or folder is removed here.
    Raises KeyError if the environment variable is not set.
    """
    ml_tmp_env = os.environ['ML_TEMPORARY_DIRECTORY']
    msort_tmp_path = os.path.abspath(ml_tmp_env)
    logger.info('Cleaning up msort temp dir {}'.format(msort_tmp_path))
    
#msort_tmp_clean()
msort_cleanup(exp_struct)

In [165]:
 exp_struct['files']

{'par': '/media/zinch/Windows/experiment/p14r14/ephys/msort/2019-02-13_1750_01/params.json',
 'set': '/mnt/zuperfinchjr/Data/p14r14/ephys/raw/2019-02-13_1750_01/settings.isf',
 'kwd': '/media/zinch/Windows/experiment/p14r14/ephys/kwik/2019-02-13_1750_01/streams.kwd',
 'kwik': '/media/zinch/Windows/experiment/p14r14/ephys/kwik/2019-02-13_1750_01/spikes.kwik',
 'kwe': '/media/zinch/Windows/experiment/p14r14/ephys/kwik/2019-02-13_1750_01/events.kwe',
 'mda_raw': '/media/zinch/Windows/experiment/p14r14/ephys/msort/2019-02-13_1750_01/raw.mda'}

## Dig into Ksort output files
Looking for:
    - main channels
    - spike waveforms

In [28]:
# Load the kilosort output arrays and the cluster label tables of the session.
ksort_dir = exp_struct['folders']['ksort']

# NOTE: despite its name, templates_path points at pc_features.npy
templates_path = os.path.join(ksort_dir, 'pc_features.npy')
pc = np.load(templates_path)

spk_clu = np.load(os.path.join(ksort_dir, 'spike_clusters.npy'))
spk_temp = np.load(os.path.join(ksort_dir, 'spike_templates.npy'))
spk_temp_ind = np.load(os.path.join(ksort_dir, 'template_feature_ind.npy'))
pc_ind = np.load(os.path.join(ksort_dir, 'pc_feature_ind.npy'))

# Both tsv tables are read with the same two-column layout, skipping the header row.
cluster_table_dtype = {'names': ('cluster_id', 'group'),
                       'formats': ('i2', 'S8')}
clu_grp, clu_ks = (
    np.loadtxt(os.path.join(ksort_dir, tsv_name), dtype=cluster_table_dtype, skiprows=1)
    for tsv_name in ('cluster_group.tsv', 'cluster_KSLabel.tsv')
)

In [43]:
spk_temp_ind[:,0]

array([  0,   1,   2,   3,   4,   5,   6,   7,   8,   9,  10,  11,  12,
        13,  14,  15,  16,  17,  18,  19,  20,  21,  22,  23,  24,  25,
        26,  27,  28,  29,  30,  31,  32,  33,  34,  35,  36,  37,  38,
        39,  40,  41,  42,  43,  44,  45,  46,  47,  48,  49,  50,  51,
        52,  53,  54,  55,  56,  57,  58,  59,  60,  61,  62,  63,  64,
        65,  66,  67,  68,  69,  70,  71,  72,  73,  74,  75,  76,  77,
        78,  79,  80,  81,  82,  83,  84,  85,  86,  87,  88,  89,  90,
        91,  92,  93,  94,  95,  96,  97,  98,  99, 100, 101, 102, 103,
       104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116,
       117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129,
       130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142,
       143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155,
       156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168,
       169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 18

In [45]:
np.max(spk_clu)

2100

In [49]:
np.max(clu_ks['cluster_id'])

211

In [53]:
# NOTE(review): np.max(spk_clu) is displayed above as an integer (2100), so
# spk_clu presumably has an integer dtype. NumPy cannot store NaN in an integer
# array, so this assignment is expected to raise ValueError - confirm the intent.
# If it did succeed, it would overwrite in place every cluster label that was
# loaded from spike_clusters.npy.
spk_clu[:] = np.nan

In [214]:
pc_ind

array([[  1,   3,   0, ...,  28,  31,  30],
       [  1,   3,   0, ...,  28,  31,  30],
       [  1,   3,   0, ...,  28,  31,  30],
       ...,
       [ 32,  30,  34, ...,  47,  16,  48],
       [ 34,  32,  36, ...,  49,  18,  50],
       [226, 224, 228, ..., 241, 210, 242]], dtype=uint32)

In [203]:
spk_clu.shape

(9680565,)

In [200]:
spk_temp.shape

(9680565, 1)

In [199]:
np.unique(spk_temp)

array([  0,   1,   2,   3,   4,   5,   6,   7,   8,   9,  10,  11,  12,
        13,  14,  15,  16,  17,  18,  19,  20,  21,  22,  23,  24,  25,
        26,  27,  28,  29,  30,  31,  32,  33,  34,  35,  36,  37,  38,
        39,  40,  41,  42,  43,  44,  45,  46,  47,  48,  49,  50,  51,
        52,  53,  54,  55,  56,  57,  58,  59,  60,  61,  62,  63,  64,
        65,  66,  67,  68,  69,  70,  71,  72,  73,  74,  75,  76,  77,
        78,  79,  80,  81,  82,  83,  84,  85,  86,  87,  88,  89,  90,
        91,  92,  93,  94,  95,  96,  97,  98,  99, 100, 101, 102, 103,
       104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116,
       117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129,
       130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142,
       143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155,
       156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168,
       169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 18

In [36]:
clu_grp.shape

(183,)

In [37]:
clu_ks.shape

(212,)

In [38]:
clu_grp[clu_grp['cluster_id']==500]

array([], dtype=[('cluster_id', '<i2'), ('group', 'S8')])

In [188]:
clu_grp.shape

(183,)

In [189]:
clu_ks.shape

(212,)

In [192]:
pc_ind

array([[  1,   3,   0, ...,  28,  31,  30],
       [  1,   3,   0, ...,  28,  31,  30],
       [  1,   3,   0, ...,  28,  31,  30],
       ...,
       [ 32,  30,  34, ...,  47,  16,  48],
       [ 34,  32,  36, ...,  49,  18,  50],
       [226, 224, 228, ..., 241, 210, 242]], dtype=uint32)

In [72]:
pc_ind[1]

array([ 1,  3,  0,  2,  5,  4,  7,  6,  9,  8, 11, 10, 13, 12, 15, 14, 17,
       16, 19, 18, 21, 20, 23, 22, 25, 24, 27, 26, 29, 28, 31, 30],
      dtype=uint32)

In [178]:
pc.shape

(9680565, 3, 32)

In [67]:
xx = np.load(os.path.join(exp_struct['folders']['ksort'], 
                          'templates.npy'))
xx.shape

(196, 82, 362)