## Notebook for spike sorting from SGL data using Kilosort; dirtily merging two epochs
Uses:
    - intan2kwik (https://github.com/zekearneodo/intan2kwik/blob/master/README.md)
    - mountainlab suite (https://github.com/flatironinstitute/mountainlab-js)
    - mountainsort https://github.com/flatironinstitute/mountainsort_examples/blob/master/README.md
    - mountainsort examples https://github.com/flatironinstitute/mountainsort_examples/blob/master/README.md

In [1]:
import socket
import os
import glob
import json
import shutil 
from typing import Union
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import subprocess
from datetime import timedelta
from importlib import reload
import shutil

# pipeline imports
from pipefinch.h5tools.kwik import kutil
from pipefinch.pipeline import probes
from pipefinch.pipeline import sglxutil as sglu
from pipefinch.neural.sort import kilo

from pipefinch.pipeline import filestructure as et

import logging

# Set up the root logger: it accepts DEBUG and above, while the console
# handler attached below only emits INFO and above.
logger = logging.getLogger()
logger.setLevel(logging.DEBUG)
formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')

# Console (stream) handler using the timestamped format defined above.
# NOTE(review): each run of this cell adds one more handler to the root
# logger, so re-running the cell duplicates every log line.
ch = logging.StreamHandler()
ch.setLevel(logging.INFO)
ch.setFormatter(formatter)
logger.addHandler(ch)
        
# Confirm that logging works and record which machine the notebook runs on.
logger.info('Logger set')
logger.info('Hostname {}'.format(socket.gethostname()))

2019-11-12 21:02:42,029 - root - INFO - Logger set
2019-11-12 21:02:42,030 - root - INFO - Hostname zpikezorter


### Session parameters and raw files

In [2]:
reload(et)
bird = 'g4r4'
all_bird_sess = et.list_sessions(bird)
all_bird_sess

['20190711_01',
 '20190711_02',
 '20190711_03_tipref',
 '20190711_04',
 '20190712_01',
 '20190712_02',
 '20190712_03',
 '20190712_04',
 '20190712_05',
 '20190711_4800_01_g0',
 '20190713_01',
 '20190723_02',
 '20190712_01_extref_g0',
 '20190714_05',
 '20190714_06',
 '20190715_01',
 '20190715_02',
 '20190716_01',
 '20190716_02',
 '20190717_01',
 '20190717_02',
 '20190718_01',
 '20190718_02',
 '20190719_01',
 '20190719_02',
 '20190720_01',
 '20190721_01',
 '20190722_01',
 '20190722_02',
 '20190722_03',
 '20190722_04',
 '20190723_01',
 '20190723_03',
 '20190724_01',
 '20190713_02',
 '20190713_03',
 '20190713_04',
 '20190724_02',
 '20190714_01',
 '20190714_03',
 '20190714_04',
 '20190724_03',
 '20190725_01',
 '20190725_02',
 '20190725_03',
 '20190726_01',
 '20190726_02',
 '20190711_04_extref',
 '20190714_02']

In [3]:
# Pick up any edits made to the filestructure module since it was imported.
reload(et)

# Identifies the session to process; passed to the sglu/et helpers below.
sess_par = {'bird': bird,
           'sess': '20190715_02',
           'probe': 'probe_0', # probe to sort ('probe_0', 'probe_1') (to lookup in the rig_par which port to extract)
           'sort': 0, 
           'epoch': None, # for the subfolder in the neuropix data
           }

# Dictionary of 'folders' and 'files' paths for this bird/session/sort
# (its content is displayed in the next cell).
exp_struct = et.get_exp_struct(sess_par['bird'], sess_par['sess'], sess_par['sort'])

# NOTE(review): sort_params and ds_params are not used anywhere in the
# visible part of this notebook; presumably leftovers from the mountainsort
# version of the pipeline -- confirm before removing.
sort_params = {'adjacency_radius': -1,
              'detect_threshold': 2,
              'freq_min': 600}

ds_params = {'detect_sign': -1}

# Initial Kilosort options; ks_params is redefined further down, before the
# Kilosort scripts are generated.
ks_params = {'use_gpu': 1,
            'auto_merge': 1,
            'filt_per_chan': 4,
            }

# visualization default parameters
# NOTE(review): the *_ms values look like a window around the event in
# milliseconds and the *_samples/span entries like placeholders to be
# computed later -- confirm against the code that consumes viz_par.
viz_par = { 'evt_name': 'motif',
           'evt_signal': 'trig_perceptron',
            'evt_edge': 1,
            'pre_ms': -500,
            'post_ms': 300,
            
            'pre_samples': 0,
            'post_samples': 0,
            'span': 0,
            }

# convenient paths (shortcuts into exp_struct['folders'])
kwik_folder = exp_struct['folders']['kwik']
ksort_folder = exp_struct['folders']['ksort']
raw_folder = exp_struct['folders']['raw']

In [4]:
exp_struct

{'folders': {'bird': '/mnt/microdrive/birds/g4r4',
  'raw': '/mnt/microdrive/birds/g4r4/Ephys/raw/20190715_02',
  'kwik': '/data/experiment/microdrive/g4r4/Ephys/kwik/20190715_02',
  'msort': '/data/experiment/microdrive/g4r4/Ephys/msort/20190715_02',
  'ksort': '/data/experiment/microdrive/g4r4/Ephys/ksort/20190715_02'},
 'files': {'par': '/data/experiment/microdrive/g4r4/Ephys/ksort/20190715_02/params.json',
  'set': '/mnt/microdrive/birds/g4r4/Ephys/raw/20190715_02/settings.isf',
  'rig': '/mnt/microdrive/birds/g4r4/Ephys/raw/20190715_02/rig.json',
  'kwd': '/data/experiment/microdrive/g4r4/Ephys/kwik/20190715_02/stream.kwd',
  'kwik': '/data/experiment/microdrive/g4r4/Ephys/kwik/20190715_02/sort_0/spikes.kwik',
  'kwe': '/data/experiment/microdrive/g4r4/Ephys/kwik/20190715_02/events.kwe',
  'mda_raw': '/data/experiment/microdrive/g4r4/Ephys/msort/20190715_02/raw.mda',
  'bin_raw': '/data/experiment/microdrive/g4r4/Ephys/ksort/20190715_02/raw.bin'}}

### Pick an epoch and make the file structures of the merge

In [5]:
sess_epochs = sglu.list_sgl_epochs(sess_par)
sess_epochs

['20190715_02_dir_g0',
 '20190715_02_undir_g0',
 '20190715_02_dir_0-0',
 '20190715_02_dir_0-1',
 '20190715_02_all']

In [6]:
ep = sess_epochs[0]
ep.split('_')

['20190715', '02', 'dir', 'g0']

In [7]:
sess_par

{'bird': 'g4r4',
 'sess': '20190715_02',
 'probe': 'probe_0',
 'sort': 0,
 'epoch': None}

In [8]:
sglu.sgl_struct(sess_par, ep)

{'folders': {'bird': '/mnt/microdrive/birds/g4r4/20190715_02_dir_g0',
  'raw': '/mnt/microdrive/birds/g4r4/Ephys/raw/20190715_02/20190715_02_dir_g0',
  'kwik': '/data/experiment/microdrive/g4r4/Ephys/kwik/20190715_02/20190715_02_dir_g0',
  'msort': '/data/experiment/microdrive/g4r4/Ephys/msort/20190715_02/20190715_02_dir_g0',
  'ksort': '/data/experiment/microdrive/g4r4/Ephys/ksort/20190715_02/20190715_02_dir_g0'},
 'files': {'par': '/data/experiment/microdrive/g4r4/Ephys/ksort/20190715_02/20190715_02_dir_g0/params.json',
  'set': '/mnt/microdrive/birds/g4r4/Ephys/raw/20190715_02/settings.isf',
  'rig': '/mnt/microdrive/birds/g4r4/Ephys/raw/20190715_02/rig.json',
  'kwd': '/data/experiment/microdrive/g4r4/Ephys/kwik/20190715_02/20190715_02_dir_g0/stream.kwd',
  'kwik': '/data/experiment/microdrive/g4r4/Ephys/kwik/20190715_02/20190715_02_dir_g0/sort_0/spikes.kwik',
  'kwe': '/data/experiment/microdrive/g4r4/Ephys/kwik/20190715_02/20190715_02_dir_g0/events.kwe',
  'mda_raw': '/data/exper

In [9]:
sglu.sgl_struct(sess_par, 'new_epoch')

{'folders': {'bird': '/mnt/microdrive/birds/g4r4/new_epoch',
  'raw': '/mnt/microdrive/birds/g4r4/Ephys/raw/20190715_02/new_epoch',
  'kwik': '/data/experiment/microdrive/g4r4/Ephys/kwik/20190715_02/new_epoch',
  'msort': '/data/experiment/microdrive/g4r4/Ephys/msort/20190715_02/new_epoch',
  'ksort': '/data/experiment/microdrive/g4r4/Ephys/ksort/20190715_02/new_epoch'},
 'files': {'par': '/data/experiment/microdrive/g4r4/Ephys/ksort/20190715_02/new_epoch/params.json',
  'set': '/mnt/microdrive/birds/g4r4/Ephys/raw/20190715_02/settings.isf',
  'rig': '/mnt/microdrive/birds/g4r4/Ephys/raw/20190715_02/rig.json',
  'kwd': '/data/experiment/microdrive/g4r4/Ephys/kwik/20190715_02/new_epoch/stream.kwd',
  'kwik': '/data/experiment/microdrive/g4r4/Ephys/kwik/20190715_02/new_epoch/sort_0/spikes.kwik',
  'kwe': '/data/experiment/microdrive/g4r4/Ephys/kwik/20190715_02/new_epoch/events.kwe',
  'mda_raw': '/data/experiment/microdrive/g4r4/Ephys/msort/20190715_02/new_epoch/raw.mda',
  'bin_raw': '/

In [10]:
src_f = os.path.abspath('/mnt/microdrive/birds/g4r4/Ephys/raw/20190715_02/20190715_02_dir_g0/20190715_02_dir_g0_imec0')
dst_f = os.path.abspath('/mnt/microdrive/birds/g4r4/Ephys/raw/20190715_02/20190715_02_all/20190715_02_all_imec0')

def safe_copy(src_path: str, dest_path: str):
    """Copy ``src_path`` to ``dest_path`` unless the destination already exists.

    An existing destination is left untouched and the skip is logged, so the
    first file copied to a given destination is the one that is kept.

    Args:
        src_path: path of the file to copy.
        dest_path: path of the copy to create.
    """
    # Guard clause: never overwrite something that is already there.
    if os.path.exists(dest_path):
        logger.info('file {} already existed, nuttin to do'.format(dest_path))
        return
    shutil.copyfile(src_path, dest_path)

def append_binary(src_path: str, dest_path: str, chunk_size: int = 16 * 1024 * 1024):
    """Append the bytes of ``src_path`` to the end of ``dest_path``.

    The destination is opened in append mode, so it is created when missing.
    The source is streamed in chunks instead of being read in one go, so
    memory use stays bounded regardless of the size of the binary.

    Args:
        src_path: binary file whose content is appended.
        dest_path: binary file that receives the content.
        chunk_size: number of bytes moved per read/write step.
    """
    logger.info('Appending binaries {} -> {}'.format(src_path, dest_path))
    with open(dest_path, "ab") as dest_file, open(src_path, "rb") as src_file:
        if os.path.samefile(src_path, dest_path):
            # Appending a file to itself: snapshot the content first,
            # otherwise the streaming reader would keep chasing the bytes
            # that are being appended.
            dest_file.write(src_file.read())
        else:
            shutil.copyfileobj(src_file, dest_file, chunk_size)
        
def merge_raw_sgl(src_fold: str, dest_fold: str, overwrite=True):
    """Merge the recording found in ``src_fold`` into ``dest_fold``.

    For every ``*.meta`` file in ``src_fold`` a renamed copy is placed in
    ``dest_fold`` (only if no such file exists there yet, so the meta of the
    first merged folder is the one that is kept) and the matching binary is
    appended to the corresponding binary in ``dest_fold``.

    NOTE(review): the kept .meta file is copied verbatim, so any size or
    duration fields in it presumably no longer describe the concatenated
    binary -- confirm before relying on them downstream.

    Args:
        src_fold: folder holding the .meta/.bin pairs of one recording.
        dest_fold: folder receiving the merged files. When its name ends in
            an ``_imec<N>`` token, that token is dropped to build the base
            name of the merged files.
        overwrite: when True, ``dest_fold`` is deleted and recreated before
            merging; when False, data is appended to whatever is there.

    Returns:
        The list of .meta paths found in ``src_fold``.

    Raises:
        NotImplementedError: more than one trigger index (t0, t1, ...) found.
        RuntimeError: no .meta file (hence no trigger index) found.
        ValueError: a .meta file name has no integer trigger index.
    """
    logger.info('merging {} into {}'.format(src_fold, dest_fold))

    # Validate the source BEFORE touching the destination, so that a bad
    # source folder cannot wipe already merged data.
    meta_files = glob.glob(os.path.join(src_fold, '*.meta'))
    # check that there is only one triggered recording in the folder
    # (only one tuple of bin, meta with t0 as identifier)
    t_instances = {int(os.path.basename(x).split('_')[-1].split('.')[0].strip('t'))
                   for x in meta_files}
    if len(t_instances) > 1:
        raise NotImplementedError('Many triggers (t0, t1,...) in the epoch, dont know how to handle yet')
    if not t_instances:
        raise RuntimeError('No recording t identifiers found (t0)')

    if overwrite:
        logger.info('Will cleanup destination folder first: {}'.format(dest_fold))
        if os.path.exists(dest_fold):
            shutil.rmtree(dest_fold)
    # make the dir. if overwrite, then it's already deleted, otherwise it's ok if it already exists
    os.makedirs(dest_fold, exist_ok=True)
    logger.info(meta_files)

    # Base name of the merged files: the destination folder name, minus a
    # trailing '_imec<N>' token when the destination is a probe folder.
    # (str.find returns -1, which is truthy, when the substring is absent,
    # so it must not be used as the test here.)
    new_base = os.path.basename(os.path.normpath(dest_fold))
    head, sep, tail = new_base.rpartition('_')
    if sep and tail.startswith('imec'):
        new_base = head
    logger.info('new base {}'.format(new_base))

    for m_path in meta_files:
        meta_file = os.path.basename(m_path)
        # '<run>_t0.imec0.ap.meta' -> ('<run>_t0', 'imec0.ap.meta')
        m_fname, m_ext = meta_file.split('.', 1)
        # keep the trigger tag ('t0') of the source file
        m_t = m_fname.rpartition('_')[-1]
        new_meta_file = '{}_{}.{}'.format(new_base, m_t, m_ext)
        logger.info('New meta is {}'.format(new_meta_file))
        logger.info('dest_fold {}'.format(dest_fold))
        # copy the meta only if there is none of its class yet (i.e. leave
        # just the first meta file for each merged session)
        new_bin_path, new_meta_path = sglu.get_data_meta_path(os.path.join(dest_fold, new_meta_file))
        safe_copy(m_path, new_meta_path)

        # append binaries
        bin_path, _ = sglu.get_data_meta_path(m_path)
        append_binary(bin_path, new_bin_path)
    return meta_files

#m_f = merge_raw_sgl(src_f, dst_f)

2019-11-12 21:02:46,775 - root - INFO - merging /mnt/microdrive/birds/g4r4/Ephys/raw/20190715_02/20190715_02_dir_g0/20190715_02_dir_g0_imec0 into /mnt/microdrive/birds/g4r4/Ephys/raw/20190715_02/20190715_02_all/20190715_02_all_imec0
2019-11-12 21:02:46,777 - root - INFO - Will cleanup destination folder first: /mnt/microdrive/birds/g4r4/Ephys/raw/20190715_02/20190715_02_all/20190715_02_all_imec0
2019-11-12 21:02:48,223 - root - INFO - ['/mnt/microdrive/birds/g4r4/Ephys/raw/20190715_02/20190715_02_dir_g0/20190715_02_dir_g0_imec0/20190715_02_dir_g0_t0.imec0.ap.meta', '/mnt/microdrive/birds/g4r4/Ephys/raw/20190715_02/20190715_02_dir_g0/20190715_02_dir_g0_imec0/20190715_02_dir_g0_t0.imec0.lf.meta']
2019-11-12 21:02:48,224 - root - INFO - new base 20190715_02_all
2019-11-12 21:02:48,225 - root - INFO - New meta is 20190715_02_all_t0.imec0.ap.meta
2019-11-12 21:02:48,225 - root - INFO - dest_fold /mnt/microdrive/birds/g4r4/Ephys/raw/20190715_02/20190715_02_all/20190715_02_all_imec0
2019-11-1

In [11]:
epochs_to_merge = sglu.list_sgl_epochs(sess_par)

def merge_epochs(sess_par, epochs_to_merge, overwrite=True):
    """Concatenate the raw data of several epochs into the epoch '<sess>_all'.

    The nidq recording of every epoch is merged into the raw folder of the
    new epoch, and the recording of every imec probe into the sub-folder
    '<sess>_all_imec<probe>' of that folder, in the order given by
    ``epochs_to_merge``.

    Args:
        sess_par: session parameters; 'sess' names the session and the whole
            dict is handed to ``sglu.sgl_struct``.
        epochs_to_merge: names of the epochs to concatenate, in order.
        overwrite: when True, the raw folder of the merged epoch is deleted
            and recreated first; when False, data is appended to it.

    Returns:
        Tuple ``(exp_struct, epoch_structs, epoch_folders, new_epoch)``: the
        structure of the merged epoch, the structures and raw folder layouts
        of the source epochs, and the name of the merged epoch.

    Raises:
        ValueError: the merged epoch itself is listed in ``epochs_to_merge``.
    """
    new_epoch = '{}_{}'.format(sess_par['sess'], 'all')
    # Merging the destination into itself would delete it (overwrite) or
    # duplicate its content (append); refuse before anything is touched.
    if new_epoch in epochs_to_merge:
        raise ValueError('Merged epoch {} cannot be one of the epochs to merge'.format(new_epoch))

    epoch_structs = [sglu.sgl_struct(sess_par, epoch) for epoch in epochs_to_merge]
    epoch_folders = [sglu.sgl_file_struct(epoch_struct['folders']['raw'])[0] for epoch_struct in epoch_structs]
    exp_struct = sglu.sgl_struct(sess_par, new_epoch)
    sgl_folder, _ = sglu.sgl_file_struct(exp_struct['folders']['raw'])
    logger.info('SGL folder struct {}'.format(sgl_folder))
    # make the folders
    merged_raw_folder = exp_struct['folders']['raw']
    logger.info('Creating merged raw {}'.format(merged_raw_folder))
    if overwrite:
        logger.info('Will cleanup destination folder first: {}'.format(merged_raw_folder))
        if os.path.exists(merged_raw_folder):
            shutil.rmtree(merged_raw_folder)
    os.makedirs(merged_raw_folder, exist_ok=True)
    # copy set files from the first session
    # (copy_raw lists the exp_struct['files'] keys to copy; it is empty for
    # now, so nothing is copied)
    copy_raw = []
    for k, v in exp_struct['files'].items():
        if k in copy_raw:
            shutil.copyfile(epoch_structs[0]['files'][k], v)

    # Merge the raw data epoch by epoch. The destination was already cleaned
    # above when requested, so every merge below must APPEND
    # (overwrite=False); otherwise each epoch would wipe what the previous
    # epochs contributed.
    for epoch_folder in epoch_folders:
        logger.info('* Adding epoch {}'.format(epoch_folder['nidq']))
        for v in epoch_folder.values():
            if isinstance(v, dict):
                # deal with the probes: {probe number: probe folder}
                for prb, src_fold in v.items():
                    probe_folder = '{}_imec{}'.format(new_epoch, prb)
                    dst_path = os.path.join(merged_raw_folder, probe_folder)
                    merge_raw_sgl(src_fold, dst_path, overwrite=False)
            else:
                # plain entry (the nidq folder): merge into the epoch folder
                merge_raw_sgl(v, merged_raw_folder, overwrite=False)

    return exp_struct, epoch_structs, epoch_folders, new_epoch

#exp_struct = sglu.sgl_struct(sess_par, new_epoch)
exp_struct, epoch_structs, epoch_folders, new_epoch = merge_epochs(sess_par, sglu.list_sgl_epochs(sess_par)[:2])
#merge_epochs(sess_par, sglu.list_sgl_epochs(sess_par)[:2])

2019-11-12 21:20:43,728 - root - INFO - SGL folder struct {'nidq': '/mnt/microdrive/birds/g4r4/Ephys/raw/20190715_02/20190715_02_all', 'imec': {0: '/mnt/microdrive/birds/g4r4/Ephys/raw/20190715_02/20190715_02_all/20190715_02_all_imec0'}}
2019-11-12 21:20:43,729 - root - INFO - Creating merged raw /mnt/microdrive/birds/g4r4/Ephys/raw/20190715_02/20190715_02_all
2019-11-12 21:20:43,730 - root - INFO - Will cleanup destination folder first: /mnt/microdrive/birds/g4r4/Ephys/raw/20190715_02/20190715_02_all
2019-11-12 21:20:44,867 - root - INFO - * Adding epoch /mnt/microdrive/birds/g4r4/Ephys/raw/20190715_02/20190715_02_dir_g0
2019-11-12 21:20:44,868 - root - INFO - merging /mnt/microdrive/birds/g4r4/Ephys/raw/20190715_02/20190715_02_dir_g0 into /mnt/microdrive/birds/g4r4/Ephys/raw/20190715_02/20190715_02_all
2019-11-12 21:20:44,871 - root - INFO - ['/mnt/microdrive/birds/g4r4/Ephys/raw/20190715_02/20190715_02_dir_g0/20190715_02_dir_g0_t0.nidq.meta']
2019-11-12 21:20:44,872 - root - INFO - 

In [45]:
os.path.split(m_f[0])[-1]

'20190715_02_dir_g0_t0.imec0.ap.meta'

In [12]:
exp_struct

{'folders': {'bird': '/mnt/microdrive/birds/g4r4/20190715_02_all',
  'raw': '/mnt/microdrive/birds/g4r4/Ephys/raw/20190715_02/20190715_02_all',
  'kwik': '/data/experiment/microdrive/g4r4/Ephys/kwik/20190715_02/20190715_02_all',
  'msort': '/data/experiment/microdrive/g4r4/Ephys/msort/20190715_02/20190715_02_all',
  'ksort': '/data/experiment/microdrive/g4r4/Ephys/ksort/20190715_02/20190715_02_all'},
 'files': {'par': '/data/experiment/microdrive/g4r4/Ephys/ksort/20190715_02/20190715_02_all/params.json',
  'set': '/mnt/microdrive/birds/g4r4/Ephys/raw/20190715_02/settings.isf',
  'rig': '/mnt/microdrive/birds/g4r4/Ephys/raw/20190715_02/rig.json',
  'kwd': '/data/experiment/microdrive/g4r4/Ephys/kwik/20190715_02/20190715_02_all/stream.kwd',
  'kwik': '/data/experiment/microdrive/g4r4/Ephys/kwik/20190715_02/20190715_02_all/sort_0/spikes.kwik',
  'kwe': '/data/experiment/microdrive/g4r4/Ephys/kwik/20190715_02/20190715_02_all/events.kwe',
  'mda_raw': '/data/experiment/microdrive/g4r4/Ephys

In [13]:
epoch_structs

[{'folders': {'bird': '/mnt/microdrive/birds/g4r4/20190715_02_dir_g0',
   'raw': '/mnt/microdrive/birds/g4r4/Ephys/raw/20190715_02/20190715_02_dir_g0',
   'kwik': '/data/experiment/microdrive/g4r4/Ephys/kwik/20190715_02/20190715_02_dir_g0',
   'msort': '/data/experiment/microdrive/g4r4/Ephys/msort/20190715_02/20190715_02_dir_g0',
   'ksort': '/data/experiment/microdrive/g4r4/Ephys/ksort/20190715_02/20190715_02_dir_g0'},
  'files': {'par': '/data/experiment/microdrive/g4r4/Ephys/ksort/20190715_02/20190715_02_dir_g0/params.json',
   'set': '/mnt/microdrive/birds/g4r4/Ephys/raw/20190715_02/settings.isf',
   'rig': '/mnt/microdrive/birds/g4r4/Ephys/raw/20190715_02/rig.json',
   'kwd': '/data/experiment/microdrive/g4r4/Ephys/kwik/20190715_02/20190715_02_dir_g0/stream.kwd',
   'kwik': '/data/experiment/microdrive/g4r4/Ephys/kwik/20190715_02/20190715_02_dir_g0/sort_0/spikes.kwik',
   'kwe': '/data/experiment/microdrive/g4r4/Ephys/kwik/20190715_02/20190715_02_dir_g0/events.kwe',
   'mda_raw': 

In [14]:
exp_struct

{'folders': {'bird': '/mnt/microdrive/birds/g4r4/20190715_02_all',
  'raw': '/mnt/microdrive/birds/g4r4/Ephys/raw/20190715_02/20190715_02_all',
  'kwik': '/data/experiment/microdrive/g4r4/Ephys/kwik/20190715_02/20190715_02_all',
  'msort': '/data/experiment/microdrive/g4r4/Ephys/msort/20190715_02/20190715_02_all',
  'ksort': '/data/experiment/microdrive/g4r4/Ephys/ksort/20190715_02/20190715_02_all'},
 'files': {'par': '/data/experiment/microdrive/g4r4/Ephys/ksort/20190715_02/20190715_02_all/params.json',
  'set': '/mnt/microdrive/birds/g4r4/Ephys/raw/20190715_02/settings.isf',
  'rig': '/mnt/microdrive/birds/g4r4/Ephys/raw/20190715_02/rig.json',
  'kwd': '/data/experiment/microdrive/g4r4/Ephys/kwik/20190715_02/20190715_02_all/stream.kwd',
  'kwik': '/data/experiment/microdrive/g4r4/Ephys/kwik/20190715_02/20190715_02_all/sort_0/spikes.kwik',
  'kwe': '/data/experiment/microdrive/g4r4/Ephys/kwik/20190715_02/20190715_02_all/events.kwe',
  'mda_raw': '/data/experiment/microdrive/g4r4/Ephys

In [15]:
sglu.sgl_file_struct(exp_struct['folders']['raw'])

({'nidq': '/mnt/microdrive/birds/g4r4/Ephys/raw/20190715_02/20190715_02_all',
  'imec': {0: '/mnt/microdrive/birds/g4r4/Ephys/raw/20190715_02/20190715_02_all/20190715_02_all_imec0'}},
 Empty DataFrame
 Columns: []
 Index: [])

In [17]:
# in zpike
# Inspect the raw folder of the (merged) epoch.
# in zpike
sess_folder = exp_struct['folders']['raw']
#in lookfar
#sess_folder = '/Users/zeke/experiment/stimsim/Ephys/2019-05-27_stimsim_0000_00_g0'
# Every entry (file or folder) directly inside the raw folder.
all_probe_folders = glob.glob(os.path.join(sess_folder, '*'))
all_probe_folders

# Folder layout as seen by sglxutil: a dict with the 'nidq' folder and an
# 'imec' dict of probe folders (see the displayed output); the second return
# value is discarded.
sgl_folder, _ = sglu.sgl_file_struct(sess_folder)
sgl_folder

{'nidq': '/mnt/microdrive/birds/g4r4/Ephys/raw/20190715_02/20190715_02_all',
 'imec': {}}

In [18]:
## concatenate the binary files
# First '*.bin' file found in the nidq folder of each source epoch.
# The two-name unpacking requires exactly two epochs in epoch_folders.
bin_1, bin_2 = [glob.glob(os.path.join(folder_dict['nidq'], '*.bin'))[0] for folder_dict in epoch_folders]
# Target path: '<raw folder name>.nidq.bin' inside the merged raw folder.
# NOTE(review): unlike the source files ('..._t0.nidq.bin'), this name has no
# '_t0' trigger tag -- confirm that downstream readers can pair it with a meta file.
bin_merged = os.path.join(exp_struct['folders']['raw'], '{}.nidq.bin'.format(os.path.split(exp_struct['folders']['raw'])[-1]))
bin_merged

'/mnt/microdrive/birds/g4r4/Ephys/raw/20190715_02/20190715_02_all/20190715_02_all.nidq.bin'

In [19]:
bin_1

'/mnt/microdrive/birds/g4r4/Ephys/raw/20190715_02/20190715_02_dir_g0/20190715_02_dir_g0_t0.nidq.bin'

In [20]:
bin_merged

'/mnt/microdrive/birds/g4r4/Ephys/raw/20190715_02/20190715_02_all/20190715_02_all.nidq.bin'

In [21]:

        
for fpath in [bin_1, bin_2]:
    print('appending file {}'.format(fpath))
    append_binary(fpath, bin_merged)

appending file /mnt/microdrive/birds/g4r4/Ephys/raw/20190715_02/20190715_02_dir_g0/20190715_02_dir_g0_t0.nidq.bin
appending file /mnt/microdrive/birds/g4r4/Ephys/raw/20190715_02/20190715_02_undir_g0/20190715_02_undir_g0_t0.nidq.bin


In [22]:
bin_merged_bis = bin_merged + '.cat'

In [23]:
!cat {bin_1} {bin_2} > {bin_merged_bis}

In [77]:
## concatenate the binary files
bin_1, bin_2 = [glob.glob(os.path.join(folder_dict['imec'][0], '*.bin'))[0] for folder_dict in epoch_folders]
bin_merged = os.path.join(exp_struct['folders']['raw'], '{}_imec0'.format(new_epoch) , 
                          '{}.imec0.ap.bin'.format(os.path.split(exp_struct['folders']['raw'])[-1]))
bin_merged

'/mnt/microdrive/birds/g4r4/Ephys/raw/20190715_02/20190715_02_dir_0-1/20190715_02_dir_0-1_imec0/20190715_02_dir_0-1.imec0.ap.bin'

In [79]:
!cat {bin_1} {bin_2} > {bin_merged}

In [13]:
epoch_folders

[{'nidq': '/mnt/microdrive/birds/g4r4/Ephys/raw/20190715_02/20190715_02_dir_g0',
  'imec': {0: '/mnt/microdrive/birds/g4r4/Ephys/raw/20190715_02/20190715_02_dir_g0/20190715_02_dir_g0_imec0'}},
 {'nidq': '/mnt/microdrive/birds/g4r4/Ephys/raw/20190715_02/20190715_02_undir_g0',
  'imec': {0: '/mnt/microdrive/birds/g4r4/Ephys/raw/20190715_02/20190715_02_undir_g0/20190715_02_undir_g0_imec0'}}]

##### get the AP files for one imec probe

The imec file.

Here's the meaning of some of the metadata https://github.com/JaneliaSciComp/JRCLUST/wiki/.meta-file. In particular, there is an explanation of what channels in the probe are used and where they are located in the block. More detailed meta here https://github.com/billkarsh/SpikeGLX/blob/master/Markdown/Metadata.md.


In [14]:
# Probe number taken from the session parameters: 'probe_0' -> 0.
probe_id = int(sess_par['probe'].split('_')[-1])

# Folder holding the data of that probe, from the layout dict built earlier.
probe_data_folder = sgl_folder['imec'][probe_id]
probe_data_folder
# All AP-band meta files of the probe.
ap_meta_files = glob.glob(os.path.join(probe_data_folder, '*.ap.meta'))

# Raises IndexError when no '*.ap.meta' file is found in the folder.
ap_meta_files[0]

'/mnt/microdrive/birds/g4r4/Ephys/raw/20190715_02/20190715_02_dir_0-1/20190715_02_dir_0-1_imec0/20190715_02_dir_0-1_t0.imec0.ap.meta'

#### read a file and its meta

In [15]:
# Work with the first AP meta file found for the probe.
imec_meta_file_path = ap_meta_files[0]
# these should come from the .meta file
imec_meta_dict = sglu.get_imec_meta(imec_meta_file_path)

# imec0 is indexed below by 'meta' and, in a later cell, by 'neural'.
# NOTE(review): presumably a dict-like holding the recording and its
# metadata -- confirm in sglxutil.get_imec_data.
imec0 = sglu.get_imec_data(imec_meta_file_path)
n_chan = imec_meta_dict['nsavedchans'] #nSavedChans in meta file
s_f = imec0['meta']['s_f'] #30000.533148 #imSampleRate in meta file



In [16]:
s_f

30000.0

### Load the rig parameters and get the probe file, behavior triggers, etc
 - Get the rig par file
 - Get the aux channels
 - Detect onset of wav files

In [17]:
rig_par = et.get_rig_par(exp_struct)

## Scripts for sorting with Kilosort
Steps involved:
 - Make binary file with selected recs, chans
 - Set kilosort parameters
 - Make kilosort chanmap
 - Make kilosort scripts and phy parameters file (for manual curation)
 - Run the kilosort scripts (via matlab)
 - Expose the paths for manual curation
 - After curation, make the kwik file with sorted data
 - Cleanup and move metadata to permanent locations

### prep the files with their nice formats, locations and names


In [18]:
from pipefinch.neural.sort.kilo import core as ksc

In [19]:
reload(ksc)
ks_params = {'kilo_version': 2,
             'use_gpu': 1,
            'auto_merge': 1,
            'filt_per_chan': 4,
            's_f': int(s_f),
            'n_chan': n_chan}

In [20]:
exp_struct['folders']['ksort']

'/data/experiment/microdrive/g4r4/Ephys/ksort/20190715_02/20190715_02_dir_0-1'

In [21]:
reload(ksc)
reload(probes)
file_paths, out_folder = ksc.make_paths(exp_struct['folders']['ksort'])

In [22]:
file_paths

{'bin': '/data/experiment/microdrive/g4r4/Ephys/ksort/20190715_02/20190715_02_dir_0-1/raw.bin',
 'params': '/data/experiment/microdrive/g4r4/Ephys/ksort/20190715_02/20190715_02_dir_0-1/params.json',
 'prb': '/data/experiment/microdrive/g4r4/Ephys/ksort/20190715_02/20190715_02_dir_0-1/chanMap.mat',
 'rez': '/data/experiment/microdrive/g4r4/Ephys/ksort/20190715_02/20190715_02_dir_0-1/rez2.mat',
 'mat_log': '/data/experiment/microdrive/g4r4/Ephys/ksort/20190715_02/20190715_02_dir_0-1/kilosort_mat.log',
 'phy_par': '/data/experiment/microdrive/g4r4/Ephys/ksort/20190715_02/20190715_02_dir_0-1/params.py'}

In [23]:
# Prepare the Kilosort working folder: paths, parameters, scripts.
reload(ksc)
reload(probes)
file_paths, out_folder = ksc.make_paths(exp_struct['folders']['ksort'])
os.makedirs(out_folder, exist_ok=True)

# make the probe file
# for now, just copy the probe file from defaults to file_paths['prb']
# NOTE(review): the copy below is commented out, so the chanMap file must
# already be in place for the sort to find it -- confirm.

#shutil.copyfile('/home/ezequiel/repos/Kilosort2/configFiles/chanMap_phase3b_allconnect.mat', file_paths['prb'])

# copy the binary file as the 'raw' binary file for the sorting
# this has to be done either way because /data partition is faster
# todo: concatenate them or something, from a kwd
# NOTE(review): every variant of the copy is commented out here, so
# file_paths['bin'] is not (re)written by this cell.
#logger.info('copying raw file into {}'.format(file_paths['bin']))
#imec0['only_neural'].tofile(file_paths['bin'])

#shutil.copyfile(sglu.get_data_meta_path(imec_meta_file_path)[0], file_paths['bin'])

# parameters to pass to the kilosort scripts, other than the defaults
# (this replaces the int(s_f) stored earlier in ks_params with the raw s_f value)
ks_params.update({'s_f': s_f, # required,
                  'n_chan': n_chan, # total number of chans in the .bin file,
                  'dtype_name': imec0['neural'].dtype.name
            })
# Write the Kilosort scripts and the phy parameter file (see the log output below).
ksc.make_kilo_scripts(exp_struct['folders']['ksort'], ks_params)
phy_pars = ksc.make_phy_par_file(ks_params, file_paths)

2019-08-09 17:41:21,573 - pipefinch.neural.sort.kilo.core - INFO - Written kilo script /data/experiment/microdrive/g4r4/Ephys/ksort/20190715_02/20190715_02_dir_0-1/master.m
2019-08-09 17:41:21,576 - pipefinch.neural.sort.kilo.core - INFO - Written kilo script /data/experiment/microdrive/g4r4/Ephys/ksort/20190715_02/20190715_02_dir_0-1/config.m
2019-08-09 17:41:21,578 - pipefinch.neural.sort.kilo.core - INFO - Written kilo script /data/experiment/microdrive/g4r4/Ephys/ksort/20190715_02/20190715_02_dir_0-1/run_master.m
2019-08-09 17:41:21,580 - pipefinch.neural.sort.kilo.core - INFO - Written phy parameters file /data/experiment/microdrive/g4r4/Ephys/ksort/20190715_02/20190715_02_dir_0-1/params.py
2019-08-09 17:41:21,581 - pipefinch.neural.sort.kilo.core - INFO - Written ksort parameters file /data/experiment/microdrive/g4r4/Ephys/ksort/20190715_02/20190715_02_dir_0-1/params.json


In [24]:
file_paths

{'bin': '/data/experiment/microdrive/g4r4/Ephys/ksort/20190715_02/20190715_02_dir_0-1/raw.bin',
 'params': '/data/experiment/microdrive/g4r4/Ephys/ksort/20190715_02/20190715_02_dir_0-1/params.json',
 'prb': '/data/experiment/microdrive/g4r4/Ephys/ksort/20190715_02/20190715_02_dir_0-1/chanMap.mat',
 'rez': '/data/experiment/microdrive/g4r4/Ephys/ksort/20190715_02/20190715_02_dir_0-1/rez2.mat',
 'mat_log': '/data/experiment/microdrive/g4r4/Ephys/ksort/20190715_02/20190715_02_dir_0-1/kilosort_mat.log',
 'phy_par': '/data/experiment/microdrive/g4r4/Ephys/ksort/20190715_02/20190715_02_dir_0-1/params.py'}

In [104]:
reload(ksc)
sort_result, sort_return_value = ksc.do_the_sort(file_paths)

2019-08-08 13:18:42,265 - pipefinch.neural.sort.kilo.core - INFO - Running kilosort on matlab
2019-08-08 13:18:42,266 - pipefinch.neural.sort.kilo.core - INFO - Sort folder is /data/experiment/microdrive/g4r4/Ephys/ksort/20190715_02/20190715_02_dir_0-1
2019-08-08 13:18:42,267 - pipefinch.neural.sort.kilo.core - INFO - output to /data/experiment/microdrive/g4r4/Ephys/ksort/20190715_02/20190715_02_dir_0-1/kilosort_mat.log


In [105]:
sort_return_value

0

In [106]:
sort_result



In [97]:
file_paths

{'bin': '/data/experiment/microdrive/g4r4/Ephys/ksort/20190715_02/20190715_02_dir_0-1/raw.bin',
 'params': '/data/experiment/microdrive/g4r4/Ephys/ksort/20190715_02/20190715_02_dir_0-1/params.json',
 'prb': '/data/experiment/microdrive/g4r4/Ephys/ksort/20190715_02/20190715_02_dir_0-1/chanMap.mat',
 'rez': '/data/experiment/microdrive/g4r4/Ephys/ksort/20190715_02/20190715_02_dir_0-1/rez2.mat',
 'mat_log': '/data/experiment/microdrive/g4r4/Ephys/ksort/20190715_02/20190715_02_dir_0-1/kilosort_mat.log',
 'phy_par': '/data/experiment/microdrive/g4r4/Ephys/ksort/20190715_02/20190715_02_dir_0-1/params.py'}

## Command for viewing:
 - open up terminal with the environment msort
 - go to the ss_data folder for the session
 - run the command: phy template-gui params.py

# After manual curation
 - save the curated spikes
 - come back to the notebook and run the cells below

In [25]:
# Convert the (curated) Kilosort output into a kwik file.
from pipefinch.h5tools.kwik import kwikfunctions as kwkf
reload(kwkf)
reload(et)

# Arguments: kwd path, kwik path to create, Kilosort folder to read from;
# raw_format='sgl' marks the raw data as SpikeGLX.
# NOTE(review): whether the kwd file must already exist at this point cannot
# be told from this cell -- confirm in kwikfunctions.kilo_to_kwik.
kwkf.kilo_to_kwik(exp_struct['files']['kwd'],
                 exp_struct['files']['kwik'],
                 exp_struct['folders']['ksort'],
                 #rec_in_binary=selection_rec_list,
                 raw_format='sgl')

#sglu.all_sgl_to_kwd(sess_par, include_blocks=['adc', 'dig_in'], overwrite=True)

2019-08-09 17:41:45,848 - pipefinch.h5tools.kwik.kwikfunctions - INFO - Creating kwik file /data/experiment/microdrive/g4r4/Ephys/kwik/20190715_02/20190715_02_dir_0-1/sort_0/spikes.kwik from kilosort folder /data/experiment/microdrive/g4r4/Ephys/ksort/20190715_02/20190715_02_dir_0-1
2019-08-09 17:41:45,850 - pipefinch.h5tools.kwik.kwikfunctions - INFO - Found clu file, will attempt to unpack manual sorted data from kilosort
2019-08-09 17:41:45,850 - pipefinch.h5tools.kwik.kwikfunctions - INFO - 30000.0
2019-08-09 17:41:46,133 - pipefinch.h5tools.kwik.kwikfunctions - INFO - Making spike tables
2019-08-09 17:41:46,447 - pipefinch.h5tools.kwik.kwikfunctions - INFO - 30000.0
2019-08-09 17:41:46,715 - pipefinch.h5tools.kwik.kwikfunctions - INFO - Making rec tables (make_rec_groups)
2019-08-09 17:41:46,844 - pipefinch.h5tools.kwik.kwikfunctions - INFO - Making cluster group tables
2019-08-09 17:41:46,845 - pipefinch.h5tools.kwik.kwikfunctions - INFO - found cluster tags file in /data/experim

In [24]:
exp_struct['folders']['ksort']

'/data/experiment/microdrive/g4r4/Ephys/ksort/20190715_02/20190715_02_dir_0-1'

In [29]:
os.path.isfile('/data/experiment/microdrive/p14r14/Ephys/ksort/2019-02-15_3125_02/cluster_group.tsv')

True

### Generate the kwd file with the adc streams and dig inputs (for events, like perceptron)

In [110]:
reload(sglu)
# this needs to run only once for the session. It goes through all the epochs
sglu.all_sgl_to_kwd(sess_par, include_blocks=['adc', 'dig_in'], overwrite=True)

2019-08-09 12:48:42,806 - pipefinch.pipeline.sglxutil - INFO - will process to kwd all epochs in session folder skipping trouble sessions /mnt/microdrive/birds/g4r4/Ephys/raw/20190715_02
2019-08-09 12:48:42,807 - pipefinch.pipeline.sglxutil - INFO - found 3 epoch subfolders
2019-08-09 12:48:42,808 - pipefinch.pipeline.sglxutil - INFO - epoch folder /mnt/microdrive/birds/g4r4/Ephys/raw/20190715_02/20190715_02_dir_g0
2019-08-09 12:48:42,813 - pipefinch.pipeline.sglxutil - INFO - Will create a new kwd file and overwrite the old one
2019-08-09 12:48:42,814 - pipefinch.pipeline.sglxutil - INFO - dest file: /data/experiment/microdrive/g4r4/Ephys/kwik/20190715_02/20190715_02_dir_g0/stream.kwd
2019-08-09 12:48:42,819 - pipefinch.pipeline.sglxutil - INFO - meta file /mnt/microdrive/birds/g4r4/Ephys/raw/20190715_02/20190715_02_dir_g0/20190715_02_dir_g0_t0.nidq.meta
2019-08-09 12:48:43,745 - pipefinch.pipeline.sglxutil - INFO - block adc
2019-08-09 12:48:44,658 - pipefinch.pipeline.sglxutil - INF

[]

# These won't work in ksort yet

In [31]:
### extract all unit waveforms
reload(kwkf)

from pipefinch.neural import units
reload(units)
units.get_all_unit_waveforms(exp_struct['files']['kwik'], exp_struct['files']['kwd'])

2019-05-21 15:42:28,359 - pipefinch.neural.units - INFO - About to get all waveforms for 3 units in file /data/experiment/microdrive/p14r14/Ephys/kwik/2019-02-15_3125_01/sort_1/spikes.kwik


HBox(children=(IntProgress(value=0, max=3), HTML(value='')))




0

In [None]:
def msort_cleanup(exp_struct: dict):
    """Remove the intermediate mountainsort raw .mda file of a session.

    The file removed is ``exp_struct['files']['mda_raw']``. A file that is
    already gone is reported in the log instead of raising, so the cleanup
    can be run more than once.

    Args:
        exp_struct: experiment structure holding the file paths under 'files'.

    Raises:
        KeyError: ``exp_struct`` has no ['files']['mda_raw'] entry.
    """
    mda_raw_path = exp_struct['files']['mda_raw']
    logger.info('removing intermediate msort mda file {}'.format(mda_raw_path))
    try:
        os.remove(mda_raw_path)
    except FileNotFoundError:
        logger.info('msort mda file {} not found, nothing to remove'.format(mda_raw_path))

def ksort_cleanup(exp_struct: dict):
    """Placeholder for the cleanup of the Kilosort intermediate files.

    The original body was only an unfinished comment ("remove the ..."),
    which made the whole cell a syntax error. What has to be removed was
    never specified, so nothing is deleted here.

    Args:
        exp_struct: experiment structure (currently unused).

    Raises:
        NotImplementedError: always.
    """
    # TODO: decide which Kilosort intermediates have to be removed.
    raise NotImplementedError('ksort_cleanup is not implemented yet')
def msort_tmp_clean():
    """Log the absolute path of the mountainsort temporary directory.

    The path is read from the ML_TEMPORARY_DIRECTORY environment variable.
    Only the log message is produced; no file is removed.

    Raises:
        KeyError: ML_TEMPORARY_DIRECTORY is not set.
    """
    raw_location = os.environ['ML_TEMPORARY_DIRECTORY']
    message = 'Cleaning up msort temp dir {}'.format(os.path.abspath(raw_location))
    logger.info(message)
    
#msort_tmp_clean()
msort_cleanup(exp_struct)

In [165]:
 exp_struct['files']

{'par': '/media/zinch/Windows/experiment/p14r14/ephys/msort/2019-02-13_1750_01/params.json',
 'set': '/mnt/zuperfinchjr/Data/p14r14/ephys/raw/2019-02-13_1750_01/settings.isf',
 'kwd': '/media/zinch/Windows/experiment/p14r14/ephys/kwik/2019-02-13_1750_01/streams.kwd',
 'kwik': '/media/zinch/Windows/experiment/p14r14/ephys/kwik/2019-02-13_1750_01/spikes.kwik',
 'kwe': '/media/zinch/Windows/experiment/p14r14/ephys/kwik/2019-02-13_1750_01/events.kwe',
 'mda_raw': '/media/zinch/Windows/experiment/p14r14/ephys/msort/2019-02-13_1750_01/raw.mda'}

## Dig into Ksort output files
Looking for:
    - main channels
    - spike waveforms

In [51]:
# Load the principal-component feature files from the Kilosort output folder.
# NOTE(review): despite its name, templates_path points at 'pc_features.npy'
# and is not used by the loads below -- confirm whether 'templates.npy' was meant.
templates_path = os.path.join(exp_struct['folders']['ksort'], 'pc_features.npy')

# Array read from 'pc_features.npy' (its shape is displayed a few cells below).
pc = np.load(os.path.join(exp_struct['folders']['ksort'], 
                          'pc_features.npy'))

# Array read from 'pc_feature_ind.npy' (shape and one row are displayed below).
pc_ind = np.load(os.path.join(exp_struct['folders']['ksort'], 
                          'pc_feature_ind.npy'))

In [52]:
pc_ind.shape

(196, 32)

In [72]:
pc_ind[1]

array([ 1,  3,  0,  2,  5,  4,  7,  6,  9,  8, 11, 10, 13, 12, 15, 14, 17,
       16, 19, 18, 21, 20, 23, 22, 25, 24, 27, 26, 29, 28, 31, 30],
      dtype=uint32)

In [53]:
pc.shape

(2475599, 3, 32)

In [67]:
xx = np.load(os.path.join(exp_struct['folders']['ksort'], 
                          'templates.npy'))
xx.shape

(196, 82, 362)