# Setup

## Import

In [6]:
%load_ext autoreload
%autoreload 2
import warnings
warnings.filterwarnings('once')
warnings.filterwarnings("ignore", category=DeprecationWarning)
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from datetime import datetime
import pickle
import hdf5storage
from tqdm.notebook import tqdm
from neurodsp.spectral import compute_spectrum
from timescales.autoreg import compute_ar_spectrum
from timescales.fit import PSD, ACF
from ndspflow.workflows import WorkFlow
import timescale_development_hf as hf

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


## Parameters

In [2]:
# data directory
data_dir = '/Users/trevormcpherson/Desktop/Voytek/Organoids/organoidData/CTC/'
#'/Users/blancamartin/Desktop/Voytek_Lab/timescales/organoid_data/CTC_data/'
# file type, constant, for each recording there is a directory that is the name of the recording, and then this file name within it
file_type = 'LFP_Sp.mat'
# all recording folders have this string in their name
recording_string = 'CTC'
# get recording directories
recording_directories = os.listdir(data_dir)
# only keep recoring directories
recording_directories = [s for s in recording_directories if recording_string in s]
# number of recordings
n_recordings = len(recording_directories)
# total number of wells
total_n_wells = 12
# no data in first 4 wells, if not specifying specific ones to analyze, skip these first 4
wells_to_skip = 4
# number of wells
n_wells = total_n_wells - wells_to_skip
# number of channels
n_channels = 64
# spike recording sample rate
spike_sample_rate = 12500 # samp per sec
# length of time bins using to separate spikes (seconds)
bin_length = 0.05
# binned data sample rate
bin_sample_rate = 1 / bin_length
# bin length in ms
bin_length_ms = int(bin_length * 1000)
# param for determining network spiking events, floor division of the maximum spikes found in a bin by this number
max_spike_floor_division = 2
# analysis window kernel (seconds, total of 3)
kernel_window_sec = [-0.5,2.5]
# length of kernel window
kernel_window_length = np.abs(kernel_window_sec[0]) + kernel_window_sec[1]
# convert to bins
kernel_window_idx = np.divide(kernel_window_sec,bin_length).astype(int)
num_bins_kernel = np.abs(kernel_window_idx[0]) + kernel_window_idx[1]
# get sequential times of bins in kernel
kernel_times = np.linspace(0,kernel_window_length,num_bins_kernel)
# day for organoid differentation as a time object
day_diff = datetime.strptime("081116",'%m%d%y')
# AR spectrum calculation order
ar_spec_order = 10

# Analysis

## Step 1: Collect meta data

In [23]:
# metadata column labels
metadata_column_labels = ['recording','day_postdiff','well','channel']
# initialise metadata dataframe
organoid_metadata = pd.DataFrame(columns=metadata_column_labels, dtype=object)

# loop through recordings
for this_recording_directory in recording_directories:
    # get date string
    date_string = this_recording_directory[4:10]
    
    # convert to date time object
    date_object = datetime.strptime(date_string,'%m%d%y')
    
    #convert data time object to days post differentiation
    dfdf = hf.get_dpdf(date_object, day_diff)

    # loop through wells
    for well_i in range(n_wells):
        # loop through channels
        for channel_i in range(n_channels):
            # build row for matadata dataframe
            df_row_list = [date_object, dfdf ,well_i,channel_i]
            df_row = pd.DataFrame([df_row_list], columns=metadata_column_labels)
            # add row to dataframe
            organoid_metadata = pd.concat([organoid_metadata, df_row])

In [24]:
organoid_metadata.head()

Unnamed: 0,recording,day_postdiff,well,channel
0,2016-12-06,117,0,0
0,2016-12-06,117,0,1
0,2016-12-06,117,0,2
0,2016-12-06,117,0,3
0,2016-12-06,117,0,4


## Step 2: # Get network events kernels

In [5]:
# initialize list for storing network event kernels for each recording
network_event_kernels_all_recordings = []
# initialize list for storing kernel bins for each recording
kernel_bins_all_recordings = []
# loop through recordings
for this_recording_directory in recording_directories:
    print(this_recording_directory)
    # get mat file path
    mat_file_path = data_dir + this_recording_directory + '/' + file_type
    # load mat file
    data = hdf5storage.loadmat(mat_file_path)
    # get spikes
    spikes = data['spikes']
    # t_s - recording time in seconds
    time_sec = data['t_s']
    # get last time point in recording
    recording_end_time = np.floor(time_sec[-1])
    # number of time bins in this recording
    num_bins_recording = np.floor(recording_end_time / bin_length).astype('int')

    # loop through wells
    for well_i in range(wells_to_skip,total_n_wells):
        print(well_i)
        # get spikes for this well
        spikes_well = spikes[well_i]

        # initialzie to store all spike timestamps, and binned spikes for channels
        spike_times = np.array([])
        binned_spikes_all_channels = []
        # loop through channels
        for channel_i in range(n_channels):
            # get spikes for this channel
            spikes_channel = spikes_well[channel_i]
            # only bin if there are spikes
            if spikes_channel.size > 0:
                # get spike time stamps (seconds)
                spike_times_channel = spikes_channel / spike_sample_rate
                # store time stapms for this channel
                spike_times = np.concatenate((spike_times_channel.flatten(), spike_times))
                # binning function uses ms units, convert spike times to ms
                spike_times_channel_ms = spike_times_channel * 1000
                # binned spikes
                binned_spikes_channel = hf.bin_spikes(spike_times_channel_ms,bin_length_ms,fs=spike_sample_rate,n_recording_bins=num_bins_recording)
            else:
                binned_spikes_channel = np.array([])
            # store binned spikes for this channel
            binned_spikes_all_channels.append(binned_spikes_channel)

        # binning function uses ms units, convert spike times to ms
        spike_times_ms = spike_times * 1000
        # bin all spikes together
        binned_spikes_all = hf.bin_spikes(spike_times_ms,bin_length_ms,fs=spike_sample_rate,n_recording_bins=num_bins_recording)
        # get max number of spikes across all bins
        max_bin_spikes = max(binned_spikes_all)
        # set threshold for detecting large number of spikes
        network_event_thresh = max_bin_spikes // max_spike_floor_division
        # get indexes that are greater than threshold
        net_event_idxs = np.where(binned_spikes_all > network_event_thresh)[0]
        # number of network events
        num_net_events = len(net_event_idxs)

        # initialize list of total kernels we will be collecting - used to ensure there isnt overlap between kernels
        other_kernels = np.zeros((1,2))
        # initialize network events we will be storing
        network_kernels = np.zeros((1,2))
        # initialize and loop through network events
        for event in net_event_idxs:
            # define windows around each event onset
            kernel_start = event + kernel_window_idx[0]
            kernel_stop = event + kernel_window_idx[1]
            # assume we will store this kernel
            store_kernel = 1
            # only store if start and stop are both within the range of bins we have
            if kernel_start < 0 or kernel_stop > num_bins_recording:
                store_kernel = 0
            # only store if there is no overlap with previous kernels
            for previous_kernel in other_kernels:
                if previous_kernel[0] <= kernel_start <= previous_kernel[1] or previous_kernel[0] <= kernel_stop <= previous_kernel[1]:
                    store_kernel = 0
            # only store if mean firing rate is above 0
            net_spikes_this_kernel = binned_spikes_all[kernel_start:kernel_stop]
            mean_spiking_this_kernel = np.sum(net_spikes_this_kernel) / num_bins_kernel
            if mean_spiking_this_kernel <= 0:
                store_kernel = 0
            # store unless one of our conditions is not met
            if store_kernel:
                network_kernels = np.vstack((network_kernels, [kernel_start, kernel_stop])).astype(int)
                # also update total list of kernels
                other_kernels = np.vstack(
                    (other_kernels, [kernel_start, kernel_stop])).astype(int)
        # remove initializing zero row
        network_kernels = network_kernels[1:]
        # number of network kernels
        n_network_kernels = len(network_kernels)

        # loop through channels
        for channel_i in range(n_channels):
            # get binned spikes for this channel
            binned_spikes_channel = binned_spikes_all_channels[channel_i]
            # initialize list for kernels for this channel
            kernel_bins_all = []
            # loop though network kernels
            for kernel_i in range(n_network_kernels):
                # get kernel start and stop
                kernel_start = network_kernels[kernel_i][0]
                kernel_stop = network_kernels[kernel_i][1]
                # get bins for this kernel
                kernel_bins = binned_spikes_channel[kernel_start:kernel_stop]
                # store kernel for this channel
                kernel_bins_all.append(kernel_bins)
            # store kernel binned data
            kernel_bins_all_recordings.append(kernel_bins_all)
            # store network event kernels for this well
            network_event_kernels_all_recordings.append(network_kernels)

CTC_120616
4
5
6
7
8
9
10
11
CTC_100816
4
5
6
7
8
9
10
11
CTC_091316
4
5
6
7
8
9
10
11
CTC_122316
4
5
6
7
8
9
10
11
CTC_030317_Drugs
4
5
6
7
8
9
10
11
CTC_031617
4
5
6
7
8
9
10
11
CTC_012717
4
5
6
7
8
9
10
11
CTC_121616
4
5
6
7
8
9
10
11
CTC_080316
4
5
6
7
8
9
10
11
CTC_101816
4
5
6
7
8
9
10
11
CTC_111816
4
5
6
7
8
9
10
11
CTC_010617
4
5
6
7
8
9
10
11
CTC_100116
4
5
6
7
8
9
10
11
CTC_081016
4
5
6
7
8
9
10
11
CTC_112416
4
5
6
7
8
9
10
11
CTC_121716_Drugs
4
5
6
7
8
9
10
11
CTC_123016
4
5
6
7
8
9
10
11
CTC_022417
4
5
6
7
8
9
10
11
CTC_012017
4
5
6
7
8
9
10
11
CTC_020717_Drugs
4
5
6
7
8
9
10
11
CTC_031017
4
5
6
7
8
9
10
11
CTC_021017
4
5
6
7
8
9
10
11
CTC_102116
4
5
6
7
8
9
10
11
CTC_110416
4
5
6
7
8
9
10
11
CTC_101416
4
5
6
7
8
9
10
11
CTC_092016
4
5
6
7
8
9
10
11
CTC_082416
4
5
6
7
8
9
10
11
CTC_111016
4
5
6
7
8
9
10
11
CTC_073116
4
5
6
7
8
9
10
11
CTC_091616
4
5
6
7
8
9
10
11
CTC_011317
4
5
6
7
8
9
10
11
CTC_102816
4
5
6
7
8
9
10
11
CTC_100716
4
5
6
7
8
9
10
11
CTC_021717
4
5
6
7
8
9
10

In [7]:
kernel_bins_all_recordings_file = '/Users/trevormcpherson/Desktop/Voytek/Organoids/work_flow_analysis/kernel_bins_all_recordings.pkl'

In [8]:
with open(kernel_bins_all_recordings_file, 'wb') as f:
    pickle.dump(kernel_bins_all_recordings, f)

In [25]:
with open(kernel_bins_all_recordings_file, 'rb') as f:
    kernel_bins_all_recordings = pickle.load(f)

## Step 3: Remove electrodes with no spikes

In [27]:
spiking_electrode_idxs  = [i for i in range(len(kernel_bins_all_recordings)) if np.array(kernel_bins_all_recordings[i]).size > 0]
kernel_bins_all_recordings_clean  = [kernel_bins_all_recordings[i] for i in spiking_electrode_idxs]
organoid_metadata_clean = [organoid_metadata.iloc[i] for i in spiking_electrode_idxs]

## Step 4: Analysis with WorkFlow

In [77]:
# initialize work flow
wf = WorkFlow(y_array=kernel_bins_all_recordings_clean)
# set initial fork
wf.fork(0)
# compute kernel averaged power spectra - welch
wf.transform(hf.trial_average_spectrum_welch,bin_sample_rate,f_range=(.001, 200))
# spec param fit - welch
wf.fit(PSD())
# reset fork
wf.fork(0)
# compute kernel averaged power spectra - ar
wf.transform(hf.trial_average_spectrum_ar,bin_sample_rate,ar_spec_order,f_range=(.001,200))
# spec param fit - ar
wf.fit(PSD())

In [78]:
# run work flow
wf.run(n_jobs=-1, progress=tqdm)

Running Workflow:   0%|          | 0/20788 [00:00<?, ?it/s]

  params, _ = curve_fit(expo_func, freqs, np.log10(powers),
  X -= avg[:, None]
  params, _ = curve_fit(expo_func, freqs, np.log10(powers),
  X -= avg[:, None]
  params, _ = curve_fit(expo_func, freqs, np.log10(powers),
  X -= avg[:, None]
  params, _ = curve_fit(expo_func, freqs, np.log10(powers),
  X -= avg[:, None]
  params, _ = curve_fit(expo_func, freqs, np.log10(powers),
  X -= avg[:, None]
  params, _ = curve_fit(expo_func, freqs, np.log10(powers),
  X -= avg[:, None]
  params, _ = curve_fit(expo_func, freqs, np.log10(powers),
  X -= avg[:, None]
  params, _ = curve_fit(expo_func, freqs, np.log10(powers),
  X -= avg[:, None]
  params, _ = curve_fit(expo_func, freqs, np.log10(powers),
  X -= avg[:, None]
  params, _ = curve_fit(expo_func, freqs, np.log10(powers),
  X -= avg[:, None]
  params, _ = curve_fit(expo_func, freqs, np.log10(powers),
  X -= avg[:, None]
  params, _ = curve_fit(expo_func, freqs, np.log10(powers),
  X -= avg[:, None]
  params, _ = curve_fit(expo_func, freqs

In [79]:
workflow_file = '/Users/trevormcpherson/Desktop/Voytek/Organoids/work_flow_analysis/workflow_run.pkl'

In [80]:
with open(workflow_file, 'wb') as f:
    pickle.dump(wf, f)

In [81]:
with open(workflow_file, 'rb') as f:
    wf = pickle.load(f)

# etc