# Imports

In [3]:
import numpy as np
from scipy.io import loadmat
import pandas as pd

from time import time
from tqdm import tqdm 
import math

from EfficientTimeShuffling import EfficientShuffle
import random

In [4]:
def prepare_data_tables(binWidth = 1.2, shuffle = False, combine = False, seed = 42, timelags = 3, drop = 0):
    """Build time-lagged population spike tables from the laminar data set and save them as CSV.

    For every trial in the 'SPIKES' array of the .mat file, each neuron's spike
    times are histogrammed, the counts of the neurons in each population are
    summed, the sums are capped at 2, and `timelags` consecutive bins are laid
    side by side as the columns of one row. The rows of all trials are
    concatenated, a shuffled copy is produced with EfficientShuffle, and both
    tables are written to the 'Dataframes' directory (created if missing).

    Parameters
    ----------
    binWidth : float
        Width of the histogram bins over the fixed 0..2000 trial window
        (same units as the stored spike times).
    shuffle : bool
        Currently ignored: the shuffled table is always computed and saved.
        Kept so existing calls keep working.
    combine : bool
        If True, pool neurons into three populations (columns L1, L2, L3);
        otherwise use six populations (columns E1, I1, E2, I2, E3, I3).
    seed : int
        Seed for NumPy's global random generator (controls which neurons are
        dropped); also used as the "iteration" number in the output file names.
    timelags : int
        Number of consecutive bins placed in each row (columns *_t1 .. *_tN).
    drop : float
        Fraction of neurons removed from every population, 0 <= drop < 1.
        A fresh random subset is drawn for every trial.

    Returns
    -------
    None
        The results are only written to disk.

    Raises
    ------
    ValueError
        If `drop` is outside [0, 1).

    Notes
    -----
    The shuffle is seeded from the wall clock, so the shuffled CSV is not
    reproducible from `seed`.
    """
    import os  # function-scope import: only needed to create the output directory

    # drop >= 1 would leave populations empty and drop < 0 would ask for more
    # neurons than exist; fail early with a clear message instead of mid-loop.
    if not 0 <= drop < 1:
        raise ValueError('drop must satisfy 0 <= drop < 1, got {!r}'.format(drop))

    # Make sure the output directory exists before doing the expensive work.
    os.makedirs('Dataframes', exist_ok=True)

    #Set seed to be used for this iteration
    np.random.seed(seed=seed)

    # Indexed below as all_spikes[neuron][trial]; the last axis counts trials.
    all_spikes = loadmat('/home/ags72/Neural_Data/Synthetic/Single_Epoch/Laminar_Set_2a.mat')['SPIKES']

    # The bin edges are the same for every neuron and trial, so build them once.
    trial_duration = 2000
    bin_edges = np.arange(0, trial_duration + binWidth, binWidth)

    #Combine spikes from same population by either layer/class or layer only
    if combine:
        population_names = ['L1', 'L2', 'L3']
        base_population_indices = [np.arange(0, 15),
                                   np.arange(15, 30),
                                   np.arange(30, 45)]
    else:
        population_names = ['E1', 'I1', 'E2', 'I2', 'E3', 'I3']
        base_population_indices = [np.arange(0, 10),
                                   np.arange(10, 15),
                                   np.arange(15, 25),
                                   np.arange(25, 30),
                                   np.arange(30, 40),
                                   np.arange(40, 45)]

    # Column order matches the row-stacking of the lagged blocks below:
    # all populations at lag 1, then all populations at lag 2, ...
    column_names = ['_'.join([name, 't{}'.format(timelag)])
                    for timelag in range(1, timelags + 1)
                    for name in population_names]

    all_trials = []
    for trial in tqdm(range(all_spikes.shape[-1])):
        #Bin spikes
        hist_spikes = [np.histogram(neuron[trial], bins=bin_edges)[0] for neuron in all_spikes]

        #Drop some percentage of neurons in each population
        # (value comparison; the original identity test `is not 0` was wrong for 0.0)
        if drop != 0:
            population_indices = [
                np.random.choice(indices, math.ceil(len(indices) * (1 - drop)), replace=False)
                for indices in base_population_indices
            ]
        else:
            population_indices = base_population_indices

        #Combine spikes in each population
        all_pop_spikes = np.array(
            [np.sum([hist_spikes[idx] for idx in indices], axis=0) for indices in population_indices],
            dtype=int,
        )
        # Cap the summed counts at 2, leaving only the values 0, 1 and 2.
        all_pop_spikes = np.minimum(all_pop_spikes, 2)

        #Create lagged variables: block k holds the bins shifted by k, trimmed to a common length
        n_rows = max(all_pop_spikes.shape[1] - timelags + 1, 0)
        lagged_spikes = np.vstack([all_pop_spikes[:, lag:lag + n_rows] for lag in range(timelags)])

        all_trials.append(pd.DataFrame(lagged_spikes.T, columns=column_names))

    spikes_df_all = pd.concat(all_trials).reset_index(drop=True)

    #Anirban shuffle, slow
    # NOTE: seeded from the current time, so this table differs between runs.
    # A faster alternative that was left disabled permutes each column
    # independently with np.random.permutation.
    spikes_df_all_shuffle, _ = EfficientShuffle(spikes_df_all, seed=round(time()))

    #Save dataframes to csv
    spikes_df_all.to_csv('Dataframes/laminar2_spikes_drop_{drop}_iteration_{iteration}.csv'.format(drop = int(100*drop), iteration = seed))
    spikes_df_all_shuffle.to_csv('Dataframes/laminar2_spikes_drop_{drop}_iteration_{iteration}_shuffle.csv'.format(drop = int(100*drop), iteration = seed))

    

In [25]:
# Sweep every drop fraction, writing five seeded repetitions (seeds 1-5) for each.
for drop in (0.2, 0.4, 0.6, 0.8):
    for iteration in range(1, 6):
        prepare_data_tables(seed=iteration, drop=drop)

100%|█████████████████████████████████████| 1000/1000 [00:04<00:00, 215.80it/s]
100%|█████████████████████████████████████| 1000/1000 [00:04<00:00, 224.09it/s]
100%|█████████████████████████████████████| 1000/1000 [00:04<00:00, 218.65it/s]
100%|█████████████████████████████████████| 1000/1000 [00:04<00:00, 207.43it/s]
100%|█████████████████████████████████████| 1000/1000 [00:04<00:00, 213.49it/s]
100%|█████████████████████████████████████| 1000/1000 [00:04<00:00, 226.69it/s]
100%|█████████████████████████████████████| 1000/1000 [00:04<00:00, 214.77it/s]
100%|█████████████████████████████████████| 1000/1000 [00:04<00:00, 231.06it/s]
100%|█████████████████████████████████████| 1000/1000 [00:04<00:00, 224.34it/s]
100%|█████████████████████████████████████| 1000/1000 [00:04<00:00, 225.78it/s]
100%|█████████████████████████████████████| 1000/1000 [00:04<00:00, 228.39it/s]
100%|█████████████████████████████████████| 1000/1000 [00:04<00:00, 227.62it/s]
100%|███████████████████████████████████

In [5]:
# Baseline run: keep every neuron (drop of 0) with seed 1.
prepare_data_tables(seed=1, drop=0)

100%|█████████████████████████████████████| 1000/1000 [00:04<00:00, 212.18it/s]
