In [None]:
# Load data

In [1]:
import re
import sqlite3
import tarfile as tf
import xml.etree.ElementTree as et

import numpy as np
import pandas as pd
import quantities as pq

In [None]:
def regexp(expr, item):
    """Return True when the regular expression ``expr`` matches anywhere in ``item``.

    Written to be registered as a two-argument SQLite ``REGEXP`` function
    (``X REGEXP Y`` is evaluated by SQLite as ``regexp(Y, X)``).

    Parameters
    ----------
    expr : str
        Regular expression pattern. The match is unanchored (``re.search``).
    item : str or None
        Value to test. SQLite hands SQL NULL to Python as ``None``.

    Returns
    -------
    bool
        ``False`` for ``None``; otherwise whether the pattern was found.
    """
    # A NULL column value would otherwise make re.search raise TypeError,
    # which aborts the whole SQL query instead of just skipping the row.
    if item is None:
        return False
    return re.search(expr, item) is not None

In [None]:

class ReadSession:
    """Load metadata and sorted spikes for one recording session.

    Reads three tables (``session``, ``cell``, ``epos``) from the dataset's
    SQLite metadata database and then opens the session's ``.tar`` archive to
    read the sampling rate (``.xml``), the animal position (``.whl``) and, for
    every electrode listed in the ``cell`` table, the cluster ids
    (``.clu.<ele>``) and spike times (``.res.<ele>``).

    Paths are assembled as::

        <path><dataset>/docs/<dataset without '-'>-metadata-tables/<dataset without '-'>-tables.db
        <path><dataset>/<animal_id>.<day>/<animal_id>.<session>.tar

    Parameters
    ----------
    dataset : str
        Dataset name, e.g. ``"hc-3"``.
    path : str
        Directory prefix that contains the dataset folder (must end with a
        path separator because paths are built by string concatenation).
    animal_id : str
        Animal identifier; combined with ``day`` / ``session`` to form the
        top-level directory and session names.
    day, session
        Converted with ``str()`` and appended to ``animal_id`` after a dot.
    beh : str
        Value matched against the ``behavior`` column of the session table.
    unit_spiketime
        Factor the spike times are multiplied by (a ``quantities`` unit in
        the notebook).
    unit_space
        Factor the positions are multiplied by.
    unit_lfp, load_lfp
        Accepted for interface compatibility; this loader does not read LFP
        data, the values are only stored on the instance.

    Attributes
    ----------
    df_session, df_cells, df_epos : pandas.DataFrame
        Query results from the metadata database.
    positions
        Rows of the ``.whl`` file multiplied by ``unit_space``.
    data_spikes : pandas.DataFrame
        One row per spike with columns ``'Electrode ID'``, ``'Cluster'`` and
        ``'Time'`` (clusters 0 and 1 removed).
    """

    _SPIKE_COLUMNS = ['Electrode ID', 'Cluster', 'Time']

    def __init__(self,
                 dataset,
                 path,
                 animal_id,
                 day,
                 beh,
                 session,
                 unit_spiketime,
                 unit_space,
                 unit_lfp=pq.V,
                 load_lfp=False):

        dataset_tag = dataset.replace('-', '')
        meta_path = (path + dataset + '/docs/' + dataset_tag +
                     '-metadata-tables/' + dataset_tag + '-tables.db')

        topdir = animal_id + '.' + str(day)
        subdir = animal_id + '.' + str(session)

        # Open database. The connection is closed explicitly: using a sqlite3
        # connection as a context manager only scopes a transaction.
        con_sessions = sqlite3.connect(meta_path)
        try:
            con_sessions.create_function("REGEXP", 2, regexp)

            # Values are bound as parameters instead of being pasted into the
            # SQL text, so quotes in the identifiers cannot break the query.
            df_session = pd.read_sql_query(
                'SELECT topdir, session, behavior, familiarity, duration '
                'FROM session '
                'WHERE behavior = ? AND session = ? AND topdir = ?',
                con_sessions,
                params=(beh, subdir, topdir))

            # NOTE(review): REGEXP is an unanchored regex search, so '.' in
            # topdir matches any character and longer topdir values that
            # contain this one also match — confirm this is intended.
            df_cells = pd.read_sql_query(
                'SELECT id, topdir, animal, ele, clu, region, '
                'nexciting, ninhibiting, exciting, inhibiting, '
                'excited, inhibited, fireRate, totalFireRate, cellType '
                'FROM cell WHERE topdir REGEXP ?',
                con_sessions,
                params=(topdir,))

            electrode_columns = ', '.join(
                'e' + str(k) for k in range(1, 17))
            df_epos = pd.read_sql_query(
                'SELECT topdir, animal, ' + electrode_columns + ' '
                'FROM epos WHERE topdir REGEXP ?',
                con_sessions,
                params=(topdir,))
        finally:
            con_sessions.close()

        electrode_ids = np.unique(df_cells['ele'])
        path_to_session = path + dataset + '/' + topdir + '/' + subdir + '.tar'
        # Every archive member is named <subdir>/<subdir><extension>.
        member_prefix = subdir + '/' + subdir

        # extract variables from data
        clusters = {}
        times = {}
        print('Get position and spikes')
        with tf.open(path_to_session) as tf_session:
            # get sampling rate of spike timestamps
            xml_f = tf_session.extractfile(member_prefix + '.xml')
            root = et.parse(xml_f).getroot()
            sampling_rate_spike_time = float(
                root.findall("./acquisitionSystem/samplingRate")[0].text)

            # get animal position: one whitespace-separated row per line
            positions_file = tf_session.extractfile(member_prefix + '.whl')
            positions = np.stack([
                np.array(line.split(), dtype=np.float64)
                for line in positions_file.readlines()
            ])

            for ele_i in electrode_ids:
                clusters_f = tf_session.extractfile(
                    member_prefix + '.clu.' + str(ele_i))
                # first line contains number of clusters in file, skip it
                clusters_i = np.array(
                    [int(clu_id) for clu_id in clusters_f])[1:]

                times_f = tf_session.extractfile(
                    member_prefix + '.res.' + str(ele_i))
                # get times of spikes and divide by sampling rate
                # NOTE(review): the raw value is tagged with unit_spiketime
                # *before* dividing by the sampling rate; if .res holds
                # sample indices the quotient is in seconds whatever
                # unit_spiketime is — TODO confirm the intended units.
                times_i = np.array(
                    [float(time_j) for time_j in times_f]) * unit_spiketime
                times_i = times_i / sampling_rate_spike_time

                if len(clusters_i) != len(times_i):
                    raise ValueError(
                        'Electrode %s: %d cluster ids but %d spike times'
                        % (ele_i, len(clusters_i), len(times_i)))

                # from documentation:
                # cluster 0 corresponds to mechanical noise (the wave shapes
                # do not look like neuron's spike). Cluster 1 corresponds to
                # small, unsortable spikes. These two clusters (0 and 1)
                # should not be used for analysis of neural data since they
                # do not correspond to successfully sorted spikes.
                sorted_spikes = clusters_i >= 2
                clusters[ele_i] = clusters_i[sorted_spikes]
                times[ele_i] = times_i[sorted_spikes]

        positions = positions * unit_space

        self.df_session = df_session
        self.df_cells = df_cells
        self.df_epos = df_epos
        self.dataset = dataset
        self.path = path
        self.animal_id = animal_id
        self.day = day
        self.beh = beh
        self.session = session
        self.unit_spiketime = unit_spiketime
        self.unit_space = unit_space
        self.unit_lfp = unit_lfp
        self.load_lfp = load_lfp
        self.positions = positions
        self.data_spikes = self._build_spike_table(
            electrode_ids, clusters, times)

    @staticmethod
    def _build_spike_table(electrode_ids, clusters, times):
        """Stack per-electrode cluster ids and spike times into one DataFrame.

        Handles any number of electrodes, including none, in which case an
        empty frame with the expected columns is returned.
        """
        per_electrode = []
        for ele_i in electrode_ids:
            cluster_ids = np.asarray(clusters[ele_i])
            # np.asarray drops any unit wrapper so the table holds plain
            # numbers; the numeric values are unchanged.
            spike_times = np.asarray(times[ele_i], dtype=float)
            per_electrode.append(np.column_stack((
                np.full(cluster_ids.shape[0], ele_i),
                cluster_ids,
                spike_times)))

        if not per_electrode:
            return pd.DataFrame(columns=ReadSession._SPIKE_COLUMNS)
        return pd.DataFrame(data=np.vstack(per_electrode),
                            columns=ReadSession._SPIKE_COLUMNS)
    



In [None]:
# Session selection. Every value marked TODO is a placeholder that must be
# filled in before the loader can find the files.
dataset = "hc-3"
path = ".../"  # TODO: directory that contains the dataset folder (keep the trailing slash)
animal_id = ""  # TODO: animal identifier
day = None  # TODO: appended to animal_id as '<animal_id>.<day>' (top-level directory name)
beh = ""  # TODO: value of the 'behavior' column to select
session = None  # TODO: appended to animal_id as '<animal_id>.<session>' (session / archive name)
unit_spiketime = pq.ms
unit_space = pq.mm
load_lfp = False  # no need to set True!


# load_lfp must be passed by keyword: the ninth positional parameter of
# ReadSession is unit_lfp, so a positional load_lfp would overwrite the LFP unit.
data = ReadSession(dataset, path, animal_id, day, beh, session,
                   unit_spiketime, unit_space, load_lfp=load_lfp)

# One row per spike: electrode id, cluster id and spike time.
data.data_spikes.head()


Count the number of spikes fired by each neuron in each time bin.

In [None]:

# Binning parameters for the spike-count histogram.
# Presumably in the same units as the 'Time' column of the spike table — TODO confirm.
bin_width = 10
start_time, end_time = 0, 420

# Bin edges. The stop value is pushed one bin_width past end_time so that
# end_time itself is included as the final edge.
time_bins = np.arange(start_time, end_time + bin_width, bin_width)

def count_spikes_per_cluster(df, time_bins):
    """Count spikes per (cluster, electrode) unit in consecutive time bins.

    Parameters
    ----------
    df : pandas.DataFrame
        Spike table with columns ``'Cluster'``, ``'Electrode ID'`` and
        ``'Time'`` (one row per spike).
    time_bins : sequence of numbers
        Bin edges. Bin ``k`` is the half-open interval
        ``[time_bins[k], time_bins[k + 1])``.

    Returns
    -------
    pandas.DataFrame
        Rows are indexed by ``('Cluster', 'Electrode ID')``, columns are the
        bin start values ``time_bins[:-1]``, entries are integer spike
        counts. A unit with no spike in a bin gets ``0`` (not NaN), and units
        whose spikes all fall outside the bins still appear as all-zero rows.

    Raises
    ------
    ValueError
        If fewer than two bin edges are given.
    """
    bin_edges = np.asarray(time_bins)
    if bin_edges.ndim != 1 or bin_edges.size < 2:
        raise ValueError("time_bins must contain at least two bin edges")

    unit_keys = [df['Cluster'], df['Electrode ID']]
    spike_times = df['Time']

    per_bin_counts = []
    for bin_start, bin_end in zip(bin_edges[:-1], bin_edges[1:]):
        in_bin = (spike_times >= bin_start) & (spike_times < bin_end)
        # Summing the 0/1 membership flag over *all* spikes of a unit yields
        # an explicit zero for silent bins, which filtering the rows first
        # would turn into a missing entry (NaN after concatenation).
        per_bin_counts.append(in_bin.astype(int).groupby(unit_keys).sum())

    return pd.concat(per_bin_counts, axis=1, keys=bin_edges[:-1])

# Spike-count table: one row per (cluster, electrode) unit, one column per time bin.
spikes_counts_per_cluster = count_spikes_per_cluster(
    df=data.data_spikes,
    time_bins=time_bins,
)


Compute the pairwise correlation between neurons' spike counts across time bins.

In [None]:

# DataFrame.corr() correlates *columns*. The count table has one row per
# neuron and one column per time bin, so it is transposed first to obtain a
# neuron-by-neuron correlation matrix rather than a bin-by-bin one.
correlation_matrix = spikes_counts_per_cluster.T.corr()

print("Correlation Matrix:")
correlation_matrix