In [1]:
import numpy as np
import pandas as pd
import h5py
import matplotlib.pyplot as plt
import itertools
import os
import gc
import glob
import numba
import numpy.linalg as la
from scipy import stats
from scipy.stats import circmean
from scipy.optimize import linear_sum_assignment
from scipy.spatial.distance import cdist

In [2]:
# Which recording to process and how many individuals it contains; both are
# spliced into the input/output paths used throughout the notebook.
n_inds = 30
foldername = '0084'

# Frame rate used to turn the window length (seconds) into a number of frames.
fps = 30
# Sliding window size in seconds. Kept as a zero-padded string because it is
# concatenated into file names; wrap it in int() before doing arithmetic.
window_size = '07'

In [20]:
# Directory holding every input/output file for the selected recording.
tracks_dir = '/home/user/Documents/Vivek/cuda/DirectionalCorrelation/Data/Output/golden_shiners/' + str(n_inds) + '_fish/' + foldername + '/'

# Load the per-frame tracking table and preview the first rows.
df = pd.read_csv(tracks_dir + 'alltracks.csv')
df.head()

#### Rescale trajectory data to match the timescale of directional correlations while retaining pairwise nature of the measures

In [21]:
# Half of the sliding window, expressed in frames.
half_window = int(window_size) * fps // 2

# Build one aggregated frame per window centre and concatenate once at the end.
# DataFrame.append was removed in pandas 2.0 and, where it still exists, copies the
# whole accumulated frame on every iteration (quadratic cost).
rescaled_parts = []

# NOTE(review): range() stops one short of max_frame - half_window, so the last
# centre whose window still fits in the data is skipped - confirm this is intended.
for fr in range(half_window, int(df['frame'].max() - half_window)):
    # All rows whose frame lies in the closed window [fr - half_window, fr + half_window].
    tmp = df[(df['frame'] >= fr - half_window) & (df['frame'] <= fr + half_window)]

    # The temporaries keep their original names because a later cell deletes them by name.
    tmp1 = tmp.loc[:, ["f_id", "n_id", "dist", "ang_area", "speed_diff", "acc_diff"]]
    tmp2 = tmp.loc[:, ["f_id", "n_id", "ang_pos_x", "ang_pos_y"]]

    # Per (f_id, n_id) pair: median of the first group of columns, mean of the
    # ang_pos_x / ang_pos_y components.
    tmp_ord1 = tmp1.groupby(['f_id', 'n_id']).median().reset_index()
    tmp_ord2 = tmp2.groupby(['f_id', 'n_id']).mean().reset_index()

    # Joined on the shared key columns (f_id, n_id); the result gets a fresh RangeIndex.
    tmp_ord = pd.merge(tmp_ord1, tmp_ord2, how='left')
    tmp_ord['frame'] = fr

    rescaled_parts.append(tmp_ord)

# Like the original chain of appends, this keeps each part's own 0..k index.
df_rescaled = pd.concat(rescaled_parts)

df_rescaled.head()

Unnamed: 0,f_id,n_id,dist,ang_area,speed_diff,acc_diff,ang_pos_x,ang_pos_y,frame
0,0.0,0.0,0.0,0.040841,0.0,0.0,,,105
1,0.0,1.0,281.813534,0.0,1.062971,-0.051665,-0.146654,0.86259,105
2,0.0,2.0,157.944246,0.169646,0.214444,-0.010611,-0.719656,0.550193,105
3,0.0,3.0,139.384592,0.0,0.35096,-0.029211,-0.300223,0.745013,105
4,0.0,4.0,223.945949,0.0,0.977997,0.036184,-0.212254,0.844832,105


#### Rescale trajectory data to match timescale of directional correlations and get individual level measures

In [22]:
# Half of the sliding window, expressed in frames.
half_window = int(window_size) * fps // 2

# Build one aggregated frame per window centre and concatenate once at the end.
# DataFrame.append was removed in pandas 2.0 and, where it still exists, copies the
# whole accumulated frame on every iteration (quadratic cost).
individual_parts = []

# NOTE(review): range() stops one short of max_frame - half_window, so the last
# centre whose window still fits in the data is skipped - confirm this is intended.
for fr in range(half_window, int(df['frame'].max() - half_window)):
    # All rows whose frame lies in the closed window [fr - half_window, fr + half_window].
    tmp = df[(df['frame'] >= fr - half_window) & (df['frame'] <= fr + half_window)]

    # The temporaries keep their original names because a later cell deletes them by name.
    tmp1 = tmp.loc[:, ["f_id", "fsize", "speed", "acceleration", "dev_gspeed", "dev_gacc"]]
    tmp2 = tmp.loc[:, ["f_id", "rotated_x", "rotated_y"]]

    # Per individual: median of the first group of columns (fsize is renamed to size)...
    tmp_ord1 = tmp1.groupby(['f_id']).median().reset_index()
    tmp_ord1 = tmp_ord1.rename(columns={'fsize': 'size'})
    # ...and mean of the rotated coordinates, renamed to rx / ry.
    tmp_ord2 = tmp2.groupby(['f_id']).mean().reset_index()
    tmp_ord2 = tmp_ord2.rename(columns={'rotated_x': 'rx', 'rotated_y': 'ry'})

    # Joined on the shared key column (f_id); the result gets a fresh RangeIndex.
    tmp_ord = pd.merge(tmp_ord1, tmp_ord2, how='left')
    tmp_ord['frame'] = fr

    individual_parts.append(tmp_ord)

# Like the original chain of appends, this keeps each part's own 0..k index.
df2 = pd.concat(individual_parts)

df2.head()

Unnamed: 0,f_id,size,speed,acceleration,dev_gspeed,dev_gacc,rx,ry,frame
0,0.0,64.595504,3.528393,0.217933,-0.392939,0.002821,72.448097,-157.937772,105
1,1.0,77.057587,4.729745,0.18412,0.724011,-0.034881,3.817986,106.710141,105
2,2.0,69.060827,3.802294,0.23013,0.005873,0.004222,-55.204292,-72.667863,105
3,3.0,67.305659,4.031939,0.198368,-0.045021,-0.010903,13.693632,-56.137133,105
4,4.0,61.073496,4.373393,0.249962,0.451033,0.018251,1.735975,53.529533,105


In [23]:
# Drop the loop temporaries left over from the rescaling cells, then force a
# garbage-collection pass so their memory is released before the next stage.
del tmp, tmp1, tmp2, tmp_ord1, tmp_ord2, tmp_ord
gc.collect()

6538

### Handle directional correlation data
Below, we binarise the pairwise output obtained from the directional correlation CUDA code. We then convert it into a measure of leadership per individual per frame.

#### Binarise pairwise events to 'leadership' or 'not leadership'

In [24]:
# Directional-correlation output for this recording and window size.
dcorr_dir = "/home/user/Documents/Vivek/cuda/DirectionalCorrelation/Data/Output/golden_shiners/" + str(n_inds) + "_fish/" + foldername
dcorr = pd.read_csv(dcorr_dir + '/cross_correlation_' + window_size + '.csv')
dcorr.columns = ['frame', 'f_id', 'n_id', 'tau', 'cc']

# Rows whose correlation is below sqrt(3)/2 have cc zeroed; rows whose lag is
# within +/-5 have tau zeroed. The two masks touch different columns.
weak_correlation = dcorr['cc'] < np.sqrt(3)/2
short_lag = np.abs(dcorr['tau']) <= 5

dcorr.loc[weak_correlation, 'cc'] = 0
dcorr.loc[short_lag, 'tau'] = 0

@numba.njit(fastmath=True, parallel=True)
def binarise_leadership(frame, f_id, tau, cc):
    """Flag, per (frame, f_id), the row(s) with the strongest negative-lag correlation.

    All four arguments are 1-D arrays of equal length, one entry per row of the
    pairwise table. For every distinct frame and every distinct f_id, the rows
    with ``tau < 0`` are examined; if their largest ``cc`` exceeds sqrt(3)/2,
    every row attaining that maximum is set to 1 in the output.

    Returns a float array shaped like ``cc`` holding 1 for flagged rows and 0
    everywhere else.
    """
    threshold = np.sqrt(3.)/2.
    unique_frames = np.unique(frame)
    unique_focals = np.unique(f_id)
    flags = np.zeros(cc.shape)

    for i in numba.prange(unique_frames.shape[0]):
        # Rows of this frame that carry a negative lag.
        frame_rows = (frame == unique_frames[i]) & (tau < 0)
        for j in numba.prange(unique_focals.shape[0]):
            rows = np.where(frame_rows & (f_id == unique_focals[j]))[0]
            if len(rows) > 0:
                strengths = cc[rows]
                if strengths.max() > threshold:
                    # Ties are all flagged, not just the first maximum.
                    winners = rows[np.where(strengths == strengths.max())[0]]
                    flags[winners] = 1

    return flags

In [25]:
# Pull the columns out as plain NumPy arrays (the compiled function takes arrays)
# and overwrite cc with its binarised version.
frame, f_id, tau, cc = (dcorr[col].values for col in ('frame', 'f_id', 'tau', 'cc'))

dcorr['cc'] = binarise_leadership(frame, f_id, tau, cc)

#### Convert 'leadership' to an individual level metric over time (rather than a pairwise metric)

In [26]:
@numba.njit(fastmath=True, parallel=True)
def individual_leadership(frame, f_id, n_id, cc):
    """Collapse pairwise cc values into one score per individual per frame.

    Parameters are 1-D arrays of equal length, one entry per pairwise row.
    For every distinct frame and every distinct value of ``f_id`` the score is

        sum(cc where n_id == individual) - sum(cc where f_id == individual)

    restricted to the rows of that frame.

    Returns ``(fr_out, fid_out, lscore)``: three 1-D arrays of length
    ``n_frames * n_ids`` laid out frame-major, where ``n_ids`` is the number of
    distinct values in ``f_id``. ``fr_out`` and ``fid_out`` are int64 and
    ``lscore`` is float64.
    """
    frs = np.unique(frame)
    f_ids = np.unique(f_id)

    # Size and stride come from the data rather than the notebook-level n_inds.
    # Numba freezes globals when the function is compiled and does not
    # bounds-check array writes by default, so a mismatch between n_inds and the
    # number of distinct ids would silently overlap or overrun the outputs.
    n_ids = f_ids.shape[0]
    n_out = frs.shape[0] * n_ids

    fr_out = np.zeros((n_out,), dtype=np.int64)
    fid_out = np.zeros((n_out,), dtype=np.int64)
    lscore = np.zeros((n_out,), dtype=np.float64)

    for fr in numba.prange(frs.shape[0]):
        # Frame mask is shared by every individual of this frame.
        in_frame = frame == frs[fr]
        for idx in range(n_ids):
            lead_idxs = np.where(in_frame & (n_id == f_ids[idx]))[0]
            follow_idxs = np.where(in_frame & (f_id == f_ids[idx]))[0]

            # Each (frame, individual) pair owns exactly one output slot, so the
            # parallel iterations never write to the same element.
            slot = fr*n_ids + idx
            fr_out[slot] = frs[fr]
            fid_out[slot] = f_ids[idx]
            lscore[slot] = np.sum(cc[lead_idxs]) - np.sum(cc[follow_idxs])

    return fr_out, fid_out, lscore

In [27]:
# Re-extract the columns as NumPy arrays; cc now holds the binarised values.
frame, f_id, n_id, cc = (dcorr[col].values for col in ('frame', 'f_id', 'n_id', 'cc'))

fr_out, fid_out, lscore = individual_leadership(frame, f_id, n_id, cc)

In [28]:
# One row per (frame, individual). Stacking the three arrays as columns of one
# 2-D array gives every column the same float dtype.
dcorr2 = pd.DataFrame(np.column_stack([fr_out, fid_out, lscore]), columns=['frame', 'f_id', 'lscore'])
dcorr2['lscore'] = dcorr2['lscore'] / n_inds

# Total score per individual over all frames, attached to every row as lfinal.
tmp = dcorr2[["f_id", "lscore"]]
tmp_ord = tmp.groupby('f_id', as_index=False).sum()
tmp_ord = tmp_ord.rename(columns={'lscore': 'lfinal'})

dcorr2 = dcorr2.merge(tmp_ord, how='left', on='f_id')
dcorr2.head()

Unnamed: 0,frame,f_id,lscore,lfinal
0,105.0,0.0,0.0,-26.466667
1,105.0,1.0,0.0,204.2
2,105.0,2.0,0.0,-236.3
3,105.0,3.0,0.0,-159.433333
4,105.0,4.0,0.0,-13.233333


### Create final datasets - individual and pairwise
These two dataframes will contain all data about morphology, movement, vision and leadership. They differ only in whether the data are presented at the individual or the pairwise level.

#### Individual data

In [29]:
# Inner join of the rescaled individual measures with the per-frame leadership
# scores, on the columns the two frames share.
df = df2.merge(dcorr2)

# The identifier columns come out of the join as floats; store them as 32-bit ints.
cols = ['frame', 'f_id']
df = df.astype({col: np.int32 for col in cols})

df.head()

Unnamed: 0,f_id,size,speed,acceleration,dev_gspeed,dev_gacc,rx,ry,frame,lscore,lfinal
0,0,64.595504,3.528393,0.217933,-0.392939,0.002821,72.448097,-157.937772,105,0.0,-26.466667
1,1,77.057587,4.729745,0.18412,0.724011,-0.034881,3.817986,106.710141,105,0.0,204.2
2,2,69.060827,3.802294,0.23013,0.005873,0.004222,-55.204292,-72.667863,105,0.0,-236.3
3,3,67.305659,4.031939,0.198368,-0.045021,-0.010903,13.693632,-56.137133,105,0.0,-159.433333
4,4,61.073496,4.373393,0.249962,0.451033,0.018251,1.735975,53.529533,105,0.0,-13.233333


In [30]:
# Write the individual-level table next to the input data for this recording.
individual_csv = '/home/user/Documents/Vivek/cuda/DirectionalCorrelation/Data/Output/golden_shiners/' + str(n_inds) + '_fish/' + foldername + '/' + 'individual_' + window_size + '.csv'
df.to_csv(individual_csv, mode='w')

#### Pairwise data

In [31]:
# Inner join of the rescaled pairwise measures with the binarised directional
# correlations, on the columns the two frames share.
df = df_rescaled.merge(dcorr)

# Store the identifier and lag columns as 32-bit ints.
cols = ['frame', 'f_id', 'n_id', 'tau']
df = df.astype({col: np.int32 for col in cols})

df.head()

Unnamed: 0,f_id,n_id,dist,ang_area,speed_diff,acc_diff,ang_pos_x,ang_pos_y,frame,tau,cc
0,0,1,281.813534,0.0,1.062971,-0.051665,-0.146654,0.86259,105,37,0.0
1,0,2,157.944246,0.169646,0.214444,-0.010611,-0.719656,0.550193,105,20,0.0
2,0,3,139.384592,0.0,0.35096,-0.029211,-0.300223,0.745013,105,14,0.0
3,0,4,223.945949,0.0,0.977997,0.036184,-0.212254,0.844832,105,0,0.0
4,0,5,256.186752,0.0,0.33353,0.014916,-0.244746,0.813744,105,47,0.0


In [32]:
# Collapse the averaged (x, y) components into a single unsigned angle in
# [0, pi]; taking the absolute value discards the sign of the angle.
df['ang_pos'] = np.abs(np.arctan2(df['ang_pos_y'], df['ang_pos_x']))

# The components are no longer needed once the angle is stored.
df.drop(columns=['ang_pos_x', 'ang_pos_y'], inplace=True)

In [33]:
# Write the pairwise-level table next to the input data for this recording.
pairwise_csv = '/home/user/Documents/Vivek/cuda/DirectionalCorrelation/Data/Output/golden_shiners/' + str(n_inds) + '_fish/' + foldername + '/' + 'pairwise_' + window_size + '.csv'
df.to_csv(pairwise_csv, mode='w')