In [1]:
import numpy as np
import pandas as pd
import h5py
import matplotlib.pyplot as plt
import itertools
import os
import glob
from scipy import stats
from scipy.optimize import linear_sum_assignment
from scipy.spatial.distance import cdist

In [None]:
# Dataset selector: True -> golden shiner (fish) tracks, False -> pigeon (bird) tracks
fish = True

# Group size and recording folder to process; both are used to build the
# input/output paths, so they must match a folder of the dataset selected above
n_inds = 10
foldername = '0066'

In [3]:
# Load the tracks of the selected dataset, keeping only the individual id,
# the time column and the x/y direction components.
# The two datasets differ only in their folder names and in the name of the
# time column, so pick those first and share the rest of the loading code.
if fish:
    species_dir, group_dir, time_col = 'golden_shiners', str(n_inds) + '_fish', 'frame'
else:
    species_dir, group_dir, time_col = 'pigeons', str(n_inds) + '_birds', '#t(centisec)'

tracks_path = os.path.join(
    '/home/user/Documents/Vivek/cuda/DirectionalCorrelation/Data/Output',
    species_dir, group_dir, foldername, 'alltracks.csv')

df = pd.read_csv(tracks_path)
df = df.loc[:, ['f_id', time_col, 'dir_x', 'dir_y']]

# NOTE(review): index=str also converts the row labels to strings; kept as-is
# so the resulting frame is unchanged - confirm whether that is intended.
df = df.rename(index=str, columns={'f_id': 'id'})
df = df.drop_duplicates()
df.head()

Unnamed: 0,id,#t(centisec),dir_x,dir_y
0,0,0.0,-0.765925,0.64293
9,1,0.0,0.913269,0.407357
18,2,0.0,-0.329305,-0.944224
27,3,0.0,-0.652709,-0.757609
36,4,0.0,0.584252,-0.811572


# Create binaries for calculating cross correlation

This code assumes no missing data.

In [4]:
# Split the tracks into one dataframe per individual and write each direction
# component of each individual to its own raw float32 binary file
# (dir_x000, dir_y000, dir_x001, ...) in the cross-correlation input folder.

# Direction components to export. Derived from the columns actually present in
# df, so a 'dir_z' file is only written when the data has a z component.
components = [axis for axis in ('x', 'y', 'z') if 'dir_' + axis in df.columns]

# Create individual specific dataframes (without the ID column)
data = []
names = []
for ind_id, ind_df in df.groupby("id"):
    name = int(ind_id)
    ind_df = ind_df.drop(columns=['id'])
    # Also exposed as df_<id> so code that looks these frames up by name keeps working
    globals()['df_' + str(name)] = ind_df
    names.append(name)
    data.append(ind_df)

# Change working directory so created binaries are placed in the data folder.
# The group folder is built from n_inds so it always matches the loaded dataset.
if fish:
    group_dir = 'golden_shiners/' + str(n_inds) + '_fish/'
else:
    group_dir = 'pigeons/' + str(n_inds) + '_birds/'
os.chdir('/home/user/Documents/Vivek/cuda/DirectionalCorrelation/Data/Input/'
         + group_dir + foldername + '/cross_correlation/')

# Write directions to arrays and convert them to binaries
for name, ind_df in zip(names, data):
    # IDs are zero-padded to three digits (assumes non-negative IDs)
    suffix = str(name).zfill(3)
    for axis in components:
        file_name = 'dir_' + axis + suffix
        values = np.asarray(ind_df['dir_' + axis]).astype('float32')
        # Also exposed under the file name, as before
        globals()[file_name] = values

        # 'with' guarantees the file is closed even if the write fails
        with open(file_name, 'wb') as output:
            values.tofile(output)

# Create binaries for calculating autocorrelation

In [198]:
# Split the tracks into one dataframe per individual and write each direction
# component of each individual to its own raw float32 binary file
# (i_x000, i_y000, i_x001, ...) in the autocorrelation input folder.

# Direction components to export. Derived from the columns actually present in
# df, so an 'i_z' file is only written when the data has a z component.
components = [axis for axis in ('x', 'y', 'z') if 'dir_' + axis in df.columns]

# Create individual specific dataframes (without the ID column)
data = []
names = []
for ind_id, ind_df in df.groupby("id"):
    name = int(ind_id)
    ind_df = ind_df.drop(columns=['id'])
    # Also exposed as df_<id> so code that looks these frames up by name keeps working
    globals()['df_' + str(name)] = ind_df
    names.append(name)
    data.append(ind_df)

# Change working directory so created binaries are placed in the data folder.
# The group folder is built from n_inds so it always matches the loaded dataset.
if fish:
    group_dir = 'golden_shiners/' + str(n_inds) + '_fish/'
else:
    group_dir = 'pigeons/' + str(n_inds) + '_birds/'
os.chdir('/home/user/Documents/Vivek/cuda/DirectionalCorrelation/Data/Input/'
         + group_dir + foldername + '/autocorrelation/')

# Write directions to arrays and convert them to binaries
for name, ind_df in zip(names, data):
    # IDs are zero-padded to three digits (assumes non-negative IDs)
    suffix = str(name).zfill(3)
    for axis in components:
        file_name = 'i_' + axis + suffix
        values = np.asarray(ind_df['dir_' + axis]).astype('float32')
        # Also exposed under the file name, as before
        globals()[file_name] = values

        # Each file receives its own component (the z file used to be filled
        # with the y values); 'with' guarantees the file is closed
        with open(file_name, 'wb') as output:
            values.tofile(output)