The purpose of this notebook is to generate the figures for the *Astyanax mexicanus* social behavior paper using a pre-processed dataset.

Upstream of this notebook, we tracked, processed, and analyzed the complete raw dataset. The data relevant to the figures are shared in a pickle file, `figure-data.pik`; this notebook shows how to access and work with that file.

# Imports and module versions 

In [None]:
import sys

# Report the interpreter version, followed by the version of each
# third-party module this notebook relies on.
print(f'python {sys.version}')
module_names = ('numpy', 'scipy', 'statsmodels', 'matplotlib', 'pandas', 'seaborn', 'pingouin')
for module_name in module_names:
    module = __import__(module_name)
    print(f'{module_name} {module.__version__}')

# Full imports.
import os
import sys
import pickle
import numpy as np
import scipy.interpolate
from scipy.stats import normaltest, shapiro, linregress
from scipy.stats import ttest_1samp, ttest_ind, ttest_ind_from_stats, f_oneway
from scipy.stats import mannwhitneyu
from statsmodels.stats.multicomp import pairwise_tukeyhsd
from statsmodels.stats.multitest import multipletests
from pingouin import ancova
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
from matplotlib.colors import LinearSegmentedColormap
from matplotlib.colors import hsv_to_rgb, rgb_to_hsv, to_rgb
import pandas as pd
import seaborn as sns
from collections import defaultdict, namedtuple
import itertools as itt
import copy


# Result container shared by the helpers below; mirrors the
# (statistic, pvalue) layout of scipy's test results.
TestResult = namedtuple('TestResult', 'statistic pvalue')

# T-test using means and std errors. The closest scipy equivalent requires
# means, std deviations, and number of observations.
def ttest_from_err(mean1, stderr1, mean2, stderr2, df=None):
    """Two-sided test for the difference of two means given their standard errors.

    Parameters
    ----------
    mean1, mean2 : float or array-like
        The two estimated means.
    stderr1, stderr2 : float or array-like
        Standard errors of the two means.
    df : float, optional
        Degrees of freedom of the Student t reference distribution. Standard
        errors alone do not determine it, so when `df` is None the normal
        distribution (the large-sample limit of the t distribution) is used.

    Returns
    -------
    TestResult
        The test statistic and the two-sided p-value.
    """
    t = (mean1-mean2)/np.sqrt(stderr1**2+stderr2**2)
    if df is None:
        p = scipy.stats.norm.sf(np.abs(t)) * 2
    else:
        p = scipy.stats.t.sf(np.abs(t), df) * 2
    return TestResult(statistic=t, pvalue=p)

# ANOVA test using the samples' means, std deviations, and sizes.
# The closest scipy equivalent requires the original observations in each sample.
def f_oneway_from_stats(means, stds, counts, ddof=0):
    """One-way ANOVA computed from per-sample summary statistics.

    Parameters
    ----------
    means, stds, counts : iterable
        Mean, standard deviation and number of observations of each sample.
    ddof : int, optional
        Delta degrees of freedom that was used to compute `stds`. The default
        0 matches `np.std`; pass 1 for sample standard deviations.

    Returns
    -------
    TestResult
        The F statistic and its p-value.
    """
    means  = np.asarray(list(means))
    stds   = np.asarray(list(stds))
    counts = np.asarray(list(counts), dtype=int)
    k      = len(means) # number of samples
    N      = np.sum(counts) # number of data points across all samples
    mean   = np.sum(counts*means)/N # mean of all samples taken together
    ssb    = np.sum(counts*(means-mean)**2) # sum of squares between samples
    ssw    = np.sum((counts-ddof)*stds**2) # sum of squares within samples
    F      = (ssb/(k-1)) / (ssw/(N-k))
    # Use the survival function rather than 1-cdf: the latter rounds to
    # exactly 0 once the p-value drops below machine precision.
    pval   = scipy.stats.f.sf(F, k-1, N-k)
    return TestResult(statistic=F, pvalue=pval)

# Mean and standard error.
def mean_sem(x, axis=None):
    """Return the mean and the standard error of the mean of `x`, ignoring NaNs.

    Both statistics are computed from the same non-NaN entries, so a missing
    value no longer yields a valid mean paired with a NaN error. For NaN-free
    input the result matches `scipy.stats.sem` (sample std, ddof=1).

    Parameters
    ----------
    x : array-like
        Input data.
    axis : int or None, optional
        Axis along which to operate; None (default) uses the flattened data.

    Returns
    -------
    tuple
        `(mean, sem)`; `sem` is NaN where fewer than two values are available.
    """
    x     = np.asarray(x, dtype=float)
    count = np.count_nonzero(~np.isnan(x), axis=axis) # observations actually used
    sem   = np.nanstd(x, axis=axis, ddof=1) / np.sqrt(count)
    return np.nanmean(x, axis=axis), sem

# Figure styling

Fonts, line styles, color schemes, etc.

In [None]:
# Display name of each population, keyed by its short code.
names = {
    'sf': 'Surface',
    'pa': 'Pachón',
    'mo': 'Molino',
    'ti': 'Tinaja',
    'sf-dark': 'Surface in the dark',
}
populations = list(names)

# Global matplotlib defaults: font sizes for text, axis labels, ticks, legends.
for group, settings in (
    ('font',   dict(size=16)),
    ('axes',   dict(labelsize=16)),
    ('xtick',  dict(labelsize=12)),
    ('ytick',  dict(labelsize=12)),
    ('legend', dict(fontsize=16)),
):
    plt.rc(group, **settings)

# Shared plotting constants.
lw        = 2   # line width
dpi       = 150 # figure output resolution
alpha_err = 0.2 # transparency for std error halos

# Legend options.
leg_opt = {
    'borderpad': 0.3,
    'labelspacing': 0.2,
    'handlelength': 1.5,
    'handletextpad': 0.5,
    'borderaxespad': 0.2,
    'fontsize': 15,
}

# Errorbar plot options.
err_opt = {
    'lw': lw,
    'elinewidth': lw,
    'capthick': lw,
    'capsize': 5,
    'barsabove': False,
}

# One dash pattern per population, in the order of `populations`.
dash_patterns = [(2,2), (3,1,1,1), (3,1,1,1,1,1), (1,1), (3,1)]
dashes = {pop: pattern for pop, pattern in zip(populations, dash_patterns)}

def create_color_shades(rgb, gray=False):
    """Derive four shades (light to dark) from one base color.

    Parameters
    ----------
    rgb : sequence of 3 floats
        Base color as RGB values in [0, 1].
    gray : bool, optional
        If True, return a grayscale family (zero saturation) instead.

    Returns
    -------
    numpy.ndarray
        Array of shape (4, 3) holding the RGB values of the four shades.
    """
    hsv = np.array([rgb_to_hsv(rgb)]*4)
    # Slight hue gradient. Hue is periodic, so wrap it back into [0, 1):
    # hsv_to_rgb does not wrap negative hues, which would distort the shades
    # of base colors whose hue is close to 0 (reds).
    hsv[:,0]  = (hsv[:,0] + np.array([-1,0,1,2])*0.02) % 1.0
    hsv[:,1]  = np.array([0.5,0.6,0.9,1])    # Saturation gradient.
    hsv[:,2]  = np.array([0.9,0.75,0.5,0.2]) # Value gradient.
    if gray:
        hsv[:,1] = 0
        hsv[:,2]  = np.array([0.6,0.4,0.2,0]) # Value gradient.
    return np.array(list(map(hsv_to_rgb,hsv)))

def create_color_dictionaries(color_list, color_matrix, populations=populations):
    """Build the color lookup tables used throughout the notebook.

    Returns a dict with three entries:
      color_dict1 -- population -> its default shade (from `color_list`);
      color_dict4 -- (population, group size) -> shade (from `color_matrix`),
                     for group sizes 1, 2, 5 and 10;
      color_maps  -- population -> colormap running from white to a darkened
                     or saturated version of the default shade.
    """
    group_sizes = [1,2,5,10]

    # One shade per population.
    color_dict1 = {pop: shade for pop, shade in zip(populations, color_list)}

    # One shade per (population, group size) pair.
    color_dict4 = {}
    for row, pop in enumerate(populations):
        for col, size in enumerate(group_sizes):
            color_dict4[pop, size] = color_matrix[row, col]

    # Color map interpolating between white and the default shade.
    white      = (1,1,1)
    color_maps = {}
    for pop, shade in color_dict1.items():
        hue, saturation, value = rgb_to_hsv(shade)
        if saturation > 0:
            # Max out saturation so the colors "pop" a bit more.
            anchor = hsv_to_rgb((hue, 1, value))
        else:
            # Grayscale family (s=0): anchor the map on a near-black gray.
            anchor = hsv_to_rgb((hue, 0, 0.1))
        color_maps[pop] = LinearSegmentedColormap.from_list(pop, [white, anchor])

    return dict( color_dict1=color_dict1, color_dict4=color_dict4, color_maps=color_maps )

def visualize_color_scheme(color_list, color_matrix, title=None, ax=None):
    """Show a color scheme as an image with one row of swatches per family.

    Each row holds the shades of `color_matrix`, a white spacer, and the
    family's default color from `color_list`. Draws on `ax`, or on the current
    axes when `ax` is None.
    """
    default_column = color_list.reshape((-1,1,3))
    spacer         = np.ones_like(default_column) # white column
    swatches       = np.concatenate([color_matrix, spacer, default_column], axis=1)
    target = plt.gca() if ax is None else ax
    target.imshow(swatches)
    target.set_xticks(range(6))
    target.set_xticklabels(['1','2','3','4','','default'])
    target.set_yticks([])
    target.set_title(title, pad=10)

# Build a registry of candidate color schemes. Each entry of `color_schemes`
# is a (color_list, color_matrix) pair: one default color per family and a
# family x shade matrix of RGB values.

# Adam's color scheme.
color_matrix  = [ [ '#AADFFC', '#689BB7', '#315C72', '#09232F' ],  
                  [ '#FC7940', '#BF5428', '#823413', '#491702' ],
                  [ '#3AFAF2', '#4FB1A8', '#406E67', '#22332E' ],
                  [ '#F890A2', '#C75778', '#8C264E', '#4C0225' ],
                  [ '#59DA7A', '#FFFFFF', '#FFFFFF', '#0F3B1C' ] ]
# Convert the hex strings to RGB triplets.
color_matrix  = np.array([ [to_rgb(c) for c in C] for C in color_matrix ])
# The default color of each family is its second shade...
color_list    = color_matrix[:,1].copy()
# ...except for the last family, whose second shade is white: use its last shade.
color_list[4] = color_matrix[4,-1]
color_schemes = { 'adam': (color_list,color_matrix) }

# Scheme built on matplotlib's default line colors.
color_list = np.array(list(map(to_rgb, ['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd'])))
color_matrix = np.array([create_color_shades(c) for c in color_list])
color_schemes['mpl_'] = (color_list,color_matrix)

# More matplotlib-based color schemes.
for k in ['tab20','tab20b','tab20c']:
    # Use the color scheme as-is: 20 colors read as 5 families of 4 shades,
    # with the shade order reversed within each family.
    color_matrix = np.array(plt.get_cmap(k).colors).reshape((5,4,3))[:,::-1,:]
    color_list = color_matrix[:,-1,:].copy()
    color_schemes[k] = (color_list,color_matrix)
    # Keep each family's central hue, but apply custom recipe to create the shades within each family.
    color_list = np.array(plt.get_cmap(k).colors[::4])
    color_matrix = np.array([create_color_shades(c) for c in color_list])
    if k=='tab20c':
        # Render the last family of tab20c in grayscale.
        color_matrix[4] = create_color_shades(color_list[4], gray=True)
    color_schemes[k+'_'] = (color_list,color_matrix)

# # Visualize every scheme under consideration.
# nc = 4
# nr = np.math.ceil(len(color_schemes)/nc)
# fig,axs = plt.subplots(nr, nc, figsize=(4*nc,4*nr), gridspec_kw=dict(wspace=0.5))
# for ax in axs.flatten():
#     ax.axis('off')
# for i,k in enumerate(color_schemes.keys()):
#     visualize_color_scheme(*color_schemes[k], title=k, ax=axs[i//nc,i%nc])

# Pick a scheme to use in the rest of the notebook. This defines the globals
# `color_dict1`, `color_dict4` and `color_maps`.
globals().update(create_color_dictionaries(*color_schemes['mpl_']))

# Figure 1

In [None]:
# Load the pre-processed figure data. The context manager closes the file
# handle as soon as the data has been read.
# NOTE: unpickling can execute arbitrary code; only load a `figure-data.pik`
# obtained from a trusted source.
with open('figure-data.pik','rb') as pik_file:
    figure_data = pickle.load(pik_file)
# print(figure_data['fig1'].keys())
# Expose the Figure 1 entries as notebook-level variables; the code below
# reads `density`, `nematic`, `bin_centers` and `median` from them.
globals().update(figure_data['fig1'])

fig, axs = plt.subplots(nrows=2, ncols=3, figsize=(12,7.5))
axs = axs.T # after transposing, axes are indexed as axs[column,row]

# Video snapshots.
for i in range(2):
    ax = axs[0,i]
    sf_track = mpimg.imread(f'assets/fig_snapshot-{["surface","pachon"][i]}.png')
    ax.imshow(sf_track)
    ax.axis('off')

# Density and nematic OP.
for i in range(2):
    ax = axs[1,i]
    Q  = [density,nematic][i]
    for pop in Q.keys():
        mu,err = mean_sem(Q[pop], axis=0)
        ax.plot(bin_centers, mu, label=names[pop], 
                color=color_dict1[pop], lw=lw)
        ax.fill_between(bin_centers, mu-err, mu+err, 
                        color=color_dict1[pop], alpha=alpha_err, lw=0)
    ax.locator_params(axis='y', nbins=3)
    # Horizontal guide lines.
    for y in [-1,0,1]:
        ax.axhline(y=y,color='k',dashes=(4,4),lw=0.5)
    ax.set_xlim(0,55)
    ax.set_xlabel('Distance from the wall (cm)')

ax = axs[1,0]
# ax.set_ylim(0,5.7e-4)
ax.set_ylim(0,9e-4)
ax.set_yticks([0,0.0004,0.0008])
ax.set_ylabel('Area density')

ax = axs[1,1]
ax.set_ylim(-1.1,0.55)
ax.set_yticks([-1,0])
ax.set_ylabel('Nematic order\nparameter')

# Legend: hide the unused bottom-right axes and place the legend of the
# nematic panel in the freed space.
plt.tight_layout()
axs[2,1].set_visible(False)
axs[1,1].legend(loc='center', bbox_to_anchor=(1.85,0.5),
                borderpad=1, framealpha=0.5, edgecolor='k', fancybox=True)

# Median distance to the wall.
ax   = axs[2,0]
med  = [ (k,x) for k,X in median.items() for x in X]
med  = pd.DataFrame(med, columns=['Population', 'Median distance\nfrom the wall (cm)'])
data = dict(x='Population', y='Median distance\nfrom the wall (cm)', data=med, ax=ax)
# Lighten each population color by blending it 20% towards white.
palette = [ v+0.2*(1-v) for v in color_dict1.values()]
sns.violinplot(inner=None, ec='None', palette=palette, cut=2, **data)
sns.stripplot(size=5, jitter=0.1, color='k', **data)
# ax.set_xlabel('')

# Reference line labelled 'uniform area density'.
y0 = 55.5/np.sqrt(2)
ax.axhline(y0, color='k', dashes=(4,4))
ax.text(3.5, y0+0.3, 'uniform area density ', fontsize=12, ha='right', va='bottom')

def draw_bracket(txt, x1, x2, y, dx=0, dy=0.8, ax=ax):
    """Draw a bracket spanning x1..x2 at height y, with `txt` centered on it."""
    # dy = ax.transAxes.inverted([0,dy_])
    ax.plot([x1+dx,x1+dx,x2-dx,x2-dx], [y-dy,y,y,y-dy], color='k', lw=0.5)
    ax.text((x1+x2)/2, y-1, txt, va='bottom', ha='center')
draw_bracket('***', 0, 1, 24)
draw_bracket('**', 0, 2, 28)
draw_bracket('*', 0, 3, 32)
ax.set_ylim(0, 46)

# Label each panel.
for i,c in enumerate('abcde'):
    ax = axs[i//2,i%2]
    ax.text(0.02, 0.98, c, transform=ax.transAxes, fontsize=24, weight='bold', va='top')

plt.savefig('fig1.png', dpi=dpi)
plt.show()

In [None]:
# Normality check of the median wall distances, one population at a time.
print('Shapiro test:')
for pop_key, sample in median.items():
    shapiro_p = shapiro(sample)[1]
    print(f'p[{pop_key}] = {shapiro_p}')
# print(f'cave: p = {shapiro(df_cf)[1]}')

# Compare each cave population against the surface population.
print('\nMann-Whitney test:')
surface_sample = median['sf']
for k in ['pa','mo','ti']:
#     p = ttest_ind(median['sf'],median[k]).pvalue
#     print(f'T-test sf-{k}: p = {p}')
    p = mannwhitneyu(surface_sample, median[k]).pvalue
    print(f'p[sf-{k}] = {p}')

# # If the hypothesis is that there's at least one inter-population difference, 
# # use a multiple test correction.
# # However here the hypothesis is that all three cave populations are different 
# # from the surface population. If anything the bar should be lowered.
# P = [ mannwhitneyu(median['sf'],median[k]).pvalue for k in ['pa','mo','ti'] ]
# multipletests(P) # , method='holm-sidak') # 'sidak') # 'bonferroni') # 

In [None]:
# For each surface-cave comparison, test normality on the pooled sample that
# goes into the T or Mann-Whitney test, and report all three p-values.
fmt = '.1g'
for k in ['pa','mo','ti']:
    surface_values = median['sf'].tolist()
    cave_values    = median[k].tolist()
    pooled         = surface_values + cave_values
    p_normality    = shapiro(pooled).pvalue
    p_ttest        = ttest_ind(surface_values, cave_values).pvalue
    p_mannwhitney  = mannwhitneyu(surface_values, cave_values).pvalue
    print(f'sf-{k}: p[Shapiro]={p_normality:{fmt}}, p[T-test]={p_ttest:{fmt}}, p[Mann-Whitney]={p_mannwhitney:{fmt}}')

# Figure 2

While the cave fish tend to maintain an active swim state in our arena, regardless of group size, surface fish tend to stop swimming when they are alone or in small groups. In this figure we show how this affects the distribution of swim speeds, and how the fraction of time spent active depends on group size.

In [None]:
# Load the pre-processed figure data. The context manager closes the file
# handle as soon as the data has been read.
# NOTE: unpickling can execute arbitrary code; only load a `figure-data.pik`
# obtained from a trusted source.
with open('figure-data.pik','rb') as pik_file:
    figure_data = pickle.load(pik_file)
# print(figure_data['fig2'].keys())
# Expose the Figure 2 entries as notebook-level variables; the code below
# reads `speed_distribution` and `inactive_fraction` from them.
globals().update(figure_data['fig2'])

fig, axs = plt.subplots( nrows=1, ncols=2, figsize=(8,3.5), 
                         gridspec_kw=dict(wspace=0.5, bottom=0.2, top=0.95, right=0.95) )

# Panel a: speed distribution of surface fish for several group sizes.
ax = axs[0]
x  = speed_distribution['bin_centers']
for n in [1,2,5]: # Loop over group size.
    H      = speed_distribution['sf',n]
    mu,err = mean_sem(H, axis=0)
    ax.plot(x, mu, label=n, color=color_dict4['sf',n], lw=lw)
    ax.fill_between(x, mu-err, mu+err, color=color_dict4['sf',n], alpha=alpha_err, lw=0)
ax.set_xlabel('Speed (cm/s)')
ax.set_ylabel('Probability density')
ax.set_xlim(0,50)
ax.set_ylim(0,None)
ax.legend(**leg_opt)

# Panel b: per-population statistic vs group size, with standard errors.
# NOTE(review): the data is named `inactive_fraction` while the axis label
# reads 'time active' -- confirm which quantity the pickle stores.
ax     = axs[1]
n_list = [1,2,5,10]
F      = inactive_fraction
# Error bars only (no connecting line). Work on a copy: updating `err_opt`
# in place would silently set lw=0 for every later cell that reuses it.
bar_opt = dict(err_opt, lw=0)
for pop in populations[:-1]:
    mu    = np.array([np.nanmean(F[pop,n], axis=0) for n in n_list])
    std   = np.array([np.nanstd(F[pop,n], axis=0) for n in n_list])
    # Number of non-NaN observations for each group size (the values that
    # nanmean/nanstd actually use), rather than one count for all sizes.
    count = np.array([np.count_nonzero(~np.isnan(F[pop,n]), axis=0) for n in n_list])
    err   = std / np.sqrt(count-1)
    ax.errorbar(n_list, mu, yerr=err, color=color_dict1[pop], **bar_opt)
    ax.plot(n_list, mu, color=color_dict1[pop], dashes=dashes[pop], lw=lw, label=pop)
ax.set_ylim(0.6,1.08)
ax.set_xlabel('Group size')
ax.set_ylabel('Fraction of the\ntime active')
ax.set_xticks(n_list)
# Override on a copy so the shared legend options are left untouched.
ax.legend(**dict(leg_opt, handlelength=1.5))

# Label each panel.
for i,c in enumerate('ab'):
    ax = axs[i%2]
    ax.text(0.02, 0.98, c, transform=ax.transAxes, fontsize=24, weight='bold', va='top')

plt.tight_layout()
plt.savefig('fig2.png', dpi=dpi)
plt.show()

# [Work in progress]

Figures above use trilab-tracker data.
Figures below use Adam's figure data.

# Figure 3

Now let's take a look at the distribution of speeds after the activity cut. First we can grab histograms from the pickle.

In [None]:
# Figure 3: speed distributions after the activity cut (panels a, b) and mean
# speed when active vs group size (panel c).
# NOTE(review): `trial_store` is not defined in the visible part of the
# notebook, and this cell indexes `names`/`color_dict4` with 'SF'/'Pa' whereas
# the styling cell defines lowercase keys ('sf', 'pa', ...) -- confirm the
# key convention before running.
H = trial_store['histograms-vcut']
means = trial_store['means-vcut']

fig, axs = plt.subplots(nrows=1, ncols=3, figsize=(12,4))
axs = axs.flatten()

n_list = [1,2,5,10]
for i,pop in enumerate(['SF','Pa']):
    ax  = axs[i]
    ax.set_title(names[pop])
    ax.set_xlabel('Speed (cm/s)')
    ax.set_ylabel('Probability density')
    x   = H['speed','bin_centers']
    for n in n_list:
        # Mean histogram along axis 0, with its standard error as a halo.
        y   = np.mean(H[pop,n,'speed'], axis=0)
        err = np.std(H[pop,n,'speed'], axis=0)/np.sqrt(len(H[pop,n,'speed'])-1)
        ax.plot(x, y, label=n, color=color_dict4[pop,n], lw=lw)
        ax.fill_between(x, y-err, y+err, color=color_dict4[pop,n], alpha=alpha_err, lw=0, zorder=-5)
#         ax.fill_between(x, y-err, y+err, color=color_dict4[pop,n], alpha=0.7, lw=0, label=n)
    ax.legend(**leg_opt)
    ax.set_xlim(0,75)
    # Add 10% headroom above the automatically chosen upper limit.
    ax.set_ylim(0,1.1*ax.get_ylim()[1])
    ax.locator_params('y',nbins=4)

ax = axs[2]
ms_stats = {}
for pop in populations[:-1]:
#     xy = [(n,m) for n in n_list for m in means[pop,n,'speed'][:,0]]
#     ax.scatter(*zip(*xy), color=color_dict1[pop], s=3)
    # First column of the 'speed' means, one array per group size.
    MS  = [means[pop,n,'speed'][:,0] for n in n_list]
    y   = [np.mean(ms) for ms in MS]
    err = [np.std(ms)/np.sqrt(len(ms)-1) for ms in MS]
    ax.errorbar(n_list, y, yerr=err, color=color_dict1[pop], label=pop, alpha=0.7, **err_opt) # , dashes=dashes[pop]
ax.set_xlabel('Group size')
ax.set_ylabel('Mean speed\nwhen active')
ax.set_xticks(n_list)
ax.set_ylim(0,30)
ax.locator_params(axis='y', nbins=4)
ax.legend(**leg_opt)

# Label each panel.
for i,c in enumerate('abc'):
    ax = axs[i] # [i//2,i%2] 
    ax.text(0.02, 0.98, c, transform=ax.transAxes, fontsize=24, weight='bold', va='top')

plt.tight_layout()
plt.savefig('fig3.png', dpi=dpi)
plt.show()

In [None]:
# Linear regression of mean speed against group size, one population per panel.
slope,pvalue = [],[]
fig,axs = plt.subplots(2, 2, figsize=(6,6))
for ax,pop in zip(axs.flatten(),populations[:-1]):
    # Collect (group size, mean speed) pairs from all 'speed' entries of `pop`.
    x,y  = np.array([ (k[1],m) for k,M in means.items() if k[0]==pop and k[2]=='speed' for m in M[:,0] ]).T
#     x,y  = np.array([ (n,m) for n in [1,2,5,10] for m in means[pop,n,'speed'][:,0] ]).T
    ax.scatter(x, y, fc='None', ec=color_dict1[pop], lw=1, label=names[pop])
    reg = linregress(x, y) #, alternative='less')
    pvalue.append([pop, reg.pvalue])
    slope.append([pop, reg.slope, reg.stderr, len(x)])
    ax.plot(x, reg.slope*x+reg.intercept, color=color_dict1[pop])
    ax.set_title(names[pop])
# fig.legend(loc='center left', bbox_to_anchor=(0.95,0.5))
plt.tight_layout()
plt.show()

# linregress is called with its default two-sided alternative, so these
# p-values test for a nonzero slope, not specifically a negative one.
print('p-values for a nonzero slope (two-sided test):')
for pop,pval in pvalue:
    print(f'  {pop}: {pval}')

# Summary plot: fitted slope (with its standard error) for each population.
ax = plt.gca()
n,s,e,no = zip(*slope)
positions = range(len(n))
ax.errorbar(positions, s, yerr=e, **err_opt)
# One tick per plotted population, labelled with its display name.
ax.set_xticks(positions)
ax.set_xticklabels([names[pop] for pop in n])
ax.set_xlabel('Population')
ax.set_ylabel('Slope of mean speed\nvs. group size')
plt.show()

In [None]:
# Pairwise ANCOVA between populations: does mean speed differ between two
# populations once group size `n` is accounted for as a covariate?
means = trial_store['means-vcut']
# Long-format table with one row per entry of each 'speed' array:
# (population, group size, first column of the entry).
df = [ [k[0],k[1],m[0]]
       for k,M in means.items()
       if len(k)==3 and k[2]=='speed' and k[0] in populations[:-1]
       for m in M ]
df = pd.DataFrame(df, columns=['Population','n','Mean Speed'])
# display(df)

for p1,p2 in itt.combinations(populations[:-1],2):
    I   = (df['Population']==p1)|(df['Population']==p2)
    res = ancova(data=df[I], dv='Mean Speed', covar='n', between='Population')
#     display(res)
    # Extract the scalar explicitly: calling float() on a one-element Series
    # is deprecated in pandas.
    p   = float(res.loc[res['Source']=='Population','p-unc'].iloc[0])
    print(f'{p1}-{p2}: {p}')
#     break

In [None]:
# Closer look at one pair of populations: scatter plot of mean speed vs group
# size for each of them, followed by the full ANCOVA table.
p1, p2 = 'SF', 'Pa'
in_sf = df['Population']=='SF'
in_pa = df['Population']=='Pa'
I     = (df['Population']==p1)|(df['Population']==p2)
# display(df[I])
scatter_opt = dict(x='n', y='Mean Speed', kind='scatter')
df[in_sf].plot(ax=plt.gca(), **scatter_opt)
df[in_pa].plot(ax=plt.gca(), color='r', **scatter_opt)
# df[I].groupby('Population').plot(x='n', y='Mean Speed', lw=0, marker='o', ax=plt.gca())

pair_data = df[I]
res = ancova(data=pair_data, dv='Mean Speed', covar='n', between='Population')
display(res)

In [None]:
# Compare the regression slope of the first population in `slope` with that
# of each of the next three. Each entry of `slope` is
# [population, slope, stderr of the slope, number of regression points].
# NOTE(review): `ttest_ind_from_stats` takes standard deviations, but `e`
# holds the standard errors returned by linregress -- confirm this is intended.
n,s,e,no = zip(*slope)
for i in range(1,4):
    # Variant 1: pass 2 as the number of observations of each sample.
    p = ttest_ind_from_stats(s[0],e[0],2, s[i],e[i],2).pvalue
    print(f'SF-{n[i]}: {p}')
    # Variant 2: pass the number of regression points minus 2 instead.
    p = ttest_ind_from_stats(s[0],e[0],no[0]-2, s[i],e[i],no[i]-2).pvalue
    print(f'SF-{n[i]}: {p}')
    print()

# Tabulate the regression results (shown as the cell output).
pd.DataFrame(slope, columns=['Population','slope','err','n_obs'])
# pairwise_tukeyhsd(

## Supplement to Figure 3

In [None]:
# Supplementary figure: speed distributions after the activity cut for every
# population except the last entry of `populations`, one panel per population.
# NOTE(review): `trial_store` and `fpath` are not defined in the visible part
# of the notebook -- confirm they are set up before this cell runs.
n_list = [1,2,5,10]
H = trial_store['histograms-vcut']
means = trial_store['means-vcut']

fig, axs = plt.subplots(nrows=2, ncols=2, figsize=(8,7))

for i,pop in enumerate(populations[:-1]):
    ax  = axs[i//2,i%2]
    ax.set_title(names[pop])
    ax.set_xlabel('Speed (cm/s)')
    ax.set_ylabel('Probability density')
    x   = H['speed','bin_centers']
    for n in n_list:
        # Mean histogram along axis 0, with its standard error as a halo.
        y   = np.mean(H[pop,n,'speed'], axis=0)
        err = np.std(H[pop,n,'speed'], axis=0)/np.sqrt(len(H[pop,n,'speed'])-1)
        ax.plot(x, y, label=n, color=color_dict4[pop,n], lw=lw)
        ax.fill_between(x, y-err, y+err, color=color_dict4[pop,n], alpha=alpha_err, lw=0, zorder=-5)
    ax.legend(**leg_opt)
    ax.set_xlim(0,75)
    ax.set_ylim(0,None)
    ax.locator_params('y',nbins=4)
    # Add 10% headroom above the automatically chosen upper limit.
    ax.set_ylim(0,1.1*ax.get_ylim()[1])

# Label each panel.
for i,c in enumerate('abcd'):
    ax = axs[i//2,i%2] 
    ax.text(0.02, 0.98, c, transform=ax.transAxes, fontsize=24, weight='bold', va='top')

plt.tight_layout()
plt.savefig(fpath('SI-speeds.png'), dpi=dpi)
plt.show()

Note that we only have single runs for Tinaja and Molino groups of 10.

# Figure 4

In this figure, we focus on the distributions of turning behavior across type and group size.

In [None]:
# Figure 4: angular-speed distributions for 'SF' and 'Pa'. Left column: the
# histogram itself (time-based). Right column: the histogram multiplied by the
# angular speed (angle-based).
# NOTE(review): `trial_store` and `fpath` are not defined in the visible part
# of the notebook -- confirm they are set up before this cell runs.
n_list = [1, 2, 5, 10]
H = trial_store['histograms-vcut']

# Set vertical line at the following x-coordinates
# NOTE(review): `xvline1`, `xvline2` and `alpha_vline` are not referenced
# below in this cell; the guide lines are drawn at the literal values [1,7].
xvline1 = 1
xvline2 = 7
alpha_vline = 0.67

fig, axs = plt.subplots(nrows=2, ncols=2, figsize=(8,8),
                       gridspec_kw={'width_ratios': [8,20], 'wspace':0.6, 'hspace':0.5,
                                    'right':0.95, 'top':0.95})
for i,pop in enumerate(['SF','Pa']):
    # Left column: time-based density.
    ax = axs[i,0]
    ax.set_title(names[pop])
    ax.set_xlabel('Angular speed (rad/s)')
#     ax.set_ylabel('Probability density')
    ax.set_ylabel('Time-based\nprobability density')
    x   = H['omega','bin_centers']
    for n in n_list:
        h   = H[pop,n,'omega']
        y   = np.mean(h, axis=0)
        err = np.std(h, axis=0)/np.sqrt(len(h)-1)
        ax.plot(x, y, label=n, color=color_dict4[pop,n], lw=lw)
        ax.fill_between(x, y-err, y+err, color=color_dict4[pop,n], alpha=alpha_err, lw=0, zorder=-5)
    ax.set_xlim(0,8)
    
    # Right column: same histograms weighted by the angular speed.
    ax = axs[i,1]
    ax.set_title(names[pop])
#     ax.set_ylabel('Probability density\n'+r'$\times$ Angular speed')
    ax.set_ylabel('Angle-based\nprobability density')
    x   = H['omega','bin_centers']
    for n in n_list:
        h   = H[pop,n,'omega']
        y   = np.mean(h, axis=0)
        err = np.std(h, axis=0)/np.sqrt(len(h)-1)
        ax.plot(x, x*y, label=n, color=color_dict4[pop,n], lw=lw)
        ax.fill_between(x, x*(y-err), x*(y+err), color=color_dict4[pop,n], alpha=alpha_err, lw=0, zorder=-5)
    ax.set_xlim(0,20)
    ax.legend(loc='upper right', **leg_opt)
    
# Shared decorations: dashed guide lines, x label and 10% vertical headroom.
for ax in axs.flatten():
    for x in [1,7]:
        ax.axvline(x, lw=0.7*lw, color='k', dashes=(4,4))
    ax.set_xlabel('Angular speed (rad/s)')
    ax.set_ylim(0,1.1*ax.get_ylim()[1])
    ax.locator_params(axis='x', nbins=5)
#     ax.legend(loc='upper right', **leg_opt)

# Label each panel.
for i,c in enumerate('abcd'):
    ax = axs[i//2,i%2] 
    # The right-hand panels are wider, so their label offset is halved.
    t = ax.text(0.05/(i%2+1), 0.98, c, transform=ax.transAxes, fontsize=24, weight='bold', va='top')

plt.tight_layout()
plt.savefig(fpath("fig4.png"), dpi=dpi)
plt.show()

## Supplement to Figure 4

In [None]:
# Supplementary figure: angular-speed distributions for every population
# except the last entry of `populations`, one row per population. Left column:
# the histogram itself (time-based). Right column: the histogram multiplied by
# the angular speed and rescaled by `a` (angle-based).
# NOTE(review): `trial_store` and `fpath` are not defined in the visible part
# of the notebook -- confirm they are set up before this cell runs.
n_list = [1, 2, 5, 10]
H = trial_store['histograms-vcut']

# Set vertical line at the following x-coordinates
xvlines = [1,7]
alpha_vline = 0.67 # NOTE(review): not referenced below in this cell.

fig, axs = plt.subplots(nrows=4, ncols=2, figsize=(8,16),
                       gridspec_kw={'width_ratios': [8,20], 'wspace':0.6, 'hspace':0.5,
                                    'right':0.95, 'top':0.95})
for i,pop in enumerate(populations[:-1]):
    # Left column: time-based density.
    ax = axs[i,0]
    ax.set_title(names[pop])
    ax.set_xlabel('Angular speed (rad/s)')
    ax.set_ylabel('Time-based\nprobability density')
    x   = H['omega','bin_centers']
    for n in n_list:
        h   = H[pop,n,'omega']
        y   = np.mean(h, axis=0)
        err = np.std(h, axis=0)/np.sqrt(len(h)-1)
        ax.plot(x, y, label=n, color=color_dict4[pop,n], lw=lw)
        ax.fill_between(x, y-err, y+err, color=color_dict4[pop,n], alpha=alpha_err, lw=0, zorder=-5)
    ax.set_xlim(0,8)
    
    # Right column: same histograms weighted by the angular speed.
    ax = axs[i,1]
    ax.set_title(names[pop])
    ax.set_ylabel('Angle-based\nprobability density')
    x   = H['omega','bin_centers']
    for n in n_list:
        h   = H[pop,n,'omega']
        y   = np.mean(h, axis=0)
        # NOTE(review): `a` normalizes the integral of y, while the plotted
        # curve is x*y; the statistics cell below normalizes the integral of
        # H*|bins| instead -- confirm which normalization is intended.
        a   = 1/np.trapz(y, x) # Density normalization.
        err = np.std(h, axis=0)/np.sqrt(len(h)-1)
        ax.plot(x, a*x*y, label=n, color=color_dict4[pop,n], lw=lw)
        ax.fill_between(x, a*x*(y-err), a*x*(y+err), color=color_dict4[pop,n], alpha=alpha_err, lw=0, zorder=-5)
    ax.set_xlim(0,20)
    ax.legend(loc='upper right', **leg_opt)
    
# Shared decorations: dashed guide lines, x label and 10% vertical headroom.
for ax in axs.flatten():
    for x in xvlines:
        ax.axvline(x, lw=0.7*lw, color='k', dashes=(4,4))
    ax.set_xlabel('Angular speed (rad/s)')
    ax.set_ylim(0,1.1*ax.get_ylim()[1])
    ax.locator_params(axis='x', nbins=5)
#     ax.legend(loc='upper right', **leg_opt)

# Label each panel.
for i,c in enumerate('abcdefgh'):
    ax = axs[i//2,i%2] 
    # The right-hand panels are wider, so their label offset is halved.
    ax.text(0.05/(i%2+1), 0.98, c, transform=ax.transAxes, fontsize=24, weight='bold', va='top')

plt.tight_layout()
plt.savefig(fpath("SI-turns.png"), dpi=dpi)
plt.show()

In [None]:
# Split the angular-speed histograms into ranges of |omega| and compute, for
# every (population, group size) key, the fraction of time and the fraction of
# turning angle that falls in each range.
keys = [ k for k in H.keys() if len(k)==3 and k[2]=='omega' ]
bins = H['omega', 'bin_centers']
# ranges = ((0,1),(1,6),(6,np.inf))
ranges = ((0,0.5),(0.5,6),(6,np.inf))
# ranges = ((0,1),(1,np.inf))
# One boolean mask over the bins per range (lower bound inclusive).
II   = [ (np.absolute(bins)>=R[0])&(np.absolute(bins)<R[1]) for R in ranges ]
# I1   = np.absolute(bins)<1
# I3   = np.absolute(bins)>=6
# I2   = ~(I1|I3)
stats = {}
for k in keys:
    for j,I in enumerate(II):
        # Zero the histogram outside range j, then integrate over the bins.
        h       = H[k].copy()
        h[:,~I] = 0
        stats[k[:2]+(j,'time')] = np.trapz(h, bins, axis=1)
        # Weight by |omega| and normalize by the integral over all bins.
        a       = 1/np.trapz(H[k]*np.absolute(bins), bins, axis=1) # Density normalization.
        stats[k[:2]+(j,'angle')] = a*np.trapz(h*np.absolute(bins), bins, axis=1)

# range_labels = ['0-1', '1-6', '6+']
range_labels = [ f'{r1}-{r2}' for r1,r2 in ranges ]
N = [1,2,5,10]
# One figure per (quantity, range): fraction vs group size for each population.
for q in ['time','angle']:
    for j in range(len(ranges)):
        for pop in populations[:-1]:
            S = [ stats[pop,n,j,q] for n in N ]
            y = [ np.mean(s) for s in S ]
            e = [ np.std(s)/np.sqrt(len(s)-1) for s in S ]
            plt.errorbar(N, y, e, label=f'{pop} {range_labels[j]}', color=color_dict1[pop], alpha=0.8, **err_opt)
        plt.legend(loc='center left', bbox_to_anchor=(1.01,0.5))
        plt.xticks([1,2,5,10])
        plt.xlabel('Group size')
        plt.ylim(0,1)
        plt.ylabel(f'Fraction of {q}')
    #     plt.tight_layout()
        plt.show()
    #     break


# Figure 5

We're interested in the collective behavior. To summarize the collective schooling and/or shoaling behavior of these fish, we measure relative distance and relative angle between all pairs of fish.

In [None]:
# Figure 5: schematics (first column) and pair distance / pair angle heatmaps
# for groups of 2 and 10 of 'SF' and 'Pa'.
# NOTE(review): `trial_store` and `fpath` are not defined in the visible part
# of the notebook -- confirm they are set up before this cell runs.
da_binned = trial_store['distance-alignment']
x = da_binned['x_edges']
y = da_binned['y_edges']

gridspec=dict(wspace=0.4, hspace=0.4, left=0.02, right=0.98, bottom=0.08, top=0.94)
fig, axs = plt.subplots(nrows=2, ncols=3, figsize=(12,8), gridspec_kw=gridspec)
fig.tight_layout()

# Label each panel.
# Note: with `transform=ax.transAxes`, the labels move if the axes object is later resized.
# Doing the coordinate transform explicitly (after calling `tight_layout`) avoids that,  
# thus keeping the labels on a regular grid.
for i,c in enumerate('abcdef'):
    ax = axs[i//3,i%3] 
    xy = fig.transFigure.inverted().transform(ax.transAxes.transform([0.02,0.98]))
    ax.text(*xy, c, transform=fig.transFigure, fontsize=24, weight='bold', va='top')

# First column: schematics.
for i,fn in enumerate(['fig_schematic_pair-angle.png','fig_schematic_collective-behavior.png']):
    axs[i,0].imshow(mpimg.imread(fpath(fn)))
    axs[i,0].axis('off')
# Shift the lower schematic down by 0.04 (figure coordinates).
bbox = axs[1,0].get_position()
bbox.y0,bbox.y1 = np.array([bbox.y0,bbox.y1]) - 0.04
axs[1,0].set_position(bbox)

# Heatmaps: one column per population, one row per group size.
for i,pop in enumerate(['SF','Pa']):
    for j,n in enumerate([2,10]):
        ax  = axs[j,i+1]
        z   = da_binned[pop,n].T
        ax.pcolormesh(x, y, z, cmap = color_maps[pop], vmin=0, vmax=np.max(z))
        ax.set_xlim(x[0], x[-1])
        ax.set_ylim(y[0], y[-1])
        ax.set_yticks([0,30,60,90,120,150,180])
        ax.set_ylabel("Pair angle (deg)")
        ax.set_xlabel("Pair distance (cm)")
        ax.set_title(f'{n} {names[pop]}')

# Wall following prediction (arches).
D     = 111  # tank diameter
D_    = 103 # effective tank diameter (wall-following diameter)
# The arccos argument leaves [-1,1] for d > D_, which would only produce NaNs
# (not drawn) and a RuntimeWarning, so sample the arches up to D_ only.
d     = np.arange(0,D_+1,1)
theta = (180/np.pi) * np.arccos(1 - 2*(d/D_)**2)
opt   = dict(lw=1.5, alpha=0.5, color='k', dashes=(4,4))
for i in range(2):
    axs[i,2].plot(d, theta, **opt)
    axs[i,2].plot(d, 180-theta, **opt)


fout = "fig5.png"
plt.savefig(fpath(fout), dpi=dpi)
plt.show()

## Density of proximities

In [None]:
# for n in [2,10]:
#     bc = da_binned['x_centers']
#     h_dist = np.sum(da_binned['SF',n], axis=1)/np.sum(da_binned['SF',n])
#     fig, ax1 = plt.subplots()
#     plt.title(f"Relative distance for {n} Surface Fish")
#     ax1.plot(bc,h_dist,c='blue',alpha=0.6,lw=3)
#     ax1.set_ylabel("Probability density",c='blue')
    
#     ax2 = ax1.twinx()
#     ax2.plot(bc,np.cumsum(h_dist), '--', c='brown', alpha=0.6,lw=3)
#     ax2.set_ylabel("Cumulative", c='brown')
#     plt.tight_layout()
#     plt.show()

## Supplement to Figure 5

In [None]:
# Supplementary figures: pair distance / pair angle heatmaps for every
# population except the last entry of `populations`; one figure per group size.
# NOTE(review): `trial_store` and `fpath` are not defined in the visible part
# of the notebook -- confirm they are set up before this cell runs.
da_binned = trial_store['distance-alignment']
x = da_binned['x_edges']
y = da_binned['y_edges']

for n in [2,5,10]:
    gridspec = dict(wspace=0.4, hspace=0.4, left=0.1, right=0.97, bottom=0.08, top=0.94)
    fig, axs = plt.subplots(nrows=2, ncols=2, figsize=(8,8), gridspec_kw=gridspec)
    fig.tight_layout()

    # Label each panel.
    for i,c in enumerate('abcd'):
        ax = axs[i//2,i%2] 
        ax.text(0.02, 0.98, c, transform=ax.transAxes, fontsize=24, weight='bold', va='top')

    # One heatmap per population, each with its own color map and color scale.
    for i,pop in enumerate(populations[:-1]):
        ax  = axs[i//2,i%2]
        z   = da_binned[pop,n].T
        ax.pcolormesh(x, y, z, cmap = color_maps[pop], vmin=0, vmax=np.max(z))
        ax.set_xlim(x[0], x[-1])
        ax.set_ylim(y[0], y[-1])
        ax.set_yticks([0,30,60,90,120,150,180])
        ax.set_ylabel("Pair angle (deg)")
        ax.set_xlabel("Pair distance (cm)")
        ax.set_title(f'{n} {names[pop]}')

#         if pop!='SF':
#             # Wall following prediction (arches).
#             D     = 111  # tank diameter
#             D_    = 103 # effective tank diameter (wall-following diameter)
#             d     = np.arange(0,D+1,1)
#             theta = (180/np.pi) * np.arccos(1 - 2*(d/D_)**2)
#             opt   = dict(lw=1.5, alpha=0.5, color='k', dashes=(4,4))
#             for i in range(2):
#                 ax.plot(d, theta, **opt)
#                 ax.plot(d, 180-theta, **opt)
    
    # Output file name carries the zero-padded group size, e.g. n02.
    plt.savefig(fpath(f"SI_collective-n{n:02}.png"), dpi=dpi)
    plt.show()

# Figure 6

In this figure, we consider surface fish in a dark tank and compare some aspects of their behavior to cavefish.

First, we need to connect with several dictionary entries from the data.

In [None]:
# Load data.
h_dark     = trial_store['dark-histograms-vcut']
means_dark = trial_store['dark-means-vcut']
da_binned  = trial_store['distance-alignment']
da_binned['SF-dark',10] = trial_store['dark-distance-alignment']
h_theta    = trial_store['pair-angle-slice']['h_theta']
cos_mean   = trial_store['pair-angle-slice']['cos_mean']


# Create figure.
gridspec = dict(wspace=0.4, hspace=0.4, left=0.07, right=0.99, bottom=0.08, top=0.9)
fig, axs = plt.subplots(nrows=1, ncols=3, figsize=(12,3.8), gridspec_kw=gridspec)
fig.tight_layout()
inset_fontsize = 12
err_opt2 = dict(lw=lw, elinewidth=0.7*lw, capsize=2) # inset error bar options


# Label each panel.
for i,c in enumerate('abc'):
    ax = axs[i] 
    ax.text(0.02, 0.98, c, transform=ax.transAxes, fontsize=24, weight='bold', va='top')


# Left panel: speed distribution.
ax = axs[0]
D  = 111 # tank diameter
pop = 'SF-dark'
for n in [1,10]:
    x = h_dark['speed','bin_centers']
    h = h_dark['SF',n,'speed']
    y = np.mean(h, axis=0)
    err = np.std(h, axis=0)/np.sqrt(len(h)-1)
    ax.plot(x, y, label=n, color=color_dict4[pop,n], lw=lw)
    ax.fill_between(x, y-err, y+err, color=color_dict4[pop,n], alpha=alpha_err, lw=0, zorder=-5)
ax.set_xlim(0,65)
ax.set_ylim(0,1.1*ax.get_ylim()[1])
ax.locator_params('y',nbins=5)
ax.set_xlabel('Speed (cm/s)')
ax.set_ylabel('Probability density')
# leg_opt   = dict( borderpad=0.3, labelspacing=0.2, handlelength=1.5,
#                   handletextpad=0.5, borderaxespad=0.2, fontsize=15 )
l_opt = dict( borderpad=0.3, labelspacing=0.2, handlelength=1,
              handletextpad=0.5, borderaxespad=0.2, fontsize=15 )
ax.legend(loc='lower left', **l_opt)
ax.set_title(names[pop])


# Left panel inset: mean speed vs group size.
x0,y0,x1,y1 = axs[0].get_position()._points.flatten()
x0_,y0_     = x0+0.45*(x1-x0), y0+0.4*(y1-y0)
ax_         = fig.add_axes([x0_, y0_, x1-x0_-0.003, y1-y0_-0.01])
# Prepare data.
process_means = lambda n,m: (n, np.mean(m), np.std(m)/np.sqrt(len(m)-1))
  # Surface in the light.
x,y,err = np.array([ process_means(n, trial_store['means-vcut']['SF',n,'speed'][:,0]) for n in [1,2,5,10] ]).T
ax_.errorbar(x, y, yerr=err, color=color_dict1['SF'], dashes=dashes['SF'], label='Surface, light', **err_opt2)
  # Surface in the dark.
x,y,err = np.array([ process_means(n, means_dark['SF',n,'speed'][:,0]) for n in [1,10] ]).T
ax_.errorbar(x, y, yerr=err, color=color_dict1['SF-dark'], dashes=dashes['SF-dark'], label='Surface, dark', **err_opt2)
  # All cavefish.
# x,y,err = np.array([ process_means(n, trial_store['means-vcut']['all-cavefish',n][:,0]) for n in [1,10] ]).T
x,y,err = np.array([ process_means(n, 
              np.concatenate([trial_store['means-vcut'][pop,n,'speed'][:,0] 
                 for pop in ['Pa','Ti','Mo']])) for n in [1,2,5,10] ]).T
ax_.errorbar(x, y, yerr=err, color='#AA0000', dashes=dashes['Pa'], label='All cavefish', **err_opt2)
ax_.set_xlim(0,11)
ax_.set_ylim(0,None)
ax_.set_xticks([1,2,5,10])
ax_.set_xlabel('Group size', fontsize=inset_fontsize, labelpad=0)
ax_.set_ylabel('Mean speed (cm/s)', fontsize=inset_fontsize, labelpad=0)
l_opt = dict( borderpad=0.3, labelspacing=0.2, handlelength=1.5,
              handletextpad=0.5, borderaxespad=0.2, fontsize=inset_fontsize, frameon=False )
ax_.legend(**l_opt)


# Middle panel: distance-angle probability heatmap.
ax = axs[1]
n,pop = 10,'SF-dark'
x     = trial_store['distance-alignment']['x_edges']
y     = trial_store['distance-alignment']['y_edges']
z     = trial_store['distance-alignment'][pop,n].T
ax.pcolormesh(x, y, z, cmap = color_maps[pop], vmin=0, vmax=np.max(z))
ax.set_xlim(x[0], x[-1])
ax.set_ylim(y[0], y[-1])
ax.set_yticks([0,30,60,90,120,150,180])
ax.set_ylabel("Pair angle (deg)")
ax.set_xlabel("Pair distance (cm)")
ax.set_title(f'{n} {names[pop]}')


# Right panel: angle probability when close.
ax   = axs[2]
n    = 10 # group size
d0   = 10 # distance threshold
bins = np.linspace(0,180,201)
x    = (bins[1:]+bins[:-1])/2
for pop in populations:
    Y   = trial_store['pair-angle-slice']['h_theta'][pop] *np.pi/180
    y   = np.mean(Y, axis=0)
    err = np.std(Y, axis=0)/np.sqrt(Y.shape[0]-1)
    ax.plot(x, y, label=pop, color=color_dict1[pop], lw=lw)
    ax.fill_between(x, y-err, y+err, color=color_dict1[pop], alpha=alpha_err, lw=0, zorder=-5)
ax.set_xlim(0,180)
ax.set_ylim(0,1.1*ax.get_ylim()[1])
ax.locator_params('y',nbins=5)
ax.set_xlabel(r'Pair angle $\theta$ (deg)')
ax.set_ylabel('Probability density')


# Right panel inset: mean speed vs group size.
x0,y0,x1,y1 = axs[2].get_position()._points.flatten()
x0_,y0_ = x0+0.35*(x1-x0), y0+0.5*(y1-y0)
ax_     = fig.add_axes([x0_, y0_, x1-x0_-0.003, y1-y0_-0.01])
x,y,err,no,c = zip(*[ (names[k],np.mean(v),np.std(v)/np.sqrt(len(v)-1),len(v),color_dict1[k]) 
                   for k,v in cos_mean.items() ])
x = x[:-1] + ('Surface (dark)',)
ax_.bar(x, y, color=c)
ax_.errorbar(x, y, yerr=err, fmt='none', color='k', **err_opt2)
ax_.set_xticklabels(x, rotation=45, ha='right', y=0.05)
ax_.set_yticks([0,1])
ax_.set_ylim(0,1.25)
ax_.set_ylabel(r'$\langle\cos\,\theta\rangle$', labelpad=0)

def draw_bracket(txt, x1, x2, y, dx=0, dy=0.05, ax=ax_, **args):
    """Draw a labelled bracket between x1 and x2 at height y.

    The bracket is a thin black line with a horizontal bar at ``y`` and two
    legs that drop by ``dy``. Its ends sit at ``x1+dx`` and ``x2-dx``, so a
    positive ``dx`` shortens the bracket and a negative one widens it.
    ``txt`` is centered at ``(x1+x2)/2`` with its bottom edge at ``y``.
    Extra keyword arguments are forwarded to ``ax.text``. ``ax`` defaults to
    the inset axes that exists when this function is defined.
    """
    left, right = x1 + dx, x2 - dx
    leg_bottom = y - dy
    ax.plot([left, left, right, right], [leg_bottom, y, y, leg_bottom],
            color='k', lw=0.5)
    ax.text((x1 + x2) / 2, y, txt, va='bottom', ha='center', **args)
# Significance annotations on the inset bars (bar positions are 0..4).
# Bracket over bars 1-3, widened by 0.2 on each side; presumably groups the
# three cave populations -- confirm against the bar order.
draw_bracket('n.s.', x1=1, x2=3, y=0.35, dx=-0.2, fontsize=12)
# draw_bracket('', 1, 3, 0.4, dx=-0.2)
# ax_.text(2, 0.38, 'Cave', va='top', ha='center', fontsize=12)
# '**' bracket from bar 0 to x=1.8, with a stem extending its right leg down to y=0.55.
draw_bracket('**', x1=0, x2=1.8, y=1)
ax_.plot([1.8, 1.8], [1, 0.55], color='k', lw=0.5)
# '*' bracket from x=2.2 to bar 4, with a stem extending its left leg down to y=0.55.
draw_bracket('*', x1=2.2, x2=4, y=0.6)
ax_.plot([2.2, 2.2], [0.6, 0.55], color='k', lw=0.5)


# Write the figure to disk, then display it.
plt.savefig(fpath("fig6.png"), dpi=dpi)
plt.show()

In [None]:
# Statistical tests for <v(n)> (left panel, inset).
# NOTE(review): the code in this cell is identical to the <cos(theta)> cell
# that follows -- it reads `cos_mean` throughout, not a speed quantity.
# Confirm whether a mean-speed dictionary was intended here.

# Test normality.
# Prints the Shapiro-Wilk p-value for each entry's sample.
print('Shapiro test:')
for k,V in cos_mean.items():
    print(f'{k}: p = {shapiro(V)[1]}')

# Make DataFrame for Tukey test.
# Long format: one row per (key, observation) pair.
df = pd.DataFrame([(k,v) for k,V in cos_mean.items() for v in V], columns=['pop','cos_mean'])
# print(pairwise_tukeyhsd(df['cos_mean'], df['pop']))

# One-way ANOVA restricted to the keys listed in `cave`.
cave = ['Pa','Mo','Ti']
p = f_oneway(*[V for k,V in cos_mean.items() if k in cave]).pvalue
print(f'\nOne-way ANOVA test among cavefish populations: p = {p}')

# Merge cave populations and perform Tukey test.
# Rows whose key is in `cave` are relabelled 'cave'; other keys are unchanged.
df['pop'] = df['pop'].apply(lambda k: 'cave' if k in cave else k)
print()
print(pairwise_tukeyhsd(df['cos_mean'], df['pop']))


In [None]:
# Statistical tests for <cos(theta)> (right panel, inset).

# Normality check: Shapiro-Wilk p-value for each entry of cos_mean.
print('Shapiro test:')
for k, V in cos_mean.items():
    p_normal = shapiro(V)[1]
    print(f'{k}: p = {p_normal}')

# Long-format table for the Tukey test: one row per (key, observation).
records = [(k, v) for k, V in cos_mean.items() for v in V]
df = pd.DataFrame(records, columns=['pop', 'cos_mean'])
# print(pairwise_tukeyhsd(df['cos_mean'], df['pop']))

# One-way ANOVA across the cave populations only.
cave = ['Pa', 'Mo', 'Ti']
cave_samples = [V for k, V in cos_mean.items() if k in cave]
p = f_oneway(*cave_samples).pvalue
print(f'\nOne-way ANOVA test among cavefish populations: p = {p}')

# Pool the cave populations under one label, then run Tukey's HSD
# on the pooled groups.
df.loc[df['pop'].isin(cave), 'pop'] = 'cave'
tukey = pairwise_tukeyhsd(df['cos_mean'], df['pop'])
print()
print(tukey)


# Figure 7

Here, we consider the effect of evasive interactions on time spent in the center of the tank. Experiments show that cave fish spend more time in the center of the tank with increased group size. We compare time in the center across simulations of three variants of a minimal active matter model. 

We measure the time spent in the center of a tank of radius $R$, where the center is defined as the region of radius $R_\text{center}$ whose area equals the area of the rest of the tank:
\begin{align}
A_\text{center} &= A_\text{total} /2 \\
R_\text{center}^2 &= R^2 /2 \\
R_\text{center} &= R / \sqrt{2} 
\end{align}

If fish were to spend time uniformly throughout the tank, then the probability of finding a fish in the center and near the walls would be equal, i.e. the expected fraction of time in the center would be 0.5.

In [None]:
# One row of three panels.
# NOTE(review): tight_layout() recomputes the subplot parameters, so it may
# override the margins requested in `gridspec` -- confirm the intended layout.
gridspec = {'wspace': 0.4, 'bottom': 0.15, 'left': 0.01, 'right': 0.99, 'top': 0.97}
fig, axs = plt.subplots(1, 3, figsize=(12, 3.8), gridspec_kw=gridspec)
plt.tight_layout()

# Label each panel.
# Bold letter in the top-left corner, positioned in axes coordinates.
for c, ax in zip('abc', axs):
    ax.text(0.02, 0.98, c, transform=ax.transAxes, fontsize=24, weight='bold', va='top')

    
# Left panel: Overlaid screenshots of evasive maneuver.
# The image is read from disk and shown as-is, with the axes frame hidden.
ax = axs[0]
evasive_tracks = mpimg.imread(fpath('fig_evasion.png'))
ax.imshow(evasive_tracks)
ax.axis('off')


# Middle panel: Time in center vs group size (experiments).
ax = axs[1]
d_expt = trial_store['time-in-center_expt']
n_list = [1, 2, 5, 10]
for pop in ('Pa', 'Mo', 'Ti'):
    # One sample per group size, looked up by (population, group size).
    samples = [d_expt[pop, n] for n in n_list]
    # Mean and standard error (population std / sqrt(N-1)) of each sample.
    y   = [np.mean(s) for s in samples]
    err = [np.std(s)/np.sqrt(len(s)-1) for s in samples]
    ax.errorbar(n_list, y, yerr=err,
                label=names[pop], dashes=dashes[pop],
                color=color_dict1[pop], alpha=0.8, **err_opt)
ax.legend(loc='upper center', **leg_opt)


# Right panel: Time in center vs group size (simulations).
ax          = axs[2]
fic         = trial_store['time-in-center_sims']
# Parallel lists, one entry per model: key, marker, color, legend label.
fic_models  = ['ignore', 'slow', 'avoid', 'combo']
fic_markers = ['o', 's', '^', 'p']
fic_colors  = ['grey', 'r', 'b', 'magenta']
fic_names   = ['Ignore', 'Slowdown', 'Evade', 'Combined']
for i, m in enumerate(fic_models):
    # Keep the entries of `fic` whose key starts with this model; the second
    # key item is the x value. Points are drawn in the dict's iteration order.
    rows = [(k[1], np.mean(f), np.std(f)/np.sqrt(len(f)-1))
            for k, f in fic.items() if k[0] == m]
    x, y, err = zip(*rows)
    # `err` is only used by the errorbar variant kept here for reference:
#     ax.errorbar(x, y, yerr=err, label=fic_names[i], color=fic_colors[i], marker=fic_markers[i], ms=7, **err_opt)
    ax.plot(x, y, marker=fic_markers[i], ms=7,
            color=fic_colors[i], alpha=0.8, label=fic_names[i])
ax.legend(loc='upper left', bbox_to_anchor=(0.1, 1), **leg_opt)


# Common settings for middle and right panels.
# Both panels get identical labels, limits and ticks.
for ax in axs[1:]:
    # x axis: group size, ticks at the sizes used in the experiments panel.
    ax.set_xlabel('Group size')
    ax.set_xlim(0, 12.5)
    ax.set_xticks([1, 2, 5, 10])
    # y axis: fraction of time spent in the center region.
    ax.set_ylabel('Fraction of time in center')
    ax.set_ylim(0, 0.55)
    ax.set_yticks([0, 0.2, 0.4])
#     ax.legend(loc='upper left', bbox_to_anchor=(0.17,1), **leg_opt)


# Write the figure to disk, then display it.
plt.savefig(fpath("fig7.png"), dpi=dpi)
plt.show()