In [19]:
import pandas as pd 
import seaborn as sns 
import matplotlib.pyplot as plt

from ipywidgets import interact
from matplotlib.gridspec import GridSpec

from plotters import _get_data
from figures import mean_agg_rule_tr_indivs, mean_agg_rule_tr_group, mean_agg_rule_tr_indiv_nb

from natsort import natsorted

In [2]:
# Column used to select the rows that belong to one animal (see `summary_animal`).
animal_uid_col = 'trial_uid_id'
# Column used to select the rows that belong to one video (see `summary_video`).
video_uid_col = 'trial_uid'

# Columns reported per animal by `_get_info`; each one must hold a single
# distinct value within the rows of a given animal.
animal_info_cols = [
    'genotype',
    'fish_id_exp',
    'identity',
    'dpf', 
    'size_cm', 
]

# Columns reported per video by `_get_info`; each one must hold a single
# distinct value within the rows of a given video.
video_info_cols = [
    'experiment_type',
    'gene',
    'founder',
    'replicate', 
    'genotype_group',
]

# Variables plotted by the three summary figures (individual, group, neighbour).
# NOTE(review): `invididual_variables` is a misspelling of "individual"; the
# name is kept because other cells reference it.
invididual_variables = ['normed_distance_to_origin', "angular_position", 'speed', 'acceleration']
group_variables = ['mean_distance_to_center_of_group', 'polarization_order_parameter', 'rotation_order_parameter']
individual_nb_variables = ['nb_angle', 'nb_cos_angle', 'nb_distance']

In [3]:
def _get_info(data, info_cols):
    info_str = ""
    for info_col in info_cols:
        assert info_col in data.columns
        infos = data[info_col].unique()
        assert len(set(infos)) == 1, set(infos)
        info = str(infos[0])
        info_str += f"{info_col}: {info} - "
    info_str = info_str[:-3]
    return info_str

# Animal summary

In [4]:
# Location of the individual-variables table (a .pkl file) handed to `_get_data`.
# NOTE(review): absolute, user-specific path; the notebook only runs on a
# machine with this exact Dropbox layout. Consider a configurable data directory.
data_path_indiv_vars = "/home/pacorofe/Dropbox (CRG ADV)/ZFISH_MICs/_BSocial/2022_ANALYSIS_social/generated_tables/tr_indiv_vars_bl.pkl"

In [5]:
# Row predicates handed to `_get_data`. Each returns a boolean mask over the
# table: experiment_type equal to 1, and gene name NOT containing "srrm".
data_filters = [
    lambda x: x["experiment_type"] == 1,
    lambda x: ~x["gene"].str.contains("srrm"),
]
# Grouping columns and aggregation rule for the per-individual statistics.
# NOTE(review): "stasts" is a misspelling of "stats"; the name is kept as is.
per_indiv_stasts_kwargs = {
    "groupby": [
        "trial_uid",
        "identity",
        "genotype_group",
        "genotype",
        "line",
        "line_replicate",
    ],
    "agg_rule": mean_agg_rule_tr_indivs,
}
# `_get_data` is defined in `plotters`; presumably it returns the filtered
# table and the grouped statistics, in that order (confirm against plotters).
data_indiv, data_indiv_stats = _get_data(
    data_path_indiv_vars,
    data_filters=data_filters,
    per_indiv_stats_kwargs=per_indiv_stasts_kwargs,  
)

Getting data
Filtering data
Filtering data
original shape (26493056, 36)


['gli3' 'cdon' 'srrm3' 'evi5b' 'shank3a' 'vdac3' 'apbb1' 'shank3b' 'gli2b'
 'srrm4' 'mef2ca' 'itsn1' 'vav2' 'vti1a' 'kif1b' 'madd' 'ap1g1' 'asap1b'
 'src' 'reln']


(26205088, 36)
(22209532, 36)
Groupping data


(22209532, 36)
['gli3' 'cdon' 'evi5b' 'shank3a' 'vdac3' 'apbb1' 'shank3b' 'gli2b'
 'mef2ca' 'itsn1' 'vav2' 'vti1a' 'kif1b' 'madd' 'ap1g1' 'asap1b' 'src'
 'reln']


In [6]:
from plotters import _boxplot_axes_one_variable

def plot_trajectory(data, ax=None, hue=None, units=None):
    """Draw a 2-D position histogram of `data` with the trajectory line on top.

    Parameters
    ----------
    data : table with ``'s_x'`` and ``'s_y'`` columns (asserted).
    ax : matplotlib Axes, optional
        Axes to draw on. A new 10x10 inch figure is created when None.
    hue : str, optional
        Column used to colour both the histogram and the lines.
    units : str, optional
        Column identifying the rows that make up one line. Defaults to `hue`,
        which reproduces the previous behaviour. Pass a per-animal column when
        several animals share the same `hue` value; otherwise their rows are
        drawn as a single line.

    Raises
    ------
    AssertionError
        If ``'s_x'`` or ``'s_y'`` is missing from `data`.
    """
    if ax is None:
        _, ax = plt.subplots(1, 1, figsize=(10, 10))
    assert 's_x' in data
    assert 's_y' in data
    if units is None:
        units = hue
    # The colour bar is only requested when there is no hue grouping.
    show_cbar = hue is None
    sns.histplot(ax=ax, data=data, x='s_x', y='s_y', cbar=show_cbar, hue=hue)
    # sort=False keeps the points in table order instead of ordering them by x;
    # estimator=None draws the raw observations without aggregation.
    sns.lineplot(
        ax=ax,
        data=data,
        x='s_x',
        y='s_y',
        sort=False,
        hue=hue,
        alpha=.5,
        units=units,
        estimator=None,
    )
    ax.set_aspect('equal')
    ax.set_ylabel('y')
    ax.set_xlabel('x')
    sns.despine(ax=ax)
    
       
def plot_variable_along_time(data, variable, ax=None, hue=None, units=None):
    """Plot `variable` against the ``'frame'`` column as raw (unaggregated) lines.

    Parameters
    ----------
    data : table with a ``'frame'`` column and a `variable` column (asserted).
    variable : str
        Column drawn on the y axis.
    ax : matplotlib Axes, optional
        Axes to draw on. A new 30x10 inch figure is created when None.
    hue : str, optional
        Column used to colour the lines.
    units : str, optional
        Column identifying the rows of one line. Falls back to `hue`.
    """
    if ax is None:
        _, ax = plt.subplots(1, 1, figsize=(30, 10))
    assert 'frame' in data
    assert variable in data
    line_units = hue if units is None else units
    # Helper from `plotters`; drawn before the lines, with text annotations on.
    _boxplot_axes_one_variable(ax, data, variable, how='h', add_text=True)
    sns.lineplot(
        data=data,
        x='frame',
        y=variable,
        hue=hue,
        units=line_units,
        estimator=None,
        alpha=.5,
        ax=ax,
    )
    sns.despine(ax=ax)
    

def plot_variable_1d_distribution(data, variable, ax=None, hue=None):
    """Draw the distribution of `variable` as an unfilled polygon histogram.

    The variable is placed on the y axis. When `ax` is None a new 30x10 inch
    figure is created. `hue` optionally names a column used to split and
    colour the histogram. Raises ``AssertionError`` if `variable` is missing
    from `data`.
    """
    if ax is None:
        _, ax = plt.subplots(1, 1, figsize=(30, 10))
    assert variable in data
    # Helper from `plotters`; drawn before the histogram.
    _boxplot_axes_one_variable(ax, data, variable, how='h')
    outline_style = dict(element='poly', fill=False, alpha=.5)
    sns.histplot(data=data, y=variable, hue=hue, ax=ax, **outline_style)
    sns.despine(ax=ax)
    


def _prepare_indiv_fig_axes_summary(num_variables):
    """Create the 30x10 inch summary figure and its axes.

    The grid has `num_variables` rows and ``3 * num_variables`` columns:
    a square panel over the first `num_variables` columns (all rows), then
    one wide panel per row, and the last column holds one narrow panel per row.

    Returns
    -------
    tuple
        ``(fig, main_ax, wide_axes, narrow_axes)`` where the two lists have
        one Axes per variable, in row order.
    """
    n_rows = num_variables
    n_cols = 3 * num_variables
    fig = plt.figure(constrained_layout=True, figsize=(30, 10))
    grid = GridSpec(n_rows, n_cols, figure=fig)

    ax_trajectories = fig.add_subplot(grid[:n_rows, :n_rows])
    axs_variables, axs_distributions = [], []
    for row in range(num_variables):
        # Wide panel first, then the narrow one, so axes are created row by row.
        axs_variables.append(fig.add_subplot(grid[row, n_rows:n_cols - 1]))
        axs_distributions.append(fig.add_subplot(grid[row, n_cols - 1:]))
    return fig, ax_trajectories, axs_variables, axs_distributions


def _plot_trajectory_and_variables_summary(animal_data, variables, hue=None):
    """Build the summary figure: trajectory panel plus one row per variable.

    Each row shows the variable along time and its 1-D distribution. `hue`
    is forwarded to every plotting helper. Returns the matplotlib Figure.
    """
    fig, ax_trajectories, time_axes, dist_axes = _prepare_indiv_fig_axes_summary(len(variables))
    plot_trajectory(animal_data, ax=ax_trajectories, hue=hue)
    for row, variable in enumerate(variables):
        plot_variable_along_time(animal_data, variable, ax=time_axes[row], hue=hue)
        plot_variable_1d_distribution(animal_data, variable, ax=dist_axes[row], hue=hue)
    return fig
    
    

In [7]:
def get_animal_info_str(animal_data, info_cols = animal_info_cols, video_info_cols=video_info_cols):
    """Return a two-line description of one animal: video info, then animal info.

    Both lines are produced by `_get_info`, so every listed column must hold
    a single distinct value in `animal_data`.
    """
    video_part, animal_part = (
        _get_info(animal_data, columns) for columns in (video_info_cols, info_cols)
    )
    return f"{video_part} \n {animal_part}"
    

# Every animal identifier in the individual-variables table; these become the
# options of the dropdown built by `interact`.
possible_animals_uid = data_indiv[animal_uid_col].unique()


# The former `y=1.0` keyword matched no parameter of `summary_animal`, so it
# produced no widget and has been removed.
@interact(animal_uid=possible_animals_uid)
def summary_animal(animal_uid, save=False):
    """Print the info line of one animal and draw its summary figure.

    Parameters
    ----------
    animal_uid : value of the `animal_uid_col` column selecting the animal.
    save : bool
        When True the figure gets the info string as title and is written to
        ``<animal_uid>.png`` and ``<animal_uid>.pdf`` in the working directory.
    """
    assert animal_uid_col in data_indiv.columns
    if animal_uid in possible_animals_uid:
        animal_data = data_indiv[data_indiv[animal_uid_col] == animal_uid]
        animal_info_str = get_animal_info_str(animal_data)
        print(animal_info_str)

        fig = _plot_trajectory_and_variables_summary(animal_data, invididual_variables)
        if save:
            fig.suptitle(animal_info_str)
            fig.savefig(f"{animal_uid}.png")
            fig.savefig(f"{animal_uid}.pdf")

    else:
        print(f"Animal {animal_uid} does not exist")
        print("Possible animals are")
        print(possible_animals_uid)

interactive(children=(Dropdown(description='animal_uid', options=('gli3_7_2_1_4_0.0', 'gli3_7_2_1_4_1.0', 'gli…

# Video summary

In [8]:
# Location of the group-variables table (a .pkl file) handed to `_get_data`.
# NOTE(review): absolute, user-specific path; consider a configurable data directory.
data_path_group_vars = "/home/pacorofe/Dropbox (CRG ADV)/ZFISH_MICs/_BSocial/2022_ANALYSIS_social/generated_tables/tr_group_vars_bl.pkl"

# Same row predicates as for the individual table: experiment_type equal to 1,
# and gene name NOT containing "srrm".
data_filters = [
    lambda x: x["experiment_type"] == 1,
    lambda x: ~x["gene"].str.contains("srrm"),
]
# Grouping columns and aggregation rule for the per-video (group) statistics.
per_group_stasts_kwargs = {
    "groupby": [
        "trial_uid",
        "genotype_group",
        "line",
        "line_replicate",
    ],
    "agg_rule": mean_agg_rule_tr_group,
}
# The keyword is called `per_indiv_stats_kwargs` even though it receives the
# group-level settings here; that is the parameter name `_get_data` exposes.
data_group, data_group_stats = _get_data(
    data_path_group_vars,
    data_filters=data_filters,
    per_indiv_stats_kwargs=per_group_stasts_kwargs,  
)

Getting data
Filtering data
Filtering data
original shape (13246528, 12)


['gli3' 'cdon' 'srrm3' 'evi5b' 'shank3a' 'vdac3' 'apbb1' 'shank3b' 'gli2b'
 'srrm4' 'mef2ca' 'itsn1' 'vav2' 'vti1a' 'kif1b' 'madd' 'ap1g1' 'asap1b'
 'src' 'reln']


(13102544, 12)
(11104766, 12)
Groupping data


(11104766, 12)
['gli3' 'cdon' 'evi5b' 'shank3a' 'vdac3' 'apbb1' 'shank3b' 'gli2b'
 'mef2ca' 'itsn1' 'vav2' 'vti1a' 'kif1b' 'madd' 'ap1g1' 'asap1b' 'src'
 'reln']


In [9]:
def _prepare_group_fig_axes_summary(num_variables):
    """Create the figure and axes for the group summary.

    The layout is exactly the one built by `_prepare_indiv_fig_axes_summary`
    (square panel on the left, one wide and one narrow panel per variable),
    so this delegates to it instead of duplicating the grid code.

    Returns
    -------
    tuple
        ``(fig, ax_order_params, axs_variables, axs_distributions)``.
    """
    return _prepare_indiv_fig_axes_summary(num_variables)


def plot_order_parameter_dist(data, ax=None):
    """Draw the joint 2-D histogram of the two order parameters.

    ``'rotation_order_parameter'`` goes on the x axis and
    ``'polarization_order_parameter'`` on the y axis; both columns must be
    present in `data` (asserted). A new 10x10 inch figure is created when
    `ax` is None.
    """
    if ax is None:
        _, ax = plt.subplots(1, 1, figsize=(10, 10))
    axis_columns = {
        'x': 'rotation_order_parameter',
        'y': 'polarization_order_parameter',
    }
    assert axis_columns['x'] in data
    assert axis_columns['y'] in data
    sns.histplot(ax=ax, data=data, **axis_columns)
    ax.set_aspect('auto')
    ax.set_ylabel(axis_columns['y'])
    ax.set_xlabel(axis_columns['x'])
    sns.despine(ax=ax)
    
    
def _plot_group_variables_summary(data, variables):
    """Build the group summary figure.

    The left panel shows the order-parameter histogram; each row on the right
    shows one variable along time and its 1-D distribution. Returns the Figure.
    """
    fig, ax_order_params, time_axes, dist_axes = _prepare_group_fig_axes_summary(len(variables))
    plot_order_parameter_dist(data, ax=ax_order_params)
    for row, variable in enumerate(variables):
        plot_variable_along_time(data, variable, ax=time_axes[row])
        plot_variable_1d_distribution(data, variable, ax=dist_axes[row])
    return fig

In [10]:
# Location of the neighbour-variables table (a .pkl file) handed to `_get_data`.
# NOTE(review): absolute, user-specific path; consider a configurable data directory.
data_path_indiv_nb_vars = "/home/pacorofe/Dropbox (CRG ADV)/ZFISH_MICs/_BSocial/2022_ANALYSIS_social/generated_tables/tr_indiv_nb_vars_bl.pkl"

# Same row predicates as for the other tables: experiment_type equal to 1,
# and gene name NOT containing "srrm".
data_filters = [
    lambda x: x["experiment_type"] == 1,
    lambda x: ~x["gene"].str.contains("srrm"),
]
# Grouping columns and aggregation rule for the per-(animal, neighbour) statistics.
# NOTE(review): this rebinds `per_group_stasts_kwargs`, the same name the
# group-data cell uses, so the group settings are overwritten once this cell
# has run. A dedicated name would avoid the clash.
per_group_stasts_kwargs = {
    "groupby": [
        "trial_uid",
        "identity",
        "identity_nb",
        "genotype_group",
        "genotype",
        "genotype_nb",
        "line",
        "line_replicate",
    ],
    "agg_rule": mean_agg_rule_tr_indiv_nb,
}
data_indiv_nb, data_indiv_nb_stats = _get_data(
    data_path_indiv_nb_vars,
    data_filters=data_filters,
    per_indiv_stats_kwargs=per_group_stasts_kwargs,  
)

Getting data
Filtering data
Filtering data
original shape (26492917, 28)


['gli3' 'cdon' 'srrm3' 'evi5b' 'shank3a' 'vdac3' 'apbb1' 'shank3b' 'gli2b'
 'srrm4' 'mef2ca' 'itsn1' 'vav2' 'vti1a' 'kif1b' 'madd' 'ap1g1' 'asap1b'
 'src' 'reln']


(26204949, 28)
(22209394, 28)
Groupping data


(22209394, 28)
['gli3' 'cdon' 'evi5b' 'shank3a' 'vdac3' 'apbb1' 'shank3b' 'gli2b'
 'mef2ca' 'itsn1' 'vav2' 'vti1a' 'kif1b' 'madd' 'ap1g1' 'asap1b' 'src'
 'reln']


In [17]:
def _prepare_indiv_nb_fig_axes_summary(num_variables):
    """Create the figure and axes for the neighbour summary.

    The layout is exactly the one built by `_prepare_indiv_fig_axes_summary`
    (square panel on the left, one wide and one narrow panel per variable),
    so this delegates to it instead of duplicating the grid code.

    Returns
    -------
    tuple
        ``(fig, ax_relative_position, axs_variables, axs_distributions)``.
    """
    return _prepare_indiv_fig_axes_summary(num_variables)


def plot_relative_position_dist(data, ax=None, binrange=((-5, 5), (-5, 5))):
    """Draw the 2-D histogram of neighbour positions with dotted zero axes.

    Parameters
    ----------
    data : table with ``'nb_position_x'`` and ``'nb_position_y'`` columns
        (asserted).
    ax : matplotlib Axes, optional
        Axes to draw on. A new 10x10 inch figure is created when None.
    binrange : pair of (low, high) pairs, optional
        Histogram range for the x and y axes, forwarded to ``sns.histplot``.
        The default reproduces the previously hard-coded range.

    Raises
    ------
    AssertionError
        If either position column is missing from `data`.
    """
    x_var = 'nb_position_x'
    y_var = 'nb_position_y'
    if ax is None:
        _, ax = plt.subplots(1, 1, figsize=(10, 10))
    assert x_var in data
    assert y_var in data
    sns.histplot(ax=ax, data=data, x=x_var, y=y_var, cbar=False, binrange=binrange)
    ax.set_aspect('equal')
    ax.set_ylabel(y_var)
    ax.set_xlabel(x_var)
    # Dotted lines through the origin.
    ax.axhline(0, c='k', ls=':')
    ax.axvline(0, c='k', ls=':')
    sns.despine(ax=ax)


def _plot_video_indiv_nb_variables_summary(data, variables):
    """Build the neighbour summary figure.

    The left panel shows the neighbour-position histogram; each row on the
    right shows one variable along time (coloured by ``'genotype_nb'``, one
    line per ``'identity_nb'``) and its 1-D distribution. Returns the Figure.
    """
    fig, ax_relative_position, time_axes, dist_axes = _prepare_indiv_nb_fig_axes_summary(len(variables))
    plot_relative_position_dist(data, ax=ax_relative_position)
    for row, variable in enumerate(variables):
        plot_variable_along_time(data, variable, ax=time_axes[row], hue='genotype_nb', units='identity_nb')
        plot_variable_1d_distribution(data, variable, ax=dist_axes[row], hue='genotype_nb')
    return fig
    

In [20]:
def get_video_info(video_data, video_info_cols = video_info_cols):
    """Return a multi-line description of a video and each of its animals.

    The first line holds the video-level columns; one further line is added
    per distinct value of the `animal_uid_col` column, holding that animal's
    `animal_info_cols`. All lines are produced by `_get_info`.
    """
    lines = [_get_info(video_data, video_info_cols)]
    for animal_uid in video_data[animal_uid_col].unique():
        rows_of_animal = video_data[video_data[animal_uid_col] == animal_uid]
        lines.append(_get_info(rows_of_animal, animal_info_cols))
    return "\n ".join(lines)

# Only videos present in all three tables (group, individual, neighbour) can
# be summarised; the identifiers are natural-sorted for the dropdown.
possible_video_uids_in_group_vars = data_group[video_uid_col].unique()
possible_video_uids_in_indiv_vars = data_indiv[video_uid_col].unique()
possible_video_uids_in_indiv_nb_vars = data_indiv_nb[video_uid_col].unique()
possible_video_uids = set(possible_video_uids_in_group_vars) & set(possible_video_uids_in_indiv_vars) & set(possible_video_uids_in_indiv_nb_vars)
possible_video_uids = natsorted(possible_video_uids)


# The former `y=1.0` keyword matched no parameter of `summary_video`, so it
# produced no widget and has been removed.
@interact(video_uid=possible_video_uids)
def summary_video(video_uid, save=False):
    """Print the info of one video and draw its three kinds of summary figures.

    Draws the individual figure (coloured by genotype), the group figure and
    one neighbour figure per animal of the video.

    Parameters
    ----------
    video_uid : value of the `video_uid_col` column selecting the video.
    save : bool
        When True every figure gets the video info as title and is written as
        PNG and PDF in the working directory: ``<video_uid>_indiv``,
        ``<video_uid>_group`` and ``<animal_uid>_indiv_nb`` (one per animal).
    """
    assert video_uid_col in data_group.columns
    if video_uid in possible_video_uids:
        video_group_data = data_group[data_group[video_uid_col] == video_uid]
        video_indiv_data = data_indiv[data_indiv[video_uid_col] == video_uid]
        video_indiv_nb_data = data_indiv_nb[data_indiv_nb[video_uid_col] == video_uid]

        print(video_group_data.shape)
        print(video_indiv_data.shape)

        video_info_str = get_video_info(video_indiv_data)
        print(video_info_str)
        fig = _plot_trajectory_and_variables_summary(video_indiv_data, invididual_variables, hue='genotype')
        if save:
            fig.suptitle(video_info_str)
            fig.savefig(f"{video_uid}_indiv.png")
            fig.savefig(f"{video_uid}_indiv.pdf")

        fig = _plot_group_variables_summary(video_group_data, group_variables)
        if save:
            fig.suptitle(video_info_str)
            fig.savefig(f"{video_uid}_group.png")
            fig.savefig(f"{video_uid}_group.pdf")

        for animal_uid in video_indiv_nb_data[animal_uid_col].unique():
            animal_nb_data = video_indiv_nb_data[video_indiv_nb_data[animal_uid_col] == animal_uid]
            fig = _plot_video_indiv_nb_variables_summary(animal_nb_data, individual_nb_variables)
            if save:
                fig.suptitle(video_info_str)
                # The file name is keyed on the animal, not the video: with the
                # video uid every animal of the loop wrote to the same path and
                # only the last figure survived.
                fig.savefig(f"{animal_uid}_indiv_nb.png")
                fig.savefig(f"{animal_uid}_indiv_nb.pdf")

    else:
        print(f"Video {video_uid} does not exist")
        print("Possible videos are")
        print(possible_video_uids)

interactive(children=(Dropdown(description='video_uid', options=('ap1g1_1_1_1_1', 'ap1g1_1_1_1_2', 'ap1g1_1_1_…

In [13]:
# Inspection cell: display the column names available in the neighbour table.
data_indiv_nb.columns

Index(['identity', 'identity_nb', 'frame', 'nb_position_x', 'nb_position_y',
       'nb_angle', 'nb_cos_angle', 'nb_distance', 'trial_uid', 'fish_id_exp',
       'genotype', 'dpf', 'size_cm', 'fish_id_exp_nb', 'genotype_nb', 'dpf_nb',
       'size_cm_nb', 'line', 'line_replicate', 'gene', 'founder', 'replicate',
       'experiment_type', 'genotype_group', 'genotype_group_genotype',
       'trial_uid_id', 'genotype_group_genotype_nb', 'trial_uid_id_nb'],
      dtype='object')