In [None]:
import os
import warnings
from collections import OrderedDict
from collections import defaultdict

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import umap
from mpl_toolkits import mplot3d
from mpl_toolkits.mplot3d import axes3d
from scipy import signal, stats

# Disable matplotlib's warning about the number of simultaneously open figures.
plt.rcParams.update({'figure.max_open_warning': 0})
# NOTE(review): this suppresses every warning category for the rest of the session.
warnings.simplefilter("ignore")

%run /media/turritopsis/katie/grooming/t1-grooming/grooming_functions.ipynb
%matplotlib inline

# for figure styling
# Apply seaborn's default theme, then switch the axes style to 'ticks'.
sns.set()
sns.set_style('ticks')

In [None]:
# Load the processed on-ball kinematics table for the chosen behavior.
behavior = 't1_grooming'
prefix = '/media/turritopsis/katie/grooming/summaries'
parquet_name = 'lines-' + behavior + '_onball_processed.parquet'
data_path = os.path.join(prefix, parquet_name)
data = pd.read_parquet(data_path, engine='fastparquet')

In [None]:
# Frame rate (frames per second); used below to convert frame counts to seconds.
fps = 300.0 # know this for this dataset

# get the joints to analyze
# NOTE(review): this hard-coded array is replaced a few lines below by the list
# derived from the '_error' columns, so it has no effect on later cells.
bodyparts = np.array(['L1A', 'L1B', 'L1C', 'L1D', 'L1E', 
                      'L2A', 'L2B', 'L2C', 'L2D', 'L2E', 
                      'L3A', 'L3B', 'L3C', 'L3D', 'L3E', 
                      'R1A', 'R1B', 'R1C', 'R1D', 'R1E', 
                      'R2A', 'R2B', 'R2C', 'R2D', 'R2E',
                      'R3A', 'R3B', 'R3C', 'R3D', 'R3E'])

# Body-part names: every column containing '_error', with that substring removed.
bodyparts = [x.replace('_error', '') for x in data.columns if '_error' in x]
# One '<bodypart>_<axis>' name per body part and coordinate axis.
bodyparts_xyz = [bp + '_' + x for bp in bodyparts for x in ['x', 'y', 'z']]

# Angle-name suffixes passed to get_angle_names (a helper that is not defined in
# this part of the notebook).
angle_types = np.array(['_BC', '_flex', '_rot', '_abduct'])
angle_names_t1 = get_angle_names(data, angle_types, only_t1 = True)
angle_names = get_angle_names(data, angle_types, only_t1 = False)

# Columns that match one of the listed suffixes, are not derived
# (_d1/_d2/_freq/_range) columns, and whose second character is '1'
# (presumably the front leg pair, e.g. 'L1...'/'R1...' - TODO confirm).
angle_vars = [v for v in data.columns
              if some_contains(v, ['_flex', '_rot', '_x', '_y', '_z'])
              and not some_contains(v, ['_d1', '_d2', '_freq', '_range'])
              and v[1] == '1']

# Sorted unique bout identifiers, cast to int.
bout_numbers = np.unique(np.array(data.behavior_bout.astype(int)))
# Optional per-bout / per-fly lookups, currently disabled:
# bout_length_dict = get_bout_lengths(data)
# fly_dict = get_fly_id(data, bout_numbers)
# videos = get_videos(bout_numbers, data)
# fly_videos = fly_to_video(data)
# dif_flies = np.unique(list(fly_dict.values()))
# fly_data, fly_names_sorted = data_per_fly(data)

In [None]:
# Human-readable name for each joint-angle suffix; used to build plot titles
# and axis labels.
titles = dict([
    ('A_flex', 'coxa flexion'),
    ('A_abduct', 'body-coxa abduction'),
    ('A_rot', 'coxa rotation'),
    ('B_flex', 'coxa-femur flexion'),
    ('B_rot', 'femur rotation'),
    ('C_flex', 'femur-tibia flexion'),
    ('C_rot', 'tibia rotation'),
    ('D_flex', 'tibia-tarsus flexion'),
    ('_BC', 'body-coxa abduction'),
])

In [None]:
# Plot every angle over time for the first 50 bouts, one figure per
# (angle, bout) pair, with the left and right leg traces on the same axes.
# Available legs are 'L1', 'L2', 'L3', 'R1', 'R2', 'R3'; only the front pair is drawn.
legs = ['L1', 'R1']
angle_names_u = np.unique([x[2:] for x in angle_names])
for angle_suffix in angle_names_u:
    for bout_id in bout_numbers[:50]:

        fig = plt.figure(figsize=(8, 4))
        bout_label = str(int(bout_id))
        plt.title(titles[angle_suffix] + ' angles during t1 grooming (bout ' + bout_label + ')',
                  fontsize=14)
        plt.xlabel('time (seconds)', fontsize=14)
        plt.ylabel('angle (deg)', fontsize=14)

        bout = data[data.behavior_bout == bout_id]
        for leg in legs:
            angle = np.array(bout[leg + angle_suffix])
            # frame index -> seconds
            t = np.arange(len(angle)) / fps
            plt.plot(t, angle, label=leg)

        plt.legend(fontsize=12, loc=(1, 0.5))
        sns.despine()
        plt.show()

In [None]:
# Display the first 50 bout ids (the subset the plotting loops iterate over).
bout_numbers[:50]

In [None]:
# plot all grooming angles across time (subplots)
# One figure per bout (first 50 bouts) with three panels - abduction, flexion,
# rotation. Every matching L1/R1 joint-angle column is drawn in its panel.
# `handles` / `labels` keep the legend entries of the last bout's panels so a
# later cell can draw stand-alone legends.
legs = ['L1', 'R1']
angle_types = ['_abduct', '_flex', '_rot']
angle_titles = ['abduction', 'flexion', 'rotation']
colors = sns.color_palette('Blues_r', 5)
rcolors = sns.color_palette('Reds_r', 5)
# colour per leg+joint prefix (first three characters of the column name)
leg_colors = {'L1A': colors[0], 'R1A': rcolors[0], 'L1B': colors[1], 'R1B': rcolors[1],
              'L1C': colors[2], 'R1C': rcolors[2], 'L1D': colors[3], 'R1D': rcolors[3]}

# The columns shown in each panel do not depend on the bout, so select them once.
panel_vars = [[v for v in data.columns
               if some_contains(v, [angle_type])
               and not some_contains(v, ['_d1', '_d2', '_freq', '_range'])
               and v[:2] in legs]
              for angle_type in angle_types]

for bout_id in bout_numbers[:50]:

    bout = data[data.behavior_bout == bout_id]
    fig, axs = plt.subplots(1, 3, sharex=True, sharey=True, figsize=(12, 2))
    ax = axs.T.flatten()
    # Figure-level title. plt.title() would attach to the last panel and then be
    # replaced by that panel's own set_title() below; y > 1 keeps it clear of
    # the panel titles.
    fig.suptitle('{}, bout {}'.format(bout.flyid.iloc[0], str(int(bout_id))),
                 fontsize=14, y=1.15)
    ax[0].set_yticks([-200, 0, 200])
    ax[0].set_ylim([-275, 200])
    ax[0].set_ylabel('angle (deg)', fontsize=12)
    print(bout.filename.iloc[0])
    handles = []
    labels = []

    for i in range(len(angle_types)):

        angle_vars = panel_vars[i]

        for var in angle_vars:
            angle = np.array(bout[var])
            t = np.arange(len(angle)) / fps  # frame index -> seconds
            ax[i].plot(t, angle, label=var, color=leg_colors[var[:3]])

        # panel decoration does not depend on the individual traces
        ax[i].set_xlabel('time (seconds)', fontsize=12)
        ax[i].set_title('{} angles'.format(angle_titles[i]), fontsize=12)
        ax[i].tick_params(labelleft=True)

        hs, ls = ax[i].get_legend_handles_labels()
        handles.append(hs)
        labels.append(ls)  # label strings (the handles were stored here before)

    sns.despine()
    plt.show()


In [None]:
# plot legends
# Draw each panel's legend in its own, otherwise empty figure. The labels are
# read back from the line handles so every legend shows exactly the entries
# belonging to its handles.
for panel_handles in handles:
    fig = plt.figure(figsize=(3, 3))
    panel_labels = [h.get_label() for h in panel_handles]
    plt.legend(handles=panel_handles, labels=panel_labels, loc='center', fontsize=12, ncol=2)
    plt.axis('off')
    plt.tight_layout()
    plt.show()

# Compact legend for the second entry of `handles`: each label is shortened to
# the first three characters of its column name (leg + joint, e.g. 'L1A'), so
# the names follow the actual plotting order instead of a hard-coded list.
labels = [h.get_label()[:3] for h in handles[1]]
fig = plt.figure(figsize=(3, 3))
plt.legend(handles=handles[1], labels=labels, loc='center', fontsize=12, ncol=4)
plt.axis('off')
plt.tight_layout()
plt.show()

In [None]:
# overlay grooming cycles from individual bouts, then determine an average grooming
# cycle for each angle (uses all oscillations in all bouts)
# A "cycle" is the stretch of samples from one detected peak up to (but not
# including) the next. Each cycle is stored left-aligned in a NaN-padded buffer
# so cycles of different lengths can be averaged with nanmean later.
dist = 20            # minimum number of frames between detected peaks
height = None        # no minimum peak height
max_cycle_len = 600  # buffer length (frames) each cycle is stored in
min_bout_len = 500   # bouts with fewer frames are ignored
legs = ['L1', 'R1']
# NOTE(review): unlike other cells, '_range' columns are not excluded here - confirm.
angle_vars = [v for v in data.columns
              if some_contains(v, ['_flex', '_abduct', '_rot', '_BC'])
              and not some_contains(v, ['_d1', '_d2', '_freq'])
              and v[:2] in legs]
angle_titles = ['flexion', 'abduction', 'rotation', 'body-coxa']

cycle_dict = dict()   # angle column -> list of NaN-padded cycles
length_dict = dict()  # angle column -> list of cycle lengths (frames)

for angle_var in angle_vars:

    all_cycles = []
    all_lengths = []

    for bout_id in bout_numbers:

        bout = data[data.behavior_bout == bout_id]
        bout_angs = np.array(bout[angle_var])
        if len(bout_angs) < min_bout_len:
            continue
        idxs, props = signal.find_peaks(bout_angs, distance=dist, height=height)

        for start, stop in zip(idxs[:-1], idxs[1:]):
            period = bout_angs[start:stop]
            if len(period) > max_cycle_len:
                # Does not fit in the buffer (the assignment below would raise
                # a broadcast ValueError); skip this peak-to-peak span.
                continue
            cycle = np.full(max_cycle_len, np.nan)
            cycle[:len(period)] = period
            all_cycles.append(cycle)
            all_lengths.append(len(period))

    cycle_dict[angle_var] = all_cycles
    length_dict[angle_var] = all_lengths

In [None]:
# Overlay every extracted cycle for each angle and draw the NaN-aware mean
# cycle on top in black. The x axis is in seconds (frame index / fps) so it
# matches its label.
for angle_var in angle_vars:

    all_cycles = cycle_dict[angle_var]
    all_lengths = length_dict[angle_var]
    if len(all_cycles) == 0:
        # nothing to draw; max() and the percentile below fail on empty input
        continue

    fig = plt.figure(figsize=(8, 4))
    plt.title('t1 grooming cycle of {} angles'.format(titles[angle_var[2:]]), fontsize=14)
    plt.xlabel('time (seconds)', fontsize=14)
    plt.ylabel('angle (deg)', fontsize=14)

    for cycle, n_frames in zip(all_cycles, all_lengths):
        # drop the NaN padding before plotting
        plt.plot(np.arange(n_frames) / fps, cycle[:n_frames], linewidth=1)

    # mean over cycles at each frame offset, ignoring the NaN padding
    avg_cycle = np.nanmean(all_cycles, axis=0)[:max(all_lengths)]
    plt.plot(np.arange(len(avg_cycle)) / fps, avg_cycle, label='average', color='k')
    # show up to the 80th percentile of cycle durations
    plt.xlim([0, np.percentile(all_lengths, 80) / fps])

    plt.legend(fontsize=12, loc=(1.02, 0.2))
    sns.despine()
    plt.show()

In [None]:
# overlay grooming cycles from individual bouts, then determine an average grooming
# cycle for each angle (uses one oscillation per bout, taken from the middle of the bout)
# A "cycle" is the stretch of samples from one detected peak up to (but not
# including) the next, stored left-aligned in a NaN-padded buffer.
dist = 20            # minimum number of frames between detected peaks
height = None        # no minimum peak height
max_cycle_len = 600  # buffer length (frames) each cycle is stored in
min_bout_len = 300   # bouts with fewer frames are ignored
legs = ['L1', 'R1']
# NOTE(review): unlike other cells, '_range' columns are not excluded here - confirm.
angle_vars = [v for v in data.columns
              if some_contains(v, ['_flex', '_abduct', '_rot', '_BC'])
              and not some_contains(v, ['_d1', '_d2', '_freq'])
              and v[:2] in legs]
angle_titles = ['flexion', 'abduction', 'rotation', 'body-coxa']

cycle_dict = dict()   # angle column -> list with one NaN-padded cycle per bout
length_dict = dict()  # angle column -> list of the matching cycle lengths (frames)

for angle_var in angle_vars:

    all_cycles = []
    all_lengths = []

    for bout_id in bout_numbers:

        bout = data[data.behavior_bout == bout_id]
        bout_angs = np.array(bout[angle_var])
        if len(bout_angs) < min_bout_len:
            continue
        idxs, props = signal.find_peaks(bout_angs, distance=dist, height=height)

        cycles = []
        lengths = []
        for start, stop in zip(idxs[:-1], idxs[1:]):
            period = bout_angs[start:stop]
            if len(period) > max_cycle_len:
                # Does not fit in the buffer (the assignment below would raise
                # a broadcast ValueError); skip this peak-to-peak span.
                continue
            cycle = np.full(max_cycle_len, np.nan)
            cycle[:len(period)] = period
            cycles.append(cycle)
            lengths.append(len(period))

        if not cycles:
            # fewer than two peaks, or every span was too long for the buffer
            continue

        # keep only the cycle in the middle of the bout
        middle = len(cycles) // 2
        all_cycles.append(cycles[middle])
        all_lengths.append(lengths[middle])

    cycle_dict[angle_var] = all_cycles
    length_dict[angle_var] = all_lengths

In [None]:
# Overlay the per-bout cycles for each angle and draw the NaN-aware mean cycle
# on top in black. The x axis is in seconds (frame index / fps) so it matches
# its label.
for angle_var in angle_vars:

    all_cycles = cycle_dict[angle_var]
    all_lengths = length_dict[angle_var]
    if len(all_cycles) == 0:
        # nothing to draw; max() and the percentile below fail on empty input
        continue

    fig = plt.figure(figsize=(8, 4))
    # first two characters of the column name are the leg, the rest is the angle suffix
    plt.title('t1 grooming cycle of {} {} angles'.format(angle_var.split('_')[0][:2],
                                                         titles[angle_var[2:]]), fontsize=14)
    plt.xlabel('time (seconds)', fontsize=14)
    plt.ylabel('angle (deg)', fontsize=14)

    for cycle, n_frames in zip(all_cycles, all_lengths):
        # drop the NaN padding before plotting
        plt.plot(np.arange(n_frames) / fps, cycle[:n_frames], linewidth=1)

    # mean over cycles at each frame offset, ignoring the NaN padding
    avg_cycle = np.nanmean(all_cycles, axis=0)[:max(all_lengths)]
    plt.plot(np.arange(len(avg_cycle)) / fps, avg_cycle, label='average', color='k')
    # show up to the 75th percentile of cycle durations
    plt.xlim([0, np.percentile(all_lengths, 75) / fps])

    plt.legend(fontsize=12)
    sns.despine()
    plt.show()

In [None]:
# plot angle distributions for individual flies with the most data
# One figure per angle suffix with one panel per leg. Each panel shows a
# Gaussian-KDE curve for every listed fly plus a black curve computed from all
# rows of `data`.
fly_names = ['5_0 5272019','3_0 5222019','3_0 5272019','4_0 5222019','1_0 5242019',
             '2_0 5272019','1_0 5222019','4_0 5212019','1_0 5272019','4_0 5272019'] 
# fly_names = fly_names_sorted[:10]
# NOTE(review): the custom_cmap result is replaced by the seaborn palette on the next line.
colors = custom_cmap(13)
colors = sns.color_palette('Spectral', len(fly_names))
# angle suffixes with the two-character leg prefix stripped
angle_names_u = np.unique([x[2:] for x in angle_names])
# NOTE(review): the six-leg list is immediately narrowed to the front pair.
legs = ['L1', 'L2', 'L3', 'R1', 'R2', 'R3']
legs = ['L1', 'R1']

for i in range(len(angle_names_u)): 
    
    # len(legs)/2 rows by 2 columns; flattening the transpose orders panels column by column
    fig, axs = plt.subplots(int(len(legs)/2), 2, sharex = True, figsize = (15,3*int(len(legs)/2)))
    fig.suptitle('distribution of ' + titles[angle_names_u[i]] + ' angles during t1 grooming', y = 0.97)
    ax = axs.T.flatten()
    
    for j in range(len(legs)):
        
        ax[j].set_ylabel('PDF')
        ax[j].set_xlabel(legs[j] + ' ' + titles[angle_names_u[i]] + ' (deg)')
        
        for k in range(len(fly_names)):
            
            fly_data = data[data['flyid'] == fly_names[k]]
            t1 = fly_data.iloc[0:][legs[j] + angle_names_u[i]]
            t1 = t1[np.isfinite(t1)] # ignores nans 
            # a KDE needs at least two samples
            if len(t1) <= 1: 
                continue        

            # Fit the KDE on the samples, then reuse `t1` as the evaluation grid
            # (500 points between the 1st and 99th percentile).
            kernel_t1 = stats.gaussian_kde(t1)    
            t1 = np.linspace(np.percentile(t1, 1), np.percentile(t1, 99), 500)
            height_t1 = kernel_t1.pdf(t1)                                                     
            ax[j].plot(t1, height_t1, label = fly_names[k], color = colors[k])
            ax[j].tick_params(labelbottom = True)
            
        # plot average distribution for all flies
        # (a KDE of all rows of `data` for this leg/angle, not a mean of the per-fly curves)
        t1 = data.iloc[0:][legs[j] + angle_names_u[i]]
        t1 = t1[np.isfinite(t1)] # ignores nans   
        kernel_t1 = stats.gaussian_kde(t1)    
        t1 = np.linspace(np.percentile(t1, 1), np.percentile(t1, 99), 500)
        height_t1 = kernel_t1.pdf(t1)                                                     
        ax[j].plot(t1, height_t1, label = 'all flies', color = 'k')
            
    plt.subplots_adjust(wspace = 0.2, hspace = 0.3)
    # pyplot call: the legend is attached to whichever axes is current at this point
    plt.legend(bbox_to_anchor = (1.4, 1.03))
    plt.show()
    

In [None]:
# plot angle distributions for individual flies with the most data
# Single-panel variant: one figure per angle suffix for the legs listed in
# `legs`, with a hand-tuned colour list. The legend handles/labels are captured
# in `hs` / `ls` instead of being drawn on the figure.
fly_names = ['5_0 5272019','3_0 5222019','3_0 5272019','4_0 5222019','1_0 5242019',
             '2_0 5272019','1_0 5222019','4_0 5212019','1_0 5272019','4_0 5272019'] 
# fly_names = fly_names_sorted[:10]
cmap = plt.get_cmap('Spectral')
n_colors = 10
# NOTE(review): with the divisor n_colors - 1.9999 the last index maps to a value
# above 1.0 - confirm this is intended.
colors = [cmap(i/(n_colors-1.9999)) for i in range(n_colors)]
# manual overrides for individual entries of the colour list
colors[3] = adjust_color('y', 1.3)
# colors[4] = adjust_color('y', 1.7)
colors[4] = 'g'
colors[5] = adjust_color('g', 1.3)
# colors[7] = adjust_color('g', 1.3)
colors[-1] = adjust_color('#5636a7', 1.3)
# colors = sns.color_palette('Spectral', len(fly_names))
# angle suffixes with the two-character leg prefix stripped
angle_names_u = np.unique([x[2:] for x in angle_names])
# NOTE(review): the six-leg list is immediately narrowed to a single leg.
legs = ['L1', 'L2', 'L3', 'R1', 'R2', 'R3']
legs = ['L1']

for i in range(len(angle_names_u)): 
    
    fig, axs = plt.subplots(1, 1, sharex = True, figsize = (10,2))
    # wrap the single axes in a list so it can be indexed like the multi-panel cells
    ax = [axs]
    
    for j in range(len(legs)):
        
        ax[j].set_ylabel('PDF', fontsize = 14)
        ax[j].set_xlabel(legs[j] + ' ' + titles[angle_names_u[i]] + ' angle (deg)', fontsize = 14)
        
        for k in range(len(fly_names)):
            
            fly_data = data[data['flyid'] == fly_names[k]]
            t1 = fly_data.iloc[0:][legs[j] + angle_names_u[i]]
            t1 = t1[np.isfinite(t1)] # ignores nans 
            # a KDE needs at least two samples
            if len(t1) <= 1: 
                continue        

            # Fit the KDE on the samples, then reuse `t1` as the evaluation grid
            # (500 points between the 1st and 99th percentile).
            kernel_t1 = stats.gaussian_kde(t1)    
            t1 = np.linspace(np.percentile(t1, 1), np.percentile(t1, 99), 500)
            height_t1 = kernel_t1.pdf(t1)                                                     
            ax[j].plot(t1, height_t1, label = fly_names[k], color = colors[k], linewidth = 1)
            ax[j].tick_params(labelbottom = True)
            
        # plot average distribution for all flies
        # (a KDE of all rows of `data` for this leg/angle, not a mean of the per-fly curves)
        t1 = data.iloc[0:][legs[j] + angle_names_u[i]]
        t1 = t1[np.isfinite(t1)] # ignores nans   
        kernel_t1 = stats.gaussian_kde(t1)    
        t1 = np.linspace(np.percentile(t1, 1), np.percentile(t1, 99), 500)
        height_t1 = kernel_t1.pdf(t1)                                                     
        ax[j].plot(t1, height_t1, label = 'all flies', color = 'k', linewidth = 2)
        
    # `j` still holds the last leg index from the loop above; hs/ls end up holding
    # the entries of the final figure and are read by the legend cell that follows.
    hs, ls = ax[j].get_legend_handles_labels()         
    plt.subplots_adjust(wspace = 0.2, hspace = 0.3)
    # plt.legend(bbox_to_anchor = (1.4, 1.03))
    sns.despine()
    plt.show()

In [None]:
# Stand-alone legend figure built from the `hs` / `ls` handle and label lists.
fig = plt.figure(figsize=(3, 3))
legend_ax = plt.gca()
legend_ax.legend(hs, ls, loc='center', fontsize=12, ncol=3)
legend_ax.axis('off')
fig.tight_layout()
plt.show()

In [None]:
def get_url(flyid, filename):
    """Build the video-server URL for `filename` recorded from fly `flyid`.

    `flyid` is split at its first underscore: the text before it fills the
    '#...' part of the URL and the text after it fills the 'Fly ...' folder.
    Spaces are replaced by '%20'; no other character is escaped.
    """
    url_prefix = 'http://128.95.10.233:5000'
    parts = flyid.partition('_')
    raw_url = '{}/#{}/Fly {}/{}'.format(url_prefix, parts[0], parts[2], filename)
    return '%20'.join(raw_url.split(' '))


# List every video recorded for one fly together with the number of rows that
# video contributes to `data`.
flyid = '1_0 6182019'
# Take the filenames straight from `data` (in order of first appearance); the
# `fly_videos` lookup is only created by a commented-out helper call, so relying
# on it fails on a fresh kernel.
videos = data.loc[data.flyid == flyid, 'filename'].dropna().unique()
for fname in videos:
    print(fname + ' ({})'.format(len(data[data.filename == fname])))
    #print(get_url(flyid, fname))

In [None]:
# add derivative columns to data
# For every bout and every angle, write the first ('_d1', deg/s) and second
# ('_d2', deg/s^2) Savitzky-Golay derivative into `data`. Derivatives are taken
# per bout so the filter window never spans two bouts.
dt = 1/fps
s = 1.0/dt            # per-frame -> per-second scale for the first derivative
s2 = 1.0 / (dt * dt)  # per-frame^2 -> per-second^2 scale for the second derivative
savgol_window = 5     # filter window length (frames)
savgol_order = 3      # polynomial order

for bout_id in bout_numbers:
    mask = data.behavior_bout == bout_id
    bout_df = data.loc[mask]
    # savgol_filter (default mode='interp') raises if the input is shorter than the window
    too_short = len(bout_df) < savgol_window
    for ang in angle_names:
        if too_short:
            # no derivative can be estimated; mark the bout as missing
            data.loc[mask, ang + '_d1'] = np.nan
            data.loc[mask, ang + '_d2'] = np.nan
            continue
        bout = np.array(bout_df[ang])
        data.loc[mask, ang + '_d1'] = signal.savgol_filter(bout, savgol_window, savgol_order, deriv=1) * s
        data.loc[mask, ang + '_d2'] = signal.savgol_filter(bout, savgol_window, savgol_order, deriv=2) * s2

In [None]:
# plot derivatives of the angle distributions for individual flies with the most data
# One figure per angle suffix, one panel per leg; each panel shows a Gaussian-KDE
# curve of angular velocity for each of the first 13 flies in fly_names_sorted.
fly_data, fly_names_sorted = data_per_fly(data)
fly_names = fly_names_sorted[:13]
# One colour per plotted fly. Reusing the `colors` left behind by an earlier
# cell can run out of entries when that list is shorter than `fly_names`.
colors = custom_cmap(len(fly_names))
angle_names_u = np.unique([x[2:] for x in angle_names])
legs = ['L1', 'R1']
dt = 1/fps
s = 1.0/dt            # per-frame -> per-second scale for the first derivative
savgol_window = 5     # filter window length (frames)

for angle_suffix in angle_names_u:

    n_rows = int(len(legs)/2)
    fig, axs = plt.subplots(n_rows, 2, sharex=True, figsize=(15, 3*n_rows))
    fig.suptitle('distribution of ' + titles[angle_suffix] + ' speeds during t1 grooming', y=0.97)
    ax = axs.T.flatten()

    for j in range(len(legs)):

        ax[j].set_ylabel('PDF')
        ax[j].set_xlabel(legs[j] + ' ' + titles[angle_suffix] + ' velocity (deg/s)')

        for k in range(len(fly_names)):

            fly_data = data[data['flyid'] == fly_names[k]]
            t1 = fly_data[legs[j] + angle_suffix]
            if len(t1) < savgol_window:
                # savgol_filter raises when the input is shorter than its window
                continue
            # NOTE(review): the derivative runs over all of the fly's rows in one
            # pass, so windows at bout boundaries mix samples from two bouts.
            t1 = signal.savgol_filter(t1, savgol_window, 3, deriv=1) * s
            t1 = t1[np.isfinite(t1)] # ignores nans
            if len(t1) <= 1:
                continue

            # fit the KDE, then evaluate it between the 5th and 95th percentile
            kernel_t1 = stats.gaussian_kde(t1)
            t1 = np.linspace(np.percentile(t1, 5), np.percentile(t1, 95), 1000)
            height_t1 = kernel_t1.pdf(t1)
            ax[j].plot(t1, height_t1, label=fly_names[k], color=colors[k])
            ax[j].tick_params(labelbottom=True)
            plt.locator_params(axis='x', nbins=5)

    plt.subplots_adjust(wspace=0.2, hspace=0.3)
    #plt.legend(bbox_to_anchor = (1.4, 1.03))
    plt.show()
    

In [None]:
# plot velocity based on the direction of motion 

def get_direction(data, align_to):
    """Split samples by the sign of the values stored under `align_to`.

    Returns a pair of boolean arrays (forward, backward): `forward` is True where
    the value is strictly positive and `backward` where it is strictly negative.
    Zeros and NaNs are False in both.
    """
    values = np.array(data[align_to])
    is_positive = np.greater(values, 0)
    is_negative = np.less(values, 0)
    return is_positive, is_negative


# plot first derivative of the angle distributions for individual flies with the most data
# For each fly the angular velocity of every angle is split by the sign of the
# leg's `align_to` column and a KDE is drawn for each sign.
legs = ['L1', 'R1']
angle_vars = [v for v in data.columns
              if some_contains(v, ['_flex'])
              and not some_contains(v, ['_d1', '_d2', '_freq', '_range'])
              and v[:2] in legs]

align_to = 'C_flex_d1'  # column suffix whose sign defines the direction
fly_names = fly_names_sorted[:13]
angle_names_u = np.unique([x[2:] for x in angle_names])
dt = 1/fps
s = 1.0/dt            # per-frame -> per-second scale for the first derivative
savgol_window = 5     # filter window length (frames)
cs = ['xkcd:blood red', 'xkcd:tangerine']
labels = ['forward', 'backward']

for angle_suffix in angle_names_u:

    n_rows = int(len(legs)/2)
    fig, axs = plt.subplots(n_rows, 2, sharex=True, figsize=(15, 3*n_rows))
    fig.suptitle('distribution of ' + titles[angle_suffix] + ' velocity during t1 grooming', y=0.99)
    ax = axs.T.flatten()

    for j in range(len(legs)):

        ax[j].set_ylabel('PDF')
        ax[j].set_xlabel(legs[j] + ' ' + titles[angle_suffix] + ' velocity (deg/s)')
        labelled = set()  # directions that already have a legend entry in this panel

        for k in range(len(fly_names)):

            fly_data = data[data['flyid'] == fly_names[k]]
            angle = np.array(fly_data[legs[j] + angle_suffix])
            if len(angle) < savgol_window:
                # savgol_filter raises when the input is shorter than its window
                continue
            # Differentiate the full, contiguous trace first and select by
            # direction afterwards; differentiating already-masked samples
            # would treat non-adjacent frames as neighbours.
            velocity = signal.savgol_filter(angle, savgol_window, 3, deriv=1) * s
            forward, backward = get_direction(fly_data, legs[j] + align_to)
            direction = [forward, backward]

            for n in range(len(direction)):
                t1 = velocity[direction[n]]
                t1 = t1[np.isfinite(t1)] # ignores nans
                if len(t1) <= 1:
                    # a KDE needs at least two samples
                    continue

                kernel_t1 = stats.gaussian_kde(t1)
                t1 = np.linspace(np.percentile(t1, 5), np.percentile(t1, 95), 1000)
                height_t1 = kernel_t1.pdf(t1)
                # label each direction once per panel so the legend has no duplicates
                ax[j].plot(t1, height_t1, color=cs[n], label=labels[n] if n not in labelled else '')
                labelled.add(n)
                ax[j].tick_params(labelbottom=True)
                plt.locator_params(axis='x', nbins=5)

    plt.subplots_adjust(wspace=0.2, hspace=0.3)
    plt.legend()
    plt.show()
    

In [None]:
# plot velocity based on the direction of motion 

def get_direction(data, align_to):
    """Split samples by the sign of the values stored under `align_to`.

    Returns a pair of boolean arrays (forward, backward): `forward` is True where
    the value is strictly positive and `backward` where it is strictly negative.
    Zeros and NaNs are False in both.
    """
    values = np.array(data[align_to])
    is_positive = np.greater(values, 0)
    is_negative = np.less(values, 0)
    return is_positive, is_negative


# plot first derivative of the angle distributions, pooled over all rows of `data`
# The angular velocity of every angle is split by the sign of the leg's
# `align_to` column and a KDE is drawn for each sign.
legs = ['L1', 'R1']
angle_vars = [v for v in data.columns
              if some_contains(v, ['_flex'])
              and not some_contains(v, ['_d1', '_d2', '_freq', '_range'])
              and v[:2] in legs]

align_to = 'C_flex_d1'  # column suffix whose sign defines the direction
fly_names = fly_names_sorted[:15]
angle_names_u = np.unique([x[2:] for x in angle_names])
dt = 1/fps
s = 1.0/dt            # per-frame -> per-second scale for the first derivative
savgol_window = 5     # filter window length (frames)
cs = ['xkcd:blood red', 'xkcd:tangerine']
labels = ['extension', 'flexion']

for angle_suffix in angle_names_u:

    n_rows = int(len(legs)/2)
    fig, axs = plt.subplots(n_rows, 2, sharex=True, figsize=(15, 3*n_rows))
    fig.suptitle('distribution of ' + titles[angle_suffix] + ' velocity during t1 grooming', y=0.99)
    ax = axs.T.flatten()

    for j in range(len(legs)):

        ax[j].set_ylabel('PDF')
        ax[j].set_xlabel(legs[j] + ' ' + titles[angle_suffix] + ' velocity (deg/s)')

        angle = np.array(data[legs[j] + angle_suffix])
        if len(angle) < savgol_window:
            # savgol_filter raises when the input is shorter than its window
            continue
        # Differentiate the full, contiguous trace first and select by direction
        # afterwards; differentiating already-masked samples would treat
        # non-adjacent frames as neighbours.
        velocity = signal.savgol_filter(angle, savgol_window, 3, deriv=1) * s
        forward, backward = get_direction(data, legs[j] + align_to)
        direction = [forward, backward]

        for n in range(len(direction)):
            t1 = velocity[direction[n]]
            t1 = t1[np.isfinite(t1)] # ignores nans
            if len(t1) <= 1:
                # a KDE needs at least two samples
                continue

            kernel_t1 = stats.gaussian_kde(t1)
            t1 = np.linspace(np.percentile(t1, 5), np.percentile(t1, 95), 1000)
            height_t1 = kernel_t1.pdf(t1)
            ax[j].plot(t1, height_t1, color=cs[n], label=labels[n])
            ax[j].tick_params(labelbottom=True)
            plt.locator_params(axis='x', nbins=5)

    plt.subplots_adjust(wspace=0.2, hspace=0.3)
    plt.legend()
    plt.show()
    

In [None]:
# plot derivatives of the angle distributions (pooled data)
# One figure per angle suffix, one panel per leg: KDE of the angular velocity
# computed from all rows of `data`.
fly_names = fly_names_sorted[:13]
angle_names_u = np.unique([x[2:] for x in angle_names])
# available legs: 'L1', 'L2', 'L3', 'R1', 'R2', 'R3'; only the front pair is drawn
legs = ['L1', 'R1']
dt = 1/fps
s = 1.0/dt
s2 = 1.0 / (dt * dt)

for angle_suffix in angle_names_u:

    n_rows = int(len(legs)/2)
    fig, axs = plt.subplots(n_rows, 2, sharex=True, figsize=(15, 3*n_rows))
    fig.suptitle('distribution of ' + titles[angle_suffix] + ' speeds during t1 grooming', y=0.97)
    ax = axs.T.flatten()

    for j, leg in enumerate(legs):

        panel = ax[j]
        panel.set_ylabel('PDF')
        panel.set_xlabel(leg + ' ' + titles[angle_suffix] + ' velocity (deg/s)')

        # first derivative in deg/s, with non-finite samples removed
        velocity = signal.savgol_filter(data[leg + angle_suffix], 5, 3, deriv=1) * s
        velocity = velocity[np.isfinite(velocity)]

        # evaluate the KDE on 1000 points between the 5th and 95th percentile
        kde = stats.gaussian_kde(velocity)
        grid = np.linspace(np.percentile(velocity, 5), np.percentile(velocity, 95), 1000)
        panel.plot(grid, kde.pdf(grid), color='k')
        panel.tick_params(labelbottom=True)
        plt.locator_params(axis='x', nbins=5)

    plt.subplots_adjust(wspace=0.2, hspace=0.3)
    plt.show()
    

In [None]:
# plot second derivative of the angle distributions (pooled data)
# One figure per angle suffix, one panel per leg: KDE of the angular
# acceleration computed from all rows of `data`.
fly_names = fly_names_sorted
angle_names_u = np.unique([x[2:] for x in angle_names])
# available legs: 'L1', 'L2', 'L3', 'R1', 'R2', 'R3'; only the front pair is drawn
legs = ['L1', 'R1']
dt = 1/fps
s = 1.0/dt
s2 = 1.0 / (dt * dt)

for angle_suffix in angle_names_u:

    n_rows = int(len(legs)/2)
    fig, axs = plt.subplots(n_rows, 2, sharex=True, figsize=(15, 3*n_rows))
    fig.suptitle('distribution of ' + titles[angle_suffix] + ' acceleration during t1 grooming', y=0.99)
    ax = axs.T.flatten()

    for j, leg in enumerate(legs):

        panel = ax[j]
        panel.set_ylabel('PDF')
        panel.set_xlabel(leg + ' ' + titles[angle_suffix] + ' acceleration (deg/$s^2$)')

        # second derivative in deg/s^2, with non-finite samples removed
        accel = signal.savgol_filter(np.array(data[leg + angle_suffix]), 5, 3, deriv=2) * s2
        accel = accel[np.isfinite(accel)]

        # evaluate the KDE on 1000 points between the 5th and 95th percentile
        kde = stats.gaussian_kde(accel)
        grid = np.linspace(np.percentile(accel, 5), np.percentile(accel, 95), 1000)
        panel.plot(grid, kde.pdf(grid), color='k')
        panel.tick_params(labelbottom=True)
        plt.locator_params(axis='x', nbins=5)

    plt.subplots_adjust(wspace=0.2, hspace=0.3)
    plt.show()
    

In [None]:
# compute and save average grooming frequencies for each fly and session.
# maybe include frequency from peak finding and using signal.welch.
# plot power spectral density for each bout and find max freq

# choose one angle to determine the grooming frequency from
# (avg freq of this joint across all bouts associated with the fly)
#
# For every fly and every one of its bouts two frequency estimates are made:
#   * welch:  frequency bin with the largest power in the Welch PSD
#   * period: reciprocal of the mean peak-to-peak interval (mean_peak_interval helper)
# The per-fly averages are written to a CSV.

angle = 'R1C_flex' # 'R1C_flex'
cols = ['date', 'fly', 'grooming_freq_welch', 'grooming_freq_period', 'grooming_cycles', 'grooming_bouts']
# Fly ids come straight from `data`; the `fly_dict` lookup is only created by a
# commented-out helper call, so relying on it fails on a fresh kernel.
fly_ids = np.unique(data['flyid'].dropna())
dist = 25  # minimum number of frames between detected peaks

records = []  # one dict per fly; turned into a DataFrame once at the end
for fly_id in fly_ids:

    fly_data = data[data['flyid'] == fly_id]
    bout_nums = np.unique(fly_data.behavior_bout)
    fly_freqs_welch = []
    fly_freqs_period = []
    grooming_cycles = 0

    for bout_num in bout_nums:

        # select within this fly's rows so the bout always belongs to the fly
        bout = fly_data[fly_data.behavior_bout == bout_num]
        t1 = np.array(bout[angle])
        t1 = t1[np.isfinite(t1)]
        if len(t1) <= 1:
            continue

        # cap the segment length at the signal length
        f, pxx = signal.welch(t1, fs=fps, nperseg=min(1024, len(t1)))
        fly_freqs_welch.append(f[np.argmax(pxx)])

        peaks, props = signal.find_peaks(t1, height=None, distance=dist)
        mean_int, stderr_int, intervals = mean_peak_interval(t1, fps, dist=dist)
        fly_freqs_period.append(1/mean_int)
        grooming_cycles += len(peaks)

    # nanmean of an empty list only yields NaN plus a warning; state it explicitly
    avg_freq_welch = np.nanmean(fly_freqs_welch) if fly_freqs_welch else np.nan
    avg_freq_period = np.nanmean(fly_freqs_period) if fly_freqs_period else np.nan

    # fly ids are split on whitespace: first token is the fly, second the date
    fly_part, date_part = fly_id.split()[0], fly_id.split()[1]
    records.append({
        cols[0]: date_part,
        cols[1]: fly_part,
        cols[2]: avg_freq_welch,
        cols[3]: avg_freq_period,
        cols[4]: grooming_cycles,
        cols[5]: len(bout_nums),  # bouts of this fly, not of the whole data set
    })

# DataFrame.append was removed in pandas 2.0; build the frame in one step
freq_df = pd.DataFrame(records, columns=cols)

# NOTE(review): `out_path` is not defined in the visible part of this notebook -
# presumably it comes from the helper notebook run at the top; confirm.
csv_name = os.path.join(out_path, 'grooming_freqs_' + angle + '.csv')
freq_df.to_csv(csv_name, index=False)

In [None]:
# Display the per-fly grooming frequency table.
freq_df

In [None]:
# Load the saved per-fly grooming and walking frequency tables and join them
# on a shared 'flyid' key ('<date> <fly>').
grooming_freq_path = os.path.join(out_path, 'grooming_freqs_R1B_flex.csv')
walking_freq_path = os.path.join(out_path, 'walking_freqs.csv')
grooming_freqs = pd.read_csv(grooming_freq_path)
walking_freqs = pd.read_csv(walking_freq_path)

# adjust dates
# Walking dates are '.'-separated month/day/two-digit-year strings; rebuild them
# as month + zero-padded day + four-digit year.
dates = list(walking_freqs.date.str.split('.'))
dates_new = []
for parts in dates:
    month, day = parts[0], parts[1]
    padded_day = '0' + str(day) if len(day) == 1 else day
    dates_new.append(str(month) + str(padded_day) + str('20' + parts[2]))
walking_freqs['date'] = dates_new

walking_freqs['flyid'] = walking_freqs.date + ' ' + walking_freqs.fly
grooming_freqs['date'] = grooming_freqs.date.astype(str)
grooming_freqs['flyid'] = grooming_freqs.date + ' ' + grooming_freqs.fly

# bring over only the grooming columns the walking table lacks, plus the join key
cols = [c for c in grooming_freqs.columns if c not in walking_freqs.columns]
cols.append('flyid')
freqs = walking_freqs.merge(grooming_freqs[cols], on='flyid', how='left')


In [None]:
# scatterplot of fly walking frequencies vs fly grooming frequencies
# One figure per grooming-frequency estimate; flies without that estimate are dropped.

# welch method for grooming
freqs1 = freqs[freqs.grooming_freq_welch.notnull()]
grooming_freqs_w = freqs1.grooming_freq_welch
walking_freqs_w = freqs1.walking_freq

fig, ax = plt.subplots()
ax.set_title('grooming and walking frequencies of individual flies')
ax.scatter(grooming_freqs_w, walking_freqs_w, facecolor='w', edgecolor='k')
ax.set_xlabel('t1 grooming frequency')
ax.set_ylabel('step frequency')
plt.show()

# grooming period method
freqs2 = freqs[freqs.grooming_freq_period.notnull()]
grooming_freqs_p = freqs2.grooming_freq_period
walking_freqs_p = freqs2.walking_freq

fig, ax = plt.subplots()
ax.set_title('grooming and walking frequencies of individual flies')
ax.scatter(grooming_freqs_p, walking_freqs_p, facecolor='w', edgecolor='k')
ax.set_xlabel('t1 grooming frequency')
ax.set_ylabel('step frequency')
plt.show()

In [None]:
# make a column indicating where stim is on/off
# 'stim' is 1 for frames whose frame number lies between `start` and
# `start + stimlen * fps` (inclusive) and 0 otherwise; the bout's first
# `stimlen` value is used for the whole bout.
data['stim'] = 0
start = int(0.5*fps)  # frame number equivalent to 0.5 s at `fps`
for bout_id in bout_numbers:
    in_bout = data.behavior_bout == bout_id
    bout = data.loc[in_bout]
    fnums = np.array(bout.fnum)
    stimlen = np.array(bout.stimlen)[0]
    end = int(start + stimlen*fps)
    stim = ((fnums >= start) & (fnums <= end)).astype(int)
    # Write the bout's values through .loc: `stim` has one entry per bout row,
    # which .loc aligns with the selected rows. (Series.mask on the column
    # expects a full-length replacement and, used in place on a column selection,
    # does not reliably update `data`.)
    data.loc[in_bout, 'stim'] = stim
    

In [None]:
# Load the cleaned fly summary sheet and attach each fly's 'Structure Name' to `data`.
summary_path = '/media/turritopsis/katie/grooming'
summary_fname = os.path.join(summary_path, 'fly_summary_2_final.csv')
# helper from outside this notebook section; called with the raw and the cleaned file names
clean_summary(summary_path, 'fly_summary_2.csv', 'fly_summary_2_final.csv')
struct_name = 'Berlin-kinematic-data'

summary = pd.read_csv(summary_fname)
summary = parse_date(summary)
# same '<fly> <date>' layout as the fly ids used elsewhere in this notebook
summary['flyid'] = summary['Fly #'] + ' ' + summary['date']
check = summary['Structure Name'] == struct_name
summary_good = summary.loc[check]

to_load_rows = []
# defaultdict is imported from collections at the top of the notebook
sessions_flies = defaultdict(set)  # date -> set of fly numbers
metadata = dict()                  # (date, fly number) -> summary row as a dict

for _, summary_row in summary_good.iterrows():
    row = dict(summary_row)
    key = (row['date'], row['Fly #'])
    metadata[key] = row
    sessions_flies[row['date']].add(row['Fly #'])

# One structure name per fly: duplicate fly ids in the summary would otherwise
# duplicate every matching row of `data` in the merge.
structure_by_fly = summary[['Structure Name', 'flyid']].drop_duplicates(subset='flyid')
# Drop a column left by a previous run of this cell so re-running does not
# produce 'Structure Name_x' / 'Structure Name_y'.
data = data.drop(columns='Structure Name', errors='ignore')
data = data.merge(structure_by_fly, on='flyid', how='left')

In [None]:
# find and plot angles for bouts with stim
# For every structure other than 'Berlin-kinematic-data', plot each angle of
# every bout that contains stimulus frames; the stimulus period is highlighted.
legs = ['L1', 'R1']
angle_names_u = np.unique([x[2:] for x in angle_names])
struct_names = set(data['Structure Name'])
colors = ['r', 'b']
stim_colors = [adjust_color('r', 1.5), adjust_color('b', 1.5)]

for struct in struct_names:

    if struct == 'Berlin-kinematic-data':
        continue
    struct_data = data[data['Structure Name'] == struct]

    for bout_id in bout_numbers:

        # build the mask from struct_data itself; a mask built from the full
        # `data` frame has a different index and has to be re-aligned by pandas
        bout = struct_data[struct_data.behavior_bout == bout_id]
        if np.sum(bout.stim) < 1 or np.array(bout.stimlen)[0] == 0:
            continue

        stimlen = np.array(bout.stimlen)[0]
        stim_on = np.array(bout.stim == 1)  # plain boolean array for indexing
        t = np.arange(len(bout)) / fps      # frame index -> seconds

        for angle_suffix in angle_names_u:

            fig = plt.figure(figsize=(8, 4))
            plt.title(titles[angle_suffix] + ' angles (' + struct + ', bout ' + str(int(bout_id)) + ', ' + str(stimlen) + 's stimulus)', fontsize=14)
            plt.xlabel('time (seconds)', fontsize=14)
            plt.ylabel('angle (deg)', fontsize=14)

            for k in range(len(legs)):
                angle = np.array(bout[legs[k] + angle_suffix])
                plt.plot(t, angle, c=colors[k], label=legs[k])
                # redraw the stimulus frames in the adjusted colour
                plt.plot(t[stim_on], angle[stim_on], c=stim_colors[k], label=legs[k] + ' (stim)')

            # shade from the first to the last stimulus frame
            plt.axvspan(t[stim_on][0], t[stim_on][-1], color='y', alpha=0.7, lw=0)
            plt.legend(fontsize=12, loc=(1, 0.5))
            sns.despine()
            plt.show()
    

In [None]:
# plot distribution of bout lengths (all flies)
# KDE of the bout lengths returned by get_bout_lengths, limited to 0-600 frames.
bout_length_dict = get_bout_lengths(data)
bout_lengths = np.array(list(bout_length_dict.values()))
figure = plt.figure(figsize=(8, 4))
length_ax = plt.gca()
length_ax.set_title('distribution of t1 grooming bout lengths', fontsize=14)
xs, ys = get_kde_vals(bout_lengths)
length_ax.plot(xs, ys, 'k')
length_ax.set_xlabel('bout length (number of frames)', fontsize=14)
length_ax.set_ylabel('PDF', fontsize=14)
length_ax.set_xlim([0, 600])
plt.show()


In [None]:
# plot distribution of bout lengths (individual flies)
# Overlays one bout-length curve per fly for the first `n_flies_shown` entries
# of the fly list returned by data_per_fly.
fly_data, fly_names_sorted = data_per_fly(data)
n_flies_shown = 13  # number of flies overlaid in the figure
fly_names = fly_names_sorted[:n_flies_shown]
colors = custom_cmap(len(fly_names))
figure = plt.figure(figsize = (8,4))
plt.title('distribution of t1 grooming bout lengths', fontsize = 14)

for j in range(len(fly_names)):

    # Keep the per-fly rows under their own name so `fly_data` (the
    # data_per_fly result) is not replaced by a different kind of object.
    single_fly_data = data[data.flyid == fly_names[j]]
    bout_length_dict = get_bout_lengths(single_fly_data)
    bout_lengths = np.array(list(bout_length_dict.values()))
    xs, ys = get_kde_vals(bout_lengths)
    plt.plot(xs, ys, color = colors[j], label = fly_names[j])

plt.xlabel('bout length (number of frames)', fontsize = 14)
plt.ylabel('PDF', fontsize = 14)
plt.xlim([0, 600])
plt.ylim([0, 0.0035])
plt.legend(loc = (1.02, 0))
plt.show()

In [None]:
# plot average bout lengths (individual flies)
# For every fly: mean bout length and its standard error. Flies whose mean is
# NaN are dropped, the rest are ordered from longest to shortest mean, and the
# result is drawn as a bar chart with error bars.
fly_data, fly_names = data_per_fly(data)
bout_length_avg = np.zeros(len(fly_names))
bout_length_stderr = np.zeros(len(fly_names))

for j in range(len(fly_names)):

    # Keep the per-fly rows under their own name so `fly_data` (the
    # data_per_fly result) is not replaced by a different kind of object.
    single_fly_data = data[data.flyid == fly_names[j]]
    bout_length_dict = get_bout_lengths(single_fly_data)
    bout_lengths = np.array(list(bout_length_dict.values()))
    bout_length_avg[j] = np.nanmean(bout_lengths)
    bout_length_stderr[j] = stats.sem(bout_lengths, nan_policy = 'omit')

# Discard flies without a defined mean.
idxs = np.where(~pd.isnull(bout_length_avg))[0]
fly_names = np.take(fly_names, idxs)
bout_length_avg = np.take(bout_length_avg, idxs)
bout_length_stderr = np.take(bout_length_stderr, idxs)

# Descending order of mean bout length.
sort_idxs = np.argsort(bout_length_avg)[::-1]
fly_names_sorted = np.take(fly_names, sort_idxs)
bout_len_sorted = np.take(bout_length_avg, sort_idxs)
bout_len_err_sorted = np.take(bout_length_stderr, sort_idxs)


fig = plt.figure(figsize = (18,5), dpi = 200)
ax = plt.gca()
plt.title('average t1 grooming bout lengths for individual flies', fontsize = 14)
plt.xlabel('fly ids', fontsize = 14)
plt.ylabel('average bout length (frames)', fontsize = 14)
bar_positions = range(len(fly_names_sorted))
plt.bar(bar_positions, bout_len_sorted, color = 'k')
plt.errorbar(bar_positions, bout_len_sorted, bout_len_err_sorted, color = 'k', capsize = 2, ls = 'none')
ax.set_xticks(bar_positions)
ax.set_xticklabels(fly_names_sorted, rotation = 90)
plt.show()

# color berlin wt flies
# Same bar chart as the previous figure, but bars whose fly id has a second
# whitespace-separated token listed in `wt_flies` are recoloured gray.
fig = plt.figure(figsize = (20,5), dpi = 200)
ax = plt.gca()
plt.title('average t1 grooming bout lengths for individual flies', fontsize = 14)
plt.xlabel('fly ids', fontsize = 14)
plt.ylabel('average bout length (frames)', fontsize = 14)
# Tokens identifying the berlin wild-type flies (they look like recording
# dates -- TODO confirm). The middle entry was previously a malformed literal
# that stopped the cell from parsing.
wt_flies = ['5222019', '5242019', '5272019']
bar_positions = range(len(fly_names_sorted))
barlist = plt.bar(bar_positions, bout_len_sorted, color = 'k')
plt.errorbar(bar_positions, bout_len_sorted, bout_len_err_sorted, color = 'k', capsize = 2, ls = 'none')
for j in bar_positions:
    if fly_names_sorted[j].split()[1] in wt_flies:
        barlist[j].set_color('xkcd:gray')
ax.set_xticks(bar_positions)
ax.set_xticklabels(fly_names_sorted, rotation = 90)
handles = [Patch(color = 'xkcd:gray', label='berlin wild type'), Patch(color = 'k', label='other')]
plt.legend(handles = handles, fontsize = 12)
plt.show()

In [None]:
# plot total number of frames from all flies
# Bar chart of the per-fly value stored in `fly_data` (labelled as total
# grooming frames), in the order given by `fly_names_sorted`; bars whose fly id
# has a second whitespace-separated token listed in `wt_flies` are gray.
fly_data, fly_names_sorted = data_per_fly(data)
fly_frames = [fly_data[fly_name] for fly_name in fly_names_sorted]
fig = plt.figure(figsize = (20,5), dpi = 200)
ax = plt.gca()
plt.title('amount of t1 grooming for individual flies', fontsize = 14)
plt.xlabel('fly ids', fontsize = 14)
plt.ylabel('total grooming frames', fontsize = 14)
# Tokens identifying the berlin wild-type flies (they look like recording
# dates -- TODO confirm). The middle entry used to carry a stray prefix, so it
# could never equal a token produced by .split() below.
wt_flies = ['5222019', '5242019', '5272019']
bar_positions = range(len(fly_names_sorted))
barlist = plt.bar(bar_positions, fly_frames, color = 'k')
for j in bar_positions:
    if fly_names_sorted[j].split()[1] in wt_flies:
        barlist[j].set_color('xkcd:gray')
ax.set_xticks(bar_positions)
ax.set_xticklabels(fly_names_sorted, rotation = 90)
handles = [Patch(color = 'xkcd:gray', label='berlin wild type'), Patch(color = 'k', label='other')]
plt.legend(handles = handles, fontsize = 12)
plt.show()