###### determine how to manually classify t1 grooming vs head grooming 

features that can be used to distinguish these two types of grooming:
   1. average range of the rotation angles, most distinguishable for L1B (higher for head grooming)
   2. average range of the L1A flexion angles (higher for head grooming)
   3. average range of the L1E z-coordinates (higher for head grooming)
   4. average z position of L1D and L1E (lower for head grooming)
    
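design a metric that combines the above features: features that are lower for head grooming 
(feature 4) are negated so that a higher value always points towards head grooming, and the score of 
a bout is the mean of its features. calculate the score for each bout, and if the score is above a 
certain threshold, the bout will be classified as head grooming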

In [None]:
import pandas as pd 
import numpy as np 
import matplotlib.pyplot as plt
import os
import seaborn as sns
import umap
import csv
import warnings
from collections import OrderedDict
from collections import defaultdict
from scipy import signal, stats
from mpl_toolkits import mplot3d
from mpl_toolkits.mplot3d import axes3d
from matplotlib.patches import Patch
from sklearn.preprocessing import MinMaxScaler

# Disable matplotlib's "too many open figures" warning; the plotting cells
# below open one figure per bout.
plt.rcParams.update({'figure.max_open_warning': 0})
# NOTE(review): this silences every Python warning for the whole session,
# including pandas/numpy warnings raised by the helpers defined below.
warnings.simplefilter("ignore")

# Run the companion notebook in this namespace. Presumably it defines the
# helpers that are used below without a local definition (some_contains,
# get_angle_names, correct_angles, ...) -- confirm against that notebook.
%run grooming_functions.ipynb
%matplotlib inline

# for figure styling and saving
sns.set()
sns.set_style('ticks')
# Output directory for figures; not referenced by any cell visible here.
out_path = '/media/turritopsis/katie/grooming/t1-grooming'

In [None]:
def get_url(flyid, filename):
    """Build the video-server URL for one file of a fly.

    ``flyid`` is split at its first underscore: the text before it is used as
    the session and the text after it as the folder name. Spaces in the
    assembled URL are replaced by ``%20``; no other character is escaped.
    """
    session, _, folder = flyid.partition('_')
    raw_url = '{}/#{}/Fly {}/{}'.format('http://128.95.10.233:5000', session, folder, filename)
    return raw_url.replace(' ', '%20')

def set_box_color(bp, color):
    """Apply one color to the boxes, whiskers, caps and medians of a boxplot.

    ``bp`` is indexed by those four keys, as the value returned by
    ``plt.boxplot`` is in the plotting cells of this notebook.
    """
    for part in ('boxes', 'whiskers', 'caps', 'medians'):
        plt.setp(bp[part], color=color)

# max - min range
def get_range(data, angle_vars):
    """Add a ``<var>_range`` column holding each bout's max - min of ``<var>``.

    Parameters
    ----------
    data : DataFrame with a ``behavior_bout`` column and every column named in
        ``angle_vars``. It is modified in place (columns are added) and also
        returned, so ``data = get_range(data, ...)`` keeps working.
    angle_vars : iterable of column names to summarise.

    Returns
    -------
    The same DataFrame. Every row of a bout carries that bout's range; a bout
    containing a NaN in ``<var>`` gets NaN, as ``np.max`` / ``np.min``
    propagate it.

    The per-bout value is written back through ``groupby(...).transform`` so
    it is aligned on the row index. Building one list in sorted-bout order and
    assigning it positionally put values on the wrong rows whenever the bouts
    were not stored contiguously and in ascending order.
    """

    def _span(values):
        arr = values.to_numpy()
        return np.abs(np.max(arr) - np.min(arr))

    for var in angle_vars:
        data[var + '_range'] = data[var].groupby(data['behavior_bout']).transform(_span)

    return data
            
# detect peaks and troughs to find the average amplitude
def get_average_range(data, ang_vars, dist=20, height=None):
    """Add a ``<var>_avg_range`` column: mean peak minus mean trough per bout.

    Parameters
    ----------
    data : DataFrame with a ``behavior_bout`` column and every column named in
        ``ang_vars``. It is modified in place (columns are added) and returned.
    ang_vars : iterable of column names to summarise.
    dist : passed to ``scipy.signal.find_peaks`` as ``distance`` (in samples).
    height : passed to ``find_peaks`` as ``height`` for both the peak search
        and the trough search. NOTE(review): for troughs the search runs on
        the negated trace, so a non-None ``height`` is compared against the
        negated values -- confirm that is intended before using it.

    Returns
    -------
    The same DataFrame. Every row of a bout carries
    ``|mean(peaks) - mean(troughs)|`` of that bout, or NaN when no peak or no
    trough was detected in it.

    Values are written back index-aligned through ``groupby(...).transform``;
    assigning a list built in sorted-bout order misplaced them whenever bouts
    were not contiguous and ascending in ``data``.
    """

    def _mean_peak_to_trough(values):
        trace = values.to_numpy()
        peak_idxs, _ = signal.find_peaks(trace, distance=dist, height=height)
        trough_idxs, _ = signal.find_peaks(-1 * trace, distance=dist, height=height)
        # No oscillation detected: report NaN explicitly instead of relying
        # on the warning-raising mean of an empty array.
        if len(peak_idxs) == 0 or len(trough_idxs) == 0:
            return np.nan
        return np.abs(np.nanmean(trace[peak_idxs]) - np.nanmean(trace[trough_idxs]))

    for var in ang_vars:
        data[var + '_avg_range'] = (
            data[var].groupby(data['behavior_bout']).transform(_mean_peak_to_trough)
        )

    return data
            
def get_bout_features(data, feature_names, flip, normalize = True):
    """Return one feature vector per bout (the per-bout mean of each feature).

    Parameters
    ----------
    data : DataFrame with a ``behavior_bout`` column and the feature columns.
    feature_names : sequence of column names, one per output column.
    flip : sequence of booleans, indexed like ``feature_names``; a true entry
        negates that feature.
    normalize : when true, each feature column is rescaled with
        ``MinMaxScaler`` across the bouts.

    Returns
    -------
    (bout_numbers, bout_features) : the sorted bout numbers as integers and a
    float array of shape (number of bouts, number of features), row ``i``
    belonging to ``bout_numbers[i]``.

    Only the requested columns are averaged. Averaging the whole frame fails
    on non-numeric columns (e.g. filename) with pandas >= 2.0, where
    ``GroupBy.mean`` no longer drops them silently.
    """
    feature_names = list(feature_names)
    bout_means = data.groupby('behavior_bout')[feature_names].mean()
    bout_numbers = bout_means.index.to_numpy().astype(int)

    # +1 keeps a feature as is, -1 negates it.
    signs = np.array([-1.0 if flip[j] else 1.0 for j in range(len(feature_names))])
    bout_features = bout_means.to_numpy(dtype=float) * signs

    if normalize:
        bout_features = MinMaxScaler().fit_transform(bout_features)

    return bout_numbers, bout_features

def compute_thresh(lower_scores, higher_scores):
    """Return a threshold between two groups of scores.

    The threshold is the largest lower score plus half of the absolute gap
    between it and the smallest higher score. When the groups do not overlap
    this is the midpoint of the gap; when they overlap it lies above the
    largest lower score.
    """
    top_of_lower = np.max(lower_scores)
    bottom_of_higher = np.min(higher_scores)
    half_gap = abs(bottom_of_higher - top_of_lower) / 2
    return half_gap + top_of_lower

# runs all the steps
def compute_grooming_scores(data, angle_vars, features, flip, dist=20, norm=False):
    """Compute the grooming score of every bout and store it on each row.

    Runs ``get_range`` and ``get_average_range`` over ``angle_vars``, builds
    the per-bout feature matrix with ``get_bout_features`` and averages the
    features of each bout (NaNs ignored). The result is written to a
    ``grooming_score`` column, which is NaN for rows whose bout received no
    score, and the DataFrame is returned.
    """
    data = get_average_range(get_range(data, angle_vars), angle_vars, dist=dist, height=None)
    bout_numbers, all_features = get_bout_features(data, features, flip, normalize=norm)
    all_scores = np.nanmean(all_features, axis=1)

    data['grooming_score'] = np.nan
    for bout, score in zip(bout_numbers, all_scores):
        in_bout = data.behavior_bout == bout
        data.loc[in_bout, 'grooming_score'] = score
    return data

In [None]:
# load data
def adjust_rot_angles(angles, angle_names):
    """Shift angle values above a per-joint cutoff down by 360.

    Rotation columns (names containing ``_rot``) are matched against the tags
    below by substring and wrapped at that tag's cutoff. Columns whose name
    contains ``_abduct`` or ``A_flex`` are wrapped at 50. ``angles`` is
    updated in place, column by column, and returned.
    """
    rot_cutoffs = [('2', -50), ('3', -20), ('L1A', 20), ('L1B', -70),
                   ('L1C', 10), ('R1A', 20), ('R1B', 70), ('R1C', -30)]

    def _wrap_above(name, cutoff):
        values = np.array(angles[name])
        above = values > cutoff
        values[above] = values[above] - 360
        angles[name] = values

    for tag, cutoff in rot_cutoffs:
        for name in angle_names:
            if '_rot' in name and tag in name:
                _wrap_above(name, cutoff)

    for name in angle_names:
        if '_abduct' in name or 'A_flex' in name:
            _wrap_above(name, 50)

    return angles

# Load the gzip-compressed CSV of T1 grooming data and derive the columns the
# rest of the notebook relies on.
prefix = r'/media/turritopsis/pierre/gdrive/latest/behavior/T1_grooming'
data_path = os.path.join(prefix, 'T1_grooming_all.csv.gz')
data = pd.read_csv(data_path, compression = 'gzip')
# Generic bout column used by every helper below.
data['behavior_bout'] = data['T1_grooming_bout_number']
# Fly identifier of the form '<fly> <date>'.
data['flyid'] = data['fly'].astype(str) + ' ' + data['date'].astype(str)
# Keep only rows that belong to a bout.
data = data[~data.behavior_bout.isnull()]
# adjust_bout_numbers / get_bout_lengths / remove_short_bouts are not defined
# in this file (presumably they come from grooming_functions.ipynb).
# NOTE(review): 60 is presumably a minimum bout length in frames -- confirm.
data = adjust_bout_numbers(data)
bout_length_dict = get_bout_lengths(data)
data = remove_short_bouts(data, 60)

fps = 300.0 # know this for this dataset

# get the joints to analyze
# NOTE(review): this hard-coded list is overwritten by the next assignment to
# `bodyparts`, so it has no effect.
bodyparts = np.array(['L1A', 'L1B', 'L1C', 'L1D', 'L1E', 
                      'L2A', 'L2B', 'L2C', 'L2D', 'L2E', 
                      'L3A', 'L3B', 'L3C', 'L3D', 'L3E', 
                      'R1A', 'R1B', 'R1C', 'R1D', 'R1E', 
                      'R2A', 'R2B', 'R2C', 'R2D', 'R2E',
                      'R3A', 'R3B', 'R3C', 'R3D', 'R3E'])

# Body parts are taken from the '<name>_error' columns present in the data.
bodyparts = [x.replace('_error', '') for x in data.columns if '_error' in x]
bodyparts_xyz = [bp + '_' + x for bp in bodyparts for x in ['x', 'y', 'z']]

# Angle columns, then the angle corrections (external correct_angles followed
# by the wrap-around adjustment defined above).
angle_types = np.array(['_BC', '_flex', '_rot', '_abduct'])
angle_names_t1 = get_angle_names(data, angle_types, only_t1 = True)
angle_names = get_angle_names(data, angle_types, only_t1 = False)
data = correct_angles(data, angle_names)
data = adjust_rot_angles(data, angle_names)

# Lookup structures built by external helpers; fly_videos is indexed by flyid
# further below.
bout_numbers = np.unique(np.array(data.behavior_bout))
fly_dict = get_fly_id(data, bout_numbers)
videos = get_videos(bout_numbers, data)
fly_videos = fly_to_video(data)
dif_flies = np.unique(list(fly_dict.values()))
fly_data, fly_names_sorted = data_per_fly(data)

In [None]:
# Human-readable names for angle-column suffixes.
# NOTE(review): `titles` is not referenced by any cell visible here.
titles = {
    'A_flex': 'coxa flexion',
    'A_abduct': 'body-coxa abduction',
    'A_rot': 'coxa rotation',
    'B_flex': 'coxa-femur flexion',
    'B_rot': 'femur rotation',
    'C_flex': 'femur-tibia flexion',
    'C_rot': 'tibia rotation',
    'D_flex': 'tibia-tarsus flexion',
    '_BC': 'body-coxa abduction'
}

# Ground-truth videos (values of the `filename` column) showing head grooming.
# NOTE(review): '06262019_fly1_1 R1C17  rot-cw-0.06 sec' is listed twice; this
# is harmless for the `isin` selection that uses the list.
hg_filenames = ['06132019_fly3_0 R1C10  str-ccw-0.06 sec', '06132019_fly3_0 R1C2  str-cw-0.03 sec',
                '06132019_fly3_0 R2C18  rot-cw-0.09 sec', '06132019_fly3_0 R2C6  str-cw-0.36 sec', 
                '06132019_fly3_0 R2C7  str-cw-0.72 sec', '05212019_fly4_0 R2C15  rot-cw-0 sec',
                '05212019_fly4_0 R3C6  str-cw-0.36 sec', '06262019_fly1_1 R1C17  rot-cw-0.06 sec',
                '06232019_fly6_0 R2C27  rot-ccw-0.36 sec', '05242019_fly3_0 R1C26  rot-ccw-0.18 sec', 
                '06172019_fly2_0 R3C27  rot-ccw-0.36 sec', '06242019_fly6_0 R3C18  rot-cw-0.09 sec',
                '06262019_fly1_1 R1C17  rot-cw-0.06 sec', '07022019_fly3_0 R1C2  str-cw-0.03 sec']

# Ground-truth videos showing t1 grooming.
t1_filenames = ['05222019_fly1_0 R1C3  str-cw-0.06 sec', '05222019_fly1_0 R2C17  rot-cw-0.06 sec',
                '05222019_fly1_0 R3C25  rot-ccw-0.09 sec', '05222019_fly3_0 R1C27  rot-ccw-0.36 sec',
                '05222019_fly3_0 R2C1  str-cw-0 sec', '05242019_fly1_0 R3C17  rot-cw-0.06 sec',
                '05272019_fly1_0 R1C11  str-ccw-0.09 sec', '05272019_fly2_0 R1C3  str-cw-0.06 sec',
                '05272019_fly2_0 R1C12  str-ccw-0.18 sec', '05272019_fly2_0 R1C14  str-ccw-0.72 sec',
                '05212019_fly4_0 R1C15  rot-cw-0 sec', '05212019_fly4_0 R2C26  rot-ccw-0.18 sec',
                '06112019_fly3_0 R3C28  rot-ccw-0.72 sec', '06202019_fly2_0 R2C16  rot-cw-0.03 sec', 
                '06242019_fly2_1 R2C10  str-ccw-0.06 sec', '06242019_fly6_0 R2C8  str-ccw-0 sec',
                '06242019_fly8_0 R1C3  str-cw-0.06 sec', '06142019_fly3_0 R2C25  rot-ccw-0.09 sec']

In [None]:
# L1 flexion/rotation angles and x/y/z coordinates, without derived columns.
angle_vars = [v for v in data.columns
              if some_contains(v, ['_flex', '_rot', '_x', '_y', '_z'])
              and not some_contains(v, ['_d1', '_d2', '_freq', '_range'])
              and v[:2] == 'L1']

# Ground-truth subsets. They are copied because get_range / get_average_range
# add columns in place; writing into a bare slice of `data` is a chained
# assignment whose effect pandas does not guarantee.
hg_data = data[data.filename.isin(hg_filenames)].copy()
t1_data = data[data.filename.isin(t1_filenames)].copy()

# Per-bout max - min range and mean peak-to-trough range of every variable.
hg_data = get_range(hg_data, angle_vars)
t1_data = get_range(t1_data, angle_vars)
hg_data = get_average_range(hg_data, angle_vars, dist = 20, height = None)
t1_data = get_average_range(t1_data, angle_vars, dist = 20, height = None)

In [None]:
# head grooming
# plot all grooming angles across time (subplots): one figure per ground-truth
# head-grooming bout, one panel per angle type, one trace per L1/R1 joint.
legs = ['L1', 'R1']
angle_types = ['_abduct', '_flex', '_rot']
angle_titles = ['abduction', 'flexion', 'rotation']
bout_numbers = np.unique(hg_data.behavior_bout.astype(int))

# The column selection does not depend on the bout, so it is done once per
# angle type. '_range' is excluded, as in the other plotting cells, so the
# per-bout range columns are never drawn as traces.
vars_by_type = {
    angle_type: [v for v in data.columns if some_contains(v, [angle_type])
                 and not some_contains(v, ['_d1', '_d2', '_freq', '_range'])
                 and v[:2] in legs]
    for angle_type in angle_types
}

for bout_number in bout_numbers:

    # Select from hg_data, the frame the bout numbers were taken from
    # (consistent with the t1 cell, which selects from t1_data).
    bout = hg_data[hg_data.behavior_bout == bout_number]
    fig, axs = plt.subplots(1, 3, sharex=True, sharey=True, figsize = (18, 3))
    ax = axs.T.flatten()
    print(bout.filename.iloc[0])
    # Legend entries of the last plotted bout, one list per panel; the legend
    # cell further down reads `handles`.
    handles = []
    labels = []

    for i, (angle_type, angle_title) in enumerate(zip(angle_types, angle_titles)):

        angle_vars = vars_by_type[angle_type]

        for var in angle_vars:
            angle = np.array(bout[var])
            t = np.arange(len(angle)) / fps
            ax[i].plot(t, angle, label = var)

        ax[i].set_xlabel('time (seconds)', fontsize = 14)
        ax[i].set_ylabel('angle (deg)', fontsize = 14)
        ax[i].set_title('{} angles ({}, bout {})'.format(angle_title, bout.flyid.iloc[0] , str(int(bout_number))), fontsize = 14)
        ax[i].tick_params(labelleft = True)

        hs, ls = ax[i].get_legend_handles_labels()
        handles.append(hs)
        labels.append(ls)  # was `hs`: the label list received handles

    sns.despine()
    plt.show()

In [None]:
# t1 grooming
# plot all grooming angles across time (subplots): one figure per ground-truth
# t1-grooming bout, one panel per angle type, one trace per L1/R1 joint.
legs = ['L1', 'R1']
angle_types = ['_abduct', '_flex', '_rot']
angle_titles = ['abduction', 'flexion', 'rotation']
bout_numbers = np.unique(t1_data.behavior_bout.astype(int))

# The column selection does not depend on the bout: do it once per angle type.
vars_by_type = {
    angle_type: [v for v in data.columns if some_contains(v, [angle_type])
                 and not some_contains(v, ['_d1', '_d2', '_freq', '_range'])
                 and v[:2] in legs]
    for angle_type in angle_types
}

for bout_number in bout_numbers:

    bout = t1_data[t1_data.behavior_bout == bout_number]
    fig, axs = plt.subplots(1, 3, sharex=True, sharey=True, figsize = (18, 3))
    ax = axs.T.flatten()
    print(bout.filename.iloc[0])
    # Legend entries of the last plotted bout, one list per panel; the legend
    # cell further down reads `handles`.
    handles = []
    labels = []

    for i, (angle_type, angle_title) in enumerate(zip(angle_types, angle_titles)):

        angle_vars = vars_by_type[angle_type]

        for var in angle_vars:
            angle = np.array(bout[var])
            t = np.arange(len(angle)) / fps
            ax[i].plot(t, angle, label = var)

        ax[i].set_xlabel('time (seconds)', fontsize = 14)
        ax[i].set_ylabel('{} angle (deg)'.format(angle_title), fontsize = 14)
        ax[i].set_title('{}, bout {}'.format(bout.flyid.iloc[0] , str(int(bout_number))), fontsize = 14)
        ax[i].tick_params(labelleft = True)

        hs, ls = ax[i].get_legend_handles_labels()
        handles.append(hs)
        labels.append(ls)  # was `hs`: the label list received handles

    sns.despine()
    plt.show()

In [None]:
# head grooming
# plot all grooming coords across time (subplots): one figure per ground-truth
# head-grooming bout, one panel per axis (x, y, z), one trace per L1/R1 joint.
legs = ['L1', 'R1']
coords = ['_x', '_y', '_z']
bout_numbers = np.unique(hg_data.behavior_bout.astype(int))

# The column selection does not depend on the bout: do it once per axis.
vars_by_coord = {
    coord: [v for v in data.columns if some_contains(v, [coord])
            and not some_contains(v, ['_d1', '_d2', '_freq', '_range'])
            and v[:2] in legs]
    for coord in coords
}

for bout_number in bout_numbers:

    # Select from hg_data, the frame the bout numbers were taken from
    # (consistent with the t1 cell, which selects from t1_data).
    bout = hg_data[hg_data.behavior_bout == bout_number]
    fig, axs = plt.subplots(1, 3, sharex=True, sharey=True, figsize = (18, 3))
    ax = axs.T.flatten()
    print(bout.filename.iloc[0])
    # Legend entries of the last plotted bout, one list per panel.
    handles = []
    labels = []

    for i, coord in enumerate(coords):

        coord_vars = vars_by_coord[coord]

        for var in coord_vars:
            position = np.array(bout[var])
            t = np.arange(len(position)) / fps
            ax[i].plot(t, position, label = var)

        ax[i].set_xlabel('time (seconds)', fontsize = 14)
        ax[i].set_ylabel('position (mm)', fontsize = 14)
        # coord[1] is the bare axis letter ('x', 'y' or 'z').
        ax[i].set_title('{} coordinates ({}, bout {})'.format(coord[1], bout.flyid.iloc[0] , str(int(bout_number))), fontsize = 14)
        ax[i].tick_params(labelleft = True)

        hs, ls = ax[i].get_legend_handles_labels()
        handles.append(hs)
        labels.append(ls)  # was `hs`: the label list received handles

    sns.despine()
    plt.show()

In [None]:
# t1 grooming
# plot all grooming coords across time (subplots): one figure per ground-truth
# t1-grooming bout, one panel per axis (x, y, z), one trace per L1/R1 joint.
legs = ['L1', 'R1']
coords = ['_x', '_y', '_z']
bout_numbers = np.unique(t1_data.behavior_bout.astype(int))

# The column selection does not depend on the bout: do it once per axis.
vars_by_coord = {
    coord: [v for v in data.columns if some_contains(v, [coord])
            and not some_contains(v, ['_d1', '_d2', '_freq', '_range'])
            and v[:2] in legs]
    for coord in coords
}

for bout_number in bout_numbers:

    bout = t1_data[t1_data.behavior_bout == bout_number]
    fig, axs = plt.subplots(1, 3, sharex=True, sharey=True, figsize = (18, 3))
    ax = axs.T.flatten()
    print(bout.filename.iloc[0])
    # Legend entries of the last plotted bout, one list per panel.
    handles = []
    labels = []

    for i, coord in enumerate(coords):

        coord_vars = vars_by_coord[coord]

        for var in coord_vars:
            position = np.array(bout[var])
            t = np.arange(len(position)) / fps
            ax[i].plot(t, position, label = var)

        ax[i].set_xlabel('time (seconds)', fontsize = 14)
        ax[i].set_ylabel('position (mm)', fontsize = 14)
        # coord[1] is the bare axis letter ('x', 'y' or 'z').
        ax[i].set_title('{} coordinates ({}, bout {})'.format(coord[1], bout.flyid.iloc[0] , str(int(bout_number))), fontsize = 14)
        ax[i].tick_params(labelleft = True)

        hs, ls = ax[i].get_legend_handles_labels()
        handles.append(hs)
        labels.append(ls)  # was `hs`: the label list received handles

    sns.despine()
    plt.show()

In [None]:
# plot legends
# One stand-alone legend figure per panel, built from the `handles` left
# behind by the most recently run plotting cell.
for panel_handles in handles:
    fig = plt.figure(figsize = (3,3))
    plt.legend(handles = panel_handles, loc = 'center', fontsize = 12, ncol = 1)
    plt.axis('off')
    plt.tight_layout()
    plt.show()

# Compact legend with joint names only, using the handles of the second panel.
labels = ['L1A', 'L1B', 'L1C', 'L1D', 'R1A', 'R1B', 'R1C', 'R1D']
fig = plt.figure(figsize = (3,1))
second_panel = handles[1]
plt.legend(handles = second_panel, labels = labels, loc = 'center', fontsize = 12, ncol = 4)
plt.axis('off')
plt.tight_layout()
plt.show()

In [None]:
# Box plots comparing t1 and head grooming: for every L1 flexion / rotation
# angle, the per-bout range ('_range') and the mean peak-to-trough range
# ('_avg_range'), with the individual bouts scattered over each box.
angle_types = ['_flex', '_rot']
angle_titles = ['flexion', 'rotation']
range_types = ['_range', '_avg_range']
ylabels = ['angle range (deg)', 'average angle range (deg)']
colors = ['xkcd:indigo blue', 'xkcd:wine red']

for range_type, ylabel in zip(range_types, ylabels):

    for angle_type, angle_title in zip(angle_types, angle_titles):

        angle_vars = [v for v in t1_data.columns if some_contains(v, [angle_type + range_type])
                      and v[:2] == 'L1']

        # One value per bout and joint. The means are computed once per
        # figure and only over the plotted columns: averaging the whole frame
        # for every joint was redundant and fails on string columns (filename,
        # flyid) with pandas >= 2.0.
        bouts_t1 = t1_data.groupby('behavior_bout')[angle_vars].mean()
        bouts_hg = hg_data.groupby('behavior_bout')[angle_vars].mean()
        angles_t1 = [np.array(bouts_t1[v]) for v in angle_vars]
        angles_hg = [np.array(bouts_hg[v]) for v in angle_vars]

        fig = plt.figure(figsize = (9, 4))
        # t1 boxes sit left and head-grooming boxes right of each joint's tick.
        t1_pos = np.arange(len(angles_t1)) * 2.0 - 0.4
        hg_pos = np.arange(len(angles_hg)) * 2.0 + 0.4
        bpl = plt.boxplot(angles_t1, positions=t1_pos, sym='', widths=0.6)
        bpr = plt.boxplot(angles_hg, positions=hg_pos, sym='', widths=0.6)

        for pos, vals in zip(t1_pos, angles_t1):
            plt.scatter([pos]*len(vals), vals, facecolor = 'w', edgecolor = colors[0])
        for pos, vals in zip(hg_pos, angles_hg):
            plt.scatter([pos]*len(vals), vals, facecolor = 'w', edgecolor = colors[1])

        set_box_color(bpl, colors[0])
        set_box_color(bpr, colors[1])

        # Empty lines only provide the legend entries.
        plt.plot([], c=colors[0], label='t1 grooming')
        plt.plot([], c=colors[1], label='head grooming')
        plt.xlabel(angle_title + ' angles', fontsize = 14)
        plt.ylabel(ylabel, fontsize = 14)
        plt.legend(fontsize = 12, loc = 'upper left')

        xlabels = [x.split('_')[0] for x in angle_vars]
        plt.xticks(np.arange(0, len(angle_vars) * 2, 2), labels = xlabels, fontsize = 12)
        plt.xlim(-2, len(angle_vars)*2)
        plt.show()

In [None]:
# Box plots comparing t1 and head grooming: per-bout mean of every L1
# flexion / rotation angle, with the individual bouts scattered over each box.
angle_types = ['_flex', '_rot']
angle_titles = ['flexion', 'rotation']
colors = ['xkcd:indigo blue', 'xkcd:wine red']

for angle_type, angle_title in zip(angle_types, angle_titles):

    angle_vars = [v for v in t1_data.columns if some_contains(v, [angle_type])
                  and not some_contains(v, ['_range'])
                  and v[:2] == 'L1']

    # One value per bout and joint. The means are computed once per figure
    # and only over the plotted columns: averaging the whole frame for every
    # joint was redundant and fails on string columns (filename, flyid) with
    # pandas >= 2.0.
    bouts_t1 = t1_data.groupby('behavior_bout')[angle_vars].mean()
    bouts_hg = hg_data.groupby('behavior_bout')[angle_vars].mean()
    angles_t1 = [np.array(bouts_t1[v]) for v in angle_vars]
    angles_hg = [np.array(bouts_hg[v]) for v in angle_vars]

    fig = plt.figure(figsize = (9, 4))
    # t1 boxes sit left and head-grooming boxes right of each joint's tick.
    t1_pos = np.arange(len(angles_t1)) * 2.0 - 0.4
    hg_pos = np.arange(len(angles_hg)) * 2.0 + 0.4
    bpl = plt.boxplot(angles_t1, positions=t1_pos, sym='', widths=0.6)
    bpr = plt.boxplot(angles_hg, positions=hg_pos, sym='', widths=0.6)

    for pos, vals in zip(t1_pos, angles_t1):
        plt.scatter([pos]*len(vals), vals, facecolor = 'w', edgecolor = colors[0])
    for pos, vals in zip(hg_pos, angles_hg):
        plt.scatter([pos]*len(vals), vals, facecolor = 'w', edgecolor = colors[1])

    set_box_color(bpl, colors[0])
    set_box_color(bpr, colors[1])

    # Empty lines only provide the legend entries.
    plt.plot([], c=colors[0], label='t1 grooming')
    plt.plot([], c=colors[1], label='head grooming')
    plt.xlabel(angle_title + ' angles', fontsize = 14)
    plt.ylabel('average angle (deg)', fontsize = 14)
    plt.legend(fontsize = 12, loc = 'upper left')

    xlabels = [x.split('_')[0] for x in angle_vars]
    plt.xticks(np.arange(0, len(angle_vars) * 2, 2), labels = xlabels, fontsize = 12)
    plt.xlim(-2, len(angle_vars)*2)
    plt.show()

In [None]:
# average position ranges
# Box plots comparing t1 and head grooming: for every L1 joint and axis, the
# per-bout range ('_range') and mean peak-to-trough range ('_avg_range') of
# the coordinate, with the individual bouts scattered over each box.
coord_types = ['_x', '_y', '_z']
range_types = ['_range', '_avg_range']
colors = ['xkcd:forest green', 'xkcd:red']
ylabels = ['{} position range (mm)', 'average {} position range (mm)']

for range_type, ylabel in zip(range_types, ylabels):

    for coord_type in coord_types:

        coord_vars = [v for v in t1_data.columns if some_contains(v, [coord_type + range_type])
                      and v[:2] == 'L1']

        # One value per bout and joint. The means are computed once per
        # figure and only over the plotted columns: averaging the whole frame
        # for every joint was redundant and fails on string columns (filename,
        # flyid) with pandas >= 2.0.
        bouts_t1 = t1_data.groupby('behavior_bout')[coord_vars].mean()
        bouts_hg = hg_data.groupby('behavior_bout')[coord_vars].mean()
        coords_t1 = [np.array(bouts_t1[v]) for v in coord_vars]
        coords_hg = [np.array(bouts_hg[v]) for v in coord_vars]

        fig = plt.figure(figsize = (9, 4))
        # t1 boxes sit left and head-grooming boxes right of each joint's tick.
        t1_pos = np.arange(len(coords_t1)) * 2.0 - 0.4
        hg_pos = np.arange(len(coords_hg)) * 2.0 + 0.4
        bpl = plt.boxplot(coords_t1, positions=t1_pos, sym='', widths=0.6)
        bpr = plt.boxplot(coords_hg, positions=hg_pos, sym='', widths=0.6)

        for pos, vals in zip(t1_pos, coords_t1):
            plt.scatter([pos]*len(vals), vals, facecolor = 'w', edgecolor = colors[0])
        for pos, vals in zip(hg_pos, coords_hg):
            plt.scatter([pos]*len(vals), vals, facecolor = 'w', edgecolor = colors[1])

        set_box_color(bpl, colors[0])
        set_box_color(bpr, colors[1])

        # Empty lines only provide the legend entries.
        plt.plot([], c=colors[0], label='t1 grooming')
        plt.plot([], c=colors[1], label='head grooming')
        plt.xlabel('joints', fontsize = 14)
        # coord_type[1] is the bare axis letter ('x', 'y' or 'z').
        plt.ylabel(ylabel.format(coord_type[1]), fontsize = 14)
        plt.legend(fontsize = 12, loc = 'upper left')

        xlabels = [x.split('_')[0] for x in coord_vars]
        # Tick positions follow coord_vars (the plotted columns); they used to
        # be sized from `angle_vars`, a leftover of an earlier cell, which did
        # not match the number of labels.
        plt.xticks(np.arange(0, len(coord_vars) * 2, 2), labels = xlabels, fontsize = 12)
        plt.xlim(-2, len(coord_vars)*2)
        plt.show()

In [None]:
# average position
# Box plots comparing t1 and head grooming: per-bout mean coordinate of every
# L1 joint, one figure per axis, with the individual bouts scattered over each
# box.
coord_types = ['_x', '_y', '_z']
colors = ['xkcd:forest green', 'xkcd:red']
for coord_type in coord_types:

    coord_vars = [v for v in t1_data.columns if some_contains(v, [coord_type])
                  and not some_contains(v, ['_range'])
                  and v[:2] == 'L1']

    # One value per bout and joint. The means are computed once per figure
    # and only over the plotted columns: averaging the whole frame for every
    # joint was redundant and fails on string columns (filename, flyid) with
    # pandas >= 2.0.
    bouts_t1 = t1_data.groupby('behavior_bout')[coord_vars].mean()
    bouts_hg = hg_data.groupby('behavior_bout')[coord_vars].mean()
    coords_t1 = [np.array(bouts_t1[v]) for v in coord_vars]
    coords_hg = [np.array(bouts_hg[v]) for v in coord_vars]

    fig = plt.figure(figsize = (9, 4))
    # t1 boxes sit left and head-grooming boxes right of each joint's tick.
    t1_pos = np.arange(len(coords_t1)) * 2.0 - 0.4
    hg_pos = np.arange(len(coords_hg)) * 2.0 + 0.4
    bpl = plt.boxplot(coords_t1, positions=t1_pos, sym='', widths=0.6)
    bpr = plt.boxplot(coords_hg, positions=hg_pos, sym='', widths=0.6)

    for pos, vals in zip(t1_pos, coords_t1):
        plt.scatter([pos]*len(vals), vals, facecolor = 'w', edgecolor = colors[0])
    for pos, vals in zip(hg_pos, coords_hg):
        plt.scatter([pos]*len(vals), vals, facecolor = 'w', edgecolor = colors[1])

    set_box_color(bpl, colors[0])
    set_box_color(bpr, colors[1])

    # Empty lines only provide the legend entries.
    plt.plot([], c=colors[0], label='t1 grooming')
    plt.plot([], c=colors[1], label='head grooming')
    plt.xlabel('joints', fontsize = 14)
    # coord_type[1] is the bare axis letter ('x', 'y' or 'z').
    plt.ylabel('average {} position (mm)'.format(coord_type[1]), fontsize = 14)
    plt.legend(fontsize = 12)

    xlabels = [x.split('_')[0] for x in coord_vars]
    # Tick positions follow coord_vars (the plotted columns); they used to be
    # sized from `angle_vars`, a leftover of an earlier cell, which did not
    # match the number of labels.
    plt.xticks(np.arange(0, len(coord_vars) * 2, 2), labels = xlabels, fontsize = 12)
    plt.xlim(-2, len(coord_vars)*2)
    plt.show()

In [None]:
# List the videos of one fly together with the number of rows each has in
# `data`. Only the last `flyid` assignment takes effect; the first is kept as
# an alternative to switch to by hand.
flyid = '3_0 522019'
flyid = '1_0 6182019'
videos = fly_videos[flyid]
for fname in videos:
    n_rows = len(data[data.filename == fname])
    print(fname + ' ({})'.format(n_rows))
    #print(get_url(flyid, filename))

In [None]:
# plot distribution of z coords
# Kernel density estimate of every L1 z-coordinate, one panel per grooming
# type. No separate plt.figure() call: plt.subplots already creates the
# figure, and the extra call only produced an empty one.
dfs = [t1_data, hg_data]
grooming_types = ['t1', 'head']
fig, axs = plt.subplots(1, 2, sharex=True, sharey=True, figsize = (16, 4))
ax = axs.T.flatten()
for i, (df, grooming_type) in enumerate(zip(dfs, grooming_types)):
    # Raw coordinates only: the derived '_z_range' / '_z_avg_range' columns
    # added to these frames (and derivative / frequency columns) also contain
    # '_z' but are not coordinates.
    angle_vars = [v for v in df.columns
                  if some_contains(v, ['_z'])
                  and not some_contains(v, ['_d1', '_d2', '_freq', '_range'])
                  and v[:2] == 'L1']
    for var in angle_vars:
        z_vals = df[var]
        z_vals = z_vals[np.isfinite(z_vals)] # ignores nans
        kernel = stats.gaussian_kde(z_vals)
        # Evaluate the density between the 5th and 95th percentile.
        grid = np.linspace(np.percentile(z_vals, 5), np.percentile(z_vals, 95), 1000)
        ax[i].plot(grid, kernel.pdf(grid), label = var)

    # Axis decoration is needed once per panel, not once per trace.
    ax[i].tick_params(labelbottom = True, labelleft = True)
    ax[i].set_xlabel('z-coordinate')
    ax[i].set_ylabel('PDF')
    ax[i].set_title('distribution of z-coordinates during {} grooming'.format(grooming_type))
    ax[i].legend(loc = 'upper left')

###### compute grooming scores for ground truth videos

In [None]:
# Features of the grooming score; the two mean z positions are negated
# (flip=True) before averaging.
features = ['L1B_rot_avg_range', 'L1A_flex_avg_range', 'L1E_z_avg_range', 'L1D_z', 'L1E_z']
# features = ['L1D_z', 'L1E_z']
flip = [False, False, False, True, True]
t1_features = get_bout_features(t1_data, features, flip, normalize = False)[1]
# bout_numbers ends up holding the head-grooming bouts.
bout_numbers, hg_features = get_bout_features(hg_data, features, flip, normalize = False)

# Score of a bout = mean of its features, ignoring NaNs.
t1_scores, hg_scores = (np.nanmean(feats, axis = 1) for feats in (t1_features, hg_features))
thresh = compute_thresh(t1_scores, hg_scores)

In [None]:
# Ground-truth scores of both grooming types on two horizontal rows, with the
# computed threshold drawn as a dotted vertical line.
fig = plt.figure(figsize=(9,3))
colors = ['xkcd:indigo blue', 'xkcd:wine red']
plt.yticks([0.25, 0.75], labels = ['t1 grooming', 'head grooming'], fontsize = 12)
score_rows = [(t1_scores, 0.25, colors[0], 't1 grooming'),
              (hg_scores, 0.75, colors[1], 'head grooming')]
for row_scores, row_y, row_color, row_label in score_rows:
    plt.scatter(row_scores, [row_y]*len(row_scores), edgecolor = row_color, facecolor = 'w', label = row_label)
plt.axvline(thresh, color = 'k', linestyle = ':')
plt.ylim([0, 1])
plt.xlabel('grooming score', fontsize = 14)
plt.show()

###### compute grooming scores for entire dataset (all  features)

In [None]:
# L1 flexion/rotation angles and x/y/z coordinates, without derived columns.
angle_vars = [v for v in data.columns
              if some_contains(v, ['_flex', '_rot', '_x', '_y', '_z'])
              and not some_contains(v, ['_d1', '_d2', '_freq', '_range'])
              and v[:2] == 'L1']
# Same features as for the ground-truth videos; the two mean z positions are
# negated before averaging.
features = ['L1B_rot_avg_range', 'L1A_flex_avg_range', 'L1E_z_avg_range', 'L1D_z', 'L1E_z']
flip = [False, False, False, True, True]
data = get_range(data, angle_vars)
data = get_average_range(data, angle_vars, dist = 20, height = None)
bout_numbers, all_features = get_bout_features(data, features, flip, normalize = False)
# Score of a bout = mean of its features, ignoring NaNs.
all_scores = np.nanmean(all_features, axis = 1)

# Store each bout's score on all of its rows.
data['grooming_score'] = np.nan
for bout_number, score in zip(bout_numbers, all_scores):
    data.loc[(data.behavior_bout == bout_number),'grooming_score'] = score

# Keep rows whose score lies strictly between both bounds. The two filters
# used to be applied to `data` one after the other, so the second assignment
# discarded the upper bound.
score_in_bounds = (data.grooming_score > 1.6) & (data.grooming_score < 8.25)
data_cleaned = data[score_in_bounds]

In [None]:
# plot all grooming angles across time (subplots): one figure per bout of the
# whole dataset, in order of increasing grooming score.
legs = ['L1', 'R1']
angle_types = ['_abduct', '_flex', '_rot']
angle_titles = ['abduction', 'flexion', 'rotation']
data_sorted = data.sort_values('grooming_score')
bout_numbers_sorted = np.array(data_sorted.drop_duplicates('behavior_bout').behavior_bout.astype(int))

# The column selection does not depend on the bout: do it once per angle type.
vars_by_type = {
    angle_type: [v for v in data.columns if some_contains(v, [angle_type])
                 and not some_contains(v, ['_d1', '_d2', '_freq', '_range'])
                 and v[:2] in legs]
    for angle_type in angle_types
}

for bout_number in bout_numbers_sorted:

    bout = data[data.behavior_bout == bout_number]
    fig, axs = plt.subplots(1, 3, sharex=True, sharey=True, figsize = (18, 3))
    ax = axs.T.flatten()
    print(bout.filename.iloc[0])
    print(bout.grooming_score.iloc[0])
    # Legend entries of the last plotted bout, one list per panel.
    handles = []
    labels = []

    for i, (angle_type, angle_title) in enumerate(zip(angle_types, angle_titles)):

        angle_vars = vars_by_type[angle_type]

        for var in angle_vars:
            angle = np.array(bout[var])
            t = np.arange(len(angle)) / fps
            ax[i].plot(t, angle, label = var)

        ax[i].set_xlabel('time (seconds)', fontsize = 14)
        ax[i].set_ylabel('{} angle (deg)'.format(angle_title), fontsize = 14)
        ax[i].set_title('{}, bout {}'.format(bout.flyid.iloc[0] , str(int(bout_number))), fontsize = 14)
        ax[i].tick_params(labelleft = True)

        hs, ls = ax[i].get_legend_handles_labels()
        handles.append(hs)
        labels.append(ls)  # was `hs`: the label list received handles

    sns.despine()
    plt.show()

###### bouts that the grooming score classifies as t1 grooming, but that are actually head grooming:
    06132019_fly3_0 R1C19  rot-cw-0.18 sec
    06132019_fly3_0 R2C11  str-ccw-0.09 sec
    05212019_fly4_0 R3C6  str-cw-0.36 sec (when only coords are used)

###### compute grooming scores for entire dataset (z coords only)

In [None]:
# L1 flexion/rotation angles and x/y/z coordinates, without derived columns.
angle_vars = [v for v in data.columns
              if some_contains(v, ['_flex', '_rot', '_x', '_y', '_z'])
              and not some_contains(v, ['_d1', '_d2', '_freq', '_range'])
              and v[:2] == 'L1']
# Score from the mean z positions of L1D and L1E only, not negated here.
features = ['L1D_z', 'L1E_z']
flip = [False, False]
data = get_average_range(get_range(data, angle_vars), angle_vars, dist = 20, height = None)
bout_numbers, all_features = get_bout_features(data, features, flip, normalize = False)
# Score of a bout = mean of its features, ignoring NaNs.
all_scores = np.nanmean(all_features, axis = 1)

# Overwrite the stored score of every bout with the z-only score.
data['grooming_score'] = np.nan
for bout_number, score in zip(bout_numbers, all_scores):
    in_bout = data.behavior_bout == bout_number
    data.loc[in_bout, 'grooming_score'] = score

In [None]:
# plot all grooming angles across time (subplots): one figure per bout of the
# whole dataset, in order of increasing (z-only) grooming score.
legs = ['L1', 'R1']
angle_types = ['_abduct', '_flex', '_rot']
angle_titles = ['abduction', 'flexion', 'rotation']
data_sorted = data.sort_values('grooming_score')
bout_numbers_sorted = np.array(data_sorted.drop_duplicates('behavior_bout').behavior_bout.astype(int))

# The column selection does not depend on the bout: do it once per angle type.
vars_by_type = {
    angle_type: [v for v in data.columns if some_contains(v, [angle_type])
                 and not some_contains(v, ['_d1', '_d2', '_freq', '_range'])
                 and v[:2] in legs]
    for angle_type in angle_types
}

for bout_number in bout_numbers_sorted:

    bout = data[data.behavior_bout == bout_number]
    fig, axs = plt.subplots(1, 3, sharex=True, sharey=True, figsize = (18, 3))
    ax = axs.T.flatten()
    print(bout.filename.iloc[0])
    print(bout.grooming_score.iloc[0])
    # Legend entries of the last plotted bout, one list per panel.
    handles = []
    labels = []

    for i, (angle_type, angle_title) in enumerate(zip(angle_types, angle_titles)):

        angle_vars = vars_by_type[angle_type]

        for var in angle_vars:
            angle = np.array(bout[var])
            t = np.arange(len(angle)) / fps
            ax[i].plot(t, angle, label = var)

        ax[i].set_xlabel('time (seconds)', fontsize = 14)
        ax[i].set_ylabel('{} angle (deg)'.format(angle_title), fontsize = 14)
        ax[i].set_title('{}, bout {}'.format(bout.flyid.iloc[0] , str(int(bout_number))), fontsize = 14)
        ax[i].tick_params(labelleft = True)

        hs, ls = ax[i].get_legend_handles_labels()
        handles.append(hs)
        labels.append(ls)  # was `hs`: the label list received handles

    sns.despine()
    plt.show()