In [39]:
%matplotlib inline
%run /media/turritopsis/katie/grooming/t1-grooming/grooming_functions.ipynb

import os 
import pandas as pd 
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
import gc
from scipy import signal, stats

# Suppress every warning for the rest of the session.
# NOTE(review): this is a blanket filter, so warnings raised by later cells (pandas, numpy,
# fastparquet, ...) are hidden as well — consider narrowing it to specific categories.
warnings.filterwarnings('ignore')

In [40]:
# Behavior label; the loader below builds the '<behavior>_class' column name from it.
behavior = 't1_grooming'
# Directory containing the per-line summary parquet files that are listed and read below.
prefix = '/media/turritopsis/pierre/gdrive/Tuthill Lab Shared/Pierre/summaries/v3-b2/lines-t1_grooming'
# Directory the processed parquet outputs are written to.
prefix_out = '/media/turritopsis/katie/grooming/summaries/v3-b2'

In [None]:
# Take only the first os.walk() entry, i.e. the top level of `prefix`, and keep its
# file names in sorted order.
root, dirs, files = next(os.walk(prefix))
files.sort()

In [54]:
# Select recordings by substring match on the file name, then rebuild `files` as the
# concatenation of the four groups (wt, hg, ag, eg), in that order.
wt_flies = [
    name for name in files
    if 'Berlin-WT' in name and not ('offball' in name or 'headless' in name)
]
hg_flies = [name for name in files if '39A11-gal4xUAS-10x-ChrimsonR' in name]
ag_flies = [name for name in files if 'AntennalGrooming' in name]
eg_flies = [name for name in files if 'EyeGrooming' in name]
files = wt_flies + hg_flies + ag_flies + eg_flies

In [74]:
# Maps each summary parquet file name to the label written into the `line` column by the
# loader cell below. All four Berlin-WT files share the single label 'berlin_wt'.
line_dict = {'evyn--Berlin-WT.pq':'berlin_wt',
             'sarah--rv1-Berlin-WT.pq':'berlin_wt',
             'sarah--rv3-Berlin-WT.pq':'berlin_wt',
             'sarah--rv4-Berlin-WT.pq':'berlin_wt',
             'sarah--rv2-39A11-gal4xUAS-10x-ChrimsonR.pq':'head_grooming',
             'sarah--rv3-AntennalGrooming-w;25F11-AD;27H08-DBDxUAS-10x-ChrimsonR.pq':'antennal_grooming',
             'sarah--rv3-AntennalGrooming-w;VT005525-AD(100C03);27H08-DBDxUAS-10x-ChrimsonR.pq':'antennal_grooming2',
             'sarah--rv3-EyeGrooming-w;VT017251-LexA(3012796)xLexAop-Chrimson-tdTomato.pq':'eye_grooming'}

# Alternative labelling keyed by the same file names; it is not referenced by any of the
# cells visible here.
# NOTE(review): both AntennalGrooming file names contain '27H08-DBD', yet only the
# VT005525-AD file is labelled '27H08' — confirm that label is the intended one.
line_dict_alt = {'evyn--Berlin-WT.pq':'berlin_wt',
             'sarah--rv1-Berlin-WT.pq':'berlin_wt',
             'sarah--rv3-Berlin-WT.pq':'berlin_wt',
             'sarah--rv4-Berlin-WT.pq':'berlin_wt',
             'sarah--rv2-39A11-gal4xUAS-10x-ChrimsonR.pq':'39A11',
             'sarah--rv3-AntennalGrooming-w;25F11-AD;27H08-DBDxUAS-10x-ChrimsonR.pq':'25F11',
             'sarah--rv3-AntennalGrooming-w;VT005525-AD(100C03);27H08-DBDxUAS-10x-ChrimsonR.pq':'27H08',
             'sarah--rv3-EyeGrooming-w;VT017251-LexA(3012796)xLexAop-Chrimson-tdTomato.pq':'eye_grooming'}

In [75]:
# Load every selected summary file, split the frames classified as `behavior` into bouts,
# keep the bouts that are at least `thresh` rows long, and stack them into one table.
#
# Each kept bout gets:
#   * `behavior_bout` — a running bout id that is unique across all files (starts at 1 and
#     only advances for bouts that are kept), and
#   * `line`          — the label looked up from `line_dict` by file name.
bout_num = 1   # next global bout id to hand out
thresh = 50    # minimum number of rows for a bout to be kept
datas = []

# Column names are derived from `behavior` so the class and bout-number columns always
# refer to the same behavior.
class_col = behavior + '_class'
bout_col = behavior + '_bout_number'

for file in files:

    print(file)
    path = os.path.join(prefix, file)
    data = pd.read_parquet(path, engine='fastparquet')
    if len(data) == 0:
        # Nothing to split; skip before doing any column bookkeeping.
        continue

    # Columns to carry forward: everything except the listed diagnostic/bookkeeping
    # suffixes, plus the two columns added per bout below.
    cols_good = np.unique([v for v in data.columns
              if not some_contains(v, ['_range', '_score', '_error', '_ncams',
                                       '_prob', '_class', '_bout_number'])])
    cols_good = np.append(cols_good, ['behavior_bout', 'line'])

    # Rows flagged as this behavior (the class column is used directly as a boolean mask).
    dsub = data[data[class_col]]
    bout_ids = dsub[bout_col]
    bout_numbers = np.unique(bout_ids[~bout_ids.isna()])

    for bout_number in bout_numbers:
        bout = dsub[bout_ids == bout_number]
        if len(bout) < thresh:
            # Too short: dropped, and it does not consume a bout id.
            continue
        # assign() returns a new frame, so nothing is written onto a filtered slice of
        # `dsub` (the original in-place assignment relied on that slice being a copy).
        bout = bout.assign(behavior_bout=bout_num, line=line_dict[file])
        datas.append(bout[cols_good])
        bout_num += 1

if not datas:
    raise ValueError(
        'no %s bouts with at least %d rows were found in the selected files'
        % (behavior, thresh))

data = pd.concat(datas)
# Fly identifier: fly number and recording date joined by a space.
data['flyid'] = data['Fly #'].astype(str) + ' ' + data['Date'].astype(str)

evyn--Berlin-WT.pq
sarah--rv1-Berlin-WT.pq
sarah--rv3-Berlin-WT.pq
sarah--rv4-Berlin-WT.pq
sarah--rv2-39A11-gal4xUAS-10x-ChrimsonR.pq
sarah--rv3-AntennalGrooming-w;25F11-AD;27H08-DBDxUAS-10x-ChrimsonR.pq
sarah--rv3-AntennalGrooming-w;VT005525-AD(100C03);27H08-DBDxUAS-10x-ChrimsonR.pq
sarah--rv3-EyeGrooming-w;VT017251-LexA(3012796)xLexAop-Chrimson-tdTomato.pq


In [64]:
# Run a full garbage-collection pass; as the last expression of the cell, the count
# returned by gc.collect() is displayed as the cell output.
gc.collect()

20

In [66]:
# adjust data
def adjust_rot_angles(angles, angle_names):
    """Shift angle values above a per-column cutoff down by 360.

    Rotation columns (names containing '_rot') are matched against a list of substring
    tags; for every tag found in the name, values greater than that tag's cutoff have
    360 subtracted. Columns whose name contains '_abduct' or 'A_flex' are handled the
    same way with a fixed cutoff of 50.

    Parameters
    ----------
    angles : mapping of column name -> array-like
        Indexed with ``angles[name]`` for both reading and writing; it is modified in
        place, and each touched column is replaced by a NumPy array.
    angle_names : iterable of str
        Names of the columns to consider.

    Returns
    -------
    The same ``angles`` object that was passed in.
    """
    rot_cutoffs = zip(
        ['2', '3', 'L1A', 'L1B', 'L1C', 'R1A', 'R1B', 'R1C'],
        np.array([-50, -20, 20, -70, 10, 20, 70, -30]),
    )
    for tag, cutoff in rot_cutoffs:
        for name in angle_names:
            if '_rot' not in name or tag not in name:
                continue
            values = np.array(angles[name])
            too_high = values > cutoff
            values[too_high] = values[too_high] - 360
            angles[name] = values

    for name in angle_names:
        if not ('_abduct' in name or 'A_flex' in name):
            continue
        values = np.array(angles[name])
        too_high = values > 50
        values[too_high] = values[too_high] - 360
        angles[name] = values

    return angles

# Angle columns: names matching one of the angle suffixes, excluding names that match any
# of the '_d1' / '_d2' / '_freq' / '_range' suffixes.
angle_vars = np.unique([
    col
    for col in data.columns
    if some_contains(col, ['_BC', '_flex', '_rot', '_abduct'])
    and not some_contains(col, ['_d1', '_d2', '_freq', '_range'])
])

# Apply the three passes in order: correct_angles, then adjust_rot_angles, then
# normalize_data. correct_angles / normalize_data are not defined in this notebook —
# presumably they come from the grooming_functions notebook loaded via %run.
data = normalize_data(
    adjust_rot_angles(correct_angles(data, angle_vars), angle_vars)
)

In [67]:
# remove head_grooming from t1_grooming data
# Candidate feature columns: names starting with 'L1' that match one of the
# flex / rot / x / y / z suffixes and none of the '_d1' / '_d2' / '_freq' / '_range' ones.
features = [
    col
    for col in data.columns
    if some_contains(col, ['_flex', '_rot', '_x', '_y', '_z'])
    and not some_contains(col, ['_d1', '_d2', '_freq', '_range'])
    and col.startswith('L1')
]
feature_names = [
    'L1B_rot_avg_range',
    'L1A_flex_avg_range',
    'L1E_z_avg_range',
    'L1D_z',
    'L1E_z',
]
# One flag per entry of `feature_names`.
flip = [False, False, False, True, True]

# compute_grooming_scores is defined outside this notebook; the filter below relies on
# it providing a `grooming_score` column.
data = compute_grooming_scores(
    data, features, feature_names, flip=flip, dist=20, norm=False
)

# Keep rows with 1.6 < grooming_score < 8.25 (both bounds strict).
df0 = data.loc[data['grooming_score'] < 8.25]
df = df0.loc[df0['grooming_score'] > 1.6]

In [68]:
# save with the grooming score before removing bouts with certain scores?
# Destination for the unfiltered, scored table.
# out = os.path.join(prefix_out, 'lines-' + behavior + '_onball_processed_all_gs.parquet')
out = os.path.join(prefix_out, 'subset_t1_grooming_all_gs.parquet')

# Drop the diagnostic/bookkeeping columns. '_score' is not in this exclusion list, so
# `grooming_score` is kept in the saved file.
cols_good = np.unique([
    col
    for col in data.columns
    if not some_contains(
        col, ['_range', '_error', '_ncams', '_prob', '_class', '_bout_number']
    )
])
data = data.loc[:, cols_good]
data.to_parquet(out, compression='gzip')

In [69]:
# Apply the same column exclusion to the score-filtered table `df`.
cols_good = np.unique([
    col
    for col in df.columns
    if not some_contains(
        col, ['_range', '_error', '_ncams', '_prob', '_class', '_bout_number']
    )
])
df = df.loc[:, cols_good]


In [70]:
# Write the score-filtered table `df` to prefix_out as a gzip-compressed parquet file.
# The commented-out line below is an alternative output name built from `behavior`.
# path_out = os.path.join(prefix_out, 'lines-' + behavior + '_onball_processed.parquet')
path_out = os.path.join(prefix_out, 'subset_t1_grooming.parquet')
df.to_parquet(path_out, compression = 'gzip')

In [73]:
# Row count and number of distinct bouts: first for the score-filtered table, then for
# the full table.
for frame in (df, data):
    print(len(frame))
    print(len(np.unique(frame.behavior_bout)))

324631
964
359873
1120
