In [1]:
%matplotlib inline
%run /media/turritopsis/katie/grooming/t1-grooming/grooming_functions.ipynb

import os 
import pandas as pd 
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
from scipy import signal, stats

warnings.filterwarnings('ignore')

In [2]:
behavior = 't1_grooming'
prefix = '/media/turritopsis/pierre/gdrive/Tuthill Lab Shared/Pierre/summaries/v3-b2/days'
prefix_out = '/media/turritopsis/katie/grooming/summaries/v3-b2'
path_out = os.path.join(prefix_out, behavior + '_processed.parquet')

In [3]:
(root, dirs, files) = next(os.walk(prefix))
files = sorted(files)
max_bout = 0
datas = []

for file in files:
    print(file)
    path = os.path.join(prefix, file)
    data = pd.read_parquet(path, engine='fastparquet')
    dsub = data[data[behavior + '_class']]
    dsub = dsub[~dsub.date.isnull()]
    bout_var = behavior + '_bout_number' 
    dsub.loc[:, bout_var] += max_bout
    max_bout = np.max(dsub[bout_var])
    datas.append(dsub)

data = pd.concat(datas)

all_1.10.20.parquet
all_1.15.20.parquet
all_1.7.20.parquet
all_1.8.20.parquet
all_1.9.20.parquet
all_10.1.20.parquet
all_10.12.20.parquet
all_10.13.20.parquet
all_10.14.20.parquet
all_10.15.20.parquet
all_10.16.20.parquet
all_10.19.20.parquet
all_10.2.20.parquet
all_10.20.20.parquet
all_10.21.20.parquet
all_10.22.20.parquet
all_10.30.19.parquet
all_10.31.19.parquet
all_10.5.20.parquet
all_10.6.20.parquet
all_10.7.19.parquet
all_10.7.20.parquet
all_10.8.19.parquet
all_10.8.20.parquet
all_10.9.20.parquet
all_11.1.19.parquet
all_11.10.20.parquet
all_11.11.20.parquet
all_11.13.19.parquet
all_11.15.19.parquet
all_11.16.20.parquet
all_11.17.20.parquet
all_11.18.19.parquet
all_11.18.20.parquet
all_11.19.19.parquet
all_11.19.20.parquet
all_11.20.19.parquet
all_11.20.20.parquet
all_11.21.19.parquet
all_11.23.20.parquet
all_11.30.20.parquet
all_11.4.19.parquet
all_11.5.19.parquet
all_11.5.20.parquet
all_11.6.20.parquet
all_11.9.20.parquet
all_12.1.20.parquet
all_12.10.19.parquet
all_12.16.19.par

In [4]:
data['behavior_bout'] = data[behavior + '_bout_number']
data['flyid'] = data['fly'].astype(str) + ' ' + data['date'].astype(str)
data = remove_short_bouts(data, 50)
data = adjust_bout_numbers(data)

In [5]:
# load data
def adjust_rot_angles(angles, angle_names):
    conds = ['2', '3', 'L1A', 'L1B', 'L1C', 'R1A', 'R1B', 'R1C']
    offsets = np.array([-50, -20, 20, -70, 10, 20, 70, -30])
    for j in range(len(conds)):
        rot_angs = [r for r in angle_names if '_rot' in r and conds[j] in r]
        for ang in rot_angs:
            r = np.array(angles[ang])
            r[r > offsets[j]] = r[r > offsets[j]] - 360
            angles[ang] = r
        
    abduct_angs = [r for r in angle_names if '_abduct' in r or 'A_flex' in r]
    for ang in abduct_angs:
        r = np.array(angles[ang])
        r[r > 50] = r[r > 50] - 360
        angles[ang] = r
        
    return angles

angle_vars = np.unique([v for v in data.columns
              if some_contains(v, ['_BC', '_flex', '_rot', '_abduct'])
              and not some_contains(v, ['_d1', '_d2', '_freq', '_range'])])
data = correct_angles(data, angle_vars)
data = adjust_rot_angles(data, angle_vars)

In [6]:
fly_data, fly_names_sorted = data_per_fly(data)

In [7]:
# remove head_grooming from t1_grooming data
features = [v for v in data.columns
              if some_contains(v, ['_flex', '_rot', '_x', '_y', '_z'])
              and not some_contains(v, ['_d1', '_d2', '_freq', '_range'])
              and v[:2] == 'L1']
feature_names= ['L1B_rot_avg_range', 'L1A_flex_avg_range', 'L1E_z_avg_range', 'L1D_z', 'L1E_z']
flip = [False, False, False, True, True]
data = compute_grooming_scores(data, features, feature_names, flip = flip, dist=20, norm=False)
data = data[data.grooming_score < 8.25]
data = data[data.grooming_score > 1.6]

In [37]:
# add velocity and acceleration columns to data
fps = 300.0
bout_numbers = np.unique(np.array(data.behavior_bout))
angle_vars = [v for v in data.columns
              if some_contains(v, ['_flex', '_abduct', '_rot', '_BC'])
              and not some_contains(v, ['_d1', '_d2', '_freq', '_range'])]

dt = 1/fps
s = 1.0/dt
s2 = 1.0 / (dt * dt)

for j in range(len(bout_numbers)):
    mask = data.behavior_bout == bout_numbers[j]
    bout_df = data.loc[mask]
    for ang in angle_vars:
        bout = np.array(bout_df[ang])
        data.loc[mask, ang + '_d1'] = signal.savgol_filter(bout, 5, 3, deriv=1) * s
        data.loc[mask, ang + '_d2'] = signal.savgol_filter(bout, 5, 3, deriv=2) * s2

MemoryError: Unable to allocate 5.87 GiB for an array with shape (348, 2265837) and data type float64

In [40]:
cols_good = np.unique([v for v in data.columns
              if not some_contains(v, ['_score', '_error', '_ncams', '_prob', '_class', '_bout_number'])])
data[cols_good]

MemoryError: Unable to allocate 5.87 GiB for an array with shape (348, 2265837) and data type float64

In [39]:
path_out = os.path.join(prefix_out, behavior + '_angles_coords.parquet')
data.to_parquet(path_out, compression = 'gzip')

In [38]:
cols_good = np.unique([v for v in data.columns
              if not some_contains(v, ['walking_class'])])
data = data[cols_good]

In [8]:
print(len(data))
print(len(np.unique(data.behavior_bout)))

1273507
6279
