In [1]:
import pandas as pd
import numpy as np
from datetime import datetime
import re
import os
import plotly.express as px
import plotly.graph_objects as go

In [2]:
pd.options.plotting.backend = "plotly"

In [3]:
datadir = '/Users/etytel01/Box/Katz lamprey kinematics/2021/Processed Data'
scalefile = '/Users/etytel01/Box/Katz lamprey kinematics/2021/Processed Data/scales.csv'

In [16]:
testfile = '/Users/etytel01/Box/Katz lamprey kinematics/2021/Processed Data/Exp_062921/Exp_062921/Animal2/2dpi/063021_A2V13_01DLC_resnet50_LesionsOct13shuffle1_1030000.h5'

In [17]:
def process_filepath(filepath):
    m = re.search(r'Animal(\d+)/(\d(h|d)pi|Control)/(\d{6})', filepath)
    if m is None:
        raise(Exception("Bad file name: {}".format(filepath)))
    animal, treatment, _, date = m.groups()
    date = datetime.strptime(date, '%m%d%y')
    return(date, animal, treatment)    

In [18]:
process_filepath(testfile)

(datetime.datetime(2021, 6, 30, 0, 0), '2', '2dpi')

In [19]:
scales = pd.read_csv(scalefile, parse_dates=['Date'], dtype = {'Treatment': pd.StringDtype()})

In [20]:
scales

Unnamed: 0,Date,Animal,Treatment,Scale
0,2021-06-30,1,1dpi,0.414299
1,2021-06-30,1,1dpi,0.414299
2,2021-07-01,1,2dpi,0.41488
3,2021-07-01,1,2dpi,0.41488
4,2021-06-29,1,2hpi,0.415793
5,2021-06-29,1,2hpi,0.415793
6,2021-07-02,1,3dpi,0.414109
7,2021-07-02,1,3dpi,0.414109
8,2021-06-29,1,Control,0.411436
9,2021-06-29,1,Control,0.411436


In [21]:
fps = 60

In [22]:
pointorder = ['snout', 'gill', 'mid1', 'mid2', 'anus', 'mid4',  'tailbase', 'tailtip']

def flatten_DLC_points(df):
    scorer = df.columns.levels[0][-1]
    df = df.xs(scorer, level="scorer", axis=1)

    df = df.stack(0)
    df.index.set_names('frame', level=0, inplace=True)

    df = df.reset_index()

    df["bodyparts"] = df["bodyparts"].astype("category").cat.reorder_categories(pointorder)
    df = df.sort_values(['frame', 'bodyparts'])

    return df

In [36]:
date, animal, treatment = process_filepath(testfile)

midline = pd.read_hdf(testfile)
midline = flatten_DLC_points(midline)
midline['Date'] = date
midline['Animal'] = int(animal)
midline['Treatment'] = treatment
midline['Treatment'] = midline['Treatment'].astype('string')

In [37]:
(date, animal, treatment)

(datetime.datetime(2021, 6, 30, 0, 0), '2', '2dpi')

In [38]:
midline.dtypes

coords
frame                  int64
bodyparts           category
likelihood           float64
x                    float64
y                    float64
Date          datetime64[ns]
Animal                 int64
Treatment             string
dtype: object

In [39]:
midline = pd.merge(midline, scales[['Animal', 'Treatment', 'Scale']], how='left', on = ['Animal','Treatment'])
midline.head()

Unnamed: 0,frame,bodyparts,likelihood,x,y,Date,Animal,Treatment,Scale
0,0,snout,0.999965,129.632828,158.449097,2021-06-30,2,2dpi,0.413758
1,0,snout,0.999965,129.632828,158.449097,2021-06-30,2,2dpi,0.413758
2,0,gill,0.994267,127.632233,117.417763,2021-06-30,2,2dpi,0.413758
3,0,gill,0.994267,127.632233,117.417763,2021-06-30,2,2dpi,0.413758
4,0,mid1,0.927043,147.987961,74.872963,2021-06-30,2,2dpi,0.413758


In [40]:
midline['t'] = midline['frame'] / fps
midline['xmm'] = midline['x'] * midline['Scale']
midline['ymm'] = midline['y'] * midline['Scale']

In [41]:
midline

Unnamed: 0,frame,bodyparts,likelihood,x,y,Date,Animal,Treatment,Scale,t,xmm,ymm
0,0,snout,0.999965,129.632828,158.449097,2021-06-30,2,2dpi,0.413758,0.0,53.636600,65.559557
1,0,snout,0.999965,129.632828,158.449097,2021-06-30,2,2dpi,0.413758,0.0,53.636600,65.559557
2,0,gill,0.994267,127.632233,117.417763,2021-06-30,2,2dpi,0.413758,0.0,52.808838,48.582521
3,0,gill,0.994267,127.632233,117.417763,2021-06-30,2,2dpi,0.413758,0.0,52.808838,48.582521
4,0,mid1,0.927043,147.987961,74.872963,2021-06-30,2,2dpi,0.413758,0.0,61.231180,30.979276
...,...,...,...,...,...,...,...,...,...,...,...,...
9131,570,mid4,0.998988,24.303572,216.998230,2021-06-30,2,2dpi,0.413758,9.5,10.055794,89.784721
9132,570,tailbase,0.999698,48.436047,167.474289,2021-06-30,2,2dpi,0.413758,9.5,20.040794,69.293802
9133,570,tailbase,0.999698,48.436047,167.474289,2021-06-30,2,2dpi,0.413758,9.5,20.040794,69.293802
9134,570,tailtip,0.999992,65.729019,139.251877,2021-06-30,2,2dpi,0.413758,9.5,27.195898,57.616557


In [42]:
px.line(midline[(midline["frame"] >= 10) & (midline["frame"] < 15)], x="xmm", y="ymm", color="frame")

In [121]:
def get_length(x, y):
    dx = x.diff()
    dy = y.diff()
    length = np.sum(sqrt(dx[1:]**2 + dy[1:]**2))
    return(length)

In [122]:
midline[midline["frame"] >= 10]

Unnamed: 0,frame,bodyparts,likelihood,x,y,Date,Animal,Treatment,Scale,t,xmm,ymm
80,10,snout,0.999991,744.759888,260.982300,2021-06-30,1,2dpi,0.4149,0.166667,309.000877,108.281556
81,10,gill,0.999779,790.327942,258.849365,2021-06-30,1,2dpi,0.4149,0.166667,327.907063,107.396602
82,10,mid1,0.999799,826.330200,265.189148,2021-06-30,1,2dpi,0.4149,0.166667,342.844400,110.026977
83,10,mid2,0.999189,863.551453,281.135376,2021-06-30,1,2dpi,0.4149,0.166667,358.287498,116.643067
84,10,anus,0.997293,902.827820,289.509277,2021-06-30,1,2dpi,0.4149,0.166667,374.583262,120.117399
...,...,...,...,...,...,...,...,...,...,...,...,...
1051,131,mid2,0.969515,102.224045,102.421700,2021-06-30,1,2dpi,0.4149,2.183333,42.412756,42.494763
1052,131,anus,0.995628,130.032074,136.476669,2021-06-30,1,2dpi,0.4149,2.183333,53.950307,56.624170
1053,131,mid4,0.998067,167.713318,151.997925,2021-06-30,1,2dpi,0.4149,2.183333,69.584256,63.063939
1054,131,tailbase,0.999520,201.805969,149.854202,2021-06-30,1,2dpi,0.4149,2.183333,83.729297,62.174509


In [43]:
nfiles = 0

for root, dirs, files in os.walk(datadir):
    for f1 in files:
        basename1, ext = os.path.splitext(f1)
        if ext != '.h5':
            continue

        infile1 = os.path.join(root, f1)
        outfile1 = os.path.join(root, basename1 + '.csv')

        if os.path.exists(outfile1):
            continue
        
        print('{}: {}'.format(nfiles + 1, outfile1))

        midline = pd.read_hdf(infile1)
        midline = flatten_DLC_points(midline)


        midline.to_csv(outfile1, index=False)
        nfiles += 1


1: /Users/etytel01/Box/Katz lamprey kinematics/2021/Processed Data/Exp_062921/Exp_062921/Animal2/1dpi/063021_A2V5_01DLC_resnet50_LesionsOct13shuffle1_1030000.csv
2: /Users/etytel01/Box/Katz lamprey kinematics/2021/Processed Data/Exp_062921/Exp_062921/Animal2/1dpi/063021_A2V5_02DLC_resnet50_LesionsOct13shuffle1_1030000.csv
3: /Users/etytel01/Box/Katz lamprey kinematics/2021/Processed Data/Exp_062921/Exp_062921/Animal2/1dpi/063021_A2V5_03DLC_resnet50_LesionsOct13shuffle1_1030000.csv
4: /Users/etytel01/Box/Katz lamprey kinematics/2021/Processed Data/Exp_062921/Exp_062921/Animal2/1dpi/063021_A2V5_04DLC_resnet50_LesionsOct13shuffle1_1030000.csv
5: /Users/etytel01/Box/Katz lamprey kinematics/2021/Processed Data/Exp_062921/Exp_062921/Animal2/1dpi/063021_A2V5_05DLC_resnet50_LesionsOct13shuffle1_1030000.csv
6: /Users/etytel01/Box/Katz lamprey kinematics/2021/Processed Data/Exp_062921/Exp_062921/Animal2/1dpi/063021_A2V5_06DLC_resnet50_LesionsOct13shuffle1_1030000.csv
7: /Users/etytel01/Box/Katz 

In [25]:
shufflename = midline.columns.levels[0][-1]
print("Shuffle: {}".format(shufflename))

Shuffle: DLC_resnet50_LesionsOct13shuffle1_1030000


This will get rid of the top level index, which isn't very important for us.

In [27]:
midline = midline.xs(shufflename, level="scorer", axis=1)

Split into x and y arrays

In [118]:
midline.reset_index()

bodyparts,index,snout,snout,snout,gill,gill,gill,mid1,mid1,mid1,...,anus,mid4,mid4,mid4,tailbase,tailbase,tailbase,tailtip,tailtip,tailtip
coords,Unnamed: 1_level_1,x,y,likelihood,x,y,likelihood,x,y,likelihood,...,likelihood,x,y,likelihood,x,y,likelihood,x,y,likelihood
0,0,805.507202,269.256775,0.999997,851.992126,275.009308,0.999897,896.400146,270.211273,0.998845,...,0.996928,1012.632080,246.722748,0.996878,1038.379028,262.566345,0.997818,1051.980591,277.437653,0.999360
1,1,799.924133,266.169647,0.999982,846.198303,274.019318,0.999842,891.261597,273.894501,0.999570,...,0.999301,1005.258423,241.948837,0.999426,1032.572998,251.593765,0.999510,1046.555176,263.973999,0.998138
2,2,794.135559,262.285675,0.999994,840.605408,273.317108,0.999183,884.112549,275.111511,0.999463,...,0.998824,996.298645,239.050049,0.998586,1021.531250,243.133667,0.999608,1038.262695,251.611435,0.999611
3,3,787.785400,258.928406,0.999999,831.362305,271.491882,0.999712,872.850708,277.839874,0.999196,...,0.994730,983.460022,240.125961,0.997416,1011.008057,237.750000,0.999897,1030.147583,242.828293,0.999896
4,4,782.153931,256.069397,0.999980,825.138123,268.755859,0.999614,863.695801,279.627777,0.997954,...,0.995010,978.418335,242.866409,0.999220,1004.564148,236.727127,0.999878,1024.317505,238.298904,0.999851
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
127,127,42.041489,21.862974,0.999502,71.495865,52.025368,0.998195,93.890007,90.628922,0.993461,...,0.993607,192.170700,143.837234,0.995228,222.137222,143.016449,0.997092,255.910355,142.026337,0.999389
128,128,35.835537,18.121704,0.999965,66.932800,47.324997,0.996320,91.247452,83.815018,0.986256,...,0.988230,187.581085,146.080948,0.996607,221.529160,142.289337,0.995644,251.012695,140.705276,0.999943
129,129,30.526577,14.828379,0.998960,65.308456,46.052628,0.995395,88.988815,79.244041,0.986264,...,0.995456,180.305817,148.374847,0.999637,212.659836,144.845459,0.999549,241.566422,138.927628,0.999970
130,130,24.000473,11.739753,0.998812,61.215580,40.943035,0.998110,85.339905,73.733307,0.994646,...,0.998459,173.242691,150.525726,0.999405,207.471832,147.204727,0.999755,234.311005,139.556335,0.999943


Int64Index([  0,   1,   2,   3,   4,   5,   6,   7,   8,   9,
            ...
            122, 123, 124, 125, 126, 127, 128, 129, 130, 131],
           dtype='int64', length=132)

In [59]:
pointorder = ['snout', 'gill', 'mid1', 'mid2', 'anus', 'mid4',  'tailbase', 'tailtip']

In [17]:
x = midline.xs("x", level="coords", axis=1)
y = midline.xs("y", level="coords", axis=1)

In [28]:
x = x.stack()
y = y.stack()

In [81]:
x

Unnamed: 0_level_0,scorer,DLC_resnet50_LesionsOct13shuffle1_1030000
Unnamed: 0_level_1,bodyparts,Unnamed: 2_level_1
0,anus,975.182983
0,gill,851.992126
0,mid1,896.400146
0,mid2,936.800476
0,mid4,1012.632080
...,...,...
131,mid2,102.224045
131,mid4,167.713318
131,snout,17.691586
131,tailbase,201.805969


In [84]:
x.loc[(slice(None), pointorder), slice(None)]

Unnamed: 0_level_0,scorer,DLC_resnet50_LesionsOct13shuffle1_1030000
Unnamed: 0_level_1,bodyparts,Unnamed: 2_level_1
0,snout,805.507202
1,snout,799.924133
2,snout,794.135559
3,snout,787.785400
4,snout,782.153931
...,...,...
127,tailtip,255.910355
128,tailtip,251.012695
129,tailtip,241.566422
130,tailtip,234.311005


In [77]:
x.index

MultiIndex([(  0,     'anus'),
            (  0,     'gill'),
            (  0,     'mid1'),
            (  0,     'mid2'),
            (  0,     'mid4'),
            (  0,    'snout'),
            (  0, 'tailbase'),
            (  0,  'tailtip'),
            (  1,     'anus'),
            (  1,     'gill'),
            ...
            (130, 'tailbase'),
            (130,  'tailtip'),
            (131,     'anus'),
            (131,     'gill'),
            (131,     'mid1'),
            (131,     'mid2'),
            (131,     'mid4'),
            (131,    'snout'),
            (131, 'tailbase'),
            (131,  'tailtip')],
           names=[None, 'bodyparts'], length=1056)

In [90]:
idx = pd.IndexSlice
xy = midline.loc[:, idx[:, ["x", "y"]]].stack(0)

In [94]:
xy = xy.reset_index()
xy

coords,level_0,bodyparts,x,y
0,0,anus,975.182983,241.875992
1,0,gill,851.992126,275.009308
2,0,mid1,896.400146,270.211273
3,0,mid2,936.800476,252.487213
4,0,mid4,1012.632080,246.722748
...,...,...,...,...
1051,131,mid2,102.224045,102.421700
1052,131,mid4,167.713318,151.997925
1053,131,snout,17.691586,10.456287
1054,131,tailbase,201.805969,149.854202


In [96]:
xy = xy.rename(columns={"level_0": "frame"})

In [105]:
xy["bodyparts"] = xy["bodyparts"].astype("category").cat.reorder_categories(pointorder)

In [111]:
xy = xy.sort_values(['frame', 'bodyparts'])

In [89]:
xy = xy.loc[(slice(None), pointorder),]

In [120]:
def flatten_DLC_points(df):
    scorer = df.columns.levels[0][-1]
    df = df.xs(shufflename, level="scorer", axis=1)

    return df

In [121]:
flatten_DLC_points(midline)

KeyError: 'Level scorer not found'

In [117]:
px.line(xy, x="x", y="y", color='frame')

In [None]:
px.line()