# Applying Complex Orthogonal Decomposition to lampreys swimming in fluid environments of varying viscosity
# Part I - Pre-process
## In this notebook, we pre-process the original dataset, to obtain quantities such as center of mass and swimming velocity. This prepares the dataset to be analyzed by using complex orthogonal decomposition.
### Author: Yuexia Luna Lin (luna.lin@epfl.ch)
### Data provided by Prof. Eric Tytell.

# Start by loading some necessary libraries, files. Pre-process swimming dataset to compute swimming speed, body axis, etc.

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from matplotlib.animation import FuncAnimation
from matplotlib import rc
import zipfile

import os
from os import listdir
from os.path import isfile, isdir, join
from scipy.interpolate import interp1d
from scipy.signal import hilbert
from scipy.fft import rfft, irfft
from scipy.linalg import eigh
from scipy.optimize import curve_fit, brute, minimize

# The following two lines make it easy to convert numbers written with a
# decimal comma into numbers written with a decimal point
import locale
locale.setlocale(locale.LC_NUMERIC, "fr_CH.ISO8859-15")
import time

# To read Eric's h5 file, we can't use Pandas since it requires a particular structure within the HDF5 file.
# So we need this library
import h5py

#%matplotlib inline
%matplotlib notebook

# Read in dataset and pre-process it.

The original h5 file cannot be directly read in by Pandas. We use h5py to read it in, then we convert it to Pandas DataFrame for easier manipulation.

We also add columns for center of mass (comx, comy), body orientation (bodyaxisx, bodyaxisy), tracker coordinates in the body frame (bodycoordx, bodycoordy), and swimming speed (velx, vely).

For convenience, we will save the processed dataset as an h5 file named "processed_midline_all.h5".

# 1. We first compute the center of mass and the body axis. Then we rotate the body coordinates so that the body is aligned with the $x$-axis.

To compute the body axis, we tried both a least squares fit of the midline data and a principal component analysis (notice it is exactly POD). After inspecting several examples, we conclude that the two approaches give extremely similar results.

PCA has the benefit that when the fish's body becomes more vertical, the results remain stable and consistent.
So we choose this.

In [2]:
######################### ########################## ########################## ########################## 
## Uncomment the code below if you need to process "midlines_all.h5" for the first time
######################### ########################## ########################## ########################## 

# A simple PCA function
def PCA(df):
    """Return the centroid and the dominant principal direction of a point set.

    `df` is a Pandas DataFrame whose 'mxmm' and 'mymm' columns hold the
    x and y coordinates of the points.

    Returns a tuple (comx, comy, pca): the mean x, the mean y, and the unit
    vector belonging to the largest singular value of the 2x2 scatter matrix
    of the centred points. The sign of `pca` is chosen so that its dot
    product with the first (centred) point of `df` is non-negative.
    """

    xs = np.asarray(df['mxmm'].tolist())
    ys = np.asarray(df['mymm'].tolist())
    comx = np.mean(xs)
    comy = np.mean(ys)

    # Rows are the centred x and y coordinates; columns are the points.
    centred = np.vstack([xs - comx, ys - comy])

    # 2x2 scatter matrix of the centred points and its singular vectors.
    scatter = centred @ centred.T
    vectors, strengths, _ = np.linalg.svd(scatter)
    pca = vectors[:, np.argmax(strengths)]

    # Orient the axis towards the first point in the frame rather than away from it.
    first_point = centred[:, 0]
    if np.dot(pca, first_point) < 0:
        pca = -pca
    return comx, comy, pca



# Read the raw dataset from the HDF5 file. The file is opened in a `with`
# block so that the handle is closed as soon as the data has been copied
# into memory (previously the handle was left open for the whole session).
#
# In the h5 file, there is a dataset named 'data'.
# To convert it to a pandas DataFrame, we first need a plain Numpy ndarray.
# Casting the dataset to ndarray gives an array of numpy.void objects, so we
# convert that to a list and cast the whole thing to ndarray again.
with h5py.File("../Data/midlines_all.h5", 'r') as h5file:
    dat = np.array( (np.array(h5file['data'])).tolist()  )

column_names = ['filename', 'date', 'indiv', 'trial', 't', 'frame', 'point', 'mxmm', 'mymm', 'viscosity', 'len']

data_frame = pd.DataFrame(dat, columns=column_names)

# These columns hold identifiers / counters, so we store them as integers.
int_columns = ['filename', 'date', 'indiv', 'trial', 'frame', 'point', 'viscosity']
for col in int_columns:
    data_frame[col] = data_frame[col].astype('int')

# Add (initially empty, i.e. NaN) columns for the quantities computed below.
complete_col_names = column_names.copy()
new_fields = ['comx', 'comy', 'bodyaxisx', 'bodyaxisy', 'bodycoordx', 'bodycoordy']
complete_col_names.extend(new_fields)

data_frame = data_frame.reindex(columns = complete_col_names)

# Some of the fields need a bit of massaging:
# 1. frames ought to start at 0
# 2. we add columns for the center of mass and the body axis

individuals = data_frame.loc[:,'indiv'].unique().tolist()

for ind in individuals:
    # 'indiv' and 'trial' are never modified in this loop, so the row masks
    # built from them can be computed once and reused.
    indiv_mask = data_frame['indiv'] == ind
    trials = data_frame.loc[indiv_mask, 'trial'].unique().tolist()
    for tr in trials:
        print("working on individual {}, trial {}".format(ind, tr))

        trial_mask = indiv_mask & (data_frame['trial'] == tr)

        # Always write through data_frame.loc[mask, column] so that we modify
        # the original frame rather than a temporary copy.
        min_frame = data_frame.loc[trial_mask, 'frame'].min()

        # Subtract the minimum frame number so that the frame number starts at 0
        data_frame.loc[trial_mask, 'frame'] = \
            data_frame.loc[trial_mask, 'frame'].subtract(min_frame)

        # Get the list of (shifted) frame numbers of this trial
        frames = data_frame.loc[trial_mask, 'frame'].unique().tolist()

        for fr in frames:
            # The frame mask must be rebuilt here because 'frame' was just shifted.
            frame_mask = trial_mask & (data_frame['frame'] == fr)

            # apply PCA to each frame in each trial
            comx, comy, pca = PCA(data_frame.loc[frame_mask])

            # set the values of comx, comy, bodyaxisx, bodyaxisy of this frame
            data_frame.loc[frame_mask, 'comx'] = comx
            data_frame.loc[frame_mask, 'comy'] = comy
            data_frame.loc[frame_mask, 'bodyaxisx'] = pca[0]
            data_frame.loc[frame_mask, 'bodyaxisy'] = pca[1]

# The following calculation can be applied to the entire dataframe at once:
# project (x, y), taken relative to the center of mass in the lab frame,
# onto the body axis (bodycoordx) and onto its perpendicular (bodycoordy).
rel_x = data_frame['mxmm'] - data_frame['comx']
rel_y = data_frame['mymm'] - data_frame['comy']
data_frame.loc[:, 'bodycoordx'] = rel_x * data_frame['bodyaxisx'] \
                                   + rel_y * data_frame['bodyaxisy']
data_frame.loc[:, 'bodycoordy'] = - rel_x * data_frame['bodyaxisy'] \
                                   + rel_y * data_frame['bodyaxisx']

working on individual 1, trial 1
working on individual 1, trial 10
working on individual 1, trial 11
working on individual 1, trial 12
working on individual 1, trial 13
working on individual 1, trial 14
working on individual 1, trial 15
working on individual 1, trial 16
working on individual 1, trial 17
working on individual 1, trial 18
working on individual 1, trial 19
working on individual 1, trial 2
working on individual 1, trial 20
working on individual 1, trial 21
working on individual 1, trial 22
working on individual 1, trial 23
working on individual 1, trial 24
working on individual 1, trial 25
working on individual 1, trial 3
working on individual 1, trial 4
working on individual 1, trial 5
working on individual 1, trial 6
working on individual 1, trial 7
working on individual 1, trial 8
working on individual 1, trial 9
working on individual 1, trial 28
working on individual 1, trial 29
working on individual 1, trial 30
working on individual 1, trial 31
working on individual 1

In [None]:
# ## Here we double-check that PCA and least squares give similar results
# ind = 3
# tr = 3
# frame = 30
# x= data_frame.loc[(data_frame['indiv']==ind) & (data_frame['trial'] == tr) & (data_frame['frame']== frame)]['mxmm'].tolist()
# y= data_frame.loc[(data_frame['indiv']==ind) & (data_frame['trial'] == tr) & (data_frame['frame']== frame)]['mymm'].tolist()
# standard_x = (x - np.mean(x))
# standard_y = (y - np.mean(y))

# A = np.vstack([standard_x, np.ones_like(x)]).T
# soln, res, rank, sv = np.linalg.lstsq(A, standard_y, rcond=None)

# B = np.vstack([standard_x, standard_y]).T
# C = B.T @ B
# U, S, Vh = np.linalg.svd(C)
# print(U)
# plt.figure()

# plt.plot(standard_x, standard_y,'bv-', label = 'centered actual data')
# plt.plot(standard_x[0], standard_y[0], 'rs')
# plt.plot(standard_x, A@soln, 'ro', label = 'LSQ, slope {:.3f}'.format(soln[0]))
# expanse  = np.max( np.linalg.norm(B, axis=1) )
# plt.plot([-expanse*U[0,0], expanse*U[0,0]], [-expanse*U[1,0], expanse*U[1,0]], 'g-', \
#          label='First principal component, slope {:.3f}'.format(U[1,0] / U[0,0] ))
# plt.legend()

# 2. Then we compute swimming velocity (center of mass translating velocity)


At the first frame and the last frame, we use forward and backward first-order finite differences, respectively. In between, we use centered differences (second-order accuracy in time).
### Note we don't process any trial that has skipped frames

In [7]:
def swim_velx_first(df, ind, tr, frame):
    """First order finite difference of the COM x position (x velocity),
    meant for the end points of a series of frames.

    Frame i >= 0 (forward):   dx/dt_i = (x(i+1) - x(i)) / (t(i+1) - t(i))
    Frame i <  0 (backward):  dx/dt_i = (x(i) - x(i-1)) / (t(i) - t(i-1))

    Parameters
    ----------
    df : pandas.DataFrame
        Table with the columns 'indiv', 'trial', 'frame', 't' and 'comx'.
        Only the rows of individual `ind`, trial `tr` are used.
    ind, tr :
        Values matched against the 'indiv' and 'trial' columns.
    frame : int
        Frame number. A negative value counts from the end: it is mapped to
        `frame % (number of distinct frames)`, so -1 refers to the last frame
        provided the frames are numbered 0..N-1 without gaps.

    Returns
    -------
    The finite-difference x velocity at the requested frame.

    Raises
    ------
    IndexError
        If one of the two frames needed by the stencil is absent from `df`.
    """

    # Work on the data that was passed in rather than on a notebook global.
    trial_rows = df.loc[(df['indiv'] == ind) & (df['trial'] == tr)]

    if frame >= 0:
        # Forward finite difference (FD)
        earlier, later = frame, frame + 1
    else:
        # Backward FD
        num_frames = len(trial_rows['frame'].unique())
        frame = frame % num_frames
        earlier, later = frame - 1, frame

    def value_at(fr, column):
        # Every row of one frame carries the same value; take the first.
        return trial_rows.loc[trial_rows['frame'] == fr, column].unique()[0]

    dt = value_at(later, 't') - value_at(earlier, 't')
    return (value_at(later, 'comx') - value_at(earlier, 'comx')) / dt

    
def swim_vely_first(df, ind, tr, frame):
    """First order finite difference of the COM y position (y velocity),
    meant for the end points of a series of frames.

    Frame i >= 0 (forward):   dy/dt_i = (y(i+1) - y(i)) / (t(i+1) - t(i))
    Frame i <  0 (backward):  dy/dt_i = (y(i) - y(i-1)) / (t(i) - t(i-1))

    Parameters
    ----------
    df : pandas.DataFrame
        Table with the columns 'indiv', 'trial', 'frame', 't' and 'comy'.
        Only the rows of individual `ind`, trial `tr` are used.
    ind, tr :
        Values matched against the 'indiv' and 'trial' columns.
    frame : int
        Frame number. A negative value counts from the end: it is mapped to
        `frame % (number of distinct frames)`, so -1 refers to the last frame
        provided the frames are numbered 0..N-1 without gaps.

    Returns
    -------
    The finite-difference y velocity at the requested frame.

    Raises
    ------
    IndexError
        If one of the two frames needed by the stencil is absent from `df`.
    """

    # Work on the data that was passed in rather than on a notebook global.
    trial_rows = df.loc[(df['indiv'] == ind) & (df['trial'] == tr)]

    if frame >= 0:
        # Forward FD
        earlier, later = frame, frame + 1
    else:
        # Backward FD
        num_frames = len(trial_rows['frame'].unique())
        frame = frame % num_frames
        earlier, later = frame - 1, frame

    def value_at(fr, column):
        # Every row of one frame carries the same value; take the first.
        return trial_rows.loc[trial_rows['frame'] == fr, column].unique()[0]

    dt = value_at(later, 't') - value_at(earlier, 't')
    return (value_at(later, 'comy') - value_at(earlier, 'comy')) / dt

In [8]:
def swim_velx_sec(df, ind, tr, frame):
    """Centered second order finite difference of the COM x position
    (x velocity), meant for the interior frames of a series.

    dx/dt_i = (x(i+1) - x(i-1)) / (t(i+1) - t(i-1))

    Parameters
    ----------
    df : pandas.DataFrame
        Table with the columns 'indiv', 'trial', 'frame', 't' and 'comx'.
        Only the rows of individual `ind`, trial `tr` are used.
    ind, tr :
        Values matched against the 'indiv' and 'trial' columns.
    frame : int
        Frame number i; frames i-1 and i+1 must both be present.

    Returns
    -------
    The finite-difference x velocity at frame i.

    Raises
    ------
    IndexError
        If frame i-1 or i+1 is absent from `df`.
    """

    # Work on the data that was passed in rather than on a notebook global.
    trial_rows = df.loc[(df['indiv'] == ind) & (df['trial'] == tr)]

    def value_at(fr, column):
        # Every row of one frame carries the same value; take the first.
        return trial_rows.loc[trial_rows['frame'] == fr, column].unique()[0]

    # Time elapsed between the two neighbouring frames (the "2 dt" of the stencil).
    tdt = value_at(frame + 1, 't') - value_at(frame - 1, 't')

    return (value_at(frame + 1, 'comx') - value_at(frame - 1, 'comx')) / tdt

    
def swim_vely_sec(df, ind, tr, frame):
    """Centered second order finite difference of the COM y position
    (y velocity), meant for the interior frames of a series.

    dy/dt_i = (y(i+1) - y(i-1)) / (t(i+1) - t(i-1))

    Parameters
    ----------
    df : pandas.DataFrame
        Table with the columns 'indiv', 'trial', 'frame', 't' and 'comy'.
        Only the rows of individual `ind`, trial `tr` are used.
    ind, tr :
        Values matched against the 'indiv' and 'trial' columns.
    frame : int
        Frame number i; frames i-1 and i+1 must both be present.

    Returns
    -------
    The finite-difference y velocity at frame i.

    Raises
    ------
    IndexError
        If frame i-1 or i+1 is absent from `df`.
    """

    # Work on the data that was passed in rather than on a notebook global.
    trial_rows = df.loc[(df['indiv'] == ind) & (df['trial'] == tr)]

    def value_at(fr, column):
        # Every row of one frame carries the same value; take the first.
        return trial_rows.loc[trial_rows['frame'] == fr, column].unique()[0]

    # Time elapsed between the two neighbouring frames (the "2 dt" of the stencil).
    tdt = value_at(frame + 1, 't') - value_at(frame - 1, 't')

    return (value_at(frame + 1, 'comy') - value_at(frame - 1, 'comy')) / tdt

In [9]:
# Compute the COM velocity of every trial: first order finite differences at
# the first and last frame, second order centered differences in between.
# Trials whose frame numbers contain a gap are left untouched.
individuals = data_frame.loc[:,'indiv'].unique().tolist()

for ind in individuals:
    trials = data_frame.loc[data_frame['indiv'] == ind, 'trial'].unique().tolist()
    for tr in trials:
        print("working on individual {}, trial {}".format(ind, tr))

        # Rows of this (individual, trial); reused for every assignment below.
        in_trial = (data_frame['indiv'] == ind) & (data_frame['trial'] == tr)
        trial_df = data_frame.loc[in_trial]

        # A step larger than 1 between consecutive frame numbers means that
        # some frames were skipped, in which case we ignore this trial.
        frame_steps = np.diff(trial_df['frame'].unique())
        if np.any(frame_steps > 1):
            continue

        max_frame = trial_df['frame'].max()
        at_first = in_trial & (data_frame['frame'] == 0)
        at_last = in_trial & (data_frame['frame'] == max_frame)

        # End points: first order FD (forward at frame 0, backward at the last frame)
        data_frame.loc[at_first, 'swimvelx'] = swim_velx_first(trial_df, ind, tr, 0)
        data_frame.loc[at_last, 'swimvelx'] = swim_velx_first(trial_df, ind, tr, -1)
        data_frame.loc[at_first, 'swimvely'] = swim_vely_first(trial_df, ind, tr, 0)
        data_frame.loc[at_last, 'swimvely'] = swim_vely_first(trial_df, ind, tr, -1)

        # Interior frames: second order centered FD
        for f in range(1, max_frame):
            at_f = in_trial & (data_frame['frame'] == f)
            data_frame.loc[at_f, 'swimvelx'] = swim_velx_sec(trial_df, ind, tr, f)
            data_frame.loc[at_f, 'swimvely'] = swim_vely_sec(trial_df, ind, tr, f)

        # Speed = magnitude of the velocity vector
        vel_x = np.array(data_frame.loc[in_trial, 'swimvelx'])
        vel_y = np.array(data_frame.loc[in_trial, 'swimvely'])
        data_frame.loc[in_trial, 'swimvel'] = np.sqrt(vel_x**2 + vel_y**2)


working on individual 1, trial 1
working on individual 1, trial 10
working on individual 1, trial 11
working on individual 1, trial 12
working on individual 1, trial 13
working on individual 1, trial 14
working on individual 1, trial 15
working on individual 1, trial 16
working on individual 1, trial 17
working on individual 1, trial 18
working on individual 1, trial 19
working on individual 1, trial 2
working on individual 1, trial 20
working on individual 1, trial 21
working on individual 1, trial 22
working on individual 1, trial 23
working on individual 1, trial 24
working on individual 1, trial 25
working on individual 1, trial 3
working on individual 1, trial 4
working on individual 1, trial 5
working on individual 1, trial 6
working on individual 1, trial 7
working on individual 1, trial 8
working on individual 1, trial 9
working on individual 1, trial 28
working on individual 1, trial 29
working on individual 1, trial 30
working on individual 1, trial 31
working on individual 1

# Next, we compute
- heading angle, i.e. body orientation computed by using the body axis ('theta')
- swimming angle, i.e. angle computed by using the COM velocity ('com_vel_theta')
- time derivatives of these angles, 'd_theta' and 'com_vel_d_theta', for the heading and swimming angles, respectively

In [10]:
# Frame spacing used to turn angle differences into rates.
# NOTE(review): hard-coded; presumably the sampling interval of 't' -- confirm.
frame_dt = 0.02

# For each angle: (numerator column, denominator column, angle column, rate column).
# The heading angle comes from the body axis, the swimming angle from the COM velocity.
angle_specs = [
    ('bodyaxisy', 'bodyaxisx', 'theta', 'd_theta'),
    ('swimvely', 'swimvelx', 'com_vel_theta', 'com_vel_d_theta'),
]

individuals = data_frame.loc[:,'indiv'].unique().tolist()

for ind in individuals:
    trials = data_frame.loc[data_frame['indiv'] == ind, 'trial'].unique().tolist()
    for tr in trials:
        print("working on individual {}, trial {}".format(ind, tr))

        # Row masks of this trial; none of the columns they depend on is modified below.
        in_trial = (data_frame['indiv'] == ind) & (data_frame['trial'] == tr)
        trial_df = data_frame.loc[in_trial]

        frame = np.array(trial_df.loc[:,'frame'].unique()).copy()
        last_frame = np.max(frame)
        before_last = in_trial & (data_frame['frame'] < last_frame)
        at_last = in_trial & (data_frame['frame'] == last_frame)
        at_second_last = in_trial & (data_frame['frame'] == last_frame - 1)

        for num_col, den_col, angle_col, rate_col in angle_specs:
            trial_df = data_frame.loc[in_trial]

            # Angle = arctan(numerator / denominator * sign(denominator))
            denominator = trial_df[den_col]
            ratio = np.array(trial_df[num_col]) / np.array(denominator)
            data_frame.loc[in_trial, angle_col] = np.arctan(ratio * np.sign(denominator))

            # Re-read the trial so that it contains the angle just written.
            trial_df = data_frame.loc[in_trial]

            # Rate of change by forward finite difference: rows of frames > 0
            # are paired, in order, with rows of frames < last frame.
            data_frame.loc[in_trial, rate_col] = np.nan
            later_angle = np.array(trial_df[trial_df['frame'] > 0][angle_col])
            earlier_angle = np.array(trial_df[trial_df['frame'] < last_frame][angle_col])
            data_frame.loc[before_last, rate_col] = (later_angle - earlier_angle) / frame_dt

            # The last frame has no successor: reuse the value of the frame before
            # it, which amounts to a backward finite difference.
            data_frame.loc[at_last, rate_col] = np.array(data_frame.loc[at_second_last, rate_col])


working on individual 1, trial 1
working on individual 1, trial 10
working on individual 1, trial 11
working on individual 1, trial 12
working on individual 1, trial 13
working on individual 1, trial 14
working on individual 1, trial 15
working on individual 1, trial 16
working on individual 1, trial 17
working on individual 1, trial 18
working on individual 1, trial 19
working on individual 1, trial 2
working on individual 1, trial 20
working on individual 1, trial 21
working on individual 1, trial 22
working on individual 1, trial 23
working on individual 1, trial 24
working on individual 1, trial 25
working on individual 1, trial 3
working on individual 1, trial 4
working on individual 1, trial 5
working on individual 1, trial 6
working on individual 1, trial 7
working on individual 1, trial 8
working on individual 1, trial 9
working on individual 1, trial 28
working on individual 1, trial 29
working on individual 1, trial 30
working on individual 1, trial 31
working on individual 1

# Save the dataset for future use!

In [11]:
# Display the first 30 rows of the processed table.
data_frame.head(30)

Unnamed: 0,filename,date,indiv,trial,t,frame,point,mxmm,mymm,viscosity,...,bodyaxisy,bodycoordx,bodycoordy,swimvelx,swimvely,swimvel,theta,d_theta,com_vel_theta,com_vel_d_theta
0,1,15868,1,1,0.58,0,1,341.299771,88.110667,1,...,-0.311522,73.79677,-7.270154,-206.90573,-68.994977,218.106139,-0.316794,0.148989,-0.321865,-0.842216
1,1,15868,1,1,0.58,0,2,349.324554,90.105424,1,...,-0.311522,65.549898,-6.665755,-206.90573,-68.994977,218.106139,-0.316794,0.148989,-0.321865,-0.842216
2,1,15868,1,1,0.58,0,3,357.545449,91.064575,1,...,-0.311522,57.439287,-5.01619,-206.90573,-68.994977,218.106139,-0.316794,0.148989,-0.321865,-0.842216
3,1,15868,1,1,0.58,0,4,365.759349,91.948042,1,...,-0.311522,49.3589,-3.296886,-206.90573,-68.994977,218.106139,-0.316794,0.148989,-0.321865,-0.842216
4,1,15868,1,1,0.58,0,5,374.046024,91.78167,1,...,-0.311522,41.536407,-0.557311,-206.90573,-68.994977,218.106139,-0.316794,0.148989,-0.321865,-0.842216
5,1,15868,1,1,0.58,0,6,382.245394,90.91437,1,...,-0.311522,34.015228,2.821113,-206.90573,-68.994977,218.106139,-0.316794,0.148989,-0.321865,-0.842216
6,1,15868,1,1,0.58,0,7,390.32937,89.114436,1,...,-0.311522,26.894238,7.049815,-206.90573,-68.994977,218.106139,-0.316794,0.148989,-0.321865,-0.842216
7,1,15868,1,1,0.58,0,8,398.584686,88.492048,1,...,-0.311522,19.243602,10.212945,-206.90573,-68.994977,218.106139,-0.316794,0.148989,-0.321865,-0.842216
8,1,15868,1,1,0.58,0,9,406.743469,89.811668,1,...,-0.311522,11.079718,11.500629,-206.90573,-68.994977,218.106139,-0.316794,0.148989,-0.321865,-0.842216
9,1,15868,1,1,0.58,0,10,414.012944,93.737974,1,...,-0.311522,2.948849,10.0343,-206.90573,-68.994977,218.106139,-0.316794,0.148989,-0.321865,-0.842216


In [12]:
# We save this processed dataset for future use!
# `key` is passed by keyword because newer pandas versions deprecate passing it positionally.
data_frame.to_hdf("../Data/processed_midline_all.h5", key="data")