In [44]:
from progress.bar import Bar
import pandas as pd
import datetime
import numpy as np
import pytz
import scipy
import matplotlib.pyplot as plt
from collections import OrderedDict
from numpy import linalg as LA
import time
from matplotlib import style
from scipy import signal
from scipy import fftpack
import pywt


style.use('dark_background')

# Accelerometer Data Feature Extraction

### 1) Reading John Doe's Accelerometer data 

In [2]:
df_acc = pd.read_csv("../../../data/accelerometer/835b51bd-ee31-49e8-a653-cb75a7e4c98e.csv", header=None)
df_acc.columns = ['_id1', '_id2', 'timestamp', 'device_id', 'double_x', 'double_y', 'double_z', 'accuracy', 'label']
df_acc.device_id='John Doe'
df_acc = df_acc.sort_values(by=['timestamp']).reset_index(drop=True)[['timestamp', 'device_id', 'double_x', 'double_y', 'double_z', 'accuracy', 'label']]
df_acc = df_acc.sort_values(by=['timestamp'])
df_acc_sample = df_acc[0:100000]

In [3]:
def ecdfRep(data, components):
    #
    #   rep = ecdfRep(data, components)
    #
    #   Estimate ecdf-representation according to 
    #     Hammerla, Nils Y., et al. "On preserving statistical characteristics of 
    #     accelerometry data using their empirical cumulative distribution." 
    #     ISWC. ACM, 2013.
    #
    #   Input: 
    #       data        Nxd     Input data (rows = samples).
    #       components  int     Number of components to extract per axis.
    #
    #   Output:
    #       rep         Mx1     Data representation with M = d*components+d
    #                           elements.
    #
    #   Nils Hammerla '15
    #
        m = data.mean(0)
        data = np.sort(data, axis=0)
        data = data[np.int32(np.around(np.linspace(0,data.shape[0]-1,num=components))),:]
        data = data.flatten(1)
        return np.hstack((data, m))

In [4]:
"""
modes 
0 - time domain only
1 - time + frequency domain
2 - time + frequency + stats
3 - statistical methods only3
"""
def featurize_window(df_fw, feature_list, mode, window_size_in_minutes):
    local_dict = OrderedDict()
    
    
    if mode > 0 and mode < 3:
        if df_fw.index.size >= (30*window_size_in_minutes):
            df_fw.double_x = df_fw.double_x.replace({0:1e-08})
            df_fw.double_y = df_fw.double_y.replace({0:1e-08})
            df_fw.double_z = df_fw.double_z.replace({0:1e-08})
            f_x = scipy.interpolate.interp1d(df_fw.timestamp, df_fw.double_x)
            f_y = scipy.interpolate.interp1d(df_fw.timestamp, df_fw.double_y)
            f_z = scipy.interpolate.interp1d(df_fw.timestamp, df_fw.double_z)
            r = (np.sqrt(df_fw.double_x**2 + df_fw.double_y**2 + df_fw.double_z**2)).replace({0:1e-08})
            f_r = scipy.interpolate.interp1d(df_fw.timestamp, r)
            xnew = []
            step = (df_fw.timestamp.iloc[-1] - df_fw.timestamp.iloc[0]) /df_fw.index.size
            for ti in range(df_fw.timestamp.iloc[0], df_fw.timestamp.iloc[-1], int(step)):
                xnew.append(ti)
            
            f_fs = window_size_in_minutes * 60 / df_fw.index.size
            L = 512 # change it to 512
            local_dict.update({'skip_fft':False, 'fx': f_x(xnew), 'fy': f_y(xnew), 'fz': f_z(xnew), 'fr': f_r(xnew), 'fs': f_fs, 'L': L})
        else:
            local_dict.update({'skip_fft':True})
        if df_fw.index.size == 0:
            local_dict['skip_td'] = True
        else:
            local_dict['skip_td'] = False
            
    if mode == 0:
        local_dict['skip_fft'] = True
        if df_fw.index.size == 0:
            local_dict['skip_td'] = True
        else:
            local_dict['skip_td'] = False
    if mode == 3:
        local_dict['skip_fft'] = True
        local_dict['skip_td'] = True

        
    feat_dict = {}

    

    for feature in feature_list:
        if feature == 'int_desc':
            if not local_dict['skip_td']:
                int_desc = np.sqrt((df_fw.double_x ** 2).describe() + (df_fw.double_y **2).describe() + (df_fw.double_z ** 2).describe())
                feat_dict.update({'int_mean': int_desc[1], 'int_std': int_desc[2], 
                                  'int_min': int_desc[3],'int_25': int_desc[4], 'int_50': int_desc[5],'int_75': int_desc[6]})
            else:
                feat_dict.update({'int_mean': np.nan, 'int_std': np.nan, 
                                  'int_min': np.nan,'int_25': np.nan, 'int_50': np.nan,'int_75': np.nan})
        elif feature == 'int_rms':
            if not local_dict['skip_td']:
                int_rms = np.sqrt((df_fw.double_x**2).sum() + (df_fw.double_y**2).sum() + (df_fw.double_z**2).sum()) / np.sqrt(df_fw.index.size)
                feat_dict.update({'int_rms':int_rms})
            else:
                feat_dict.update({'int_rms': np.nan})
        elif feature == 'mag_desc':
            if not local_dict['skip_td']:
                mag_desc = np.sqrt(df_fw.double_x**2 + df_fw.double_y**2 + df_fw.double_z**2).describe()
                feat_dict.update({'mag_mean': mag_desc[1], 'mag_std': mag_desc[2], 'mag_min': mag_desc[3],
                                  'mag_25': mag_desc[4], 'mag_50': mag_desc[5],'mag_75': mag_desc[6]})
            else:
                feat_dict.update({'mag_mean': np.nan, 'mag_std': np.nan, 'mag_min': np.nan,
                  'mag_25': np.nan, 'mag_50': np.nan,'mag_75': np.nan})
        elif feature == 'pear_coef':
            if not local_dict['skip_td']:
                cov_matrix =  np.cov(np.stack((df_fw.double_x,df_fw.double_y, df_fw.double_z), axis=0))
                pear_coef_xy = cov_matrix[0,1] / (df_fw.double_x.std() * df_fw.double_y.std())
                pear_coef_yz = cov_matrix[1,2] / (df_fw.double_y.std() * df_fw.double_z.std())
                pear_coef_xz = cov_matrix[0,2] / (df_fw.double_x.std() * df_fw.double_z.std())
                feat_dict.update({'pear_coef_xy':pear_coef_xy, 'pear_coef_yz':pear_coef_yz,'pear_coef_xz':pear_coef_xz })
            else:
                feat_dict.update({'pear_coef_xy':np.nan, 'pear_coef_yz':np.nan,'pear_coef_xz':np.nan})
        elif feature == 'sma':
            if not local_dict['skip_td']:
                sma = (np.abs(df_fw.double_x.to_numpy()).sum() + np.abs(df_fw.double_y.to_numpy()).sum() + np.abs(df_fw.double_z.to_numpy()).sum()) / df_fw.index.size
                feat_dict.update({'sma':sma})
            else:
                feat_dict.update({'sma':np.nan})
        elif feature == 'svm':
            if not local_dict['skip_td']:
                svm = np.sqrt(df_fw.double_x**2 + df_fw.double_y**2 + df_fw.double_z**2).sum() / df_fw.index.size
                feat_dict.update({'svm':svm})
            else:
                feat_dict.update({'svm':np.nan})
        elif feature == 'fft':
            if not local_dict['skip_fft']:
                L = local_dict['L']
                dfx = fftpack.fft(local_dict['fx'], 512)
                dfy = fftpack.fft(local_dict['fy'], 512)
                dfz = fftpack.fft(local_dict['fz'], 512)
                dfr = fftpack.fft(local_dict['fr'], 512)
                # DC component
                # Remove the L part!
                feat_dict.update({'fdc_x': np.mean(np.real(dfx)), 'fdc_y': np.mean(np.real(dfy)),
                                  'fdc_z':  np.mean(np.real(dfz)), 'fdc_r':  np.mean(np.real(dfr))})
                # Energy
                feat_dict.update({'feng_x': (np.sum(np.real(dfx)**2 + np.imag(dfx)**2)) / L, 'feng_y': (np.sum(np.real(dfy)**2 + np.imag(dfy)**2)) / L,
                                  'feng_z':  (np.sum(np.real(dfz)**2 + np.imag(dfz)**2)) / L, 'feng_r':  (np.sum(np.real(dfr)**2 + np.imag(dfr)**2)) / L})
                # Entropy
                ck_x = np.sqrt(np.real(dfx)**2  + np.imag(dfx)**2)
                cj_x = ck_x / np.sum(ck_x)
                e_x = np.sum(cj_x * np.log(cj_x))
                
                ck_y = np.sqrt(np.real(dfy)**2  + np.imag(dfy)**2)
                cj_y = ck_y / np.sum(ck_y)
                e_y = np.sum(cj_y * np.log(cj_y))
                
                ck_z = np.sqrt(np.real(dfz)**2  + np.imag(dfz)**2)
                cj_z = ck_z / np.sum(ck_z)
                e_z = np.sum(cj_z * np.log(cj_z))
                
                ck_r = np.sqrt(np.real(dfr)**2  + np.imag(dfr)**2)
                cj_r = ck_r / np.sum(ck_r)
                e_r = np.sum(cj_r * np.log(cj_r))
                
                feat_dict.update({'fent_x': e_x, 'fent_y':  e_y,'fent_z':  e_z, 'fent_r': e_r})
                
                # Correlation
                # Fix the length, should be FFT wndow size 512
            
                fcorr_xy = np.dot(np.real(dfx) / L, np.real(dfy) / L)
                fcorr_xz = np.dot(np.real(dfx) / L, np.real(dfz) / L)
                fcorr_yz = np.dot(np.real(dfy) / L, np.real(dfz) / L)
                
                feat_dict.update({'fcorr_xy': fcorr_xy,'fcorr_xz':  fcorr_xz, 'fcorr_yz': fcorr_yz})
                
            else:
                feat_dict.update({'fdc_x': np.nan, 'fdc_y':  np.nan,'fdc_z':  np.nan, 'fdc_r': np.nan})
                feat_dict.update({'feng_x':  np.nan, 'feng_y':  np.nan, 'feng_z':   np.nan, 'feng_r':   np.nan})
                feat_dict.update({'fent_x': np.nan, 'fent_y':  np.nan,'fent_z':  np.nan, 'fent_r': np.nan})
                feat_dict.update({'fcorr_xy': np.nan,'fcorr_xz':  np.nan, 'fcorr_yz': np.nan})
        elif feature == 'psd':
            if not local_dict['skip_fft']:
                fs = local_dict['fs']
                psd_window = signal.get_window('boxcar', len(local_dict['fx'])) # do not pass this window
                freqs_x, pxx_denx = signal.periodogram(local_dict['fx'], window=psd_window, fs=fs)
                freqs_y, pxx_deny = signal.periodogram(local_dict['fy'], window=psd_window, fs=fs)
                freqs_z, pxx_denz = signal.periodogram(local_dict['fz'], window=psd_window, fs=fs)
                freqs_r, pxx_denr = signal.periodogram(local_dict['fr'], window=psd_window, fs=fs)
                feat_dict.update({'psd_mean_x': np.mean(pxx_denx), 'psd_mean_y': np.mean(pxx_deny),
                                  'psd_mean_z': np.mean(pxx_denz), 'psd_mean_r': np.mean(pxx_denr)})
                
                feat_dict.update({'psd_max_x': np.max(pxx_denx), 
                                  'psd_max_y': np.max(pxx_deny),
                                  'psd_max_z': np.max(pxx_denz), 
                                  'psd_max_r': np.max(pxx_denr)})
                
                
                freqs_05_3_x = np.argwhere((freqs_x >= 0.5) & (freqs_x <= 3))
                freqs_05_3_y = np.argwhere((freqs_y >= 0.5) & (freqs_y <= 3))
                freqs_05_3_z = np.argwhere((freqs_z >= 0.5) & (freqs_z <= 3))
                freqs_05_3_r = np.argwhere((freqs_r >= 0.5) & (freqs_r <= 3))
                
                
                # max b/w 0.3 - 3Hz 
                # 0.5 - 3 Hz if missing, maybe not 0.0
                feat_dict.update({'psd_max_x_05_3': np.max(pxx_denx[freqs_05_3_x]) if freqs_05_3_x.any() else 0.0, 
                  'psd_max_y_05_3': np.max(pxx_deny[freqs_05_3_y]) if freqs_05_3_y.any() else 0.0,
                  'psd_max_z_05_3': np.max(pxx_denz[freqs_05_3_z]) if freqs_05_3_z.any() else 0.0,
                  'psd_max_r_05_3': np.max(pxx_denr[freqs_05_3_r]) if freqs_05_3_r.any() else 0.0})
            else:
                feat_dict.update({'psd_mean_x': np.nan, 'psd_mean_y':np.nan,
                                  'psd_mean_z': np.nan, 'psd_mean_r': np.nan})
                feat_dict.update({'psd_max_x': np.nan, 
                                  'psd_max_y': np.nan,
                                  'psd_max_z': np.nan, 
                                  'psd_max_r': np.nan})
        elif feature == 'lmbs':
            if not local_dict['skip_td']:
                lmb_f_05_3 = np.linspace(0.5, 3, 10000)
                lmb_psd_x = signal.lombscargle(df_fw.timestamp, df_fw.double_x, lmb_f_05_3, normalize=False)
                lmb_psd_y = signal.lombscargle(df_fw.timestamp, df_fw.double_y, lmb_f_05_3, normalize=False)
                lmb_psd_z = signal.lombscargle(df_fw.timestamp, df_fw.double_z, lmb_f_05_3, normalize=False)
                
                feat_dict.update({'lmb_psd_max_x_05_3': np.max(lmb_psd_x) if lmb_psd_x.any() else 0.0, 
                  'lmb_psd_max_y_05_3': np.max(lmb_psd_y) if lmb_psd_y.any() else 0.0,
                  'lmb_psd_max_z_05_3': np.max(lmb_psd_z) if lmb_psd_z.any() else 0.0})
            else:
                feat_dict.update({'lmb_psd_max_x_05_3': np.nan, 
                  'lmb_psd_max_y_05_3': np.nan,
                  'lmb_psd_max_z_05_3': np.nan})
                
                
    return feat_dict

In [5]:
# acc_features = ['int_desc', 'int_rms', 'mag_desc', 'pear_coef', 'sma', 'svm', 'ecdf_5', 'fft', 'psd', 'lmbs']
acc_features = ['int_desc', 'int_rms', 'mag_desc', 'pear_coef', 'sma', 'svm', 'ecdf_5', 'fft', 'psd']


def init_window_feature_df():
    return pd.DataFrame(columns= ['_window_id', 'start_time', 'end_time'])


def acc_windowizer(df_acc_sample, mode, window_size_in_minutes):
    window = 0
    window_start_time = df_acc_sample.timestamp.iloc[0]
    window_end_time = df_acc_sample.timestamp.iloc[-1]
    window_next_time = window_start_time + (datetime.timedelta(minutes=window_size_in_minutes).seconds * 10**3)
    window_id = 0
    window_start_index = 0
    df_acc_sample['window_id'] = -1
    df_feature_windows = init_window_feature_df()
    estimated_windows = (df_acc_sample.timestamp.iloc[-1] - df_acc_sample.timestamp.iloc[0]) / (window_size_in_minutes * 60000)
    print("Estimated no. of windows: ", estimated_windows)
    
    

    while window_next_time < window_end_time:
        window += 1
        print("window: ", window)
        print("Percentage: ", (window/estimated_windows) * 100, "%")
        window_indices = df_acc_sample.iloc[window_start_index:].timestamp < window_next_time
        df_acc_sample.window_id.iloc[window_start_index:][window_indices] = window_id
        feature_dict = featurize_window(df_acc_sample.iloc[window_start_index:][window_indices], acc_features, mode, window_size_in_minutes)
        feature_dict.update({'_window_id': window_id, 'start_time': window_start_time, 'end_time': window_next_time - 1, 'sample_count':window_indices[window_indices].index.size})
        df_feature_windows = df_feature_windows.append(feature_dict, ignore_index=True)
        window_start_time = window_next_time
        window_next_time = window_next_time + (datetime.timedelta(minutes=window_size_in_minutes).seconds * 10**3)
        window_id = window_id + 1
        window_start_index = window_indices[~window_indices].index[0]

    window_indices = df_acc_sample.iloc[window_start_index:].timestamp < window_next_time
    feature_dict = featurize_window(df_acc_sample.iloc[window_start_index:], acc_features, mode, window_size_in_minutes)
    df_acc_sample.window_id.iloc[window_start_index:][window_indices] = window_id
    feature_dict.update({'_window_id': window_id, 'start_time': window_start_time, 'end_time': window_next_time, 'sample_count':window_indices[window_indices].index.size})
    df_feature_windows = df_feature_windows.append(feature_dict, ignore_index=True)
    df_feature_windows.start_time = df_feature_windows.start_time.astype(np.int64)
    df_feature_windows.end_time = df_feature_windows.end_time.astype(np.int64)
    df_feature_windows.sample_count = df_feature_windows.sample_count.astype(np.int64)
    df_feature_windows._window_id = df_feature_windows._window_id.astype(np.int64)
    return df_feature_windows, df_acc_sample

In [9]:
df_feature_windows, df_acc_sample_e = acc_windowizer(df_acc_sample, 2, 1)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  app.launch_new_instance()


Estimated no. of windows:  425.50425
window:  1
Percentage:  0.2350152789308215 %
window:  2
Percentage:  0.470030557861643 %
window:  3
Percentage:  0.7050458367924645 %
window:  4
Percentage:  0.940061115723286 %
window:  5
Percentage:  1.1750763946541074 %
window:  6
Percentage:  1.410091673584929 %
window:  7
Percentage:  1.6451069525157502 %
window:  8
Percentage:  1.880122231446572 %
window:  9
Percentage:  2.1151375103773935 %
window:  10
Percentage:  2.350152789308215 %
window:  11
Percentage:  2.585168068239036 %
window:  12
Percentage:  2.820183347169858 %
window:  13
Percentage:  3.055198626100679 %
window:  14
Percentage:  3.2902139050315005 %
window:  15
Percentage:  3.525229183962322 %


  slope = (y_hi - y_lo) / (x_hi - x_lo)[:, None]
  y_new = slope*(x_new - x_lo)[:, None] + y_lo


window:  16
Percentage:  3.760244462893144 %
window:  17
Percentage:  3.9952597418239653 %
window:  18
Percentage:  4.230275020754787 %
window:  19
Percentage:  4.465290299685608 %
window:  20
Percentage:  4.70030557861643 %
window:  21
Percentage:  4.935320857547251 %
window:  22
Percentage:  5.170336136478072 %
window:  23
Percentage:  5.405351415408894 %
window:  24
Percentage:  5.640366694339716 %
window:  25
Percentage:  5.875381973270537 %
window:  26
Percentage:  6.110397252201358 %
window:  27
Percentage:  6.3454125311321805 %
window:  28
Percentage:  6.580427810063001 %
window:  29
Percentage:  6.815443088993824 %
window:  30
Percentage:  7.050458367924644 %
window:  31
Percentage:  7.285473646855466 %
window:  32
Percentage:  7.520488925786288 %
window:  33
Percentage:  7.755504204717109 %
window:  34
Percentage:  7.9905194836479305 %
window:  35
Percentage:  8.225534762578752 %
window:  36
Percentage:  8.460550041509574 %
window:  37
Percentage:  8.695565320440394 %
window: 

window:  198
Percentage:  46.533025228302655 %
window:  199
Percentage:  46.76804050723348 %
window:  200
Percentage:  47.003055786164296 %
window:  201
Percentage:  47.23807106509511 %
window:  202
Percentage:  47.47308634402594 %
window:  203
Percentage:  47.70810162295676 %
window:  204
Percentage:  47.943116901887585 %
window:  205
Percentage:  48.17813218081841 %
window:  206
Percentage:  48.413147459749226 %
window:  207
Percentage:  48.64816273868004 %
window:  208
Percentage:  48.88317801761087 %
window:  209
Percentage:  49.11819329654169 %
window:  210
Percentage:  49.353208575472514 %
window:  211
Percentage:  49.58822385440334 %
window:  212
Percentage:  49.823239133334155 %
window:  213
Percentage:  50.05825441226498 %
window:  214
Percentage:  50.293269691195796 %
window:  215
Percentage:  50.52828497012661 %
window:  216
Percentage:  50.763300249057444 %
window:  217
Percentage:  50.99831552798826 %
window:  218
Percentage:  51.233330806919085 %
window:  219
Percentage: 

window:  381
Percentage:  89.54082127264299 %
window:  382
Percentage:  89.7758365515738 %
window:  383
Percentage:  90.01085183050462 %
window:  384
Percentage:  90.24586710943545 %
window:  385
Percentage:  90.48088238836627 %
window:  386
Percentage:  90.7158976672971 %
window:  387
Percentage:  90.95091294622792 %
window:  388
Percentage:  91.18592822515873 %
window:  389
Percentage:  91.42094350408956 %
window:  390
Percentage:  91.65595878302038 %
window:  391
Percentage:  91.8909740619512 %
window:  392
Percentage:  92.12598934088201 %
window:  393
Percentage:  92.36100461981285 %
window:  394
Percentage:  92.59601989874366 %
window:  395
Percentage:  92.83103517767448 %
window:  396
Percentage:  93.06605045660531 %
window:  397
Percentage:  93.30106573553613 %
window:  398
Percentage:  93.53608101446696 %
window:  399
Percentage:  93.77109629339778 %
window:  400
Percentage:  94.00611157232859 %
window:  401
Percentage:  94.24112685125941 %
window:  402
Percentage:  94.47614213

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self[name] = value


In [56]:
"""
modes 
0 - time domain only
1 - time + frequency domain
2 - time + frequency + stats
3 - statistical methods only
"""
def featurize_window_opt(df_fw, feature_list, mode, window_size_in_minutes):
    local_dict = OrderedDict()

    
    
    if mode > 0 and mode < 3:
        if df_fw.index.size >= (30*window_size_in_minutes):
            df_fw.double_x = df_fw.double_x.replace({0:1e-08})
            df_fw.double_y = df_fw.double_y.replace({0:1e-08})
            df_fw.double_z = df_fw.double_z.replace({0:1e-08})
            f_x = scipy.interpolate.interp1d(df_fw.timestamp, df_fw.double_x)
            f_y = scipy.interpolate.interp1d(df_fw.timestamp, df_fw.double_y)
            f_z = scipy.interpolate.interp1d(df_fw.timestamp, df_fw.double_z)
            r = (np.sqrt(df_fw.double_x**2 + df_fw.double_y**2 + df_fw.double_z**2)).replace({0:1e-08})
            f_r = scipy.interpolate.interp1d(df_fw.timestamp, r)
            xnew = []
            step = (df_fw.timestamp.iloc[-1] - df_fw.timestamp.iloc[0]) /df_fw.index.size
            for ti in range(df_fw.timestamp.iloc[0], df_fw.timestamp.iloc[-1], int(step)):
                xnew.append(ti)
            
            f_fs = window_size_in_minutes * 60 / df_fw.index.size
            L = 512 # change it to 512
            local_dict.update({'skip_fft':False, 'fx': f_x(xnew), 'fy': f_y(xnew), 'fz': f_z(xnew), 'fr': f_r(xnew), 'fs': f_fs, 'L': L})
        else:
            local_dict.update({'skip_fft':True})
        if df_fw.index.size == 0:
            local_dict['skip_td'] = True
        else:
            local_dict['skip_td'] = False
            
    if mode == 0:
        local_dict['skip_fft'] = True
        if df_fw.index.size == 0:
            local_dict['skip_td'] = True
        else:
            local_dict['skip_td'] = False
    if mode == 3:
        local_dict['skip_fft'] = True
        local_dict['skip_td'] = True

        
    feat_dict = {}
    
    #window information:
    
    feat_dict.update({'start_timestamp':df_fw.timestamp[0]})
    feat_dict.update({'end_timestamp':df_fw.timestamp[0] + 6*10**3})
    feat_dict.update({'sample_count':df_fw.index.size})

    

    for feature in feature_list:
        if feature == 'int_desc':
            if not local_dict['skip_td']:
                int_desc = np.sqrt((df_fw.double_x ** 2).describe() + (df_fw.double_y **2).describe() + (df_fw.double_z ** 2).describe())
                feat_dict.update({'int_mean': int_desc[1], 'int_std': int_desc[2], 
                                  'int_min': int_desc[3],'int_25': int_desc[4], 'int_50': int_desc[5],'int_75': int_desc[6]})
            else:
                feat_dict.update({'int_mean': np.nan, 'int_std': np.nan, 
                                  'int_min': np.nan,'int_25': np.nan, 'int_50': np.nan,'int_75': np.nan})
        elif feature == 'int_rms':
            if not local_dict['skip_td']:
                int_rms = np.sqrt((df_fw.double_x**2).sum() + (df_fw.double_y**2).sum() + (df_fw.double_z**2).sum()) / np.sqrt(df_fw.index.size)
                feat_dict.update({'int_rms':int_rms})
            else:
                feat_dict.update({'int_rms': np.nan})
        elif feature == 'mag_desc':
            if not local_dict['skip_td']:
                mag_desc = np.sqrt(df_fw.double_x**2 + df_fw.double_y**2 + df_fw.double_z**2).describe()
                feat_dict.update({'mag_mean': mag_desc[1], 'mag_std': mag_desc[2], 'mag_min': mag_desc[3],
                                  'mag_25': mag_desc[4], 'mag_50': mag_desc[5],'mag_75': mag_desc[6]})
            else:
                feat_dict.update({'mag_mean': np.nan, 'mag_std': np.nan, 'mag_min': np.nan,
                  'mag_25': np.nan, 'mag_50': np.nan,'mag_75': np.nan})
        elif feature == 'pear_coef':
            if not local_dict['skip_td']:
                cov_matrix =  np.cov(np.stack((df_fw.double_x,df_fw.double_y, df_fw.double_z), axis=0))
                pear_coef_xy = cov_matrix[0,1] / (df_fw.double_x.std() * df_fw.double_y.std())
                pear_coef_yz = cov_matrix[1,2] / (df_fw.double_y.std() * df_fw.double_z.std())
                pear_coef_xz = cov_matrix[0,2] / (df_fw.double_x.std() * df_fw.double_z.std())
                feat_dict.update({'pear_coef_xy':pear_coef_xy, 'pear_coef_yz':pear_coef_yz,'pear_coef_xz':pear_coef_xz })
            else:
                feat_dict.update({'pear_coef_xy':np.nan, 'pear_coef_yz':np.nan,'pear_coef_xz':np.nan})
        elif feature == 'sma':
            if not local_dict['skip_td']:
                sma = (np.abs(df_fw.double_x.to_numpy()).sum() + np.abs(df_fw.double_y.to_numpy()).sum() + np.abs(df_fw.double_z.to_numpy()).sum()) / df_fw.index.size
                feat_dict.update({'sma':sma})
            else:
                feat_dict.update({'sma':np.nan})
        elif feature == 'svm':
            if not local_dict['skip_td']:
                svm = np.sqrt(df_fw.double_x**2 + df_fw.double_y**2 + df_fw.double_z**2).sum() / df_fw.index.size
                feat_dict.update({'svm':svm})
            else:
                feat_dict.update({'svm':np.nan})
        elif feature == 'fft':
            if not local_dict['skip_fft']:
                L = local_dict['L']
                dfx = fftpack.fft(local_dict['fx'], 512)
                dfy = fftpack.fft(local_dict['fy'], 512)
                dfz = fftpack.fft(local_dict['fz'], 512)
                dfr = fftpack.fft(local_dict['fr'], 512)
                # DC component
                # Remove the L part!
                feat_dict.update({'fdc_x': np.mean(np.real(dfx)), 'fdc_y': np.mean(np.real(dfy)),
                                  'fdc_z':  np.mean(np.real(dfz)), 'fdc_r':  np.mean(np.real(dfr))})
                # Energy
                feat_dict.update({'feng_x': (np.sum(np.real(dfx)**2 + np.imag(dfx)**2)) / L, 'feng_y': (np.sum(np.real(dfy)**2 + np.imag(dfy)**2)) / L,
                                  'feng_z':  (np.sum(np.real(dfz)**2 + np.imag(dfz)**2)) / L, 'feng_r':  (np.sum(np.real(dfr)**2 + np.imag(dfr)**2)) / L})
                # Entropy
                ck_x = np.sqrt(np.real(dfx)**2  + np.imag(dfx)**2)
                cj_x = ck_x / np.sum(ck_x)
                e_x = np.sum(cj_x * np.log(cj_x))
                
                ck_y = np.sqrt(np.real(dfy)**2  + np.imag(dfy)**2)
                cj_y = ck_y / np.sum(ck_y)
                e_y = np.sum(cj_y * np.log(cj_y))
                
                ck_z = np.sqrt(np.real(dfz)**2  + np.imag(dfz)**2)
                cj_z = ck_z / np.sum(ck_z)
                e_z = np.sum(cj_z * np.log(cj_z))
                
                ck_r = np.sqrt(np.real(dfr)**2  + np.imag(dfr)**2)
                cj_r = ck_r / np.sum(ck_r)
                e_r = np.sum(cj_r * np.log(cj_r))
                
                feat_dict.update({'fent_x': e_x, 'fent_y':  e_y,'fent_z':  e_z, 'fent_r': e_r})
                
                # Correlation
                # Fix the length, should be FFT wndow size 512
            
                fcorr_xy = np.dot(np.real(dfx) / L, np.real(dfy) / L)
                fcorr_xz = np.dot(np.real(dfx) / L, np.real(dfz) / L)
                fcorr_yz = np.dot(np.real(dfy) / L, np.real(dfz) / L)
                
                feat_dict.update({'fcorr_xy': fcorr_xy,'fcorr_xz':  fcorr_xz, 'fcorr_yz': fcorr_yz})
                
            else:
                feat_dict.update({'fdc_x': np.nan, 'fdc_y':  np.nan,'fdc_z':  np.nan, 'fdc_r': np.nan})
                feat_dict.update({'feng_x':  np.nan, 'feng_y':  np.nan, 'feng_z':   np.nan, 'feng_r':   np.nan})
                feat_dict.update({'fent_x': np.nan, 'fent_y':  np.nan,'fent_z':  np.nan, 'fent_r': np.nan})
                feat_dict.update({'fcorr_xy': np.nan,'fcorr_xz':  np.nan, 'fcorr_yz': np.nan})
        elif feature == 'psd':
            if not local_dict['skip_fft']:
                fs = local_dict['fs']
                psd_window = signal.get_window('boxcar', len(local_dict['fx'])) # do not pass this window
                freqs_x, pxx_denx = signal.periodogram(local_dict['fx'], window=psd_window, fs=fs)
                freqs_y, pxx_deny = signal.periodogram(local_dict['fy'], window=psd_window, fs=fs)
                freqs_z, pxx_denz = signal.periodogram(local_dict['fz'], window=psd_window, fs=fs)
                freqs_r, pxx_denr = signal.periodogram(local_dict['fr'], window=psd_window, fs=fs)
                feat_dict.update({'psd_mean_x': np.mean(pxx_denx), 'psd_mean_y': np.mean(pxx_deny),
                                  'psd_mean_z': np.mean(pxx_denz), 'psd_mean_r': np.mean(pxx_denr)})
                
                feat_dict.update({'psd_max_x': np.max(pxx_denx), 
                                  'psd_max_y': np.max(pxx_deny),
                                  'psd_max_z': np.max(pxx_denz), 
                                  'psd_max_r': np.max(pxx_denr)})
                
                
                freqs_05_3_x = np.argwhere((freqs_x >= 0.5) & (freqs_x <= 3))
                freqs_05_3_y = np.argwhere((freqs_y >= 0.5) & (freqs_y <= 3))
                freqs_05_3_z = np.argwhere((freqs_z >= 0.5) & (freqs_z <= 3))
                freqs_05_3_r = np.argwhere((freqs_r >= 0.5) & (freqs_r <= 3))
                
                
                # max b/w 0.3 - 3Hz 
                # 0.5 - 3 Hz if missing, maybe not 0.0
                feat_dict.update({'psd_max_x_05_3': np.max(pxx_denx[freqs_05_3_x]) if freqs_05_3_x.any() else 0.0, 
                  'psd_max_y_05_3': np.max(pxx_deny[freqs_05_3_y]) if freqs_05_3_y.any() else 0.0,
                  'psd_max_z_05_3': np.max(pxx_denz[freqs_05_3_z]) if freqs_05_3_z.any() else 0.0,
                  'psd_max_r_05_3': np.max(pxx_denr[freqs_05_3_r]) if freqs_05_3_r.any() else 0.0})
            else:
                feat_dict.update({'psd_mean_x': np.nan, 'psd_mean_y':np.nan,
                                  'psd_mean_z': np.nan, 'psd_mean_r': np.nan})
                feat_dict.update({'psd_max_x': np.nan, 
                                  'psd_max_y': np.nan,
                                  'psd_max_z': np.nan, 
                                  'psd_max_r': np.nan})
                feat_dict.update({'psd_max_x_05_3': np.nan, 
                  'psd_max_y_05_3': np.nan,
                  'psd_max_z_05_3': np.nan,
                  'psd_max_r_05_3': np.nan})
        elif feature == 'lmbs':
            if not local_dict['skip_td']:
                lmb_f_05_3 = np.linspace(0.5, 3, 10000)
                lmb_psd_x = signal.lombscargle(df_fw.timestamp, df_fw.double_x, lmb_f_05_3, normalize=False)
                lmb_psd_y = signal.lombscargle(df_fw.timestamp, df_fw.double_y, lmb_f_05_3, normalize=False)
                lmb_psd_z = signal.lombscargle(df_fw.timestamp, df_fw.double_z, lmb_f_05_3, normalize=False)
                
                feat_dict.update({'lmb_psd_max_x_05_3': np.max(lmb_psd_x) if lmb_psd_x.any() else 0.0, 
                  'lmb_psd_max_y_05_3': np.max(lmb_psd_y) if lmb_psd_y.any() else 0.0,
                  'lmb_psd_max_z_05_3': np.max(lmb_psd_z) if lmb_psd_z.any() else 0.0})
            else:
                feat_dict.update({'lmb_psd_max_x_05_3': np.nan, 
                  'lmb_psd_max_y_05_3': np.nan,
                  'lmb_psd_max_z_05_3': np.nan})
                
                
    return pd.Series(feat_dict)

In [22]:
df_acc_sample['time'] = pd.to_datetime(df_acc_sample.timestamp, unit='ms')

df_acc_sample['window'] = ((df_acc_sample.time - df_acc_sample.time[0]).dt.total_seconds()/ (60)).sort_values().round(0).astype(int)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  This is separate from the ipykernel package so we can avoid doing imports until


In [24]:
df_acc_sample_feature_windows = df_acc_sample.groupby(['window'])[['double_x', 'double_y', 'double_z', 'timestamp']].apply(featurize_window_opt, acc_features, 2, 1)

In [47]:
df_acc['time'] = pd.to_datetime(df_acc.timestamp, unit='ms')

df_acc['window'] = ((df_acc.time - df_acc.time[0]).dt.total_seconds()/ (60)).sort_values().round(0).astype(int)

In [49]:
df_acc_feature_windows = df_acc.groupby(['window'])[['double_x', 'double_y', 'double_z', 'timestamp']].apply(featurize_window_opt, acc_features, 2, 1)

KeyboardInterrupt: 

In [31]:
df_acc_feature_windows

Unnamed: 0_level_0,start_timestamp,end_timestamp,sample_count,int_mean,int_std,int_min,int_25,int_50,int_75,int_rms,...,psd_mean_z,psd_mean_r,psd_max_x,psd_max_y,psd_max_z,psd_max_r,psd_max_x_05_3,psd_max_y_05_3,psd_max_z_05_3,psd_max_r_05_3
window,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,1.550607e+12,1.550607e+12,152.0,1.003335,0.132639,0.983094,1.001129,1.002360,1.003878,1.003335,...,0.000189,0.000191,0.000159,0.000156,0.000947,0.000954,0.0,0.0,0.0,0.0
1,1.550607e+12,1.550607e+12,303.0,1.003545,0.176204,0.935165,1.001395,1.002838,1.004179,1.003545,...,0.001650,0.001674,0.000998,0.001043,0.006499,0.006603,0.0,0.0,0.0,0.0
2,1.550607e+12,1.550607e+12,302.0,1.003043,0.135142,0.943832,1.001177,1.002456,1.004111,1.003043,...,0.000398,0.000418,0.001649,0.006405,0.003400,0.003648,0.0,0.0,0.0,0.0
3,1.550607e+12,1.550607e+12,302.0,1.003239,0.134469,0.950378,1.001843,1.002931,1.004299,1.003239,...,0.000658,0.000660,0.000593,0.000700,0.002759,0.002784,0.0,0.0,0.0,0.0
4,1.550607e+12,1.550607e+12,303.0,1.003446,0.410931,0.427597,1.001094,1.002709,1.004004,1.003446,...,0.025452,0.019282,1.872125,0.043718,0.215597,0.053157,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
66488,1.554596e+12,1.554596e+12,302.0,1.003276,0.057207,0.995834,1.002496,1.003161,1.003890,1.003276,...,0.000017,0.000017,0.000368,0.002654,0.000075,0.000073,0.0,0.0,0.0,0.0
66489,1.554596e+12,1.554596e+12,302.0,1.003061,0.072568,0.987869,1.002096,1.003025,1.003848,1.003061,...,0.000035,0.000035,0.001108,0.001981,0.000197,0.000195,0.0,0.0,0.0,0.0
66490,1.554596e+12,1.554596e+12,302.0,1.002629,0.090097,0.964783,1.002127,1.003066,1.004017,1.002629,...,0.000134,0.000135,0.001252,0.003741,0.000870,0.000865,0.0,0.0,0.0,0.0
66491,1.554596e+12,1.554596e+12,302.0,1.003348,0.067722,0.993469,1.002374,1.003222,1.004099,1.003348,...,0.000037,0.000037,0.000589,0.001772,0.000176,0.000180,0.0,0.0,0.0,0.0


In [40]:
len(df_acc.window.unique())

37508

In [43]:
df_acc_feature_windows.dropna()

Unnamed: 0_level_0,start_timestamp,end_timestamp,sample_count,int_mean,int_std,int_min,int_25,int_50,int_75,int_rms,...,psd_mean_z,psd_mean_r,psd_max_x,psd_max_y,psd_max_z,psd_max_r,psd_max_x_05_3,psd_max_y_05_3,psd_max_z_05_3,psd_max_r_05_3
window,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,1.550607e+12,1.550607e+12,152.0,1.003335,0.132639,0.983094,1.001129,1.002360,1.003878,1.003335,...,0.000189,0.000191,0.000159,0.000156,0.000947,0.000954,0.0,0.0,0.0,0.0
1,1.550607e+12,1.550607e+12,303.0,1.003545,0.176204,0.935165,1.001395,1.002838,1.004179,1.003545,...,0.001650,0.001674,0.000998,0.001043,0.006499,0.006603,0.0,0.0,0.0,0.0
2,1.550607e+12,1.550607e+12,302.0,1.003043,0.135142,0.943832,1.001177,1.002456,1.004111,1.003043,...,0.000398,0.000418,0.001649,0.006405,0.003400,0.003648,0.0,0.0,0.0,0.0
3,1.550607e+12,1.550607e+12,302.0,1.003239,0.134469,0.950378,1.001843,1.002931,1.004299,1.003239,...,0.000658,0.000660,0.000593,0.000700,0.002759,0.002784,0.0,0.0,0.0,0.0
4,1.550607e+12,1.550607e+12,303.0,1.003446,0.410931,0.427597,1.001094,1.002709,1.004004,1.003446,...,0.025452,0.019282,1.872125,0.043718,0.215597,0.053157,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
66488,1.554596e+12,1.554596e+12,302.0,1.003276,0.057207,0.995834,1.002496,1.003161,1.003890,1.003276,...,0.000017,0.000017,0.000368,0.002654,0.000075,0.000073,0.0,0.0,0.0,0.0
66489,1.554596e+12,1.554596e+12,302.0,1.003061,0.072568,0.987869,1.002096,1.003025,1.003848,1.003061,...,0.000035,0.000035,0.001108,0.001981,0.000197,0.000195,0.0,0.0,0.0,0.0
66490,1.554596e+12,1.554596e+12,302.0,1.002629,0.090097,0.964783,1.002127,1.003066,1.004017,1.002629,...,0.000134,0.000135,0.001252,0.003741,0.000870,0.000865,0.0,0.0,0.0,0.0
66491,1.554596e+12,1.554596e+12,302.0,1.003348,0.067722,0.993469,1.002374,1.003222,1.004099,1.003348,...,0.000037,0.000037,0.000589,0.001772,0.000176,0.000180,0.0,0.0,0.0,0.0
