In [1]:
from pathlib import Path
import pandas as pd
import os
import numpy as np
import time
import datetime
import h5py

%load_ext autoreload
%autoreload 2

In [2]:
# the project root is the parent of the current working directory
root_dir = Path.cwd().parent
# folder holding the raw Airbus files, located below the project root
path_data_raw_folder = root_dir.joinpath('data/raw/airbus/')

# show both locations, one per line
print(root_dir, path_data_raw_folder, sep='\n')

c:\_Python\PyPHM
c:\_Python\PyPHM\data\raw\airbus


**Training Data Description**

The training dataset is composed of 1677 one-minute sequences of accelerometer data, sampled at 1024 Hz, measured on test helicopters at various locations, at various angles (X, Y, Z), on different flights. All data have been multiplied by a factor so that absolute values are meaningless, but no other normalization procedure was carried out. All sequences are considered normal and should be used to learn the normal behaviour of accelerometer data.

In [32]:
# read the object stored under the 'dftrain' key of dftrain.h5
df_train = pd.read_hdf(path_data_raw_folder.joinpath('dftrain.h5'), key='dftrain')

# label column: every training row gets y = 0 (no anomaly)
df_train['y'] = 0

# preview the first rows
df_train.head()


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,61431,61432,61433,61434,61435,61436,61437,61438,61439,y
0,0.041259,0.041259,0.032573,0.023887,0.029315,0.041259,0.045602,0.038001,0.030401,0.032573,...,-0.0076,-0.003257,0.065145,0.047773,-0.009772,0.031487,0.096632,0.077089,0.040173,0
1,-0.211722,-0.264924,-0.274696,-0.236694,-0.156349,-0.059716,0.005429,0.046687,0.153091,0.28121,...,-0.004343,-0.241037,-0.636252,-0.953292,-0.980436,-0.846888,-0.838202,-0.880546,-0.739398,0
2,0.214105,0.15493,0.13664,0.013987,-0.038733,-0.015063,-0.111894,-0.104363,0.04734,-0.054871,...,0.37549,0.699337,0.965085,1.086662,1.132926,1.279249,1.296464,0.937112,0.451879,0
3,-0.154837,-0.127768,-0.217638,-0.28477,-0.299929,-0.270694,-0.07796,0.092036,0.076877,0.154837,...,-0.171079,-0.036814,-0.024904,0.0314,0.140761,-0.011911,-0.173244,-0.063884,0.081208,0
4,-1.02278,-0.916376,-0.676425,-0.461445,-0.330069,-0.12269,0.178064,0.489675,0.799115,0.931577,...,1.134613,0.83603,0.479903,0.109661,-0.285553,-0.628651,-0.916376,-1.010837,-0.804544,0


In [19]:
# sampling rate of the signals, in Hz
freq = 1024

# length of each signal, in seconds (one minute)
meas_time = 60

# number of samples per signal (duration x rate), shown next to the
# shape of the training frame for comparison
print(meas_time * freq)
df_train.shape

61440


(1677, 61440)

In [28]:
# read the object stored under the 'dfvalid' key of dfvalid.h5
df_val = pd.read_hdf(path_data_raw_folder.joinpath('dfvalid.h5'), key='dfvalid')

# report (rows, columns), then preview the first rows
print(df_val.shape)
df_val.head()

(594, 61440)


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,61430,61431,61432,61433,61434,61435,61436,61437,61438,61439
0,0.07065,0.085524,0.076847,0.085524,0.120229,0.140061,0.107834,0.04834,0.014874,0.030987,...,0.018592,-0.017353,-0.049579,-0.061974,-0.022311,0.052058,0.091721,0.075608,0.045861,0.027268
1,-0.300685,-0.882656,-1.733789,-2.181179,-1.942328,-1.71924,-1.981126,-2.323034,-2.325459,-2.349708,...,0.454665,-0.782024,-1.725302,-1.829572,-1.170004,-0.278861,-0.152767,-1.035424,-2.312123,-3.199628
2,-0.056398,-0.152028,-0.05885,0.140994,0.170419,0.15448,0.270954,0.359228,0.334708,0.34942,...,0.023295,-0.019617,-0.062528,-0.094405,-0.105439,-0.12383,-0.166741,-0.159385,-0.106665,-0.131186
3,0.030618,0.020412,0.058747,0.124712,0.155828,0.161553,0.167777,0.170515,0.165038,0.141141,...,-0.073682,-0.052524,-0.023897,-0.00921,0.016429,0.058,0.097828,0.114755,0.09036,0.054764
4,-1.071597,-6.91254,-13.677612,-16.463519,-15.790084,-11.65362,-7.668311,-9.942997,-14.321553,-13.704648,...,-5.227723,-6.107613,-6.577051,-6.123589,-5.324806,-4.965969,-4.464579,-3.442138,-3.794831,-6.13342


In [29]:
# read dfvalid_groundtruth.csv into a dataframe, parsing both the seqID and
# anomaly columns as integers
df_val_labels = pd.read_csv(
    path_data_raw_folder.joinpath('dfvalid_groundtruth.csv'),
    dtype=dict(seqID=int, anomaly=int),
)
df_val_labels.head()

Unnamed: 0,seqID,anomaly
0,0,0
1,1,1
2,2,0
3,3,0
4,4,1


In [30]:
# Attach the ground-truth anomaly label to df_val as a column named y.
# The row index of df_val is matched against the seqID column of df_val_labels.
#
# The guard keeps this cell idempotent: df_val is overwritten below, so
# re-running the cell unguarded would merge the labels a second time and leave
# two columns named 'y'.
if 'y' not in df_val.columns:
    n_val_rows = len(df_val)

    df_val = (
        df_val
        # validate= makes pandas raise a MergeError if a sequence matches more
        # than one label row, instead of silently duplicating that sequence
        .merge(df_val_labels, left_index=True, right_on='seqID', validate='one_to_one')
        # seqID was only needed as the join key; the label column is renamed
        # to y, the same name used for the label column of df_train
        .drop('seqID', axis=1)
        .rename(columns={'anomaly': 'y'})
    )

    # the merge is an inner join, so a sequence without a label would be
    # dropped silently; fail loudly instead
    assert len(df_val) == n_val_rows, 'some validation sequences have no ground-truth label'

In [31]:
# preview the first five rows of df_val
df_val.head(n=5)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,61431,61432,61433,61434,61435,61436,61437,61438,61439,y
0,0.07065,0.085524,0.076847,0.085524,0.120229,0.140061,0.107834,0.04834,0.014874,0.030987,...,-0.017353,-0.049579,-0.061974,-0.022311,0.052058,0.091721,0.075608,0.045861,0.027268,0
1,-0.300685,-0.882656,-1.733789,-2.181179,-1.942328,-1.71924,-1.981126,-2.323034,-2.325459,-2.349708,...,-0.782024,-1.725302,-1.829572,-1.170004,-0.278861,-0.152767,-1.035424,-2.312123,-3.199628,1
2,-0.056398,-0.152028,-0.05885,0.140994,0.170419,0.15448,0.270954,0.359228,0.334708,0.34942,...,-0.019617,-0.062528,-0.094405,-0.105439,-0.12383,-0.166741,-0.159385,-0.106665,-0.131186,0
3,0.030618,0.020412,0.058747,0.124712,0.155828,0.161553,0.167777,0.170515,0.165038,0.141141,...,-0.052524,-0.023897,-0.00921,0.016429,0.058,0.097828,0.114755,0.09036,0.054764,0
4,-1.071597,-6.91254,-13.677612,-16.463519,-15.790084,-11.65362,-7.668311,-9.942997,-14.321553,-13.704648,...,-6.107613,-6.577051,-6.123589,-5.324806,-4.965969,-4.464579,-3.442138,-3.794831,-6.13342,1


In [10]:
# samples per sequence: 1024 Hz sampling rate x 60 s duration
1024 * 60

61440