In [1]:
import pandas as pd
import glob
import numpy as np
from tsfresh import extract_features, extract_relevant_features
%matplotlib inline

import os
cdir = os.getcwd()
print(cdir)
os.chdir(cdir + '/train/train')
print(os.getcwd())

N:\activity-recognition-abc
N:\activity-recognition-abc\train\train


In [2]:
# Multi-head CNN–RNN for multi-time series anomaly detection: An industrial case study

In [3]:
def process(sensordata, timestamps):
    # process
    
    return sensordata

In [4]:
import tensorflow as tf
import random
import numpy as np
random.seed(1)
np.random.seed(1)
tf.random.set_seed(1997)

In [5]:
# there are 3 subjects
subject1 = {}
subject2 = {}
subject3 = {}

# we will load every single right arm data, separate based on subject id, do feature extraction, run t-SNE

all_sensors = ['right_arm', 'right_wrist', 'left_hip', 'left_wrist'] #, 'mocap']

data_folder = [f'{sensor}/*.csv' for sensor in all_sensors]

print(data_folder)

files = glob.glob(data_folder[0])

# processing will be done as numpy array

subject1['data'] = []
subject2['data'] = []
subject3['data'] = []
 
# data format : data -> id, process(timeseries, timestamps) : right_arm as numpy array (missing data will be imputed)


# id is generated by concatenating INT(subject+trial)

for f in files:
    #print(f)
    c_sub = {} # current subject
    if f.split(os.sep)[1].split('_')[0] == 'subject1':
        
        c_sub['id'] = int(f.split(os.sep)[1].split('_')[0][-1] + f.split(os.sep)[1].split('_')[2].split('.')[0])
        #print(ra['id'])
        
        for sensor in all_sensors:
            #print(sensor)
            ra = pd.read_csv(sensor + os.sep + f.split(os.sep)[1])
            #print(ra)
            ra.sort_values(by=['timestamp'], inplace = True)
            c_sub[sensor] = {}
            if sensor == 'mocap':
                for axis in mocap_axis:
                    c_sub[sensor][axis] = process(ra[axis], ra['timestamp'])
            else:
                for axis in ['X', 'Y', 'Z']:
                    c_sub[sensor][axis] = process(ra[axis], ra['timestamp'])
                
        subject1['data'].append(c_sub)

            
    elif f.split(os.sep)[1].split('_')[0] == 'subject2':
        
        c_sub['id'] = int(f.split(os.sep)[1].split('_')[0][-1] + f.split(os.sep)[1].split('_')[2].split('.')[0])
        #print(ra['id'])
        
        for sensor in all_sensors:
            ra = pd.read_csv(sensor + os.sep + f.split(os.sep)[1])
            #print(ra)
            ra.sort_values(by=['timestamp'], inplace = True)
            c_sub[sensor] = {}
            if sensor == 'mocap':
                for axis in mocap_axis:
                    c_sub[sensor][axis] = process(ra[axis], ra['timestamp'])
            else:
                for axis in ['X', 'Y', 'Z']:
                    c_sub[sensor][axis] = process(ra[axis], ra['timestamp'])
                
        subject2['data'].append(c_sub)
            
    elif f.split(os.sep)[1].split('_')[0] == 'subject3':
        
        c_sub['id'] = int(f.split(os.sep)[1].split('_')[0][-1] + f.split(os.sep)[1].split('_')[2].split('.')[0])
        #print(ra['id'])
        
        for sensor in all_sensors:
            ra = pd.read_csv(sensor + os.sep + f.split(os.sep)[1])
            #print(ra)
            ra.sort_values(by=['timestamp'], inplace = True)
            c_sub[sensor] = {}
            if sensor == 'mocap':
                for axis in mocap_axis:
                    try:
                        c_sub[sensor][axis] = process(ra[axis], ra['timestamp'])
                    except:
                        c_sub[sensor][axis] = [0]
            else:
                for axis in ['X', 'Y', 'Z']:
                    c_sub[sensor][axis] = process(ra[axis], ra['timestamp'])
        
        subject3['data'].append(c_sub)
    else:
        print('either new subject or a bug')
        
        
labels = pd.read_csv("labels.txt", sep=' ', header=None)
print(labels.head())
labels = labels[0].str.split(",", n=2, expand=True)
labels.columns = ['file_id', 'macro', 'micro'] #give names to the columns
labels.index = labels['file_id'] #use the file id as index to make it searchable by file_id
print(labels.head())

# label generation
subject1['label_mac'] = {}
subject2['label_mac'] = {}
subject3['label_mac'] = {}

subject1['label_mic'] = {}
subject2['label_mic'] = {}
subject3['label_mic'] = {}

for i in range(len(labels)):
    #print(labels.iloc[i]['file_id'])
    #print(labels.iloc[i]['file_id'].split('_')[0][-1])
    tid = int(labels.iloc[i]['file_id'].split('_')[0][-1] + labels.iloc[i]['file_id'].split('_')[-1])
    #print(tid)
    label = labels.iloc[i]['macro']
    label_mic = labels.iloc[i]['micro'].split(',')[:-1]
    #print(label)
    if labels.iloc[i]['file_id'].split('_')[0][-1] == '1':
        subject1['label_mac'][tid] = label
        subject1['label_mic'][tid] = label_mic
    elif labels.iloc[i]['file_id'].split('_')[0][-1] == '2':
        subject2['label_mac'][tid] = label
        subject2['label_mic'][tid] = label_mic
    elif labels.iloc[i]['file_id'].split('_')[0][-1] == '3':
        subject3['label_mac'][tid] = label
        subject3['label_mic'][tid] = label_mic
    else:
        print('some bug')
        
        
# re-formatting dataset for training

X = []
y = []
y_ml = []  # multi-label

for i in range(len(subject1['data'])):
    tid = subject1['data'][i]['id']
    y.append(subject1['label_mac'][tid])
    y_ml.append(subject1['label_mic'][tid])
    # X shape -> [ip1, ip2, ip3, ip4] ip1 = (80, len, channel) -> 4, 80, len

for sensor in all_sensors:
    cs_data = []
    for i in range(len(subject1['data'])):
        sub_data = []
        if sensor == 'mocap':
            for sig in mocap_axis:
                sub_data.append(np.array(subject1['data'][i][sensor][sig]))
        else:
            for sig in ['X', 'Y', 'Z']:
                sub_data.append(np.array(subject1['data'][i][sensor][sig]))
        sub_data = np.array(sub_data)
        # print(sub_data.shape)
        sub_data = np.swapaxes(sub_data, 0, 1)
        cs_data.append(sub_data)
    X.append(cs_data)
    
    
from tensorflow.keras.utils import to_categorical
from sklearn.preprocessing import LabelEncoder
import numpy as np

label_encoder = LabelEncoder()
vec = label_encoder.fit_transform(y)

y_ohe = to_categorical(vec,len(set(vec)))

from sklearn.preprocessing import MultiLabelBinarizer

mlb = MultiLabelBinarizer()
y_ml_ohe = mlb.fit_transform(y_ml)

# validatin on subject2, 3

# re-formatting dataset for training

X2 = []
y2 = []
y_ml2 = []

for i in range(len(subject2['data'])):
    tid = subject2['data'][i]['id']
    y2.append(subject2['label_mac'][tid])
    y_ml2.append(subject2['label_mic'][tid])
    # X shape -> [ip1, ip2, ip3, ip4] ip1 = (80, len, channel) -> 4, 80, len

for sensor in all_sensors:
    cs_data = []
    for i in range(len(subject2['data'])):
        sub_data = []
        if sensor == 'mocap':
            for sig in mocap_axis:
                sub_data.append(np.array(subject2['data'][i][sensor][sig]))
        else:
            for sig in ['X', 'Y', 'Z']:
                sub_data.append(np.array(subject2['data'][i][sensor][sig]))
        sub_data = np.array(sub_data)
        # print(sub_data.shape)
        sub_data = np.swapaxes(sub_data, 0, 1)
        cs_data.append(sub_data)
    X2.append(cs_data)
    
label_encoder = LabelEncoder()
vec = label_encoder.fit_transform(y2)

y2_ohe = to_categorical(vec,len(set(vec)))

mlb = MultiLabelBinarizer()
y_ml2_ohe = mlb.fit_transform(y_ml2)

# validatin on subject2, 3

# re-formatting dataset for training

X3 = []
y3 = []
y_ml3 = []

for i in range(len(subject3['data'])):
    tid = subject3['data'][i]['id']
    y3.append(subject3['label_mac'][tid])
    y_ml3.append(subject3['label_mic'][tid])
    # X shape -> [ip1, ip2, ip3, ip4] ip1 = (80, len, channel) -> 4, 80, len

for sensor in all_sensors:
    cs_data = []
    for i in range(len(subject3['data'])):
        sub_data = []
        if sensor == 'mocap':
            for sig in mocap_axis:
                sub_data.append(np.array(subject3['data'][i][sensor][sig]))
        else:
            for sig in ['X', 'Y', 'Z']:
                sub_data.append(np.array(subject3['data'][i][sensor][sig]))
        sub_data = np.array(sub_data)
        # print(sub_data.shape)
        sub_data = np.swapaxes(sub_data, 0, 1)
        cs_data.append(sub_data)
    X3.append(cs_data)
    

label_encoder = LabelEncoder()
vec = label_encoder.fit_transform(y3)

y3_ohe = to_categorical(vec,len(set(vec)))

mlb = MultiLabelBinarizer()
y_ml3_ohe = mlb.fit_transform(y_ml3)

['right_arm/*.csv', 'right_wrist/*.csv', 'left_hip/*.csv', 'left_wrist/*.csv']
                                           0
0           subject2_file_457,sandwich,Take,
1      subject2_file_679,sandwich,Wash,Take,
2        subject2_file_95,sandwich,Cut,Wash,
3  subject2_file_899,sandwich,other,Cut,Put,
4            subject2_file_368,sandwich,Put,
                             file_id     macro           micro
file_id                                                       
subject2_file_457  subject2_file_457  sandwich           Take,
subject2_file_679  subject2_file_679  sandwich      Wash,Take,
subject2_file_95    subject2_file_95  sandwich       Cut,Wash,
subject2_file_899  subject2_file_899  sandwich  other,Cut,Put,
subject2_file_368  subject2_file_368  sandwich            Put,


In [6]:
label_encoder.classes_

array(['cereal', 'fruitsalad', 'sandwich'], dtype='<U10')

In [7]:
mlb.classes_

array(['Add', 'Cut', 'Mix', 'Open', 'Peel', 'Pour', 'Put', 'Take', 'Wash',
       'other'], dtype=object)

In [8]:
len(X2)

4

In [9]:
3*29

87

In [10]:
# 4, 80, len, 3

In [11]:
import tensorflow as tf
from tensorflow.keras.layers import Add, Activation, Conv1D, Dense, LSTM, Bidirectional, Input, GlobalMaxPooling1D, Concatenate, BatchNormalization
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.models import Model

In [12]:
all_sensors

['right_arm', 'right_wrist', 'left_hip', 'left_wrist']

In [13]:
from tensorflow.keras.layers import LeakyReLU

In [14]:
# CNN-head
import tensorflow.keras as keras
def create_model():

    n_sensors = 4
    n_filters = [3, 3, 3, 3] # X, Y, Z


    # make the input layers
    ips = []

    n_f = 0
    for sensor in all_sensors:
        ips.append(Input(shape = (None, n_filters[n_f]), name = sensor + '_ipX'))
        n_f += 1

    print(f'Input tensors: {ips}')
    
    # kernel size = 1 == embedding layer

    convs = []
    for ip in ips:
        conv = Conv1D(filters = 32, kernel_size = 1, padding = 'same', activation = None, name = ip.name.split(':')[0] + '_conv')(ip)
        convs.append(conv)
        
    # batch-normalization
    bns = []
    for conv in convs:
        bn = BatchNormalization()(conv)
        bns.append(bn)

    convs2 = []
    for bn in bns:
        conv2 = Conv1D(filters = 16, kernel_size = 3, dilation_rate = 2, 
                       kernel_regularizer = keras.regularizers.l1_l2(l1=0.01, l2=0.002),
                       bias_regularizer = keras.regularizers.l1_l2(l1=0.01, l2=0.002), 
                       padding = 'same', activation = 'relu')(bn)
        convs2.append(conv2)
        
    # batch-normalization
    bns2 = []
    for conv2 in convs2:
        bn2 = BatchNormalization()(conv2)
        bns2.append(bn2)
        
    # residual
    
    res = []
    cnt_i = 0
    for bn2 in bns2:
        rs = bn2 #Add()([bn2, bns[cnt_i]])
        rs = LeakyReLU(alpha=0.3)(rs)
        cnt_i += 1
        res.append(rs)



    # concatenation

    concat = Concatenate(axis=1, name = 'concat')(res)

    concat_conv = Conv1D(filters = 16, kernel_size = 1, padding = 'same', dilation_rate = 2,
                        kernel_regularizer = keras.regularizers.l1_l2(l1=0.01, l2=0.002),
                        bias_regularizer = keras.regularizers.l1_l2(l1=0.01, l2=0.002),
                        activation = 'relu', name = 'concat_conv')(concat)
    
    bn3 = BatchNormalization()(concat_conv)
    con_conv2 = Conv1D(filters = 32, kernel_size = 3, padding = 'same', activation = None, name = 'con_conv2')(bn3)
    bn4 = BatchNormalization()(con_conv2)
    
    #res2 = Add()([bn4, bn3])
    res2 = LeakyReLU(alpha=0.3)(bn4)
    
    gmp = GlobalMaxPooling1D()(res2)

    # simple FC
    final_softmax = Dense(3, activation = 'sigmoid')(gmp)
    simple_conv = Model(ips, final_softmax)
    return simple_conv

In [15]:
simple_conv = create_model()

Input tensors: [<tf.Tensor 'right_arm_ipX:0' shape=(None, None, 3) dtype=float32>, <tf.Tensor 'right_wrist_ipX:0' shape=(None, None, 3) dtype=float32>, <tf.Tensor 'left_hip_ipX:0' shape=(None, None, 3) dtype=float32>, <tf.Tensor 'left_wrist_ipX:0' shape=(None, None, 3) dtype=float32>]


In [16]:
simple_conv.summary()

Model: "model"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
right_arm_ipX (InputLayer)      [(None, None, 3)]    0                                            
__________________________________________________________________________________________________
right_wrist_ipX (InputLayer)    [(None, None, 3)]    0                                            
__________________________________________________________________________________________________
left_hip_ipX (InputLayer)       [(None, None, 3)]    0                                            
__________________________________________________________________________________________________
left_wrist_ipX (InputLayer)     [(None, None, 3)]    0                                            
______________________________________________________________________________________________

In [17]:
simple_conv.compile(loss='categorical_crossentropy', metrics=['acc', 'mae'], optimizer=Adam(lr=0.001))

In [18]:
X[0][0].shape

(1399, 3)

In [19]:
len(X[0])

80

In [20]:
y

['sandwich',
 'sandwich',
 'cereal',
 'fruitsalad',
 'cereal',
 'sandwich',
 'sandwich',
 'sandwich',
 'sandwich',
 'fruitsalad',
 'sandwich',
 'sandwich',
 'fruitsalad',
 'cereal',
 'cereal',
 'cereal',
 'sandwich',
 'sandwich',
 'sandwich',
 'cereal',
 'cereal',
 'cereal',
 'fruitsalad',
 'sandwich',
 'cereal',
 'sandwich',
 'cereal',
 'cereal',
 'sandwich',
 'cereal',
 'cereal',
 'cereal',
 'sandwich',
 'fruitsalad',
 'fruitsalad',
 'sandwich',
 'fruitsalad',
 'cereal',
 'fruitsalad',
 'fruitsalad',
 'cereal',
 'sandwich',
 'sandwich',
 'sandwich',
 'sandwich',
 'sandwich',
 'cereal',
 'cereal',
 'fruitsalad',
 'cereal',
 'fruitsalad',
 'cereal',
 'fruitsalad',
 'cereal',
 'sandwich',
 'sandwich',
 'sandwich',
 'sandwich',
 'sandwich',
 'sandwich',
 'sandwich',
 'sandwich',
 'fruitsalad',
 'sandwich',
 'cereal',
 'sandwich',
 'fruitsalad',
 'fruitsalad',
 'sandwich',
 'sandwich',
 'sandwich',
 'fruitsalad',
 'sandwich',
 'sandwich',
 'cereal',
 'cereal',
 'cereal',
 'sandwich',
 'sa

In [21]:
len(X[0])

80

In [22]:
np.array([[0, 0, 0]]).shape

(1, 3)

In [21]:
def train(simple_conv, X, y_ohe, EPOCH, train_split=1.0):
    # single zero padding for NULL datas

    for i in range(len(X)):
        for j in range(len(X[0])):
            if len(X) == 5 and i == len(X)-1: # assuming mocap always comes in the last index
                if len(X[i][j]) == 0:
                    X[i][j] = np.zeros((1, 87))
            else:
                if len(X[i][j]) == 0:
                    X[i][j] = np.array([[0., 0., 0.]], dtype = np.float32)

    report_acc = 0.0
    for epoch in range(EPOCH):
        print(f'Training epoch {epoch} ...')
        avg_loss = 0.0
        avg_acc = 0.0
        avg_mae = 0.0

        for sample_i in tqdm(range(int(len(X[0])*train_split))):
            [train_loss, acc, mae]   =  simple_conv.train_on_batch([X[0][sample_i].reshape(1,len(X[0][sample_i]),3), 
                                        X[1][sample_i].reshape(1,len(X[1][sample_i]),3), 
                                        X[2][sample_i].reshape(1,len(X[2][sample_i]),3), 
                                        X[3][sample_i].reshape(1,len(X[3][sample_i]),3)],
                                        #X[4][sample_i].reshape(1,len(X[4][sample_i]),3*29)], 
                                        y_ohe[sample_i].reshape(1,3))
            avg_loss += train_loss/(len(X[0])*train_split)
            avg_acc += acc/(len(X[0])*train_split)
            avg_mae += mae/(len(X[0])*train_split)
        report_acc = avg_acc
        print(f'acc: {avg_acc} mae: {avg_mae} loss: {avg_loss}')
        #print('Running validation ...')
        avg_loss = 0.0
        avg_acc = 0.0
        avg_mae = 0.0
        for sample_i in tqdm(range(int(len(X[0])*train_split), len(X[0]))):
            [test_loss, acc, mae]   =  simple_conv.test_on_batch([X[0][sample_i].reshape(1,len(X[0][sample_i]),3), 
                                        X[1][sample_i].reshape(1,len(X[1][sample_i]),3), 
                                        X[2][sample_i].reshape(1,len(X[2][sample_i]),3), 
                                        X[3][sample_i].reshape(1,len(X[3][sample_i]),3)],
                                        #X[4][sample_i].reshape(1,len(X[4][sample_i]),3*29)], 
                                        y_ohe[sample_i].reshape(1,3))
            avg_loss += test_loss/(len(X[0])*(1-train_split))
            avg_acc += acc/(len(X[0])*(1-train_split))
            avg_mae += mae/(len(X[0])*(1-train_split))
            
        #print(f'test acc: {avg_acc} mae: {avg_mae} loss: {avg_loss}')
        
    return simple_conv, report_acc

In [22]:
def test(simple_conv, X2, y2_ohe):
    for i in range(len(X2)):
        for j in range(len(X2[0])):
            if len(X2) == 5 and i == len(X2)-1: # assuming mocap always comes in the last index
                if len(X2[i][j]) == 0:
                    X2[i][j] = np.zeros((1, 87))
            else:
                if len(X2[i][j]) == 0:
                    X2[i][j] = np.array([[0., 0., 0.]], dtype=np.float32)

    print('Running test on subject ...')
    avg_loss = 0.0
    avg_acc = 0.0
    avg_mae = 0.0
    for sample_i in tqdm(range(int(len(X2[0])))):
        [test_loss, acc, mae]   =  simple_conv.test_on_batch([X2[0][sample_i].reshape(1,len(X2[0][sample_i]),3), 
                                    X2[1][sample_i].reshape(1,len(X2[1][sample_i]),3), 
                                    X2[2][sample_i].reshape(1,len(X2[2][sample_i]),3), 
                                    X2[3][sample_i].reshape(1,len(X2[3][sample_i]),3)],
                                    #X2[4][sample_i].reshape(1,len(X2[4][sample_i]),3*29)], 
                                    y2_ohe[sample_i].reshape(1,3))
        avg_loss += test_loss/(len(X2[0]))
        avg_acc += acc/(len(X2[0]))
        avg_mae += mae/(len(X2[0]))
    print(f'test acc: {avg_acc} mae: {avg_mae} loss: {avg_loss}')
    return avg_acc

In [23]:
from tensorflow.keras.activations import *

In [24]:
# train on subject1
# validate on subject2, 3

from tqdm import tqdm
from tensorflow.keras.optimizers import RMSprop

report = []

for xytst in [(X, y_ohe, 'X1'), (X2, y2_ohe, 'X2'), (X3, y3_ohe, 'X3')]:
    
    simple_conv = create_model()
    
    xytr = [a for a in [(X, y_ohe, 'X1'), (X2, y2_ohe, 'X2'), (X3, y3_ohe, 'X3')] if a!=xytst]

    EPOCH = 3
    train_split = 1.0
    lr = 0.001

    for e in range(EPOCH):
        simple_conv.compile(loss='mse', metrics=['acc', 'mae'], optimizer=Adam(learning_rate=lr))
        simple_conv, acc = train(simple_conv, xytr[0][0], xytr[0][1], 1, train_split)
        lr /= 1.5
        simple_conv.compile(loss='mse', metrics=['acc', 'mae'], optimizer=Adam(learning_rate=lr))
        simple_conv, acc2 = train(simple_conv, xytr[1][0], xytr[1][1], 1, train_split)
        lr /= 2.0
    

    print('-------------------------------------------------')
    print('Test phase')
    print(f'Testing {xytst[2]}')

    t_acc = test(simple_conv, xytst[0], xytst[1])
    report.append(f'TR: {[a[2] for a in xytr]}   TS: {xytst[2]} acc1: {acc} acc2: {acc2} test_acc: {t_acc}')


print('--------------------report----------------------')
for ta in report:
    print(ta)

Input tensors: [<tf.Tensor 'right_arm_ipX_1:0' shape=(None, None, 3) dtype=float32>, <tf.Tensor 'right_wrist_ipX_1:0' shape=(None, None, 3) dtype=float32>, <tf.Tensor 'left_hip_ipX_1:0' shape=(None, None, 3) dtype=float32>, <tf.Tensor 'left_wrist_ipX_1:0' shape=(None, None, 3) dtype=float32>]


  1%|▊                                                                                       | 1/105 [00:00<00:18,  5.70it/s]

Training epoch 0 ...


100%|██████████████████████████████████████████████████████████████████████████████████████| 105/105 [00:08<00:00, 12.24it/s]
0it [00:00, ?it/s]
  0%|                                                                                                | 0/103 [00:00<?, ?it/s]

acc: 0.5841269952910287 mae: 0.4165441257701744 loss: 7.960649926321845
Training epoch 0 ...


100%|██████████████████████████████████████████████████████████████████████████████████████| 103/103 [00:08<00:00, 12.45it/s]
0it [00:00, ?it/s]
  0%|                                                                                                | 0/105 [00:00<?, ?it/s]

acc: 0.7184466123580934 mae: 0.3162275338962925 loss: 7.137739056522407
Training epoch 0 ...


100%|██████████████████████████████████████████████████████████████████████████████████████| 105/105 [00:08<00:00, 12.18it/s]
0it [00:00, ?it/s]
  0%|                                                                                                | 0/103 [00:00<?, ?it/s]

acc: 0.6888888980661114 mae: 0.3370692828692319 loss: 6.859201980772474
Training epoch 0 ...


100%|██████████████████████████████████████████████████████████████████████████████████████| 103/103 [00:08<00:00, 12.58it/s]
0it [00:00, ?it/s]
  0%|                                                                                                | 0/105 [00:00<?, ?it/s]

acc: 0.873786413553849 mae: 0.19596958197232267 loss: 6.261079413219563
Training epoch 0 ...


100%|██████████████████████████████████████████████████████████████████████████████████████| 105/105 [00:08<00:00, 12.35it/s]
0it [00:00, ?it/s]


acc: 0.7777777861981158 mae: 0.2886730817190949 loss: 6.27485939661662
Training epoch 0 ...


100%|██████████████████████████████████████████████████████████████████████████████████████| 103/103 [00:08<00:00, 12.65it/s]
0it [00:00, ?it/s]
  5%|████▍                                                                                    | 4/80 [00:00<00:02, 37.14it/s]

acc: 0.9352750836066828 mae: 0.1467826740025558 loss: 5.901500970414539
-------------------------------------------------
Test phase
Testing X1
Running test on subject ...


100%|████████████████████████████████████████████████████████████████████████████████████████| 80/80 [00:01<00:00, 40.07it/s]


test acc: 0.47916667759418474 mae: 0.5073156227460457 loss: 8.28281902074814
Input tensors: [<tf.Tensor 'right_arm_ipX_2:0' shape=(None, None, 3) dtype=float32>, <tf.Tensor 'right_wrist_ipX_2:0' shape=(None, None, 3) dtype=float32>, <tf.Tensor 'left_hip_ipX_2:0' shape=(None, None, 3) dtype=float32>, <tf.Tensor 'left_wrist_ipX_2:0' shape=(None, None, 3) dtype=float32>]


  1%|█                                                                                        | 1/80 [00:00<00:13,  5.83it/s]

Training epoch 0 ...


100%|████████████████████████████████████████████████████████████████████████████████████████| 80/80 [00:07<00:00, 11.29it/s]
0it [00:00, ?it/s]
  0%|                                                                                                | 0/103 [00:00<?, ?it/s]

acc: 0.6291666790843006 mae: 0.40242204898968353 loss: 9.108709448575974
Training epoch 0 ...


100%|██████████████████████████████████████████████████████████████████████████████████████| 103/103 [00:08<00:00, 12.15it/s]
0it [00:00, ?it/s]
  0%|                                                                                                 | 0/80 [00:00<?, ?it/s]

acc: 0.6990291357619093 mae: 0.3307151691705548 loss: 7.21288380576569
Training epoch 0 ...


100%|████████████████████████████████████████████████████████████████████████████████████████| 80/80 [00:07<00:00, 11.27it/s]
0it [00:00, ?it/s]
  0%|                                                                                                | 0/103 [00:00<?, ?it/s]

acc: 0.6875000111758701 mae: 0.3541442323417868 loss: 6.878899931907656
Training epoch 0 ...


100%|██████████████████████████████████████████████████████████████████████████████████████| 103/103 [00:08<00:00, 12.45it/s]
0it [00:00, ?it/s]
  0%|                                                                                                 | 0/80 [00:00<?, ?it/s]

acc: 0.8478964463020991 mae: 0.2168475120151484 loss: 6.270198872945841
Training epoch 0 ...


100%|████████████████████████████████████████████████████████████████████████████████████████| 80/80 [00:07<00:00, 11.17it/s]
0it [00:00, ?it/s]
  0%|                                                                                                | 0/103 [00:00<?, ?it/s]

acc: 0.7791666764765968 mae: 0.299077669205144 loss: 6.3053110063076
Training epoch 0 ...


100%|██████████████████████████████████████████████████████████████████████████████████████| 103/103 [00:08<00:00, 12.78it/s]
0it [00:00, ?it/s]
  4%|███▎                                                                                    | 4/105 [00:00<00:02, 39.71it/s]

acc: 0.8867313968903808 mae: 0.17497054910546092 loss: 5.954891806667292
-------------------------------------------------
Test phase
Testing X2
Running test on subject ...


100%|██████████████████████████████████████████████████████████████████████████████████████| 105/105 [00:02<00:00, 42.05it/s]


test acc: 0.5841269950071971 mae: 0.41364800979915456 loss: 7.041183866773334
Input tensors: [<tf.Tensor 'right_arm_ipX_3:0' shape=(None, None, 3) dtype=float32>, <tf.Tensor 'right_wrist_ipX_3:0' shape=(None, None, 3) dtype=float32>, <tf.Tensor 'left_hip_ipX_3:0' shape=(None, None, 3) dtype=float32>, <tf.Tensor 'left_wrist_ipX_3:0' shape=(None, None, 3) dtype=float32>]


  1%|█                                                                                        | 1/80 [00:00<00:13,  6.08it/s]

Training epoch 0 ...


100%|████████████████████████████████████████████████████████████████████████████████████████| 80/80 [00:06<00:00, 11.44it/s]
0it [00:00, ?it/s]
  0%|                                                                                                | 0/105 [00:00<?, ?it/s]

acc: 0.6291666779667134 mae: 0.39992236653342866 loss: 7.711719053983687
Training epoch 0 ...


100%|██████████████████████████████████████████████████████████████████████████████████████| 105/105 [00:08<00:00, 12.56it/s]
0it [00:00, ?it/s]
  0%|                                                                                                 | 0/80 [00:00<?, ?it/s]

acc: 0.5968254072325571 mae: 0.4149003315805681 loss: 7.323923615046913
Training epoch 0 ...


100%|████████████████████████████████████████████████████████████████████████████████████████| 80/80 [00:07<00:00, 11.20it/s]
0it [00:00, ?it/s]
  0%|                                                                                                | 0/105 [00:00<?, ?it/s]

acc: 0.7416666764765969 mae: 0.31140857713762676 loss: 6.685868310928345
Training epoch 0 ...


100%|██████████████████████████████████████████████████████████████████████████████████████| 105/105 [00:08<00:00, 12.02it/s]
0it [00:00, ?it/s]
  0%|                                                                                                 | 0/80 [00:00<?, ?it/s]

acc: 0.7904761984234757 mae: 0.29069217500897754 loss: 6.465133153824581
Training epoch 0 ...


100%|████████████████████████████████████████████████████████████████████████████████████████| 80/80 [00:07<00:00, 11.07it/s]
0it [00:00, ?it/s]


acc: 0.8333333402872075 mae: 0.24898188443621616 loss: 6.255499327182772


  1%|▊                                                                                       | 1/105 [00:00<00:17,  6.04it/s]

Training epoch 0 ...


100%|██████████████████████████████████████████████████████████████████████████████████████| 105/105 [00:08<00:00, 12.27it/s]
0it [00:00, ?it/s]
  5%|████▎                                                                                   | 5/103 [00:00<00:02, 41.78it/s]

acc: 0.8730158794493892 mae: 0.24442570486017282 loss: 6.162036795843216
-------------------------------------------------
Test phase
Testing X3
Running test on subject ...


100%|██████████████████████████████████████████████████████████████████████████████████████| 103/103 [00:02<00:00, 42.34it/s]

test acc: 0.4789644129646634 mae: 0.5110544837396411 loss: 7.145417518986082
--------------------report----------------------
TR: ['X2', 'X3']   TS: X1 acc1: 0.7777777861981158 acc2: 0.9352750836066828 test_acc: 0.47916667759418474
TR: ['X1', 'X3']   TS: X2 acc1: 0.7791666764765968 acc2: 0.8867313968903808 test_acc: 0.5841269950071971
TR: ['X1', 'X2']   TS: X3 acc1: 0.8333333402872075 acc2: 0.8730158794493892 test_acc: 0.4789644129646634





#### report summary

sigmoid + binary_crossentropy -> major improvement in accuracy

--------------------report----------------------


acc1: 0.7124999999999995 acc2: 0.4761904761904766 test_acc: 0.5375000000000001
acc1: 0.37500000000000017 acc2: 0.638095238095238 test_acc: 0.6666666666666665
acc1: 0.6249999999999998 acc2: 0.6666666666666665 test_acc: 0.466019417475728

softmax

--------------------report----------------------
TR: ['X2', 'X3']   TS: X1 acc1: 0.695238095238095 acc2: 0.961165048543689 test_acc: 0.3250000000000001
TR: ['X1', 'X3']   TS: X2 acc1: 0.7499999999999993 acc2: 0.9708737864077667 test_acc: 0.3047619047619048
TR: ['X1', 'X2']   TS: X3 acc1: 0.7624999999999993 acc2: 0.8476190476190468 test_acc: 0.262135922330097

In [25]:
len(mlb.classes_)

10

In [26]:
# task 2, micro-activity

# CNN-head

def create_model_task2():
    n_sensors = 4
    n_filters = [3, 3, 3, 3] # X, Y, Z


    # make the input layers
    ips = []

    n_f = 0
    for sensor in all_sensors:
        ips.append(Input(shape = (None, n_filters[n_f]), name = sensor + '_ipX'))
        n_f += 1

    print(f'Input tensors: {ips}')
    
    # kernel size = 1 == embedding layer

    convs = []
    for ip in ips:
        conv = Conv1D(filters = 32, kernel_size = 1, padding = 'same', activation = None, name = ip.name.split(':')[0] + '_conv')(ip)
        convs.append(conv)
        
    # batch-normalization
    bns = []
    for conv in convs:
        bn = BatchNormalization()(conv)
        bns.append(bn)

    convs2 = []
    for bn in bns:
        conv2 = Conv1D(filters = 16, kernel_size = 3, dilation_rate = 2, 
                       kernel_regularizer = keras.regularizers.l1_l2(l1=0.01, l2=0.002),
                       bias_regularizer = keras.regularizers.l1_l2(l1=0.01, l2=0.002), 
                       padding = 'same', activation = 'relu')(bn)
        convs2.append(conv2)
        
    # batch-normalization
    bns2 = []
    for conv2 in convs2:
        bn2 = BatchNormalization()(conv2)
        bns2.append(bn2)
        
    # residual
    
    res = []
    cnt_i = 0
    for bn2 in bns2:
        rs = bn2 #Add()([bn2, bns[cnt_i]])
        rs = LeakyReLU(alpha=0.3)(rs)
        cnt_i += 1
        res.append(rs)



    # concatenation

    concat = Concatenate(axis=1, name = 'concat')(res)

    concat_conv = Conv1D(filters = 16, kernel_size = 1, padding = 'same', dilation_rate = 2,
                        kernel_regularizer = keras.regularizers.l1_l2(l1=0.01, l2=0.002),
                        bias_regularizer = keras.regularizers.l1_l2(l1=0.01, l2=0.002),
                        activation = 'relu', name = 'concat_conv')(concat)
    
    bn3 = BatchNormalization()(concat_conv)
    con_conv2 = Conv1D(filters = 32, kernel_size = 3, padding = 'same', activation = None, name = 'con_conv2')(bn3)
    bn4 = BatchNormalization()(con_conv2)
    
    #res2 = Add()([bn4, bn3])
    res2 = LeakyReLU(alpha=0.3)(bn4)
    
    gmp = GlobalMaxPooling1D()(res2)

    # simple FC
    final_softmax = Dense(10, activation = 'sigmoid')(gmp)
    simple_conv = Model(ips, final_softmax)
    return simple_conv

In [27]:
task2_model = create_model_task2()

Input tensors: [<tf.Tensor 'right_arm_ipX_4:0' shape=(None, None, 3) dtype=float32>, <tf.Tensor 'right_wrist_ipX_4:0' shape=(None, None, 3) dtype=float32>, <tf.Tensor 'left_hip_ipX_4:0' shape=(None, None, 3) dtype=float32>, <tf.Tensor 'left_wrist_ipX_4:0' shape=(None, None, 3) dtype=float32>]


In [28]:
task2_model.summary()

Model: "model_4"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
right_arm_ipX (InputLayer)      [(None, None, 3)]    0                                            
__________________________________________________________________________________________________
right_wrist_ipX (InputLayer)    [(None, None, 3)]    0                                            
__________________________________________________________________________________________________
left_hip_ipX (InputLayer)       [(None, None, 3)]    0                                            
__________________________________________________________________________________________________
left_wrist_ipX (InputLayer)     [(None, None, 3)]    0                                            
____________________________________________________________________________________________

In [29]:
def train_task2(simple_conv, X, y_ohe, EPOCH, train_split=1.0):
    # single zero padding for NULL datas

    for i in range(len(X)):
        for j in range(len(X[0])):
            if len(X) == 5 and i == len(X)-1: # assuming mocap always comes in the last index
                if len(X[i][j]) == 0:
                    X[i][j] = np.zeros((1, 87))
            else:
                if len(X[i][j]) == 0:
                    X[i][j] = np.array([[0., 0., 0.]], dtype = np.float32)

    report_acc = 0.0
    for epoch in range(EPOCH):
        print(f'Training epoch {epoch} ...')
        avg_loss = 0.0
        avg_acc = 0.0
        avg_mae = 0.0

        for sample_i in tqdm(range(int(len(X[0])*train_split))):
            [train_loss, acc, mae]   =  simple_conv.train_on_batch([X[0][sample_i].reshape(1,len(X[0][sample_i]),3), 
                                        X[1][sample_i].reshape(1,len(X[1][sample_i]),3), 
                                        X[2][sample_i].reshape(1,len(X[2][sample_i]),3), 
                                        X[3][sample_i].reshape(1,len(X[3][sample_i]),3)],
                                        #X[4][sample_i].reshape(1,len(X[4][sample_i]),3*29)], 
                                        y_ohe[sample_i].reshape(1,10))
            avg_loss += train_loss/(len(X[0])*train_split)
            avg_acc += acc/(len(X[0])*train_split)
            avg_mae += mae/(len(X[0])*train_split)
        report_acc = avg_acc
        print(f'acc: {avg_acc} mae: {avg_mae} loss: {avg_loss}')
        #print('Running validation ...')
        avg_loss = 0.0
        avg_acc = 0.0
        avg_mae = 0.0
        for sample_i in tqdm(range(int(len(X[0])*train_split), len(X[0]))):
            [test_loss, acc, mae]   =  simple_conv.test_on_batch([X[0][sample_i].reshape(1,len(X[0][sample_i]),3), 
                                        X[1][sample_i].reshape(1,len(X[1][sample_i]),3), 
                                        X[2][sample_i].reshape(1,len(X[2][sample_i]),3), 
                                        X[3][sample_i].reshape(1,len(X[3][sample_i]),3)],
                                        #X[4][sample_i].reshape(1,len(X[4][sample_i]),3*29)], 
                                        y_ohe[sample_i].reshape(1,10))
            avg_loss += test_loss/(len(X[0])*(1-train_split))
            avg_acc += acc/(len(X[0])*(1-train_split))
            avg_mae += mae/(len(X[0])*(1-train_split))
            
        #print(f'test acc: {avg_acc} mae: {avg_mae} loss: {avg_loss}')
        
    return simple_conv, report_acc

def test_task2(simple_conv, X2, y2_ohe):
    for i in range(len(X2)):
        for j in range(len(X2[0])):
            if len(X2) == 5 and i == len(X2)-1: # assuming mocap always comes in the last index
                if len(X2[i][j]) == 0:
                    X2[i][j] = np.zeros((1, 87))
            else:
                if len(X2[i][j]) == 0:
                    X2[i][j] = np.array([[0., 0., 0.]], dtype = np.float32)

    print('Running test on subject ...')
    avg_loss = 0.0
    avg_acc = 0.0
    avg_mae = 0.0
    for sample_i in tqdm(range(int(len(X2[0])))):
        [test_loss, acc, mae]   =  simple_conv.test_on_batch([X2[0][sample_i].reshape(1,len(X2[0][sample_i]),3), 
                                    X2[1][sample_i].reshape(1,len(X2[1][sample_i]),3), 
                                    X2[2][sample_i].reshape(1,len(X2[2][sample_i]),3), 
                                    X2[3][sample_i].reshape(1,len(X2[3][sample_i]),3)],
                                    #X2[4][sample_i].reshape(1,len(X2[4][sample_i]),3*29)], 
                                    y2_ohe[sample_i].reshape(1,10))
        avg_loss += test_loss/(len(X2[0]))
        avg_acc += acc/(len(X2[0]))
        avg_mae += mae/(len(X2[0]))
    print(f'test acc: {avg_acc} mae: {avg_mae} loss: {avg_loss}')
    return avg_acc

In [None]:
# train on subject1
# validate on subject2, 3

from tqdm import tqdm

report = []

for xytst in [(X, y_ml_ohe, 'X1'), (X2, y_ml2_ohe, 'X2'), (X3, y_ml3_ohe, 'X3')]:
    
    simple_conv_task2 = create_model_task2()
    
    xytr = [a for a in [(X, y_ml_ohe, 'X1'), (X2, y_ml2_ohe, 'X2'), (X3, y_ml3_ohe, 'X3')] if a!=xytst]
    
    EPOCH = 1
    train_split = 1.0
    lr = 0.001

    for e in range(EPOCH):
        simple_conv_task2.compile(loss='mse', metrics=['acc', 'mae'], optimizer=Adam(learning_rate=lr))
        simple_conv_task2, acc = train_task2(simple_conv_task2, xytr[0][0], xytr[0][1], 1, train_split)
        lr /= 1.5
        simple_conv_task2.compile(loss='mse', metrics=['acc', 'mae'], optimizer=Adam(learning_rate=lr))
        simple_conv_task2, acc2 = train_task2(simple_conv_task2, xytr[1][0], xytr[1][1], 1, train_split)
        lr /= 2.0
    

    print('-------------------------------------------------')
    print('Test phase')
    print(f'Testing {xytst[2]}')

    t_acc = test_task2(simple_conv_task2, xytst[0], xytst[1])
    report.append(f'TR: {[a[2] for a in xytr]}   TS: {xytst[2]} acc1: {acc} acc2: {acc2} test_acc: {t_acc}')


print('--------------------report----------------------')
for ta in report:
    print(ta)

Input tensors: [<tf.Tensor 'right_arm_ipX_8:0' shape=(None, None, 3) dtype=float32>, <tf.Tensor 'right_wrist_ipX_8:0' shape=(None, None, 3) dtype=float32>, <tf.Tensor 'left_hip_ipX_8:0' shape=(None, None, 3) dtype=float32>, <tf.Tensor 'left_wrist_ipX_8:0' shape=(None, None, 3) dtype=float32>]


  1%|▊                                                                                       | 1/105 [00:00<00:20,  5.17it/s]

Training epoch 0 ...


 71%|██████████████████████████████████████████████████████████████▏                        | 75/105 [00:07<00:02, 11.17it/s]

In [26]:
# generating labels for test set

In [31]:
cdir = os.getcwd()
print(cdir)
os.chdir(cdir + '/../../')
print(os.getcwd())
cdir = os.getcwd()
print(cdir)
os.chdir(cdir + '/../../')
print(os.getcwd())

N:\activity-recognition-abc\train\train
N:\activity-recognition-abc
N:\activity-recognition-abc
N:\


In [32]:
os.chdir(cdir + '/test/test')
print(os.getcwd())

N:\activity-recognition-abc\test\test


In [33]:
# reading test set

# there's 1 subject
subject4 = {}


# we will load every single right arm data, separate based on subject id, do feature extraction, run t-SNE

all_sensors = ['right_arm', 'right_wrist', 'left_hip', 'left_wrist'] #, 'mocap']

data_folder = [f'{sensor}/*.csv' for sensor in all_sensors]

print(data_folder)

files = glob.glob(data_folder[0])

# processing will be done as numpy array

subject4['data'] = []

 
# data format : data -> id, process(timeseries, timestamps) : right_arm as numpy array (missing data will be imputed)


# id is generated by concatenating INT(subject+trial)

for f in files:
    #print(f)
    c_sub = {} # current subject
    if f.split(os.sep)[1].split('_')[0] == 'subject4':
        
        c_sub['id'] = int(f.split(os.sep)[1].split('_')[0][-1] + f.split(os.sep)[1].split('_')[2].split('.')[0])
        #print(ra['id'])
        
        for sensor in all_sensors:
            #print(sensor)
            ra = pd.read_csv(sensor + os.sep + f.split(os.sep)[1])
            #print(ra)
            ra.sort_values(by=['timestamp'], inplace = True)
            c_sub[sensor] = {}
            if sensor == 'mocap':
                for axis in mocap_axis:
                    c_sub[sensor][axis] = process(ra[axis], ra['timestamp'])
            else:
                for axis in ['X', 'Y', 'Z']:
                    c_sub[sensor][axis] = process(ra[axis], ra['timestamp'])
                
        subject4['data'].append(c_sub)
    else:
        print('either new subject or a bug')
        
 

        
        
# re-formatting dataset for training

X_gen = []


for sensor in all_sensors:
    cs_data = []
    for i in range(len(subject4['data'])):
        sub_data = []
        if sensor == 'mocap':
            for sig in mocap_axis:
                sub_data.append(np.array(subject4['data'][i][sensor][sig]))
        else:
            for sig in ['X', 'Y', 'Z']:
                sub_data.append(np.array(subject4['data'][i][sensor][sig]))
        sub_data = np.array(sub_data)
        # print(sub_data.shape)
        sub_data = np.swapaxes(sub_data, 0, 1)
        cs_data.append(sub_data)
    X_gen.append(cs_data)

['right_arm/*.csv', 'right_wrist/*.csv', 'left_hip/*.csv', 'left_wrist/*.csv']


In [86]:
X[0][4][0]

array([-0.0741, -0.0117, -0.0749])

In [89]:
len(X_gen[0][10])

1385

In [85]:
X_gen[0][4][0]

array([ 0.71569997, -0.052     , -0.0355    ])

In [53]:
len(X_gen)

4

In [54]:
len(X_gen[0])

180

In [76]:
X_gen[3][2]

array([[0, 0, 0]])

In [66]:
sample_i = 0
type(X2[3][sample_i].reshape(1,len(X2[3][sample_i]),3)[0,0,0])

numpy.float64

In [98]:
X_gen[3]

[array([[-1.23925781, -4.1015625 ,  0.20788574],
        [-1.36816406, -3.81445313,  0.19592285],
        [-1.63183594, -3.3671875 , -0.1751709 ],
        ...,
        [-0.69677734, -1.53710938, -1.52832031],
        [-0.89404297, -1.61035156, -1.73339844],
        [-0.89599609, -1.65722656, -1.77832031]]),
 array([[-0.64746094,  1.11328125,  1.00097656],
        [-0.38037109,  0.15148926,  0.99609375],
        [-0.17565918,  0.03363037,  1.08496094],
        ...,
        [-2.34765625, -5.4375    , -4.        ],
        [-3.484375  , -5.34765625, -3.80859375],
        [-3.484375  , -5.34765625, -3.80859375]]),
 array([[0, 0, 0]]),
 array([[0, 0, 0]]),
 array([[0, 0, 0]]),
 array([[ 3.17773438, -3.2109375 ,  1.92675781],
        [ 3.17773438, -3.2109375 ,  1.92675781],
        [ 3.28710938, -2.17382813,  2.14648438],
        ...,
        [-7.63671875,  4.8515625 , -2.671875  ],
        [-7.65234375,  4.54296875, -2.41796875],
        [-5.04296875,  3.51171875, -1.89648438]]),
 array([[0

In [53]:
def inference(simple_conv, simple_conv_task2, X2):
    for i in range(len(X2)):
        for j in range(len(X2[0])):
            if len(X2) == 5 and i == len(X2)-1: # assuming mocap always comes in the last index
                if len(X2[i][j]) == 0:
                    X2[i][j] = np.zeros((1, 87))
            else:
                if len(X2[i][j]) == 0:
                    X2[i][j] = np.array([[0., 0., 0.]], dtype=np.float32)

    print('Running inference on subject ...')
    y = []
    y_raw = []
    for sample_i in range(int(len(X2[0]))):

        y_c   =  simple_conv.predict([X2[0][sample_i].reshape(1,len(X2[0][sample_i]),3), 
                                    X2[1][sample_i].reshape(1,len(X2[1][sample_i]),3), 
                                    X2[2][sample_i].reshape(1,len(X2[2][sample_i]),3), 
                                    X2[3][sample_i].reshape(1,len(X2[3][sample_i]),3)]
                                    #X2[4][sample_i].reshape(1,len(X2[4][sample_i]),3*29)], 
                                    )
        y_raw.append(y_c)
        y.append(list(label_encoder.classes_)[np.argmax(y_c)])
        
    y_ml = []
    y_ml_raw = []
    for sample_i in range(int(len(X2[0]))):

        y_c   =  simple_conv_task2.predict([X2[0][sample_i].reshape(1,len(X2[0][sample_i]),3), 
                                    X2[1][sample_i].reshape(1,len(X2[1][sample_i]),3), 
                                    X2[2][sample_i].reshape(1,len(X2[2][sample_i]),3), 
                                    X2[3][sample_i].reshape(1,len(X2[3][sample_i]),3)]
                                    #X2[4][sample_i].reshape(1,len(X2[4][sample_i]),3*29)], 
                               )
        idx = 0
        label_str = ''
        #print(y_c)
        #print(y_c[0])
        for yc in y_c[0]:
            #print(yc)
            thresh = 0.5 # min(0.4, max(y_c[0]))
            if yc >= thresh:
                label_str += list(mlb.classes_)[idx] + ','
            idx += 1
        y_ml_raw.append(y_c)
        y_ml.append(label_str)
    return y, y_ml, y_raw, y_ml_raw

In [54]:
y_gen, y_ml_gen, y_r, y_ml_r = inference(simple_conv, simple_conv_task2, X)

Running inference on subject ...






















In [55]:
y_ml_r

[array([[0.        , 0.00514898, 0.014467  , 0.00644138, 0.04214099,
         0.01134187, 0.2169294 , 0.25272346, 0.26883382, 0.04454542]],
       dtype=float32),
 array([[0.0000000e+00, 6.3014358e-02, 4.8130751e-05, 5.0902367e-05,
         2.3861697e-01, 7.0005655e-05, 4.0683746e-03, 2.5163528e-01,
         1.1639750e-02, 3.1226585e-03]], dtype=float32),
 array([[0.0000000e+00, 4.4110650e-01, 5.4627657e-04, 3.8594127e-02,
         1.7943949e-01, 2.5970867e-01, 1.6543418e-02, 6.6722125e-01,
         4.7730368e-02, 3.9513996e-01]], dtype=float32),
 array([[0.        , 0.08281502, 0.04728296, 0.04836208, 0.16475454,
         0.03048316, 0.17773145, 0.68477654, 0.1552486 , 0.31701866]],
       dtype=float32),
 array([[0.        , 0.43166733, 0.00258806, 0.00045097, 0.18489626,
         0.03318402, 0.01083055, 0.11209774, 0.01088745, 0.00154024]],
       dtype=float32),
 array([[0.0000000e+00, 1.0112235e-01, 1.2329221e-03, 1.4901161e-07,
         9.1320181e-01, 1.2755394e-05, 3.9869547e-04

In [40]:
mlb.classes_

array(['Add', 'Cut', 'Mix', 'Open', 'Peel', 'Pour', 'Put', 'Take', 'Wash',
       'other'], dtype=object)

In [52]:
y_ml_gen

['Peel,Put,Wash,',
 '',
 'Put,Take,',
 'other,',
 '',
 'Peel,Wash,other,',
 'Cut,',
 'Open,Take,',
 '',
 'Cut,Put,Take,other,',
 '',
 'Peel,Wash,',
 'Cut,Peel,Wash,',
 'Put,Take,',
 'Mix,Take,Wash,',
 'Cut,Peel,',
 'Cut,Open,Peel,Put,Take,other,',
 'Peel,',
 'Cut,Peel,',
 'Cut,Peel,Put,Take,other,',
 '',
 'Put,Take,',
 '',
 'Put,Take,',
 '',
 '',
 'Cut,Peel,Pour,Take,',
 '',
 'Cut,Peel,Take,',
 'Wash,',
 'Cut,Put,Take,Wash,other,',
 'Peel,Put,',
 '',
 'Cut,Peel,Wash,',
 'Cut,Peel,Put,',
 'Cut,Peel,',
 'Mix,Put,other,',
 'Cut,Peel,Put,Take,',
 'Cut,Peel,Put,Take,other,',
 'Put,Take,',
 'Cut,Peel,Wash,',
 'Take,',
 'Cut,Open,Peel,Put,Take,other,',
 'Peel,Wash,',
 'Put,',
 'Cut,Peel,Put,',
 '',
 'Put,',
 'Cut,Peel,Wash,other,',
 'Peel,Put,Wash,',
 'Cut,Open,Peel,Put,Take,other,',
 'Peel,',
 'Cut,Put,',
 'Cut,Peel,Wash,other,',
 'Take,',
 'Cut,Peel,',
 'Cut,Peel,Take,',
 'Cut,Open,Peel,Pour,Take,',
 '',
 'Peel,',
 '',
 'Cut,Mix,Peel,Put,Take,Wash,other,',
 'Cut,',
 'other,',
 'Peel,',
 'Cu

In [42]:
file_ids = [f.split('/')[-1].split('.')[0] for f in files]

In [43]:
file_ids

['subject4_file_100',
 'subject4_file_360',
 'subject4_file_313',
 'subject4_file_363',
 'subject4_file_776',
 'subject4_file_262',
 'subject4_file_623',
 'subject4_file_117',
 'subject4_file_112',
 'subject4_file_739',
 'subject4_file_783',
 'subject4_file_777',
 'subject4_file_209',
 'subject4_file_505',
 'subject4_file_335',
 'subject4_file_539',
 'subject4_file_327',
 'subject4_file_63',
 'subject4_file_115',
 'subject4_file_470',
 'subject4_file_366',
 'subject4_file_235',
 'subject4_file_918',
 'subject4_file_237',
 'subject4_file_38',
 'subject4_file_538',
 'subject4_file_186',
 'subject4_file_61',
 'subject4_file_225',
 'subject4_file_325',
 'subject4_file_174',
 'subject4_file_256',
 'subject4_file_553',
 'subject4_file_165',
 'subject4_file_984',
 'subject4_file_41',
 'subject4_file_633',
 'subject4_file_521',
 'subject4_file_267',
 'subject4_file_164',
 'subject4_file_843',
 'subject4_file_652',
 'subject4_file_693',
 'subject4_file_746',
 'subject4_file_829',
 'subject4_fil

In [47]:
dict_data = {'file_id': file_ids, 'macro': y_gen, 'micro': y_ml_gen}
out_csv = pd.DataFrame(dict_data)

In [49]:
out_csv.to_csv('cc_PseudoEmpirical_submission1.csv', index = False, header = False, sep = ';')