# Tensorflow - Loading data - Adjusting the layer to our needs

https://www.kaggle.com/code/roberthatch/gislr-feature-data-on-the-shoulders/notebook

This notebook generates a tensorflow preprocessing layer that transforms the whole dataset of single parquet files into numpy arrays.

The flow is as follows:

***preprocessing layer:***
1. drop z coordinate if desired (this would reduce number of coordinates to 2; following array shapes show number for 3 coordinates)
2. Convert input into x containing avg face, upper pose landmarks, lips, left hand, right hand (in this order) for every frame, 
    e.g. [23, 106, 3] = (number of frames, landmarks, coordinates)
3. Pad x or cut x such that the length is as defined (e.g. length = 30 --> [30, 106, 3])
4. replace NaN values with zero
5. Reshape it to either a flattened or a 3 dimensional array ([1, 30, 318] or flattened: [1, 9540])

***looping through csv file:***

By looping through the csv file each parquet file will be loaded and the required data will be extracted and transformed by running it through the preprocessing layer.
Then the data will be added to our final numpy array of all recordings.

* final shape of data with the configuration used below (LENGTH = 30, 106 landmarks, z coordinate dropped): flattened: x (94477, 6360) and y (94477,) or unflattened: x (94477, 30, 212) and y (94477,)
* shape also depends on selected landmarks and coordinates and might vary

## Import libraries

In [76]:
%pip install tqdm
import os

import json
from tqdm import tqdm
import numpy as np
import pandas as pd

import tensorflow as tf

Note: you may need to restart the kernel to use updated packages.


## Setup

In [77]:
#limit dataset for quick test
#when QUICK_TEST is True only the first QUICK_LIMIT rows of the train csv are converted
QUICK_TEST = False
#number of rows that are converted in a quick test
QUICK_LIMIT = 100

In [78]:
#for Kaggle
# LANDMARK_FILES_DIR = "/kaggle/input/asl-signs/train_landmark_files"
# TRAIN_FILE = "/kaggle/input/asl-signs/train.csv"
# with open("/kaggle/input/asl-signs/sign_to_prediction_index_map.json", "r") as _label_map_file:
#     label_map = json.load(_label_map_file)

#for local notebook
#landmark files directory
LANDMARK_FILES_DIR = "../data/asl-signs/train_landmark_files"
#csv file that is read row by row during the conversion (columns used: path, sign)
TRAIN_FILE = "../data/asl-signs/train.csv"
#mapping that turns the sign column into a numeric label
#the with-block closes the file handle as soon as the json content is parsed
with open("../data/asl-signs/sign_to_prediction_index_map.json", "r") as _label_map_file:
    label_map = json.load(_label_map_file)

## Configuration

In [79]:
#number of frames every sequence is padded or cut to
LENGTH = 30

#True: final data is flattened; False: final data stays 3 dimensional
FLATTEN = False

#True: the z coordinate is dropped and only x and y are kept
DROP_Z = True

#Landmark positions of the specific types (face, hand, pose) inside one frame
FACE = [0, 468]
LEFT_HAND_OFFSET = 468
POSE_OFFSET = LEFT_HAND_OFFSET + 21
RIGHT_HAND_OFFSET = POSE_OFFSET + 33

##landmark ranges given as [start, count]; every range is merged into one averaged landmark
averaging_sets = [[0, 468]]
#40 landmarks for lips
lip_landmarks = [
    61, 185, 40, 39, 37, 0, 267, 269, 270, 409,
    291, 146, 91, 181, 84, 17, 314, 405, 321, 375,
    78, 191, 80, 81, 82, 13, 312, 311, 310, 415,
    95, 88, 178, 87, 14, 317, 402, 318, 324, 308,
]

#first 23 pose landmarks and the 21 landmarks of each hand
pose_landmarks = list(range(POSE_OFFSET, POSE_OFFSET + 23))
left_hand_landmarks = list(range(LEFT_HAND_OFFSET, LEFT_HAND_OFFSET + 21))
right_hand_landmarks = list(range(RIGHT_HAND_OFFSET, RIGHT_HAND_OFFSET + 21))

#all individually selected landmarks in the order pose, lips, left hand, right hand
point_landmarks = pose_landmarks + lip_landmarks + left_hand_landmarks + right_hand_landmarks

#total number of landmarks per frame: selected points plus one averaged landmark per averaging set
LANDMARKS = len(point_landmarks) + len(averaging_sets)
print(LANDMARKS)

#input shape for model: (frames, landmarks * coordinates per landmark)
INPUT_SHAPE = (LENGTH, LANDMARKS * (2 if DROP_Z else 3))


106


### Helper Functions

In [80]:
#number of rows every frame occupies in a parquet file
ROWS_PER_FRAME = 543
def load_relevant_data_subset(pq_path):
    #reads the x, y and z columns of the parquet file at pq_path
    #returns a float32 array of shape (n_frames, ROWS_PER_FRAME, 3)
    coordinate_columns = ['x', 'y', 'z']
    frame_table = pd.read_parquet(pq_path, columns=coordinate_columns)
    #every frame occupies ROWS_PER_FRAME rows, so the row count determines the number of frames
    frame_count = int(len(frame_table) / ROWS_PER_FRAME)
    #regroup the flat table into one (ROWS_PER_FRAME, coordinates) slice per frame
    coordinates = frame_table.values.reshape(frame_count, ROWS_PER_FRAME, len(coordinate_columns))
    return coordinates.astype(np.float32)

In [81]:
#unnecessary function for now; if we want to define handedness of user
def right_hand_percentage(x):
    #share of the non-NaN hand values that belong to the right hand
    #the result is NaN (0/0) if neither hand has a non-NaN value
    def count_present(landmark_ids):
        #number of non-NaN values among the given landmarks (gathered along axis 1)
        values = tf.gather(x, landmark_ids, axis=1)
        return tf.reduce_sum(tf.where(tf.math.is_nan(values), tf.zeros_like(values), tf.ones_like(values)))

    right_count = count_present(right_hand_landmarks)
    left_count = count_present(left_hand_landmarks)
    return right_count / (left_count+right_count)

In [82]:
def tf_nan_mean(x, axis=0):
    #mean of tensor x along axis that ignores NaN values:
    #sum of the non-NaN values divided by the number of non-NaN values
    nan_mask = tf.math.is_nan(x)
    valid_sum = tf.reduce_sum(tf.where(nan_mask, tf.zeros_like(x), x), axis=axis)
    valid_count = tf.reduce_sum(tf.where(nan_mask, tf.zeros_like(x), tf.ones_like(x)), axis=axis)
    #a slice without any non-NaN value results in 0/0 = NaN
    return valid_sum / valid_count

def tf_nan_std(x, axis=0):
    #calculates the standard deviation of a tensor x along a specified axis, while ignoring any NaN values in the tensor
    #x: tensor that may contain NaN values
    #axis: axis (or axes) the deviation is reduced over
    #returns a tensor without the reduced axis, same as tf_nan_mean
    nan_mask = tf.math.is_nan(x)
    #NaN-ignoring mean with the reduced axis kept as size 1, so that it broadcasts against x
    #for every axis; a mean without kept dims only lines up with x for axis=0
    valid_sum = tf.reduce_sum(tf.where(nan_mask, tf.zeros_like(x), x), axis=axis, keepdims=True)
    valid_count = tf.reduce_sum(tf.where(nan_mask, tf.zeros_like(x), tf.ones_like(x)), axis=axis, keepdims=True)
    d = x - valid_sum / valid_count
    #NaN entries of x stay NaN in d and are ignored again by the NaN-aware mean
    return tf.math.sqrt(tf_nan_mean(d * d, axis=axis))

#this function is only required if mean and std will be calculated for specific segments of the data
def flatten_means_and_stds(x, axis=0):
    #NOTE: the axis argument is accepted but not used; mean and std are always taken along axis 0
    stats = [tf_nan_mean(x, axis=0), tf_nan_std(x, axis=0)]
    #mean values first, std values second, laid out as one row with INPUT_SHAPE[1]*2 values
    flat_stats = tf.reshape(tf.concat(stats, axis=0), (1, INPUT_SHAPE[1]*2))
    #every non-finite entry (NaN or infinite) is replaced with zero
    return tf.where(tf.math.is_finite(flat_stats), flat_stats, tf.zeros_like(flat_stats))


## TensorFlow Feature Preprocessing Layer

In [83]:
#generating preprocessing layer that will be added to final model
class FeatureGen(tf.keras.layers.Layer):
    #custom tensorflow layer: selects landmarks, pads or cuts to LENGTH frames,
    #reshapes to the configured output shape and replaces NaN values with zero
    def __init__(self):
        super().__init__()

    def call(self, x_in):
        #x_in is indexed as (frames, landmarks, coordinates)
        #keep only x and y if the z coordinate should be dropped
        if DROP_Z:
            x_in = x_in[:, :, 0:2]

        #one NaN-ignoring mean landmark per averaging set ([start, count] along axis 1)
        merged_landmarks = [
            tf.expand_dims(
                tf_nan_mean(x_in[:, av_set[0]:av_set[0]+av_set[1], :], axis=1),
                axis=1,
            )
            for av_set in averaging_sets
        ]
        #individually selected landmarks
        selected_landmarks = tf.gather(x_in, point_landmarks, axis=1)
        #averaged landmarks come first, selected landmarks follow along axis 1
        x = tf.concat(merged_landmarks + [selected_landmarks], 1)

        #bring the sequence to exactly LENGTH frames
        frame_count = tf.shape(x)[0]
        if frame_count > LENGTH:
            #too long: keep the first LENGTH frames only
            x = x[:LENGTH, :, :]
        elif frame_count < LENGTH:
            #too short: add zero frames at both ends; for an odd number the extra frame goes to the front
            missing = LENGTH - frame_count
            back = missing // 2
            front = missing - back
            # mode constant: zeros are added
            #TODO: change mode so first and last frame are copied?
            x = tf.pad(x, [[front, back], [0, 0], [0, 0]], mode='CONSTANT', constant_values=0)

        #reshape data to 2D or 3D array
        if FLATTEN:
            target_shape = (1, INPUT_SHAPE[0]*INPUT_SHAPE[1])
        else:
            target_shape = (1, INPUT_SHAPE[0], INPUT_SHAPE[1])
        x = tf.reshape(x, target_shape)

        #replaces NaN values with zero
        return tf.where(tf.math.is_nan(x), tf.zeros_like(x), x)

#define converter using generated layer
feature_converter = FeatureGen()

In [84]:
#Tests for generated layer
#One tests symbolic tensor, the other tests real data.
#symbolic test: prints the KerasTensor the layer returns for a (543, 3) float32 keras input
print(feature_converter(tf.keras.Input((543, 3), dtype=tf.float32, name="inputs")))

#file path for kaggle
#feature_converter(load_relevant_data_subset(f'/kaggle/input/asl-signs/{pd.read_csv(TRAIN_FILE).path[1]}'))

#file path for local notebook
#tests preprocessing layer with parquet file
#the file is the one named by the path column entry with index 1 of the train csv
#as last expression of the cell the resulting tensor is displayed by the notebook
feature_converter(load_relevant_data_subset(f'../data/asl-signs/{pd.read_csv(TRAIN_FILE).path[1]}'))

KerasTensor(type_spec=TensorSpec(shape=(1, 30, 212), dtype=tf.float32, name=None), name='feature_gen_28/SelectV2_2:0', description="created by layer 'feature_gen_28'")


<tf.Tensor: shape=(1, 30, 212), dtype=float32, numpy=
array([[[0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        ...,
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.]]], dtype=float32)>

In [85]:
def convert_row(row, right_handed=True):
    #row: (index, row data) pair as produced by df.iterrows(); the row data needs the fields path and label
    #right_handed: currently not used, kept so that existing calls keep working
    #returns the preprocessed x data as numpy array and the label of the sign
    record = row[1]

    #for kaggle
    #x = load_relevant_data_subset(os.path.join("/kaggle/input/asl-signs", record.path))

    #for local notebook
    #loads data from parquet file
    x = load_relevant_data_subset(os.path.join("../data/asl-signs", record.path))

    #applies preprocessing layer to loaded data
    #.numpy() already returns the values as a host numpy array, so the deprecated .cpu() copy is not needed
    x = feature_converter(tf.convert_to_tensor(x)).numpy()
    #returns transformed x data and label of sign
    return x, record.label

#unnecessary information?! might be useful for later feature engineering
#presumably ids of signers grouped by the hand they sign with - TODO confirm
right_handed_signer = [
    26734, 28656, 25571, 62590, 29302, 49445,
    53618, 18796, 4718, 2044, 37779, 30680,
]
left_handed_signer = [16069, 32319, 36257, 22343, 27610, 61333, 34503, 55372]
both_hands_signer = [37055]
#29302 is also part of right_handed_signer
messy = [29302]

def convert_and_save_data():
    #converts the recordings listed in TRAIN_FILE with convert_row and saves
    #the collected data and labels as ../data/feature_data.npy and ../data/feature_labels.npy
    #reads csv file
    df = pd.read_csv(TRAIN_FILE)
    #maps label number to sign column
    df['label'] = df['sign'].map(label_map)
    #sets number of total rows
    total = df.shape[0]
    #limits number of rows if quick_test is activated
    if QUICK_TEST:
        #never use more rows than the csv provides (and never a negative number);
        #otherwise surplus rows would stay all-zero with label 0
        total = max(0, min(QUICK_LIMIT, total))

    #generates numpy array with zeros in shape (total number of rows, number of expected columns)
    #NOTE(review): np.zeros defaults to float64 while the loaded landmark data is float32
    if FLATTEN:
        npdata = np.zeros((total, INPUT_SHAPE[0]*INPUT_SHAPE[1]))
    else:
        npdata = np.zeros((total, INPUT_SHAPE[0], INPUT_SHAPE[1]))
    nplabels = np.zeros(total)

    #iterates through the first total rows of df; i is the position in the output arrays
    #and row is the (index, row data) pair passed on to convert_row
    #tqdm is used for showing progress bar
    for i, row in tqdm(enumerate(df.head(total).iterrows()), total=total):
        #load specific parquet file, run preprocessing layer and save x and y data
        (x, y) = convert_row(row)
        #save x and y to specific row in prepared numpy arrays
        #x carries a leading axis of size 1 that numpy drops during the assignment
        npdata[i,:] = x
        nplabels[i] = y

    #save as np file
    np.save("../data/feature_data.npy", npdata)
    np.save("../data/feature_labels.npy", nplabels)

convert_and_save_data()

100%|██████████| 94477/94477 [06:12<00:00, 253.39it/s]


In [86]:
#test of loading data
#reloads the arrays written by convert_and_save_data and prints both shapes as a sanity check
X = np.load("../data/feature_data.npy")
y = np.load("../data/feature_labels.npy")

print(X.shape, y.shape)

(94477, 30, 212) (94477,)
