# TensorFlow - Loading data

https://www.kaggle.com/code/roberthatch/gislr-feature-data-on-the-shoulders/notebook

This is the original notebook by a Kaggle user.


This notebook builds a preprocessing layer and uses it to transform the whole dataset into NumPy arrays.
Some landmarks are merged (averaged into a single landmark), and each sequence is cut into 3 time blocks whose mean and standard deviation are computed.



If I understand it correctly, the flow is as follows:

1. Convert x_in into x, which contains the averaged face landmark, the averaged pose landmark, the lips, the left hand and the right hand for every frame, e.g. [23, 84, 3]
2. Pad x such that first dim is divisible by 3: [23, 84, 3] -> [24, 84, 3]
3. Split padded x into three parts, and for each part, compute mean and std over frames, concat and flatten: [8, 84, 3] -> [2, 84, 3] -> [1, 504]
4. Add flattened mean and std of x to features list, which now contains means and stds of the three parts + overall mean and std
5. Resize x to [15, 84, 3], flatten and append it to features list

* final shape of data: x (94477, 5796) and y (94477,)
* converting the whole dataset takes ca. 15 minutes

In [1]:
%pip install tqdm
import os

import json
from tqdm import tqdm
import numpy as np
import pandas as pd

import tensorflow as tf

Note: you may need to restart the kernel to use updated packages.


In [2]:
# Quick-test switch: when QUICK_TEST is True, convert_and_save_data() converts
# only the first QUICK_LIMIT rows of the training CSV instead of the whole dataset.
QUICK_TEST = True
QUICK_LIMIT = 100

In [3]:
# Paths when running on Kaggle:
# LANDMARK_FILES_DIR = "/kaggle/input/asl-signs/train_landmark_files"
# TRAIN_FILE = "/kaggle/input/asl-signs/train.csv"
# label_map = json.load(open("/kaggle/input/asl-signs/sign_to_prediction_index_map.json", "r"))

# Paths when running as a local notebook:
LANDMARK_FILES_DIR = "../data/asl-signs/train_landmark_files"
TRAIN_FILE = "../data/asl-signs/train.csv"
# Mapping loaded from the JSON file; it is applied to the `sign` column of the
# training CSV to produce labels. Opened in a `with` block so the file handle
# is closed as soon as the JSON has been parsed.
with open("../data/asl-signs/sign_to_prediction_index_map.json", "r", encoding="utf-8") as label_map_file:
    label_map = json.load(label_map_file)

In [18]:
# Loader adapted from the Kaggle competition code.
ROWS_PER_FRAME = 543  # number of landmark rows stored per frame in a parquet file


def load_relevant_data_subset(pq_path):
    """Load the x/y/z columns of one landmark parquet file as a float32 array.

    Args:
        pq_path: path of the parquet file to read.

    Returns:
        NumPy array of shape (n_frames, ROWS_PER_FRAME, 3) and dtype float32,
        where n_frames is the row count of the file divided by ROWS_PER_FRAME.
    """
    coordinate_columns = ['x', 'y', 'z']
    frame_table = pd.read_parquet(pq_path, columns=coordinate_columns)
    # Every frame contributes exactly ROWS_PER_FRAME rows to the table.
    n_frames = len(frame_table) // ROWS_PER_FRAME
    coords = frame_table.values.reshape(n_frames, ROWS_PER_FRAME, len(coordinate_columns))
    return coords.astype(np.float32)

In [5]:
def right_hand_percentage(x):
    """Return the share of non-NaN hand values that belong to the right hand.

    Counts the non-NaN entries among the right-hand landmarks and among the
    left-hand landmarks (selected along axis 1 of ``x``) and returns
    ``right / (left + right)``.

    NOTE: when neither hand has any non-NaN value the result is 0/0.
    Assumes ``x`` is a float tensor indexed as [frame, landmark, coordinate]
    -- TODO confirm against callers.
    """
    def _count_present(landmark_ids):
        # 1.0 for every non-NaN value of the selected landmarks, 0.0 for NaN.
        coords = tf.gather(x, landmark_ids, axis=1)
        present = tf.where(tf.math.is_nan(coords), tf.zeros_like(coords), tf.ones_like(coords))
        return tf.reduce_sum(present)

    right_count = _count_present(right_hand_landmarks)
    left_count = _count_present(left_hand_landmarks)
    return right_count / (left_count + right_count)

## Configuration

In [6]:
# Number of frames every sequence is resized to before it is flattened
NUM_FRAMES = 15
# Number of segments each sequence is cut into for the per-segment statistics
SEGMENTS = 3

# Layout of the landmark rows of one frame:
# face [0, 468), left hand [468, 489), pose [489, 522), right hand [522, 543)
FACE = [0, 468]
LEFT_HAND_OFFSET = 468
POSE_OFFSET = LEFT_HAND_OFFSET + 21
RIGHT_HAND_OFFSET = POSE_OFFSET + 33

# [start, length] ranges; each range is merged into ONE averaged landmark
averaging_sets = [[0, 468], [POSE_OFFSET, 33]]

# 40 landmark indices for the lips
lip_landmarks = [
    61, 185, 40, 39, 37, 0, 267, 269, 270, 409,
    291, 146, 91, 181, 84, 17, 314, 405, 321, 375,
    78, 191, 80, 81, 82, 13, 312, 311, 310, 415,
    95, 88, 178, 87, 14, 317, 402, 318, 324, 308,
]

# 21 landmark indices per hand
left_hand_landmarks = list(range(LEFT_HAND_OFFSET, LEFT_HAND_OFFSET + 21))
right_hand_landmarks = list(range(RIGHT_HAND_OFFSET, RIGHT_HAND_OFFSET + 21))

# All landmarks that are kept individually (not averaged): lips, then left hand, then right hand
point_landmarks = [*lip_landmarks, *left_hand_landmarks, *right_hand_landmarks]

# Landmarks per frame after preprocessing: individual points + one per averaging set
LANDMARKS = len(point_landmarks) + len(averaging_sets)
print(LANDMARKS)

# Shape of the resized sequence: (frames, landmarks * 3 coordinates)
INPUT_SHAPE = (NUM_FRAMES, LANDMARKS * 3)

84


### Helper Functions

In [16]:
def tf_nan_mean(x, axis=0):
    """Mean of ``x`` along ``axis`` with NaN entries left out.

    NaN entries add 0 to the sum and 0 to the element count, so only the
    non-NaN values contribute. Where every entry along ``axis`` is NaN the
    result is 0/0.
    """
    nan_mask = tf.math.is_nan(x)
    zeros = tf.zeros_like(x)
    # Sum of the non-NaN values
    valid_sum = tf.reduce_sum(tf.where(nan_mask, zeros, x), axis=axis)
    # Number of non-NaN values
    valid_count = tf.reduce_sum(tf.where(nan_mask, zeros, tf.ones_like(x)), axis=axis)
    return valid_sum / valid_count

def tf_nan_std(x, axis=0):
    """Standard deviation of ``x`` along ``axis`` with NaN entries left out.

    This is the population standard deviation: the squared deviations are
    averaged over the number of non-NaN entries (no N-1 correction).

    Args:
        x: float tensor, may contain NaN.
        axis: axis (or axes, or None) to reduce over.

    Returns:
        Tensor with ``axis`` reduced away.
    """
    nan_mask = tf.math.is_nan(x)
    zeros = tf.zeros_like(x)
    # The mean is computed with keepdims=True so that it broadcasts against x
    # for ANY axis. Subtracting a mean whose reduced axis was dropped only
    # lines up correctly when axis == 0.
    valid_sum = tf.reduce_sum(tf.where(nan_mask, zeros, x), axis=axis, keepdims=True)
    valid_count = tf.reduce_sum(tf.where(nan_mask, zeros, tf.ones_like(x)), axis=axis, keepdims=True)
    d = x - valid_sum / valid_count
    # NaN inputs stay NaN in d * d and are ignored again by tf_nan_mean.
    return tf.math.sqrt(tf_nan_mean(d * d, axis=axis))

def flatten_means_and_stds(x, axis=0):
    """Return NaN-aware mean and std of ``x`` over axis 0 as one flat row.

    The mean and the std are concatenated (mean first) and reshaped to
    ``(1, INPUT_SHAPE[1] * 2)``. Every non-finite value in the result is
    replaced by 0.

    NOTE: the ``axis`` argument is accepted but not used; the reduction is
    always over axis 0.
    """
    stats = tf.concat([tf_nan_mean(x, axis=0), tf_nan_std(x, axis=0)], axis=0)
    flat = tf.reshape(stats, (1, INPUT_SHAPE[1] * 2))
    # Keep finite values, zero out everything else
    return tf.where(tf.math.is_finite(flat), flat, tf.zeros_like(flat))

## TensorFlow Feature Preprocessing Layer

In [8]:
# generating preprocessing layer that will be added to final model
class FeatureGen(tf.keras.layers.Layer):
    """Keras layer that turns one landmark sequence into a single flat feature row.

    The output row is the concatenation (in this order) of:
      * mean/std features of each of the SEGMENTS parts of the padded sequence,
      * mean/std features of the whole, unpadded sequence,
      * the sequence resized to NUM_FRAMES frames and flattened.
    """

    def __init__(self):
        super().__init__()

    def call(self, x_in):
        """Compute the feature row for ``x_in``.

        ``x_in`` is indexed as [frame, landmark, coordinate]; the notebook
        calls the layer with inputs of shape (n_frames, 543, 3).
        Returns a tensor with a single row.
        """
        # One averaged landmark for every [start, length] range in averaging_sets ...
        parts = []
        for start, length in averaging_sets:
            region_mean = tf_nan_mean(x_in[:, start:start + length, :], axis=1)
            parts.append(tf.expand_dims(region_mean, axis=1))
        # ... followed by the landmarks that are kept individually.
        parts.append(tf.gather(x_in, point_landmarks, axis=1))
        # frames: averaged landmarks first, then point landmarks, along axis 1
        frames = tf.concat(parts, 1)

        # Pad the frame axis until its length is divisible by SEGMENTS.
        # Each pass adds at most one frame: at the end on even passes, at the
        # start on odd passes, and nothing once the length is divisible.
        padded = frames
        for step in range(SEGMENTS):
            needs_pad = (tf.shape(padded)[0] % SEGMENTS) > 0
            pad_front = tf.where(needs_pad & ((step % 2) != 0), 1, 0)
            pad_back = tf.where(needs_pad & ((step % 2) == 0), 1, 0)
            # Only the frame axis is padded; SYMMETRIC mirrors the edge frame.
            padded = tf.pad(padded, [[pad_front, pad_back], [0, 0], [0, 0]], mode="SYMMETRIC")

        # Mean/std features per segment of the padded sequence
        features = [flatten_means_and_stds(segment, axis=0) for segment in tf.split(padded, SEGMENTS)]
        # Mean/std features of the whole (unpadded) sequence
        features.append(flatten_means_and_stds(frames, axis=0))

        # Resize along the frame axis. Non-finite values are first replaced by
        # the per-landmark mean over frames, since the original notes that
        # resize cannot handle NaN.
        filled = tf.where(tf.math.is_finite(frames), frames, tf_nan_mean(frames, axis=0))
        resized = tf.image.resize(filled, [NUM_FRAMES, LANDMARKS])
        resized = tf.reshape(resized, (1, INPUT_SHAPE[0] * INPUT_SHAPE[1]))
        # Any NaN that is still left becomes 0
        resized = tf.where(tf.math.is_nan(resized), tf.zeros_like(resized), resized)
        features.append(resized)

        return tf.concat(features, axis=1)
    
# Create the preprocessing layer once; convert_row() reuses this instance.
feature_converter = FeatureGen()

# Two smoke tests follow: the first runs the layer on a symbolic Keras input,
# the second on real data loaded from a parquet file.
print(feature_converter(tf.keras.Input((543, 3), dtype=tf.float32, name="inputs")))

# Real-data test, Kaggle path variant:
#feature_converter(load_relevant_data_subset(f'/kaggle/input/asl-signs/{pd.read_csv(TRAIN_FILE).path[1]}'))

# Real-data test, local notebook variant: runs the layer on the parquet file
# named by the `path` entry at index 1 of the training CSV.
feature_converter(load_relevant_data_subset(f'../data/asl-signs/{pd.read_csv(TRAIN_FILE).path[1]}'))


KerasTensor(type_spec=TensorSpec(shape=(1, 5796), dtype=tf.float32, name=None), name='feature_gen/concat_5:0', description="created by layer 'feature_gen'")


<tf.Tensor: shape=(1, 5796), dtype=float32, numpy=
array([[ 5.7140142e-01,  4.6732509e-01, -2.4906856e-05, ...,
         3.8143307e-01,  8.0332047e-01, -1.0697230e-01]], dtype=float32)>

In [9]:
def convert_row(row, right_handed=True):
    """Load one training sequence and convert it to its feature row.

    Args:
        row: an ``(index, record)`` pair as yielded by ``DataFrame.iterrows()``;
            the record must provide ``.path`` (parquet path relative to the
            dataset directory) and ``.label``.
        right_handed: currently unused; kept so existing callers keep working.

    Returns:
        Tuple ``(features, label)``: the output of ``feature_converter`` as a
        NumPy array, and ``record.label``.
    """
    record = row[1]

    # for Kaggle
    # x = load_relevant_data_subset(os.path.join("/kaggle/input/asl-signs", record.path))

    # for local notebook: load the landmark data from the parquet file
    x = load_relevant_data_subset(os.path.join("../data/asl-signs", record.path))

    # Apply the preprocessing layer. `.numpy()` already returns a host-side
    # NumPy array, so the deprecated `.cpu()` call (which triggered the
    # "Use tf.identity with explicit device placement instead." warning) is dropped.
    x = feature_converter(tf.convert_to_tensor(x)).numpy()
    return x, record.label

# ID lists grouped by handedness.
# NOTE(review): none of these lists is referenced by the code visible in this
# notebook. Presumably they hold signer/participant IDs intended for a
# handedness-aware variant of convert_row() -- confirm before removing.
right_handed_signer = [26734, 28656, 25571, 62590, 29302, 
                       49445, 53618, 18796,  4718,  2044, 
                       37779, 30680]
left_handed_signer  = [16069, 32319, 36257, 22343, 27610, 
                       61333, 34503, 55372, ]
both_hands_signer   = [37055, ]

# NOTE(review): 29302 also appears in right_handed_signer; the meaning of
# "messy" is not established by the visible code.
messy = [29302, ]

def convert_and_save_data():
    """Convert the training sequences to feature rows and save them as .npy files.

    Reads TRAIN_FILE, maps every ``sign`` to its label through ``label_map``,
    runs ``convert_row`` on each processed row and writes

    * ``../data/feature_data.npy``   -- one feature row per sequence
    * ``../data/feature_labels.npy`` -- one label per sequence (stored as
      float64, the ``np.zeros`` default)

    When QUICK_TEST is set, only the first QUICK_LIMIT rows are converted
    (fewer if the CSV has fewer rows).
    """
    df = pd.read_csv(TRAIN_FILE)
    # Add the numeric label for every sign
    df['label'] = df['sign'].map(label_map)

    total = df.shape[0]
    if QUICK_TEST:
        # Clamp to the CSV length so the saved arrays never contain rows that
        # were allocated but never filled.
        total = min(total, QUICK_LIMIT)

    # Row width: flattened resized sequence + mean/std features for each
    # segment and for the whole sequence.
    n_features = INPUT_SHAPE[0]*INPUT_SHAPE[1] + (SEGMENTS+1)*INPUT_SHAPE[1]*2
    npdata = np.zeros((total, n_features))
    nplabels = np.zeros(total)

    # Iterate over exactly `total` rows instead of breaking out of a full
    # iteration: the progress bar then reaches 100% rather than stopping one
    # step short.
    for i, row in tqdm(enumerate(df.head(total).iterrows()), total=total):
        x, y = convert_row(row)
        npdata[i, :] = x
        nplabels[i] = y

    # save as np file
    np.save("../data/feature_data.npy", npdata)
    np.save("../data/feature_labels.npy", nplabels)
        
# Run the conversion; this writes feature_data.npy and feature_labels.npy to ../data
convert_and_save_data()

  0%|          | 0/100 [00:00<?, ?it/s]

Instructions for updating:
Use tf.identity with explicit device placement instead.


 99%|█████████▉| 99/100 [00:00<00:00, 124.77it/s]


In [10]:
# Reload the arrays written by convert_and_save_data() to check the result
X = np.load("../data/feature_data.npy")
y = np.load("../data/feature_labels.npy")

# Shapes of the feature matrix and of the label vector
print(X.shape, y.shape)

# Shape and content of the first feature row
print(X[0, :].shape, X[0, :])

(100, 5796) (100,)
(5796,) [ 5.18924236e-01  3.42620254e-01  1.48732506e-05 ...  6.35599792e-02
  5.70323110e-01 -1.20788895e-01]
