#### Reproduction of the results
______
***WLASL Dataset!***

##### Please follow the procedures described on WLASL official web page: https://dxli94.github.io/WLASL/ . We recommend getting the full dataset from authors directly rather than doing the video downloads yourself 

##### You have to put all WLASL video samples (21095 files) in the WLASL2000 directory and the corresponding WLASL_v0.3.json file in the WLASL-100 directory
______

We used Python 3.10.11

Please make sure to install the requirements before executing this notebook: 

`pip install -r requirements.txt`

The reproduction flow:
1. Obtain the full WLASL dataset and place the videos and the JSON file in the directories mentioned above
2. Run this notebook to perform the preprocessing and create the training, validation and test datasets
3. Run the wlasl_ml.ipynb notebook to train the model and reproduce the results
4. Results might differ slightly due to the initial model seed on your hardware

##### Preprocessing and reproducing the WLASL results will take a lot of time; we recommend doing it on a separate machine

In [None]:
import sys
sys.path.insert(1,'..')
import mp2signal.mp2s as mp2s
import numpy as np
import json
import matplotlib.pyplot as plt
import pandas as pd
import joblib
import mediapipe as mp
import torch
import torch.nn.functional as F
import gc

# Directory holding the WLASL clips; each one is read as '<video_id>.mp4'.
videos_path = 'WLASL2000/'
# Root directory for the arrays and dataframes written by this notebook.
save_path = 'data/'
# Handle to the WLASL annotation file, parsed with json.load in the next cell.
# JSON text is UTF-8, so do not depend on the platform's default encoding.
fjs = open('WLASL_v0.3.json', encoding='utf-8')


In [None]:
js = json.load(fjs)
# The annotations are fully in memory now, so release the file handle.
fjs.close()

vid = 0
# Progress denominator: number of video instances listed in the annotations.
total_videos = sum(len(entry['instances']) for entry in js)
mp_holistic = mp.solutions.holistic


def _save_grams(movement, video_id):
    """Write the raw posegram and the basic-normalised coordinates of one video.

    Both arrays are saved under the name ``video_id`` (np.save appends
    '.npy'), in 'raw_grams/' and 'basic_norm_grams/' inside ``save_path``.
    The normalised coordinates are stored as float16.
    """
    np.save(save_path + 'raw_grams/' + video_id, movement.posegram())
    basic_coords = movement._basic_normalization_mov_data(to_uint8=False)
    np.save(save_path + 'basic_norm_grams/' + video_id, basic_coords.astype(np.float16))


with mp_holistic.Holistic(
        smooth_landmarks=True,
        model_complexity=2,
        min_detection_confidence=0.1,
        refine_face_landmarks=True,
        min_tracking_confidence=0.1) as holistic:
    for entry in js:
        for instance in entry['instances']:
            video_id = instance['video_id']
            vid_path = videos_path + video_id + '.mp4'
            try:
                _save_grams(mp2s.Movement(vid_path, False, holistic), video_id)
            except Exception:
                # `except Exception` (not a bare except) so that Ctrl-C still
                # stops this very long run instead of triggering the fallback.
                # Fallback: rebuild the movement with the face landmarks taken
                # from reference clip 45669, cropped to the shorter of the two
                # sequences. Presumably this covers clips where face detection
                # failed (see the message printed below) -- TODO confirm.
                base_m = mp2s.Movement()
                base_m_data = base_m.movement_from_mediapipe(videos_path + '45669' + '.mp4', False, holistic)

                sample_m = mp2s.Movement()
                sample_m_d = sample_m.movement_from_mediapipe(vid_path, False, holistic)
                print(len(sample_m_d['MP_Face']))
                if len(sample_m_d['MP_Face']) >= len(base_m_data['MP_Face']):
                    sample_m_d['MP_Face'][:len(base_m_data['MP_Face'])] = base_m_data['MP_Face']
                else:
                    sample_m_d['MP_Face'] = base_m_data['MP_Face'][:len(sample_m_d['MP_Face'])]
                sample_m.process(sample_m_d)

                _save_grams(sample_m, video_id)
                print('Faled face: ', entry['gloss'], video_id)
            # Free the per-video arrays before the next clip is processed.
            gc.collect()
            vid += 1
            print('Total Progress ', (int((vid / total_videos) * 100)), "%", str(vid) + "/" + str(total_videos), end='\r')

In [None]:
vid = 0
data_pd = []
# Progress denominator: number of video instances listed in the annotations.
total_videos = sum(len(entry['instances']) for entry in js)
# The class id of a gloss is its position in the annotation list.
for class_id, entry in enumerate(js):
    text = entry['gloss']
    for instance in entry['instances']:
        video_id = instance['video_id']
        # Arrays written by the preprocessing cell for this video.
        pg = np.load(save_path + 'raw_grams/' + video_id + '.npy')
        bngr = np.load(save_path + 'basic_norm_grams/' + video_id + '.npy')
        # Field order: split, gloss, signer, class id, variation, normalised gram, posegram.
        data_pd.append((instance['split'], text, instance['signer_id'], class_id,
                        instance['variation_id'], bngr, pg))
        vid += 1
        print('Total Progress ', (int((vid / total_videos) * 100)), "%", str(vid) + "/" + str(total_videos), end='\r')

In [None]:
# Turn the collected per-video tuples into a dataframe, storing the label
# columns as categoricals and the variation id as a string column.
gram_columns = ['Set', 'Text', 'Participant ID', 'Class ID', 'Variation ID', 'Basic Norm PG', 'Posegram']
label_dtypes = dict.fromkeys(['Set', 'Text', 'Participant ID', 'Class ID'], 'category')
label_dtypes['Variation ID'] = 'string'
df = pd.DataFrame(data_pd, columns=gram_columns).astype(label_dtypes)
df.info()

In [None]:
# Persist the dataframe of raw and basic-normalised grams under save_path.
joblib.dump(df,save_path+'df_wlasl_grams.joblib')

In [None]:
# Shape of the raw posegram stored at index label 0.
df['Posegram'][0].shape

In [None]:
# Shape of the basic-normalised gram stored at index label 0.
df['Basic Norm PG'][0].shape

In [None]:
import torch
import torch.nn.functional as F

def _cut_by_hand_activity(gram, posegram, i):
    """Cut ``gram`` to the frame range selected from ``posegram``.

    ``posegram`` is trimmed with ``mp2s.trim_posegram``, rescaled with
    ``(x - 128) / 128`` and each frame gets the score
    ``1 - |cosine_similarity(frame[6:24], frame[29:47])|``. Frames with a
    score above zero are the candidates; the cut runs from the first to the
    last candidate.

    Axis 1 of ``posegram`` is used as the frame axis. The two slices are
    presumably the left/right hand feature blocks -- TODO confirm against
    mp2s.

    Args:
        gram: array to cut; it is transposed before slicing, so the result is
            indexed by frame on axis 0.
        posegram: raw posegram the frame scores are computed from.
        i: identifier printed together with the scores when no cut is made.

    Returns:
        ``np.transpose(gram)[first:last]`` when more than two candidate frames
        exist, otherwise the full ``np.transpose(gram)``.
    """
    posegram_trim = mp2s.trim_posegram(posegram)
    pg = (torch.tensor(posegram_trim).T.float() - 128) / 128

    n_frames = posegram.shape[1]
    hands_detect = torch.zeros((n_frames))
    # NOTE(review): the final frame is never scored (range stops at
    # n_frames - 1), so it always keeps score 0. Kept as-is so the published
    # results stay reproducible.
    for fr in range(n_frames - 1):
        similarity = F.cosine_similarity(
            F.normalize(pg[fr][6:24], p=2, dim=-1),
            F.normalize(pg[fr][29:47], p=2, dim=-1),
            dim=0)
        hands_detect[fr] = 1 - abs(similarity)

    points = np.where(hands_detect > 0)[0]

    if len(points) > 2:
        start_point, furthest_point = points[0], points[-1]
        # NOTE(review): the slice end is exclusive, so the last candidate
        # frame itself is dropped. Kept as-is for reproducibility.
        return np.transpose(gram)[start_point:furthest_point]

    # Too few candidate frames: report the scores and keep the whole sample.
    print(i, hands_detect)
    return np.transpose(gram)


def cut_posegram(posegram,i = 0):
    """Return ``posegram`` transposed and cut to its own hand-activity range.

    See ``_cut_by_hand_activity`` for how the range is chosen. ``i`` is only
    used in the diagnostic print emitted when no cut can be made.
    """
    return _cut_by_hand_activity(posegram, posegram, i)


def cut_bngp(bnpg,posegram,i=0):
    """Return ``bnpg`` transposed and cut to the range found on ``posegram``.

    The frame range is computed from ``posegram`` exactly as in
    ``cut_posegram`` and then applied to ``bnpg``. ``i`` is only used in the
    diagnostic print emitted when no cut can be made.
    """
    return _cut_by_hand_activity(bnpg, posegram, i)
    



# Visual check on one sample: show the raw posegram, its cut version, the
# basic-normalised gram and its cut version, in that order.
idx = 0
posegram = df['Posegram'].iloc[idx]
basic_norm = df['Basic Norm PG'].iloc[idx]

cp = cut_posegram(posegram)
cbn = cut_bngp(basic_norm, posegram, idx)

for image in (posegram, cp, basic_norm, cbn):
    plt.imshow(image, cmap='brg')
    plt.show()


In [None]:
# Create the cut posegram column. It is read below and by the resize and
# plotting cells, but was never assigned anywhere in the notebook (KeyError).
# This mirrors how the 'BNPGC' column is built from cut_bngp.
df['PosegramC'] = [cut_posegram(df['Posegram'].iloc[idx], idx) for idx in range(len(df['Posegram']))]
df['PosegramC'][0].shape

In [None]:
# Cut every basic-normalised gram to the frame range found on its posegram.
df['BNPGC'] = [
    cut_bngp(bnpg, posegram, row)
    for row, (bnpg, posegram) in enumerate(zip(df['Basic Norm PG'], df['Posegram']))
]

In [None]:
# Shape of the cut basic-normalised gram stored at index label 0.
df['BNPGC'][0].shape

In [None]:
def resize_sample(gram, factor):
    """Resample ``gram`` along its first axis by linear interpolation.

    Args:
        gram: 2-D array whose axis 0 is the one being resampled.
        factor: length multiplier; the new length is
            ``round(gram.shape[0] * factor)``.

    Returns:
        Array with the resampled axis 0, the same axis 1 and the same dtype
        as ``gram`` (interpolated values are cast back to that dtype).
    """
    channels = np.transpose(gram)
    source_len = channels.shape[1]
    target_len = round(source_len * factor)

    out_shape = list(channels.shape)
    out_shape[1] = target_len
    resized = np.zeros(out_shape, dtype=channels.dtype)

    # Integer positions to sample at, and where the original samples land
    # after stretching by `factor`; positions past the last one are clamped.
    query_positions = np.arange(0, target_len)
    known_positions = np.arange(0, source_len) * factor
    for row, series in enumerate(channels):
        resized[row] = np.interp(query_positions, known_positions, series)

    return np.transpose(resized)

In [None]:
# Resample every cut posegram with factor 100 / (its length along axis 0).
df['PosegramCR'] = [resize_sample(cut, 100 / cut.shape[0]) for cut in df['PosegramC']]

In [None]:
# Resample every cut basic-normalised gram with factor 100 / (its length along axis 0).
df['BNPGCR'] = [resize_sample(cut, 100 / cut.shape[0]) for cut in df['BNPGC']]

In [None]:
# Apply mp2s.trim_posegram to each resized posegram; the sample is transposed
# before the call and the result is transposed back afterwards.
df['PosegramCRT'] = [
    np.transpose(mp2s.trim_posegram(np.transpose(resized)))
    for resized in df['PosegramCR']
]

In [None]:
# Apply mp2s.trim_posegram to each resized basic-normalised gram; the sample
# is transposed before the call and the result is transposed back afterwards.
df['BNPGCRT'] = [
    np.transpose(mp2s.trim_posegram(np.transpose(resized)))
    for resized in df['BNPGCR']
]

In [None]:
# Shape of the cut, resized and trimmed posegram stored at index label 0.
df['PosegramCRT'][0].shape

In [None]:
import matplotlib.pyplot as plt
def plot(x):
    """Show ``x`` as an image on a new 200-dpi figure, axes hidden, 'brg' colormap."""
    _, axes = plt.subplots(dpi=200)
    axes.axis('off')
    axes.imshow(x, cmap='brg')
    plt.show()

# Show every processing stage of one sample: first the basic-normalised gram
# and its derived columns, then the raw posegram and its derived columns.
idx = 6

stage_columns = (
    ('Basic Norm PG', ('BNPGC', 'BNPGCR', 'BNPGCRT')),
    ('Posegram', ('PosegramC', 'PosegramCR', 'PosegramCRT')),
)
for source_column, derived_columns in stage_columns:
    plot(df[source_column][idx])
    # Derived columns are stored transposed relative to the source column.
    for derived_column in derived_columns:
        plot(df[derived_column][idx].T)

In [None]:
# Preview the first rows of the dataframe with all derived columns.
df.head()

In [None]:
# Persist the dataframe, now including the cut/resized/trimmed columns, under save_path.
joblib.dump(df,save_path+'df_wlasl_grams_cut_res_trim.joblib')

In [None]:
import joblib
import torch
import pandas as pd 

def train_test_val(df,set_size, column, data_path, file_prefix):
    """Export the train/test/val samples of the first ``set_size`` classes.

    For each split ('train', 'test', 'val') the rows with that 'Set' value
    and ``'Class ID' < set_size`` are collected into a two-column dataframe
    and pickled to ``data_path + file_prefix + split + str(set_size) + '.pkl'``.

    Args:
        df: dataframe with 'Set', 'Class ID' (numeric) and ``column`` columns.
        set_size: number of classes to keep (class ids ``0 .. set_size - 1``).
        column: name of the column holding one numpy array per sample.
        data_path: output directory prefix, concatenated as-is (include the
            trailing separator).
        file_prefix: prefix for the three output file names.

    The pickled frames have the columns 'X' (tensor made from the sample
    array with ``torch.from_numpy``) and 'Y' (one-element tensor holding the
    class id).
    """
    in_subset = df['Class ID'] < set_size
    split_sizes = []

    for split in ('train', 'test', 'val'):
        samples = df.loc[(df['Set'] == split) & in_subset, [column, 'Class ID']]

        # Iterate the columns directly: integer keys on a label-indexed row
        # (row[0], row[1]) rely on a deprecated positional fallback in pandas.
        split_df = pd.DataFrame()
        split_df['X'] = [torch.from_numpy(gram) for gram in samples[column]]
        split_df['Y'] = [torch.tensor([class_id]) for class_id in samples['Class ID']]

        split_df.to_pickle(data_path + file_prefix + split + str(set_size) + '.pkl')
        split_sizes.append(len(split_df))

    print('Done TRAIN TEXT VAL',set_size, column, data_path, file_prefix, *split_sizes)

data_path = 'D:/WLASL/data/'

# To resume from a saved dataframe instead of recomputing it, uncomment:
#df = joblib.load(data_path+'df_wlasl_grams_cut_res_trim.joblib')

# 'Class ID' was stored as a categorical; train_test_val compares it with '<'.
df = df.astype({'Class ID': 'int'})

# Export the 100-class subset for both gram variants (posegram files carry no
# prefix, basic-normalised gram files carry 'bnpg_').
for gram_column, prefix in (('PosegramCRT', ''), ('BNPGCRT', 'bnpg_')):
    train_test_val(df, 100, gram_column, save_path, prefix)