# Downloading and Preprocessing Datasets

## Import necessary modules

In [2]:
from tqdm.notebook import tqdm

import os

from torch_skeleton.datasets import UCLA, Apply, DiskCache
import torch_skeleton.transforms as T

## Download and Preprocess Dataset

In [10]:
# Preprocessing pipeline applied to every skeleton sample. The exact effect of
# each transform is defined by torch_skeleton.transforms; judging by the names,
# the sample is centred on joint 1 and two bones are aligned with fixed axes
# (TODO confirm against the library docs).
preprocess = T.Compose([
    T.CenterJoint(joint_id=1, all=False),
    T.ParallelBone(first_id=0, second_id=1, axis=2),
    T.ParallelBone(first_id=8, second_id=4, axis=0),
])

# Build the dataset once up front so the files listed below are present under
# "data" (construction reports whether the archive download was skipped).
dataset = UCLA(root="data", transform=preprocess)

# Peek at the first few raw sample files.
print(os.listdir("data/NW-UCLA/all_sqe")[:5])

# apply preprocessing at creation
dataset = UCLA(root="data", transform=preprocess)

# Each item is a (skeleton, label) pair; print the skeleton's shape.
x, y = dataset[0]
print(f"returns x with M=1, T=17, V=20, C=3 {x.shape}")

# or apply them after creation
dataset = Apply(UCLA(root="data"), transform=preprocess)

all_sqe.zip exists, skipping download
['a12_s06_e03_v03.json', 'a12_s06_e02_v03.json', 'a08_s08_e03_v01.json', 'a01_s05_e02_v02.json', 'a04_s07_e04_v01.json']
all_sqe.zip exists, skipping download
returns x with M=1, T=17, V=20, C=3 (1, 17, 20, 3)
all_sqe.zip exists, skipping download


## Cache Preprocessed Dataset to Disk

In [None]:
# Wrap the preprocessed dataset in a DiskCache rooted at /tmp/ucla.
# NOTE(review): presumably samples are written under this root as they are
# accessed — confirm against the DiskCache documentation.
cache = DiskCache(dataset, root="/tmp/ucla")

def list_temp_dir(root: str = "/tmp/ucla", limit: int = 10) -> None:
    """Print the first ``limit`` entries of the first subdirectory of ``root``.

    Used to inspect what has been written into the cache directory so far.
    Entries are sorted so the output is stable between calls (``os.listdir``
    returns names in arbitrary order).

    Args:
        root: Directory that contains the cache subdirectory.
        limit: Maximum number of entries to print.

    Raises:
        FileNotFoundError: If ``root`` does not exist.
        NotADirectoryError: If the first entry of ``root`` is not a directory.
    """
    subdirs = sorted(os.listdir(root))
    if not subdirs:
        # Nothing has been created under the root yet; show an empty listing
        # instead of failing with IndexError.
        print([])
        return
    entries = sorted(os.listdir(os.path.join(root, subdirs[0])))
    # Print explicitly: a bare expression inside a function is not displayed
    # by the notebook, so the listing would otherwise be silently discarded.
    print(entries[:limit])

# Inspect the cache directory before any sample has been requested.
list_temp_dir()
# Request a single sample through the cache, then inspect the directory again.
# NOTE(review): presumably this access stores sample 0 on disk — confirm
# against the DiskCache documentation.
cache[0]
list_temp_dir()

# Iterate over the whole cache once; the loop body is intentionally empty
# because only the side effect of accessing every sample is wanted.
for x, y in cache:
    pass

# Inspect the cache directory after the full pass.
list_temp_dir()

## Add augmentations for training

In [None]:
# Layer the training-time transforms on top of the cached dataset, so they are
# applied to samples coming out of the cache rather than stored in it.
# NOTE(review): judging by the transform names, this samples 20 frames,
# applies a random rotation of up to 17 degrees and pads to 20 frames —
# confirm the exact semantics against torch_skeleton.transforms.
dataset = Apply(
    cache,
    transform=T.Compose(
        [
            T.SampleFrames(num_frames=20),
            T.RandomRotate(degrees=17),
            T.PadFrames(max_frames=20),
        ]
    ),
)