In [None]:
# Configuration - locations as seen from inside the NNV Docker container.
# In that container the data lives at
# /nnv/code/nnv/examples/Submission/FORMALISE2025/data
BASE_DIR = "/nnv/code/nnv/examples/Submission/FORMALISE2025"
DATA_DIR = BASE_DIR + "/data"
MODELS_DIR = BASE_DIR + "/models"

# Number of frames every extracted clip is resampled or padded to.
MAX_FRAMES = 16  # Options: 8, 16, 32

# Download the KTH Actions dataset

In [None]:
! wget https://www.csc.kth.se/cvap/actions/walking.zip
! wget https://www.csc.kth.se/cvap/actions/jogging.zip
! wget https://www.csc.kth.se/cvap/actions/running.zip
! wget https://www.csc.kth.se/cvap/actions/boxing.zip
! wget https://www.csc.kth.se/cvap/actions/handwaving.zip
! wget https://www.csc.kth.se/cvap/actions/handclapping.zip

In [3]:
! wget https://www.csc.kth.se/cvap/actions/00sequences.txt

--2025-08-29 06:39:23--  https://www.csc.kth.se/cvap/actions/00sequences.txt
Resolving www.csc.kth.se (www.csc.kth.se)... 130.237.28.41, 2001:6b0:1:11c2::82ed:1c29
Connecting to www.csc.kth.se (www.csc.kth.se)|130.237.28.41|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 38292 (37K) [text/plain]
Saving to: ‘00sequences.txt’

00sequences.txt       0%[                    ]       0  --.-KB/s               ^C


In [None]:
! mkdir data
! unzip -qq walking.zip -d data
! unzip -qq jogging.zip -d data
! unzip -qq running.zip -d data
! unzip -qq boxing.zip -d data
! unzip -qq handwaving.zip -d data
! unzip -qq handclapping.zip -d data

In [None]:
! mkdir data/train
! mkdir data/val
! mkdir data/test

In [None]:
# Person IDs assigned to each split, as described by 00sequences.txt.
train_samples = [11, 12, 13, 14, 15, 16, 17, 18]
val_samples = [19, 20, 21, 23, 24, 25, 1, 4]
test_samples = [22, 2, 3, 5, 6, 7, 8, 9, 10]

# Action classes that appear in the video file names.
tasks = ['boxing', 'handclapping', 'handwaving', 'jogging', 'running', 'walking']

# Recording-setting suffixes (d1 .. d4).
settings = [1, 2, 3, 4]


def _video_names(people):
    """Return 'personXX_<task>_d<setting>' for every task/setting/person combination.

    Names are ordered by task, then setting, then person; person IDs are
    zero-padded to two digits.
    """
    return [
        f'person{person:02d}_{task}_d{setting}'
        for task in tasks
        for setting in settings
        for person in people
    ]


train_samples_names = _video_names(train_samples)
val_samples_names = _video_names(val_samples)
test_samples_names = _video_names(test_samples)

print(train_samples_names)
print(val_samples_names)
print(test_samples_names)

In [None]:
import re

# Maps a video name to its list of [first_frame, last_frame] pairs, e.g.
# {'person01_boxing_d1': [[1, 95], [96, 185], [186, 245], [246, 360]]}
samples = {}

# A data line has the form (fields separated by one or more tabs/spaces):
#   person01_boxing_d1    frames    1-95, 96-185, 186-245, 246-360
# Matching on that shape skips the free-text header and blank lines without
# depending on a fixed header length or on the exact number of tabs.
_sequence_line = re.compile(r'^\s*(person\d+_[a-z]+_d\d+)\s+frames\s+(.*)$')
_frame_range = re.compile(r'(\d+)\s*-\s*(\d+)')

with open('00sequences.txt', 'r') as f:
    for line in f:
        matched = _sequence_line.match(line)
        if matched is None:
            continue  # header text, blank line, or anything that is not an annotation

        video_name, ranges_text = matched.groups()
        samples[video_name] = [
            [int(first), int(last)]
            for first, last in _frame_range.findall(ranges_text)
        ]

# An empty result means the download was interrupted or the file format changed;
# fail here rather than with a KeyError deep inside the dataset builder.
if not samples:
    raise ValueError(
        "No sequence annotations found in 00sequences.txt - "
        "the file is empty, truncated, or not in the expected format"
    )

In [None]:
import cv2
import numpy as np

# Integer class id for every action name; the id is the name's position in this tuple.
_ACTIONS_IN_LABEL_ORDER = (
    "walking",
    "jogging",
    "running",
    "boxing",
    "handwaving",
    "handclapping",
)

label_map = {action: class_id for class_id, action in enumerate(_ACTIONS_IN_LABEL_ORDER)}

def build_dataset(sample_names, max_frames):
    """Cut each video into its annotated sub-sequences, all with the same frame count.

    For every name in ``sample_names`` the file ``data/<name>_uncomp.avi`` is
    decoded completely and then sliced with the frame ranges stored in the
    global ``samples`` dict. Each slice is reduced to ``max_frames`` evenly
    spaced frames, or padded at the end with black frames when it is shorter.

    Args:
        sample_names: iterable of video names such as 'person01_boxing_d1'.
        max_frames: number of frames every returned clip contains.

    Returns:
        A ``(data_samples, labels)`` tuple of two parallel lists: one array per
        sub-sequence (first axis of length ``max_frames``) and the matching
        integer label looked up in the global ``label_map``.

    Raises:
        IOError: if a video file cannot be opened.
        KeyError: if a name is missing from ``samples`` or its action name is
            missing from ``label_map``.
    """
    data_samples = []
    labels = []

    for sample_name in sample_names:
        # This one video is always skipped.
        # NOTE(review): presumably it is absent from the downloaded archives - confirm.
        if sample_name == 'person13_handclapping_d3':
            continue

        frames = _read_all_frames(sample_name)
        label = label_map[sample_name.split('_')[1]]  # 'personXX_<action>_dN'

        for first_frame, last_frame in samples[sample_name]:  # samples is a global object
            # Annotated ranges are 1-based and inclusive (consecutive ranges such as
            # 1-95, 96-185 share no frame), so the slice must end at last_frame,
            # not last_frame - 1, to keep the final frame of the range.
            clip = frames[first_frame - 1:last_frame]

            data_samples.append(np.array(_fit_to_length(clip, max_frames)))
            labels.append(label)

    return data_samples, labels


def _read_all_frames(sample_name):
    """Decode every frame of data/<sample_name>_uncomp.avi into a list."""
    cap = cv2.VideoCapture(f'data/{sample_name}_uncomp.avi')

    if not cap.isOpened():
        raise IOError(f"Cannot open video file: {sample_name}")

    frames = []
    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                break
            frames.append(frame)
    finally:
        # release the capture even if decoding raises part-way through
        cap.release()

    return frames


def _fit_to_length(clip, max_frames):
    """Return exactly max_frames frames: evenly sampled from clip, or clip plus black padding."""
    if len(clip) >= max_frames:
        indices = np.linspace(0, len(clip) - 1, num=max_frames, dtype=int)
        return [clip[i] for i in indices]

    # Too short: pad with black frames shaped like the clip's own frames, so the
    # stacked array stays rectangular. An empty clip has no frame to copy the
    # shape from, so it falls back to 120x160 3-channel uint8 frames.
    if clip:
        black_frame = np.zeros_like(clip[0])
    else:
        black_frame = np.zeros((120, 160, 3), dtype=np.uint8)
    return clip + [black_frame] * (max_frames - len(clip))

In [None]:
import os

# Output folder is <DATA_DIR>/KTHActions/<MAX_FRAMES>; MAX_FRAMES comes from the
# configuration cell, so each frame count gets its own directory.
save_dir = os.path.join(DATA_DIR, 'KTHActions', f'{MAX_FRAMES}')
os.makedirs(name=save_dir, exist_ok=True)

In [None]:
# Assemble the training split (build_dataset returns plain lists) and report its size.
training_data, training_labels = build_dataset(train_samples_names, MAX_FRAMES)
print(len(training_data))

# Convert to arrays, then write clips and labels into save_dir.
training_data, training_labels = np.array(training_data), np.array(training_labels)
training_data_path = os.path.join(save_dir, f'kthactions_training_{MAX_FRAMES}.npy')
training_labels_path = os.path.join(save_dir, f'kthactions_training_labels_{MAX_FRAMES}.npy')
np.save(training_data_path, training_data)
np.save(training_labels_path, training_labels)

760


In [None]:
# Assemble the validation split (build_dataset returns plain lists) and report its size.
val_data, val_labels = build_dataset(val_samples_names, MAX_FRAMES)
print(len(val_data))

# Convert to arrays, then write clips and labels into save_dir.
val_data, val_labels = np.array(val_data), np.array(val_labels)
val_data_path = os.path.join(save_dir, f'kthactions_val_{MAX_FRAMES}.npy')
val_labels_path = os.path.join(save_dir, f'kthactions_val_labels_{MAX_FRAMES}.npy')
np.save(val_data_path, val_data)
np.save(val_labels_path, val_labels)

768


In [None]:
# Assemble the test split (build_dataset returns plain lists) and report its size.
test_data, test_labels = build_dataset(test_samples_names, MAX_FRAMES)
print(len(test_data))

# Convert to arrays, then write clips and labels into save_dir.
test_data, test_labels = np.array(test_data), np.array(test_labels)
test_data_path = os.path.join(save_dir, f'kthactions_test_{MAX_FRAMES}.npy')
test_labels_path = os.path.join(save_dir, f'kthactions_test_labels_{MAX_FRAMES}.npy')
np.save(test_data_path, test_data)
np.save(test_labels_path, test_labels)

863


# Save normalized copies (pixel values divided by 255)

In [None]:
# training
normalized_training_data = training_data / 255
np.save(os.path.join(save_dir, f'kthactions_normalized_training_{MAX_FRAMES}.npy'), normalized_training_data)

# validation
normalized_val_data = val_data / 255
np.save(os.path.join(save_dir, f'kthactions_normalized_val_{MAX_FRAMES}.npy'), normalized_val_data)

# testing
normalized_test_data = test_data / 255
np.save(os.path.join(save_dir, f'kthactions_normalized_test_{MAX_FRAMES}.npy'), normalized_test_data)