In [2]:
import pandas as pd
import numpy as np
import os
import cv2
import natsort
import matplotlib.pyplot as plt
from tqdm import tqdm
import glob
import zipfile
import csv

# data 1 : URFD
- step 1: unzip files and save frames
- step 2: read CSV where frame-level labels are given
- step 3: segment frames based on frame-level labels
- step 4: pack each directory containing segmented .jpg files as an .avi file (to follow the UCF data pipeline)


In [170]:
# step 1 : unzip files and save frames
# Every `adl-*-cam0-rgb.zip` / `fall-*-cam0-rgb.zip` archive found in
# `raw_folder` is extracted into the matching class sub-folder of `data_folder`.

raw_folder = "/data/FallDownData/URFD_new/raw/"
data_folder = "/data/FallDownData/URFD_new/frames_not_segmented/"

adl_folder = "adl/"
fall_folder = "fall/"

# Path to save the frames
output_path = "/data/FallDownData/URFD_new/frames/"

# Create each class folder on its own: guarding on `data_folder` alone would
# skip the creation of a missing sub-folder when the parent already exists.
os.makedirs(data_folder + fall_folder, exist_ok=True)
os.makedirs(data_folder + adl_folder, exist_ok=True)

adl_zipped_files = glob.glob(raw_folder + 'adl-*-cam0-rgb.zip')
fall_zipped_files = glob.glob(raw_folder + 'fall-*-cam0-rgb.zip')

# (archives, destination folder) pairs, one per class
content = [
    [adl_zipped_files, data_folder + adl_folder],
    [fall_zipped_files, data_folder + fall_folder]
]
for zipped_files, dst_folder in content:
    for zipped_file in zipped_files:
        # The context manager closes the archive handle even if extraction fails.
        with zipfile.ZipFile(zipped_file) as zfile:
            zfile.extractall(dst_folder)


# step 2 : read CSV where frame-level labels are given

falls_labels = "/data/FallDownData/URFD/urfall-cam0-falls.csv"
notfalls_labels = "/data/FallDownData/URFD/urfall-cam0-adls.csv"


def _read_frame_labels(csv_path):
    """Read one label CSV and return ``{video_id: [label, ...]}``.

    Column 0 of each row is used as the video id and column 2 as the raw
    per-frame label. Labels are appended in file order, so the position in the
    list is the row position of that video inside the CSV.

    Raw labels 1 and -1 are stored as 0 and raw label 0 is stored as 1
    (the segmentation step treats 0 as ADL and 1 as an actual fall).
    Any other raw value is not stored.
    NOTE(review): a row that is not stored shortens the list and would shift
    the frame indices that follow - confirm the CSVs only contain -1/0/1.

    Blank rows are skipped. A row with fewer than three columns or a
    non-integer label raises (IndexError / ValueError).
    """
    per_video = {}
    with open(csv_path, 'r', newline='') as csvfile:
        for row in csv.reader(csvfile):
            if not any(field.strip() for field in row):
                continue  # blank line
            video_id = row[0].strip()
            raw_label = int(row[2])
            frame_labels = per_video.setdefault(video_id, [])
            if raw_label in (1, -1):
                frame_labels.append(0)
            elif raw_label == 0:
                frame_labels.append(1)
    return per_video


# Both files share the same layout, so they go through the same parser.
labels = {
    'falls': _read_frame_labels(falls_labels),
    'notfalls': _read_frame_labels(notfalls_labels),
}

# step 3 : segment frames based on frame-level labels
#
# ADL videos are copied frame by frame to <output_path>/adl/<event>/.
# Fall videos are split using labels['falls'][<event id>][frame index]:
#   label 0 before the first fall frame -> <output_path>/adl/<event>_pre/
#   label 1                             -> <output_path>/fall/<event>/
#   label 0 after a fall frame was seen -> <output_path>/adl/<event>_post/
# Frames of each fall segment are renumbered from 1.


def _save_frame(frame, target_dir, index):
    """Write `frame` as <target_dir>/thumbNNNNN.jpg, creating the folder if needed."""
    os.makedirs(target_dir, exist_ok=True)
    cv2.imwrite(os.path.join(target_dir, 'thumb{:05}.jpg'.format(index)), frame)


# Get all folders: each one contains the set of images of the video
folders = [f for f in os.listdir(data_folder)
           if os.path.isdir(os.path.join(data_folder, f))]

for folder in folders:
    print('{} videos =============='.format(folder))
    if folder not in ('adl', 'fall'):
        # Nothing is ever written for other folders, so do not read them.
        continue

    class_dir = os.path.join(data_folder, folder)
    events = sorted(f for f in os.listdir(class_dir)
                    if os.path.isdir(os.path.join(class_dir, f)))

    for event in events:
        path_to_images = os.path.join(class_dir, event)

        # Load all the images of the video (sorted by file name)
        images = sorted(f for f in os.listdir(path_to_images)
                        if os.path.isfile(os.path.join(path_to_images, f)))

        # The main folder of the event is created up front.
        os.makedirs(os.path.join(output_path, folder, event), exist_ok=True)

        frame_labels = None
        if folder == 'fall':
            event_type = 'falls'
            event_id = event[:7]  # leading 7 characters of the folder name are the CSV key
            frame_labels = labels[event_type][event_id]
            if len(images) > len(frame_labels):
                # Fail before writing anything instead of raising a bare
                # IndexError half-way through the video.
                raise ValueError(
                    '{}: {} frames on disk but only {} labels'.format(
                        event, len(images), len(frame_labels)))

        fall_detected = False  # whether a fall has been detected in the video
        frame_counts = {"post": 0, "pre": 0, "fall": 0}

        for nb_image, image in enumerate(images):
            image_path = os.path.join(path_to_images, image)
            x = cv2.imread(image_path)
            if x is None:
                # cv2.imread returns None instead of raising; passing None on
                # to cv2.imwrite would fail with an opaque OpenCV error.
                raise IOError('Could not read frame: {}'.format(image_path))

            # If the image is part of an ADL video no fall need to be considered
            if folder == 'adl':
                _save_frame(x, os.path.join(output_path, 'adl', event),
                            nb_image + 1)
                continue

            frame_label = frame_labels[nb_image]
            if frame_label == 1:  # actual fall
                segment = 'fall'
                target_dir = os.path.join(output_path, 'fall', event)
            elif frame_label == 0:  # ADL, before or after the fall
                segment = 'post' if fall_detected else 'pre'
                target_dir = os.path.join(output_path, 'adl',
                                          '{}_{}'.format(event, segment))
            else:
                continue

            frame_counts[segment] += 1
            _save_frame(x, target_dir, frame_counts[segment])

            if segment == 'fall':
                # Once a fall frame was written, later ADL frames are post-fall.
                fall_detected = True


# step 4 : pack each directory containing segmented .jpg files as .avi file (to follow UCF data pipeline)

root = '/data/FallDownData/URFD_new/frames'
video_files = natsort.natsorted(glob.glob(root + '/*/*'))
for video in tqdm(video_files):
    # .../frames/<class>/<event>  ->  .../video/<class>/<event>.avi
    prefix = os.path.splitext(video)[0]
    save_path = prefix.replace('frames', 'video') + '.avi'

    frame_list = natsort.natsorted(glob.glob(video+'/*.jpg'))
    if not frame_list:
        # Nothing to pack (empty folder or a stray file): skip instead of
        # failing on frame_list[0].
        continue

    first_frame = cv2.imread(frame_list[0])
    if first_frame is None:
        raise IOError('Could not read frame: {}'.format(frame_list[0]))
    height, width = first_frame.shape[:2]

    # The target directory must exist BEFORE the writer is opened; otherwise
    # cv2.VideoWriter fails to open and silently drops every frame.
    os.makedirs(os.path.dirname(save_path), exist_ok=True)

    out = cv2.VideoWriter(save_path,
                          cv2.VideoWriter_fourcc('M', 'J', 'P', 'G'),
                          30, (width, height))  # frame size is (width, height)
    if not out.isOpened():
        raise IOError('Could not open video writer for: {}'.format(save_path))
    try:
        for img_file in frame_list:
            img_arr = cv2.imread(img_file)
            out.write(img_arr)
    finally:
        # Always finalize the file, even if a frame fails to load.
        out.release()

print("Done!")



HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=130.0), HTML(value='')))


Done!


# data 2 : Multicam FDD
- warning: this dataset is of poor quality!
- synchronization between the cameras is not well matched

In [172]:
import json
from tqdm.notebook import trange, tqdm

fall_annotation_file = "/data/FallDownData/MulticamFD/Multicam_Annotations.csv"
delays_file = "/data/FallDownData/MulticamFD_new/delays_multicam.json"
data_folder = "/data/FallDownData/MulticamFD_new/raw/"
output_folder = "/data/FallDownData/MulticamFD_new/frames/"

num_cameras = 8
num_scenarios = 24
num_scenraios = num_scenarios  # original (misspelled) name kept for other cells

# read annotation files(falling time intervals & camera delay for sync)
# pd.read_csv opens the path itself, no surrounding open() is needed.
annotations = pd.read_csv(fall_annotation_file)
with open(delays_file, "r") as f:
    delays = json.load(f)

for s in trange(1, num_scenraios+1, desc="scenario loop"):
    # get all videos of this scenario, one per camera, ordered by file name
    videos = sorted(glob.glob(data_folder+"chute{:02d}/*".format(s)))

    # annotated sequences of this scenario; the last three columns of each
    # row are read as (start frame, end frame, action code)
    cur_anno = annotations[annotations.id==s].values

    for cam, video in tqdm(list(enumerate(videos, 1)), desc="camera loop", leave=False):
        cap = cv2.VideoCapture(video)
        try:
            # Delay of this camera for this scenario: either a single value or
            # a list with one entry per fall sequence.
            delay = delays["camera{}".format(cam)][str(s)]
            delay_pos = 0
            for seq_pos in range(len(cur_anno)):
                *_, start, end, code = cur_anno[seq_pos]

                # apply delay (list index is clamped to the last entry)
                if isinstance(delay, list):
                    offset = delay[min(delay_pos, len(delay)-1)]
                else:
                    offset = delay
                start += offset
                end += offset

                # move cursor to the delayed start
                cap.set(cv2.CAP_PROP_POS_FRAMES, start)

                # gt-label
                label = 'fall' if code == 2 else 'adl'

                if label == 'fall':
                    # the next entry of a delay list applies after each fall
                    delay_pos += 1

                output_path = (
                    output_folder +
                    '{}/chute{:02}-cam{}-s{:02}/'.format(
                    label, s, cam, seq_pos+1
                ))

                pos = start
                folder_ready = False
                while pos < end:
                    ret, frame = cap.read()
                    if not ret:
                        # No frame decoded (e.g. the annotation runs past the
                        # end of the video): stop instead of handing None to
                        # cv2.imwrite.
                        print('chute{:02}-cam{}-s{:02}: read failed at frame {}'.format(
                            s, cam, seq_pos+1, pos))
                        break
                    pos += 1
                    if not folder_ready:
                        # created only once a frame is actually available, so
                        # no empty sequence folder is left behind
                        os.makedirs(output_path, exist_ok=True)
                        folder_ready = True
                    cv2.imwrite(output_path + 'thumb{:05d}.jpg'.format(pos-start), frame)
        finally:
            # free the decoder for this video before opening the next one
            cap.release()
                

# pack each directory containing segmented .jpg files as .avi file (to follow UCF data pipeline)
root = '/data/FallDownData/MulticamFD_new/frames'
video_files = natsort.natsorted(glob.glob(root + '/*/*'))
for video in tqdm(video_files, desc="packing frames as a video..."):
    # .../frames/<class>/<sequence>  ->  .../video/<class>/<sequence>.avi
    prefix = os.path.splitext(video)[0]
    save_path = prefix.replace('frames', 'video') + '.avi'

    frame_list = natsort.natsorted(glob.glob(video+'/*.jpg'))
    if not frame_list:
        # Nothing to pack (empty folder or a stray file): skip instead of
        # failing on frame_list[0].
        continue

    first_frame = cv2.imread(frame_list[0])
    if first_frame is None:
        raise IOError('Could not read frame: {}'.format(frame_list[0]))
    height, width = first_frame.shape[:2]

    # The target directory must exist BEFORE the writer is opened; otherwise
    # cv2.VideoWriter fails to open and silently drops every frame.
    os.makedirs(os.path.dirname(save_path), exist_ok=True)

    out = cv2.VideoWriter(save_path,
                          cv2.VideoWriter_fourcc('M', 'J', 'P', 'G'),
                          120, (width, height))  # frame size is (width, height)
    if not out.isOpened():
        raise IOError('Could not open video writer for: {}'.format(save_path))
    try:
        for img_file in frame_list:
            img_arr = cv2.imread(img_file)
            out.write(img_arr)
    finally:
        # Always finalize the file, even if a frame fails to load.
        out.release()

print("Done!")

HBox(children=(HTML(value='packing frames as a video...'), FloatProgress(value=0.0, max=1304.0), HTML(value=''…


Done!


In [193]:
from collections import Counter
from pprint import pprint

# Annotation code (1-based) -> action name, listed in code order.
ix2label = dict(enumerate(
    [
        "Walking, standing up",
        "Falling",
        "Lying on the ground",
        "Crounching",
        "Moving down",
        "Moving up",
        "Sitting",
        "Lying on a sofa",
        "Moving horizontaly",
    ],
    start=1,
))

# How many annotated sequences exist per action name.
foo = Counter(annotations.code.map(ix2label))

banner = "-"*40
for banner_line in (banner, "Statistics of detailed action sequences!", banner):
    print(banner_line)

pprint(foo)

# Take the fall count out of the counter; what is left are the normal actions.
n_falls = foo.pop("Falling")
n_normals = sum(count for count in foo.values())

print()
print("### number of normal sequences: ", n_normals, "// number of falling sequences: ", n_falls)


----------------------------------------
Statistics of detailed action sequences!
----------------------------------------
Counter({'Walking, standing up': 36,
         'Moving up': 29,
         'Moving down': 26,
         'Falling': 25,
         'Lying on the ground': 23,
         'Crounching': 11,
         'Sitting': 9,
         'Lying on a sofa': 4})

### number of normal sequences:  138 // number of falling sequences:  25


# Create train/test splits for cross-validation

In [90]:
from sklearn.model_selection import StratifiedKFold, LeaveOneOut
import itertools

In [218]:
def create_traintestList(root, annotation_path, random_state=0, n_splits=5):
    """Write stratified k-fold train/test list files for one dataset.

    Every entry matching ``<root>/*/*`` is treated as one sample whose class is
    the name of its parent folder (``adl`` -> 0, ``fall`` -> 1). The dataset is
    identified by the name of the folder that contains ``root``:

    * ``URFD_new``: only entries whose path contains ``cam0`` are used.
    * ``MulticamFD_new``: entries containing ``chute23`` or ``chute24`` are
      left out entirely (mixed sequences).

    For fold ``k`` (1-based) the files ``trainlist{k:02d}.txt`` and
    ``testlist{k:02d}.txt`` are written to ``annotation_path``. Each line is
    ``<class>/<name without extension> <class index>``. A per-fold class
    count summary is printed.

    Args:
        root: folder holding one sub-folder per class.
        annotation_path: output folder for the list files (created if missing).
        random_state: seed used to shuffle the samples before splitting.
        n_splits: number of folds.

    Raises:
        ValueError: if the dataset name is not supported or no sample is left
            after filtering.
    """
    root = root.rstrip('/')
    dataset_name = os.path.basename(os.path.dirname(root))

    # os.makedirs instead of a shell `mkdir -p`: no shell quoting problems
    # with spaces or special characters in the path.
    os.makedirs(annotation_path, exist_ok=True)

    video_dirs = natsort.natsorted(glob.glob(root + '/*/*'))

    # A private RandomState gives the same permutation as seeding the global
    # generator with `random_state`, without resetting the RNG state of the
    # rest of the notebook.
    np.random.RandomState(random_state).shuffle(video_dirs)

    class2idx = {'adl': 0, 'fall': 1}

    # paths relative to root, i.e. '<class>/<file name>'
    relative_paths = [os.path.relpath(path, root) for path in video_dirs]

    if dataset_name == 'URFD_new':
        training_data = [vid for vid in relative_paths if "cam0" in vid]
    elif dataset_name == 'MulticamFD_new':
        # do not consider mixed sequences
        training_data = [vid for vid in relative_paths
                         if "chute23" not in vid and "chute24" not in vid]
    else:
        raise ValueError(
            f"unsupported dataset '{dataset_name}' (derived from root={root!r}); "
            "expected 'URFD_new' or 'MulticamFD_new'")

    if not training_data:
        raise ValueError(f"no samples found under {root!r}")

    training_data = np.array(training_data)
    class_names = [os.path.dirname(vid) for vid in training_data]

    # Shuffling was already done above, so the folds themselves are taken in order.
    kf = StratifiedKFold(n_splits=n_splits)

    for k, (train_ix, test_ix) in enumerate(kf.split(training_data, class_names)):
        print()
        _train, _test = training_data[train_ix], training_data[test_ix]

        _train_lab = [os.path.dirname(x) for x in _train]
        _test_lab = [os.path.dirname(x) for x in _test]
        print()
        print(f'[splist-{k}] train : {np.unique(_train_lab, return_counts=True)}, test : {np.unique(_test_lab, return_counts=True)}')

        for _split, _data in zip(['train', 'test'], [_train, _test]):
            # '<class>/<name without extension> <class index>'
            lines = [
                f"{os.path.splitext(vid)[0]} {class2idx[os.path.dirname(vid)]}\n"
                for vid in _data
            ]
            with open(os.path.join(annotation_path, f"{_split}list{k+1:02d}.txt"), 'w') as fp:
                fp.writelines(lines)

In [195]:
# URFD: build the 5-fold train/test lists from the packed videos
create_traintestList(
    root='/data/FallDownData/URFD_new/video',
    annotation_path='/data/FallDownData/URFD_new/TrainTestlist',
    n_splits=5,
)



[splist-0] train : (array(['adl', 'fall'], dtype='<U4'), array([80, 24])), test : (array(['adl', 'fall'], dtype='<U4'), array([20,  6]))


[splist-1] train : (array(['adl', 'fall'], dtype='<U4'), array([80, 24])), test : (array(['adl', 'fall'], dtype='<U4'), array([20,  6]))


[splist-2] train : (array(['adl', 'fall'], dtype='<U4'), array([80, 24])), test : (array(['adl', 'fall'], dtype='<U4'), array([20,  6]))


[splist-3] train : (array(['adl', 'fall'], dtype='<U4'), array([80, 24])), test : (array(['adl', 'fall'], dtype='<U4'), array([20,  6]))


[splist-4] train : (array(['adl', 'fall'], dtype='<U4'), array([80, 24])), test : (array(['adl', 'fall'], dtype='<U4'), array([20,  6]))


In [219]:
# MulticamFD: build the train/test lists with the default number of folds
create_traintestList(
    root='/data/FallDownData/MulticamFD_new/video',
    annotation_path='/data/FallDownData/MulticamFD_new/TrainTestlist',
)



[splist-0] train : (array(['adl', 'fall'], dtype='<U4'), array([582, 147])), test : (array(['adl', 'fall'], dtype='<U4'), array([146,  37]))


[splist-1] train : (array(['adl', 'fall'], dtype='<U4'), array([582, 147])), test : (array(['adl', 'fall'], dtype='<U4'), array([146,  37]))


[splist-2] train : (array(['adl', 'fall'], dtype='<U4'), array([582, 148])), test : (array(['adl', 'fall'], dtype='<U4'), array([146,  36]))


[splist-3] train : (array(['adl', 'fall'], dtype='<U4'), array([583, 147])), test : (array(['adl', 'fall'], dtype='<U4'), array([145,  37]))


[splist-4] train : (array(['adl', 'fall'], dtype='<U4'), array([583, 147])), test : (array(['adl', 'fall'], dtype='<U4'), array([145,  37]))
