# Prepare mitosis time series data

In [1]:
import numpy as np
import matplotlib.pyplot as plt
from cellpose import models
from cellpose.io import imread
import glob
from pathlib import Path
from PIL import Image, ImageSequence
from tqdm import tqdm
import os
import os.path
# from livecell_tracker import segment
from livecell_tracker import core
from livecell_tracker.core import datasets
from livecell_tracker.core.datasets import LiveCellImageDataset, SingleImageDataset
from skimage import measure
from livecell_tracker.core import SingleCellTrajectory, SingleCellStatic

In [2]:
# sample_json_dir = Path("./EBSS_starvation_24h_xy16_annotation")

sample_json_dirs = [Path(r"./datasets/test_scs_EBSS_starvation/XY1/annotations"), Path(r"./datasets/test_scs_EBSS_starvation/XY16/annotations")]

def load_class2samples_from_json_dir(sample_json_dir: Path, class_subfolders = ["mitosis", "apoptosis", "normal"]) -> dict:
    # sample_paths = glob.glob(str(sample_json_dir / "*.json"))
    class2samples = {}
    for subfolder in class_subfolders:
        class2samples[subfolder] = []
        sample_paths = glob.glob(str(sample_json_dir / subfolder / "*.json"))
        for sample_path in sample_paths:
            sample = SingleCellStatic.load_single_cells_json(sample_path)
            class2samples[subfolder].append(sample)
    return class2samples


all_class2samples = None
all_class2sample_extra_info = {}
for sample_json_dir in sample_json_dirs:
    _class2samples = load_class2samples_from_json_dir(sample_json_dir)
    print(_class2samples)
    for class_name in _class2samples:
        # report how many samples loaded from the sample json dir
        print(f"Loaded {len(_class2samples[class_name])} annotated samples from {sample_json_dir / class_name}")

    if all_class2samples is None:
        all_class2samples = _class2samples
    for class_name in _class2samples:

        all_class2samples[class_name] += _class2samples[class_name]
        _extra_info =  [{"src_dir": sample_json_dir} for _ in range(len(_class2samples[class_name]))]
        if class_name not in all_class2sample_extra_info:
            all_class2sample_extra_info[class_name] = _extra_info
        else:
            all_class2sample_extra_info[class_name] += _extra_info
           

{'mitosis': [[SingleCellStatic(id=171947ec-0dc3-4b2b-8f64-0cf544b2ec48, timeframe=57, bbox=[ 792. 1188.  899. 1285.]), SingleCellStatic(id=47615525-a322-4a43-aa22-f732f89377fc, timeframe=58, bbox=[ 786. 1192.  875. 1291.]), SingleCellStatic(id=f811c55f-f35a-4546-adf9-a6dd7967eb27, timeframe=59, bbox=[ 785. 1191.  867. 1285.]), SingleCellStatic(id=a46e3cb1-ff73-4c4b-8a03-a608f3ebb0d2, timeframe=61, bbox=[ 772. 1198.  851. 1281.]), SingleCellStatic(id=580159bb-1f74-4854-8886-25dba24b890f, timeframe=60, bbox=[ 777. 1196.  856. 1289.]), SingleCellStatic(id=8b736e91-6331-4052-8805-53cec59597f8, timeframe=62, bbox=[ 804. 1184.  867. 1263.]), SingleCellStatic(id=f9ff9e5b-7e4f-4596-a36e-60090e8c0467, timeframe=62, bbox=[ 771. 1222.  835. 1288.]), SingleCellStatic(id=89d2b1f2-9ffb-438d-8db8-6db44df5c759, timeframe=63, bbox=[ 776. 1223.  843. 1298.]), SingleCellStatic(id=701fcf1b-b7c9-4c9e-bc14-77654ba2b070, timeframe=63, bbox=[ 804. 1168.  877. 1253.]), SingleCellStatic(id=2521fbd2-271a-4a1e-92

In [3]:
all_class2sample_extra_info

{'mitosis': [{'src_dir': WindowsPath('datasets/test_scs_EBSS_starvation/XY1/annotations')},
  {'src_dir': WindowsPath('datasets/test_scs_EBSS_starvation/XY1/annotations')},
  {'src_dir': WindowsPath('datasets/test_scs_EBSS_starvation/XY1/annotations')},
  {'src_dir': WindowsPath('datasets/test_scs_EBSS_starvation/XY1/annotations')},
  {'src_dir': WindowsPath('datasets/test_scs_EBSS_starvation/XY1/annotations')},
  {'src_dir': WindowsPath('datasets/test_scs_EBSS_starvation/XY1/annotations')},
  {'src_dir': WindowsPath('datasets/test_scs_EBSS_starvation/XY1/annotations')},
  {'src_dir': WindowsPath('datasets/test_scs_EBSS_starvation/XY1/annotations')},
  {'src_dir': WindowsPath('datasets/test_scs_EBSS_starvation/XY1/annotations')},
  {'src_dir': WindowsPath('datasets/test_scs_EBSS_starvation/XY1/annotations')},
  {'src_dir': WindowsPath('datasets/test_scs_EBSS_starvation/XY1/annotations')},
  {'src_dir': WindowsPath('datasets/test_scs_EBSS_starvation/XY1/annotations')},
  {'src_dir': Win

In [4]:
len(all_class2samples["mitosis"])

28

Automatically prepare normal samples

require tracking done

In [5]:
# get all scs from class_samples not in normal class
exclude_scs = []
total_non_normal_samples = 0
for class_name, samples in all_class2samples.items():
    if class_name != "normal":
        for sample in samples:
            exclude_scs.extend(sample)
            total_non_normal_samples += 1

exclude_scs = set(exclude_scs)
exclude_scs_ids = {str(sc.id) for sc in exclude_scs}

In [6]:
# from livecell_tracker.core.sct_operator import create_scs_edit_viewer
# sct_operator = create_scs_edit_viewer(exclude_scs, img_dataset = list(exclude_scs)[0].img_dataset)

load all scs

In [7]:
import json
from livecell_tracker.core.single_cell import SingleCellTrajectoryCollection
from livecell_tracker.track.sort_tracker_utils import (
    track_SORT_bbox_from_scs
)

all_scs_json_path = ["./datasets/test_scs_EBSS_starvation/XY1/single_cells.json", "./datasets/test_scs_EBSS_starvation/XY16/single_cells.json"]
# all_scs_json_path = "./datasets/test_scs_EBSS_starvation/XY16/tmp_corrected_scs.json"
sctc = SingleCellTrajectoryCollection()
for json_path in all_scs_json_path:
    _scs = SingleCellStatic.load_single_cells_json(json_path)
    tmp_sctc = track_SORT_bbox_from_scs(_scs, raw_imgs=_scs[0].img_dataset, min_hits=3, max_age=3)
    tids = set(sctc.get_all_tids())
    if len(tids) != 0:
        max_tid = max(tids)
    else:
        max_tid = 0
    for tid, traj in tmp_sctc:
        traj.meta["src_dir"] = json_path
        traj.track_id = tid + max_tid + 1
        sctc.add_trajectory(traj)
        traj_scs = traj.get_all_scs()
        for sc in traj_scs:
            sc.meta["src_dir"] = json_path
    del tmp_sctc

all_scs = SingleCellStatic.load_single_cells_jsons(all_scs_json_path)

In [8]:

# with open("./EBSS_starvation_24h_xy16_annotation/single_cell_trajectory_collection.json", "r") as file:
#     json_dict = json.load(file)
# sctc = SingleCellTrajectoryCollection().load_from_json_dict(json_dict)


In [9]:
# set numpy seed
seed = 0
np.random.seed(seed)

objective_sample_num = total_non_normal_samples * 10

normal_frame_len_range = (3, 10)
counter = 0
normal_samples = []
normal_samples_extra_info = []
max_trial_counter = 100000
while counter < objective_sample_num and max_trial_counter > 0:
    # randomly select a sct from sctc
    # generate a list of scs
    track_id = np.random.choice(list(sctc.track_id_to_trajectory.keys()))  
    sct = sctc.get_trajectory(track_id)
    # randomly select a length
    frame_len = np.random.randint(*normal_frame_len_range)
    # generate a sample
    times = list(sct.timeframe_to_single_cell.keys())
    times = sorted(times)
    if len(times) <= frame_len:
        continue
    start_idx = np.random.randint(0, len(times) - frame_len)
    start_time = times[start_idx]
    end_time = times[start_idx + frame_len - 1]

    sub_sct = sct.subsct(start_time, end_time)

    is_some_sc_in_exclude_scs = False
    for time, sc in sub_sct.timeframe_to_single_cell.items():
        # print("sc.id:", sc.id, type(sc.id))
        if str(sc.id) in exclude_scs_ids:
            is_some_sc_in_exclude_scs = True
            break
    if is_some_sc_in_exclude_scs:
        print("some sc in the exclude scs list")
        continue
    
    new_sample = []
    for time, sc in sub_sct.timeframe_to_single_cell.items():
        new_sample.append(sc)
    normal_samples.append(new_sample)
    normal_samples_extra_info.append({"src_dir": sub_sct.get_all_scs()[0].meta["src_dir"]})
    counter += 1
    max_trial_counter -= 1

normal_samples[:2]

some sc in the exclude scs list
some sc in the exclude scs list
some sc in the exclude scs list
some sc in the exclude scs list
some sc in the exclude scs list
some sc in the exclude scs list
some sc in the exclude scs list
some sc in the exclude scs list


[[SingleCellStatic(id=056b6e7b-d2cb-4501-b3d2-924e9d149e74, timeframe=73, bbox=[417 387 601 585]),
  SingleCellStatic(id=c6f083e2-fff1-47d9-a2b0-4feff712626b, timeframe=74, bbox=[421 392 603 580]),
  SingleCellStatic(id=3b10c5f7-966a-4282-a661-78da785fff0e, timeframe=75, bbox=[416 391 595 579]),
  SingleCellStatic(id=0816119c-6bdc-49cb-b857-0d43cbb03e61, timeframe=76, bbox=[420 390 600 586]),
  SingleCellStatic(id=2e83a2af-01bc-4453-9f45-49a3c0e1d975, timeframe=77, bbox=[437 402 601 576]),
  SingleCellStatic(id=34b6863b-baf4-4197-9554-f701711c59db, timeframe=78, bbox=[438 399 595 568]),
  SingleCellStatic(id=6ba2fc6f-9c02-4c7b-beda-3fa7a0a1e84a, timeframe=79, bbox=[444 406 595 581]),
  SingleCellStatic(id=97536b78-d230-4c98-9d79-ccac89189aed, timeframe=80, bbox=[443 406 595 576])],
 [SingleCellStatic(id=5e9559bc-f211-4481-b005-fb4ff850ed40, timeframe=173, bbox=[1302 1134 1403 1229]),
  SingleCellStatic(id=174a1612-6faa-4700-a02f-7ae1a3eecb50, timeframe=174, bbox=[1228 1107 1404 1233]),

In [10]:
all_class2samples["normal"].extend(normal_samples)
all_class2sample_extra_info["normal"].extend(normal_samples_extra_info)

In [11]:
len(all_class2samples["normal"]), len(all_class2sample_extra_info["normal"])

(280, 280)

## Prepare videos and annotations for MMDetection

In [12]:
classes = all_class2samples.keys()
classes

dict_keys(['mitosis', 'apoptosis', 'normal'])

In [13]:
from livecell_tracker.core.utils import gray_img_to_rgb, rgb_img_to_gray
from livecell_tracker.preprocess.utils import normalize_img_to_uint8

In [14]:
from livecell_tracker.track.classify_utils import video_frames_and_masks_from_sample, combine_video_frames_and_masks

In [15]:
from typing import List
import cv2
import numpy as np
import pandas as pd

from livecell_tracker.core.sc_video_utils import gen_mp4_from_frames, gen_samples_df, gen_samples_mp4s

ver = "10-st" # single trajectory ver
MAKE_SINGLE_CELL_TRAJ_SAMPLES = True
DROP_MITOSIS_DIV = False

# ver = "10-drop-div"
# DROP_MITOSIS_DIV = True
# ver = "-test"


data_dir = Path(f'notebook_results/mmaction_train_data_v{ver}')
class_labels = ['mitosis', 'apoptosis', 'normal']
class_label = "mitosis"
frame_types = ["video", "mask", "combined"]
fps = 3

padding_pixels = [0, 20, 40, 50, 100, 200, 400]



# split train and test data

# get #samples from all_class2samples
_split = 0.8

train_class2samples = {}
test_class2samples = {}
train_class2sample_extra_info = {}
test_class2sample_extra_info = {}

# randomize train and test data


for key in all_class2samples.keys():
    randomized_indices = np.random.permutation(len(all_class2samples[key])).astype(int)
    split_idx = int(len(all_class2samples[key]) * _split)
    _train_indices = randomized_indices[:split_idx]
    _test_indices = randomized_indices[split_idx:]
    train_class2samples[key] = np.array(all_class2samples[key], dtype=object)[_train_indices]
    test_class2samples[key] = np.array(all_class2samples[key], dtype=object)[_test_indices]

    train_class2sample_extra_info[key] = np.array(all_class2sample_extra_info[key], dtype=object)[_train_indices]
    test_class2sample_extra_info[key] = np.array(all_class2sample_extra_info[key], dtype=object)[_test_indices]



In [16]:
len(train_class2samples["normal"]), len(test_class2samples["normal"])

(224, 56)

In [17]:
len(train_class2samples["mitosis"]), len(test_class2samples["mitosis"])

(22, 6)

In [18]:
video_frames_and_masks_from_sample(train_class2samples["normal"][6])[0][0].shape
# train_class2samples["normal"][6][1].show_panel()

(95, 145, 3)

In [19]:
import importlib
import livecell_tracker
importlib.reload(livecell_tracker.track.classify_utils)

<module 'livecell_tracker.track.classify_utils' from 'D:\\LiveCellTracker-dev\\livecell_tracker\\track\\classify_utils.py'>

In [20]:
idx_to_check = 6
video_frames, video_frame_masks = video_frames_and_masks_from_sample(train_class2samples["normal"][idx_to_check], padding_pixels=0)
print("video frames dtype:", video_frames[0].dtype)
print("video frames shape:", video_frames[0].shape)
print("video frame masks dtype:", video_frame_masks[0].dtype)
print("video frame masks shape:", video_frame_masks[0].shape)
combined_frames = livecell_tracker.track.classify_utils.combine_video_frames_and_masks(video_frames, video_frame_masks, edt_transform=True)
combined_frames = np.array(combined_frames).astype(np.uint8)
# combined_frames = np.maximum(combined_frames - 1, 0).astype(np.uint8)
print("combined_frames shape: ", combined_frames[0].shape)
gen_mp4_from_frames(combined_frames, "./test_video_output.mp4", fps=1)

video frames dtype: uint8
video frames shape: (95, 145, 3)
video frame masks dtype: uint8
video frame masks shape: (95, 145, 3)
combined_frames shape:  (95, 145, 3)


Visually check the generated frames' values

In [21]:
# channel = 2
# plt.imshow(combined_frames[0][..., channel])
# combined_frames[1][..., channel].max(), combined_frames[1][..., 0].shape

In [22]:
np.array(combined_frames).flatten().min()

0

Make single cell trajectories only (ONE cell per time frame)

In [23]:
from typing import Dict
from livecell_tracker.track.data_prep_utils import check_one_sc_at_time


def make_one_cell_per_timeframe_helper(sc_by_time, times, cur_idx) -> List[List[SingleCellStatic]]:
    if cur_idx == len(times):
        return [[]]
    cur_time = times[cur_idx]
    cur_scs = sc_by_time[cur_time]
    return [[sc] + scs for sc in cur_scs for scs in make_one_cell_per_timeframe_helper(sc_by_time, times, cur_idx + 1)]


def make_one_cell_per_timeframe_samples(sample: List[SingleCellStatic]) -> List[List[SingleCellStatic]]:
    """if there are two single cells at a time frame, recursively generate new samples with one single cell at a time frame"""
    sc_by_time = {}
    for sc in sample:
        if sc.timeframe not in sc_by_time:
            sc_by_time[sc.timeframe] = []
        sc_by_time[sc.timeframe].append(sc)
    return make_one_cell_per_timeframe_helper(sc_by_time, sorted(sc_by_time.keys()), 0)


def make_one_cell_per_timeframe_for_class2samples(class2samples: Dict, class2sample_extra_info=None, tar_keys: List[str] = ["mitosis"]) -> Dict:
    class2samples = class2samples.copy()
    if class2sample_extra_info is not None:
        class2sample_extra_info = class2sample_extra_info.copy()
    for key in tar_keys:
        tmp_samples = []
        tmp_sample_extra_info = []
        key_samples = class2samples[key]
        for sample_idx, sample in enumerate(key_samples):
            sct_samples = make_one_cell_per_timeframe_samples(sample)
            tmp_samples.extend(sct_samples)
            if class2sample_extra_info is not None:
                tmp_sample_extra_info.extend([class2sample_extra_info[key][sample_idx] for _ in range(len(sct_samples))])

            # check the length of sample is the same as the length of tmp_samples[-1]
            sample_times = set([sc.timeframe for sc in sample])
            tmp_sample_times = set([sc.timeframe for sc in tmp_samples[-1]])
            assert len(sample_times) == len(tmp_sample_times), f"sample times: {sample_times}, tmp sample times: {tmp_sample_times}"
        class2samples[key] = tmp_samples
        if class2sample_extra_info is not None:
            class2sample_extra_info[key] = tmp_sample_extra_info
        assert all([check_one_sc_at_time(sample) for sample in class2samples[key]]), "there is more than one sc at the same timepoint"
    return class2samples, class2sample_extra_info

sample = train_class2samples["mitosis"][0]

In [24]:
[sc.timeframe for sc in sample]

[239, 240, 241, 242, 243, 244, 245, 246, 247, 247, 248, 248, 249, 249]

In [25]:
len(make_one_cell_per_timeframe_samples(sample))

8

In [26]:
if MAKE_SINGLE_CELL_TRAJ_SAMPLES:
    train_class2samples, train_class2sample_extra_info = make_one_cell_per_timeframe_for_class2samples(train_class2samples, train_class2sample_extra_info)
    test_class2samples, test_class2sample_extra_info = make_one_cell_per_timeframe_for_class2samples(test_class2samples, test_class2sample_extra_info)

Drop the cell divison part for easier inference durign testing

In [27]:
from livecell_tracker.track.data_prep_utils import drop_multiple_cell_frames_in_samples

if DROP_MITOSIS_DIV:
    train_class2samples = drop_multiple_cell_frames_in_samples(train_class2samples)
    test_class2samples = drop_multiple_cell_frames_in_samples(test_class2samples)


In [28]:
for key, val in train_class2samples.items():
    assert len(val) == len(train_class2sample_extra_info[key]), f"key: {key}, len(val): {len(val)}, len(train_class2sample_extra_info[key]): {len(train_class2sample_extra_info[key])}"

In [29]:

# # for debug
# train_class2samples = {key: value[:5] for key, value in all_class2samples.items()}
# test_class2samples = {key: value[:5] for key, value in all_class2samples.items()}
# padding_pixels = [20]


train_sample_info_df = gen_samples_df(train_class2samples, train_class2sample_extra_info, data_dir, class_labels, padding_pixels, frame_types, fps, prefix="train")
test_sample_info_df = gen_samples_df(test_class2samples, test_class2sample_extra_info, data_dir, class_labels, padding_pixels, frame_types, fps, prefix="test")

train_sample_info_df.to_csv(data_dir/f'train_data.txt', index=False, header=False, sep=' ', )
test_sample_info_df.to_csv(data_dir/f'test_data.txt', index=False, header=False, sep=' ', )

for selected_frame_type in frame_types:
    train_df_path = data_dir/f'mmaction_train_data_{selected_frame_type}.txt'
    train_selected_frame_type_df = train_sample_info_df[train_sample_info_df["frame_type"] == selected_frame_type]
    train_selected_frame_type_df = train_selected_frame_type_df.reset_index(drop=True)
    train_selected_frame_type_df = train_selected_frame_type_df[["path", "label_index"]]
    train_selected_frame_type_df.to_csv(train_df_path, index=False, header=False, sep=' ')
    
    test_df_path = data_dir/f'mmaction_test_data_{selected_frame_type}.txt'
    test_selected_frame_type_df = test_sample_info_df[test_sample_info_df["frame_type"] == selected_frame_type]
    test_selected_frame_type_df = test_selected_frame_type_df[["path", "label_index"]]
    test_selected_frame_type_df = test_selected_frame_type_df.reset_index(drop=True)
    test_selected_frame_type_df.to_csv(test_df_path, index=False, header=False, sep=' ')


# # the follwing code generates v1-v7 test data. The issue is that some of test data shows up in train data, through different padding values.
# data_df_path = data_dir/'all_data.txt'
# sample_df = gen_samples_df(data_dir, class_labels, padding_pixels, frame_types, fps)
# sample_df.to_csv(data_df_path, index=False, header=False, sep=' ')
# for selected_frame_type in frame_types:
#     selected_frame_type_df = sample_df[sample_df["frame_type"] == selected_frame_type]
#     selected_frame_type_df = selected_frame_type_df.reset_index(drop=True)
#     train_df_path = data_dir/f'train_data_{selected_frame_type}.txt'
#     test_df_path = data_dir/f'test_data_{selected_frame_type}.txt'
#     train_df = selected_frame_type_df.sample(frac=0.8, random_state=0, replace=False)
#     test_df = selected_frame_type_df.drop(train_df.index, inplace=False)

#     # only keep the path and label_index columns
#     train_df = train_df[["path", "label_index"]]
#     test_df = test_df[["path", "label_index"]]

#     train_df.to_csv(train_df_path, index=False, header=False, sep=' ')
#     test_df.to_csv(test_df_path, index=False, header=False, sep=' ')


100%|██████████| 148/148 [01:49<00:00,  1.35it/s]
100%|██████████| 148/148 [00:33<00:00,  4.35it/s]
100%|██████████| 148/148 [00:36<00:00,  4.06it/s]
100%|██████████| 148/148 [00:37<00:00,  3.90it/s]
100%|██████████| 148/148 [00:48<00:00,  3.05it/s]
100%|██████████| 148/148 [01:25<00:00,  1.74it/s]
100%|██████████| 148/148 [03:27<00:00,  1.40s/it]
0it [00:00, ?it/s]
0it [00:00, ?it/s]
0it [00:00, ?it/s]
0it [00:00, ?it/s]
0it [00:00, ?it/s]
0it [00:00, ?it/s]
0it [00:00, ?it/s]
100%|██████████| 224/224 [01:08<00:00,  3.27it/s]
100%|██████████| 224/224 [00:32<00:00,  6.79it/s]
100%|██████████| 224/224 [00:37<00:00,  5.98it/s]
100%|██████████| 224/224 [00:47<00:00,  4.72it/s]
100%|██████████| 224/224 [01:00<00:00,  3.70it/s]
100%|██████████| 224/224 [01:28<00:00,  2.54it/s]
100%|██████████| 224/224 [03:08<00:00,  1.19it/s]
100%|██████████| 38/38 [00:35<00:00,  1.06it/s]
100%|██████████| 38/38 [00:21<00:00,  1.81it/s]
100%|██████████| 38/38 [00:23<00:00,  1.65it/s]
100%|██████████| 38/38 

In [30]:
train_class2samples

{'mitosis': [[SingleCellStatic(id=5fc754b1-e519-43d7-8f8b-abefb8f15036, timeframe=239, bbox=[ 913. 1252.  999. 1358.]),
   SingleCellStatic(id=cd730a55-80e9-469b-93dc-ad6cb4711869, timeframe=240, bbox=[ 925. 1255.  998. 1340.]),
   SingleCellStatic(id=8ae495ec-d26b-4b7a-a9c5-cb3ea9bbdbc9, timeframe=241, bbox=[ 899. 1263.  983. 1348.]),
   SingleCellStatic(id=40e892e6-f592-480b-87cf-045e6c9c72bc, timeframe=242, bbox=[ 888. 1252.  967. 1322.]),
   SingleCellStatic(id=8038db47-52ac-4949-918a-e0875f683003, timeframe=243, bbox=[ 870. 1243.  943. 1316.]),
   SingleCellStatic(id=43d552dc-e884-493e-9795-5029953937e7, timeframe=244, bbox=[ 860. 1239.  924. 1312.]),
   SingleCellStatic(id=2ab70707-d572-49f8-88c7-f63cb5de1788, timeframe=245, bbox=[ 858. 1241.  918. 1309.]),
   SingleCellStatic(id=98ae0b9b-6a9a-48bf-bc71-05ad8c047b4d, timeframe=246, bbox=[ 863. 1239.  948. 1313.]),
   SingleCellStatic(id=c9ef0c37-52cf-4932-84a4-3c04fd361962, timeframe=247, bbox=[ 913. 1262.  965. 1320.]),
   Singl

Check the videos

In [31]:

video_paths = list(Path(data_dir/'videos').glob('*.mp4'))

Due to a `decord` package [issue](https://github.com/dmlc/decord/issues/150), to use mmaction2 we must check if the videos can be loaded by `decord` correctly.

In [32]:
import decord
for path in video_paths:
# for path in ["./notebook_results/train_normal_6_raw_padding-0.mp4"]:
# for path in ["./test_video_output.mp4"]:
    reader = decord.VideoReader(str(path))
    reader.seek(0)
    imgs = list()
    frame_inds = range(0, len(reader))
    for idx in frame_inds:
        reader.seek(idx)
        frame = reader.next()
        imgs.append(frame.asnumpy())
        frame = frame.asnumpy()

        num_channels = frame.shape[-1]
        if num_channels != 3:
            print("invalid video for decord (https://github.com/dmlc/decord/issues/150): ", path)
            break
        # fig, axes = plt.subplots(1, num_channels, figsize=(20, 10))
        # for i in range(num_channels):
        #     axes[i].imshow(frame[:, :, i])
        # plt.show()
    del reader

invalid video for decord (https://github.com/dmlc/decord/issues/150):  notebook_results\mmaction_train_data_v10-st\videos\test_normal_33_combined_padding-0.mp4
invalid video for decord (https://github.com/dmlc/decord/issues/150):  notebook_results\mmaction_train_data_v10-st\videos\test_normal_33_mask_padding-0.mp4
invalid video for decord (https://github.com/dmlc/decord/issues/150):  notebook_results\mmaction_train_data_v10-st\videos\test_normal_33_raw_padding-0.mp4
invalid video for decord (https://github.com/dmlc/decord/issues/150):  notebook_results\mmaction_train_data_v10-st\videos\train_normal_204_combined_padding-0.mp4
invalid video for decord (https://github.com/dmlc/decord/issues/150):  notebook_results\mmaction_train_data_v10-st\videos\train_normal_204_mask_padding-0.mp4
invalid video for decord (https://github.com/dmlc/decord/issues/150):  notebook_results\mmaction_train_data_v10-st\videos\train_normal_204_raw_padding-0.mp4


In [33]:
decord.__version__

'0.6.0'

check if videos can be loaded by cv2 correctly

In [34]:
import cv2

cap = cv2.VideoCapture("./test_video_output.mp4")

while True:
    ret, frame = cap.read()
    if not ret:
        break
    assert frame.shape[-1] == 3, "frame should be in RGB format"

cap.release()
cv2.destroyAllWindows()

In [35]:
# from sklearn.model_selection import train_test_split

# train_df_path = data_dir/'train_data.csv'
# test_df_path = data_dir/'test_data.csv'

# # split train and test from df
# train_df, test_df = train_test_split(df, test_size=0.2, random_state=42)
# train_df.to_csv(train_df_path, index=False, header=False, sep=' ')
# test_df.to_csv(test_df_path, index=False, header=False, sep=' ')
