# Prepare mitosis time series data

In [1]:
import numpy as np
import matplotlib.pyplot as plt
from cellpose import models
from cellpose.io import imread
import glob
from pathlib import Path
from PIL import Image, ImageSequence
from tqdm import tqdm
import os
import os.path
# from livecell_tracker import segment
from livecell_tracker import core
from livecell_tracker.core import datasets
from livecell_tracker.core.datasets import LiveCellImageDataset, SingleImageDataset
from skimage import measure
from livecell_tracker.core import SingleCellTrajectory, SingleCellStatic

In [2]:
# sample_json_dir = Path("./EBSS_starvation_24h_xy16_annotation")

sample_json_dirs = [Path(r"./datasets/test_scs_EBSS_starvation/XY1/annotations"), Path(r"./datasets/test_scs_EBSS_starvation/XY16/annotations")]

def load_class2samples_from_json_dir(sample_json_dir: Path, class_subfolders = ["mitosis", "apoptosis", "normal"]) -> dict:
    # sample_paths = glob.glob(str(sample_json_dir / "*.json"))
    class2samples = {}
    for subfolder in class_subfolders:
        class2samples[subfolder] = []
        sample_paths = glob.glob(str(sample_json_dir / subfolder / "*.json"))
        for sample_path in sample_paths:
            sample = SingleCellStatic.load_single_cells_json(sample_path)
            class2samples[subfolder].append(sample)
    return class2samples


all_class2samples = None
all_class2sample_extra_info = {}
for sample_json_dir in sample_json_dirs:
    _class2samples = load_class2samples_from_json_dir(sample_json_dir)
    print(_class2samples)
    for class_name in _class2samples:
        # report how many samples loaded from the sample json dir
        print(f"Loaded {len(_class2samples[class_name])} annotated samples from {sample_json_dir / class_name}")

    if all_class2samples is None:
        all_class2samples = _class2samples
    for class_name in _class2samples:

        all_class2samples[class_name] += _class2samples[class_name]
        _extra_info =  [{"src_dir": sample_json_dir} for _ in range(len(_class2samples[class_name]))]
        if class_name not in all_class2sample_extra_info:
            all_class2sample_extra_info[class_name] = _extra_info
        else:
            all_class2sample_extra_info[class_name] += _extra_info
           

{'mitosis': [[SingleCellStatic(id=171947ec-0dc3-4b2b-8f64-0cf544b2ec48, timeframe=57, bbox=[ 792. 1188.  899. 1285.]), SingleCellStatic(id=47615525-a322-4a43-aa22-f732f89377fc, timeframe=58, bbox=[ 786. 1192.  875. 1291.]), SingleCellStatic(id=f811c55f-f35a-4546-adf9-a6dd7967eb27, timeframe=59, bbox=[ 785. 1191.  867. 1285.]), SingleCellStatic(id=a46e3cb1-ff73-4c4b-8a03-a608f3ebb0d2, timeframe=61, bbox=[ 772. 1198.  851. 1281.]), SingleCellStatic(id=580159bb-1f74-4854-8886-25dba24b890f, timeframe=60, bbox=[ 777. 1196.  856. 1289.]), SingleCellStatic(id=8b736e91-6331-4052-8805-53cec59597f8, timeframe=62, bbox=[ 804. 1184.  867. 1263.]), SingleCellStatic(id=f9ff9e5b-7e4f-4596-a36e-60090e8c0467, timeframe=62, bbox=[ 771. 1222.  835. 1288.]), SingleCellStatic(id=89d2b1f2-9ffb-438d-8db8-6db44df5c759, timeframe=63, bbox=[ 776. 1223.  843. 1298.]), SingleCellStatic(id=701fcf1b-b7c9-4c9e-bc14-77654ba2b070, timeframe=63, bbox=[ 804. 1168.  877. 1253.]), SingleCellStatic(id=2521fbd2-271a-4a1e-92

In [3]:
all_class2sample_extra_info

{'mitosis': [{'src_dir': WindowsPath('datasets/test_scs_EBSS_starvation/XY1/annotations')},
  {'src_dir': WindowsPath('datasets/test_scs_EBSS_starvation/XY1/annotations')},
  {'src_dir': WindowsPath('datasets/test_scs_EBSS_starvation/XY1/annotations')},
  {'src_dir': WindowsPath('datasets/test_scs_EBSS_starvation/XY1/annotations')},
  {'src_dir': WindowsPath('datasets/test_scs_EBSS_starvation/XY1/annotations')},
  {'src_dir': WindowsPath('datasets/test_scs_EBSS_starvation/XY1/annotations')},
  {'src_dir': WindowsPath('datasets/test_scs_EBSS_starvation/XY1/annotations')},
  {'src_dir': WindowsPath('datasets/test_scs_EBSS_starvation/XY1/annotations')},
  {'src_dir': WindowsPath('datasets/test_scs_EBSS_starvation/XY1/annotations')},
  {'src_dir': WindowsPath('datasets/test_scs_EBSS_starvation/XY1/annotations')},
  {'src_dir': WindowsPath('datasets/test_scs_EBSS_starvation/XY1/annotations')},
  {'src_dir': WindowsPath('datasets/test_scs_EBSS_starvation/XY1/annotations')},
  {'src_dir': Win

In [4]:
len(all_class2samples["mitosis"])

28

Automatically prepare normal samples

require tracking done

In [5]:
# get all scs from class_samples not in normal class
exclude_scs = []
total_non_normal_samples = 0
for class_name, samples in all_class2samples.items():
    if class_name != "normal":
        for sample in samples:
            exclude_scs.extend(sample)
            total_non_normal_samples += 1

exclude_scs = set(exclude_scs)
exclude_scs_ids = {str(sc.id) for sc in exclude_scs}

In [6]:
# from livecell_tracker.core.sct_operator import create_scs_edit_viewer
# sct_operator = create_scs_edit_viewer(exclude_scs, img_dataset = list(exclude_scs)[0].img_dataset)

load all scs

In [7]:
import json
from livecell_tracker.core.single_cell import SingleCellTrajectoryCollection
from livecell_tracker.track.sort_tracker_utils import (
    track_SORT_bbox_from_scs
)

all_scs_json_path = ["./datasets/test_scs_EBSS_starvation/XY1/single_cells.json", "./datasets/test_scs_EBSS_starvation/XY16/single_cells.json"]
# all_scs_json_path = "./datasets/test_scs_EBSS_starvation/XY16/tmp_corrected_scs.json"
sctc = SingleCellTrajectoryCollection()
for json_path in all_scs_json_path:
    _scs = SingleCellStatic.load_single_cells_json(json_path)
    tmp_sctc = track_SORT_bbox_from_scs(_scs, raw_imgs=_scs[0].img_dataset, min_hits=3, max_age=3)
    tids = set(sctc.get_all_tids())
    if len(tids) != 0:
        max_tid = max(tids)
    else:
        max_tid = 0
    for tid, traj in tmp_sctc:
        traj.meta["src_dir"] = json_path
        traj.track_id = tid + max_tid + 1
        sctc.add_trajectory(traj)
        traj_scs = traj.get_all_scs()
        for sc in traj_scs:
            sc.meta["src_dir"] = json_path
    del tmp_sctc

all_scs = SingleCellStatic.load_single_cells_jsons(all_scs_json_path)

In [8]:

# with open("./EBSS_starvation_24h_xy16_annotation/single_cell_trajectory_collection.json", "r") as file:
#     json_dict = json.load(file)
# sctc = SingleCellTrajectoryCollection().load_from_json_dict(json_dict)


In [11]:
# set numpy seed
seed = 0
np.random.seed(seed)

objective_sample_num = total_non_normal_samples * 10

normal_frame_len_range = (3, 10)
counter = 0
normal_samples = []
normal_samples_extra_info = []
max_trial_counter = 100000
while counter < objective_sample_num and max_trial_counter > 0:
    # randomly select a sct from sctc
    # generate a list of scs
    track_id = np.random.choice(list(sctc.track_id_to_trajectory.keys()))  
    sct = sctc.get_trajectory(track_id)
    # randomly select a length
    frame_len = np.random.randint(*normal_frame_len_range)
    # generate a sample
    times = list(sct.timeframe_to_single_cell.keys())
    times = sorted(times)
    if len(times) <= frame_len:
        continue
    start_idx = np.random.randint(0, len(times) - frame_len)
    start_time = times[start_idx]
    end_time = times[start_idx + frame_len - 1]

    sub_sct = sct.subsct(start_time, end_time)

    is_some_sc_in_exclude_scs = False
    for time, sc in sub_sct.timeframe_to_single_cell.items():
        # print("sc.id:", sc.id, type(sc.id))
        if str(sc.id) in exclude_scs_ids:
            is_some_sc_in_exclude_scs = True
            break
    if is_some_sc_in_exclude_scs:
        print("some sc in exclude scs")
        continue
    
    new_sample = []
    for time, sc in sub_sct.timeframe_to_single_cell.items():
        new_sample.append(sc)
    normal_samples.append(new_sample)
    normal_samples_extra_info.append({"src_dir": sub_sct.get_all_scs()[0].meta["src_dir"]})
    counter += 1
    max_trial_counter -= 1

normal_samples[:2]

some sc in exclude scs
some sc in exclude scs
some sc in exclude scs
some sc in exclude scs
some sc in exclude scs
some sc in exclude scs
some sc in exclude scs
some sc in exclude scs


[[SingleCellStatic(id=056b6e7b-d2cb-4501-b3d2-924e9d149e74, timeframe=73, bbox=[417 387 601 585]),
  SingleCellStatic(id=c6f083e2-fff1-47d9-a2b0-4feff712626b, timeframe=74, bbox=[421 392 603 580]),
  SingleCellStatic(id=3b10c5f7-966a-4282-a661-78da785fff0e, timeframe=75, bbox=[416 391 595 579]),
  SingleCellStatic(id=0816119c-6bdc-49cb-b857-0d43cbb03e61, timeframe=76, bbox=[420 390 600 586]),
  SingleCellStatic(id=2e83a2af-01bc-4453-9f45-49a3c0e1d975, timeframe=77, bbox=[437 402 601 576]),
  SingleCellStatic(id=34b6863b-baf4-4197-9554-f701711c59db, timeframe=78, bbox=[438 399 595 568]),
  SingleCellStatic(id=6ba2fc6f-9c02-4c7b-beda-3fa7a0a1e84a, timeframe=79, bbox=[444 406 595 581]),
  SingleCellStatic(id=97536b78-d230-4c98-9d79-ccac89189aed, timeframe=80, bbox=[443 406 595 576])],
 [SingleCellStatic(id=5e9559bc-f211-4481-b005-fb4ff850ed40, timeframe=173, bbox=[1302 1134 1403 1229]),
  SingleCellStatic(id=174a1612-6faa-4700-a02f-7ae1a3eecb50, timeframe=174, bbox=[1228 1107 1404 1233]),

In [12]:
all_class2samples["normal"].extend(normal_samples)
all_class2sample_extra_info["normal"].extend(normal_samples_extra_info)

In [13]:
len(all_class2samples["normal"]), len(all_class2sample_extra_info["normal"])

(280, 280)

## Prepare videos and annotations for MMDetection

In [14]:
classes = all_class2samples.keys()
classes

dict_keys(['mitosis', 'apoptosis', 'normal'])

In [15]:
from livecell_tracker.core.utils import gray_img_to_rgb, rgb_img_to_gray
from livecell_tracker.preprocess.utils import normalize_img_to_uint8

In [16]:
from livecell_tracker.track.classify_utils import video_frames_and_masks_from_sample, combine_video_frames_and_masks

In [17]:
from typing import List
import cv2
import numpy as np
import pandas as pd

from livecell_tracker.core.sc_video_utils import gen_mp4_from_frames, gen_samples_df, gen_samples_mp4s

ver = 9
# ver = "-test"
data_dir = Path(f'notebook_results/mmaction_train_data_v{ver}')
class_labels = ['mitosis', 'apoptosis', 'normal']
class_label = "mitosis"
frame_types = ["video", "mask", "combined"]
fps = 3

padding_pixels = [0, 20, 40, 50, 100, 200, 400]



# split train and test data

# get #samples from all_class2samples
_split = 0.8

train_class2samples = {}
test_class2samples = {}
train_class2sample_extra_info = {}
test_class2sample_extra_info = {}

# randomize train and test data


for key in all_class2samples.keys():
    randomized_indices = np.random.permutation(len(all_class2samples[key])).astype(int)
    split_idx = int(len(all_class2samples[key]) * _split)
    _train_indices = randomized_indices[:split_idx]
    _test_indices = randomized_indices[split_idx:]
    train_class2samples[key] = np.array(all_class2samples[key], dtype=object)[_train_indices]
    test_class2samples[key] = np.array(all_class2samples[key], dtype=object)[_test_indices]

    train_class2sample_extra_info[key] = np.array(all_class2sample_extra_info[key], dtype=object)[_train_indices]
    test_class2sample_extra_info[key] = np.array(all_class2sample_extra_info[key], dtype=object)[_test_indices]



In [18]:
len(train_class2samples["normal"]), len(test_class2samples["normal"])

(224, 56)

In [19]:
len(train_class2samples["mitosis"]), len(test_class2samples["mitosis"])

(22, 6)

In [20]:
video_frames_and_masks_from_sample(train_class2samples["normal"][6])[0][0].shape
# train_class2samples["normal"][6][1].show_panel()

(95, 145, 3)

In [21]:
import importlib
import livecell_tracker
importlib.reload(livecell_tracker.track.classify_utils)

<module 'livecell_tracker.track.classify_utils' from 'D:\\LiveCellTracker-dev\\livecell_tracker\\track\\classify_utils.py'>

In [22]:
idx_to_check = 6
video_frames, video_frame_masks = video_frames_and_masks_from_sample(train_class2samples["normal"][idx_to_check], padding_pixels=0)
print("video frames dtype:", video_frames[0].dtype)
print("video frames shape:", video_frames[0].shape)
print("video frame masks dtype:", video_frame_masks[0].dtype)
print("video frame masks shape:", video_frame_masks[0].shape)
combined_frames = livecell_tracker.track.classify_utils.combine_video_frames_and_masks(video_frames, video_frame_masks, edt_transform=True)
combined_frames = np.array(combined_frames).astype(np.uint8)
# combined_frames = np.maximum(combined_frames - 1, 0).astype(np.uint8)
print("combined_frames shape: ", combined_frames[0].shape)
gen_mp4_from_frames(combined_frames, "./test_video_output.mp4", fps=1)

video frames dtype: uint8
video frames shape: (95, 145, 3)
video frame masks dtype: uint8
video frame masks shape: (95, 145, 3)
combined_frames shape:  (95, 145, 3)


Visually check the generated frames' values

In [None]:
# channel = 2
# plt.imshow(combined_frames[0][..., channel])
# combined_frames[1][..., channel].max(), combined_frames[1][..., 0].shape

In [None]:
np.array(combined_frames).flatten().min()

In [23]:

# # for debug
# train_class2samples = {key: value[:5] for key, value in all_class2samples.items()}
# test_class2samples = {key: value[:5] for key, value in all_class2samples.items()}
# padding_pixels = [20]


train_sample_info_df = gen_samples_df(train_class2samples, train_class2sample_extra_info, data_dir, class_labels, padding_pixels, frame_types, fps, prefix="train")
test_sample_info_df = gen_samples_df(test_class2samples, test_class2sample_extra_info, data_dir, class_labels, padding_pixels, frame_types, fps, prefix="test")

train_sample_info_df.to_csv(data_dir/f'train_data.txt', index=False, header=False, sep=' ', )
test_sample_info_df.to_csv(data_dir/f'test_data.txt', index=False, header=False, sep=' ', )

for selected_frame_type in frame_types:
    train_df_path = data_dir/f'mmaction_train_data_{selected_frame_type}.txt'
    train_selected_frame_type_df = train_sample_info_df[train_sample_info_df["frame_type"] == selected_frame_type]
    train_selected_frame_type_df = train_selected_frame_type_df.reset_index(drop=True)
    train_selected_frame_type_df = train_selected_frame_type_df[["path", "label_index"]]
    train_selected_frame_type_df.to_csv(train_df_path, index=False, header=False, sep=' ')
    
    test_df_path = data_dir/f'mmaction_test_data_{selected_frame_type}.txt'
    test_selected_frame_type_df = test_sample_info_df[test_sample_info_df["frame_type"] == selected_frame_type]
    test_selected_frame_type_df = test_selected_frame_type_df[["path", "label_index"]]
    test_selected_frame_type_df = test_selected_frame_type_df.reset_index(drop=True)
    test_selected_frame_type_df.to_csv(test_df_path, index=False, header=False, sep=' ')


# # the follwing code generates v1-v7 test data. The issue is that some of test data shows up in train data, through different padding values.
# data_df_path = data_dir/'all_data.txt'
# sample_df = gen_samples_df(data_dir, class_labels, padding_pixels, frame_types, fps)
# sample_df.to_csv(data_df_path, index=False, header=False, sep=' ')
# for selected_frame_type in frame_types:
#     selected_frame_type_df = sample_df[sample_df["frame_type"] == selected_frame_type]
#     selected_frame_type_df = selected_frame_type_df.reset_index(drop=True)
#     train_df_path = data_dir/f'train_data_{selected_frame_type}.txt'
#     test_df_path = data_dir/f'test_data_{selected_frame_type}.txt'
#     train_df = selected_frame_type_df.sample(frac=0.8, random_state=0, replace=False)
#     test_df = selected_frame_type_df.drop(train_df.index, inplace=False)

#     # only keep the path and label_index columns
#     train_df = train_df[["path", "label_index"]]
#     test_df = test_df[["path", "label_index"]]

#     train_df.to_csv(train_df_path, index=False, header=False, sep=' ')
#     test_df.to_csv(test_df_path, index=False, header=False, sep=' ')


100%|██████████| 22/22 [00:16<00:00,  1.32it/s]
100%|██████████| 22/22 [00:17<00:00,  1.28it/s]
100%|██████████| 22/22 [00:19<00:00,  1.15it/s]
100%|██████████| 22/22 [00:19<00:00,  1.12it/s]
100%|██████████| 22/22 [00:21<00:00,  1.01it/s]
100%|██████████| 22/22 [00:28<00:00,  1.29s/it]
100%|██████████| 22/22 [00:51<00:00,  2.34s/it]
0it [00:00, ?it/s]
0it [00:00, ?it/s]
0it [00:00, ?it/s]
0it [00:00, ?it/s]
0it [00:00, ?it/s]
0it [00:00, ?it/s]
0it [00:00, ?it/s]
100%|██████████| 224/224 [00:32<00:00,  6.88it/s]
100%|██████████| 224/224 [00:34<00:00,  6.40it/s]
100%|██████████| 224/224 [00:35<00:00,  6.36it/s]
100%|██████████| 224/224 [00:37<00:00,  5.98it/s]
100%|██████████| 224/224 [00:47<00:00,  4.74it/s]
100%|██████████| 224/224 [01:19<00:00,  2.81it/s]
100%|██████████| 224/224 [03:06<00:00,  1.20it/s]
100%|██████████| 6/6 [00:09<00:00,  1.57s/it]
100%|██████████| 6/6 [00:09<00:00,  1.62s/it]
100%|██████████| 6/6 [00:09<00:00,  1.66s/it]
100%|██████████| 6/6 [00:10<00:00,  1.73s/i

In [None]:
train_class2samples

Check the videos

In [25]:

video_paths = list(Path(data_dir/'videos').glob('*.mp4'))

Due to a `decord` package [issue](https://github.com/dmlc/decord/issues/150), to use mmaction2 we must check if the videos can be loaded by `decord` correctly.

In [26]:
import decord
for path in video_paths:
# for path in ["./notebook_results/train_normal_6_raw_padding-0.mp4"]:
# for path in ["./test_video_output.mp4"]:
    reader = decord.VideoReader(str(path))
    reader.seek(0)
    imgs = list()
    frame_inds = range(0, len(reader))
    for idx in frame_inds:
        reader.seek(idx)
        frame = reader.next()
        imgs.append(frame.asnumpy())
        frame = frame.asnumpy()

        num_channels = frame.shape[-1]
        if num_channels != 3:
            print("invalid video for decord (https://github.com/dmlc/decord/issues/150): ", path)
            break
        # fig, axes = plt.subplots(1, num_channels, figsize=(20, 10))
        # for i in range(num_channels):
        #     axes[i].imshow(frame[:, :, i])
        # plt.show()
    del reader

invalid video for decord (https://github.com/dmlc/decord/issues/150):  notebook_results\mmaction_train_data_v9\videos\test_normal_33_combined_padding-0.mp4
invalid video for decord (https://github.com/dmlc/decord/issues/150):  notebook_results\mmaction_train_data_v9\videos\test_normal_33_mask_padding-0.mp4
invalid video for decord (https://github.com/dmlc/decord/issues/150):  notebook_results\mmaction_train_data_v9\videos\test_normal_33_raw_padding-0.mp4
invalid video for decord (https://github.com/dmlc/decord/issues/150):  notebook_results\mmaction_train_data_v9\videos\train_normal_204_combined_padding-0.mp4
invalid video for decord (https://github.com/dmlc/decord/issues/150):  notebook_results\mmaction_train_data_v9\videos\train_normal_204_mask_padding-0.mp4
invalid video for decord (https://github.com/dmlc/decord/issues/150):  notebook_results\mmaction_train_data_v9\videos\train_normal_204_raw_padding-0.mp4


In [None]:
decord.__version__

check if videos can be loaded by cv2 correctly

In [None]:
import cv2

cap = cv2.VideoCapture("./test_video_output.mp4")

while True:
    ret, frame = cap.read()
    if not ret:
        break
    assert frame.shape[-1] == 3, "frame should be in RGB format"

cap.release()
cv2.destroyAllWindows()

In [None]:
# from sklearn.model_selection import train_test_split

# train_df_path = data_dir/'train_data.csv'
# test_df_path = data_dir/'test_data.csv'

# # split train and test from df
# train_df, test_df = train_test_split(df, test_size=0.2, random_state=42)
# train_df.to_csv(train_df_path, index=False, header=False, sep=' ')
# test_df.to_csv(test_df_path, index=False, header=False, sep=' ')
