Skip to content

Commit

Permalink
update classify_mitosis_data_prep.ipynb
Browse files Browse the repository at this point in the history
  • Loading branch information
dummyindex committed Aug 4, 2023
1 parent 2b4c52f commit 8236d94
Showing 1 changed file with 44 additions and 55 deletions.
99 changes: 44 additions & 55 deletions notebooks/classify_mitosis_data_prep.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -157,6 +157,7 @@
"# all_scs_json_path = \"./datasets/test_scs_EBSS_starvation/XY16/tmp_corrected_scs.json\"\n",
"sctc = SingleCellTrajectoryCollection()\n",
"for json_path in all_scs_json_path:\n",
" print(\"json path:\", json_path)\n",
" _scs = SingleCellStatic.load_single_cells_json(json_path)\n",
" tmp_sctc = track_SORT_bbox_from_scs(_scs, raw_imgs=_scs[0].img_dataset, min_hits=3, max_age=3)\n",
" tids = set(sctc.get_all_tids())\n",
Expand Down Expand Up @@ -311,9 +312,10 @@
"import numpy as np\n",
"import pandas as pd\n",
"\n",
"from livecell_tracker.core.sc_video_utils import gen_mp4_from_frames, gen_samples_df, gen_samples_mp4s\n",
"from livecell_tracker.core.sc_video_utils import gen_mp4_from_frames, gen_class2sample_samples, gen_samples_mp4s\n",
"\n",
"ver = \"10-st\" # single trajectory ver\n",
"# ver = \"10-st\" # single trajectory ver\n",
"ver = \"test\" # single trajectory ver\n",
"MAKE_SINGLE_CELL_TRAJ_SAMPLES = True\n",
"DROP_MITOSIS_DIV = False\n",
"\n",
Expand Down Expand Up @@ -458,49 +460,9 @@
"source": [
"from typing import Dict\n",
"from livecell_tracker.track.data_prep_utils import check_one_sc_at_time\n",
"from livecell_tracker.track.data_prep_utils import make_one_cell_per_timeframe_for_class2samples, make_one_cell_per_timeframe_helper, make_one_cell_per_timeframe_samples\n",
"\n",
"\n",
"def make_one_cell_per_timeframe_helper(sc_by_time, times, cur_idx) -> List[List[SingleCellStatic]]:\n",
" if cur_idx == len(times):\n",
" return [[]]\n",
" cur_time = times[cur_idx]\n",
" cur_scs = sc_by_time[cur_time]\n",
" return [[sc] + scs for sc in cur_scs for scs in make_one_cell_per_timeframe_helper(sc_by_time, times, cur_idx + 1)]\n",
"\n",
"\n",
"def make_one_cell_per_timeframe_samples(sample: List[SingleCellStatic]) -> List[List[SingleCellStatic]]:\n",
" \"\"\"if there are two single cells at a time frame, recursively generate new samples with one single cell at a time frame\"\"\"\n",
" sc_by_time = {}\n",
" for sc in sample:\n",
" if sc.timeframe not in sc_by_time:\n",
" sc_by_time[sc.timeframe] = []\n",
" sc_by_time[sc.timeframe].append(sc)\n",
" return make_one_cell_per_timeframe_helper(sc_by_time, sorted(sc_by_time.keys()), 0)\n",
"\n",
"\n",
"def make_one_cell_per_timeframe_for_class2samples(class2samples: Dict, class2sample_extra_info=None, tar_keys: List[str] = [\"mitosis\"]) -> Dict:\n",
" class2samples = class2samples.copy()\n",
" if class2sample_extra_info is not None:\n",
" class2sample_extra_info = class2sample_extra_info.copy()\n",
" for key in tar_keys:\n",
" tmp_samples = []\n",
" tmp_sample_extra_info = []\n",
" key_samples = class2samples[key]\n",
" for sample_idx, sample in enumerate(key_samples):\n",
" sct_samples = make_one_cell_per_timeframe_samples(sample)\n",
" tmp_samples.extend(sct_samples)\n",
" if class2sample_extra_info is not None:\n",
" tmp_sample_extra_info.extend([class2sample_extra_info[key][sample_idx] for _ in range(len(sct_samples))])\n",
"\n",
" # check the length of sample is the same as the length of tmp_samples[-1]\n",
" sample_times = set([sc.timeframe for sc in sample])\n",
" tmp_sample_times = set([sc.timeframe for sc in tmp_samples[-1]])\n",
" assert len(sample_times) == len(tmp_sample_times), f\"sample times: {sample_times}, tmp sample times: {tmp_sample_times}\"\n",
" class2samples[key] = tmp_samples\n",
" if class2sample_extra_info is not None:\n",
" class2sample_extra_info[key] = tmp_sample_extra_info\n",
" assert all([check_one_sc_at_time(sample) for sample in class2samples[key]]), \"there is more than one sc at the same timepoint\"\n",
" return class2samples, class2sample_extra_info\n",
"\n",
"sample = train_class2samples[\"mitosis\"][0]"
]
Expand Down Expand Up @@ -570,34 +532,61 @@
"metadata": {},
"outputs": [],
"source": [
"\n",
"# # for debug\n",
"# train_class2samples = {key: value[:5] for key, value in all_class2samples.items()}\n",
"# test_class2samples = {key: value[:5] for key, value in all_class2samples.items()}\n",
"# padding_pixels = [20]\n",
"\n",
"\n",
"train_sample_info_df = gen_samples_df(train_class2samples, train_class2sample_extra_info, data_dir, class_labels, padding_pixels, frame_types, fps, prefix=\"train\")\n",
"test_sample_info_df = gen_samples_df(test_class2samples, test_class2sample_extra_info, data_dir, class_labels, padding_pixels, frame_types, fps, prefix=\"test\")\n",
"train_sample_info_df = gen_class2sample_samples(\n",
" train_class2samples,\n",
" train_class2sample_extra_info,\n",
" data_dir,\n",
" class_labels,\n",
" padding_pixels=padding_pixels,\n",
" frame_types=frame_types,\n",
" fps=fps,\n",
" prefix=\"train\",\n",
")\n",
"test_sample_info_df = gen_class2sample_samples(\n",
" test_class2samples,\n",
" test_class2sample_extra_info,\n",
" data_dir,\n",
" class_labels,\n",
" padding_pixels=padding_pixels,\n",
" frame_types=frame_types,\n",
" fps=fps,\n",
" prefix=\"test\",\n",
")\n",
"\n",
"train_sample_info_df.to_csv(data_dir/f'train_data.txt', index=False, header=False, sep=' ', )\n",
"test_sample_info_df.to_csv(data_dir/f'test_data.txt', index=False, header=False, sep=' ', )\n",
"train_sample_info_df.to_csv(\n",
" data_dir / f\"train_data.txt\",\n",
" index=False,\n",
" header=False,\n",
" sep=\" \",\n",
")\n",
"test_sample_info_df.to_csv(\n",
" data_dir / f\"test_data.txt\",\n",
" index=False,\n",
" header=False,\n",
" sep=\" \",\n",
")\n",
"\n",
"for selected_frame_type in frame_types:\n",
" train_df_path = data_dir/f'mmaction_train_data_{selected_frame_type}.txt'\n",
" train_df_path = data_dir / f\"mmaction_train_data_{selected_frame_type}.txt\"\n",
" train_selected_frame_type_df = train_sample_info_df[train_sample_info_df[\"frame_type\"] == selected_frame_type]\n",
" train_selected_frame_type_df = train_selected_frame_type_df.reset_index(drop=True)\n",
" train_selected_frame_type_df = train_selected_frame_type_df[[\"path\", \"label_index\"]]\n",
" train_selected_frame_type_df.to_csv(train_df_path, index=False, header=False, sep=' ')\n",
" \n",
" test_df_path = data_dir/f'mmaction_test_data_{selected_frame_type}.txt'\n",
" train_selected_frame_type_df.to_csv(train_df_path, index=False, header=False, sep=\" \")\n",
"\n",
" test_df_path = data_dir / f\"mmaction_test_data_{selected_frame_type}.txt\"\n",
" test_selected_frame_type_df = test_sample_info_df[test_sample_info_df[\"frame_type\"] == selected_frame_type]\n",
" test_selected_frame_type_df = test_selected_frame_type_df[[\"path\", \"label_index\"]]\n",
" test_selected_frame_type_df = test_selected_frame_type_df.reset_index(drop=True)\n",
" test_selected_frame_type_df.to_csv(test_df_path, index=False, header=False, sep=' ')\n",
" test_selected_frame_type_df.to_csv(test_df_path, index=False, header=False, sep=\" \")\n",
"\n",
"\n",
"# # the follwing code generates v1-v7 test data. The issue is that some of test data shows up in train data, through different padding values.\n",
"# # The following code generates v1-v7 test data. The issue is that some of the test data shows up in the train data, through different padding values.\n",
"# data_df_path = data_dir/'all_data.txt'\n",
"# sample_df = gen_samples_df(data_dir, class_labels, padding_pixels, frame_types, fps)\n",
"# sample_df.to_csv(data_df_path, index=False, header=False, sep=' ')\n",
Expand Down Expand Up @@ -754,7 +743,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.16"
"version": "3.10.8"
},
"orig_nbformat": 4
},
Expand Down

0 comments on commit 8236d94

Please sign in to comment.