Add an `ou_aux` class label (dropout / overseg / underseg / correct) to each case, derived from its `subdir` name

In [26]:
import pandas as pd
import os
from pathlib import Path
from livecellx.core.datasets import read_img_default
from livecellx.segment.ou_utils import csn_augment_helper
import numpy as np
import skimage.measure



# Input CSV to label. To process the train split instead, swap in the
# commented-out path below.
# data_df_path = r"./notebook_results/a549_ccp_vim/train_data_v11/train_data.csv"
data_df_path = r"./notebook_results/a549_ccp_vim/test_data_v11/train_data.csv"

# Load the CSV into a DataFrame; later cells read its "subdir" column.
data_df = pd.read_csv(filepath_or_buffer=data_df_path)

In [27]:
# List the distinct "subdir" values present in the table.
unique_subdirs = data_df["subdir"].unique()
print(unique_subdirs)

['real_overseg_td1_XY5' 'real_overseg_td1_XY5_dropout'
 'real_underseg_cases' 'real_overseg_td1_XY6'
 'real_overseg_td1_XY6_dropout']


In [28]:
# "ou_aux" column Mapping rules: 
# contain dropout -> dropout
# contain overseg -> overseg
# contain underseg -> underseg
# contain correct -> correct

# Keywords are tried in this order and the first one found in the subdir name
# wins, so "dropout" takes precedence over "overseg" (a name such as
# "real_overseg_td1_XY5_dropout" is labelled "dropout").
aux_keywords = ("dropout", "overseg", "underseg", "correct")

subdir_aux_mapping = {}
for subdir_name in data_df["subdir"].unique():
    matched_label = next((kw for kw in aux_keywords if kw in subdir_name), None)
    if matched_label is None:
        # No keyword matched: fail loudly instead of leaving the subdir unlabelled.
        raise ValueError("subdir not recognized: {}".format(subdir_name))
    subdir_aux_mapping[subdir_name] = matched_label

In [29]:
# Dump the whole lookup first, then one line per (subdir, label) pair.
print("subdir_aux_mapping: {}".format(subdir_aux_mapping))

for subdir_key in subdir_aux_mapping:
    print("subdir: {}, aux: {}".format(subdir_key, subdir_aux_mapping[subdir_key]))

subdir_aux_mapping: {'real_overseg_td1_XY5': 'overseg', 'real_overseg_td1_XY5_dropout': 'dropout', 'real_underseg_cases': 'underseg', 'real_overseg_td1_XY6': 'overseg', 'real_overseg_td1_XY6_dropout': 'dropout'}
subdir: real_overseg_td1_XY5, aux: overseg
subdir: real_overseg_td1_XY5_dropout, aux: dropout
subdir: real_underseg_cases, aux: underseg
subdir: real_overseg_td1_XY6, aux: overseg
subdir: real_overseg_td1_XY6_dropout, aux: dropout


In [30]:
# Attach an "ou_aux" label to every row by looking up its "subdir" value in
# subdir_aux_mapping. This is done with vectorized pandas operations instead
# of a Python-level iterrows() loop.
subdir_values = data_df["subdir"]

# Rows whose subdir has no entry in the lookup (this includes missing values).
unmapped_rows = ~subdir_values.isin(list(subdir_aux_mapping))
if unmapped_rows.any():
    # Report the first offending subdir, as the row-by-row loop would have.
    raise ValueError("subdir not recognized: {}".format(subdir_values[unmapped_rows].iloc[0]))

# Kept as a plain list so the column assignment below is positional and does
# not depend on the DataFrame index.
ou_aux_col = subdir_values.map(subdir_aux_mapping).tolist()

data_df["ou_aux"] = ou_aux_col

In [31]:
# Count how many rows carry each ou_aux label and print the tally.
aux_counts = data_df["ou_aux"].value_counts()
print(aux_counts)

underseg    70
dropout     57
overseg     30
Name: ou_aux, dtype: int64


In [32]:
# Directory containing the input CSV (shown as the cell's output).
data_df_dir = Path(data_df_path).parent
data_df_dir

PosixPath('notebook_results/a549_ccp_vim/test_data_v11')

In [33]:
# Save the labelled table (now including the "ou_aux" column) as
# "train_data_aux.csv" in the same directory as the input CSV. The DataFrame
# index is not written.
out_dir = Path(data_df_path).parent
out_data_df_path = out_dir / "train_data_aux.csv"
data_df.to_csv(out_data_df_path, index=False)