# Create a UMAP embedding of single-cell features

In [None]:
from livecell_tracker.sample_data import tutorial_three_image
from pathlib import Path
from livecell_tracker.core.datasets import LiveCellImageDataset
import glob

# Locations of the raw frames and of the segmentation masks for position XY16.
dataset_dir_path = Path(
    "../datasets/EBSS_Starvation/tif_STAV-A549_VIM_24hours_NoTreat_NA_YL_Ti2e_2022-12-21/XY16/"
)
mask_dataset_path = Path(
    "../datasets/EBSS_Starvation/tif_STAV-A549_VIM_24hours_NoTreat_NA_YL_Ti2e_2022-12-21/out/XY16/seg"
)

# Mask images are loaded directly from their directory.
mask_dataset = LiveCellImageDataset(mask_dataset_path, ext="png")

# Map index -> DIC file path; indices follow the lexicographic order of the
# matched file names (assumes that order matches acquisition order - TODO confirm).
time2url = dict(
    enumerate(sorted(glob.glob(str(Path(dataset_dir_path) / Path("*_DIC.tif")))))
)
dic_dataset = LiveCellImageDataset(time2url=time2url, ext="tif")

In [None]:
from livecell_tracker.core.io_sc import prep_scs_from_mask_dataset
# Build `scs` from the mask dataset and the DIC dataset. The rest of this
# notebook iterates `scs` as a collection of single-cell objects, each exposing
# `timeframe` and `get_feature_pd_series()`.
scs = prep_scs_from_mask_dataset(mask_dataset, dic_dataset)

In [None]:
from livecell_tracker.trajectory.feature_extractors import compute_haralick_features, compute_skimage_regionprops
from livecell_tracker.preprocess.utils import normalize_img_to_uint8
from livecell_tracker.core.parallel import parallelize
# One keyword-argument dict per single cell, consumed by `parallelize` below.
# A plain `for` loop is used deliberately: the loop variable `sc` stays bound
# after the loop and is read by the following cell.
inputs = []
for sc in scs:
    inputs.append(
        dict(
            sc=sc,
            feature_key="skimage",
            preprocess_img_func=normalize_img_to_uint8,
            sc_level_normalize=True,
        )
    )

# NOTE(review): the return value is not stored. If `parallelize` runs workers in
# separate processes, confirm that computed features end up on these `sc` objects.
parallelize(compute_skimage_regionprops, inputs)

In [None]:
# `sc` is the variable left bound by the `for sc in scs` loop of the
# feature-extraction cell, i.e. the last element of `scs` when the cells are
# run in order. Display its feature series as a quick sanity check.
sc.get_feature_pd_series()

In [None]:
def create_sc_table(scs, normalize_features=True):
    """Assemble a per-cell feature table from single-cell objects.

    Args:
        scs: iterable of objects exposing ``get_feature_pd_series()``; each
            returned series becomes one row, in iteration order.
        normalize_features: when True, every remaining column is centered on
            its mean and divided by its sample standard deviation. Columns
            whose standard deviation is zero or NaN (constant columns, a
            single row, all-NaN columns) are only centered.

    Returns:
        pandas.DataFrame of features with the ``"t"`` column removed (if it
        was present).
    """
    import pandas as pd

    df = pd.DataFrame([sc.get_feature_pd_series() for sc in scs])
    # "t" is excluded from the returned table, so drop it before normalizing
    # rather than standardizing a column that is thrown away afterwards.
    # errors="ignore" keeps an empty `scs` (or series lacking "t") from
    # raising KeyError.
    df = df.drop(columns="t", errors="ignore")
    if normalize_features and not df.empty:
        centered = df - df.mean()
        col_std = centered.std()
        # Divide by 1 where the std is unusable so those columns stay centered
        # instead of being turned into NaN/inf.
        usable = col_std.notna() & (col_std != 0)
        df = centered / col_std.where(usable, 1.0)
    return df

# Build the feature table with the default `normalize_features=True`, then
# preview its first two rows.
sc_feature_table = create_sc_table(scs)
sc_feature_table[:2]

## Normalize features

## Perform UMAP on features

In [None]:
import umap
# UMAP reducer constructed with no arguments, i.e. the library's default settings.
reducer = umap.UMAP()

In [None]:
# Drop feature columns in which every value is NaN. Columns that contain only
# some NaN values are kept (how="all").
sc_feature_table = sc_feature_table.dropna(axis=1, how="all")

In [None]:
# Fit the reducer on the normalized feature table and keep the resulting
# low-dimensional coordinates.
embedding = reducer.fit_transform(sc_feature_table)

In [None]:
# Display the embedding's shape (presumably (number of cells, 2) with UMAP
# defaults - TODO confirm; the plot below reads columns 0 and 1).
embedding.shape

In [None]:
import matplotlib.pyplot as plt
# Scatter the first two embedding columns, coloring each point by the
# `timeframe` of the corresponding cell in `scs`.
plt.scatter(
    x=embedding[:, 0],
    y=embedding[:, 1],
    c=[cell.timeframe for cell in scs],
)

In [None]:
# Same pipeline on the raw (non-standardized) features, for comparison:
# build the table, discard columns that are entirely NaN, embed, and plot.
unnormalized_img_features = create_sc_table(
    scs, normalize_features=False
).dropna(axis=1, how="all")

# A fresh reducer is created so this fit is independent of the earlier one.
reducer = umap.UMAP()
unnormalized_embedding = reducer.fit_transform(unnormalized_img_features)

plt.scatter(
    x=unnormalized_embedding[:, 0],
    y=unnormalized_embedding[:, 1],
    c=[cell.timeframe for cell in scs],
)


In [None]:
# List the column names of the unnormalized feature table.
list(unnormalized_img_features.keys())