# Create UMAP of single cell features

In [None]:
from livecellx.sample_data import tutorial_three_image_sys

# Load the paired DIC-image and mask datasets from the STAV-A549 test data folders.
dic_dataset, mask_dataset = tutorial_three_image_sys(
    dic_dataset_path="../datasets/test_data_STAV-A549/DIC_data",
    mask_dataset_path="../datasets/test_data_STAV-A549/mask_data",
)

In [None]:
from livecellx.core.io_sc import prep_scs_from_mask_dataset
# Build `scs`, the collection of single-cell objects used by every later cell,
# from the mask dataset and the DIC image dataset loaded earlier.
# (presumably one object per segmented cell per frame — TODO confirm)
scs = prep_scs_from_mask_dataset(mask_dataset, dic_dataset)

In [None]:
# NOTE: `compute_haralick_features` is imported but not used in this cell.
from livecellx.trajectory.feature_extractors import compute_haralick_features, compute_skimage_regionprops
from livecellx.preprocess.utils import normalize_img_to_uint8
# Compute skimage regionprops features for every cell, passing
# `normalize_img_to_uint8` as the image preprocessing function.
# The returned `features` value is overwritten on each iteration; presumably the
# call also stores the features on `sc`, since later cells read them back through
# `sc.get_feature_pd_series()` — TODO confirm.
for sc in scs:
    features = compute_skimage_regionprops(sc, preprocess_img_func=normalize_img_to_uint8, sc_level_normalize=True)

In [None]:
# `sc` is the variable left over from the preceding `for sc in scs` loop, i.e. the
# last cell iterated; display its feature series as a quick sanity check.
sc.get_feature_pd_series()

In [None]:
def create_sc_table(scs, normalize_features=True):
    """Assemble a per-cell feature table, optionally z-score normalized.

    Each element of ``scs`` contributes one row, taken from its
    ``get_feature_pd_series()`` result. The time column ``"t"`` is treated as
    metadata rather than a feature and is removed from the table.

    Args:
        scs: Iterable of objects exposing ``get_feature_pd_series()``.
        normalize_features: When true, every remaining column is centered on
            its mean and divided by its sample standard deviation. Columns
            whose standard deviation is zero or NaN (constant columns, a
            single row, all-NaN columns) are only centered.

    Returns:
        pandas.DataFrame with one row per cell and one column per feature,
        without the ``"t"`` column.
    """
    import numpy as np
    import pandas as pd

    df = pd.DataFrame([sc.get_feature_pd_series() for sc in scs])

    # Drop the time column *before* normalizing so no work is spent on a column
    # that is discarded anyway; errors="ignore" keeps the function usable when
    # `scs` is empty or the feature series carry no "t" entry.
    df = df.drop(columns="t", errors="ignore")

    if normalize_features:
        for col in df.columns:
            centered = df[col] - df[col].mean()
            col_std = centered.std()
            # Skip the division when it would yield inf/NaN.
            if col_std != 0 and not np.isnan(col_std):
                centered = centered / col_std
            df[col] = centered
    return df

# Build the feature table with the default normalize_features=True (z-scored
# columns) and preview its first two rows.
sc_feature_table = create_sc_table(scs)
sc_feature_table[:2]

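## Normalize features

Feature normalization (per-column z-scoring) is already applied inside `create_sc_table` above when `normalize_features=True`, which is the default.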

## Perform UMAP on features

In [None]:
import umap
# Create a UMAP reducer with the library's default hyperparameters.
# NOTE(review): no random_state is passed, so the layout may vary between runs —
# confirm whether reproducibility matters here.
reducer = umap.UMAP()

In [None]:
# Drop feature columns that are NaN for every cell. Columns with only some NaN
# values are kept (NOTE(review): verify the reducer tolerates any NaNs that remain).
sc_feature_table = sc_feature_table.dropna(axis=1, how="all")

In [None]:
# Fit UMAP on the feature table and project each row (cell) into the embedding space.
embedding = reducer.fit_transform(sc_feature_table)

In [None]:
# Inspect the embedding's dimensions (expected: one row per row of `sc_feature_table`).
embedding.shape

In [None]:
import matplotlib.pyplot as plt
# Scatter the first two embedding coordinates, coloring each point by the
# timeframe of the corresponding cell (assumes the rows of `embedding` follow
# the order of `scs`, which is how the feature table was built).
plt.scatter(
    embedding[:, 0],
    embedding[:, 1],
    c=[sc.timeframe for sc in scs],
)

In [None]:
import numpy as np

# Report every feature whose Pearson correlation with the first UMAP axis
# exceeds 0.5 in absolute value.
umap_axis = embedding[:, 0]
feature_cols = sc_feature_table.columns

for feature in feature_cols:
    corr = np.corrcoef(sc_feature_table[feature], umap_axis)[0, 1]
    # Written as a negated ">" so that NaN correlations are skipped as well.
    if not abs(corr) > 0.5:
        continue
    print(feature, corr)

In [None]:
# Repeat the embedding on the raw (non-z-scored) features for comparison,
# again discarding columns that are NaN for every cell.
unnormalized_img_features = create_sc_table(scs, normalize_features=False).dropna(
    axis=1, how="all"
)

# Fresh reducer so this fit does not reuse the one fitted on normalized features.
reducer = umap.UMAP()
unnormalized_embedding = reducer.fit_transform(unnormalized_img_features)

# Two axes are created; only the left one is drawn on in this cell.
fig, axes = plt.subplots(1, 2, figsize=(10, 5))
scatter_time = axes[0].scatter(
    unnormalized_embedding[:, 0],
    unnormalized_embedding[:, 1],
    c=[sc.timeframe for sc in scs],  # color points by timeframe
)
colorbar = fig.colorbar(scatter_time, ax=axes[0], label="Time")


In [None]:
# List the column (feature) names of the unnormalized feature table.
list(unnormalized_img_features.keys())