In [None]:
from pathlib import Path
import os.path

In [None]:
# Output directory for the files this notebook writes; created here if it does
# not exist yet (exist_ok=True makes re-running the cell harmless).
out_dir = Path("./figure_mitosis_trajs_viz")
out_dir.mkdir(exist_ok=True)

In [None]:
from livecellx.track.classify_utils import load_class2samples_from_json_dir, load_all_json_dirs


# Initial annotation directories (test_scs_EBSS_starvation dataset).
# NOTE(review): the XY16 entry starts with "./datasets" while every other entry
# uses "../datasets" — confirm which root is intended.
sample_json_dirs_v0 = [
    Path(r"../datasets/test_scs_EBSS_starvation/XY1/annotations"),
    Path(r"./datasets/test_scs_EBSS_starvation/XY16/annotations"),
]

# Round 1: the initial directories plus the mitosis-annotations-2023 positions.
round1_json_dirs = sample_json_dirs_v0 + [
    Path(r"../datasets/mitosis-annotations-2023/shiman_XY01/XY01"),
    Path(r"../datasets/mitosis-annotations-2023/shiman_XY09/XY09"),
    Path(r"../datasets/mitosis-annotations-2023/shiman_XY10/XY10"),
    Path(r"../datasets/mitosis-annotations-2023/Yajushi/tifs_CFP_A549-VIM_lessThan24hr_NoTreat_NA_YL_Ti2e_2022-10-19/XY1/annotations"),
]

# Round 2: the high-density CXA directories plus positions XY4..XY13.
round2_json_dirs = [
    Path(r"../datasets/mitosis-annotations-2023/shiman_CXA_high_density/C0.5^4/"),
    Path(r"../datasets/mitosis-annotations-2023/shiman_CXA_high_density/C0.75^4/"),
    Path(r"../datasets/mitosis-annotations-2023/shiman_CXA_high_density/C10^3/"),
    Path(r"../datasets/mitosis-annotations-2023/shiman_CXA_high_density/C10^4/"),
] + [
    Path(f"../datasets/mitosis-annotations-2023/Gaohan_tifs_CFP_A549-VIM_lessThan24hr_NoTreat_NA_YL_Ti2e_2022-10-19/XY{pos}/annotations")
    for pos in range(4, 14)
]

# round1_json_dirs already begins with sample_json_dirs_v0, so prepending v0
# again would list those directories (and hand them to the loader) twice.
sample_json_dirs = round1_json_dirs + round2_json_dirs
all_class2samples, all_class2sample_extra_info = load_all_json_dirs(sample_json_dirs)

In [None]:
# Print every key of all_class2samples next to the number of samples stored under it.
for class_name, class_samples in all_class2samples.items():
    print(class_name, len(class_samples))

In [None]:
# Keep only the samples stored under the "mitosis" key. Each sample is indexed
# like a sequence of single cells (see the [0][0] access below).
# NOTE: the variable name is spelled "sampels"; later cells rely on this spelling.
mitosis_sampels = all_class2samples["mitosis"]
# Display the metadata of the first cell of the first sample.
mitosis_sampels[0][0].meta

Check that each single cell (sc) has image data, which the later feature calculation needs.

In [None]:
# for idx, sample in enumerate(mitosis_sampels):
#     for sc in sample:
#         sc.get_img()

Store each sample's index within `all_class2samples["mitosis"]`, together with the time relative to the sample's first cell, in every single cell's metadata for later use.

In [None]:
# Annotate every cell with the index of the sample it belongs to and with its
# timeframe offset from the first cell of that sample. Empty samples are skipped.
for idx, sample in enumerate(mitosis_sampels):
    if len(sample) > 0:
        first_timeframe = sample[0].timeframe
        for sc in sample:
            sc.meta["_sample_idx"] = idx
            sc.meta["mitosis_relative_time"] = sc.timeframe - first_timeframe



In [None]:
# Flatten the per-sample cell sequences into one list, preserving sample order.
all_scs = []
for sample in mitosis_sampels:
    all_scs.extend(sample)

In [None]:
from livecellx.trajectory.feature_extractors import compute_haralick_features, compute_skimage_regionprops, parallelize_compute_features
from livecellx.preprocess.utils import normalize_img_to_uint8
from livecellx.core.parallel import parallelize
# One keyword-argument dict per cell; each dict is unpacked into
# compute_skimage_regionprops by the wrapper defined below.
inputs = [
    {
        "sc": sc,
        "feature_key": "skimage",
        "preprocess_img_func": normalize_img_to_uint8,
        "sc_level_normalize": True,
    }
    for sc in all_scs
]

def compute_skimage_regionprops_wrapper(**kwargs):
    """Call ``compute_skimage_regionprops`` with ``kwargs`` and return the cell.

    ``kwargs`` must contain the key ``"sc"``; that object is returned after the
    feature computation (presumably so the updated cell comes back from the
    parallel worker — confirm against ``parallelize``).
    """
    target_sc = kwargs["sc"]
    compute_skimage_regionprops(**kwargs)
    return target_sc

# TODO: debug
# Run the wrapper over every kwargs dict through livecellx's parallelize helper
# and collect the returned cells (presumably one call per dict — confirm
# against parallelize's documentation).
processed_scs = parallelize(compute_skimage_regionprops_wrapper, inputs)

# # a for loop for computing
# import tqdm
# processed_scs = []
# for input in tqdm.tqdm(inputs):
#     sc = input["sc"]
#     compute_skimage_regionprops(**input)
#     processed_scs.append(sc)

In [None]:
from livecellx.core.single_cell import SingleCellStatic

# Persist the processed cells as JSON inside the notebook's output directory.
SingleCellStatic.write_single_cells_json(
    processed_scs,
    out_dir / "processed_scs.json",
    dataset_dir=out_dir / "datasets",
)

In [None]:
# Reload the cells from the JSON file written in the previous cell; from here on
# processed_scs refers to the reloaded objects, not the in-memory originals.
processed_scs = SingleCellStatic.load_single_cells_json(out_dir / "processed_scs.json")

In [None]:
# Take the first cell of the first mitosis sample as a reference for the metadata layout.
sample_sc = mitosis_sampels[0][0]
# NOTE: only the last expression of a cell is displayed, so this keys() call shows nothing here.
sample_sc.meta.keys()
# Metadata fields passed as meta_keys when the single-cell table is built.
meta_keys = ['img_dataset_json_path', 'mask_dataset_json_path', '_sample_idx', 'mitosis_relative_time']

In [None]:
from livecellx.core.single_cell import create_sc_table

# Table with time, cell id and the selected metadata columns (saved to CSV later).
sc_table_with_time = create_sc_table(
    processed_scs,
    add_time=True,
    add_sc_id=True,
    meta_keys=meta_keys,
)
# Table without the time column; used as the input for the UMAP embedding.
cell_features_2 = create_sc_table(processed_scs, add_time=False)

In [None]:
# save the tables
# Only the table that includes time and metadata is written; it goes into out_dir.
sc_table_with_time.to_csv(out_dir / "mitosis_scs_table_with_time.csv")

In [None]:
# Preview the first two rows of the table.
sc_table_with_time.head(2)

In [None]:
import umap
# UMAP is stochastic; a fixed seed keeps the embedding (and every figure drawn
# from it) reproducible across kernel restarts.
reducer = umap.UMAP(random_state=42)
# Drop every feature column that contains at least one NaN before fitting.
cell_features_2 = cell_features_2.dropna(axis=1)

embedding = reducer.fit_transform(cell_features_2)

In [None]:
import matplotlib.pyplot as plt
# Colour each embedded cell by its absolute timeframe.
timeframes = [sc.timeframe for sc in processed_scs]
plt.scatter(embedding[:, 0], embedding[:, 1], c=timeframes, alpha=0.5)
plt.title("Ignore this figure: meaningless timeframe")
plt.xlabel("UMAP_1")
plt.ylabel("UMAP_2")
# Colour scale for the timeframe values.
plt.colorbar()

In [None]:
# Font size shared by the titles and axis labels of the matplotlib figures below.
plt_fontsize = 14

In [None]:
# Relative time (frames since the first cell of its sample) of every processed cell.
mitosis_relative_times = [sc.meta["mitosis_relative_time"] for sc in processed_scs]

# plot distribution of mitosis relative times
import seaborn as sns

# sns.displot is a figure-level function: it creates its own figure, sized by
# height/aspect. A preceding plt.figure() call would only emit an extra empty
# figure, so none is created here.
sns.displot(mitosis_relative_times, kde=True, height=6, aspect=1.5)
plt.title("Distribution of mitosis relative times", fontsize=plt_fontsize)
plt.xlabel("Mitosis relative time", fontsize=plt_fontsize)
plt.ylabel("Count", fontsize=plt_fontsize)
plt.xticks(fontsize=plt_fontsize)
plt.yticks(fontsize=plt_fontsize)
plt.show()

In [None]:
# Relative time of every cell, capped at 10 so long tails share one value.
clipped_mitosis_time = [min(10, sc.meta["mitosis_relative_time"]) for sc in processed_scs]

In [None]:
# Display one specific cell (third cell of sample 37). The indices are hard-coded,
# so this raises IndexError if the loaded data has fewer samples or cells.
mitosis_sampels[37][2]

In [None]:
import matplotlib.pyplot as plt

# Relative-time thresholds from 10 to 50 in steps of 5.
large_thresholds = list(range(10, 51, 5))

def print_stats(processed_scs, threshold):
    """Count cells (and their samples) whose relative time exceeds ``threshold``.

    Despite its name, this function prints nothing.

    Args:
        processed_scs: iterable of cells whose ``meta`` mapping holds
            ``"mitosis_relative_time"`` and ``"_sample_idx"``.
        threshold: cells with a relative time strictly greater than this count.

    Returns:
        ``(n_samples, n_cells)``: the number of distinct ``_sample_idx`` values
        among the selected cells that have no ``"_labels"`` key in ``meta``,
        and the number of all selected cells.
    """
    long_scs = [sc for sc in processed_scs if sc.meta["mitosis_relative_time"] > threshold]
    unlabeled_sample_ids = {sc.meta["_sample_idx"] for sc in long_scs if "_labels" not in sc.meta}
    return len(unlabeled_sample_ids), len(long_scs)

# For each threshold, collect the number of distinct samples and the number of
# cells whose relative time exceeds it.
num_unique_long_samples = []
num_total_long_scs = []
for threshold in large_thresholds:
    unique_long_samples, total_long_samples = print_stats(processed_scs, threshold)
    num_unique_long_samples.append(unique_long_samples)
    num_total_long_scs.append(total_long_samples)

# Set the figure size
plt.figure(figsize=(8, 6))

# Line plot of the sample count against the threshold
plt.plot(large_thresholds, num_unique_long_samples, color='blue', label='#long samples')

# Write each count slightly right of and above its data point
for threshold, n_samples in zip(large_thresholds, num_unique_long_samples):
    plt.text(threshold + 0.7, n_samples + 0.1, str(n_samples), fontsize=12, ha='center', va='bottom')

# Title and axis labels (set once, with the shared font size)
plt.title("Number of Samples vs. Sample Length Threshold", fontsize=plt_fontsize)
plt.xlabel("Sample Length Threshold", fontsize=plt_fontsize)
plt.ylabel("Number of Samples", fontsize=plt_fontsize)

# Tick labels
plt.xticks(fontsize=12)
plt.yticks(fontsize=12)

# Add a legend to the plot
plt.legend(fontsize=12)

# Remove the top and right spines
ax = plt.gca()
ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)

# Show the plot
plt.show()

In [None]:
# List the feature columns that remain after the NaN columns were dropped.
cell_features_2.columns

In [None]:
import matplotlib.pyplot as plt
# One panel per colouring of the same UMAP embedding.
fig, axes = plt.subplots(1, 4, figsize=(20, 5))

# (panel title, per-cell colour values)
panels = [
    ("Mitosis: area", cell_features_2["skimage_area"]),
    ("Mitosis: relative time", clipped_mitosis_time),
    ("Mitosis: eccentricity", cell_features_2["skimage_eccentricity"]),
    ("Mitosis: skimage_orientation", cell_features_2["skimage_orientation"]),
]

for panel_ax, (panel_title, panel_colors) in zip(axes, panels):
    points = panel_ax.scatter(
        embedding[:, 0],
        embedding[:, 1],
        c=panel_colors,
        cmap="inferno",
        alpha=0.5,
    )
    panel_ax.set_xlabel("UMAP_1")
    panel_ax.set_ylabel("UMAP_2")
    panel_ax.set_title(panel_title)
    # Axes objects have no colorbar() method; colorbars are attached through the figure.
    fig.colorbar(points, ax=panel_ax)

In [None]:
import seaborn as sns
import pandas as pd
# Pair every cell's clipped relative time with its area.
data = pd.DataFrame(
    {
        "mitosis_color": clipped_mitosis_time,
        "skimage_area": cell_features_2["skimage_area"],
    }
)

# One box per clipped relative-time value.
sns.boxplot(data=data, x="mitosis_color", y="skimage_area")
plt.xlabel("mitosis: relative time")
plt.ylabel("area")

In [None]:
# List the available feature columns (candidates for viz_boxplot's key argument).
cell_features_2.columns

In [None]:
import seaborn as sns
import pandas as pd
# Create a DataFrame with the mitosis colors and corresponding skimage areas

def viz_boxplot(key, mitosis_times, use_abs_vals=False, feature_table=None):
    """Draw a box plot of one feature column grouped by mitosis-relative time.

    Args:
        key: name of the feature column to plot.
        mitosis_times: one relative-time value per row of the feature table.
        use_abs_vals: if True, plot the absolute values of the feature.
        feature_table: table to read ``key`` from; defaults to the notebook
            global ``cell_features_2`` so existing calls keep working.
    """
    if feature_table is None:
        # Fall back to the notebook-level table the original calls relied on.
        feature_table = cell_features_2
    vals = feature_table[key]

    if use_abs_vals:
        vals = vals.abs()
    data = pd.DataFrame({
        "mitosis: relative time": mitosis_times,
        key: vals
    })

    # One box per relative-time value
    sns.boxplot(x="mitosis: relative time", y=key, data=data)
# viz_boxplot("skimage_area", clipped_mitosis_time)

# eccentricity: float
# Eccentricity of the ellipse that has the same second-moments as the region.
# The eccentricity is the ratio of the focal distance (distance between focal points) over the major axis length.
# The value is in the interval [0, 1). When it is 0, the ellipse becomes a circle.
# viz_boxplot("skimage_eccentricity", clipped_mitosis_time, use_abs_vals=True)
# viz_boxplot("skimage_axis_minor_length", clipped_mitosis_time)
viz_boxplot("skimage_axis_major_length", clipped_mitosis_time)
# viz_boxplot("skimage_orientation", clipped_mitosis_time)

In [None]:
from livecellx.core.single_cell import create_sc_table
normalized_img_features = create_sc_table(processed_scs, normalize_features=True)
# Drop feature columns that are entirely NaN.
# NOTE(review): columns with only some NaN values are kept by how="all";
# confirm none remain, since the embedding is fitted on this table directly.
normalized_img_features = normalized_img_features.dropna(axis=1, how="all")
# UMAP is stochastic; a fixed seed keeps this embedding reproducible across runs.
reducer = umap.UMAP(random_state=42)
normalized_embedding = reducer.fit_transform(normalized_img_features)

fig, axes = plt.subplots(1, 2, figsize=(10, 5))
# Left: embedding coloured by clipped mitosis-relative time.
scatter_time = axes[0].scatter(
    normalized_embedding[:, 0],
    normalized_embedding[:, 1],
    c=clipped_mitosis_time,
    alpha=0.5,
)
colorbar = fig.colorbar(scatter_time, ax=axes[0], label="Time")

# Right: the same embedding coloured by the (normalized) area feature.
scatter_area = axes[1].scatter(
    normalized_embedding[:, 0],
    normalized_embedding[:, 1],
    c=normalized_img_features["skimage_area"],
)
colorbar = fig.colorbar(scatter_area, ax=axes[1], label="Area")


In [None]:
import plotly.graph_objs as go
import plotly.subplots as sp
import numpy as np

scs = processed_scs
fig = sp.make_subplots(rows=1, cols=2, subplot_titles=("Time", "Area"))

# Left panel: normalized embedding coloured by clipped mitosis-relative time,
# with timeframe and cell id shown on hover.
clipped_mitosis_time_np = np.array(clipped_mitosis_time, dtype=int)
scatter_time = go.Scatter(
    x=normalized_embedding[:, 0],
    y=normalized_embedding[:, 1],
    mode="markers",
    marker=dict(color=clipped_mitosis_time, colorbar=dict(title="Time", x=0.45, yanchor="middle", len=0.5)),
    text=[f"Timeframe: {sc.timeframe}<br>sc id: {sc.id}" for sc in scs],
    customdata=[sc.id for sc in scs],
)

# Write each time value at the mean embedding position of the cells that share it.
unique_mitosis_time_points = np.unique(clipped_mitosis_time_np)
for time_point in unique_mitosis_time_points:
    indices = np.where(clipped_mitosis_time_np == time_point)[0]
    # np.where returns a 1-tuple, so emptiness must be tested on the index
    # array itself (len() of the tuple is always 1).
    if indices.size == 0:
        continue
    mean_position = np.mean(normalized_embedding[indices], axis=0)
    x = mean_position[0]
    y = mean_position[1]
    annotation = go.layout.Annotation(
        x=x,
        y=y,
        text=str(time_point),
        showarrow=False,
        font=dict(size=20, color="white"),
        xshift=5,
        yshift=5,
    )
    fig.add_annotation(annotation)

# Right panel: the same embedding coloured by the normalized area feature.
scatter_area = go.Scatter(
    x=normalized_embedding[:, 0],
    y=normalized_embedding[:, 1],
    mode="markers",
    marker=dict(color=normalized_img_features["skimage_area"], colorbar=dict(title="Area", x=1, yanchor="middle", len=0.5)),
)
# add_trace for both panels (append_trace is deprecated in plotly).
fig.add_trace(scatter_time, row=1, col=1)
fig.add_trace(scatter_area, row=1, col=2)

fig.update_layout(height=500, width=1000, title_text="UMAP Embedding", clickmode="event")

Unnormalized features: repeat the UMAP embedding and the figures on the feature table without normalization.

In [None]:
from livecellx.core.single_cell import create_sc_table
unnormalized_img_features = create_sc_table(processed_scs, normalize_features=False)
# Drop feature columns that are entirely NaN.
unnormalized_img_features = unnormalized_img_features.dropna(axis=1, how="all")
# UMAP is stochastic; a fixed seed keeps this embedding reproducible across runs.
reducer = umap.UMAP(random_state=42)
unnormalized_embedding = reducer.fit_transform(unnormalized_img_features)

fig, axes = plt.subplots(1, 2, figsize=(10, 5))
# Plot the embedding fitted above; the previous version drew
# normalized_embedding here, so this figure never showed the unnormalized result.
scatter_time = axes[0].scatter(
    unnormalized_embedding[:, 0],
    unnormalized_embedding[:, 1],
    c=clipped_mitosis_time,
)
colorbar = fig.colorbar(scatter_time, ax=axes[0], label="Time")

# NOTE(review): the colour values are the weighted local centroid column while
# the colorbar is labelled "Area" — confirm which of the two is intended.
scatter_area = axes[1].scatter(
    unnormalized_embedding[:, 0],
    unnormalized_embedding[:, 1],
    c=unnormalized_img_features["skimage_centroid_weighted_local-0"],
)
colorbar = fig.colorbar(scatter_area, ax=axes[1], label="Area")


In [None]:
import plotly.graph_objs as go
import plotly.subplots as sp
import numpy as np

scs = processed_scs
fig = sp.make_subplots(rows=1, cols=2, subplot_titles=("Time", "Area"))

# Left panel: unnormalized embedding coloured by clipped mitosis-relative time,
# with timeframe, cell id and unclipped relative time shown on hover.
clipped_mitosis_time_np = np.array(clipped_mitosis_time, dtype=int)
scatter_time = go.Scatter(
    x=unnormalized_embedding[:, 0],
    y=unnormalized_embedding[:, 1],
    mode="markers",
    marker=dict(color=clipped_mitosis_time, colorbar=dict(title="Time", x=0.45, yanchor="middle", len=0.5)),
    text=[f"Timeframe: {sc.timeframe}<br>sc id: {sc.id}<br>Mitosis: {sc.meta['mitosis_relative_time']}" for sc in scs],
    customdata=[sc.id for sc in scs],
)

# Write each time value at the mean embedding position of the cells that share it.
unique_mitosis_time_points = np.unique(clipped_mitosis_time_np)
for time_point in unique_mitosis_time_points:
    indices = np.where(clipped_mitosis_time_np == time_point)[0]
    # np.where returns a 1-tuple, so emptiness must be tested on the index
    # array itself (len() of the tuple is always 1).
    if indices.size == 0:
        continue
    mean_position = np.mean(unnormalized_embedding[indices], axis=0)
    x = mean_position[0]
    y = mean_position[1]
    annotation = go.layout.Annotation(
        x=x,
        y=y,
        text=str(time_point),
        showarrow=False,
        font=dict(size=20, color="white"),
        xshift=5,
        yshift=5,
    )
    fig.add_annotation(annotation)

# Right panel: colour by area from the unnormalized table, matching the
# embedding shown in this section (the normalized table was used before).
scatter_area = go.Scatter(
    x=unnormalized_embedding[:, 0],
    y=unnormalized_embedding[:, 1],
    mode="markers",
    marker=dict(color=unnormalized_img_features["skimage_area"], colorbar=dict(title="Area", x=1, yanchor="middle", len=0.5)),
)
# add_trace for both panels (append_trace is deprecated in plotly).
fig.add_trace(scatter_time, row=1, col=1)
fig.add_trace(scatter_area, row=1, col=2)

fig.update_layout(height=500, width=1000, title_text="UMAP Embedding", clickmode="event")

### PCA analysis

In [None]:
import numpy as np
from sklearn.decomposition import PCA

# Fit a 30-component PCA on the normalized feature table, then project the
# same table onto the fitted components.
pca = PCA(n_components=30).fit(normalized_img_features)
pca_img_features = pca.transform(normalized_img_features)

In [None]:
# plot the transformed data
import matplotlib.pyplot as plt

# Scatter of the cells on the first two principal components.
pc1_scores = pca_img_features[:, 0]
pc2_scores = pca_img_features[:, 1]
plt.scatter(pc1_scores, pc2_scores)
plt.xlabel("PC1")
plt.ylabel("PC2")
plt.show()

In [None]:
# Compare the shape of the transposed PCA scores with the shape of the feature table.
pca_img_features.T.shape, normalized_img_features.shape


In [None]:
def compute_dims_corr(cell_features_1: pd.DataFrame, cell_features_2: pd.DataFrame, sort_by_abs=True):
    """Correlate every column of ``cell_features_1`` with every column of ``cell_features_2``.

    Each pair of columns is placed in a two-column DataFrame and correlated with
    ``DataFrame.corr``, which ignores rows where either value is missing. The
    two columns are aligned on their index labels when that frame is built.

    Args:
        cell_features_1: table whose columns are the dimensions (e.g. embedding
            or PCA components).
        cell_features_2: table whose columns are the features.
        sort_by_abs: if True (default), rank features by the absolute value of
            the correlation; if False, rank by the signed correlation.

    Returns:
        dict mapping each column name of ``cell_features_1`` to a DataFrame with
        the columns ``feature``, ``corr`` and ``dim``, sorted in descending order.
    """
    # Collect plain records and build the frame once, instead of growing a
    # DataFrame with pd.concat inside the nested loop.
    records = []
    for feature in cell_features_2.columns:
        for dim in cell_features_1.columns:
            pair_df = pd.DataFrame({"embedding": cell_features_1[dim], "feature": cell_features_2[feature]})
            corr = pair_df.corr()["feature"].loc["embedding"]
            records.append({"feature": feature, "corr": corr, "dim": dim})
    # Explicit columns keep the frame well-formed even when there are no records.
    feature_corr_df = pd.DataFrame(records, columns=["feature", "corr", "dim"])

    # Honour sort_by_abs: rank by |corr| only when requested.
    sort_key = (lambda col: col.abs()) if sort_by_abs else None
    dim2feature_corr_df = {}
    for dim in cell_features_1.columns:
        dim2feature_corr_df[dim] = feature_corr_df[feature_corr_df["dim"] == dim]\
            .sort_values(by="corr", ascending=False, key=sort_key)
    return dim2feature_corr_df

# Wrap the PCA scores in a DataFrame with columns PC0, PC1, ... and correlate
# each component with every normalized feature.
pc_column_names = [f"PC{i}" for i in range(pca_img_features.shape[1])]
pca_img_features = pd.DataFrame(pca_img_features, columns=pc_column_names)
compute_dims_corr(pca_img_features, normalized_img_features)