# Look at barcode diversity in real life

In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
import iss_preprocess as issp
import iss_analysis as issa
from iss_analysis.barcodes import barcodes as bar
from iss_analysis.barcodes.diagnostics import (
    plot_gmm_clusters,
    plot_error_along_sequence,
)
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# Get data

## Get all the barcodes and filter with GMM

In [None]:
project = "becalia_rabies_barseq"
mouse = "BRAC8498.3e"
data_path = f"{project}/{mouse}"
analysis_folder = issp.io.get_processed_path(data_path) / "analysis"
analysis_folder.mkdir(exist_ok=True)

### [Optional] Filter valid barcode rolonies with GMM

This has been removed. It's better to apply a harsher threshold and keep only decent
cells. See the end of the notebook for the code if you change your mind.

## Re-run error correction

In [None]:
# Re-run error correction.
# The submission is disabled by default: flip the `if False` guard to resubmit.
from pathlib import Path

if False:
    slurm_folder = Path.home() / "slurm_logs" / data_path
    job_ids = []
    for edit_distance in [1, 2]:  # range(3):
        for nrounds in [10]:  # range(10, 15):
            # Weight 1 for the first `nrounds` positions, 0 for the remaining ones
            weights = np.ones(14)
            weights[nrounds:] = 0
            jid = issa.barcodes.error_correct_acquisition(
                project,
                mouse,
                mean_intensity_threshold=0.03,
                dot_product_score_threshold=0.2,
                mean_score_threshold=0.9,
                use_gmm=False,
                n_components=1,
                # `(0)` is the plain integer 0; the trailing comma makes it a
                # tuple, like the `(1, 2)` passed elsewhere in this notebook.
                valid_components=(0,),
                max_edit_distance=edit_distance,
                weights=list(weights),
                minimum_match=max(np.sum(weights) - edit_distance - 1, 0),
                verbose=True,
                conflicts="append",
                use_slurm=True,
            )
            job_ids.append(jid)
    print(job_ids)

### Show which parameters are used for error correction datasets

In [None]:
import flexiznam as flz

flm_sess = flz.get_flexilims_session(project_id=project, reuse_token=True)
mouse_entity = flz.get_entity(name=mouse, datatype="mouse", flexilims_session=flm_sess)
error_datasets = flz.get_datasets(
    flexilims_session=flm_sess,
    origin_id=mouse_entity.id,
    return_dataseries=True,
    dataset_type="error_corrected_barcodes",
)
print(f"Fetched {len(error_datasets)} datasets")
# Compile a list of arguments for the "correct_barcode_sequences" column
# These are the arguments fwded to the actual correction function
keys = list(error_datasets.correct_barcode_sequences.values[0].keys())
arg_df = pd.DataFrame(index=error_datasets.index, columns=keys)
for l, row in error_datasets.iterrows():
    for k in keys:
        arg_df.loc[l, k] = row.correct_barcode_sequences[k]
arg_df["n_rounds"] = arg_df.weights.map(np.sum)
arg_df.head()

In [None]:
# Look at the one we want
arg_df[(arg_df.max_edit_distance == 2) & (arg_df.n_rounds == 10)].head()

# Re-run barcode assignment

In [None]:
error_correction_ds_name = "BRAC8498.3e_error_corrected_barcodes_26"
mouse = "BRAC8498.3e"
if False:
    issa.barcodes.assign_barcode_all_chambers(
        project,
        mouse_name=mouse,
        error_correction_ds_name=error_correction_ds_name,
        base_column="corrected_bases",
        method="spot_by_spot",
        parameters=dict(
            p=0.9,
            m=0.1,
            background_spot_prior=0.0001,
            spot_distribution_sigma=50,
            max_iterations=100,
            max_distance_to_mask=400,
            inter_spot_distance_threshold=50,
            max_spot_group_size=5,
            max_total_combinations=2e6,
            run_by_groupsize=False,
        ),
        valid_chambers=None,
        verbose=3,
        use_slurm=True,
        conflicts="append",
        n_workers=50,
    )

In [None]:
import flexiznam as flz

flm_sess = flz.get_flexilims_session(project_id=project, reuse_token=True)
mouse_series = flz.get_entity(name=mouse, datatype="mouse", flexilims_session=flm_sess)
mask_assignments = flz.get_datasets_recursively(
    origin_id=mouse_series.id,
    dataset_type="barcodes_mask_assignment",
    flexilims_session=flm_sess,
)
print(f"Fetched {len(mask_assignments)} chambers with mask assignments")

# [optional] save for manual inspection

In [None]:
from pathlib import Path

if False:
    slurm_folder = Path.home() / "slurm_logs" / project / mouse / "manual_clicking"
    slurm_folder.mkdir(parents=True, exist_ok=True)
    for chamber in [f"chamber_{i:02}" for i in range(7, 11)]:
        for roi in range(1, 11):
            print("\n\n\n")
            print(f"Processing {chamber} ROI {roi}")
            issa.segment.save_stitched_for_manual_clicking(
                project,
                mouse,
                chamber=chamber,
                roi=roi,
                error_correction_ds_name="BRAC8498.3e_error_corrected_barcodes_26",
                redo=False,
                save_imgs=True,
                save_rabies_masks=True,
                save_spots=True,
                save_mcherry_masks=True,
                use_slurm=True,
                slurm_folder=slurm_folder,
                scripts_name=f"manual_clicking_{chamber}_{roi}",
                correct_illumination=True,
            )

In [None]:
if False:
    for chamber in [f"chamber_{i:02}" for i in range(7, 11)]:
        issp.pipeline.segment_and_stitch_mcherry_cells(
            data_path=f"{project}/{mouse}/{chamber}", prefix="mCherry_1"
        )

# Load data

In [None]:
from iss_analysis import segment

(
    rab_spot_df,
    rab_cells_barcodes,
    rab_cells_properties,
) = issa.segment.get_barcode_in_cells(
    project,
    mouse,
    error_correction_ds_name,
    valid_chambers=None,
    save_folder=None,
    verbose=True,
)

In [None]:
rab_cells_properties.head()

In [None]:
seq = np.vstack(rab_spot_df.sequence.values)
seq.shape

In [None]:
uniq_seq, counts = np.unique(seq, return_counts=True, axis=0)

In [None]:
uniq_seq

In [None]:
prop = np.cumsum(np.sort(counts)[::-1]) / np.sum(counts)
plt.plot(prop)

In [None]:
# `np.unique` returns the sequences in sorted order, not by abundance: rank them
# by rolony count so that `main_seq` holds the 30 most frequent sequences.
most_abundant = np.argsort(counts)[::-1][:30]
main_seq = uniq_seq[most_abundant, :]
main_seq.shape

In [None]:
main_seq.shape[1]

In [None]:
from scipy.spatial.distance import pdist
from scipy.cluster.hierarchy import linkage, dendrogram

distance_matrix = pdist(main_seq, metric="hamming")
distance_matrix.shape

In [None]:
# Label each main sequence with the `bases` string of a spot that carries it.
seq_label = []
for s in main_seq:
    # Number of positions at which each spot differs from this sequence;
    # 0 mismatches means the spot carries exactly this sequence.
    n_mismatch = np.sum(seq != s, axis=1)
    seq_label.append(rab_spot_df.loc[n_mismatch == 0].iloc[0].bases)

In [None]:
linked = linkage(distance_matrix, "ward")

# 3. (Optional) Visualize the dendrogram.
plt.figure(figsize=(10, 7))
dendrogram(
    linked,
    orientation="top",
    distance_sort="descending",
    labels=seq_label,
    show_leaf_counts=True,
)
plt.title("Hierarchical Clustering Dendrogram (Hamming Distance)")
plt.xlabel("Sequences")
plt.ylabel("Distance (Hamming Fraction)")
plt.show()

In [None]:
37889 / len(seq) * 100

In [None]:
plt.plot(np.sort(counts)[::-1])
plt.semilogy()
plt.xlabel("Sequence index")
plt.ylabel("Rolony count")

In [None]:
(
    mcherry_cell_properties,
    rabies_spot_df,
    rabies_cell_properties,
) = issa.segment.match_starter_to_barcodes(
    project="becalia_rabies_barseq",
    mouse="BRAC8498.3e",
    rabies_cell_properties=rab_cells_properties,
    rab_spot_df=rab_spot_df,
    mcherry_cells=None,  ################################### USING CURATED MCHERRY CELLS BY DEFAULT AND USING MASK METHOD NOT SPOT
    which="curated",
    method="masks",
    verbose=True,
    max_starter_distance=0.5,  # distance to mask borders in um
    min_spot_number=3,  # minimum number of spots to consider a mcherry cells starter
    min_percentage_in_mask=10,  # minimum percentage of spots in mask to consider a mcherry cells starter
    mcherry_prefix="mCherry_1",  # acquisition prefix for mCherry images
)

In [None]:
starters = rabies_cell_properties.query("is_starter == True")
print(f"{len(starters)}/{len(rabies_cell_properties)} starter cells")

In [None]:
442 + 89 + 19 + 5 + 7 + 11

In [None]:
bcs = rabies_cell_properties.all_barcodes.map(set)

In [None]:
bcs = rabies_cell_properties.all_barcodes.map(set)
all_bcs = set()
for bc in bcs.values:
    all_bcs = all_bcs.union(bc)
print(len(all_bcs))

In [None]:
# For each barcode, count the cells that contain it and how many are starters.
ncellperbc = np.zeros(len(all_bcs))
nstarterperbc = np.zeros(len(all_bcs))
for ibc, bc in enumerate(all_bcs):
    # `bc` is a single barcode: test membership in each cell's barcode collection
    valid_cells = rabies_cell_properties.all_barcodes.map(lambda x: bc in set(x))
    ncellperbc[ibc] = valid_cells.sum()
    nstarterperbc[ibc] = (valid_cells & rabies_cell_properties.is_starter).sum()

In [None]:
np.__version__

In [None]:
mcherry_cell_properties, rab_spot_df, rabies_cell_properties = (
    issa.segment.match_starter_to_barcodes(
        project,
        mouse,
        rab_cells_properties,
        rab_spot_df,
        mcherry_cells=None,
        verbose=True,
        max_starter_distance=0.5,  # distance to mask borders in um
        min_spot_number=3,  # minimum number of spots to consider a mcherry cells starter
        min_percentage_in_mask=10,  # minimum percentage of spots in mask to consider a mcherry cells starter
        mcherry_prefix="mCherry_1",  # acquisition prefix for mCherry images
    )
)

In [None]:
rabies_cell_properties

In [None]:
starters_df = rabies_cell_properties.query("is_starter == True")

In [None]:
all_barcodes = set()
for bc in rabies_cell_properties.all_barcodes.values:
    all_barcodes = all_barcodes.union(list(bc))
print(f"{len(all_barcodes)} unique barcodes")

In [None]:
rabies_cell_properties.all_barcodes.isna().sum()

# Check assignment properties

How many spots per cell? How many barcodes per cell?

In [None]:
fig = plt.figure(figsize=(5, 3))
ax = fig.add_subplot(111)
ax.hist(
    mcherry_cell_properties.n_barcode_spots,
    bins=np.arange(36) - 0.5,
    log=True,
    cumulative=False,
    color="tomato",
)
ax.set_xlabel("Number of barcode spots")
ax.set_ylabel("Number of mCherry cells")
ax.set_xlim(-0.6, 35)

In [None]:
# Count the spots assigned to each mask (spots with cell_mask <= -1 are excluded)
assigned_spot = rab_spot_df.query("cell_mask > -1")
spot_per_cell = assigned_spot[["mask_uid", "cell_mask"]].groupby("mask_uid").agg(len)
spot_per_cell.columns = ["n_spots"]

fig, axes = plt.subplots(1, 2, figsize=(10, 4))
# Integer-centred bins. The edges must reach max + 0.5, otherwise the cells with
# the largest spot count fall outside the last bin and are silently dropped.
_ = axes[0].hist(
    spot_per_cell.n_spots,
    bins=np.arange(spot_per_cell.n_spots.max() + 2) - 0.5,
    log=True,
)
axes[0].set_xlim(0, spot_per_cell.n_spots.max())
axes[0].set_xlabel("Number of spots per cell")
axes[0].set_ylabel("Number of rabies cells")

# Zoom on cells with fewer than 20 spots
_ = axes[1].hist(spot_per_cell.n_spots, bins=np.arange(20) - 0.5)
axes[1].set_xlim(0, 20)
axes[1].set_xlabel("Number of spots per cell")
axes[1].set_ylabel("Number of rabies cells")
axes[1].set_xticks(np.arange(20).astype(int))
fig.tight_layout()

# Now same for mCherry cells

In [None]:
# For every barcode, count the rabies cells containing it and the starters among them
all_bc = rab_spot_df.corrected_bases.unique()

n_rab = np.zeros(len(all_bc))
n_start = np.zeros(len(all_bc))
for ibc, bc in enumerate(all_bc):
    rab_cells = rabies_cell_properties.all_barcodes.map(
        lambda x: any([i == bc for i in x])
    )
    n_rab[ibc] = rab_cells.sum()
    n_start[ibc] = rabies_cell_properties[rab_cells].is_starter.sum()

valid_bc = n_rab > 5
fig, axes = plt.subplots(1, 2, figsize=(10, 3))

# Integer-centred bins. The edges must reach max + 0.5, otherwise the barcode
# with the most cells falls outside the last bin and is silently dropped.
axes[0].hist(n_rab[valid_bc], bins=np.arange(n_rab.max() + 2) - 0.5, log=True)
axes[0].set_xlabel("Number of rabies cells")
axes[0].set_ylabel("Number of barcodes")
axes[1].hist(
    n_start[valid_bc],
    color="tomato",
    bins=np.arange(n_start.max() + 2) - 0.5,
    log=False,
)
axes[1].set_xlabel("Number of starter cells")
axes[1].set_ylabel("Number of barcodes")
fig.suptitle("Including only barcodes with more than 5 cells (min 3 spots per cell)")

In [None]:
# Keep only cells with more than 5 assigned spots and flag the ones that have at
# least one spot with a positive `mcherry_mask_label`.
assigned_spots = rab_spot_df.query("cell_mask > -1")
spot_per_cell = assigned_spots[["mask_uid", "cell_mask"]].groupby("mask_uid").agg(len)
spot_per_cell.columns = ["n_spots"]
# Use the `assigned_spots` defined in this cell, not a variable left over from
# an earlier cell.
has_spot_over_mcherry_mask = (
    assigned_spots[["mask_uid", "mcherry_mask_label"]].groupby("mask_uid").agg("sum")
    > 0
)
valid_cells = spot_per_cell.query("n_spots > 5").index

rab_spot_df_5rol = rab_spot_df.query("cell_mask > -1 and mask_uid in @valid_cells")
rab_cells_properties_5rol = rab_cells_properties.query(
    "mask_uid in @valid_cells"
).copy()
rab_cells_properties_5rol["could_be_starter"] = has_spot_over_mcherry_mask.loc[
    rab_cells_properties_5rol.index
].values

all_bc = rab_spot_df_5rol.corrected_bases.unique()

n_rab = np.zeros(len(all_bc))
n_start = np.zeros(len(all_bc))
for ibc, bc in enumerate(all_bc):
    rab_cells = rab_cells_properties_5rol.all_barcodes.map(
        lambda x: any([i == bc for i in x])
    )
    n_rab[ibc] = rab_cells.sum()
    n_start[ibc] = rab_cells_properties_5rol[rab_cells].could_be_starter.sum()

valid_bc = n_rab > 5
fig, axes = plt.subplots(1, 2, figsize=(10, 3))
print(f"{n_rab[valid_bc].sum()} cells")
print(f"{n_start[valid_bc].sum()} starters")
# Integer-centred bins. The edges must reach max + 0.5, otherwise the barcode
# with the most cells falls outside the last bin and is silently dropped.
axes[0].hist(n_rab[valid_bc], bins=np.arange(n_rab.max() + 2) - 0.5, log=True)
axes[0].set_xlabel("Number of rabies cells")
axes[0].set_ylabel("Number of barcodes")
axes[1].hist(
    n_start[valid_bc],
    color="tomato",
    bins=np.arange(n_start.max() + 2) - 0.5,
    log=False,
)
axes[1].set_xlabel("Number of starter cells")
axes[1].set_ylabel("Number of barcodes")
fig.suptitle(
    "Including only barcodes with more than 5 cells (min 5 spots per cell) - 1 spot over mCherry"
)

In [None]:
# Same analysis restricted to cells with more than 5 assigned spots, counting
# cells flagged `is_starter`.
assigned_spots = rab_spot_df.query("cell_mask > -1")
spot_per_cell = assigned_spots[["mask_uid", "cell_mask"]].groupby("mask_uid").agg(len)
spot_per_cell.columns = ["n_spots"]
valid_cells = spot_per_cell.query("n_spots > 5").index

rab_spot_df_5rol = rab_spot_df.query("cell_mask > -1 and mask_uid in @valid_cells")
rab_cells_properties_5rol = rab_cells_properties.query("mask_uid in @valid_cells")

all_bc = rab_spot_df_5rol.corrected_bases.unique()

n_rab = np.zeros(len(all_bc))
n_start = np.zeros(len(all_bc))
for ibc, bc in enumerate(all_bc):
    rab_cells = rab_cells_properties_5rol.all_barcodes.map(
        lambda x: any([i == bc for i in x])
    )
    n_rab[ibc] = rab_cells.sum()
    n_start[ibc] = rab_cells_properties_5rol[rab_cells].is_starter.sum()

valid_bc = n_rab > 5
fig, axes = plt.subplots(1, 2, figsize=(10, 3))
print(f"{n_rab[valid_bc].sum()} cells")
print(f"{n_start[valid_bc].sum()} starters")
# Integer-centred bins. The edges must reach max + 0.5, otherwise the barcode
# with the most cells falls outside the last bin and is silently dropped.
axes[0].hist(n_rab[valid_bc], bins=np.arange(n_rab.max() + 2) - 0.5, log=True)
axes[0].set_xlabel("Number of rabies cells")
axes[0].set_ylabel("Number of barcodes")
axes[1].hist(
    n_start[valid_bc],
    color="tomato",
    bins=np.arange(n_start.max() + 2) - 0.5,
    log=False,
)
axes[1].set_xlabel("Number of starter cells")
axes[1].set_ylabel("Number of barcodes")
fig.suptitle("Including only barcodes with more than 5 cells (min 5 spots per cell)")

In [None]:
# Distance to starter between cells in the same slice
import itertools

coord_cols = ["ara_x", "ara_y", "ara_z"]
starter2starter_distances = []
starter2presynaptic_distances = []
# concatenate chamber and roi to make a slice_uid
rab_cells_properties_5rol = rab_cells_properties_5rol.copy()
rab_cells_properties_5rol["slice_uid"] = (
    rab_cells_properties_5rol.chamber + "_" + rab_cells_properties_5rol.roi.astype(str)
)
for bc, bc_df in rab_cells_properties_5rol.groupby("main_barcode"):
    n_starter = bc_df.is_starter.sum()
    if not n_starter:
        continue
    starter_cells = bc_df.query("is_starter")
    if len(starter_cells) > 1:
        # Several starters share this barcode: pairwise distances within each slice
        for _, slice_starters in starter_cells.groupby("slice_uid"):
            for combi in itertools.combinations(slice_starters.index, 2):
                coords = slice_starters.loc[list(combi), coord_cols]
                starter2starter_distances.append(
                    np.linalg.norm(np.diff(coords.values, axis=0))
                )
    elif len(starter_cells):
        # Single starter: distance from it to every other cell of the same slice
        starter_coords = starter_cells[coord_cols].values
        slice_uid = starter_cells.slice_uid.values[0]
        valid = bc_df.query("slice_uid == @slice_uid")
        valid = valid[valid.index != starter_cells.index[0]]
        cell_coords = valid[coord_cols].values
        if len(cell_coords):
            # axis=1 gives one distance per cell; without it the norm collapses
            # all cells of the barcode into a single Frobenius norm.
            starter2presynaptic_distances.extend(
                np.linalg.norm(cell_coords - starter_coords, axis=1)
            )

# x1000 to plot in um (assumes the ara_* coordinates are in mm - TODO confirm)
starter2starter_distances = np.array(starter2starter_distances) * 1000
starter2presynaptic_distances = np.array(starter2presynaptic_distances) * 1000

h, b, p = plt.hist(
    starter2presynaptic_distances,
    bins=100,
    log=False,
    label="Presynaptic cells",
    alpha=0.5,
)
# Reuse the bin edges of the first histogram so both are comparable
_ = plt.hist(
    starter2starter_distances, bins=b, log=False, alpha=0.5, label="Starter cells"
)
plt.xlabel("Distance (um)")
plt.legend()
plt.ylabel("Number of cells")
plt.suptitle("Distance to starter cells (same slice)")

In [None]:
# Distance to starter between cells across all slices
import itertools

coord_cols = ["ara_x", "ara_y", "ara_z"]
starter2starter_distances = []
starter2presynaptic_distances = []
n2more = 0  # barcodes with 2 or more starters
n1 = 0  # barcodes with exactly 1 starter
n0 = 0  # barcodes without any starter
# concatenate chamber and roi to make a slice_uid
rab_cells_properties_5rol = rab_cells_properties_5rol.copy()
rab_cells_properties_5rol["slice_uid"] = (
    rab_cells_properties_5rol.chamber + "_" + rab_cells_properties_5rol.roi.astype(str)
)
for bc, bc_df in rab_cells_properties_5rol.groupby("main_barcode"):
    n_starter = bc_df.is_starter.sum()
    if not n_starter:
        n0 += 1
        continue
    starter_cells = bc_df.query("is_starter")
    # `>= 2`, not `== 2`: barcodes with 3 or more starters also belong here
    if len(starter_cells) >= 2:
        n2more += 1
        for combi in itertools.combinations(starter_cells.index, 2):
            coords = starter_cells.loc[list(combi), coord_cols]
            starter2starter_distances.append(
                np.linalg.norm(np.diff(coords.values, axis=0))
            )
    elif len(starter_cells) == 1:
        n1 += 1
        starter_coords = starter_cells[coord_cols].values
        valid = bc_df[bc_df.index != starter_cells.index[0]]
        cell_coords = valid[coord_cols].values
        if len(cell_coords):
            # axis=1 gives one distance per cell; without it the norm collapses
            # all cells of the barcode into a single Frobenius norm.
            starter2presynaptic_distances.extend(
                np.linalg.norm(cell_coords - starter_coords, axis=1)
            )

print(f"More than 1 starter: {n2more}")
print(f"1 starter: {n1}")
print(f"0 starter: {n0}")
# x1000 to plot in um (assumes the ara_* coordinates are in mm - TODO confirm)
starter2starter_distances = np.array(starter2starter_distances) * 1000
starter2presynaptic_distances = np.array(starter2presynaptic_distances) * 1000

h, b, p = plt.hist(
    starter2presynaptic_distances,
    bins=100,
    log=False,
    label="Presynaptic cells",
    alpha=0.5,
)
# Reuse the bin edges of the first histogram so both are comparable
_ = plt.hist(
    starter2starter_distances, bins=b, log=False, alpha=0.5, label="Starter cells"
)
plt.xlabel("Distance (um)")
plt.legend()
plt.ylabel("Number of cells")
plt.suptitle("Distance to starter cells (all slices)")

# Debug probabilistic model for rolony attribution

These cells run the model step by step to see how it works.

In [None]:
# get the mask
project = "becalia_rabies_barseq"
mouse_name = "BRAC8498.3e"
data_path = f"{project}/{mouse_name}"
chamber = "chamber_08"
prefix = "hybridisation_round_3_1"
roi = 1

from iss_preprocess.pipeline.segment import get_cell_masks

masks = get_cell_masks(data_path=f"{project}/{mouse_name}/{chamber}", roi=roi)

In [None]:
sdf = issp.pipeline.stitch.stitch_cell_dataframes(
    data_path=f"{project}/{mouse}/chamber_08", prefix="mCherry_1", ref_prefix=None
)

In [None]:
fig = plt.figure(figsize=(20, 10))
for roi in range(1, 11):
    ax = plt.subplot(2, 5, roi, aspect="equal")
    sp = rab_spot_df.query(f"roi == {roi} & chamber == 'chamber_08'")
    plt.scatter(sp.x, sp.y, s=1, color="k", alpha=0.5)
    s = sdf.query(f"roi == {roi}")
    plt.scatter(s.x, s.y, s=10, color="r", alpha=1)
    ax.set_axis_off()
fig.tight_layout()

In [None]:
error_correction_ds_name

In [None]:
from iss_analysis import segment

(
    rab_spot_df,
    rab_cells_barcodes,
    rab_cells_properties,
) = issa.segment.get_barcode_in_cells(
    project,
    mouse,
    error_correction_ds_name,
    valid_chambers=None,
    save_folder=None,
    verbose=True,
)

# DEBUG MATCH STARTER

In [None]:
rab_cells_properties.mask_uid

In [None]:
mcherry_cell_properties.is_starter

In [None]:
mouse = "BRAC8498.3e"

In [None]:
# Sweep the starter search radius and record, for each value, the number of
# mCherry cells classified as starter and the number of starter rabies cells
# matched to more than one mCherry cell.
starter_per_dist = dict()
multi_assignment = dict()
for max_dst in np.arange(2, 15, 0.2):
    # NOTE(review): other cells of this notebook unpack three values from
    # `match_starter_to_barcodes` (mcherry cells, spots, rabies cells) and do not
    # pass `redo`; confirm this call matches the current signature.
    rabies_cell_properties, mcherry_cell_properties = (
        issa.segment.match_starter_to_barcodes(
            project,
            mouse,
            rab_cells_properties,
            rab_spot_df,
            mcherry_cells=None,
            redo=True,
            verbose=False,
            max_starter_distance=max_dst,
            min_spot_number=4,
        )
    )
    nst = mcherry_cell_properties.is_starter.value_counts()
    # `True` is missing from the counts when no cell is classified as starter
    n_starters = nst.get(True, 0)
    starter_per_dist[max_dst] = n_starters
    multi_assignment[max_dst] = (
        rabies_cell_properties[rabies_cell_properties.is_starter].distance2mcherry.map(
            len
        )
        > 1
    ).sum()
    print(f"Max distance: {max_dst:.1f}, n starter: {n_starters}/{nst.sum()}")

In [None]:
fig = plt.figure(figsize=(7, 3))
ax = plt.subplot(121)
ax.set_xlabel("Search radius (um)")
ax.set_ylabel("# of mcherry classified as starter")
ax.axvline(5, color="black", linestyle="--", label="5 um")
plt.plot(*zip(*starter_per_dist.items()), ".", label="n starters")

ax = plt.subplot(122)
ax.plot(*zip(*multi_assignment.items()), ".", label="multi assignment", color="red")
ax.set_ylabel("# of rabies cell overlaping\nwith multiple mcherry")
ax.set_xlabel("Search radius (um)")
ax.axvline(5, color="black", linestyle="--", label="5 um")
plt.tight_layout()

In [None]:
chamber = "chamber_07"
roi = 4

# NOTE(review): `out` is not defined in the visible part of this notebook; it is
# used here as a table of mCherry cells with `x`, `y`, `is_starter`, `chamber`
# and `roi` columns - confirm where it comes from.
mch = out.query(f'chamber == "{chamber}" and roi == {roi}')
spots = rab_spot_df.query(f'chamber == "{chamber}" and roi == {roi}')
# NOTE(review): other cells select assigned spots with `cell_mask > -1`; confirm
# that `> 1` is intended here.
spots = spots.query("cell_mask > 1").copy()
barcode = spots.corrected_bases.unique()
spots["barcode_id"] = spots.corrected_bases.map({b: i for i, b in enumerate(barcode)})

# Left panel: whole ROI. Right panel: zoom, with a circle around each mCherry cell.
fig = plt.figure(figsize=(20, 10))
for i in range(2):
    ax = fig.add_subplot(1, 2, 1 + i, aspect="equal")

    if i:
        # Do not reuse `i` here: it is still needed below to tell the panels apart
        for _, row in mch.iterrows():
            circl = plt.Circle((row.x, row.y), 5 / 0.2, fill=False, color="k")
            ax.add_artist(circl)
    plt.scatter(
        mch.x,
        mch.y,
        c=mch.is_starter,
        edgecolors="k",
        s=100,
        cmap="coolwarm",
        alpha=0.5,
    )
    plt.scatter(
        spots.x,
        spots.y,
        ec="k" if i else "None",
        c=spots.barcode_id % 20,
        s=20,
        cmap="tab20",
        alpha=0.5,
    )

    if i == 0:
        ax.set_xlim(mch.x.min() - 100, mch.x.max() + 100)
        ax.set_ylim(mch.y.min() - 100, mch.y.max() + 100)
    else:
        ax.set_xlim(13700, 15000)
        ax.set_ylim(15000, 16000)
    ax.set_xticks([])
    ax.set_yticks([])
fig.tight_layout()

In [None]:
20 * 0.2

In [None]:
rabies_cell_properties = rab_cells_properties
starters = None
verbose = True
redo = True

In [None]:
# Debug version of the starter matching, restricted to chamber_07 / roi 1.
# It leaves `ch`, `roi`, `starter_df`, `cell_masks` and `rab_cells_slice` defined
# for the cells below.
manual_click = (
    issp.io.get_processed_path(f"{project}/{mouse}") / "analysis" / "starter_cells"
)
if starters is None:
    starters = issa.io.get_starter_cells(project, mouse)

rabies_cell_properties["starter"] = False
rabies_cell_properties["starter_id"] = "none"
rabies_cell_properties["distance"] = np.nan
for (ch, roi), starter_df in starters.groupby(["chamber", "roi"]):
    # XXX: TEMPORARY FIX
    if ch != "chamber_07":
        continue
    if roi != 1:
        continue
    # XXX: TEMPORARY FIX end
    fname = manual_click / f"rabies_cells_{mouse}_{ch}_roi_{roi}.pkl"
    if (not redo) and fname.exists():
        if verbose:
            print(f"Loading {fname}")
        rab_this_roi = pd.read_pickle(fname)
        try:
            rabies_cell_properties.loc[rab_this_roi.index, "starter"] = rab_this_roi[
                "starter"
            ]
        except KeyError:
            raise ValueError("Mask IDs have changed. Use `redo` to update matching")
        rabies_cell_properties.loc[rab_this_roi.index, "starter_id"] = rab_this_roi[
            "starter_id"
        ]
        continue
    if verbose:
        print(f"Determining starter cell for {ch} {roi}")
        # get the masks
        print("Loading masks")
    # XXX: TEMPORARY FIX
    if ch != "chamber_07":
        cell_masks = issp.segment.get_cell_masks(f"{project}/{mouse}/{ch}", roi)
    else:
        print("Loading masks manually")
        # manually load the masks
        # NOTE(review): hard-coded local path; the file name mentions
        # "chamber_07_2" while this loop processes roi 1 - confirm it is the
        # right mask.
        p = Path(
            "/Users/blota/Data/processed/becalia_rabies_barseq/temp/BRAC8498.3e_chamber_07_2_all_cells_mask.tif"
        )
        cell_masks = issp.io.load_stack(p)[..., 0]
    # XXX: TEMPORARY FIX END

    rab_cells_slice = rabies_cell_properties[
        (rabies_cell_properties.roi == roi) & (rabies_cell_properties.chamber == ch)
    ].copy()
    if verbose:
        print("Finding starter cell")
    # Stop here: without the break the loop keeps iterating over the remaining
    # (skipped) groups and `ch`, `roi` and `starter_df` end up describing the last
    # group instead of the slice whose masks were just loaded.
    break

In [None]:
# Assign each manually clicked starter to a rabies cell: the mask under the click
# if that mask is a rabies cell, otherwise the closest rabies cell of the slice.
for st_id, st in starter_df.iterrows():
    mask_id = cell_masks[int(st.y), int(st.x)]
    mask_uid = f"{ch}_{roi}_{int(mask_id)}"
    if mask_uid in rabies_cell_properties.index:
        assert not rabies_cell_properties.loc[
            mask_uid, "starter"
        ], "Starter already assigned"
        rabies_cell_properties.loc[mask_uid, "starter"] = True
        rabies_cell_properties.loc[mask_uid, "starter_id"] = st_id
        # the click is inside the mask
        rabies_cell_properties.loc[mask_uid, "distance"] = 0
        continue
    # we did not find the mask in the rabies_cell_properties
    if verbose:
        if mask_id == 0:
            print(f"Starter {st_id} not above any mask")
        else:
            print(f"Starter {st_id} in mask {mask_uid}, which is not a rabies cell.")
    # find the closest mask
    # create a temporary column d2thisstarter
    rab_cells_slice["d2thisstarter"] = np.sqrt(
        (rab_cells_slice.x - st.x) ** 2 + (rab_cells_slice.y - st.y) ** 2
    )
    mask_uid = rab_cells_slice.d2thisstarter.idxmin()
    distance = rab_cells_slice.loc[mask_uid, "d2thisstarter"]
    assert not rabies_cell_properties.loc[
        mask_uid, "starter"
    ], "Starter already assigned"
    rabies_cell_properties.loc[mask_uid, "starter"] = True
    rabies_cell_properties.loc[mask_uid, "starter_id"] = st_id
    # Store the actual distance to the matched cell (same units as the x/y columns)
    rabies_cell_properties.loc[mask_uid, "distance"] = distance

In [None]:
rabies_cell_properties.loc[mask_uid]

# END DEBUG MATCH STARTER

In [None]:
rab_cells_properties = issa.segment.match_starter_to_barcodes(
    project,
    mouse,
    rab_cells_properties,
    rab_spot_df,
    mcherry_cells=None,
    redo=True,
    verbose=True,
)

In [None]:
if False:
    dapi = issp.pipeline.stitch_registered(
        data_path + f"/{chamber}",
        prefix=prefix,
        roi=roi,
        projection=None,  # "corrected",
        channels=[3, 1],
    )

In [None]:
mask_centers = issp.pipeline.segment.make_cell_dataframe(
    f"{project}/{mouse_name}/{chamber}",
    roi,
    masks=masks,
    atlas_size=None,
)

In [None]:
barcode_roi = rab_spot_df[
    (rab_spot_df.chamber == chamber) & (rab_spot_df.roi == roi)
].copy()
plt.subplot(1, 1, 1, aspect="equal")
plt.scatter(mask_centers.x, mask_centers.y, c="purple", s=20, alpha=0.5)
plt.scatter(barcode_roi.x, barcode_roi.y, c="k", s=1, alpha=0.5)
plt.ylim(plt.ylim()[::-1])

In [None]:
nc = 20
ma = masks.astype(float)
ma[ma == 0] = np.nan

In [None]:
from iss_analysis import vis

infiteloop = dict(m=0.1, spot_prior=0.0001, spot_distribution_sigma=50)
# run on a part of the data
xlim = np.array([10200, 11000]).astype(int)
ylim = np.array([9000, 9800]).astype(int)

mc = vis.get_spot_part(mask_centers, xlim, ylim, return_mask=False)

In [None]:
from iss_analysis import vis

fig = plt.figure(figsize=(20, 20))
ax = plt.subplot(1, 1, 1, aspect="equal")
bc, valid_sp = vis.get_spot_part(barcode_roi, xlim, ylim, return_mask=True)
mask_assignment = bc.cell_mask.values.astype(int)

vis.plot_bc_over_mask(
    ax,
    ma,
    bc,
    mask_assignment,
    xlim,
    ylim,
    nc=10,
    show_bg_barcodes=True,
    mask_alpha=0.5,
    line2mask=True,
    mask_centers=mc,
)

In [None]:
def plot_bc_over_mask(ax, ma, bc, mask_assignment, xlim, ylim, nc=12):
    """Plot barcode spots on top of the mask image for a cropped region.

    Spots are drawn in three groups based on `mask_assignment`: -2 (white
    markers), -1 (black markers) and >= 0 (assigned spots, drawn twice: a larger
    marker coloured by barcode and a smaller one coloured by mask).

    Args:
        ax: matplotlib axes to draw on.
        ma: mask image, cropped to `xlim` / `ylim` with `get_stack_part`.
        bc: spot table with `x`, `y` and `corrected_bases` columns.
        mask_assignment: integer array with one entry per row of `bc`.
        xlim: (min, max) of the region along x, also used to shift spot coordinates.
        ylim: (min, max) of the region along y, also used to shift spot coordinates.
        nc: number of colours; colour indices are taken modulo `nc` and the
            colormap is normalised to [0, nc].
    """
    ax.imshow(
        get_stack_part(ma, xlim, ylim),
        alpha=0.5,
        cmap="Set3",
        vmin=0,
        vmax=nc,
        interpolation="none",
    )
    sp_col = mask_assignment % nc
    too_far = mask_assignment == -2
    background = mask_assignment == -1
    assigned = mask_assignment >= 0
    barcodes = list(bc.corrected_bases.unique())
    bc_color = np.array([barcodes.index(b) for b in bc.corrected_bases]).astype(int)
    ax.scatter(
        bc.x.values[too_far] - xlim[0],
        bc.y.values[too_far] - ylim[0],
        color="w",
        edgecolors="k",
        linewidths=0.2,
        s=5,
        alpha=0.5,
    )
    ax.scatter(
        bc.x.values[background] - xlim[0],
        bc.y.values[background] - ylim[0],
        s=5,
        alpha=0.5,
        color="k",
        edgecolors="k",
    )
    ax.scatter(
        bc.x.values[assigned] - xlim[0],
        bc.y.values[assigned] - ylim[0],
        # Wrap on `nc` (not a fixed 20) so the indices stay within the [0, nc]
        # range the colormap is normalised to.
        c=bc_color[assigned] % nc,
        s=30,
        alpha=1,
        cmap="Set3",
        vmin=0,
        vmax=nc,
        marker="o",
        facecolors="none",
    )
    ax.scatter(
        bc.x.values[assigned] - xlim[0],
        bc.y.values[assigned] - ylim[0],
        c=sp_col[assigned],
        s=20,
        alpha=0.5,
        cmap="Set3",
        vmin=0,
        vmax=nc,
        edgecolors="none",
    )
    ax.set_xticks([])
    ax.set_yticks([])

In [None]:
fig, axes = plt.subplots(
    len(mask_assignment_id), 2, figsize=(10, len(mask_assignment_id) * 5), dpi=300
)
previous = None
for iax, assi in enumerate(mask_assignment_id):
    mask_assignment = np.array(mc.index[assi])
    mask_assignment[assi == -1] = -1
    mask_assignment[assi == -2] = -2
    ax = axes[iax, 0]
    ax.set_ylabel("Iteration %d" % iax)
    plot_bc_over_mask(ax, ma, bc, mask_assignment, xlim, ylim, nc=12)
    if previous is None:
        previous = mask_assignment
    changed = mask_assignment != previous
    mask_a = mask_assignment.copy()
    mask_a[~changed] = -2
    plot_bc_over_mask(axes[iax, 1], ma, bc, mask_a, xlim, ylim, nc=12)
    previous = mask_assignment

axes[0, 0].set_title("Assigned barcodes")
axes[0, 1].set_title("Changed barcodes")
fig.tight_layout()

In [None]:
import warnings

# Turn every warning into an exception so numerical problems stop the run
warnings.filterwarnings("error")

# run on a part of the data
xlim = np.array([10200, 11000]).astype(int)
ylim = np.array([9000, 9800]).astype(int)
# Use the helper from `iss_analysis.vis`, as the other cells do; a bare
# `get_spot_part` is never imported in this notebook.
bc, valid_sp = vis.get_spot_part(barcode_roi, xlim, ylim, return_mask=True)
mc = vis.get_spot_part(mask_centers, xlim, ylim, return_mask=False)

# Parameters kept fixed during the sweep (`p` and `spot_distribution_sigma` are
# the loop variables below).
m = 0.1
background_spot_prior = 0.0001
distance_threshold = 200

# One panel per (spot_distribution_sigma, p) pair: rows are sigma, columns are p
fig, axes = plt.subplots(3, 3, figsize=(20, 20))

for ix, spot_distribution_sigma in enumerate([25, 50, 100]):
    for iy, p in enumerate([0.7, 0.8, 0.9]):
        ax = axes[ix, iy]
        ax.set_aspect("equal")
        if ix == 0:
            ax.set_title(f"p={p}")
        if iy == 0:
            ax.set_ylabel(f"spot_distribution_sigma={spot_distribution_sigma}")
        params = dict(
            p=p,
            m=m,
            background_spot_prior=background_spot_prior,
            spot_distribution_sigma=spot_distribution_sigma,
            distance_threshold=distance_threshold,
        )
        mask_assignment_id = issa.barcodes.barcodes.assign_barcodes_to_masks(
            spots=bc,
            masks=mc,
            max_iterations=100,
            verbose=True,
            base_column="corrected_bases",
            **params,
        )
        # Convert positions in `mc` to mask labels, keeping the -1 / -2 codes
        mask_assignment = np.array(mc.index[mask_assignment_id])
        mask_assignment[mask_assignment_id == -1] = -1
        mask_assignment[mask_assignment_id == -2] = -2
        plot_bc_over_mask(ax, ma, bc, mask_assignment, xlim, ylim, nc=12)
fig.tight_layout()
fig.savefig(analysis_folder / f"{chamber}_{roi}_barcode_assignment.png", dpi=300)

In [None]:
fig = plt.figure(figsize=(20, 20))
xlim = np.array([9000, 9500]).astype(int)
ylim = np.array([6000, 7000]).astype(int)
ax = fig.add_subplot(111, aspect="equal")
st = get_stack_part(dapi, xlim, ylim)
rgb = issp.vis.to_rgb(
    st,
    colors=[(0, 0, 1), (1, 0, 0)],
    vmin=np.nanpercentile(st, 1, axis=(0, 1)),
    vmax=np.nanpercentile(st, 99, axis=(0, 1)),
)
im = ax.imshow(rgb)

In [None]:
from pathlib import Path

ch_path = "becalia_rabies_barseq/BRAC8498.3e/chamber_07"
issp.pipeline.segment.remove_all_duplicate_masks(
    data_path=ch_path,
    prefix="hybridisation_round_3_1",
    upper_overlap_thresh=0.3,
)

In [None]:
# For two example barcodes, histogram at which positions each base index (0-3)
# appears in the raw `sequence` of the spots assigned to that barcode.
example_barcodes = ["AACTAAGCATAGAA", "CCTCTTACATAGCT"]
position_bins = np.arange(0, 15) - 0.5
for i, seq in enumerate(example_barcodes):
    mat = np.vstack(bc.loc[bc.corrected_bases == seq, "sequence"].values)
    print(mat.shape)
    # np.where returns (rows, columns); the columns are the sequence positions
    all_base = [np.where(mat == base)[1] for base in range(4)]
    plt.subplot(2, 1, 1 + i)
    _ = plt.hist(all_base, bins=position_bins, stacked=True)

In [None]:
vc = bc[(bc.chamber == chamber) & (bc.roi == roi)].corrected_bases.value_counts()
vc.head()

In [None]:
# Scatter the spots of one barcode within the current chamber / roi.
in_roi = (bc.chamber == chamber) & (bc.roi == roi)
roi_bc = bc[in_roi]
example_spots = roi_bc[roi_bc.corrected_bases == "CCTACATCATAATA"]
plt.subplot(1, 1, 1, aspect="equal")
plt.scatter(example_spots.x, example_spots.y, c="r", s=5, alpha=0.1)
plt.xlim(2000, 7000)
# Inverted y axis (image convention): larger y at the bottom
plt.ylim(15000, 10000)

In [None]:
# Load the data we want
edit_distance = 2
nrounds = 10

# Only the first `nrounds` of the 14 entries keep a non-zero weight
weights = np.ones(14)
weights[nrounds:] = 0

correction_params = dict(
    n_components=3,
    valid_components=(1, 2),
    mean_intensity_threshold=0.01,
    dot_product_score_threshold=0.2,
    mean_score_threshold=0.75,
    max_edit_distance=edit_distance,
    weights=list(weights),
    minimum_match=max(np.sum(weights) - edit_distance - 1, 0),
    verbose=True,
    conflicts="skip",
    use_slurm=False,
)
barcode_spots = issa.barcodes.error_correct_acquisition(
    project, mouse, **correction_params
)
barcode_spots.head()

In [None]:
# Quality thresholds applied to the rolonies. Each key is forwarded to
# get_barcodes as `<key>_threshold` and the same dict is given to the plot.
thresholds = dict(
    mean_intensity=0.03,
    dot_product_score=0.2,
    mean_score=0.9,
)
# Plain values on purpose: a trailing comma after the value would wrap it in a
# tuple, and a non-empty tuple such as (False,) is truthy.
use_gmm = False
n_components = 1
valid_components = (0,)

if False:
    # old params
    valid_components = [1, 2]
    thresholds = dict(mean_intensity=0.01, dot_product_score=0.2, mean_score=0.5)
    n_components = 3
    use_gmm = True

barcode_spots, gmm, all_barcode_spots = bar.get_barcodes(
    acquisition_folder=data_path,
    n_components=n_components,
    valid_components=valid_components,
    use_gmm=use_gmm,
    **{f"{k}_threshold": v for k, v in thresholds.items()},
)

pg = plot_gmm_clusters(all_barcode_spots, gmm, thresholds=thresholds)
fig = pg.figure
fig.savefig(analysis_folder / "gmm_clusters.png")

## Plot proportion of barcodes in each cluster

In [None]:
# Count the spots of every GMM cluster in every (chamber, roi).
ch_r_gp = all_barcode_spots.groupby(["chamber", "roi"])
n_rois = len(ch_r_gp)
gmm_labels = np.sort(all_barcode_spots.gmm_label.unique())
n_comp = len(gmm_labels)
n_per_lab = np.zeros((n_rois, n_comp))
roi_names = []

for i, ((ch, r), roi_spots) in enumerate(ch_r_gp):
    # Names are collected while iterating so tick labels always match the rows
    roi_names.append(f"{ch}_{r}")
    label_counts = roi_spots.groupby("gmm_label").size()
    # A ROI may lack some clusters: reindex so every row has one entry per label
    n_per_lab[i] = label_counts.reindex(gmm_labels, fill_value=0).to_numpy()

fig = plt.figure(figsize=(20, 10))
ax = fig.add_subplot(2, 1, 1)
lines = ax.plot(np.arange(n_rois), n_per_lab, "o-")
ax.legend(lines, [f"cluster {lab}" for lab in gmm_labels])
ax.set_xticks(np.arange(n_rois))
ax.set_xticklabels([])
ax.set_ylabel("Number of spots")
ax = fig.add_subplot(2, 1, 2)
ax.plot(np.arange(n_rois), n_per_lab / np.sum(n_per_lab, axis=1, keepdims=True), "o-")
ax.set_xticks(np.arange(n_rois))
ax.set_ylabel("Fraction of spots")
_ = ax.set_xticklabels(roi_names, rotation=90)
fig.tight_layout()

## Plot one example chamber

In [None]:
chamber = 9
roi = 9

# Spots of the example ROI; chamber names are stored as "chamber_XX"
df = all_barcode_spots[
    (all_barcode_spots["chamber"] == f"chamber_{chamber:02}")
    & (all_barcode_spots["roi"] == roi)
]
# One figure per cluster.
# NOTE(review): assumes gmm_label takes the values 0..n_comp-1 - confirm
for i in range(n_comp):
    fig = plt.figure(figsize=(20, 10), facecolor="black")
    ax = fig.add_subplot(1, 1, 1, aspect="equal", facecolor="black")
    in_cluster = df["gmm_label"] == i
    ax.scatter(
        df.loc[in_cluster, "x"],
        df.loc[in_cluster, "y"],
        label=f"cluster {i}",
        alpha=0.2,
        s=1,
        color=f"C{i}",
    )
    # Image convention: y grows downwards
    ax.set_ylim(ax.get_ylim()[::-1])
    # Legend and axis styling belong inside the loop, otherwise only the last
    # figure gets them
    ax.legend()
    ax.axis("off")

In [None]:
barcode_spots.gmm_label.value_counts()

# Error correction

## Estimate error rate along sequence

In [None]:
errors_along_seq = bar.error_per_round(
    spot_df=barcode_spots,
    edit_distance=1,
    spot_count_threshold=30,
    sequence_column="sequence",
    filter_column="bases",
)

In [None]:
figs = plot_error_along_sequence(
    errors_along_seq, nrows=2, plot_matrix=False, marker="o"
)

## Manually filter out missing data

In [None]:
# This is INDEX of the round, NOT the number as written in the file name, so 0 based.
missing_roi = dict(chamber_09={8: 2, 9: 2}, chamber_08={8: 10, 9: 10})
missing_tiles = dict(chamber_08={"10_2_4": 5})

In [None]:
def _blank_round(spots, selection, round_index):
    """Mark one sequencing round as missing for the selected spots, in place.

    Args:
        spots: DataFrame with a "sequence" column (one numeric array per spot)
            and a "bases" column (one string per spot).
        selection: boolean Series aligned with `spots` selecting the rows to edit.
        round_index: 0-based index of the round to blank.

    The selected sequences get NaN at `round_index` and the selected base
    strings get "N" at the same position.
    """
    # Adding an array that is 0 everywhere and NaN at one position leaves the
    # other rounds untouched and turns that position into NaN.
    nan_at_round = np.zeros(len(spots.iloc[0]["sequence"]), dtype=float)
    nan_at_round[round_index] = np.nan
    blanked_sequences = spots.loc[selection, "sequence"].map(
        lambda x: x + nan_at_round
    )
    spots.loc[selection, "sequence"] = blanked_sequences
    blanked_bases = spots.loc[selection, "bases"].map(
        lambda x: x[:round_index] + "N" + x[round_index + 1 :]
    )
    spots.loc[selection, "bases"] = blanked_bases


clean_barcodes = barcode_spots.copy()

# Loop names are prefixed with `bad_` so the notebook-level `chamber`, `roi`
# and `tile` variables are not overwritten by this cell.
print("Removing rounds with missing rois")
for bad_chamber, bad_rois in missing_roi.items():
    for bad_roi, round_ in bad_rois.items():
        print(f"Removing round {round_} from {bad_chamber}, roi {bad_roi}")
        selection = (clean_barcodes["chamber"] == bad_chamber) & (
            clean_barcodes["roi"] == bad_roi
        )
        _blank_round(clean_barcodes, selection, round_)
print("Removing rounds with missing tiles")
for bad_chamber, bad_tiles in missing_tiles.items():
    for bad_tile, round_ in bad_tiles.items():
        print(f"Removing round {round_} from {bad_chamber}, tile {bad_tile}")
        selection = (clean_barcodes["chamber"] == bad_chamber) & (
            clean_barcodes["tile"] == bad_tile
        )
        _blank_round(clean_barcodes, selection, round_)

In [None]:
errors_along_seq = bar.error_per_round(
    spot_df=clean_barcodes, edit_distance=1, spot_count_threshold=30
)
figs = plot_error_along_sequence(
    errors_along_seq, nrows=2, plot_matrix=False, marker="o"
)

## Actual error correction part

In [None]:
# save clean barcode to reload later
clean_barcodes.to_pickle(analysis_folder / "clean_barcodes.pkl")

In [None]:
# Reload a saved error-corrected table and count spots per sequence, before
# (`bases`) and after (`corrected_bases`) correction.
# NOTE(review): this reads the edit-distance-3 file, while the correction loop
# in the next cell only iterates over edit distance 2 - confirm the file exists.
edist = 3
fname = analysis_folder / f"corrected_barcode_spots_edit{edist}.pkl"
err_corr = pd.read_pickle(fname)
non_corr = err_corr.bases.value_counts()
corr = err_corr.corrected_bases.value_counts()

In [None]:
import pandas as pd

# Run the barcode error correction, or reload the cached result, for each edit
# distance and keep the resulting tables in `err_corr_edt` keyed by edit distance.
redo = False
# Only the first 10 of the 14 entries keep a non-zero weight
weights = np.ones(14)
weights[10:] = 0
err_corr_edt = dict()
# NOTE(review): `use_slurm` is set here but not used anywhere in this cell
use_slurm = True

for edist in [2]:
    print(f"Correcting barcodes with edit distance {edist}")
    fname = analysis_folder / f"corrected_barcode_spots_edit{edist}.pkl"
    # Recompute when asked to (`redo`) or when no cached file exists
    if redo or not fname.exists():
        err_corr, merge_dict = bar.correct_barcode_sequences(
            analysis_folder / "clean_barcodes.pkl",
            max_edit_distance=edist,
            weights=weights,
            return_merge_dict=True,
            save_path=fname,
        )
        # NOTE(review): possibly redundant with `save_path=fname` above - confirm
        err_corr.to_pickle(fname)
    else:
        err_corr = pd.read_pickle(fname)
    # Spots per sequence before and after correction
    non_corr = err_corr.bases.value_counts()
    corr = err_corr.corrected_bases.value_counts()
    print(f"Number of unique sequences before correction: {len(non_corr)}")
    print(f"Number of unique sequences after correction: {len(corr)}")
    err_corr_edt[edist] = err_corr

In [None]:
# Number of distinct sequence prefixes before correction, as a function of the
# prefix length. NaN entries are replaced by 4 so they count as one more symbol.
sequences = np.stack(clean_barcodes["sequence"].to_numpy())
seq_no_nan = np.nan_to_num(sequences, nan=4)
n_positions = seq_no_nan.shape[1]
# n_unique[i] = number of distinct rows when only the first i columns are kept
n_unique = np.array(
    [len(np.unique(seq_no_nan[:, :i], axis=0)) for i in range(n_positions)],
    dtype=float,
)

In [None]:
# Observed number of unique prefixes against the 4**n upper bound (dashed)
rounds = np.arange(14)
plt.plot(rounds, 4**rounds, "k--")
plt.plot(np.arange(len(n_unique)), n_unique, "o-")
plt.semilogy()
plt.xlabel("# of rounds")
plt.ylabel("# of unique sequences")
plt.ylim(0.1, 1e6)

## Quality control

In [None]:
# Spots per corrected barcode, computed once per edit distance instead of once
# per abundance threshold.
counts_by_edist = {}
n_bc_with_N = {}
for k, v in err_corr_edt.items():
    # Join each entry into a single string (a no-op when it already is one)
    v["corrected_bases"] = v["corrected_bases"].map("".join)
    vc = v["corrected_bases"].value_counts()
    counts_by_edist[k] = vc
    n_bc_with_N[k] = np.sum(["N" in i for i in vc.index])

# One curve per abundance threshold: barcodes seen in more than `min_count`
# spots, as a function of the edit distance used for correction.
for min_count in range(10, 100, 10):
    n_bc = {
        k: int((counts > min_count).sum()) for k, counts in counts_by_edist.items()
    }
    plt.plot(
        list(n_bc.keys()),
        list(n_bc.values()),
        "o-",
        label=f"Abundance > {min_count}",
    )
plt.ylabel("Number of unique barcodes")
plt.xlabel("Edit distance")
plt.legend()

In [None]:
# Join each entry into a single string (a no-op when it already is one)
err_corr["corrected_bases"] = err_corr["corrected_bases"].map("".join)
vc = err_corr["corrected_bases"].value_counts()

# Barcodes found in at most 50 spots
ab = vc[vc <= 50]

# Grid with one column per chamber and one row per ROI, sized from the data so
# that indexing `axes` cannot run out of rows or columns.
nchamber = len(err_corr.chamber.unique())
nrois = int(err_corr.groupby("chamber")["roi"].nunique().max())
fig, axes = plt.subplots(nrois, nchamber, figsize=(20, 40), squeeze=False)
for ax in axes.ravel():
    ax.axis("off")

# Overlay up to five of those barcodes (one colour each) on all spots (black).
for bci, barcode in enumerate(ab.index[:5]):
    bc_spots = err_corr[err_corr.corrected_bases == barcode]
    for ich, (ch_name, ch_spots) in enumerate(err_corr.groupby("chamber")):
        for iroi, (roi_name, roi_spots) in enumerate(ch_spots.groupby("roi")):
            ax = axes[iroi, ich]
            bc_in_roi = bc_spots[
                (bc_spots.roi == roi_name) & (bc_spots.chamber == ch_name)
            ]
            if not bci:
                # Background of all spots, drawn once per panel
                ax.scatter(
                    roi_spots["y"],
                    roi_spots["x"],
                    label=f"roi {iroi}",
                    s=1,
                    color="k",
                    alpha=0.05,
                )
            ax.scatter(
                bc_in_roi["y"],
                bc_in_roi["x"],
                label=f"roi {iroi}",
                s=5,
                color=f"C{bci}",
                alpha=0.5,
            )
            ax.axis("equal")
            ax.axis("off")
fig.tight_layout()

In [None]:
plt.plot(n_bc_with_N.keys(), n_bc_with_N.values(), "o-", label="with N")
plt.ylabel("Number of unique barcodes")
plt.xlabel("Edit distance")
plt.legend()

In [None]:
nan_to_start = np.sum(["N" in i for i in non_corr.index])
nan_after = np.sum(["N" in i for i in corr.index])
print(f"Number of 'unique' sequences with N before correction: {nan_to_start}")
print(f"Number of 'unique' sequences with N after correction: {nan_after}")
print(f"Number of sequences with N corrected: {nan_to_start-nan_after}")

In [None]:
# Number of rolonies per sequence before and after correction (log counts):
# left panel spans the whole range in bins of 10, right panel uses unit bins
# up to 99.
fig = plt.figure(figsize=(7, 3))
fig.suptitle("# Rolonies per sequence")
panel_bins = [np.arange(0, corr.max(), 10), np.arange(100)]
count_sets = [("non-corrected", non_corr.values), ("corrected", corr.values)]
for ipanel, bins in enumerate(panel_bins, start=1):
    ax = fig.add_subplot(1, 2, ipanel)
    for label, counts in count_sets:
        ax.hist(counts, bins=bins, log=True, histtype="step", label=label)
# Legend on the last (right) panel only
ax.legend(loc="upper right")
fig.tight_layout()

In [None]:
# Same comparison as densities: cumulative distribution over the whole range
# (left) and plain density with unit bins up to 99 (right).
fig = plt.figure(figsize=(7, 3))
fig.suptitle("# Rolonies per sequence")
panel_options = [
    dict(bins=np.arange(corr.max()), cumulative=True),
    dict(bins=np.arange(100)),
]
count_sets = [("non-corrected", non_corr.values), ("corrected", corr.values)]
for ipanel, options in enumerate(panel_options, start=1):
    ax = fig.add_subplot(1, 2, ipanel)
    for label, counts in count_sets:
        ax.hist(
            counts,
            log=False,
            histtype="step",
            density=True,
            label=label,
            **options,
        )
# Legend on the last (right) panel only
ax.legend(loc="upper right")
fig.tight_layout()

In [None]:
from iss_preprocess.call import BASES

# For each abundant barcode, flag the positions at which spots lying within
# `th` mismatches of that barcode differ from it.
th = 1
good = corr[corr > 30].index
print(len(good))
sequences = np.stack(err_corr["corrected_sequence"].to_numpy())
error_along_sequence = np.zeros((len(good), sequences.shape[1]))
bases = list(BASES) + ["N"]
for ibar, barcode in enumerate(good):
    # Numeric code of the barcode: position of each letter in `bases`
    barcode_code = [bases.index(b) for b in barcode]
    mismatch = (sequences - barcode_code) != 0
    # Named n_mismatch (not edit_distance) so the notebook-level
    # `edit_distance` parameter is not overwritten by this loop
    n_mismatch = np.sum(mismatch, axis=1)
    close_spots = n_mismatch <= th
    # True where at least one close spot differs from the barcode
    error_along_sequence[ibar] = np.any(mismatch[close_spots], axis=0)

In [None]:
def plot_error_along_seq(error_along_sequence):
    """Plot an error matrix and its mean along the sequence.

    Args:
        error_along_sequence: 2D array shown as an image (rows labelled
            "Barcode", columns labelled "Position") and averaged over axis 0.

    Returns:
        The matplotlib figure holding both panels.
    """
    fig, (ax_matrix, ax_mean) = plt.subplots(1, 2, figsize=(10, 5))
    fig.suptitle("Errors along sequence (edit distance 1)")

    # Left: the full matrix with its colorbar
    image = ax_matrix.imshow(error_along_sequence, aspect="auto")
    ax_matrix.set_xlabel("Position")
    ax_matrix.set_ylabel("Barcode")
    fig.colorbar(image, ax=ax_matrix).set_label("Errors")

    # Right: mean over rows for each position
    ax_mean.plot(np.mean(error_along_sequence, axis=0))
    ax_mean.set_xlabel("Position")
    ax_mean.set_ylabel("Mean errors")

    fig.tight_layout()
    return fig


f = plot_error_along_seq(error_along_sequence)

In [None]:
# Spots per sequence before / after correction, as two-column tables
# (`seq`, `cnt`) in the order returned by value_counts.
non_corr = err_corr["bases"].value_counts()
corr = err_corr["corrected_bases"].value_counts()
df_non_corr = pd.DataFrame({"seq": non_corr.index, "cnt": non_corr.values})
df_corr = pd.DataFrame({"seq": corr.index, "cnt": corr.values})

print(df_non_corr.head())

In [None]:
# Fraction of all spots that belong to sequences seen more than 50 times,
# first without and then with error correction.
for counts in (df_non_corr, df_corr):
    ok = counts.cnt > 50
    print(counts.loc[ok, "cnt"].sum() / counts.cnt.sum())

In [None]:
# Rank-abundance curves on log-log axes, before and after correction.
fig = plt.figure(figsize=(5, 3))
ax = fig.add_subplot(1, 1, 1)
for label, counts in [("non-corrected", df_non_corr), ("corrected", df_corr)]:
    # Ranks start at 1: the row index starts at 0, whose log is -inf, which
    # would drop the most abundant sequence from the plot
    rank = np.arange(1, len(counts) + 1)
    ax.plot(np.log(rank), np.log(counts["cnt"].to_numpy()), label=label)
ax.legend()
ax.set_xlabel("log(rank)")
ax.set_ylabel("log(count)")

# Check registration

Save overview plot to check registration

In [None]:
from pathlib import Path

# Generate the barcode / mCherry registration overview for ROIs 1-10 of one
# chamber. Disabled by default: set `redo` to True to run it.
redo = False
if redo:
    chamber = "chamber_10"
    print(chamber)
    for roi in range(1, 11):
        # One call per ROI; the figure is written to `target`.
        # NOTE(review): with use_slurm=True the return value is presumably a
        # slurm job id - confirm
        jid = issp.vis.diagnostics.check_barcode_mcherry_reg(
            data_path=data_path + f"/{chamber}",
            roi=roi,
            barcode_prefix="barcode_round_1_1",
            mcherry_prefix="mCherry_1",
            target=analysis_folder / "barcode_mcherry_reg" / f"{chamber}_{roi}.png",
            slurm_folder=Path.home() / "slurm_logs" / data_path,
            scripts_name=f"barcode_mcherry_reg_{chamber}_{roi}",
            use_slurm=True,
            slurm_options=dict(time="3:00:00", mem="72G"),
        )
        print(jid)

In [None]:
data_path

In [None]:
# Look at one tile
# No trailing slash: chamber paths are built as data_path + f"/{chamber}",
# here and in the cells below, and a trailing slash would give "//" in them.
data_path = "becalia_rabies_barseq/BRAC8498.3e"
chamber = "chamber_07"
dpath = data_path + f"/{chamber}"
ops = issp.io.load_ops(dpath)
# Tile to inspect. The first entry is used as the ROI and the other two index
# the tile-corner grid in the next cell. Use ops["barcode_ref_tiles"][0] to
# look at the first barcode reference tile instead.
tile_coors = (9, 2, 5)
# tile_coors = (3,1,4)
stacks = {}
for prefix in [
    "mCherry_1",
    "barcode_round_1_1",
    "hybridisation_round_2_1",
    "mCherry_1_masks",
]:
    # Mask files are loaded with the "corrected" projection, images with the
    # default one
    projection = "corrected" if "mask" in prefix else None
    stack, _ = issp.pipeline.stitch.load_tile_ref_coors(
        dpath, tile_coors, prefix, filter_r=False, projection=projection
    )
    stacks[prefix] = stack
    print(f"{prefix}: {stack.shape}")

In [None]:
# Select the corrected spots of the current tile and add coordinates relative
# to the tile origin (`tile_x`, `tile_y`).
tile_name = "_".join(map(str, tile_coors))
barcodes = err_corr[(err_corr.chamber == chamber) & (err_corr.tile == tile_name)].copy()
tc = issp.pipeline.stitch.get_tile_corners(
    dpath, prefix=ops["reference_prefix"], roi=tile_coors[0]
)
tc = tc[tile_coors[1], tile_coors[2]]
# NOTE(review): assumes the rows of `tc` are (y, x) and its columns the tile
# corners, so the per-row minimum is the tile origin - confirm against
# get_tile_corners
origin = tc.min(axis=1)

barcodes["tile_x"] = barcodes["x"] - origin[1]
barcodes["tile_y"] = barcodes["y"] - origin[0]

In [None]:
# helper plotting functions
def get_stack_part(stack, xlim, ylim):
    """Crop a rectangular window out of an image stack.

    Args:
        stack: array indexed as [row (y), column (x), ...].
        xlim: two column bounds, in any order.
        ylim: two row bounds, in any order.

    Returns:
        stack[y_low:y_high, x_low:x_high], upper bounds excluded.
    """
    # Bounds may be given reversed (e.g. for an inverted axis): sort them first
    ys = sorted(ylim)
    xs = sorted(xlim)
    rows = slice(ys[0], ys[1])
    cols = slice(xs[0], xs[1])
    return stack[rows, cols]


def get_spot_part(
    df, xlim, ylim, x_col="tile_x", y_col="tile_y", return_mask=False
):
    """Select the spots that fall inside a rectangular window.

    Args:
        df: DataFrame of spots with one column per coordinate.
        xlim: two bounds on the x coordinate, in any order.
        ylim: two bounds on the y coordinate, in any order.
        x_col: name of the x column. Defaults to "tile_x"; pass "x" for tables
            that only hold the untranslated coordinates.
        y_col: name of the y column. Defaults to "tile_y".
        return_mask: if True, also return the boolean row selection.

    Returns:
        The rows of `df` with low <= coordinate < high on both axes, and, when
        `return_mask` is True, the boolean Series used to select them.
    """
    # Bounds may be given reversed (e.g. for an inverted axis): sort them first
    ylim = sorted(ylim)
    xlim = sorted(xlim)
    inside = (
        (df[x_col] >= xlim[0])
        & (df[x_col] < xlim[1])
        & (df[y_col] >= ylim[0])
        & (df[y_col] < ylim[1])
    )
    part = df[inside]
    if return_mask:
        return part, inside
    return part

In [None]:
# Overlay of one tile: three images as RGB, the masks as a faint coloured layer
# with white outlines, and the barcode spots as purple circles.
fig = plt.figure(figsize=(10, 10))
xlim = (512, 2460)
ylim = (512, 2460)
# NOTE(review): the channel / round indices below (2, 0 and 3, 0) presumably
# select the mCherry and DAPI planes - confirm against the acquisition ops
mCherry = get_stack_part(stacks["mCherry_1"][..., 2, 0], xlim=xlim, ylim=ylim)
# Maximum over the last two axes of the barcode stack
rabies_img = np.nanmax(
    get_stack_part(stacks["barcode_round_1_1"], xlim=xlim, ylim=ylim), axis=(2, 3)
)
dapi = get_stack_part(
    stacks["hybridisation_round_2_1"][..., 3, 0], xlim=xlim, ylim=ylim
)
masks = get_stack_part(
    stacks["mCherry_1_masks"][..., 0, 0], xlim=xlim, ylim=ylim
).astype(float)
# Label 0 is set to NaN so it is not drawn
masks[masks == 0] = np.nan

ax = fig.add_subplot(1, 1, 1)
st = np.stack([mCherry, rabies_img, dapi], axis=-1)
# One colour per image (red, green, blue), each scaled to its 99.9th percentile
rgb = issp.vis.to_rgb(
    st,
    colors=[(1, 0, 0), (0, 1, 0), (0, 0, 1)],
    vmax=np.nanpercentile(st, 99.9, axis=(0, 1)),
)
ax.imshow(rgb)
ax.imshow(masks, alpha=0.1, cmap="prism")
ax.contour(~np.isnan(masks), levels=[0.5], colors="w")
bc = get_spot_part(barcodes, xlim, ylim)
# Spot coordinates are shifted by the crop origin to match the cropped image
ax.scatter(
    bc["tile_x"] - xlim[0],
    bc["tile_y"] - ylim[0],
    s=5,
    ec="purple",
    fc="none",
    marker="o",
    lw=0.5,
)
ax.axis("off")
fig.tight_layout()

# Look at starters

## Select example ROI

In [None]:
# try to make a big registered mask image
chamber = "chamber_08"
roi = 4
print(data_path + f"/{chamber}")

## Load mCherry masks

In [None]:
chamber = "chamber_07"
roi = 8
chamber_path = data_path + f"/{chamber}"
# Stitch the corrected mCherry masks of the ROI, then pass them through
# get_big_masks (expansion of 5) and keep index 0 of the last axis.
stitched = issp.pipeline.stitch_registered(
    chamber_path, prefix="mCherry_1_masks", roi=roi, projection="corrected"
)
bigmask = issp.pipeline.segment.get_big_masks(
    chamber_path, roi, stitched, mask_expansion=5
)[..., 0]

In [None]:
# Stitch the `barcode_round_1_1` acquisition for the current chamber / roi.
big_rabies = issp.pipeline.stitch_registered(
    data_path + f"/{chamber}",
    prefix="barcode_round_1_1",
    roi=roi,
    projection=None,
)

In [None]:
# Stitch the `mCherry_1` acquisition for the current chamber / roi, restricted
# to channel index 2.
big_mcherry = issp.pipeline.stitch_registered(
    data_path + f"/{chamber}",
    prefix="mCherry_1",
    roi=roi,
    channels=2,
    projection=None,
)

In [None]:
print(bigmask.shape, big_rabies.shape, big_mcherry.shape)

In [None]:
# Stack mCherry and rabies along the third axis and render them as red / green.
# NOTE(review): two colours are given, so `st` is expected to end up with two
# channels - confirm against the shapes printed in the previous cell
st = np.dstack([big_mcherry, big_rabies])
rgb = issp.vis.to_rgb(
    st,
    colors=[(1, 0, 0), (0, 1, 0)],
    vmax=np.nanpercentile(st, 99.9, axis=(0, 1)),
)

In [None]:
# Overlay the masks (faint colours plus white outlines) on the RGB image.
fig, ax = plt.subplots(figsize=(20, 20))
ax.set_title(f"{chamber}, roi {roi}")
ax.imshow(rgb)
# Label 0 becomes NaN so it is not drawn
m = np.where(bigmask == 0, np.nan, bigmask.astype(float))
ax.imshow(m, alpha=0.1, cmap="prism")
ax.contour(~np.isnan(m), levels=[0.5], colors="w")
ax.set_xlim(7500, 18000)
ax.set_ylim(6000, 16000)

In [None]:
# Remove duplicated mCherry masks for chambers 07 to 10. Disabled by default:
# set `redo` to True to run it.
redo = False
if redo:
    slurm_folder = Path.home() / "slurm_logs" / data_path / "mcherry_logs"
    print(slurm_folder)
    # parents=True: the intermediate log folders may not exist yet
    slurm_folder.mkdir(parents=True, exist_ok=True)
    for chamber in [f"chamber_{i:02}" for i in range(7, 11)]:
        print(chamber)
        issp.pipeline.segment.remove_all_duplicate_masks(
            data_path + f"/{chamber}",
            prefix="mCherry_1",
            upper_overlap_thresh=0.3,
            use_slurm=True,
            slurm_folder=slurm_folder,
            scripts_name=f"remove_overlapping_{chamber}",
        )

In [None]:
# Count the spots of each corrected barcode inside each mask of the current
# chamber / roi.
df = err_corr[(err_corr["chamber"] == chamber) & (err_corr["roi"] == roi)].copy()
barcode_in_cells = issp.segment.cells.count_spots(
    df, grouping_column="corrected_bases", masks=bigmask
)
# Add an all-zero row for every mask label that is absent from the counts.
# `mask_ids` (not `all_cells`) so the name used for the pooled cell table
# elsewhere in the notebook is not reused for a label array.
mask_ids = np.unique(bigmask)
missing_ids = sorted(set(mask_ids) - set(barcode_in_cells.index))
empty_rows = pd.DataFrame(
    index=missing_ids,
    columns=barcode_in_cells.columns,
    data=np.zeros((len(missing_ids), len(barcode_in_cells.columns))),
)
barcode_in_cells = pd.concat([barcode_in_cells, empty_rows])

In [None]:
barcode_in_cells.head()

In [None]:
err_corr = barcode_spots

In [None]:
from pathlib import Path
import iss_analysis as issa

# Build one table (cells x barcodes) pooling every chamber and ROI. Per-ROI
# tables are reloaded from `target_folder` when present, otherwise they are
# computed here or handed to slurm.
redo = False
skip_not_done = False
use_slurm = True
all_cells_list = []
edist = 2
target_folder = analysis_folder / "barcode_in_mcherry_cells"
target_folder.mkdir(exist_ok=True)
print(target_folder)
# Slurm jobs are given the path of the saved table rather than the in-memory
# `err_corr`.
# NOTE(review): make sure that file and `err_corr` hold the same data
err_corr_fname = str(analysis_folder / f"corrected_barcode_spots_edit{edist}.pkl")
for chamber in [f"chamber_{i:02}" for i in [7, 8, 9, 10]]:  # err_corr.chamber.unique():
    print(f"Doing {chamber}")
    for roi in range(1, 11):  # err_corr.roi.unique():
        target = target_folder / f"barcode_in_cells_{chamber}_{roi}.pkl"
        if not redo and target.exists():
            # print(f"    Reloading already processed roi")
            barcode_in_cells = pd.read_pickle(target)
        else:
            if skip_not_done:
                print(f"     Skipping {chamber}, {roi}", flush=True)
                continue
            print(f"     Processing {chamber}, {roi}", flush=True)
            if use_slurm:
                print("       Starting slurm job")
                slurm_folder = (
                    Path.home()
                    / "slurm_logs"
                    / (data_path + f"/{chamber}")
                    / "segment_spots"
                )
                slurm_folder.mkdir(parents=True, exist_ok=True)
                job = issa.segment.get_barcode_in_cells(
                    chamber,
                    roi,
                    data_path,
                    err_corr_fname,
                    save_folder=str(target_folder),
                    use_slurm=True,
                    slurm_folder=slurm_folder,
                    scripts_name=f"segment_spots_{chamber}_{roi}",
                )
                # Nothing to aggregate for this ROI until the job has finished
                continue
            try:
                barcode_in_cells = issa.segment.get_barcode_in_cells(
                    chamber, roi, data_path, err_corr, save_folder=target_folder
                )
            except Exception as e:
                print(f"Error processing {chamber}, {roi}: {e}")
                continue
        # Drop mask label 0 and make the cell ids unique across chambers and rois
        barcode_in_cells = barcode_in_cells.loc[barcode_in_cells.index != 0].copy()
        barcode_in_cells.index = barcode_in_cells.index.map(
            lambda x: f"{chamber}_{roi}_{x}"
        )
        all_cells_list.append(barcode_in_cells)

if not all_cells_list:
    # Happens when every ROI was skipped, failed or was sent to slurm
    raise RuntimeError(
        "No barcode_in_cells table is available yet: wait for the slurm jobs "
        "to finish and re-run this cell."
    )
# Barcodes absent from a ROI show up as NaN after concatenation: count them as 0
all_cells = pd.concat(all_cells_list).fillna(0)
print(all_cells.shape)
# add missing barcodes
missing = sorted(set(err_corr.corrected_bases.unique()) - set(all_cells.columns))
missing_df = pd.DataFrame(
    index=all_cells.index,
    columns=missing,
    data=np.zeros((len(all_cells), len(missing))),
)
all_cells = pd.concat([all_cells, missing_df], axis=1)
print(all_cells.shape)

cnt_array = all_cells.values.astype(int)

In [None]:
all_cells.head()

In [None]:
rol_per_barcode = err_corr.corrected_bases.value_counts()
rol_per_barcode.head()

In [None]:
# Reorder the columns of `all_cells` to follow `rol_per_barcode`, then compare
# the total count of each barcode with the number of cells holding >= 5 of it.
seq = rol_per_barcode.index
reorderd = all_cells[seq]
n_cells_with_barcode = (reorderd >= 5).sum(axis=0).to_numpy()
plt.scatter(n_cells_with_barcode, rol_per_barcode.to_numpy())
plt.ylabel("Total # of rolonies")
plt.xlabel("# of starter cells with 5 or more rolonies")

In [None]:
reorderd.head()

In [None]:
plt.hist(reorderd.values.flatten(), bins=np.arange(0, 100, 1), log=True)

In [None]:
mat = reorderd.values
print(mat.shape)

bc_per_cell = np.sum(mat >= 5, axis=1)
_ = plt.hist(bc_per_cell, bins=np.arange(0, 10, 1) - 0.5)
plt.ylim(0, 500)

In [None]:
data = pd.DataFrame(
    data=dict(
        rolnum=rol_per_barcode.values,
        ncells=np.sum(reorderd >= 5, axis=0).values,
        sequence=seq,
    )
)
data.head()

In [None]:
plt.hist(
    data[data.ncells == 1].rolnum,
    bins=np.geomspace(1, 10000, 20),
    alpha=0.5,
    label="1 cell",
)
plt.semilogx()

In [None]:
import seaborn as sns

# Stacked histogram of `rolnum`, one layer per value of `ncells`.
fig = plt.figure(figsize=(5, 5))
ax = fig.add_subplot(1, 1, 1)
# NOTE(review): the previous version also built `data[data.ncells < 4]` into
# `df` without using it (and overwrote the spot table `df`). If the plot was
# meant to be restricted to ncells < 4, group that subset instead of `data`.
rolnum_by_ncells = {n: d.rolnum.values for n, d in data.groupby("ncells")}
ax.hist(
    list(rolnum_by_ncells.values()),
    bins=np.geomspace(1, data.rolnum.max(), 20),
    stacked=True,
    label=[f"ncells = {n}" for n in rolnum_by_ncells],
)
ax.set_xscale("log")
ax.set_xlabel("rolnum")
ax.set_ylabel("Number of barcodes")
ax.legend()

fig.tight_layout()

In [None]:
data[(data.rolnum > 1000) & (data.ncells == 1)]

In [None]:
# Show where one barcode (red) sits among all spots (black), with one panel per
# chamber (columns) and ROI (rows).
bc = err_corr[err_corr["corrected_bases"] == "ATCATCGACTTCAC"]

chamber_names = [f"chamber_{i:02}" for i in range(7, 11)]
# ROIs are numbered 1 to 10, as in the other loops of this notebook
roi_names = range(1, 11)
nchamber = len(chamber_names)
nrois = len(roi_names)
fig, axes = plt.subplots(nrois, nchamber, figsize=(20, 40))
for ich, ch_name in enumerate(chamber_names):
    for iroi, roi_name in enumerate(roi_names):
        # Named roi_spots so the notebook-level `roi` is not replaced by a table
        roi_spots = err_corr[
            (err_corr["chamber"] == ch_name) & (err_corr["roi"] == roi_name)
        ]
        ax = axes[iroi, ich]
        bcc = bc[(bc.roi == roi_name) & (bc.chamber == ch_name)]
        ax.scatter(
            roi_spots["y"],
            roi_spots["x"],
            label=f"roi {roi_name}",
            s=1,
            color="k",
            alpha=0.05,
        )
        ax.scatter(
            bcc["y"], bcc["x"], label=f"roi {roi_name}", s=5, color="r", alpha=0.5
        )
        ax.axis("equal")
        ax.axis("off")
fig.tight_layout()

In [None]:
bc_cnt = err_corr.corrected_bases.value_counts()
print(bc_cnt.shape)
valid_barcodes = bc_cnt[bc_cnt > 1000].index
print(len(valid_barcodes))
all_cells_valid_bc = all_cells[valid_barcodes]
valid_barcodes

In [None]:
all_cells_valid_bc.head()

In [None]:
cnt_array = all_cells_valid_bc.astype(int).values

In [None]:
cnt_array.shape

In [None]:
np.sum(cnt_array > 5, axis=0).shape

In [None]:
# Count the spots of each corrected barcode inside each mask of the current
# chamber / roi.
df = err_corr[(err_corr["chamber"] == chamber) & (err_corr["roi"] == roi)].copy()
barcode_in_cells = issp.segment.cells.count_spots(
    df, grouping_column="corrected_bases", masks=bigmask
)
# Add an all-zero row for every mask label that is absent from the counts.
# `mask_ids` (not `all_cells`): `all_cells` is the pooled cells x barcodes
# table used by the cells above and must not be replaced by a label array.
mask_ids = np.unique(bigmask)
missing_ids = sorted(set(mask_ids) - set(barcode_in_cells.index))
empty_rows = pd.DataFrame(
    index=missing_ids,
    columns=barcode_in_cells.columns,
    data=np.zeros((len(missing_ids), len(barcode_in_cells.columns))),
)
barcode_in_cells = pd.concat([barcode_in_cells, empty_rows])

In [None]:
p0 = 0.85
p1 = 0.14
p2 = 1 - (p0 + p1)
print(p0, p1, p2)

In [None]:
# Discriminant of the quadratic solved in the next cell (b**2 - 4*a*c, with a
# minus sign); a negative value means there is no real solution.
(2 * p0 + p1) ** 2 - 4 * p0 * (p0 + p1 + p2)

In [None]:
# Largest root of a*m**2 - b*m + p0 = 0, with a = p0 + p1 + p2 and b = 2*p0 + p1
a = p0 + p1 + p2
b = 2 * p0 + p1
discriminant = b**2 - 4 * p0 * a
if discriminant < 0:
    # Avoid taking the square root of a negative number (NaN plus a warning)
    print(f"No real solution: discriminant = {discriminant:.4g}")
    m = np.nan
else:
    m = (b + np.sqrt(discriminant)) / (2 * a)
print(m)

In [None]:
# Summary statistics of the cells x barcodes count matrix
rol_per_cells = np.sum(cnt_array, axis=1)
bar_per_cells = np.sum(cnt_array > 0, axis=1)
bar_per_good_cells = np.sum(cnt_array > 5, axis=1)
cell_per_barcode = np.sum(cnt_array > 0, axis=0)
good_cell_per_barcode = np.sum(cnt_array > 5, axis=0)
rol_per_barcode = np.sum(cnt_array, axis=0)

# Bin edges sit at k - 0.5 for k = 0 .. max + 1, so every integer value,
# including the maximum, falls inside a bin (values outside the edges are
# silently ignored by hist).
rol_bins = np.arange(0, rol_per_cells.max() + 2, 1) - 0.5

fig = plt.figure(figsize=(7, 5))
ax = fig.add_subplot(2, 2, 1)
ax.hist(rol_per_cells, bins=rol_bins, log=True)
ax.set_xlabel("Rolonies per cell")
ax = fig.add_subplot(2, 2, 2)
ax.hist(rol_per_cells, bins=rol_bins, log=False)
ax.set_xlabel("Rolonies per cell")
ax = fig.add_subplot(2, 2, 3)
ax.hist(
    good_cell_per_barcode, bins=np.arange(0, good_cell_per_barcode.max() + 2, 1) - 0.5
)
ax.set_xlabel("Cells per barcode (count > 5)")
ax = fig.add_subplot(2, 2, 4)
ax.hist(bar_per_cells, bins=np.arange(0, bar_per_cells.max() + 2, 1) - 0.5)
ax.set_xlabel("Barcode per cells")
fig.tight_layout()

In [None]:
pd.Series(good_cell_per_barcode).value_counts()

In [None]:
plt.scatter(cell_per_barcode, rol_per_barcode)
plt.xlabel("Cells per barcode")
plt.ylabel("Rolonies per barcode")

In [None]:
p2 = 0.36 * (1 - 0.85) ** 2
p1 = 0.64 * 0.15 + 2 * 0.36 * 0.85 * 0.15
print(p2, p1)

In [None]:
# Keep the cells in which at least one barcode has more than 5 rolonies
good_cells = cnt_array[cnt_array.max(axis=1) > 5, :]
rol_per_cells = np.sum(good_cells, axis=1)
bar_per_cells = np.sum(good_cells > 0, axis=1)

# Bin edges at k - 0.5 for k = 0 .. max + 1 so the maximum value is included
# (values outside the edges are silently ignored by hist).
fig = plt.figure(figsize=(7, 4))
ax = fig.add_subplot(1, 2, 1)
ax.hist(rol_per_cells, bins=np.arange(0, rol_per_cells.max() + 2, 1) - 0.5, log=False)
ax.set_xlabel("Rolonies per good cell")
ax = fig.add_subplot(1, 2, 2)
ax.hist(bar_per_cells, bins=np.arange(0, bar_per_cells.max() + 2, 1) - 0.5, log=False)
ax.set_xlabel("Barcode per good cell")
fig.tight_layout()

In [None]:
# get masks: stitch the corrected mCherry masks of the current chamber / roi,
# pass them through get_big_masks (expansion of 5) and keep index 0 of the
# last axis
chamber_path = data_path + f"/{chamber}"
stitched_mask = issp.pipeline.stitch_registered(
    chamber_path,
    prefix="mCherry_1_masks",
    roi=roi,
    projection="corrected",
)
bigmask = issp.pipeline.segment.get_big_masks(
    chamber_path,
    roi,
    stitched_mask,
    mask_expansion=5,
)[..., 0]

In [None]:
# filter rolonies
import numpy as np

# df = all_barcode_spots[(all_barcode_spots["chamber"] == chamber) & (all_barcode_spots["roi"] == roi)]
df = err_corr[(err_corr["chamber"] == chamber) & (err_corr["roi"] == roi)]

In [None]:
ylim = [16000, 8000]
xlim = [12000, 18000]
plt.figure(figsize=(20, 20))

# mask with nan for display
m = bigmask.astype(float)
m[bigmask == 0] = np.nan


# `extent` places the crop at its coordinates in the full image; ylim is given
# reversed so that y grows downwards
plt.imshow(
    get_stack_part(m, xlim, ylim),
    interpolation="none",
    alpha=0.5,
    cmap="prism",
    extent=[xlim[0], xlim[1], ylim[0], ylim[1]],
)
# Select the spots of the window on the "x" / "y" columns, which are the ones
# plotted below (get_spot_part filters on "tile_x" / "tile_y" by default).
x_low, x_high = sorted(xlim)
y_low, y_high = sorted(ylim)
in_window = (
    (df["x"] >= x_low)
    & (df["x"] < x_high)
    & (df["y"] >= y_low)
    & (df["y"] < y_high)
)
sp = df[in_window]
plt.scatter(sp["x"], sp["y"], alpha=0.5, s=1, color="k")

In [None]:
cell_df = issp.pipeline.segment.make_cell_dataframe(
    data_path + f"/{chamber}", roi, masks=bigmask, mask_expansion=0, atlas_size=10
)
cell_df.head()

In [None]:
analysis_folder

In [None]:
cell_df.to_pickle(analysis_folder / f"cell_df_{chamber}_{roi}.pkl")

In [None]:
# `bigmask` already had its last axis indexed when it was built, so it is
# passed as is, like in the other count_spots calls of this notebook
barcode_in_cells = issp.segment.cells.count_spots(
    df.copy(), grouping_column="corrected_bases", masks=bigmask
)
barcode_in_cells.head()

In [None]:
cell_df.index.name = "mask_id"
# NOTE(review): `cell_barcode` is not assigned in any cell visible in this part
# of the notebook, so this line raises NameError unless it is defined
# elsewhere; `cell_df` or `barcode_in_cells` may have been intended - confirm
cell_barcode

In [None]:
# Per-cell summaries of the count table. The first row is skipped.
# NOTE(review): presumably because row 0 is mask label 0 - confirm the row order
bcm = barcode_in_cells.values[1:, :]
plt.figure(figsize=(7, 2))
plt.suptitle(f"Chamber {chamber}, roi {roi}")
# (values, bin edges, x label) for each of the three panels
panels = [
    (bcm.sum(axis=1), np.arange(0, 30, 2), "Number of spots\n(all barcodes)"),
    (bcm.max(axis=1), np.arange(0, 30, 2), "Number of spots\n(most abundant barcode)"),
    (
        np.sum(bcm != 0, axis=1),
        np.arange(0, 10, 1) - 0.5,
        "Number of\ndifferent barcodes",
    ),
]
for ipanel, (values, bins, xlabel) in enumerate(panels, start=1):
    plt.subplot(1, 3, ipanel)
    plt.hist(values, bins=bins)
    plt.ylabel("Number of cells")
    plt.xlabel(xlabel)
plt.tight_layout()

In [None]:
plt.scatter(bcm.sum(axis=1), np.sum(bcm != 0, axis=1), c=bcm.max(axis=1))
plt.colorbar()

In [None]:
# Full mask image with every spot of `df` on top, zoomed on one window
xlim = [12000, 18000]
ylim = [16000, 8000]
plt.figure(figsize=(20, 20))

# Label 0 becomes NaN so it is not drawn
m = np.where(bigmask == 0, np.nan, bigmask.astype(float))
plt.imshow(m, interpolation="none", alpha=0.5, cmap="prism")
plt.scatter(df["x"], df["y"], alpha=0.5, s=1, color="k")

# ylim is given reversed so that y grows downwards
plt.xlim(xlim)
plt.ylim(ylim)

In [None]:
# Load the processing options of the current chamber.
ops = issp.io.load_ops(data_path + f"/{chamber}")
# NOTE(review): the barcode equivalent is read as ops["barcode_ref_tiles"][0]
# in an earlier cell; if "mcherry_ref_tiles" is also a list of tiles, a `[0]`
# is missing here - confirm before using tile[0], tile[1], tile[2]
tile = ops["mcherry_ref_tiles"]

In [None]:
# Load the mCherry masks
import numpy as np
import matplotlib.pyplot as plt

processed = issp.io.get_processed_path(data_path)
tname = "_".join([str(t) for t in tile])
masks_file = processed / chamber / "cells" / f"mCherry_1_masks_corrected_{tname}.npy"
masks = np.load(masks_file)
m = masks.astype(float)
m[m == 0] = np.nan
plt.imshow(m, interpolation="None", cmap="prism")

In [None]:
# Warp into ref coordinates
# The mask file is read again here, so the cell does not depend on the
# previous one having been run
masks_file = processed / chamber / "cells" / f"mCherry_1_masks_corrected_{tname}.npy"
masks = np.load(masks_file)
# NOTE(review): interpolation=0 presumably keeps the integer labels intact
# (nearest neighbour) - confirm against warp_stack_to_ref
warped_mask, bad_pixels = issp.pipeline.stitch.warp_stack_to_ref(
    masks,
    data_path + f"/{chamber}",
    prefix="mCherry_1",
    tile_coors=tile,
    interpolation=0,
)
# Label 0 becomes NaN so it is not drawn
wm = warped_mask.astype(float)
wm[wm == 0] = np.nan
plt.imshow(wm[..., 0, 0], interpolation="None", cmap="prism")

In [None]:
# get tile corners
# NOTE(review): `tile_origin` is first assigned in the next cell, so on a fresh
# kernel this cell raises NameError - run it after that cell or move it below

tile_origin

In [None]:
# Select the corrected spots of the reference tile and draw them over the
# warped masks, in tile-relative coordinates.
ok = (err_corr.chamber == chamber) & (
    err_corr["tile"] == "_".join([str(t) for t in tile])
)
bc = err_corr[ok]
tc = issp.pipeline.stitch.get_tile_corners(
    data_path + f"/{chamber}", prefix=ops["reference_prefix"], roi=tile[0]
)
tc = tc[tile[1], tile[2]]
# NOTE(review): the origin is taken as the first corner here, whereas the
# single-tile cell earlier in the notebook uses tc.min(axis=1) - confirm both
# give the same point. Rows are used as (y, x) below.
tile_origin = tc[:, 0]
plt.imshow(wm[..., 0, 0], interpolation="None", cmap="prism", alpha=0.5)
plt.scatter(bc.x - tile_origin[1], bc.y - tile_origin[0], s=1, color="k", alpha=0.5)

In [None]:
# Build the cell table for the current chamber / roi. The chamber folder is
# passed, as in the other issp.pipeline calls of this notebook (the mouse-level
# `data_path` alone does not identify a chamber).
issp.pipeline.segment.make_cell_dataframe(
    data_path + f"/{chamber}", roi, masks=None, mask_expansion=5.0, atlas_size=10
)