In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
import itertools
from collections import Counter
from pathlib import Path

import matplotlib
import matplotlib.font_manager as fm
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

from brisc.manuscript_analysis import barcodes_in_cells as bc_cells
from brisc.manuscript_analysis import distance_between_cells as dist_cells
from brisc.manuscript_analysis import match_to_library as match_lib
from brisc.manuscript_analysis import mcherry_intensity as mcherry_int
from brisc.manuscript_analysis import overview_image
from brisc.manuscript_analysis import sensitivity as sens

# Figure-wide font setup: load Arial from an explicit font file, register it
# with matplotlib's font manager, and make it the default family.
arial_font_path = "/nemo/lab/znamenskiyp/home/shared/resources/fonts/arial.ttf"  # update path as needed
fm.fontManager.addfont(arial_font_path)
arial_prop = fm.FontProperties(fname=arial_font_path)
plt.rcParams.update(
    {
        # Family name is read back from the font file rather than hard-coded.
        "font.family": arial_prop.get_name(),
        "mathtext.default": "regular",  # make math mode also Arial
        "pdf.fonttype": 42,  # for pdfs
    }
)

from iss_preprocess.io import get_processed_path

In [None]:
# Root folders used by every loader below.
# `barseq_path` is two levels above the processed folder of the
# "becalia_rabies_barseq" project; `main_path` is the lab root.
# NOTE(review): the notebook used to assign Path("Y:/") / Path("Z:/") here and
# overwrite them immediately; presumably those are the equivalent Windows
# mapped drives -- substitute them below if running from such a machine.
barseq_path = get_processed_path("becalia_rabies_barseq").parent.parent
main_path = Path("/nemo/lab/znamenskiyp/")
print(barseq_path)
print(main_path)

# Name of the error-corrected barcode dataset; it is passed to the loaders and
# used to build the cell/barcode pickle filename.
error_correction_ds_name = "BRAC8498.3e_error_corrected_barcodes_26"

In [None]:
# Library-matching inputs: matches of in situ barcodes and of random barcodes
# against the library, plus the library itself (names as returned by the loader).
# redo=False presumably reuses previously computed results -- TODO confirm.
in_situ_barcode_matches, random_barcode_matches, rv35_library = match_lib.load_data(
    barseq_path=barseq_path,
    main_path=main_path,
    error_correction_ds_name=error_correction_ds_name,
    redo=False,
)

# Inputs for the spot-count histogram and the example cells/spots image.
good_cells, labeled_images, rabies_stack, spots_df = sens.load_data()

# mCherry data for the same error-corrected dataset.
valid_mcherry = mcherry_int.load_mcherry_data(
    error_correction_ds_name=error_correction_ds_name,
)

In [None]:
# Per-cell barcode table for the error-corrected dataset.
# NOTE: pd.read_pickle executes arbitrary code from the file -- only point this
# at pickles written by the lab's own pipeline.
cell_barcode_path = (
    barseq_path
    / f"processed/becalia_rabies_barseq/BRAC8498.3e/analysis/{error_correction_ds_name}_cell_barcode_df.pkl"
)
cells_df = (
    pd.read_pickle(cell_barcode_path)
    # Keep only cells that were assigned a main barcode.
    .dropna(subset=["main_barcode"])
    # Built as a chain so the column is added to a fresh frame, not to a
    # filtered slice (which triggers SettingWithCopyWarning).
    # Assumes `all_barcodes` holds each barcode at most once per cell -- TODO confirm.
    .assign(n_unique_barcodes=lambda df: df["all_barcodes"].apply(len))
)

In [None]:
def _count_cells_per_barcode(barcodes_per_cell):
    """Count, for every barcode, how many cells contain it.

    Args:
        barcodes_per_cell: iterable with one collection of barcodes per cell.

    Returns:
        collections.Counter mapping barcode -> number of cells containing it.
        Barcodes that never occur yield 0 on lookup.
    """
    counts = Counter()
    for cell_barcodes in barcodes_per_cell:
        # set(): a cell counts once per barcode even if it lists it repeatedly,
        # matching a `barcode in cell_barcodes` membership test.
        counts.update(set(cell_barcodes))
    return counts


# Every distinct barcode seen in any cell (order is arbitrary).
all_barcodes = list(set(itertools.chain.from_iterable(cells_df["all_barcodes"].values)))

# One pass over each cell group instead of re-filtering and re-scanning
# `cells_df` once per barcode.
starter_counts = _count_cells_per_barcode(
    cells_df.loc[cells_df["is_starter"] == True, "all_barcodes"]
)
presynaptic_counts = _count_cells_per_barcode(
    cells_df.loc[cells_df["is_starter"] == False, "all_barcodes"]
)

# One row per barcode with the number of starter / presynaptic cells carrying it.
barcodes_df = pd.DataFrame(
    {
        "barcode": all_barcodes,
        "n_starters": [starter_counts[barcode] for barcode in all_barcodes],
        "n_presynaptic": [presynaptic_counts[barcode] for barcode in all_barcodes],
    }
)

In [None]:
# Plot Fig.3
# Output options
save_fig = True
figname = "fig3_barcodes_in_cells_overview"
save_path = main_path / "home/shared/presentations/becalick_2025"

# Shared styling used by the panels below
fontsize_dict = dict(title=8, label=8, tick=6, legend=6)
pad_dict = dict(label=1, tick=1, legend=5)
hist_linewidth = 0.5
linewidth = 1.2
line_alpha = 1

# Square 17.4 cm figure; `cm` converts centimetres to the inches matplotlib expects.
cm = 1 / 2.54
fig_side_cm = 17.4
fig = plt.figure(figsize=(fig_side_cm * cm, fig_side_cm * cm), dpi=600)

# a) Barcodes per cell: presynaptic cells in the upper axes, starter cells in
# the axes directly below it.
ax_bc_per_cell_presyn = fig.add_axes([0.08, 0.85, 0.13, 0.08])
bc_cells.plot_hist(
    cells_df[cells_df["is_starter"] == False],
    ax=ax_bc_per_cell_presyn,
    col="n_unique_barcodes",
    tick_fontsize=fontsize_dict["tick"],
    max_val=6,
    linewidth=hist_linewidth,
)
# Tick labels are hidden on the upper axes; only the lower axes is labelled.
ax_bc_per_cell_presyn.set_xticklabels([])
ax_bc_per_cell_presyn.text(
    6.5, 1, "Presynaptic\ncells", ha="right", va="top", fontsize=fontsize_dict["legend"]
)

ax_bc_per_cell_starter = fig.add_axes([0.08, 0.73, 0.13, 0.08])
bc_cells.plot_hist(
    cells_df[cells_df["is_starter"] == True],
    ax=ax_bc_per_cell_starter,
    col="n_unique_barcodes",
    tick_fontsize=fontsize_dict["tick"],
    max_val=6,
    linewidth=hist_linewidth,
)
ax_bc_per_cell_starter.text(
    6.5, 1, "Starter\ncells", ha="right", va="top", fontsize=fontsize_dict["legend"]
)
# Labels are set on the axes explicitly instead of through plt.xlabel/ylabel,
# so they cannot land on another axes if a helper changes the current axes.
# The leading spaces with loc="bottom" presumably shift the label so it spans
# both stacked axes -- confirm visually.
# NOTE(review): these histograms are built from per-cell rows, so the y axis
# looks like a proportion of cells rather than of barcodes -- confirm the label.
ax_bc_per_cell_starter.set_ylabel(
    "     Proportion of barcodes", loc="bottom", fontsize=fontsize_dict["label"]
)
ax_bc_per_cell_starter.set_xlabel("Barcodes per cell", fontsize=fontsize_dict["label"])

# b) Match to library: a main axes plus a strip a quarter of its height,
# placed above it with its top edge at y = 0.93.
matches_main_height = 0.14
matches_top_height = matches_main_height / 4
ax_matches_to_lib = fig.add_axes([0.31, 0.73, 0.23, matches_main_height])
ax_matches_to_lib_top = fig.add_axes(
    [0.31, 0.93 - matches_top_height, 0.23, matches_top_height]
)
match_lib.plot_matches_to_library(
    in_situ_barcode_matches,
    random_barcode_matches,
    rv35_library,
    ax=(ax_matches_to_lib, ax_matches_to_lib_top),
    tick_fontsize=fontsize_dict["tick"],
    label_fontsize=fontsize_dict["label"],
    linewidth=linewidth,
    hist_edgewith=hist_linewidth,  # keyword spelled as the helper is called here
    alpha=0.8,
)

# c) Histogram of starters per barcode (zero-starter bin included via show_zero)
ax_starters_per_barcode = fig.add_axes([0.68, 0.73, 0.22, 0.20])
bc_cells.plot_hist(
    barcodes_df,
    col="n_starters",
    ax=ax_starters_per_barcode,
    tick_fontsize=fontsize_dict["tick"],
    show_zero=True,
    linewidth=hist_linewidth,
)
# Label this axes explicitly rather than relying on pyplot's current axes,
# which a plotting helper could change.
ax_starters_per_barcode.set_xlabel(
    "Starter cells per barcode", fontsize=fontsize_dict["label"]
)
ax_starters_per_barcode.set_ylabel(
    "Proportion of barcodes", fontsize=fontsize_dict["label"]
)

# d) Presynaptic cells per barcode, split by orphan status (the split itself
# is done inside the helper from `barcodes_df`).
ax_presyn_orphans = fig.add_axes([0.08, 0.47, 0.15, 0.18])
bc_cells.plot_presyn_per_barcode(
    barcodes_df,
    ax=ax_presyn_orphans,
    label_fontsize=fontsize_dict["label"],
    tick_fontsize=fontsize_dict["tick"],
    alpha=0.5,
    linewidth=linewidth,
)

# Histogram of barcode spots per cell, with a dotted line separating cells
# that have fewer than `min_spots` spots.
min_spots = 3  # the marker is drawn half a bin below this value (x = 2.5)
spot_hist_ymax = 0.08  # shared by the y-limit and the height of the marker line

ax_sensitivity_histo = fig.add_axes([0.35, 0.47, 0.15, 0.18])
bc_cells.plot_hist(
    good_cells,
    col="spot_count",
    ax=ax_sensitivity_histo,
    tick_fontsize=fontsize_dict["tick"],
    show_zero=True,
    show_counts=False,
    max_val=40,
    linewidth=hist_linewidth,
)
ax_sensitivity_histo.set_xticks(np.arange(0, 50, 10))
ax_sensitivity_histo.set_ylim(0, spot_hist_ymax)
# Drawn on the axes directly rather than through plt.*, so the line and labels
# cannot land on another axes if a helper changes the current axes.
ax_sensitivity_histo.vlines(
    min_spots - 0.5, 0, spot_hist_ymax, colors="k", linestyles="dotted"
)
print(
    f"cells with <{min_spots} spots: "
    f"{good_cells['spot_count'].lt(min_spots).mean()*100:.3f}%"
)
ax_sensitivity_histo.set_xlabel(
    "Number of barcode spots", fontsize=fontsize_dict["label"]
)
ax_sensitivity_histo.set_ylabel("Proportion of cells", fontsize=fontsize_dict["label"])

# Example image of cells and their spots. Its rectangle overlaps the right edge
# of the spot-count histogram axes -- presumably an intentional inset; confirm.
ax_spots_in_cells = fig.add_axes([0.47, 0.50, 0.15, 0.15])
# Crop window passed to the plotting helper (600 x 600 in its coordinates).
spots_crop = dict(min_y=1000, max_y=1600, min_x=1200, max_x=1800)
sens.plot_cells_spots(
    good_cells,
    rabies_stack,
    labeled_images,
    ax=ax_spots_in_cells,
    roi_of_interest=6,
    vmaxs=[1500, 200],
    **spots_crop,
)

# Add scalebar
overview_image.add_scalebar(
    ax_spots_in_cells,
    length_um=20,
    downsample_factor=1,
    bar_height_px=20,
    margin_px=25,
)

# Report stats for the slice at index 0 of the stack's third axis.
overview_image.print_image_stats(
    "rab_spots",
    rabies_stack[:, :, 0, :],
    downsample_factor=1,
    pixel_size_um=0.231,
)

# mCherry intensity panel; the helper's return value is kept in `im`.
ax_mcherry_fluor = fig.add_axes([0.72, 0.47, 0.18, 0.18])
im = mcherry_int.plot_mcherry_intensity_presyn(
    valid_mcherry,
    ax=ax_mcherry_fluor,
    marker_size=3,
    tick_fontsize=fontsize_dict["tick"],
    label_fontsize=fontsize_dict["label"],
)


# Either write the figure to disk (PDF and PNG) or, when not saving, title it
# with the dataset name.
if not save_fig:
    fig.suptitle(error_correction_ds_name)
else:
    pdf_file = save_path / f"{figname}.pdf"
    png_file = save_path / f"{figname}.png"
    fig.savefig(pdf_file, format="pdf", dpi=600)
    # No explicit dpi for the PNG: it uses matplotlib's savefig default.
    fig.savefig(png_file, format="png")

In [None]:
# Print useful numbers:
# Number of distinct barcodes in infected cells
print(f"{len(all_barcodes)} distinct barcodes found in cells")

# Match to library: share of in situ barcodes with a non-zero library count
in_situ_in_lib_mask = in_situ_barcode_matches["ham_lib_bc_counts"] > 0
bc_in_lib = in_situ_in_lib_mask.sum()
perc = bc_in_lib / len(in_situ_barcode_matches) * 100
print(f"{bc_in_lib}/{len(in_situ_barcode_matches)} barcode in library, {perc:.1f}%")

# Same for the random-barcode control, which uses the `lib_bc_counts` column
random_in_lib_mask = random_barcode_matches["lib_bc_counts"] > 0
bc_in_lib = random_in_lib_mask.sum()
perc = bc_in_lib / len(random_barcode_matches) * 100
print(
    f"{bc_in_lib}/{len(random_barcode_matches)} random barcode in library, {perc:.1f}%"
)

# Barcodes found in starters
starter_df = cells_df[cells_df["is_starter"] == True]
starter_barcodes = list(
    {
        barcode
        for cell_barcodes in starter_df["all_barcodes"].values
        for barcode in cell_barcodes
    }
)
print(f"{len(starter_barcodes)} distinct barcodes found in starter cells")

# Rows of `barcodes_df` (i.e. barcodes) that appear in no starter cell.
# NOTE(review): summing n_presynaptic over barcodes counts a cell once per
# orphan barcode it carries, so this is cell-barcode pairs, not unique cells.
cells_without_starter = barcodes_df.loc[barcodes_df["n_starters"] == 0]
print(f"{cells_without_starter.n_presynaptic.sum()} cells with orphan barcode")