In [None]:
import numpy as np
import pandas as pd
from pathlib import Path
import matplotlib.pyplot as plt
import iss_preprocess as iss

In [None]:
data_path = "becalia_rabies_barseq/BRAC8498.3e/chamber_07/"
processed_path = iss.io.get_processed_path(data_path)
# NOTE(review): unpickling executes arbitrary code; only load files written by
# our own pipeline.
ara_starters = pd.read_pickle(processed_path.parent / "ara_starter_cells.pkl")

# Cells with `max_n_spots` strictly above this threshold are called "good"
# below (the "count > 5" in the figure labels).
MIN_SPOTS = 5

# Row masks, evaluated once and reused for every statistic below. The explicit
# comparisons with True/False are kept on purpose so that rows whose `starter`
# value is neither True nor False fall in neither group, as before.
is_starter = ara_starters["starter"] == True
is_presynaptic = ara_starters["starter"] == False
is_good = ara_starters["max_n_spots"] > MIN_SPOTS

starter_cells = ara_starters[is_starter]
presynaptic_cells = ara_starters[is_presynaptic]
good_starter_cells = ara_starters[is_good & is_starter]
good_presynaptic_cells = ara_starters[is_good & is_presynaptic]

# Rolonies per barcode
rol_per_barcode = ara_starters.groupby("main_barcode")["max_n_spots"].sum()

# Rolonies per cell
rol_per_presynaptic_cell = presynaptic_cells["max_n_spots"].values
rol_per_starter_cell = starter_cells["max_n_spots"].values

# Barcodes per cell (the "good" variants stay pandas Series, as before)
bar_per_presynaptic_cell = presynaptic_cells["n_unique_barcodes"].values
bar_per_starter_cell = starter_cells["n_unique_barcodes"].values
bar_per_good_presynaptic_cell = good_presynaptic_cells["n_unique_barcodes"]
bar_per_good_starter_cell = good_starter_cells["n_unique_barcodes"]

# Cells per barcode: np.unique returns the sorted distinct `main_barcode`
# values together with the number of cells carrying each one.
starter_barcodes, starter_counts = np.unique(starter_cells["main_barcode"], return_counts=True)
good_starters_barcodes, good_starter_counts = np.unique(
    good_starter_cells["main_barcode"], return_counts=True
)
presynaptic_cells_per_barcode = np.unique(presynaptic_cells["main_barcode"], return_counts=True)[1]
good_non_starter_barcodes, good_presynaptic_cells_per_barcode = np.unique(
    good_presynaptic_cells["main_barcode"], return_counts=True
)
# Count-only names; these refer to the same arrays as the `*_counts` above.
starter_cells_per_barcode = starter_counts
good_starters_per_barcode = good_starter_counts

# Include barcodes with no starter cells: every barcode seen in a good
# presynaptic cell but in no good starter cell adds a zero to the
# starters-per-barcode distribution.
# Both `barcodes_*` names hold shape tuples `(n,)`, not the barcodes themselves.
barcodes_not_in_starters = good_non_starter_barcodes[
    ~np.isin(good_non_starter_barcodes, good_starters_barcodes)
].shape
barcodes_only_in_starters = good_starters_barcodes[
    ~np.isin(good_starters_barcodes, good_non_starter_barcodes)
].shape
all_starters_per_barcode = np.concatenate(
    [np.zeros(barcodes_not_in_starters), good_starters_per_barcode]
)


# 2x3 summary figure: top row describes presynaptic cells, bottom row starter
# cells. Columns: rolonies per cell, unique barcodes per cell, cells per barcode.
#
# All plotted quantities are integer counts, so every histogram uses unit-width
# bins. Bin edges must extend one step PAST the maximum: np.arange excludes its
# stop value and hist() ignores data outside the outermost edges, so stopping
# at `max` (or `max + 1` for the -0.5 shifted edges) drops the largest values.
fig, axes = plt.subplots(2, 3, figsize=(10, 5), dpi=200)

per_cell_rows = [
    ("presynaptic", rol_per_presynaptic_cell, bar_per_good_presynaptic_cell),
    ("starter", rol_per_starter_cell, bar_per_good_starter_cell),
]
for row, (cell_type, rolonies, n_barcodes) in enumerate(per_cell_rows):
    # Panels 1 and 4: rolonies per cell, log-scaled counts, bars spanning [k, k + 1).
    ax = axes[row, 0]
    ax.hist(rolonies, bins=np.arange(0, rolonies.max() + 2), log=True, color="grey")
    ax.set_xlim(0, 100)
    ax.set_ylim(1, 5000)
    ax.set_xlabel(f"Barcode rolonies per {cell_type} cell")
    ax.set_ylabel("Number of cells")

    # Panels 2 and 5: unique barcodes per cell, bars centred on each integer.
    ax = axes[row, 1]
    ax.hist(n_barcodes, bins=np.arange(1, n_barcodes.max() + 2) - 0.5, color="lightblue")
    ax.set_xlim(0.5, 5.5)
    ax.set_xlabel(f"Unique barcodes per {cell_type} cell (count > 5)")

# Panel 3: presynaptic cells per barcode. Each sample is one barcode, so the
# y axis counts barcodes. align="left" centres each bar on its integer value.
ax = axes[0, 2]
ax.hist(
    good_presynaptic_cells_per_barcode,
    bins=np.arange(0, good_presynaptic_cells_per_barcode.max() + 2),
    align="left",
)
ax.set_xlabel("Presynaptic cells per barcode (count > 5)")
ax.set_ylabel("Number of barcodes")

# Panel 6: starter cells per barcode, including the zero-padded entries.
ax = axes[1, 2]
ax.hist(
    all_starters_per_barcode,
    bins=np.arange(0, all_starters_per_barcode.max() + 2) - 0.5,
)
ax.set_xlabel("Starter cells per barcode (count > 5)")
ax.set_ylabel("Number of barcodes")
ax.set_xlim(-0.5, 5.5)

fig.tight_layout()
plt.show()

In [None]:
# 2x3 figure, cumulative variant of the summary: panel 1 overlays the
# normalised cumulative rolony distributions of presynaptic and starter cells,
# panel 4 shows the cumulative starter-cell count; the other panels are plain
# histograms.
#
# All plotted quantities are integer counts, so every histogram uses unit-width
# bins. Bin edges must extend one step PAST the maximum: np.arange excludes its
# stop value and hist() ignores data outside the outermost edges, so stopping
# at `max` (or `max + 1` for the -0.5 shifted edges) drops the largest values
# and, for density=True, changes the normalisation.
fig, axes = plt.subplots(2, 3, figsize=(10, 5), dpi=200)

# Panel 1: cumulative fraction of cells vs. rolonies per cell, both cell types.
ax = axes[0, 0]
ax.hist(
    rol_per_presynaptic_cell,
    bins=np.arange(0, rol_per_presynaptic_cell.max() + 2),
    color="blue",
    cumulative=True,
    histtype="step",
    density=True,
    label="presynaptic",
)
ax.hist(
    rol_per_starter_cell,
    bins=np.arange(0, rol_per_starter_cell.max() + 2),
    color="grey",
    cumulative=True,
    histtype="step",
    density=True,
    label="starter",
)
ax.set_xlim(0, 100)
ax.set_xlabel("Barcode rolonies per cell")
ax.set_ylabel("Cumulative fraction of cells")
ax.legend()

# Panel 2: unique barcodes per presynaptic cell, bars centred on each integer.
ax = axes[0, 1]
ax.hist(
    bar_per_good_presynaptic_cell,
    bins=np.arange(1, bar_per_good_presynaptic_cell.max() + 2) - 0.5,
    color="lightblue",
)
ax.set_xlim(0.5, 5.5)
ax.set_xlabel("Unique barcodes per presynaptic cell (count > 5)")

# Panel 3: presynaptic cells per barcode. Each sample is one barcode, so the
# y axis counts barcodes. align="left" centres each bar on its integer value.
ax = axes[0, 2]
ax.hist(
    good_presynaptic_cells_per_barcode,
    bins=np.arange(0, good_presynaptic_cells_per_barcode.max() + 2),
    align="left",
)
ax.set_xlabel("Presynaptic cells per barcode (count > 5)")
ax.set_ylabel("Number of barcodes")

# Panel 4: cumulative (un-normalised) count of starter cells vs. rolonies per cell.
ax = axes[1, 0]
ax.hist(
    rol_per_starter_cell,
    bins=np.arange(0, rol_per_starter_cell.max() + 2),
    color="grey",
    cumulative=True,
)
ax.set_xlim(0, 100)
ax.set_xlabel("Barcode rolonies per starter cell")
ax.set_ylabel("Cumulative number of cells")

# Panel 5: unique barcodes per starter cell, bars centred on each integer.
ax = axes[1, 1]
ax.hist(
    bar_per_good_starter_cell,
    bins=np.arange(1, bar_per_good_starter_cell.max() + 2) - 0.5,
    color="lightblue",
)
ax.set_xlim(0.5, 5.5)
ax.set_xlabel("Unique barcodes per starter cell (count > 5)")

# Panel 6: starter cells per barcode, including the zero-padded entries.
ax = axes[1, 2]
ax.hist(
    all_starters_per_barcode,
    bins=np.arange(0, all_starters_per_barcode.max() + 2) - 0.5,
)
ax.set_xlabel("Starter cells per barcode (count > 5)")
ax.set_ylabel("Number of barcodes")
ax.set_xlim(-0.5, 5.5)

fig.tight_layout()
plt.show()