In [None]:
# Enable IPython's autoreload extension in mode 2 so that edits to imported
# modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
from brisc.manuscript_analysis import connectivity_matrices as conn_mat
from brisc.manuscript_analysis import distance_between_cells as dist
from brisc.manuscript_analysis import bootstrapping as boot
from brisc.manuscript_analysis import load
from brisc.exploratory_analysis.plot_summary_for_all_bc import compute_flatmap_coors, get_avg_layer_depth

from pathlib import Path
import numpy as np
import matplotlib.pyplot as plt
import matplotlib
import pandas as pd

matplotlib.rcParams["pdf.fonttype"] = 42  # for pdfs

from iss_preprocess.io import get_processed_path

In [None]:
# Resolve the pickled cell/barcode table for this dataset and load it.
processed_path = get_processed_path(
    "becalia_rabies_barseq/BRAC8498.3e/analysis/cell_barcode_df.pkl"
)
print("Loading data")
load_options = dict(
    areas_to_empty=["fiber tracts", "outside"],
    valid_areas=["Isocortex", "TH"],
    distance_threshold=150,
)
cells_df = load.load_cell_barcode_data(processed_path, **load_options)

print("Projection on flatmap")
# Note carried over from the original author: cells located in the white
# matter cannot be projected, so the "moved" coordinates are used. Coordinates
# that were only just moved can still fail because flooring may leave them one
# voxel off.
flat_coors = compute_flatmap_coors(cells_df, distance_cutoff=150)
# Columns 0, 1, 2 of the projection are stored as flatmap_x / _y / _z.
for axis_index, axis_name in enumerate(("x", "y", "z")):
    cells_df[f"flatmap_{axis_name}"] = flat_coors[:, axis_index]


In [None]:

# Second projection, this time requesting the "normalized_layer" thickness
# type on the dorsal flatmap, stored in separate flatmap_normalised_* columns.
norm_flat = compute_flatmap_coors(
    cells_df,
    col_prefix="ara_",
    col_suffix="",
    projection="flatmap_dorsal",
    distance_cutoff=150,
    thickness_type="normalized_layer",
)
for axis_index, axis_name in enumerate(("x", "y", "z")):
    cells_df[f"flatmap_normalised_{axis_name}"] = norm_flat[:, axis_index]

# Report how many cells with a cortical area (other than TH / hippocampal)
# ended up without a flatmap x coordinate.
cortical_area_col = cells_df["cortical_area"]
is_cortical = cortical_area_col.notna() & ~cortical_area_col.isin(["TH", "hippocampal"])
ctx_cells = cells_df[is_cortical]
n_without_flatmap = ctx_cells["flatmap_x"].isna().sum()
print(f"{n_without_flatmap}/{len(ctx_cells)} cortical cells have no flatmap coordinates")

print("Matching barcodes")
# Both calls are used for their effect on cells_df (no return value is kept).
# NOTE(review): presumably add_connection_distances creates the "distances"
# column that is filtered on further down - confirm.
conn_mat.match_barcodes(cells_df)
dist.add_connection_distances(cells_df, cols=["flatmap_x", "flatmap_y"])

In [None]:
# Restrict the analysis to cells that (1) are not assigned to one of the listed
# non-excitatory clusters, (2) have a cortical layer, (3) sit in one of
# `layers` once L1 has been merged into L2/3, and (4) whose largest connection
# distance is below 100.
non_excitatory_cell_types = ["Vip", "Sst", "Pvalb", "Lamp5", "VLMC"]
non_excitatory = cells_df["best_cluster"].isin(non_excitatory_cell_types)

cells_df = cells_df[~non_excitatory]
layers = ["L2/3", "L4", "L5", "L6a", "L6b"]
# local connections
# .copy() makes the filtered frame independent of the frame it was sliced from,
# so the in-place relabelling below does not raise SettingWithCopyWarning.
cells_df = cells_df[cells_df["cortical_layer"].notnull()].copy()
# change any cells with cells_df["cortical_layer"] == "L1" to "L2/3"
cells_df.loc[cells_df["cortical_layer"] == "L1", "cortical_layer"] = "L2/3"
cells_df = cells_df[cells_df["cortical_layer"].isin(layers)]
# The lambda argument is deliberately not called `dist`, which is the alias of
# the distance_between_cells module imported at the top of the notebook.
max_connection_distance = cells_df["distances"].apply(
    lambda cell_distances: np.max(cell_distances)
)
cells_df = cells_df[max_connection_distance < 100]

grouping = "cortical_layer"  # "area_acronym_ancestor_rank1"

# Shuffle the barcodes assigned to each cell in the connectivity matrix.
# Both runs share these settings and differ only in `output_fraction`.
shuffle_options = dict(
    n_permutations=1000,
    shuffle_starters=False,
    shuffle_presyn=True,
    starter_grouping=grouping,
    presyn_grouping=grouping,
)

# Shuffled matrices computed with output_fraction=False.
(
    shuffled_cell_barcode_dfs,
    shuffled_matrices,
    mean_input_fraction_dfs,
    starter_input_fractions,
    _,
) = conn_mat.shuffle_and_compute_connectivity(
    cells_df, output_fraction=False, **shuffle_options
)

# Shuffled matrices computed with output_fraction=True. This overwrites
# `shuffled_cell_barcode_dfs` and `shuffled_matrices` from the first run.
(
    shuffled_cell_barcode_dfs,
    shuffled_matrices,
    output_fraction_dfs,
    _,
    _,
) = conn_mat.shuffle_and_compute_connectivity(
    cells_df, output_fraction=True, **shuffle_options
)

# Observed (unshuffled) connectivity, once per fraction type. Both calls group
# starters and presynaptic cells by `grouping`.
matrix_options = dict(starter_grouping=grouping, presyn_grouping=grouping)

input_results = conn_mat.compute_connectivity_matrix(
    cells_df, output_fraction=False, **matrix_options
)
connectivity_matrix, mean_input_fraction, fractions_df, _ = input_results

# `connectivity_matrix` is rebound here; only the second element is new.
output_results = conn_mat.compute_connectivity_matrix(
    cells_df, output_fraction=True, **matrix_options
)
connectivity_matrix, output_fraction, _, _ = output_results

In [None]:
def _compare_with_shuffles(observed, shuffled):
    """Filter observed and shuffled matrices to `layers`, then compare them.

    `observed` is passed as-is and `shuffled` is converted with `np.array`
    before both go through `conn_mat.filter_matrices` (rows and columns ordered
    by `layers`). The filtered outputs are forwarded to
    `conn_mat.compare_to_shuffle`, whose return value is returned unchanged.
    """
    filtered = conn_mat.filter_matrices(
        observed,
        np.array(shuffled),
        row_order=layers,
        col_order=layers,
    )
    return conn_mat.compare_to_shuffle(*filtered)


input_fraction_log_ratio, input_fraction_pval = _compare_with_shuffles(
    mean_input_fraction, mean_input_fraction_dfs
)

output_fraction_log_ratio, output_fraction_pval = _compare_with_shuffles(
    output_fraction, output_fraction_dfs
)

In [None]:
layers = ["L2/3", "L4", "L5", "L6a", "L6b"]

counts_df, mean_input_frac_df, fractions_df, _ = conn_mat.compute_connectivity_matrix(
    cells_df,
    starter_grouping="cortical_layer",  # "area_acronym_ancestor_rank1",
    presyn_grouping="cortical_layer",  # "area_acronym_ancestor_rank1",
)

from tqdm import tqdm

# Bootstrap 95% intervals of the mean of the `layers` columns of fractions_df,
# computed separately for each "cortical_layer" group.
nboot = 1000
# Seeded generator so the intervals are identical on every run of the notebook.
BOOTSTRAP_SEED = 0
boot_rng = np.random.default_rng(BOOTSTRAP_SEED)
bootstrap_samples = []

for _ in tqdm(range(nboot)):
    rows = []
    for layer, group in fractions_df.groupby("cortical_layer", observed=True):
        # Resample the rows of this group with replacement, then average.
        resampled = group.sample(n=len(group), replace=True, random_state=boot_rng)
        this_layer = resampled[layers].mean()
        this_layer.name = layer
        rows.append(this_layer)
    # Index: the averaged `layers` columns; columns: the cortical_layer groups.
    # The labels are discarded by np.array below and re-attached by position,
    # so force both axes into `layers` order (a missing group becomes NaN).
    boot_matrix = pd.concat(rows, axis=1).reindex(index=layers, columns=layers)
    bootstrap_samples.append(boot_matrix)
bootstrap_samples = np.array(bootstrap_samples)
lower_df = pd.DataFrame(
    data=np.quantile(bootstrap_samples, 0.025, axis=0), index=layers, columns=layers
)
upper_df = pd.DataFrame(
    data=np.quantile(bootstrap_samples, 0.975, axis=0), index=layers, columns=layers
)

In [None]:
# Display names: each layer label without its leading "L".
areas = {f"L{label}": label for label in ("2/3", "4", "5", "6a", "6b")}

# Row sums of the reorganised count matrix.
# NOTE(review): reorganise_matrix is called without `areas=areas` here, unlike
# the plotting calls further down - confirm the resulting index is intended.
counts_reorganised = conn_mat.reorganise_matrix(counts_df)
presynaptic_counts = counts_reorganised.sum(axis=1)

# Number of rows of fractions_df per cortical layer, relabelled with `areas`.
layer_row_counts = fractions_df.value_counts("cortical_layer")
starter_counts = layer_row_counts.rename(index=areas)

In [None]:
from brisc.manuscript_analysis import distance_between_cells as dist_cells

# Compute presynaptic distances from the "flatmap_"-prefixed columns; the
# call returns three values, unpacked below.
presyn_distance_outputs = dist_cells.determine_presynaptic_distances(
    cells_df, col_prefix="flatmap_"
)
relative_presyn_coords_flatmap, distancess_flatmap, starters_df = presyn_distance_outputs

In [None]:
# Quick check: largest value in column 2 of the stacked presynaptic
# coordinates (the column the figure code below uses as depth).
stacked_presyn_coors = np.hstack(starters_df["presynaptic_coors"].values)[0]
p = stacked_presyn_coors[:, 2]
p.max()

In [None]:
# Plot Fig.1
# Shared text sizes, paddings and line styling for all panels.
fontsize_dict = dict(title=7, label=7, tick=6, legend=6)
pad_dict = dict(label=1, tick=1, legend=5)
line_width = 0.9
line_alpha = 1

cm = 1 / 2.54  # inches per centimetre
fig_side = 17.4 * cm
fig = plt.figure(figsize=(fig_side, fig_side), dpi=200)

# Alternative location: Path("Z:/home/shared/presentations/becalick_2025")
save_path = Path("/nemo/lab/znamenskiyp/home/shared/presentations/becalick_2025")

save_fig = False
figname = "matrices"

# Presynaptic scatters: one panel per entry of `layers`. Each panel shows the
# presynaptic cells of that layer's starters (black) and the starters
# themselves at x = 0 (orange), with dashed lines at the layer borders.
w = 0.17
scl = 10  # scale to put distance in um
avg_layer_tops = get_avg_layer_depth()
layer_borders = np.hstack([0, np.sort(np.hstack(list(avg_layer_tops.values())))])
layer_borders[-1] = 1200
midlayer = np.diff(layer_borders) / 2 + layer_borders[:-1]
for il, layer in enumerate(layers):
    axl = fig.add_axes([0.05 + (w + 0.01) * il, 0.6, w, 0.2], aspect="equal")
    for d in layer_borders:
        axl.axhline(d, ls="--", lw=0.5, color="lightgray")
    this_layer = starters_df[starters_df["cortical_layer"] == layer]

    # Stack the relative coordinates once and slice both columns from it.
    rel_coords = np.hstack(this_layer["presynaptic_coors_relative"].values)[0]
    rel_ap = rel_coords[:, 0] * scl
    rel_ml = rel_coords[:, 1] * scl
    abs_depth = np.hstack(this_layer["presynaptic_coors"].values)[0][:, 2] * scl
    # Draw on `axl` explicitly rather than relying on pyplot's current axes.
    axl.scatter(rel_ml, abs_depth, marker=".", color="k", alpha=0.5, ec="None", s=5)
    axl.scatter(
        np.zeros(len(this_layer)),
        this_layer["flatmap_z"] * scl,
        marker=".",
        color="darkorange",
        s=20,
        alpha=0.3,
        ec="None",
    )
    axl.set_xlim(-800, 800)
    axl.set_ylim(1250, -50)  # reversed limits: larger values are drawn lower
    axl.set_title(f"Layer {layer[1:]}", fontsize=fontsize_dict["title"])
    axl.set_axis_off()
# Layer names are written at the mid-point between consecutive borders, on the
# last panel created above (`axl` still refers to it after the loop).
for il, layer in enumerate(["L1"] + layers):
    axl.text(
        750,
        midlayer[il],
        layer[1:],
        fontsize=fontsize_dict["legend"],
        horizontalalignment="center",
        verticalalignment="center",
    )

# Text and line styling shared by the three panels of this row.
matrix_row_style = dict(
    label_fontsize=fontsize_dict["label"],
    tick_fontsize=fontsize_dict["tick"],
    line_width=line_width,
)

# Raw counts
ax_counts = fig.add_axes([0.07, 0.35, 0.2, 0.2])
conn_mat.plot_area_by_area_connectivity(
    conn_mat.reorganise_matrix(counts_df, areas=areas),
    starter_counts,
    presynaptic_counts,
    ax_counts,
    input_fraction=False,
    odds_ratio=False,
    **matrix_row_style,
)

# Input fraction, with its own small colourbar axes
ax_input_fraction = fig.add_axes([0.29, 0.35, 0.2, 0.2])
ax_input_fraction_cb = fig.add_axes([0.50, 0.35, 0.01, 0.05])
conn_mat.plot_area_by_area_connectivity(
    conn_mat.reorganise_matrix(mean_input_fraction, areas=areas),
    starter_counts,
    presynaptic_counts,
    ax_input_fraction,
    input_fraction=True,
    odds_ratio=False,
    show_counts=False,
    cbax=ax_input_fraction_cb,
    cbar_label="Input\nfraction",
    **matrix_row_style,
)
# The y label and y ticks are removed on this panel.
ax_input_fraction.set_ylabel("")
ax_input_fraction.set_yticks([])

# Confidence interval of input fraction (bootstrap lower/upper bounds)
ax_input_fraction_bars = fig.add_axes([0.62, 0.37, 0.25, 0.18])
boot.plot_confidence_intervals(
    conn_mat.reorganise_matrix(mean_input_frac_df, areas=areas),
    conn_mat.reorganise_matrix(lower_df, areas=areas),
    conn_mat.reorganise_matrix(upper_df, areas=areas),
    ax_input_fraction_bars,
    orientation="horizontal",
    **matrix_row_style,
)

# Schematics: connectivity diagram drawn from the mean input fraction and the
# bootstrap bounds, with one node per layer.
ax_schematics = fig.add_axes([0.85, 0.37, 0.15, 0.18])
cax_schematics = fig.add_axes([0.98, 0.35, 0.01, 0.05])
node_coordinates = [(2, 6), (0, 5), (2, 4), (0, 3), (2, 2)]
positions = dict(zip(layers, node_coordinates))
diagram_options = dict(
    connection_names=layers,
    positions=positions,
    display_names=[layer_name[1:] for layer_name in layers],
    node_style={
        "facecolor": "Lightgray",
        "radius": 0.5,
        "fontsize": fontsize_dict["title"],
    },
    min_fraction_cutoff=0.2,
    ci_to_alpha=False,
    ci_cmap="plasma_r",
    edge_width_scale=10,
    arrow_head_scale=20,
    arrow_style={"connectionstyle": "Arc3, rad=-0.2", "ec": "k"},
    vmin=0,
    vmax=1,
    ax=ax_schematics,
    cax=cax_schematics,
)
# NOTE(review): this rebinds `fig` (and creates `ax`) from the return value;
# presumably the same figure is returned since `ax` is supplied - confirm.
fig, ax, cbar = conn_mat.connectivity_diagram_mpl(
    mean_input_fraction,
    lower_df,
    upper_df,
    **diagram_options,
)
cbar.set_ticks([0, 0.5, 1])


# Text sizes shared by the three panels of the bottom row.
bottom_row_text = dict(
    label_fontsize=fontsize_dict["label"],
    tick_fontsize=fontsize_dict["tick"],
)

# Bubble plot input fraction (log ratio and p-value against the shuffles)
ax_bubble_plot_input_frac = fig.add_axes([0.07, 0.05, 0.2, 0.2])
ax_bubble_plot_input_frac_cb = fig.add_axes([0.29, 0.2, 0.01, 0.05])
conn_mat.bubble_plot(
    conn_mat.reorganise_matrix(input_fraction_log_ratio, areas=areas),
    conn_mat.reorganise_matrix(input_fraction_pval, areas=areas),
    alpha=0.05,
    size_scale=250,
    ax=ax_bubble_plot_input_frac,
    cbax=ax_bubble_plot_input_frac_cb,
    **bottom_row_text,
)

# Output fraction
ax_output_fraction = fig.add_axes([0.45, 0.05, 0.2, 0.2])
ax_output_fraction_cb = fig.add_axes([0.66, 0.05, 0.01, 0.05])
conn_mat.plot_area_by_area_connectivity(
    conn_mat.reorganise_matrix(output_fraction, areas=areas),
    starter_counts,
    presynaptic_counts,
    ax_output_fraction,
    cbax=ax_output_fraction_cb,
    cbar_label="Output\nfraction",
    input_fraction=True,
    odds_ratio=False,
    line_width=line_width,
    show_counts=False,
    **bottom_row_text,
)

# Bubble plot output fraction (no legend on this one)
ax_bubble_plot_output_frac = fig.add_axes([0.78, 0.05, 0.2, 0.2])
conn_mat.bubble_plot(
    conn_mat.reorganise_matrix(output_fraction_log_ratio, areas=areas),
    conn_mat.reorganise_matrix(output_fraction_pval, areas=areas),
    alpha=0.05,
    size_scale=250,
    ax=ax_bubble_plot_output_frac,
    show_legend=False,
    **bottom_row_text,
)

# Write the figure as conn_mat.pdf and conn_mat.png when saving is enabled.
# NOTE(review): `figname` is defined in the figure set-up but not used for the
# file name - confirm which name is intended.
if save_fig:
    for extension in ("pdf", "png"):
        fig.savefig(save_path / f"conn_mat.{extension}", format=extension)

In [None]:
from brisc.manuscript_analysis import distance_between_cells as dist_cells

# Same call as in the earlier presynaptic-distance cell, repeated here so that
# this cell has its own `starters_df`.
presyn_distance_outputs = dist_cells.determine_presynaptic_distances(
    cells_df, col_prefix="flatmap_"
)
relative_presyn_coords_flatmap, distancess_flatmap, starters_df = presyn_distance_outputs

plt.figure(figsize=(10, 5))
nboot = 1000
# One bootstrap distribution of the median distance per layer (rows).
dist_boot = np.zeros((len(layers), nboot))
# NOTE(review): this rebinds `dist`, which until now was the alias of the
# distance_between_cells module imported at the top of the notebook. Re-running
# an earlier cell that calls `dist.add_connection_distances` will fail after
# this line has executed.
dist = np.zeros(len(layers))


def get_median_dist(starters):
    """Return the NaN-ignoring median planar distance of presynaptic cells.

    Args:
        starters: Table with a "presynaptic_coors_relative" column. The column
            entries are stacked with `np.hstack`; element 0 of the stacked
            array is read as a 2-D array whose columns 0 and 1 are the two
            relative coordinates (named `relative_ap` and `relative_ml` here).

    Returns:
        `np.nanmedian` of `sqrt(relative_ml**2 + relative_ap**2)` over all
        stacked rows.
    """
    # Stack once and slice both columns from the same array; this function is
    # called once per bootstrap iteration, so the stacking cost matters.
    stacked = np.hstack(starters["presynaptic_coors_relative"].values)[0]
    relative_ap = stacked[:, 0]
    relative_ml = stacked[:, 1]
    distances = ((relative_ml**2 + relative_ap**2) ** 0.5).astype(float)
    return np.nanmedian(distances)


# For each layer: draw a quick-look scatter of presynaptic cells and starters,
# fill row `i` of `dist_boot` with bootstrap medians, and store the observed
# median planar distance in `dist[i]`.
for i, layer in enumerate(layers):

    # Panel i + 1 of a 2 x 6 grid on the current pyplot figure.
    plt.subplot(2, 6, i + 1)
    this_layer = starters_df[starters_df["cortical_layer"] == layer]
    # Columns 0 and 1 of the stacked relative presynaptic coordinates.
    rel_ap = np.hstack(this_layer["presynaptic_coors_relative"].values)[0][:, 0]
    rel_ml = np.hstack(this_layer["presynaptic_coors_relative"].values)[0][:, 1]

    # Column 2 of the stacked (non-relative) presynaptic coordinates.
    abs_depth = np.hstack(this_layer["presynaptic_coors"].values)[0][:, 2]
    # Bootstrap: resample this layer's rows with replacement and recompute the
    # median distance each time. The resampling is unseeded, so values vary
    # between runs.
    for iboot in range(nboot):
        dist_boot[i, iboot] = get_median_dist(
            this_layer.sample(replace=True, n=len(this_layer), axis=0)
        )
    plt.plot(rel_ml, abs_depth, ",k", alpha=0.5)
    # plt.plot(relative_ap, relative_ml, ',k', alpha=0.5)
    # Starters are drawn at x = 0 at their flatmap_z value.
    plt.plot(np.zeros(len(this_layer)), this_layer["flatmap_z"], ".", color="r")

    # The explicit ylim([100, 0]) below already sets reversed y limits.
    plt.gca().invert_yaxis()
    plt.axis("equal")
    plt.axis("off")
    plt.xlim([-100, 100])
    plt.ylim([100, 0])

    # Observed median planar distance for this layer (NaNs ignored).
    distances = ((rel_ml**2 + rel_ap**2) ** 0.5).astype(float)
    dist[i] = np.nanmedian(distances)
    print(dist[i])