In [None]:
%matplotlib inline
%load_ext autoreload
%autoreload 2

In [None]:
import flexiznam as flz
import pandas as pd
import numpy as np

# Load data

Load big dataframe and add projected coordinates as well as retinotopy

In [None]:
# Read the per-cell barcode table from the processed-data folder
df_file = flz.get_processed_path(
    "becalia_rabies_barseq/BRAC8498.3e/analysis/cell_barcode_df.pkl"
)
full_df = pd.read_pickle(df_file)

# slice_id is "<chamber>_<roi>", built from the string form of both columns
full_df["slice_id"] = (full_df["chamber"].astype(str) + "_") + full_df["roi"].astype(
    str
)
print(f"Loaded {len(full_df)} cells")

# Keep only rows with a main barcode; copy so that columns added later
# do not write into full_df
barcoded_cells = full_df.loc[full_df["main_barcode"].notna()].copy()
print(f"Found {len(barcoded_cells)} barcoded cells")

In [None]:
# Add slice number to rabies cell properties
from iss_analysis.io import get_sections_info

section_infos = get_sections_info(project="becalia_rabies_barseq", mouse="BRAC8498.3e")

# Cells whose (chamber, roi) pair is not listed in section_infos keep NaN
barcoded_cells["slice"] = np.nan
for islice, slice_prop in section_infos.iterrows():
    same_chamber = barcoded_cells["chamber"] == slice_prop["chamber"]
    same_roi = barcoded_cells["roi"] == slice_prop["roi"]
    # The row label of section_infos is used as the slice number
    barcoded_cells.loc[same_chamber & same_roi, "slice"] = islice

In [None]:
# project barcoded cells on flatmap, both dorsal and top view
from brisc.exploratory_analysis.plot_summary_for_all_bc import get_projector

# Create the six output columns (x/y/z for each view); they stay NaN for
# cells that are not projected
for view in ("flat", "top"):
    for axis in ("x", "y", "z"):
        barcoded_cells[f"{axis}_{view}"] = np.nan

# Only cells with strictly positive, non-missing ARA coordinates are projected
cell_mask = barcoded_cells.query("ara_x > 0 and ara_y > 0 and ara_z > 0")
cell_mask = cell_mask.dropna(subset=["ara_x", "ara_y", "ara_z"])
coords = cell_mask[["ara_x", "ara_y", "ara_z"]].values

# Dorsal flatmap view
# NOTE(review): the x1000 factor is presumably a mm -> um conversion — confirm
flat_projector = get_projector()
flat_coords = flat_projector.project_coordinates(
    coords * 1000,
    drop_voxels_outside_view_streamlines=False,
    hemisphere="right",
    view_space_for_other_hemisphere="flatmap_dorsal",
)
for icol, axis in enumerate(("x", "y", "z")):
    barcoded_cells.loc[cell_mask.index, f"{axis}_flat"] = flat_coords[:, icol]

# Top view, same cells and same input coordinates
top_projector = get_projector("top")
top_coords = top_projector.project_coordinates(
    coords * 1000,
    drop_voxels_outside_view_streamlines=False,
    hemisphere="right",
)
for icol, axis in enumerate(("x", "y", "z")):
    barcoded_cells.loc[cell_mask.index, f"{axis}_top"] = top_coords[:, icol]

In [None]:
# Add retinotopy
from cricksaw_analysis import atlas_utils

ara_elevation, ara_azimuth = atlas_utils.get_ara_retinotopic_map()
for angle in ("elevation", "azimuth"):
    barcoded_cells[angle] = np.nan

# Only cells with a defined top-view position can be looked up in the maps
cell_mask = barcoded_cells.dropna(subset=["x_top", "y_top"])
# Truncate the top-view coordinates to integers to use them as map indices
coords = cell_mask[["x_top", "y_top"]].values.astype(int)
map_rows, map_cols = coords[:, 1], coords[:, 0]  # maps are indexed [y, x]
barcoded_cells.loc[cell_mask.index, "elevation"] = ara_elevation[map_rows, map_cols]
barcoded_cells.loc[cell_mask.index, "azimuth"] = ara_azimuth[map_rows, map_cols]

# Find starter for each cell

For cells for which there is one unique starter, label which one it is.

In [None]:
# First find barcodes that are in only one starter
starter_cells = barcoded_cells.query("is_starter == True")
# The starter pool is restricted to VISp, so "unique" below means
# "present in exactly one VISp starter"
starter_cells = starter_cells.query("cortical_area == 'VISp'")
# One row per (starter, barcode) pair
starter2bc = starter_cells.all_barcodes.explode()
bc_cnt = starter2bc.value_counts()
unique_barcodes = bc_cnt[bc_cnt == 1].index
print(
    f"Found {len(unique_barcodes)} barcodes present in only 1 starter out of {len(bc_cnt)} in {len(starter_cells)} starter cells"
)

# One row per (cell, barcode) pair over all barcoded cells, starters included
cell2bc = barcoded_cells.all_barcodes.explode()
unique_bc_cells = cell2bc[cell2bc.isin(unique_barcodes)]
print(
    f"Found {len(unique_bc_cells)} cells with unique barcodes (including the starters)"
)

barcoded_cells["is_unique_bc"] = False
barcoded_cells.loc[unique_bc_cells.index, "is_unique_bc"] = True
# A separate empty set per row (a shared set would be mutated for every cell)
barcoded_cells["unique_bc"] = [set() for _ in range(len(barcoded_cells))]
# Find the corresponding starter index too
# NOTE(review): `mask_uid` is presumably the name of the cell index, turned
# into a column by reset_index — verify
bc2starter = starter2bc.reset_index().set_index("all_barcodes")
barcoded_cells["starter_ids"] = [set() for _ in range(len(barcoded_cells))]
for mask, bc in unique_bc_cells.items():
    # `.loc[label, column]` hands back the stored set, which `.add` mutates in
    # place (assumes the cell index is unique — TODO confirm)
    barcoded_cells.loc[mask, "unique_bc"].add(bc)
    barcoded_cells.loc[mask, "starter_ids"].add(bc2starter.loc[bc].mask_uid)

# A single starter is recorded only when exactly one candidate was found;
# cells with zero or several candidate starters keep NaN
barcoded_cells["starter_id"] = np.nan
for idx, starter_ids in barcoded_cells.starter_ids.items():
    if len(starter_ids) == 1:
        barcoded_cells.loc[idx, "starter_id"] = list(starter_ids)[0]

In [None]:
from iss_preprocess.io.load import get_pixel_size

px_size = get_pixel_size(data_path="becalia_rabies_barseq/BRAC8498.3e/chamber_08")

ara_cols = ["ara_x", "ara_y", "ara_z"]
raw_cols = ["x", "y"]


def _as_float(frame_or_series, columns):
    """Return the selected columns as a float numpy array."""
    return frame_or_series[columns].values.astype(float)


# One array of distances per unique barcode, concatenated after the loop
dstraw, dstara, dstara_all, dstara_next_prev = [], [], [], []
for bc in unique_barcodes:
    bc_cells = cell2bc[cell2bc == bc].index
    # Non-starter cells carrying this barcode, V1 only
    presynaptic = (
        barcoded_cells.loc[bc_cells]
        .query("is_starter == False")
        .query("cortical_area == 'VISp'")
    )
    starter = bc2starter.loc[bc].mask_uid
    starter_prop = starter_cells.loc[starter]
    starter_slice = starter_prop["slice"]

    same_slice = presynaptic.query("slice == @starter_slice")
    neighbour_slices = presynaptic.query(
        "slice == @starter_slice - 1 or slice == @starter_slice + 1"
    )

    # Starter position in raw image space (scaled by pixel size) and in ARA space
    start_raw = _as_float(starter_prop, raw_cols) * px_size
    start_ara = _as_float(starter_prop, ara_cols)

    dstara.append(np.linalg.norm(_as_float(same_slice, ara_cols) - start_ara, axis=1))
    dstraw.append(
        np.linalg.norm(_as_float(same_slice, raw_cols) * px_size - start_raw, axis=1)
    )
    dstara_all.append(
        np.linalg.norm(_as_float(presynaptic, ara_cols) - start_ara, axis=1)
    )
    dstara_next_prev.append(
        np.linalg.norm(_as_float(neighbour_slices, ara_cols) - start_ara, axis=1)
    )

# ARA distances are multiplied by 1000; raw distances are left as computed
dstara = np.hstack(dstara) * 1000
dstraw = np.hstack(dstraw)
dstara_all = np.hstack(dstara_all) * 1000
dstara_next_prev = np.hstack(dstara_next_prev) * 1000

In [None]:
import matplotlib.pyplot as plt

# Histograms of presynaptic-to-starter distance, one panel per distance set
fig, ax = plt.subplots(1, 4, figsize=(15, 4))
bins = np.arange(0, 1000, 20)
panels = [
    (dstraw, "Raw data, within slice"),
    (dstara, "ARA space, within slice"),
    (dstara_all, "ARA space, all slices"),
    (dstara_next_prev, "ARA space, previous and next slices only"),
]
for x, (distances, title) in zip(ax, panels):
    x.hist(np.asarray(distances), bins=bins)
    x.set_title(title)
    x.set_xlabel("Distance to starter (um)")

# tight_layout has to be called; the bare attribute access did nothing
fig.tight_layout()

In [None]:
# Reduce that to starter in V1
# starter_cells is rebuilt here from all starters, whatever their area
starter_cells = barcoded_cells[barcoded_cells["is_starter"] == True]

# V1 starters that carry a barcode found in only one starter
in_v1 = starter_cells["cortical_area"] == "VISp"
has_unique_bc = starter_cells["is_unique_bc"] == True
v1_starter_cells = starter_cells[in_v1 & has_unique_bc].copy()
print(f"Found {len(v1_starter_cells)} V1 starter cells")

# Every barcoded cell whose assigned starter is one of those V1 starters
starter_in_v1 = barcoded_cells["starter_id"].isin(v1_starter_cells.index)
cell_with_v1_starter = barcoded_cells[starter_in_v1].copy()
print(f"... and {len(cell_with_v1_starter)} cells with starter in V1")

In [None]:
# mini helper function to plot background flatmap and set axis limits
from mpl_toolkits.axes_grid1 import make_axes_locatable
from scipy.stats import binned_statistic_2d


# Axis limits shared by the flatmap plots below: plot_flatmap applies xlim as
# is and ylim reversed. They are also the default extent of weighted_median_map.
# NOTE(review): presumably expressed in flatmap coordinates — confirm
xlim = (100, 1150)
ylim = (800, 1400)


def plot_flatmap(ax):
    """Draw the right-hemisphere flatmap on `ax` and frame it.

    Calls `atlas_utils.plot_flatmap` with no area colours and alpha 0, removes
    the ticks, then applies the module-level `xlim` and the reversed `ylim`.

    Args:
        ax (matplotlib.axes.Axes): axes to draw on, modified in place
    """
    atlas_utils.plot_flatmap(
        ax,
        hemisphere="right",
        area_colors={},
        alpha=0,
        ccf_streamlines_folder=None,
    )
    for clear_ticks in (ax.set_xticks, ax.set_yticks):
        clear_ticks([])
    ax.set_xlim(*xlim)
    # Reversed limits: larger y values end up at the bottom of the axes
    ax.set_ylim(ylim[1], ylim[0])


def plot_sp(ax, x, y, c, vmin=None, vmax=None, bins=None, **kwargs):
    """Plot values `c` at positions (`x`, `y`) over the flatmap, with a colorbar.

    Args:
        ax (matplotlib.axes.Axes): axes to draw on
        x, y (array-like): positions of the points
        c (array-like): value attached to each point, mapped to colour
        vmin, vmax (float, optional): colour limits
        bins (optional): if None, draw a scatter plot (marker size 5 unless `s`
            is given). Otherwise passed to `binned_statistic_2d` and the median
            of `c` per bin is shown as an image.
        **kwargs: forwarded to `ax.scatter` or `ax.imshow`; override the
            defaults (`cmap="jet"`, `vmin`, `vmax`)

    Returns:
        The colorbar created next to `ax`.
    """
    style = {"cmap": "jet", "vmin": vmin, "vmax": vmax, **kwargs}
    if bins is not None:
        binned, xedge, yedge, _ = binned_statistic_2d(
            x, y, c, statistic="median", bins=bins
        )
        # The statistic is indexed [x, y]; imshow expects [row=y, col=x]
        mappable = ax.imshow(
            binned.T,
            origin="lower",
            extent=[xedge[0], xedge[-1], yedge[0], yedge[-1]],
            **style,
        )
    else:
        if "s" not in style:
            style["s"] = 5
        mappable = ax.scatter(x, y, c=c, **style)

    plot_flatmap(ax)
    # Thin colorbar axes glued to the right edge of `ax`
    cax = make_axes_locatable(ax).append_axes("right", size="2%", pad="0%")
    return ax.get_figure().colorbar(mappable, cax=cax)

In [None]:
# Plot the scatter of starter positions on the flatmap
import matplotlib.pyplot as plt


presynaptic_cells = cell_with_v1_starter.query("is_starter == False")


def xflat2mm(x):
    """Scale a flatmap x coordinate by 1/100 (used for the mm colour scale)."""
    return x / 100


def yflat2mm(y):
    """Scale a flatmap y coordinate by 1/100 (used for the mm colour scale)."""
    return y / 100


bins = [np.arange(0, 1250, 10), np.arange(850, 1500, 10)]

fig, axes = plt.subplots(3, 2, figsize=(12, 10))

# Colour limits of each column span the range of the V1 starter positions
converters = {"x_flat": xflat2mm, "y_flat": yflat2mm}
clim = {
    coord: (
        to_mm(v1_starter_cells[coord].min()),
        to_mm(v1_starter_cells[coord].max()),
    )
    for coord, to_mm in converters.items()
}
cb_lab = ["Starter M/L position (mm)", "Starter A/P position (mm)"]
for iax, (coord, f) in enumerate(converters.items()):
    low, high = clim[coord]

    # Row 0: starters, coloured by their own position
    cb = plot_sp(
        axes[0, iax],
        v1_starter_cells.x_flat,
        v1_starter_cells.y_flat,
        c=f(v1_starter_cells[coord]),
        vmin=low,
        vmax=high,
        cmap="jet",
    )
    cb.set_label(cb_lab[iax])

    # Row 1: presynaptic cells, coloured by the position of their starter
    starter_coord = v1_starter_cells.loc[presynaptic_cells.starter_id, coord]
    cb = plot_sp(
        axes[1, iax],
        presynaptic_cells.x_flat,
        presynaptic_cells.y_flat,
        c=f(starter_coord),
        vmin=low,
        vmax=high,
        cmap="jet",
    )
    cb.set_label(cb_lab[iax])

    # Row 2: presynaptic cells, coloured by the retinotopy of their starter
    if coord == "x_flat":
        reti = "azimuth"
    else:
        reti = "elevation"
    cb = plot_sp(
        axes[2, iax],
        presynaptic_cells.x_flat,
        presynaptic_cells.y_flat,
        c=v1_starter_cells.loc[presynaptic_cells.starter_id, reti],
        cmap="rainbow",
    )
    cb.set_label(f"Starter {reti} (°)")

for row, row_label in enumerate(
    ["Starter cells", "Presynaptic cells", "Presynaptic cells"]
):
    axes[row, 0].set_ylabel(row_label)
fig.tight_layout()

In [None]:
# Function to calculate weighted median
import brainglobe_atlasapi as bga

# Flatmap positions of the presynaptic cells as plain numpy arrays
presynaptic_x = presynaptic_cells.x_flat.values
presynaptic_y = presynaptic_cells.y_flat.values


# Regular grid covering xlim x ylim with a step of pixel_size / 100 (= 1).
# NOTE(review): xx, yy, pixel_size and bregma are not used in the visible part
# of the notebook, and `xx` is later overwritten by a loop variable — confirm
# whether they are still needed
pixel_size = 100
xx, yy = np.meshgrid(
    np.arange(xlim[0], xlim[1], pixel_size / 100),
    np.arange(ylim[0], ylim[1], pixel_size / 100),
)

# NOTE(review): presumably bregma in atlas voxel coordinates — verify
bregma = np.array([540, 44, 570])


def weighted_median(values, weights):
    """Return the weighted median of values.

    The values are sorted and the first one whose cumulative weight reaches
    half of the total weight is returned.

    Args:
        values (array-like): 1D values to compute the median of
        weights (array-like): weight of each value, must be positive and have
            the same length as `values`

    Returns:
        float: the weighted median of values, or NaN if `values` is empty

    Raises:
        ValueError: if `values` and `weights` have different shapes
    """
    # Coerce so that lists and pandas objects can be fancy-indexed below
    values = np.asarray(values)
    weights = np.asarray(weights, dtype=float)
    if values.shape != weights.shape:
        raise ValueError(
            f"values and weights must have the same shape, "
            f"got {values.shape} and {weights.shape}"
        )
    if values.size == 0:
        return np.nan
    order = np.argsort(values)
    cumulative = np.cumsum(weights[order])
    half_idx = np.searchsorted(cumulative, 0.5 * cumulative[-1])
    return values[order[half_idx]]


# Load the 10 um Allen mouse atlas through brainglobe
atlas = bga.bg_atlas.BrainGlobeAtlas("allen_mouse_10um")
# Half the size of the third atlas axis, as an integer.
# NOTE(review): presumably the left/right midline; not used in the visible
# part of the notebook — confirm
midline = int(atlas.shape[2] / 2)


def weighted_median_map(
    xvalues,
    yvalues,
    cvalues,
    distance_threshold=500,
    min_cells=5,
    xlim=xlim,
    ylim=ylim,
    pixel_size=10,
    map_pixel_size=100,
    sigma=500,
    use_weights=True,
):
    """Compute a map of the local (optionally weighted) median of `cvalues`.

    Cell positions and the map extent are multiplied by `pixel_size`; all
    distances (`distance_threshold`, `map_pixel_size`, `sigma`) are expressed in
    the resulting unit (presumably um — TODO confirm). For each map pixel, the
    cells closer than `distance_threshold` are collected and their median value
    is stored.

    Args:
        xvalues (array-like): x position of each cell, before scaling
        yvalues (array-like): y position of each cell, before scaling
        cvalues (array-like): value attached to each cell
        distance_threshold (float): radius of the neighbourhood around a pixel
        min_cells (int): pixels with `min_cells` cells or fewer are set to NaN
        xlim (tuple): (min, max) extent of the map along x, before scaling
        ylim (tuple): (min, max) extent of the map along y, before scaling
        pixel_size (float): factor applied to positions and limits
        map_pixel_size (float): spacing between map pixels, after scaling
        sigma (float): standard deviation of the Gaussian distance weighting
        use_weights (bool): if True use a Gaussian-weighted median, otherwise
            a plain median

    Returns:
        np.ndarray: array of shape (n_y, n_x), NaN where there are too few cells
    """
    xvalues = np.asarray(xvalues, dtype=float) * pixel_size
    yvalues = np.asarray(yvalues, dtype=float) * pixel_size
    cvalues = np.asarray(cvalues, dtype=float)
    # Drop cells with a missing position or value
    valid = ~(np.isnan(cvalues) | np.isnan(xvalues) | np.isnan(yvalues))
    xvalues = xvalues[valid]
    yvalues = yvalues[valid]
    cvalues = cvalues[valid]

    xranges = np.arange(xlim[0] * pixel_size, xlim[1] * pixel_size, map_pixel_size)
    yranges = np.arange(ylim[0] * pixel_size, ylim[1] * pixel_size, map_pixel_size)
    # Start from NaN so that only pixels with enough cells need to be filled
    weighted_median_values = np.full((len(yranges), len(xranges)), np.nan)
    threshold2 = distance_threshold**2
    for ix, x in enumerate(xranges):
        dx2 = (xvalues - x) ** 2  # invariant along the inner loop
        for iy, y in enumerate(yranges):
            # Squared distance from this pixel to every cell
            dst2cells = dx2 + (yvalues - y) ** 2
            cell_mask = dst2cells < threshold2
            n_cells = np.count_nonzero(cell_mask)
            if n_cells == 0 or n_cells <= min_cells:
                continue
            if n_cells == 1:
                # Only reachable when min_cells < 1. The original statement
                # `value = np.nan = ...` also overwrote numpy's `nan` attribute.
                value = cvalues[cell_mask][0]
            elif use_weights:
                # Gaussian weight exp(-d^2 / (2 sigma^2)); dst2cells is already
                # d^2 and must not be squared again
                value = weighted_median(
                    cvalues[cell_mask],
                    np.exp(-dst2cells[cell_mask] / (2 * sigma**2)),
                )
            else:
                value = np.nanmedian(cvalues[cell_mask])
            weighted_median_values[iy, ix] = value
    return weighted_median_values

In [None]:
# Settings for the shuffle control of the running average (1000 shuffles)
nshuffle = 1000
scale = 10 / 1000
starter_id = presynaptic_cells.starter_id

# Positions in mm: each presynaptic cell is paired with the x position of
# its starter
pre_x = presynaptic_x * scale
st_x = v1_starter_cells.loc[starter_id, "x_flat"].values * scale
# Drop the pairs in which either position is missing
nan = np.isnan(st_x) | np.isnan(pre_x)
pre_x, st_x = pre_x[~nan], st_x[~nan]

# Sliding window: centres every 10/1000 over the presynaptic range, with a
# full width of 300/1000
bw = 300.0 / 1000
bin_centers = np.arange(pre_x.min(), pre_x.max(), 10.0 / 1000)


def running_med(x, y, bin_centers, bw):
    """Running median of `y` in windows of `x`.

    Args:
        x (array-like): position of each sample
        y (array-like): value of each sample, NaNs are ignored
        bin_centers (array-like): centre of each window
        bw (float): full width of the windows; a sample belongs to a window if
            `bc - bw / 2 < x < bc + bw / 2` (both bounds exclusive)

    Returns:
        np.ndarray: float array with one median per window, NaN for windows
            without any sample
    """
    x = np.asarray(x)
    y = np.asarray(y, dtype=float)
    # Always float: an integer `bin_centers` must not truncate the medians or
    # make NaN unstorable
    medians = np.full(len(bin_centers), np.nan, dtype=float)
    for i, bc in enumerate(bin_centers):
        in_window = (x > (bc - bw / 2)) & (x < (bc + bw / 2))
        if in_window.any():
            medians[i] = np.nanmedian(y[in_window])
    return medians


def running_mean(x, y, bin_centers, bw):
    """Running mean of `y` in windows of `x`.

    Args:
        x (array-like): position of each sample
        y (array-like): value of each sample, NaNs are ignored
        bin_centers (array-like): centre of each window
        bw (float): full width of the windows; a sample belongs to a window if
            `bc - bw / 2 < x < bc + bw / 2` (both bounds exclusive)

    Returns:
        np.ndarray: float array with one mean per window, NaN for windows
            without any sample
    """
    x = np.asarray(x)
    y = np.asarray(y, dtype=float)
    # Always float: an integer `bin_centers` must not truncate the means or
    # make NaN unstorable
    means = np.full(len(bin_centers), np.nan, dtype=float)
    for i, bc in enumerate(bin_centers):
        in_window = (x > (bc - bw / 2)) & (x < (bc + bw / 2))
        if in_window.any():
            means[i] = np.nanmean(y[in_window])
    return means


# Null distribution: permute the starter positions across presynaptic cells
# and recompute the running statistics. The generator is seeded so that
# Restart & Run All reproduces the same interval.
shuffle_rng = np.random.default_rng(0)
shuffled_med = np.zeros((nshuffle, len(bin_centers)))
shuffled_mean = np.zeros((nshuffle, len(bin_centers)))
for i in range(nshuffle):
    if i % 100 == 0:
        print(f"Shuffle {i}", flush=True)
    perm_x = shuffle_rng.permutation(st_x)
    shuffled_med[i] = running_med(pre_x, perm_x, bin_centers, bw)
    shuffled_mean[i] = running_mean(pre_x, perm_x, bin_centers, bw)

# Plot the running mean of the data over the 2.5-97.5 percentile band of the
# shuffled running means
fig, ax = plt.subplots()
ax.fill_between(
    bin_centers,
    np.nanpercentile(shuffled_mean, 2.5, axis=0),
    np.nanpercentile(shuffled_mean, 97.5, axis=0),
    alpha=0.5,
    label="95% CI",
)
ax.plot(bin_centers, running_mean(pre_x, st_x, bin_centers, bw), label="Data")
# The windows slide along the presynaptic position; the plotted value is the
# starter position
ax.set_xlabel("Presynaptic cell M/L position (mm)")
ax.set_ylabel("Starter M/L position (mm)")
ax.legend()

In [None]:
import seaborn as sns

# Figure settings: colorbar label, flatmap -> mm scale, colour limits (in mm)
cb_lab = "Starter position (mm)"
scale = 10 / 1000
clims = (5.1, 7.1)
cmap = "rainbow"
fig = plt.figure(figsize=(12, 13))

# Column of v1_starter_cells shown as colour
prop2plot = "x_flat"


# Main axes; the strips above and below are carved out of it with ax_divider
ax = plt.subplot2grid((5, 1), (0, 0), rowspan=3)
# bt_ax = plt.subplot2grid((5, 1), (3, 0), rowspan=1)
# ax_z = plt.subplot2grid((5, 1), (4, 0), rowspan=1)
plot_flatmap(ax)

# Map of the local (unweighted) median starter position of presynaptic cells
# within a radius of 300; sigma is not used when use_weights is False
med_im = weighted_median_map(
    presynaptic_x,
    presynaptic_y,
    v1_starter_cells.loc[starter_id, prop2plot].values * scale,
    distance_threshold=300,
    sigma=5000,
    use_weights=False,
    map_pixel_size=50,
)
im = ax.imshow(
    med_im,
    origin="lower",
    extent=[xlim[0], xlim[1], ylim[0], ylim[1]],
    cmap=cmap,
    vmin=clims[0],
    vmax=clims[1],
)

# Dashed lines at the smallest and largest starter x position
v1_min = int(v1_starter_cells.x_flat.min())
v1_max = int(v1_starter_cells.x_flat.max())
ax.axvline(v1_min, color="k", linestyle="--", alpha=0.5)
ax.axvline(v1_max, color="k", linestyle="--", alpha=0.5)

ax_divider = make_axes_locatable(ax)
# The condition is always true: the colorbar is always drawn
if True:
    cax = ax_divider.append_axes("right", size="2%", pad="0%")
    fig = ax.get_figure()
    cb = fig.colorbar(im, cax=cax)
    cb.set_label(cb_lab)


# Thin strip above the map: a colour ramp of x position, using the same
# colormap and limits as the map, blanked outside the starter x range.
# `im` is reused here for the ramp array (the colorbar above is already built).
st_ax = ax_divider.append_axes("top", size="2%", pad="0%")
im = np.zeros((10, xlim[1]))
im += np.arange(xlim[1], dtype=float).reshape(1, -1) * scale
im[:, :v1_min] = np.nan
im[:, v1_max:] = np.nan
st_ax.imshow(im, cmap=cmap, vmin=clims[0], vmax=clims[1])
st_ax.set_aspect("auto")
st_ax.set_xlim(xlim)
st_ax.set_yticks([])
# Ticks are placed in flatmap units and labelled after scaling
xt = np.arange(200, 1100, 200)
xtl = [f"{x*scale:.0f}" for x in xt]
st_ax.set_xticks(xt)
st_ax.set_xticklabels(xtl)
st_ax.xaxis.set_label_position("top")
st_ax.xaxis.tick_top()

# Starters drawn over the map, coloured by their own position
ax.scatter(
    v1_starter_cells.x_flat,
    v1_starter_cells.y_flat,
    c=v1_starter_cells[prop2plot].values * scale,
    cmap=cmap,
    vmin=clims[0],
    vmax=clims[1],
    ec="k",
    lw=0.5,
    s=20,
)


# Panel below the map: starter position against presynaptic position
bt_ax = ax_divider.append_axes("bottom", size="50%", pad="0%")
bt_ax.set_aspect("equal")
bt_ax.set_ylabel("Starter position (mm)")
pre_x = presynaptic_x * scale
st_x = v1_starter_cells.loc[starter_id, "x_flat"].values * scale
# Drop pairs in which either position is missing
nan = np.isnan(st_x) | np.isnan(pre_x)
pre_x = pre_x[~nan]
st_x = st_x[~nan]

almost_black = np.array([1, 1, 1]) * 0.1
bt_ax.scatter(pre_x, st_x, color=almost_black, s=5, alpha=0.2)
# sns.kdeplot(x=pre_x, y=st_x, ax=bt_ax, fill=False, color='royalblue', levels=7, thresh=0.05)
bt_ax.axvline(v1_min * scale, color="k", linestyle="--", alpha=0.5)
bt_ax.axvline(v1_max * scale, color="k", linestyle="--", alpha=0.5)
# The data line below is a running median, so the shuffle band must come from
# the shuffled running medians (it previously used the shuffled means)
bt_ax.fill_between(
    bin_centers,
    np.nanpercentile(shuffled_med, 2.5, axis=0),
    np.nanpercentile(shuffled_med, 97.5, axis=0),
    color="k",
    alpha=0.2,
    label="95% CI",
)
bt_ax.plot(
    bin_centers, running_med(pre_x, st_x, bin_centers, bw), color="royalblue", lw=2
)
bt_ax.set_xlim(np.array(xlim) * scale)
bt_ax.set_xticklabels([])

# Lowest panel: running median of the azimuth at the presynaptic cells
az_ax = ax_divider.append_axes("bottom", size="20%", pad="0%")
azi = presynaptic_cells.azimuth.values
pre_x = presynaptic_x * scale
# Drop cells without azimuth or without position
bad = np.isnan(azi) | np.isnan(pre_x)
azi, pre_x = azi[~bad], pre_x[~bad]

azimuth_profile = running_med(pre_x, azi, bin_centers, bw)
az_ax.plot(bin_centers, azimuth_profile, color=almost_black, lw=2)
az_ax.set_xlim(np.array(xlim) * scale)
# Same dashed markers as on the map: extent of the starter x positions
for edge in (v1_min, v1_max):
    az_ax.axvline(edge * scale, color="k", linestyle="--", alpha=0.5)
az_ax.set_xlabel("Presynaptic Cell Position (mm)")
az_ax.set_ylabel("Azimuth (°)")


fig.tight_layout()

In [None]:
# Presynaptic x position and x position of the matching starter, both in
# flatmap coordinates (no scaling applied here)
x = presynaptic_cells.x_flat
xflat = v1_starter_cells.loc[presynaptic_cells.starter_id, "x_flat"].values

from scipy.stats import gaussian_kde

# 2 x n array of (presynaptic x, starter x); columns containing a NaN are dropped
coords = np.vstack([x, xflat])
coords = coords[:, ~np.isnan(coords).any(axis=0)]
# NOTE(review): per the scipy documentation a scalar `bw_method` is used
# directly as the bandwidth factor (a multiplier of the data covariance), not
# as an absolute bandwidth in data units — confirm that 300 is intended.
# `kde` is not used in the visible part of the notebook.
kde = gaussian_kde(coords, bw_method=300)

# Distribution of presynaptic on flatmap by layer

In [None]:
layers = ["L1", "L2/3", "L4", "L5", "L6a", "L6b"]

coord_to_use = ["x_flat", "y_flat"]
# layer -> (n_cells, 2) array of presynaptic positions relative to their
# starter, restricted to V1 cells found in the same slice as the starter
relative_coords_by_layer_2d = dict()
for layer, st in starter_cells.groupby("cortical_layer"):
    # Starters with a unique barcode, in V1, with a defined flatmap position
    usable_starter = (
        st.query("is_unique_bc == True")
        .query("cortical_area == 'VISp'")
        .dropna(subset=coord_to_use)
    )
    print(f"{layer}: {len(st)}, unique: {len(usable_starter)}")
    offsets = []
    for cell, series in usable_starter.iterrows():
        starter_slice = series.slice_id
        pres = (
            presynaptic_cells.query("starter_id == @cell")
            .query("cortical_area == 'VISp'")
            .query("slice_id == @starter_slice")
        )
        if pres.empty:
            continue
        st_coord = series[coord_to_use].values.astype(float)
        offsets.append(pres[coord_to_use].values.astype(float) - st_coord)

    # Layers without any presynaptic cell get an empty (0, 2) array
    if offsets:
        all_cells = np.vstack(offsets)
    else:
        all_cells = np.zeros((0, len(coord_to_use)))
    relative_coords_by_layer_2d[layer] = all_cells

In [None]:
# Imported here because this cell runs before the cell that imports `stats`;
# without it a fresh Restart & Run All stops on a NameError
from scipy import stats

# Distance of L5 presynaptic cells to their starter (flatmap units x 10)
coords = relative_coords_by_layer_2d["L5"].astype(float) * 10
dst = np.sqrt(np.sum(coords**2, axis=1))
dst = dst[~np.isnan(dst)]
# log(0) is -inf and would break the fit, so zero distances are left out
logdst = np.log(dst[dst > 0])
# Normal fit of the log-distance, i.e. a lognormal model of the distance
norm_fit = stats.norm.fit(logdst)
print(norm_fit)
_ = plt.hist(logdst, bins=100, density=True)
# plot the fit
x = np.linspace(logdst.min(), logdst.max(), 100)
y = stats.norm.pdf(x, *norm_fit)
plt.plot(x, y)

In [None]:
from scipy import stats

# One histogram of distance-to-starter per layer, with a lognormal summary
# printed for L2/3 and L5
fig, axes = plt.subplots(3, 2, figsize=(5, 5))

for i, layer in enumerate(layers):
    ax = axes.flatten()[i]
    ax.set_title(layer)
    # Layers without starters are absent from the dict: treat them as empty
    coords = relative_coords_by_layer_2d.get(layer, np.zeros((0, 2))).astype(float) * 10
    dst = np.sqrt(np.sum(coords**2, axis=1))
    dst = dst[~np.isnan(dst)]
    if len(dst) == 0:
        continue
    h, bedge, bar = ax.hist(dst, bins=np.arange(0, 1000, 50), density=True)
    med = np.nanmedian(dst)
    mean_dst = np.nanmean(dst)
    # Triangle on the top edge of the axes (axes-fraction y) marking the median
    ax.scatter(
        med,
        1,
        color="royalblue",
        marker="v",
        transform=ax.get_xaxis_transform(),
        clip_on=False,
    )

    # Lognormal model: normal fit of log(distance). Zero distances are left
    # out of the fit since log(0) is -inf.
    positive = dst[dst > 0]
    if len(positive) < 2:
        continue
    logdst = np.log(positive)
    norm_fit = stats.norm.fit(logdst)
    # Same model expressed in um: shape = sigma of the log, scale = exp(mu)
    lognorm_fit = stats.lognorm(s=norm_fit[1], scale=np.exp(norm_fit[0]))
    x = np.linspace(positive.min(), positive.max(), 100)
    y = lognorm_fit.pdf(x)
    # ax.plot(x, y/y.max()*h.max(), color=almost_black)
    if layer in ["L2/3", "L5"]:
        print(f"{layer}:\nmedian dst {med:.0f}um, mean dst {mean_dst:.0f}um")
        print(f"Fit parameters: loc={norm_fit[0]:.2f}, scale={norm_fit[1]:.1f}")
        # All four statistics come from the lognormal distribution, so they
        # are in um as printed (they used to be log-space values)
        logstd = lognorm_fit.std()
        logmean = lognorm_fit.mean()
        logconfint = lognorm_fit.interval(0.95)
        logmedian = lognorm_fit.median()
        print(
            f"Lognormal std: {logstd:.0f}um, 95% CI: {logconfint[0]:.0f}-{logconfint[1]:.0f}um, mean: {logmean:.0f}um, median: {logmedian:.0f}um"
        )
fig.tight_layout()
axes[2, 0].set_xlabel("Distance to starter (um)")
axes[2, 1].set_xlabel("Distance to starter (um)")

In [None]:
# Do gaussian kernel density estimation of the relative coordinates
import seaborn as sns

layers = ["L1", "L2/3", "L4", "L5", "L6a", "L6b"]


scale = 10
coord_to_use = ["x_flat", "y_flat", "z_flat"]
# Per layer: presynaptic positions relative to the starter, the same with the
# third coordinate left absolute, and the starter positions themselves
relative_coords_by_layer = dict()
relative_not_z = dict()
starter_flat_coor_bylayer = dict()
n_coords = len(coord_to_use)
for layer, st in starter_cells.groupby("cortical_layer"):
    # Starters with a unique barcode, in V1, with a defined flatmap position
    usable_starter = (
        st.query("is_unique_bc == True")
        .query("cortical_area == 'VISp'")
        .dropna(subset=coord_to_use)
    )
    print(f"{layer}: {len(st)}, unique: {len(usable_starter)}")
    rel_all, rel_not_z, st_positions = [], [], []
    for cell, series in usable_starter.iterrows():
        pres = presynaptic_cells.query("starter_id == @cell").query(
            "cortical_area == 'VISp'"
        )
        if pres.empty:
            continue
        coords = pres[coord_to_use].values.astype(float)
        st_coord = series[coord_to_use].values.astype(float)
        st_positions.append(st_coord.copy())
        rel_all.append(coords - st_coord)
        # Same origin with the third coordinate zeroed, so that this
        # coordinate of the presynaptic cells is kept as is
        planar_origin = st_coord.copy()
        if n_coords == 3:
            planar_origin[2] = 0
        rel_not_z.append(coords - planar_origin)

    # Layers without any presynaptic cell get empty (0, n_coords) arrays
    if rel_all:
        all_cells = np.vstack(rel_all)
        all_not_z = np.vstack(rel_not_z)
        all_st = np.vstack(st_positions)
    else:
        all_cells = np.zeros((0, n_coords))
        all_not_z = np.zeros((0, n_coords))
        all_st = np.zeros((0, n_coords))
    relative_coords_by_layer[layer] = all_cells
    relative_not_z[layer] = all_not_z
    starter_flat_coor_bylayer[layer] = all_st


# One panel per layer (first three rows of the 4 x 2 grid): lateral offset to
# the starter against the third flatmap coordinate, with marginal histograms.
# Layers with more than 700 cells are also overlaid on the bottom-left panel.
colors = sns.color_palette(n_colors=len(layers))[::-1]
xlims = [-1000, 1000]
fig, axes = plt.subplots(4, 2, figsize=(6, 12))
# `axes` is 2D, so `axes[-1]` is a row of two Axes: a single Axes has to be
# picked for the overlay
overlay_ax = axes[-1, 0]
for iax, layer in enumerate(layers):
    ax = axes.flat[iax]
    # Layers without starters are absent from the dict: treat them as empty
    valid_coords = relative_not_z.get(layer, np.zeros((0, 3))) * scale
    valid_coords = valid_coords[~np.isnan(valid_coords).any(axis=1)]
    valid_coords = valid_coords[valid_coords[:, 0] > xlims[0]]
    valid_coords = valid_coords[valid_coords[:, 0] < xlims[1]]
    if len(valid_coords) == 0:
        # Nothing to estimate a density from; keep the panel, labelled
        print(f"{layer}: 0")
        ax.set_title(layer)
        continue
    print(f"{layer}: {len(valid_coords)}. Max Z: {valid_coords[:, 2].max()}")
    sns.kdeplot(
        x=valid_coords[:, 0],
        y=valid_coords[:, 2],
        ax=ax,
        color=colors[iax],
        bw_adjust=0.7,
        levels=5,
    )
    ax.scatter(
        valid_coords[:, 0], valid_coords[:, 2], color="k", alpha=0.2, marker="o", s=6
    )

    if len(valid_coords) > 700:
        sns.kdeplot(
            x=valid_coords[:, 0],
            y=valid_coords[:, 2],
            ax=overlay_ax,
            cmap="Purples" if layer == "L2/3" else "Greens",
            bw_adjust=0.7,
            levels=10,
            alpha=1,
            label=layer,
            fill=False,
        )
    # Marginal histograms: lateral offset on top, third coordinate on the right
    ax_divider = make_axes_locatable(ax)
    top_ax = ax_divider.append_axes("top", size="30%", pad="0%")
    top_ax.hist(
        valid_coords[:, 0],
        bins=np.arange(-1000, 1000, 50),
        color=colors[iax],
        density=True,
    )
    top_ax.set_yticks([])
    top_ax.set_xticks([])
    top_ax.set_xlim(-1000, 1000)
    top_ax.set_title(layer)
    right_ax = ax_divider.append_axes("right", size="20%", pad="0%")
    right_ax.hist(
        valid_coords[:, 2],
        bins=np.arange(0, 1200, 50),
        color=colors[iax],
        density=True,
        orientation="horizontal",
    )
    right_ax.set_xticks([])
    right_ax.set_yticks([])
    right_ax.set_ylim(1200, 0)


for iax, x in enumerate(axes.flat):
    x.set_aspect("equal")
    x.axvline(0, color="grey", lw=0.5, ls="--")
    x.set_xlim(-1000, 1000)
    # Reversed limits: 0 at the top of the panel
    x.set_ylim(1200, 0)
    x.set_ylabel("Cortical Depth (um)")
# Label every Axes of the bottom row (an ndarray row has no set_xlabel)
for bottom_ax in axes[-1]:
    bottom_ax.set_xlabel("Lateral distance to starter (um)")
_ = fig.tight_layout()

In [None]:
# Do a line plot on the M/L axis
# Presynaptic positions, and properties of the starter of each presynaptic cell
x = presynaptic_cells.x_flat
y = presynaptic_cells.y_flat
xflat = v1_starter_cells.loc[presynaptic_cells.starter_id, "x_flat"].values
azi = v1_starter_cells.loc[presynaptic_cells.starter_id, "azimuth"].values
ele = v1_starter_cells.loc[presynaptic_cells.starter_id, "elevation"].values

# running median of xflat and azi along x: windows of full width dx, centred
# on every unit step between the smallest and largest presynaptic x
dx = 30
xrange = np.arange(x.min(), x.max())
# Start from NaN so that windows without any cell stay NaN
xfl_range = np.full(len(xrange), np.nan)
az_range = np.full(len(xrange), np.nan)
ele_range = np.full(len(xrange), np.nan)
# `x_center` rather than `xx`, so the global meshgrid `xx` is not overwritten
for i, x_center in enumerate(xrange):
    mask = ((x > x_center - dx / 2) & (x < x_center + dx / 2)).to_numpy()
    if mask.any():
        xfl_range[i] = np.nanmedian(xflat[mask])
        az_range[i] = np.nanmedian(azi[mask])
        ele_range[i] = np.nanmedian(ele[mask])


# Flatmap coordinates are divided by 100 to be shown in mm, as the axis labels
# state; the arrays themselves are left in flatmap units for the later cells
fig, axes = plt.subplots(3, 1, sharex=True)
axes[0].plot(xrange / 100, xfl_range / 100)
axes[0].set_ylabel("M/L position (mm)")
axes[1].plot(xrange / 100, az_range)
axes[1].set_ylabel("Azimuth (°)")
axes[2].plot(xrange / 100, ele_range)
axes[2].set_ylabel("Elevation (°)")
axes[2].set_xlabel("Presynaptic M/L position (mm)")

In [None]:
# Shuffle the presynaptic x positions `nshuffle` times and recompute the
# running medians, to get their distribution when position and starter
# properties are unrelated. The generator is seeded so that Restart & Run All
# gives the same null distribution.
nshuffle = 10000
shuffle_rng = np.random.default_rng(0)
xfl_range_shuffle = np.zeros((nshuffle, len(xrange)))
az_range_shuffle = np.zeros((nshuffle, len(xrange)))
ele_range_shuffle = np.zeros((nshuffle, len(xrange)))
for i in range(nshuffle):
    if i % 100 == 0:
        print(f"Shuffle {i}", flush=True)
    perm_x = shuffle_rng.permutation(np.asarray(x))
    for j, xx in enumerate(xrange):
        mask = (perm_x > xx - dx / 2) & (perm_x < xx + dx / 2)
        if np.sum(mask):
            xfl_range_shuffle[i, j] = np.nanmedian(xflat[mask])
            az_range_shuffle[i, j] = np.nanmedian(azi[mask])
            ele_range_shuffle[i, j] = np.nanmedian(ele[mask])
        else:
            # No cell in this window for this shuffle
            xfl_range_shuffle[i, j] = np.nan
            az_range_shuffle[i, j] = np.nan
            ele_range_shuffle[i, j] = np.nan

In [None]:
prop_names = ["M/L position (mm)", "Azimuth (°)", "Elevation (°)"]
props_shuffle = [xfl_range_shuffle, az_range_shuffle, ele_range_shuffle]
props = [xfl_range, az_range, ele_range]

fig, axes = plt.subplots(3, 1, figsize=(7, 7))
# Identity line on the position panel
unity = np.arange(500, 700, 10) / 100
axes[0].plot(unity, unity, color="grey", linestyle="--")
for iax, ax in enumerate(axes):
    # Only the first property is a flatmap coordinate that needs scaling to mm
    scale = 1 / 100 if iax == 0 else 1
    # 5th, 50th and 95th percentile of the shuffles at each position
    low, med, high = np.nanpercentile(props_shuffle[iax] * scale, [5, 50, 95], axis=0)
    ax.fill_between(xrange / 100, low, high, alpha=0.3, color="black")
    ax.plot(xrange / 100, med, color="black")
    ax.plot(xrange / 100, props[iax] * scale, lw=2, color="royalblue")
    ax.set_ylabel(prop_names[iax])
    ax.set_xlim(xrange[0] / 100, xrange[-1] / 100)
# Only the last (bottom) panel gets an x label
ax.set_xlabel("M/L position (mm)")
plt.tight_layout()