In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path
from brainglobe_atlasapi import BrainGlobeAtlas
from pprint import pprint
import iss_preprocess as iss

# Atlas handle; later cells call `bg_atlas.get_structure_ancestors(...)` on it
# to map area acronyms onto coarser labels.
# NOTE(review): check_latest=False presumably skips the remote check for a
# newer atlas release — confirm against the brainglobe documentation.
bg_atlas = BrainGlobeAtlas("allen_mouse_10um", check_latest=False)

In [None]:
# Acquisition to analyse and the processed-data location resolved for it
data_path = "becalia_rabies_barseq/BRAC8498.3e/chamber_07/"
processed_path = iss.io.get_processed_path(data_path)

# Both tables are read from the directory one level above `processed_path`.
# NOTE(review): read_pickle can execute code embedded in the file; only load
# pickles produced by a trusted pipeline.
tables_dir = processed_path.parent
ara_barcode_spots = pd.read_pickle(tables_dir / "ara_barcode_spots.pkl")
ara_starters = pd.read_pickle(tables_dir / "ara_starter_cells.pkl")

In [None]:
# Optional step: restrict both tables to barcodes that occur in each of them.
# Disabled by default; set the flag to True to enable it (this replaces a bare
# `if False:` guard so the switch is visible and named).
RESTRICT_TO_COMMON_BARCODES = False

if RESTRICT_TO_COMMON_BARCODES:
    # Barcodes present among the spots AND among the cells' main barcodes
    common_barcodes = set(ara_barcode_spots['barcode'].unique()) & set(ara_starters['main_barcode'].unique())

    # Keep only rows with a shared barcode. `.copy()` detaches the results from
    # the unfiltered frames so later column assignments do not hit a slice.
    ara_barcode_spots = ara_barcode_spots[ara_barcode_spots['barcode'].isin(common_barcodes)].copy()
    ara_starters = ara_starters[ara_starters['main_barcode'].isin(common_barcodes)].copy()

In [None]:
# Keep only cells whose `max_n_spots` is greater than 5.
# The explicit .copy() makes the result an independent frame: a later cell adds
# a column to `ara_starters`, which on a filtered slice would trigger pandas'
# SettingWithCopyWarning.
ara_starters = ara_starters[ara_starters["max_n_spots"] > 5].copy()

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt

# Split the cells into starters and all remaining (non-starter) cells
starters = ara_starters[ara_starters['starter'] == True]
non_starters = ara_starters[ara_starters['starter'] == False]

# Pair every starter with every non-starter that carries the same main barcode.
# A single merge replaces the nested row loops (one boolean scan of all
# non-starters per starter). Rows without a barcode are removed first: merge
# matches missing keys to each other, whereas an equality comparison never does.
starter_links = (
    starters[['main_barcode', 'area_acronym']]
    .dropna(subset=['main_barcode'])
    .rename(columns={'area_acronym': 'starter_area'})
)
non_starter_links = (
    non_starters[['main_barcode', 'area_acronym']]
    .dropna(subset=['main_barcode'])
    .rename(columns={'area_acronym': 'non_starter_area'})
)
linked_pairs = starter_links.merge(non_starter_links, on='main_barcode')

# Count pairs per (non-starter area, starter area).
# NOTE(review): crosstab skips pairs whose area acronym is missing; the loop
# version would have counted them under a NaN label — confirm no such rows exist.
pair_counts = pd.crosstab(linked_pairs['non_starter_area'], linked_pairs['starter_area'])

# Creating the confusion matrix: rows = non-starter areas, columns = starter
# areas, covering every area present in the data (0 where no pair was found),
# sorted alphabetically on both axes.
confusion_matrix = (
    pair_counts
    .reindex(
        index=non_starters['area_acronym'].unique(),
        columns=starters['area_acronym'].unique(),
        fill_value=0,
    )
    .rename_axis(index=None, columns=None)
    .astype('int64')
    .sort_index(axis=0)
    .sort_index(axis=1)
)

# Remove rows and columns with all zeros
filtered_confusion_matrix = confusion_matrix.loc[(confusion_matrix != 0).any(axis=1)]
filtered_confusion_matrix = filtered_confusion_matrix.loc[:, (filtered_confusion_matrix != 0).any(axis=0)]

# Draw the link counts as an annotated heatmap on an explicitly created axes
fig, ax = plt.subplots(figsize=(20, 18), dpi=80)
sns.heatmap(
    filtered_confusion_matrix,
    annot=True,
    fmt="d",
    cmap="YlGnBu",
    cbar=True,
    yticklabels=True,
    ax=ax,
)
ax.set_title('Confusion Matrix')
ax.set_xlabel('Starter Area Acronym')
ax.set_ylabel('Non-Starter Area Acronym')
plt.show()

In [None]:
import numpy as np

def get_ancestor_rank1(area_acronym):
    """Map an atlas area acronym onto the coarser label used for grouping.

    The ancestors of ``area_acronym`` are fetched from ``bg_atlas`` and tested
    in a fixed order:

    * "TH", "RSP", "TEa" or "AUD" among the ancestors -> that acronym.
    * "VISp" among the ancestors -> ``area_acronym`` itself, unchanged.
    * "VIS" among the ancestors -> the last entry of the ancestor list.
    * otherwise -> the second entry of the ancestor list, or 'Unknown' when
      the list has fewer than two entries.

    A ``KeyError`` raised along the way yields 'Unknown'.

    NOTE(review): only the ancestor list is tested, so whether an input that
    is exactly "TH"/"RSP"/"TEa"/"AUD" gets grouped depends on that list
    containing the structure itself — confirm against the atlas API.
    """
    try:
        lineage = bg_atlas.get_structure_ancestors(area_acronym)
        # Coarse groups: every descendant collapses onto the group acronym
        for group in ("TH", "RSP", "TEa", "AUD"):
            if group in lineage:
                return group
        if "VISp" in lineage:
            return area_acronym
        if "VIS" in lineage:
            return lineage[-1]
        if len(lineage) > 1:
            return lineage[1]
        return 'Unknown'
    except KeyError:
        return 'Unknown'

# Attach the grouping label to every cell. `assign` builds a new frame, so no
# column is written into what may be a filtered slice of another frame.
ara_starters = ara_starters.assign(
    area_acronym_ancestor_rank1=ara_starters["area_acronym"].apply(get_ancestor_rank1)
)

# Split the cells into starters and all remaining (non-starter) cells
starters = ara_starters[ara_starters['starter'] == True]
non_starters = ara_starters[ara_starters['starter'] == False]

# Pair every starter with every non-starter that carries the same main barcode.
# A single merge replaces the nested row loops. Rows without a barcode are
# removed first: merge matches missing keys to each other, whereas an equality
# comparison never does.
starter_links = (
    starters[['main_barcode', 'area_acronym_ancestor_rank1']]
    .dropna(subset=['main_barcode'])
    .rename(columns={'area_acronym_ancestor_rank1': 'starter_area'})
)
non_starter_links = (
    non_starters[['main_barcode', 'area_acronym_ancestor_rank1']]
    .dropna(subset=['main_barcode'])
    .rename(columns={'area_acronym_ancestor_rank1': 'non_starter_area'})
)
linked_pairs = starter_links.merge(non_starter_links, on='main_barcode')
pair_counts = pd.crosstab(linked_pairs['non_starter_area'], linked_pairs['starter_area'])

# Creating the confusion matrix: rows = non-starter labels, columns = starter
# labels, covering every label present in the data, sorted alphabetically.
confusion_matrix = (
    pair_counts
    .reindex(
        index=non_starters['area_acronym_ancestor_rank1'].unique(),
        columns=starters['area_acronym_ancestor_rank1'].unique(),
        fill_value=0,
    )
    .rename_axis(index=None, columns=None)
    .astype('int64')
    .sort_index(axis=0)
    .sort_index(axis=1)
)

# Remove rows and columns with all zeros
filtered_confusion_matrix = confusion_matrix.loc[(confusion_matrix != 0).any(axis=1)]
filtered_confusion_matrix = filtered_confusion_matrix.loc[:, (filtered_confusion_matrix != 0).any(axis=0)]

# Remove specific unwanted labels. errors='ignore' applies to both axes, so a
# label that is absent (e.g. no starter mapped to "Unknown") no longer raises.
filtered_confusion_matrix = filtered_confusion_matrix.drop(
    index=["Unknown", "fiber tracts", "grey", "ECT"],
    columns=["Unknown", "fiber tracts"],
    errors='ignore',
)

# Filter to include only areas of interest, in plotting order
areas_of_interest = [
    'VISp1',
    'VISp2/3',
    'VISp4',
    'VISp5',
    'VISp6a',
    'VISp6b',
    'VISal',
    'VISpm',
    'VISl',
    'VISli',
    'AUD',
    'RSP',
    'TEa',
    'TH',
]
# reindex both selects and orders the areas; labels without any link get zeros
filtered_confusion_matrix = filtered_confusion_matrix.reindex(index=areas_of_interest, columns=areas_of_interest, fill_value=0)

# Plot the raw link counts (rows: presynaptic area, columns: starter area)
n_rows, n_cols = filtered_confusion_matrix.shape
fig, ax = plt.subplots(figsize=(20, 18), dpi=80)

# Define a mask to hide zero values
mask = filtered_confusion_matrix == 0

# Plot the heatmap with zero values masked
sns.heatmap(filtered_confusion_matrix, cmap="magma_r", cbar=False, yticklabels=True, square=True, linewidths=1, linecolor='white', mask=mask, annot=False, vmax=1100, ax=ax)
ax.xaxis.set_ticks_position('top')
ax.xaxis.set_label_position('top')

# Annotate the non-zero cells; white text above 600 for contrast on dark cells
for (i, j), val in np.ndenumerate(filtered_confusion_matrix):
    if not mask.iloc[i, j]:
        text_color = 'white' if val > 600 else 'black'
        ax.text(j + 0.5, i + 0.5, f'{val}', ha='center', va='center', color=text_color, fontsize=15)

# Highlight the diagonal with a black outline
for i in range(n_rows):
    ax.add_patch(plt.Rectangle((i, i), 1, 1, fill=False, edgecolor='black', lw=3))

# Grey frame around the whole matrix; sized from the matrix rather than a
# hard-coded 14 so it follows any change to the list of areas
ax.add_patch(plt.Rectangle((0, 0), n_cols, n_rows, fill=False, edgecolor='dimgrey', lw=3))

# Red frames around the first 6x6 block and the following 4x4 block. With the
# area order used in this cell these are the six VISp entries and the next four
# VIS areas; the sizes must be updated if that order changes.
ax.add_patch(plt.Rectangle((0, 0), 6, 6, fill=False, edgecolor='red', lw=5))
ax.add_patch(plt.Rectangle((6, 6), 4, 4, fill=False, edgecolor='red', lw=5))

# Widen the axes by half a cell on every side so the outer frame is not clipped
ax.set_xlim(-0.5, n_cols + 0.5)
ax.set_ylim(n_rows + 0.5, -0.5)

# Nudge the tick labels towards the matrix and hide the tick marks
for label in ax.get_yticklabels():
    x, y = label.get_position()
    label.set_position((x + 0.025, y))
ax.tick_params(axis='both', width=0)

for label in ax.get_xticklabels():
    x, y = label.get_position()
    label.set_position((x, y  - 0.025))

ax.set_xlabel('Starter cell location', fontsize=20, labelpad=20)
ax.set_ylabel('Presynaptic cell location', fontsize=20, labelpad=20)
plt.xticks(fontsize=15)
plt.yticks(fontsize=15)
plt.show()

In [None]:
import numpy as np

# NOTE(review): an identical definition of this function appears in an earlier
# cell; running this cell only rebinds the name.
def get_ancestor_rank1(area_acronym):
    """Map an atlas area acronym onto the coarser label used for grouping.

    The ancestors of ``area_acronym`` are fetched from ``bg_atlas`` and tested
    in a fixed order:

    * "TH", "RSP", "TEa" or "AUD" among the ancestors -> that acronym.
    * "VISp" among the ancestors -> ``area_acronym`` itself, unchanged.
    * "VIS" among the ancestors -> the last entry of the ancestor list.
    * otherwise -> the second entry of the ancestor list, or 'Unknown' when
      the list has fewer than two entries.

    A ``KeyError`` raised along the way yields 'Unknown'.
    """
    try:
        lineage = bg_atlas.get_structure_ancestors(area_acronym)
        # Coarse groups: every descendant collapses onto the group acronym
        for group in ("TH", "RSP", "TEa", "AUD"):
            if group in lineage:
                return group
        if "VISp" in lineage:
            return area_acronym
        if "VIS" in lineage:
            return lineage[-1]
        if len(lineage) > 1:
            return lineage[1]
        return 'Unknown'
    except KeyError:
        return 'Unknown'

# Attach the grouping label to every cell. `assign` builds a new frame, so no
# column is written into what may be a filtered slice of another frame.
ara_starters = ara_starters.assign(
    area_acronym_ancestor_rank1=ara_starters["area_acronym"].apply(get_ancestor_rank1)
)

# Split the cells into starters and all remaining (non-starter) cells
starters = ara_starters[ara_starters['starter'] == True]
non_starters = ara_starters[ara_starters['starter'] == False]

# Pair every starter with every non-starter that carries the same main barcode.
# A single merge replaces the nested row loops. Rows without a barcode are
# removed first: merge matches missing keys to each other, whereas an equality
# comparison never does.
starter_links = (
    starters[['main_barcode', 'area_acronym_ancestor_rank1']]
    .dropna(subset=['main_barcode'])
    .rename(columns={'area_acronym_ancestor_rank1': 'starter_area'})
)
non_starter_links = (
    non_starters[['main_barcode', 'area_acronym_ancestor_rank1']]
    .dropna(subset=['main_barcode'])
    .rename(columns={'area_acronym_ancestor_rank1': 'non_starter_area'})
)
linked_pairs = starter_links.merge(non_starter_links, on='main_barcode')
pair_counts = pd.crosstab(linked_pairs['non_starter_area'], linked_pairs['starter_area'])

# Creating the confusion matrix: rows = non-starter labels, columns = starter
# labels, covering every label present in the data, sorted alphabetically.
confusion_matrix = (
    pair_counts
    .reindex(
        index=non_starters['area_acronym_ancestor_rank1'].unique(),
        columns=starters['area_acronym_ancestor_rank1'].unique(),
        fill_value=0,
    )
    .rename_axis(index=None, columns=None)
    .astype('int64')
    .sort_index(axis=0)
    .sort_index(axis=1)
)

# Remove rows and columns with all zeros
filtered_confusion_matrix = confusion_matrix.loc[(confusion_matrix != 0).any(axis=1)]
filtered_confusion_matrix = filtered_confusion_matrix.loc[:, (filtered_confusion_matrix != 0).any(axis=0)]

# Remove specific unwanted labels. errors='ignore' applies to both axes, so a
# label that is absent (e.g. no starter mapped to "Unknown") no longer raises.
filtered_confusion_matrix = filtered_confusion_matrix.drop(
    index=["Unknown", "fiber tracts", "grey", "ECT"],
    columns=["Unknown", "fiber tracts"],
    errors='ignore',
)

# Filter to include only areas of interest, in plotting order
areas_of_interest = [
    'VISp1',
    'VISp2/3',
    'VISp4',
    'VISp5',
    'VISp6a',
    'VISp6b',
    'VISal',
    'VISpm',
    'VISl',
    'VISli',
    'AUD',
    'RSP',
    'TEa',
    'TH',
]
# reindex both selects and orders the areas; labels without any link get zeros
filtered_confusion_matrix = filtered_confusion_matrix.reindex(index=areas_of_interest, columns=areas_of_interest, fill_value=0)

# Count the starters per grouped area and align the counts with the plotted
# column order; areas without any starter get a count of 0.
is_starter = ara_starters["starter"].eq(True)
number_of_starters_per_area = (
    ara_starters.loc[is_starter, "area_acronym_ancestor_rank1"]
    .value_counts()
    .reindex(areas_of_interest, fill_value=0)
)

# Divide every column by the number of starters found in that column's area.
# Columns with a count of 0 are divided by zero here and come out non-finite.
normalized_confusion_matrix = filtered_confusion_matrix.div(number_of_starters_per_area, axis="columns")

# Plot the per-starter normalized matrix (rows: presynaptic area, columns:
# starter area)
n_rows, n_cols = normalized_confusion_matrix.shape
fig, ax = plt.subplots(figsize=(20, 18), dpi=80)

# Define a mask to hide zero values
mask = normalized_confusion_matrix == 0

# Plot the heatmap with zero values masked
sns.heatmap(normalized_confusion_matrix, cmap="magma_r", cbar=False, yticklabels=True, square=True, linewidths=1, linecolor='white', mask=mask, annot=False, ax=ax)
ax.xaxis.set_ticks_position('top')
ax.xaxis.set_label_position('top')

# Annotate the non-zero cells; white text above 3 for contrast on dark cells.
# NaN cells are not caught by the zero mask (NaN != 0) and are skipped here.
for (i, j), val in np.ndenumerate(normalized_confusion_matrix):
    if mask.iloc[i, j] or val != val:
        continue
    text_color = 'white' if val > 3 else 'black'
    ax.text(j + 0.5, i + 0.5, f'{val:.2f}', ha='center', va='center', color=text_color, fontsize=15)

# Highlight the diagonal with a black outline
for i in range(n_rows):
    ax.add_patch(plt.Rectangle((i, i), 1, 1, fill=False, edgecolor='black', lw=3))

# Grey frame around the whole matrix; sized from the matrix rather than a
# hard-coded 14 so it follows any change to the list of areas
ax.add_patch(plt.Rectangle((0, 0), n_cols, n_rows, fill=False, edgecolor='dimgrey', lw=3))

# Red frames around the first 6x6 block and the following 4x4 block. With the
# area order used in this cell these are the six VISp entries and the next four
# VIS areas; the sizes must be updated if that order changes.
ax.add_patch(plt.Rectangle((0, 0), 6, 6, fill=False, edgecolor='red', lw=5))
ax.add_patch(plt.Rectangle((6, 6), 4, 4, fill=False, edgecolor='red', lw=5))

# Widen the axes by half a cell on every side so the outer frame is not clipped
ax.set_xlim(-0.5, n_cols + 0.5)
ax.set_ylim(n_rows + 0.5, -0.5)

# Nudge the tick labels towards the matrix and hide the tick marks
for label in ax.get_yticklabels():
    x, y = label.get_position()
    label.set_position((x + 0.025, y))
ax.tick_params(axis='both', width=0)

for label in ax.get_xticklabels():
    x, y = label.get_position()
    label.set_position((x, y  - 0.025))

ax.set_title('Number of presynaptics per starter Confusion Matrix', fontsize=30, pad=20)
ax.set_xlabel('Starter cell location', fontsize=20, labelpad=20)
ax.set_ylabel('Presynaptic cell location', fontsize=20, labelpad=20)
plt.xticks(fontsize=15)
plt.yticks(fontsize=15)

plt.show()

In [None]:
import numpy as np

# NOTE(review): an identical definition of this function appears in earlier
# cells; running this cell only rebinds the name.
def get_ancestor_rank1(area_acronym):
    """Map an atlas area acronym onto the coarser label used for grouping.

    The ancestors of ``area_acronym`` are fetched from ``bg_atlas`` and tested
    in a fixed order:

    * "TH", "RSP", "TEa" or "AUD" among the ancestors -> that acronym.
    * "VISp" among the ancestors -> ``area_acronym`` itself, unchanged.
    * "VIS" among the ancestors -> the last entry of the ancestor list.
    * otherwise -> the second entry of the ancestor list, or 'Unknown' when
      the list has fewer than two entries.

    A ``KeyError`` raised along the way yields 'Unknown'.
    """
    try:
        lineage = bg_atlas.get_structure_ancestors(area_acronym)
        # Coarse groups: every descendant collapses onto the group acronym
        for group in ("TH", "RSP", "TEa", "AUD"):
            if group in lineage:
                return group
        if "VISp" in lineage:
            return area_acronym
        if "VIS" in lineage:
            return lineage[-1]
        if len(lineage) > 1:
            return lineage[1]
        return 'Unknown'
    except KeyError:
        return 'Unknown'

# Attach the grouping label to every cell. `assign` builds a new frame, so no
# column is written into what may be a filtered slice of another frame.
ara_starters = ara_starters.assign(
    area_acronym_ancestor_rank1=ara_starters["area_acronym"].apply(get_ancestor_rank1)
)

# Split the cells into starters and all remaining (non-starter) cells
starters = ara_starters[ara_starters['starter'] == True]
non_starters = ara_starters[ara_starters['starter'] == False]

# Pair every starter with every non-starter that carries the same main barcode.
# A single merge replaces the nested row loops. Rows without a barcode are
# removed first: merge matches missing keys to each other, whereas an equality
# comparison never does.
starter_links = (
    starters[['main_barcode', 'area_acronym_ancestor_rank1']]
    .dropna(subset=['main_barcode'])
    .rename(columns={'area_acronym_ancestor_rank1': 'starter_area'})
)
non_starter_links = (
    non_starters[['main_barcode', 'area_acronym_ancestor_rank1']]
    .dropna(subset=['main_barcode'])
    .rename(columns={'area_acronym_ancestor_rank1': 'non_starter_area'})
)
linked_pairs = starter_links.merge(non_starter_links, on='main_barcode')
pair_counts = pd.crosstab(linked_pairs['non_starter_area'], linked_pairs['starter_area'])

# Creating the confusion matrix: rows = non-starter labels, columns = starter
# labels, covering every label present in the data, sorted alphabetically.
confusion_matrix = (
    pair_counts
    .reindex(
        index=non_starters['area_acronym_ancestor_rank1'].unique(),
        columns=starters['area_acronym_ancestor_rank1'].unique(),
        fill_value=0,
    )
    .rename_axis(index=None, columns=None)
    .astype('int64')
    .sort_index(axis=0)
    .sort_index(axis=1)
)

# Remove rows and columns with all zeros
filtered_confusion_matrix = confusion_matrix.loc[(confusion_matrix != 0).any(axis=1)]
filtered_confusion_matrix = filtered_confusion_matrix.loc[:, (filtered_confusion_matrix != 0).any(axis=0)]

# Remove specific unwanted labels. errors='ignore' applies to both axes, so a
# label that is absent (e.g. no starter mapped to "Unknown") no longer raises.
filtered_confusion_matrix = filtered_confusion_matrix.drop(
    index=["Unknown", "fiber tracts", "grey", "ECT"],
    columns=["Unknown", "fiber tracts"],
    errors='ignore',
)

# Filter to include only areas of interest, in plotting order
areas_of_interest = [
    'VISp1',
    'VISp2/3',
    'VISp4',
    'VISp5',
    'VISp6a',
    'VISp6b',
    'VISal',
    'VISpm',
    'VISl',
    'VISli',
    'AUD',
    'RSP',
    'TEa',
    'TH',
]
# reindex both selects and orders the areas; labels without any link get zeros
filtered_confusion_matrix = filtered_confusion_matrix.reindex(index=areas_of_interest, columns=areas_of_interest, fill_value=0)

# Express every column as fractions of its own total (column sum)
column_totals = filtered_confusion_matrix.sum(axis=0)
normalized_confusion_matrix = filtered_confusion_matrix.div(column_totals, axis="columns")

# Plot the column-normalized matrix (rows: presynaptic area, columns: starter
# area)
n_rows, n_cols = normalized_confusion_matrix.shape
fig, ax = plt.subplots(figsize=(20, 18), dpi=80)

# Define a mask to hide zero values
mask = normalized_confusion_matrix == 0

# Plot the heatmap with zero values masked
sns.heatmap(normalized_confusion_matrix, cmap="magma_r", cbar=False, yticklabels=True, square=True, linewidths=1, linecolor='white', mask=mask, annot=False, ax=ax)
ax.xaxis.set_ticks_position('top')
ax.xaxis.set_label_position('top')

# Annotate the non-zero cells; white text above 0.10 for contrast on dark
# cells. NaN cells are not caught by the zero mask (NaN != 0) and are skipped.
for (i, j), val in np.ndenumerate(normalized_confusion_matrix):
    if mask.iloc[i, j] or val != val:
        continue
    text_color = 'white' if val > 0.10 else 'black'
    ax.text(j + 0.5, i + 0.5, f'{val:.2f}', ha='center', va='center', color=text_color, fontsize=15)

# Highlight the diagonal with a black outline
for i in range(n_rows):
    ax.add_patch(plt.Rectangle((i, i), 1, 1, fill=False, edgecolor='black', lw=3))

# Grey frame around the whole matrix; sized from the matrix rather than a
# hard-coded 14 so it follows any change to the list of areas
ax.add_patch(plt.Rectangle((0, 0), n_cols, n_rows, fill=False, edgecolor='dimgrey', lw=3))

# Red frames around the first 6x6 block and the following 4x4 block. With the
# area order used in this cell these are the six VISp entries and the next four
# VIS areas; the sizes must be updated if that order changes.
ax.add_patch(plt.Rectangle((0, 0), 6, 6, fill=False, edgecolor='red', lw=5))
ax.add_patch(plt.Rectangle((6, 6), 4, 4, fill=False, edgecolor='red', lw=5))

# Widen the axes by half a cell on every side so the outer frame is not clipped
ax.set_xlim(-0.5, n_cols + 0.5)
ax.set_ylim(n_rows + 0.5, -0.5)

# Nudge the tick labels towards the matrix and hide the tick marks
for label in ax.get_yticklabels():
    x, y = label.get_position()
    label.set_position((x + 0.025, y))
ax.tick_params(axis='both', width=0)

for label in ax.get_xticklabels():
    x, y = label.get_position()
    label.set_position((x, y  - 0.025))

ax.set_title('Column-wise Normalized Confusion Matrix', fontsize=30, pad=20)
ax.set_xlabel('Starter cell location', fontsize=20, labelpad=20)
ax.set_ylabel('Presynaptic cell location', fontsize=20, labelpad=20)
plt.xticks(fontsize=15)
plt.yticks(fontsize=15)

plt.show()