In [None]:
# NB need to use environment with python3.9 or above for ccf_streamlines to run
import nrrd
import numpy as np
import matplotlib.pyplot as plt
import ccf_streamlines.projection as ccfproj
import pathlib
import pandas as pd
import numpy as np
import os
import matplotlib.colors as mcolors
from matplotlib import cm
from matplotlib.colors import ListedColormap, LinearSegmentedColormap
from matplotlib.colors import LogNorm
import copy

In [None]:
# LCM directory of mouse FIAA45.6a; the ROI label volume is read from here.
lcm_directory = pathlib.Path(
    "/camp/lab/znamenskiyp/home/shared/projects/turnerb_A1_MAPseq/FIAA45.6a/LCM"
)
# load datasets
# Barcode count table: the cells below treat each row as one barcode and each
# (integer) column label as a sample/tube id.
# NOTE: pickle files can execute code on load - only read files from a trusted source.
barcodes_across_sample = pd.read_pickle(
    "/camp/lab/znamenskiyp/home/shared/projects/turnerb_A1_MAPseq/FIAA45.6a/Sequencing/barcode_matrix_soma_thresholded.pkl"
)
# 3D label volume whose voxel values are compared against sample/tube ids below.
# NOTE(review): the "_10" suffix presumably means 10 um resolution - confirm.
ROI_3D = np.load(lcm_directory / "ROI_3D_10.npy")

In [None]:
# Directory containing the downloaded ccf_streamlines reference files (flatmap
# atlas, label description and streamline files used below). It is shared
# between animals, so do not change it when looking at a different mouse.
convert_to_flat_path = pathlib.Path(
    "/camp/lab/znamenskiyp/home/shared/projects/turnerb_MAPseq/A1_MAPseq/FIAA32.6a/LCM_registration"
)

In [None]:
# Keep only barcodes that are detected (non-zero) in at least two samples
n_samples_with_barcode = barcodes_across_sample.astype(bool).sum(axis=1)
barcodes_across_sample = barcodes_across_sample[n_samples_with_barcode > 1]

In [None]:
# Sample/tube ids treated as cortical (one run of consecutive ids per row).
# NOTE(review): this list is overwritten by the reassignment of
# `cortical_samples` in the next cell, so it has no effect when the notebook is
# run top to bottom - confirm which list belongs to this mouse.
# fmt: off
cortical_samples = [
    4, 5, 6, 7, 8, 9, 10, 11,
    13, 14, 15, 16, 17, 18, 19, 20,
    25, 26, 27, 28, 29, 30, 31, 32, 33,
    37, 38, 39, 40, 41, 42, 43, 44, 45, 46,
    51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
    66,
    70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88,
    93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113,
    117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128,
    132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143,
    147, 148, 149, 150, 151,
    153, 154, 155, 156, 157, 158, 159,
    161, 162, 163, 164, 165,
    167, 168, 169, 170,
    172, 173, 174, 175,
    177, 178, 179, 180, 181, 182, 183, 184, 185, 186,
]
# fmt: on

In [None]:
# Sample/tube ids treated as cortical (one run of consecutive ids per row).
# This assignment replaces the list defined in the previous cell, so it is the
# one used by the masking step below.
# fmt: off
cortical_samples = [
    3, 4, 5,
    8, 9, 10, 11, 12,
    14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
    27, 28, 29, 30, 31, 32, 33, 34,
    39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50,
    54,
    57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67,
    72, 73, 74,
    76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90,
    91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105,
    109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120,
    126, 127, 128, 129,
    131, 132, 133, 134, 135, 136,
    139,
    144, 145, 146, 147, 148,
    151, 152, 153, 154, 155, 156, 157,
    159, 160, 161, 162, 163, 164,
    166, 167, 168, 169,
    171, 172, 173, 174, 175, 176,
]
# fmt: on

In [None]:
# For the flatmap visualisation only cortical samples are kept: every voxel
# whose label is not listed in cortical_samples is reset to 0 (in place).
mask = np.isin(ROI_3D, cortical_samples)
ROI_3D[np.logical_not(mask)] = 0

In [None]:
# Boundary finder built from the butterfly flatmap atlas and its label
# description file; the boundaries it returns are drawn as white outlines on
# top of every flatmap plotted below.
bf_boundary_finder = ccfproj.BoundaryFinder(
    projected_atlas_file=convert_to_flat_path / "flatmap_butterfly.nrrd",
    labels_file=convert_to_flat_path / "labelDescription_ITKSNAPColor.txt",
)

# We get the left hemisphere region boundaries with the default arguments
bf_left_boundaries = bf_boundary_finder.region_boundaries()

# And we can get the right hemisphere boundaries that match up with
# our projection if we specify the same configuration
bf_right_boundaries = bf_boundary_finder.region_boundaries(
    # we want the right hemisphere boundaries, but located in the right place
    # to plot both hemispheres at the same time
    hemisphere="right_for_both",
    # we also want the hemispheres to be adjacent
    view_space_for_other_hemisphere="flatmap_butterfly",
)

In [None]:
# Projector used below (proj_top.project_volume) to turn the 3D ROI label
# volume into the 2D butterfly flatmap image.
proj_top = ccfproj.Isocortex2dProjector(
    # Specify our view lookup file
    convert_to_flat_path / "flatmap_butterfly.h5",
    # Specify our streamline file
    convert_to_flat_path / "surface_paths_10_v3.h5",
    # Specify that we want to project both hemispheres
    hemisphere="both",
    # Place both hemispheres side-by-side, using the same view space as the
    # boundary finder above so the outlines line up with the projection.
    # NOTE(review): the original comment referred to the "top view", but the
    # butterfly flatmap is what is configured here - confirm the wording.
    view_space_for_other_hemisphere="flatmap_butterfly",
)

In [None]:
# Remove tubes from the ROI volume that have no column in the barcode matrix.
# Every voxel label is checked against the columns in a single pass. Unlike a
# scan restricted to the ids between the smallest and largest column, this also
# clears labels outside that range; labels above the largest column would
# otherwise index past the end of the per-tube count arrays built below.
tubes_in_barcode_matrix = barcodes_across_sample.columns.to_numpy()
ROI_3D[~np.isin(ROI_3D, tubes_in_barcode_matrix)] = 0

In [None]:
# Project the 3D ROI label volume onto the flatmap. The result is used below as
# an image of tube ids (cast to int and used to index per-tube arrays).
# NOTE(review): the variable name suggests a maximum projection - confirm
# against the default behaviour of project_volume.
ROI_projection_max = proj_top.project_volume(ROI_3D)

In [None]:
# Dense (n_barcodes x n_columns) array in which the column index equals the
# tube id, so tube ids can be used directly as indices. Columns for ids that
# are not in the barcode table stay at 0.
tube_ids = barcodes_across_sample.columns.to_list()
barcode_matrix = np.zeros((len(barcodes_across_sample), max(tube_ids) + 1))
for tube_id in tube_ids:
    barcode_matrix[:, tube_id] = barcodes_across_sample[tube_id].to_numpy()

In [None]:
# Total barcode counts per tube (array index = tube id)
total_counts = barcode_matrix.sum(axis=0)
# Index 0 is the label left for voxels without a tube: setting it to -1 makes
# log10(1 + value) non-finite there, so the plot can show it in a separate colour
total_counts[0] = -1

In [None]:
# Flatmap of log10(1 + total barcode count) of the tube under each pixel.
# Pixels without a tube (label 0, count -1) give log10(0) = -inf on purpose,
# so the expected divide-by-zero warning is silenced for this expression only.
with np.errstate(divide="ignore"):
    new_mat = np.log10(1 + total_counts[ROI_projection_max.astype(int)]).T

# plt.cm.get_cmap was deprecated in matplotlib 3.7 and removed in 3.9;
# plt.get_cmap works on both older and newer versions.
cmap = plt.get_cmap("magma").copy()

cmap.set_bad(color=[0.3, 0.3, 0.3, 1])  # Draw non-finite (no tube) pixels in grey

plt.imshow(new_mat, cmap=cmap)
plt.axis("off")
plt.colorbar(label="log 10 barcode counts", fraction=0.03, pad=0.04)
# Overlay the region outlines of both hemispheres
for boundary_coords in bf_left_boundaries.values():
    plt.plot(*boundary_coords.T, c="white", lw=0.5)
for boundary_coords in bf_right_boundaries.values():
    plt.plot(*boundary_coords.T, c="white", lw=0.5)
plt.title("Distribution of total MAPseq counts FIAA45.6a")

In [None]:
# Barcode counts divided by ROI volume, for every tube label present in ROI_3D
# (array index = tube id; tubes absent from the volume stay at 0).
# NOTE(review): 25 is used as the volume of a single voxel. The file name
# "ROI_3D_10.npy" suggests 10 um voxels (1000 um^3 each) - confirm this factor
# against the "counts/um^3" label used when plotting.
proj_strength = np.zeros(total_counts.shape)
# Count the voxels of all labels in one pass over the volume instead of
# re-scanning the whole volume once per label.
roi_labels, voxel_counts = np.unique(ROI_3D, return_counts=True)
for i, n_voxels in zip(roi_labels, voxel_counts):
    if int(i) > 0:
        vol = int(n_voxels) * 25
        proj_strength[int(i)] = total_counts[int(i)] / vol

In [None]:
# Inspect the value for the last ROI label visited by the loop in the previous
# cell (relies on the loop variable `i` still being defined in the kernel).
proj_strength[int(i)]

In [None]:
# Index 0 is set to -1 so that log10(1 + value) is non-finite for that label
# in the plot below.
proj_strength[0] = -1

In [None]:
# Flatmap of log10(1 + counts per volume) of the tube under each pixel.
# Pixels without a tube (label 0, value -1) give log10(0) = -inf on purpose,
# so the expected divide-by-zero warning is silenced for this expression only.
with np.errstate(divide="ignore"):
    new_mat = np.log10(1 + proj_strength[ROI_projection_max.astype(int)]).T

# plt.cm.get_cmap was deprecated in matplotlib 3.7 and removed in 3.9;
# plt.get_cmap works on both older and newer versions.
cmap = plt.get_cmap("magma").copy()

cmap.set_bad(color=[0.3, 0.3, 0.3, 1])  # Draw non-finite (no tube) pixels in grey

plt.imshow(new_mat, cmap=cmap)
plt.axis("off")
plt.colorbar(label="log 10 barcode counts/$um^{3}$", fraction=0.03, pad=0.04)
# Overlay the region outlines of both hemispheres
for boundary_coords in bf_left_boundaries.values():
    plt.plot(*boundary_coords.T, c="white", lw=0.5)
for boundary_coords in bf_right_boundaries.values():
    plt.plot(*boundary_coords.T, c="white", lw=0.5)
plt.title("Distribution of MAPseq counts/volume FIAA45.6a")

In [None]:
# now look at where soma locations are
# One-hot table with the same rows and columns as barcodes_across_sample: each
# barcode gets a 1 in the sample where its count is highest (taken as the
# sample containing the soma) and 0 everywhere else. Summing a column then
# gives the number of barcodes assigned to that sample.
# Built in one step: concatenating a one-row DataFrame per barcode is quadratic
# in the number of barcodes and discards the barcode index.
soma_sample = barcodes_across_sample.idxmax(axis=1)  # first column holding the row maximum
soma_column_position = barcodes_across_sample.columns.get_indexer(soma_sample)
soma_one_hot = np.zeros(barcodes_across_sample.shape, dtype=int)
soma_one_hot[np.arange(len(soma_column_position)), soma_column_position] = 1
source_thresholded_soma_only_actual_val = pd.DataFrame(
    soma_one_hot,
    index=barcodes_across_sample.index,
    columns=barcodes_across_sample.columns,
)

In [None]:
# Dense (n_barcodes x n_columns) soma indicator array in which the column index
# equals the tube id; columns for ids without data stay at 0.
n_tube_columns = max(barcodes_across_sample.columns.to_list()) + 1
soma_matrix = np.zeros((len(barcodes_across_sample), n_tube_columns))
for tube_id in source_thresholded_soma_only_actual_val.columns:
    soma_matrix[:, tube_id] = source_thresholded_soma_only_actual_val[
        tube_id
    ].to_numpy()
# Number of barcodes whose soma was assigned to each tube
soma_counts = soma_matrix.sum(axis=0)


# Index 0 is the label left for voxels without a tube: setting it to -1 makes
# log10(1 + value) non-finite there when plotted
soma_counts[0] = -1

In [None]:
# Flatmap of log10(1 + number of somas) of the tube under each pixel.
# Pixels without a tube (label 0, count -1) give log10(0) = -inf on purpose,
# so the expected divide-by-zero warning is silenced for this expression only.
with np.errstate(divide="ignore"):
    new_mat = np.log10(1 + soma_counts[ROI_projection_max.astype(int)]).T

# plt.cm.get_cmap was deprecated in matplotlib 3.7 and removed in 3.9;
# plt.get_cmap works on both older and newer versions.
cmap = plt.get_cmap("magma").copy()

cmap.set_bad(color=[0.3, 0.3, 0.3, 1])  # Draw non-finite (no tube) pixels in grey

plt.imshow(new_mat, cmap=cmap)
plt.axis("off")
plt.colorbar(label="log 10 neuron counts", fraction=0.03, pad=0.04)
# Overlay the region outlines of both hemispheres
for boundary_coords in bf_left_boundaries.values():
    plt.plot(*boundary_coords.T, c="white", lw=0.5)
for boundary_coords in bf_right_boundaries.values():
    plt.plot(*boundary_coords.T, c="white", lw=0.5)
plt.title("Location of barcode cell bodies FIAA45.6a")

In [None]:
# Display the one-hot soma assignment table (rows = barcodes, columns = samples)
source_thresholded_soma_only_actual_val

In [None]:
# Display the filtered barcode count table for comparison with the soma table
barcodes_across_sample

In [None]:
# Largest number of somas assigned to a single tube
soma_counts.max()