# Protein quantification

In [None]:
# Import packages 
import sys
import numpy as np
import pandas as pd
import seaborn as sns
from natsort import natsorted
from scipy.io import loadmat, savemat
from skimage.filters import threshold_otsu
from skimage.color import label2rgb
from tqdm.notebook import tqdm

import anndata as ad
import scanpy as sc
# Customized packages 
from starmap.sequencing import *

## Path

In [None]:
# IO paths: everything hangs off base_path
base_path = 'Z:/Data/Analyzed/2022-09-05-Hu-Tissue/'

in_path = os.path.join(base_path, 'input')
out_path = os.path.join(base_path, 'output')
seg_path = os.path.join(in_path, "rep3/protein/")
grid_path = os.path.join(seg_path, 'grid')

# makedirs (rather than mkdir) also creates missing intermediate folders such
# as 'rep3', and exist_ok=True makes the cell safe to re-run.
for directory in (in_path, out_path, seg_path, grid_path):
    os.makedirs(directory, exist_ok=True)

In [None]:
# Sample prefix; used below to build the replicate name and the image file names
current_sample = 'STAR'

In [None]:
# Read the combined AnnData object; per-cell positions are taken from its .obs table below
h5ad_file = os.path.join(out_path, '2022-11-20-Brain-combined-3mad-ct-final.h5ad')
cdata = sc.read_h5ad(h5ad_file)
cdata

## Segmentation

In [None]:
# Select this sample's rep3 cells and scale their positions by 0.25
in_sample = cdata.obs['protocol-replicate'] == f'{current_sample}map-rep3'
df = cdata.obs.loc[in_sample, :].copy()
df = df.assign(
    column_scaled=df['column'] * 0.25,
    row_scaled=df['row'] * 0.25,
)
# One row per cell, ordered as (column, row)
centroids = df[['column_scaled', 'row_scaled']].to_numpy()
centroids

In [None]:
# Read the overlay image that the segmentation below is computed from
overlay_dir = os.path.join(seg_path, 'segmentation')
overlay = load_nissl_image(overlay_dir, fname=f"{current_sample}_overlay.tif")

In [None]:
# Figure size follows the overlay's aspect ratio: 5 units per 1000 pixels, as (width, height)
overlay_height, overlay_width = overlay.shape[:2]
figsize = (overlay_width / 1000 * 5, overlay_height / 1000 * 5)

In [None]:
# Draw the cell centroids on top of the overlay image and save the figure
plt.figure(figsize=figsize)
plt.imshow(overlay, cmap='gray')
plt.plot(centroids[:, 0], centroids[:, 1], '.', color='red', markersize=3)
plt.axis('off')

points_seg_path = os.path.join(seg_path, f"{current_sample}_centroid.png")
# Report the path that is really written (the previous message named a
# different, fixed file name).
print(f"Saving {points_seg_path}")
plt.savefig(points_seg_path)

In [None]:
%%time
# Segmentation

print("Gaussian & Thresholding")
blurred_overlay_seg = gaussian(overlay.astype(np.float), 2)
threhold = 32

# manual threshold 
blurred_overlay_seg = blurred_overlay_seg > threhold

# dialation  
blurred_overlay_seg = binary_dilation(blurred_overlay_seg, selem=disk(3))

print("Assigning markers")
centroids = centroids.astype(int)
markers = np.zeros(blurred_overlay_seg.shape, dtype=np.uint8)
for i in range(centroids.shape[0]):
    y, x = centroids[i, :]
    if x < blurred_overlay_seg.shape[0] and y < blurred_overlay_seg.shape[1]:
        markers[x-1, y-1] = 1
markers = ndi.label(markers)[0]

print("Watershed")
labels = watershed(blurred_overlay_seg, markers, mask=blurred_overlay_seg)
labels_line = watershed(blurred_overlay_seg, markers, mask=blurred_overlay_seg, watershed_line=True)

print(f"Labeled {len(np.unique(labels)) - 1} cells")
plt.figure(figsize=(10,20))
plt.imshow(label2rgb(labels_line, bg_label=0))

print(f"Saving files to {seg_path}")
tifffile.imsave(os.path.join(seg_path, f"{current_sample}_labeled_cells_line.tif"), labels_line.astype(np.uint16))
tifffile.imsave(os.path.join(seg_path, f"{current_sample}_labeled_cells.tif"), labels.astype(np.uint16))

In [None]:
# Sanity check: (number of centroids, coordinates per centroid)
centroids.shape

## Quantification

In [None]:
# Re-select the sample's rep3 cells; here the positions are the unscaled 'column'/'row' values
current_sample = 'STAR'
replicate_name = f'{current_sample}map-rep3'
current_df = cdata.obs.loc[cdata.obs['protocol-replicate'] == replicate_name, :].copy()
centroids = current_df[['column', 'row']].to_numpy()
# Largest (column, row) values, to compare against the label image shape
centroids.max(axis=0)

In [None]:
# Read the existing cell segmentation label image for this sample
label_dir = os.path.join(seg_path, "segmentation")
labels = load_label_image(label_dir, fname=f'{current_sample}_labeled_cells_org.tif')
labels.shape

In [None]:
# Read the raw Gfap / NeuN images and the matching mask images for this sample
raw_dir = os.path.join(seg_path, 'raw')
mask_dir = os.path.join(seg_path, 'mask_test')
gfap_fname = f"{current_sample}_Gfap.tif"
neun_fname = f"{current_sample}_NeuN.tif"

gfap = load_nissl_image(raw_dir, fname=gfap_fname)
neun = load_nissl_image(raw_dir, fname=neun_fname)

gfap_mask = load_nissl_image(mask_dir, fname=gfap_fname)
neun_mask = load_nissl_image(mask_dir, fname=neun_fname)

neun.shape

In [None]:
# Binarize both masks: every non-zero pixel becomes True
gfap_mask, neun_mask = gfap_mask > 0, neun_mask > 0

In [None]:
# Look up the segmentation label under every cell centroid.
# centroids holds (column, row) pairs while labels is indexed [row, column].
# The explicit integer cast guards against float-typed coordinates, which
# NumPy refuses as index arrays; it mirrors the cast used for the read
# positions in the grid section and is a no-op for integer input.
centroid_rows = centroids[:, 1].astype(int)
centroid_cols = centroids[:, 0].astype(int)
reads_assignment = labels[centroid_rows, centroid_cols]
reads_assignment

In [None]:
%%time
        
areas = [0, ]
neun_intensity = [0, ]
gfap_intensity = [0, ]

neun_pixel = [0, ]
gfap_pixel = [0, ]

region_label = [0, ]

# Iterate through cells
print('Iterate cells...')
for i, region in enumerate(tqdm(regionprops(labels))):
      # print(region.label)
    region_label.append(region.label)
    areas.append(region.area)
    
    # neun intensity
    curr_intensity = np.sum(neun[region.coords[:, 0], region.coords[:, 1]])
    curr_pixel = np.sum(neun_mask[region.coords[:, 0], region.coords[:, 1]])
    
    neun_intensity.append(round(curr_intensity, 2))
    neun_pixel.append(round(curr_pixel, 2))
        
    # gfap intensity
    curr_intensity = np.sum(gfap[region.coords[:, 0], region.coords[:, 1]])
    curr_pixel = np.sum(gfap_mask[region.coords[:, 0], region.coords[:, 1]])
    
    gfap_intensity.append(round(curr_intensity, 2))
    gfap_pixel.append(round(curr_pixel, 2))

In [None]:
# Turn the accumulated per-region lists into NumPy arrays so they can be
# indexed with an index array later on
neun_intensity, neun_pixel = np.array(neun_intensity), np.array(neun_pixel)
gfap_intensity, gfap_pixel = np.array(gfap_intensity), np.array(gfap_pixel)
region_label, areas = np.array(region_label), np.array(areas)

In [None]:
# find duplicate assignment: labels that more than one centroid landed on
from collections import Counter
dup_assignment = [item for item, count in Counter(reads_assignment).items() if count > 1]

# Cells sharing a label are ambiguous, so all of them are reset to label 0.
# A single np.isin mask replaces one full-array scan per duplicated label;
# the resulting array is the same.
reads_assignment[np.isin(reads_assignment, dup_assignment)] = 0

In [None]:
# For every cell, find the position of its label inside region_label.
# A sorted lookup resolves all cells in one vectorised call instead of
# scanning region_label once per cell.
label_order = np.argsort(region_label, kind='stable')
sorted_pos = np.searchsorted(region_label, reads_assignment, sorter=label_order)
# Labels larger than every known label land one past the end; clamp them so
# the indexing below is valid, then let the equality check report them.
sorted_pos = np.minimum(sorted_pos, len(region_label) - 1)
indexes = label_order[sorted_pos]
if not np.array_equal(region_label[indexes], reads_assignment):
    raise IndexError("reads_assignment contains labels that are missing from region_label")

In [None]:
# Attach the assigned label and the matching per-region measurements to each cell
current_df['seg_label'] = reads_assignment

per_region_values = {
    'area': areas,
    'Gfap': gfap_intensity,
    'NeuN': neun_intensity,
    'Gfap_pixel': gfap_pixel,
    'NeuN_pixel': neun_pixel,
}
for column_name, values in per_region_values.items():
    # indexes picks, for every cell, the entry of its region
    current_df[column_name] = values[indexes]

In [None]:
# Copy the new per-cell columns back into cdata.obs for this sample's rows.
# Keys are the destination columns in cdata.obs, values the source columns in current_df.
sample_mask = cdata.obs['protocol-replicate'] == f'{current_sample}map-rep3'
obs_from_df = {
    'seg_label': 'seg_label',
    'area': 'area',
    'Gfap_intensity': 'Gfap',
    'NeuN_intensity': 'NeuN',
    'Gfap_pixel': 'Gfap_pixel',
    'NeuN_pixel': 'NeuN_pixel',
}
for obs_column, df_column in obs_from_df.items():
    cdata.obs.loc[sample_mask, obs_column] = current_df[df_column]

In [None]:
from datetime import datetime

# Write the updated object under a file name prefixed with today's date
date = datetime.today().strftime('%Y-%m-%d')
dated_h5ad = f"{out_path}/{date}-Brain-combined-3mad-ct-final.h5ad"
cdata.write_h5ad(dated_h5ad)

## Grid segmentation

In [None]:
from skimage.util import regular_seeds, img_as_ubyte
from skimage.segmentation import watershed

# Test
# img = np.ones((6, 6), dtype=int)
# img = img_as_ubyte(img * 255)
# seeds = regular_seeds((6, 6), 4)
# w1 = watershed(img, seeds, compactness=0.01)
# plt.imshow(w1, cmap='Greys')

Pixel size: 0.142 micron per pixel (the factor used below to convert image dimensions from pixels to microns).

In [None]:
grid_interval = 30 # micron

# Size of the label image in microns (pixels * 0.142), then the number of
# grid_interval x grid_interval squares that fit into that area
microns_per_pixel = 0.142
read_dim = tuple(extent * microns_per_pixel for extent in labels.shape[:2])
read_area = read_dim[0] * read_dim[1]
grid_area = grid_interval ** 2
n_seeds = int(read_area / grid_area)
n_seeds

In [None]:
# Build a constant image at 0.25x the label image size, spread n_seeds regular
# seeds over it and run a compact watershed from those seeds
# (presumably yielding roughly regular grid blocks - check the preview below)
scaled_dim = tuple(round(extent * 0.25) for extent in labels.shape[:2])
img = img_as_ubyte(np.ones(scaled_dim, dtype=int) * 255)
seeds = regular_seeds(scaled_dim, n_seeds)
w1 = watershed(img, seeds, compactness=0.01)

In [None]:
# Preview the grid label image
plt.imshow(w1, cmap='Greys')

In [None]:
# Store the grid labels as a 16-bit TIFF named after the sample and grid interval
grid_tif = os.path.join(grid_path, f"{current_sample}_grid_{grid_interval}.tif")
tifffile.imsave(grid_tif, w1.astype(np.uint16))

## Grid quantification

In [None]:
# Switch to the RIBO sample and read its grid label image
# (grid_interval is taken from the grid segmentation section above)
current_sample = 'RIBO'

grid_fname = f"{current_sample}_grid_{grid_interval}_org.tif"
grid_seg = load_label_image(grid_path, fname=grid_fname)

In [None]:
# Read the raw Gfap / NeuN images and their mask images for the current sample
raw_dir = os.path.join(seg_path, 'raw')
mask_dir = os.path.join(seg_path, 'mask_test')
gfap_fname = f"{current_sample}_Gfap.tif"
neun_fname = f"{current_sample}_NeuN.tif"

gfap = load_nissl_image(raw_dir, fname=gfap_fname)
neun = load_nissl_image(raw_dir, fname=neun_fname)

gfap_mask = load_nissl_image(mask_dir, fname=gfap_fname)
neun_mask = load_nissl_image(mask_dir, fname=neun_fname)

neun.shape

In [None]:
# Binarize both masks: every non-zero pixel becomes True
gfap_mask, neun_mask = gfap_mask > 0, neun_mask > 0

In [None]:
# load reads

# gene_name values that appear as numeric codes and the Sept* names they are
# replaced with (presumably spreadsheet date conversions - TODO confirm)
transfer_dict = {
    '44440': 'Sept1',
    '44443': 'Sept11',
    '44446': 'Sept4',
    '44447': 'Sept7',
    '44448': 'Sept8',
    '44450': 'Sept9',
}

read_columns = ['gene_name', 'column', 'row', 'cell_barcode']
batch_dir = os.path.join(in_path, 'rep3/batch2')

# Two read tables per sample, reduced to the same four columns
temp1 = pd.read_csv(os.path.join(batch_dir, f'{current_sample}_remain_reads.csv'), index_col=0)
temp1 = temp1[read_columns]
temp2 = pd.read_csv(os.path.join(batch_dir, f'{current_sample}_unassigned_reads_adjust_overlap.csv'), index_col=0)
temp2 = temp2[read_columns]

# Stack both tables under a fresh 0..n-1 index, then fix the gene names
total_reads = pd.concat([temp1, temp2]).reset_index(drop=True)
total_reads['gene_name'] = total_reads['gene_name'].replace(transfer_dict)
total_reads

In [None]:
# Sorted list of the distinct gene names; its order defines the count-matrix columns
genes = sorted(total_reads['gene_name'].unique())

In [None]:
# Read positions as integer (row, column) pairs and the gene name of each read
points = total_reads[['row', 'column']].to_numpy().astype(int)
reads = total_reads[['gene_name']].to_numpy().flatten()

In [None]:
# Grid label under every read; points holds (row, column) pairs
read_rows, read_cols = points[:, 0], points[:, 1]
reads_assignment = grid_seg[read_rows, read_cols]
reads_assignment

In [None]:
%%time
        
neun_intensity = []
gfap_intensity = []

neun_pixel = []
gfap_pixel = []

total_blocks = grid_seg.max()
cell_by_barcode = np.zeros((total_blocks, len(genes)))
gene_seq_to_index = {}  # map from sequence to index into matrix

for i, k in enumerate(genes):
    gene_seq_to_index[k] = i
    

# Iterate through cells
print('Iterate cells...')
for i, region in enumerate(tqdm(regionprops(grid_seg))):
      # print(region.label)
    
    # neun intensity
    curr_intensity = np.sum(neun[region.coords[:, 0], region.coords[:, 1]])
    curr_pixel = np.sum(neun_mask[region.coords[:, 0], region.coords[:, 1]])
    
    neun_intensity.append(round(curr_intensity, 2))
    neun_pixel.append(round(curr_pixel, 2))
        
    # gfap intensity
    curr_intensity = np.sum(gfap[region.coords[:, 0], region.coords[:, 1]])
    curr_pixel = np.sum(gfap_mask[region.coords[:, 0], region.coords[:, 1]])
    
    gfap_intensity.append(round(curr_intensity, 2))
    gfap_pixel.append(round(curr_pixel, 2))
    
    assigned_reads = reads[np.argwhere(reads_assignment == region.label).flatten()]
    for j in assigned_reads:
        if j in gene_seq_to_index:
            cell_by_barcode[i, gene_seq_to_index[j]] += 1

In [None]:
# Sanity check: (number of grid blocks, number of genes)
cell_by_barcode.shape

In [None]:
# One row per grid block: read counts per gene followed by the protein measurements
quantification_df = pd.DataFrame(cell_by_barcode, columns=genes)
protein_columns = {
    'Gfap_intensity': gfap_intensity,
    'Gfap_pixel': gfap_pixel,
    'NeuN_intensity': neun_intensity,
    'NeuN_pixel': neun_pixel,
}
for column_name, values in protein_columns.items():
    quantification_df[column_name] = values
quantification_df

In [None]:
from datetime import datetime

# Save the grid quantification under a file name prefixed with today's date
date = datetime.today().strftime('%Y-%m-%d')
csv_file = os.path.join(out_path, f"{date}-{current_sample}map-grid-quantification.csv")
quantification_df.to_csv(csv_file)