# Reads pattern

In [1]:
import sys
import numpy as np
import pandas as pd
import seaborn as sns
from starmap.sequencing import *
from natsort import natsorted
from scipy.io import loadmat, savemat
from skimage.filters import threshold_otsu

In [2]:
from functools import wraps
from time import time

# Timer
def timer(func):
    """Decorator that prints how long each call to ``func`` took.

    The duration is measured with ``time()`` at millisecond resolution and
    printed in seconds after the call finishes, whether it returned or raised.
    The wrapped function's return value (or exception) is passed through
    unchanged.
    """
    def _now_ms():
        # Current clock reading as a whole number of milliseconds.
        return int(round(time() * 1000))

    @wraps(func)
    def _timed(*args, **kwargs):
        started_ms = _now_ms()
        try:
            outcome = func(*args, **kwargs)
        finally:
            elapsed_s = round((_now_ms() - started_ms) / 1000, 4)
            # Non-positive readings are reported as a plain 0.
            shown = elapsed_s if elapsed_s > 0 else 0
            print(f"Total execution time: {shown} s")
        return outcome

    return _timed


# Trim reads
@timer
def trim_reads(sample_dir, save_as=True):
    """Crop the reads of one sample to the box stored in its ``trim.txt``.

    Reads ``trim.txt`` (comma separated integers; row 0 holds the lower
    bounds, row 1 the upper bounds) and ``merged_goodPoints_max3d.mat`` from
    ``sample_dir``. Reads whose first or second coordinate lies outside the
    box are dropped and the remaining coordinates are shifted by the lower
    bounds.

    Parameters
    ----------
    sample_dir : path to the sample folder containing both input files.
    save_as : when true, write the trimmed reads and points to
        ``trimmed_goodPoints_max3d.mat`` inside ``sample_dir``.

    Returns
    -------
    points : float array of shape (n, 2) holding the first two coordinates of
        each kept read, swapped and reduced by one.
    bases : array of the read strings, aligned with ``points``.
    """
    print(f"Trimming reads...")
    bounds = np.loadtxt(os.path.join(sample_dir, 'trim.txt'), dtype=int, delimiter=',')
    dots = loadmat(os.path.join(sample_dir, 'merged_goodPoints_max3d.mat'))

    # One read string per entry of the "merged_reads" cell array
    bases = np.array([str(entry[0][0]) for entry in dots["merged_reads"]])
    locations = dots["merged_points"]

    # A read is discarded as soon as one of its first two coordinates leaves the box
    lower, upper = bounds[0], bounds[1]
    outside = np.logical_or.reduce([
        locations[:, 0] < lower[0],
        locations[:, 0] > upper[0],
        locations[:, 1] < lower[1],
        locations[:, 1] > upper[1],
    ])
    inside = ~outside

    # Boolean indexing copies, so the loaded array itself is left untouched
    kept = locations[inside, :]
    for axis in (0, 1):
        kept[:, axis] = kept[:, axis] - lower[axis]
    bases = bases[inside]

    if save_as:
        savemat(
            os.path.join(sample_dir, 'trimmed_goodPoints_max3d.mat'),
            {'trimmed_reads': bases, 'trimmed_points': kept},
        )

    # Convert to 0 indexed and switch axis for python
    # NOTE(review): a read lying exactly on a lower bound is shifted to 0 above
    # and therefore ends up at -1 here -- confirm that this is intended.
    points = np.round(kept[:, 1::-1] - 1).astype(np.float64)
    print(f"Number of reads: {len(bases)}")

    return points, bases


# Load genes.csv
def load_genes(base_path):
    """Build the gene <-> barcode lookup tables from ``genes.csv``.

    Every line of ``<base_path>/genes.csv`` is split on commas; field 0 is the
    gene name and field 1 its sequence. The reversed sequence is passed to
    ``encode_SOLID``, each returned value is incremented by one and the
    results are concatenated into a digit string. The final barcode is that
    string from index 5 onwards followed by its first four characters (the
    character at index 4 is not used).

    Returns
    -------
    genes2seqs : dict mapping gene name -> barcode string.
    seqs2genes : dict mapping barcode string -> gene name (a later line wins
        when two genes share a barcode).
    """
    gene_to_code = {}
    code_to_gene = {}
    csv_path = os.path.join(base_path, "genes.csv")

    # utf-8-sig strips a leading byte-order mark if the file has one
    with open(csv_path, encoding='utf-8-sig') as handle:
        for row in handle:
            columns = row.rstrip().split(",")
            name, sequence = columns[0], columns[1]

            digits = [str(value + 1) for value in encode_SOLID(sequence[::-1])]
            encoded = "".join(digits)
            barcode = encoded[5:] + encoded[:4]

            gene_to_code[name] = barcode
            code_to_gene[barcode] = name

    return gene_to_code, code_to_gene

In [20]:
# Parameter
points_id = 'max3d'

# IO path
base_path = './'
out_path = os.path.join(base_path, 'output', 'reads_pattern')
# makedirs also creates the intermediate 'output' directory (a plain os.mkdir
# raises FileNotFoundError when it is missing) and exist_ok makes re-running
# this cell harmless.
os.makedirs(out_path, exist_ok=True)

# Samples are the "AD*" folders directly under base_path; plain files that
# happen to share the prefix are skipped because every later step opens files
# inside the sample folder.
sample_dirs = [
    d for d in os.listdir(base_path)
    if d.startswith("AD") and os.path.isdir(os.path.join(base_path, d))
]
sample_dirs

['AD_mouse9494', 'AD_mouse9723', 'AD_mouse9735', 'AD_mouse9498']

In [21]:
# Genes to plot; each one yields one figure per sample
genes = ['NRGN', 'GPM6A', 'CPLX1', 'CPLX2']

# Load genes once: genes.csv is read from base_path and is the same for every
# sample, so there is no need to parse it again inside the loop.
genes2seqs, seqs2genes = load_genes(base_path)

for current_sample in sample_dirs:

    print(f"Current sample: {current_sample}")

    # sample_dirs was listed from base_path, so resolve the folder against it
    sample_path = os.path.join(base_path, current_sample)

    # Load reads
    points, bases = trim_reads(sample_path)

    # Load dapi label
    seg = load_label_image(os.path.join(sample_path, 'segmentation'), fname='labeled_cells.tif')

    # Load 2D Nissl image
    bk_img = load_nissl_image(os.path.join(sample_path, 'trimmed_images'), fname="dots_pi_max_overlay.tif")

    # One output folder per sample; makedirs tolerates re-runs and a missing parent
    current_out_path = os.path.join(out_path, current_sample)
    os.makedirs(current_out_path, exist_ok=True)

    # points[:, 0] indexes the first image axis and points[:, 1] the second
    points = points.astype(int)
    reads_assignment = seg[points[:, 0], points[:, 1]]

    # Get assigned reads (label value 0 is treated as "not assigned").
    # These are not used by the per-gene plots below, which draw every read of
    # the gene; they stay defined because the follow-up plotting cell reads
    # assigned_points. To restrict the gene plots to assigned reads, select
    # from assigned_bases / assigned_points instead of bases / points.
    assigned_index = np.argwhere(reads_assignment != 0).flatten()
    assigned_bases = bases[assigned_index]
    assigned_points = points[assigned_index, :]

    for i, gene in enumerate(genes):
        print(gene)

        # All reads whose barcode equals the barcode of this gene
        curr_index = np.argwhere(bases == genes2seqs[gene]).flatten()
        curr_points = points[curr_index, :]
        n_reads = curr_points.shape[0]

        # Plot: figure size is 5 units per 1000 image pixels, rounded down
        plt.figure(figsize=(np.floor(bk_img.shape[1] / 1000 * 5), np.floor(bk_img.shape[0] / 1000 * 5)))
        plt.imshow(bk_img, cmap='gray')
        # x is the second image axis, y the first
        plt.plot(curr_points[:, 1], curr_points[:, 0], '.', color='red', markersize=5)
        plt.axis('off')
        expr_figure_path = os.path.join(current_out_path, f"{i+1}.{gene}_{n_reads}.png")
        plt.savefig(expr_figure_path)
        # Release the figure so memory does not grow across genes and samples
        plt.clf()
        plt.close()

Current sample: AD_mouse9494
Trimming reads...
Number of reads: 5758057
Total execution time: 51.054 s
NRGN
GPM6A
CPLX1
CPLX2
Current sample: AD_mouse9723
Trimming reads...
Number of reads: 2995745
Total execution time: 21.408 s
NRGN
GPM6A
CPLX1
CPLX2
Current sample: AD_mouse9735
Trimming reads...
Number of reads: 4559723
Total execution time: 33.213 s
NRGN
GPM6A
CPLX1
CPLX2
Current sample: AD_mouse9498
Trimming reads...
Number of reads: 3658964
Total execution time: 26.632 s
NRGN
GPM6A
CPLX1
CPLX2


In [16]:
# Plot the assigned reads on top of the background image.
# Uses bk_img, assigned_points and current_out_path as left behind by the most
# recently processed sample.
fig_width = np.floor(bk_img.shape[1] / 1000 * 5)
fig_height = np.floor(bk_img.shape[0] / 1000 * 5)
plt.figure(figsize=(fig_width, fig_height))
plt.imshow(bk_img, cmap='gray')

# x is the second coordinate of each point, y the first
xs = assigned_points[:, 1]
ys = assigned_points[:, 0]
plt.plot(xs, ys, '.', color='red', markersize=1)
plt.axis('off')

expr_figure_path = os.path.join(current_out_path, "assigned_points.png")
plt.savefig(expr_figure_path)

# Release the figure once it has been written to disk
plt.clf()
plt.close()