# Reads assignment example

In [99]:
import sys
import numpy as np
import pandas as pd
import seaborn as sns
from starmap.sequencing import *
from natsort import natsorted
from scipy.io import loadmat, savemat
from skimage.filters import threshold_otsu
from skimage.color import label2rgb
from tqdm import tqdm

### Functions

In [100]:
from functools import wraps
from time import time

# Timer
def timer(func):
    @wraps(func)
    def _time_it(*args, **kwargs):
        start = int(round(time() * 1000))
        try:
            return func(*args, **kwargs)
        finally:
            end_ = int(round(time() * 1000)) - start
            end_ = round(end_ / 1000, 4)
            print(f"Total execution time: {end_ if end_ > 0 else 0} s")
    return _time_it


@timer
# Load reads and their positions from mat file
def load_reads(fpath, reads_file):
    S = loadmat(os.path.join(fpath, reads_file))
    bases = [str(i[0][0]) for i in S["merged_reads"]]
    points = S["merged_points"]
    temp = np.zeros(points.shape)
    temp[:, 0] = np.round(points[:, 1]-1)
    temp[:, 1] = np.round(points[:, 0]-1)
    temp[:, 2] = np.round(points[:, 2]-1)
    
    print(f"Number of reads: {len(bases)}")
    
    return bases, temp


# Load gene table from genes.csv
def load_genes(fpath):
    genes2seq = {}
    seq2genes = {}
    with open(os.path.join(fpath, "genes.csv"), encoding='utf-8-sig') as f:
        for l in f:
            fields = l.rstrip().split(",")
            genes2seq[fields[0]] = "".join([str(s+1) for s in encode_SOLID(fields[1][::-1])])
            seq2genes[genes2seq[fields[0]]] = fields[0]
    return genes2seq, seq2genes

## Input

In [101]:
# IO path 
base_path = './'
out_path = os.path.join(base_path, 'output')
if not os.path.exists(out_path):
    os.mkdir(out_path)
    
sample_dict = {'20h_labeling': '20h labeling',
               '1h_labeling_6h_wash': '1h labeling, 6h wash',
               '1h_labeling_4h_wash': '1h labeling, 4h wash',
               '1h_labeling_2h_wash': '1h labeling, 2h wash',
               '1h_labeling_1h_wash': '1h labeling, 1h wash',
               '1h_labeling': '1h labeling'}

sample_dirs = list(sample_dict.keys())

## Run pipeline for individual sample

In [26]:
# Iterate through each sample dir
current_dir = sample_dirs[0]
# print(test_dir)


print(f"Current sample: {current_dir}")

# Load genes
genes2seqs, seqs2genes = load_genes(base_path)

# Load reads 
bases, points = load_reads(current_dir, "merged_goodPoints_max3d_new.mat")
bases = np.array(bases)
points = np.array(points)

Current sample: 1h_labeling
Number of reads: 5219398
Total execution time: 35.842 s


In [27]:
structure_dict = {'whole_cell': 'cell.tif'}

total_cells = None
expr_out_path = os.path.join(out_path, current_dir)
if not os.path.exists(expr_out_path):
    os.mkdir(expr_out_path)
        
expr_dict = {}
for current_structure in structure_dict.keys():
    print(f"====Processing: {current_structure}====")

    # Load segmentation
    current_seg = load_label_image(current_dir, fname=structure_dict[current_structure])
    
    # Reads assignment to current_structure
    current_out_path = os.path.join(expr_out_path, current_structure)
    if not os.path.exists(current_out_path):
        os.mkdir(current_out_path)

    points = points.astype(int)
    reads_assignment = current_seg[points[:, 2], points[:, 0], points[:, 1]]

    reads_info = pd.DataFrame({'x':points[:, 0], 'y':points[:, 1], 'z':points[:, 2], 'cell_label':reads_assignment})
    reads_info = reads_info.astype(np.int32)
    reads_info['orig_index'] = reads_info['cell_label'] - 1
    reads_info['gene'] = bases
    
    reads_info.to_csv(os.path.join(current_out_path, 'reads-info.csv'))

====Processing: whole_cell====


## Generate reads pattern plot 

In [102]:
# load gene cluters
gene_cluster_df = pd.read_excel('./gene_modules/2022-01-06-gene-cluster-label.xlsx')
gene_cluster_df

Unnamed: 0,gene,cluster_kinetics,cluster_GO
0,AARS,2,
1,ASXL1,1,DNA-binding
2,COX7B,3,
3,CTGF,2,
4,DLX4,2,
...,...,...,...
803,ZRANB1,1,
804,ZSCAN12,2,DNA-binding
805,ZSCAN29,2,DNA-binding
806,ZXDC,1,DNA-binding


In [103]:
cluster_GO_dict = {'DNA-binding':0, 'Cell-cell junction':1, 'RNA-binding':2, 'Cell division':3, np.nan:999}
gene_cluster_df['cluster_GO'] = gene_cluster_df['cluster_GO'].map(cluster_GO_dict)
gene_cluster_df['cluster_GO'] = gene_cluster_df['cluster_GO'].astype(np.int64)

In [50]:
# Iterate through each sample dir
test_dirs = sample_dirs[:1]

color_dict = {0:'red', 1:'yellow', 2:'cyan', 3:'magenta'}
plot_field = 'cluster_GO'

for current_dir in sample_dirs:

    print(f"Current sample: {current_dir}")

    # Load genes
    genes2seqs, seqs2genes = load_genes(base_path)

    # Load reads 
    bases, points = load_reads(current_dir, "merged_goodPoints_max3d_new.mat")
    bases = np.array(bases)
    points = np.array(points)
    
    structure_dict = {'whole_cell': 'cell.tif'}

    fig_out_path = base_path + 'figures' + '/' + plot_field + '/' + current_dir
    if not os.path.exists(fig_out_path):
        os.makedirs(fig_out_path)

    # Load raw image
    current_img = load_label_image(current_dir, fname='cell_bnd.tif')
    
    for current_structure in structure_dict.keys():
        print(f"====Processing: {current_structure}====")

        # Load segmentation
        current_seg = load_label_image(current_dir, fname=structure_dict[current_structure])

        points = points.astype(int)
        reads_assignment = current_seg[points[:, 2], points[:, 0], points[:, 1]]

        reads_info = pd.DataFrame({'x':points[:, 0], 'y':points[:, 1], 'z':points[:, 2], 'cell_label':reads_assignment})
        reads_info = reads_info.astype(np.int32)
        reads_info['orig_index'] = reads_info['cell_label'] - 1
        reads_info['gene'] = bases
        
    for current_cluster in sorted(gene_cluster_df[plot_field].unique()):
        if current_cluster == 999:
            continue
            
        print(f"Current field: {plot_field}, Current cluster: {current_cluster}")
        current_gene_list = gene_cluster_df.loc[gene_cluster_df[plot_field] == current_cluster, 'gene'].to_list()
        current_reads_df = reads_info.loc[reads_info['gene'].isin(current_gene_list), :]

        # remove unassigned reads
        current_reads_df = current_reads_df.loc[current_reads_df['cell_label'] != 0, :]

        plt.figure(figsize=(current_img.shape[0]/1000, current_img.shape[1]/1000), dpi=1000)
        plt.imshow(current_img, cmap='gray')
        plt.plot(current_reads_df['y'], current_reads_df['x'], '.', color=color_dict[current_cluster], markersize=.3, markeredgewidth=0.0)
        plt.axis('off')
        plt.tight_layout(pad=0)
        # plt.show()
        current_fig_path = f"{fig_out_path}/cluster_{current_cluster}.tiff"
        plt.savefig(current_fig_path, dpi=1000, pil_kwargs={"compression": "tiff_lzw"}, bbox_inches='tight', pad_inches=0)
        plt.clf()
        plt.close()
        
    plt.figure(figsize=(current_img.shape[0]/1000, current_img.shape[1]/1000), dpi=1000)
    plt.imshow(current_img, cmap='gray')
    for current_cluster in sorted(gene_cluster_df[plot_field].unique()):
        if current_cluster == 999:
            continue
            
        current_gene_list = gene_cluster_df.loc[gene_cluster_df[plot_field] == current_cluster, 'gene'].to_list()
        current_reads_df = reads_info.loc[reads_info['gene'].isin(current_gene_list), :]

        # remove unassigned reads
        current_reads_df = current_reads_df.loc[current_reads_df['cell_label'] != 0, :]

        plt.plot(current_reads_df['y'], current_reads_df['x'], '.', color=color_dict[current_cluster], markersize=.3, markeredgewidth=0.0)
    plt.axis('off')
    plt.tight_layout(pad=0)
    # plt.show()
    current_fig_path = f"{fig_out_path}/cluster_all.tiff"
    plt.savefig(current_fig_path, dpi=1000, pil_kwargs={"compression": "tiff_lzw"}, bbox_inches='tight', pad_inches=0)
    plt.clf()
    plt.close()

Current sample: 20h_labeling
Number of reads: 3083234
Total execution time: 21.824 s
====Processing: whole_cell====
Current field: cluster_GO, Current cluster: 0
Current field: cluster_GO, Current cluster: 1
Current field: cluster_GO, Current cluster: 2
Current field: cluster_GO, Current cluster: 3
Current sample: 1h_labeling_6h_wash
Number of reads: 3266939
Total execution time: 23.327 s
====Processing: whole_cell====
Current field: cluster_GO, Current cluster: 0
Current field: cluster_GO, Current cluster: 1
Current field: cluster_GO, Current cluster: 2
Current field: cluster_GO, Current cluster: 3
Current sample: 1h_labeling_4h_wash
Number of reads: 4825256
Total execution time: 34.249 s
====Processing: whole_cell====
Current field: cluster_GO, Current cluster: 0
Current field: cluster_GO, Current cluster: 1
Current field: cluster_GO, Current cluster: 2
Current field: cluster_GO, Current cluster: 3
Current sample: 1h_labeling_2h_wash
Number of reads: 4336044
Total execution time: 31.

## Test

In [78]:
from skimage.measure import find_contours
from matplotlib.colors import ListedColormap
from skimage.segmentation import find_boundaries
from skimage.color import label2rgb

In [161]:

cell_bnd = find_boundaries(current_cell_label)
dapi_bnd = find_boundaries(current_dapi_label)

cell_bnd = cell_bnd.astype(np.uint8)
cell_bnd[dapi_bnd] = 2
bg = label2rgb(cell_bnd, colors=[(0,0,0), (0,0,255)], bg_label=0, bg_color=(1,1,1))

# import matplotlib.pyplot as plt
plt.figure(figsize=(current_img.shape[0]/1000, current_img.shape[1]/1000), dpi=1000)
plt.imshow(bg)
plt.axis('off')
plt.tight_layout(pad=0)
current_fig_path = f"./output/test.tiff"
plt.savefig(current_fig_path, dpi=1000, pil_kwargs={"compression": "tiff_lzw"}, bbox_inches='tight', pad_inches=0)
plt.clf()
plt.close()

Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers).


In [164]:
gene_cluster_df

Unnamed: 0,gene,cluster_kinetics,cluster_GO
0,AARS,2,999
1,ASXL1,1,0
2,COX7B,3,999
3,CTGF,2,999
4,DLX4,2,999
...,...,...,...
803,ZRANB1,1,999
804,ZSCAN12,2,0
805,ZSCAN29,2,0
806,ZXDC,1,0


In [173]:
# Iterate through each sample dir
test_dirs = sample_dirs[:1]

color_dict = {0:'#F8766D', 1:'#7CAE02', 2:'#00BFC4', 3:'#C77CFF'}
# plot_field = 'cluster_kinetics'
# plot_field = 'cluster_GO'
# plot_field = 'DC1 target'
plot_field = 'm6A label new'

linewidth = .1

for current_dir in sample_dirs:

    print(f"Current sample: {current_dir}")

    # Load genes
    genes2seqs, seqs2genes = load_genes(base_path)

    # Load reads 
    bases, points = load_reads(current_dir, "merged_goodPoints_max3d_new.mat")
    bases = np.array(bases)
    points = np.array(points)
    
    structure_dict = {'whole_cell': 'cell.tif'}

    fig_out_path = base_path + 'figures' + '/' + plot_field + '/' + current_dir
    if not os.path.exists(fig_out_path):
        os.makedirs(fig_out_path)

    # Load raw image
    current_dapi_label = load_label_image(current_dir, fname='dapi_label.tiff')
    current_cell_label = load_label_image(current_dir, fname='overlay_label.tiff')
    
    cell_bnd = find_boundaries(current_cell_label)
    dapi_bnd = find_boundaries(current_dapi_label)

    cell_bnd = cell_bnd.astype(np.uint8)
    cell_bnd[dapi_bnd] = 2
    bg = label2rgb(cell_bnd, colors=[(0,0,0), (0,0,255)], bg_label=0, bg_color=(1,1,1))


#     current_dapi_label = current_dapi_label.astype('float')
#     dapi_bnd = find_contours(current_dapi_label>0, level=.5)
    
#     current_cell_label = current_cell_label.astype('float')
#     cell_bnd = find_contours(current_cell_label>0, level=.5)

#     bg = np.ones(current_dapi_label.shape)
    
    for current_structure in structure_dict.keys():
        print(f"====Processing: {current_structure}====")

        # Load segmentation
        current_seg = load_label_image(current_dir, fname=structure_dict[current_structure])

        points = points.astype(int)
        reads_assignment = current_seg[points[:, 2], points[:, 0], points[:, 1]]

        reads_info = pd.DataFrame({'x':points[:, 0], 'y':points[:, 1], 'z':points[:, 2], 'cell_label':reads_assignment})
        reads_info = reads_info.astype(np.int32)
        reads_info['orig_index'] = reads_info['cell_label'] - 1
        reads_info['gene'] = bases
        
    for current_cluster in sorted(gene_cluster_df[plot_field].unique()):
        if current_cluster == 999:
            continue
            
        print(f"Current field: {plot_field}, Current cluster: {current_cluster}")
        current_gene_list = gene_cluster_df.loc[gene_cluster_df[plot_field] == current_cluster, 'gene'].to_list()
        current_reads_df = reads_info.loc[reads_info['gene'].isin(current_gene_list), :]

        # remove unassigned reads
        current_reads_df = current_reads_df.loc[current_reads_df['cell_label'] != 0, :]

        plt.figure(figsize=(current_img.shape[0]/1000, current_img.shape[1]/1000), dpi=1000)
        plt.imshow(bg)
#         for contour in dapi_bnd:
#             plt.plot(contour[:, 1], contour[:, 0], linewidth=linewidth, c='b')

#         for contour in cell_bnd:
#             plt.plot(contour[:, 1], contour[:, 0], linewidth=linewidth, c='k')
    
        plt.plot(current_reads_df['y'], current_reads_df['x'], '.', color=color_dict[current_cluster], markersize=.3, markeredgewidth=0.0)
        plt.axis('off')
        plt.tight_layout(pad=0)
        # plt.show()
        current_fig_path = f"{fig_out_path}/cluster_{current_cluster}.tiff"
        plt.savefig(current_fig_path, dpi=1000, pil_kwargs={"compression": "tiff_lzw"}, bbox_inches='tight', pad_inches=0)
        plt.clf()
        plt.close()
        
    plt.figure(figsize=(current_img.shape[0]/1000, current_img.shape[1]/1000), dpi=1000)
    plt.imshow(bg)
    
#     for contour in dapi_bnd:
#         plt.plot(contour[:, 1], contour[:, 0], linewidth=linewidth, c='b')

#     for contour in cell_bnd:
#         plt.plot(contour[:, 1], contour[:, 0], linewidth=linewidth, c='k')
    
    for current_cluster in sorted(gene_cluster_df[plot_field].unique()):
        if current_cluster == 999:
            continue
            
        current_gene_list = gene_cluster_df.loc[gene_cluster_df[plot_field] == current_cluster, 'gene'].to_list()
        current_reads_df = reads_info.loc[reads_info['gene'].isin(current_gene_list), :]

        # remove unassigned reads
        current_reads_df = current_reads_df.loc[current_reads_df['cell_label'] != 0, :]

        plt.plot(current_reads_df['y'], current_reads_df['x'], '.', color=color_dict[current_cluster], markersize=.3, markeredgewidth=0.0)
    plt.axis('off')
    plt.tight_layout(pad=0)
    # plt.show()
    current_fig_path = f"{fig_out_path}/cluster_all.tiff"
    plt.savefig(current_fig_path, dpi=1000, pil_kwargs={"compression": "tiff_lzw"}, bbox_inches='tight', pad_inches=0)
    plt.clf()
    plt.close()

Current sample: 20h_labeling
Number of reads: 3083234
Total execution time: 22.21 s
====Processing: whole_cell====
Current field: m6A label new, Current cluster: 0


Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers).


Current field: m6A label new, Current cluster: 1


Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers).
Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers).


Current sample: 1h_labeling_6h_wash
Number of reads: 3266939
Total execution time: 23.602 s
====Processing: whole_cell====
Current field: m6A label new, Current cluster: 0


Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers).


Current field: m6A label new, Current cluster: 1


Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers).
Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers).


Current sample: 1h_labeling_4h_wash
Number of reads: 4825256
Total execution time: 34.858 s
====Processing: whole_cell====
Current field: m6A label new, Current cluster: 0


Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers).


Current field: m6A label new, Current cluster: 1


Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers).
Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers).


Current sample: 1h_labeling_2h_wash
Number of reads: 4336044
Total execution time: 36.238 s
====Processing: whole_cell====
Current field: m6A label new, Current cluster: 0


Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers).


Current field: m6A label new, Current cluster: 1


Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers).
Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers).


Current sample: 1h_labeling_1h_wash
Number of reads: 5557806
Total execution time: 45.109 s
====Processing: whole_cell====
Current field: m6A label new, Current cluster: 0


Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers).


Current field: m6A label new, Current cluster: 1


Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers).
Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers).


Current sample: 1h_labeling
Number of reads: 5219398
Total execution time: 42.251 s
====Processing: whole_cell====
Current field: m6A label new, Current cluster: 0


Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers).


Current field: m6A label new, Current cluster: 1


Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers).
Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers).


In [170]:
# load gene cluters
gene_cluster_df = pd.read_excel('./gene_modules/2022-01-09-m6A-DC1-gene-label.xlsx')
gene_cluster_df.columns = ['gene', 'm6A label', 'DC1 target']
gene_cluster_df

Unnamed: 0,gene,m6A label,DC1 target
0,DGCR11,other,other
1,SNHG20,other,non target
2,SSR2,m6A,other
3,TSPAN3,m6A,other
4,AASS,other,other
...,...,...,...
993,PTRH2,m6A,target
994,CBX4,m6A,target
995,NDUFS4,other,non target
996,NDUFS7,non m6A,non target


In [171]:
gene_cluster_df['m6A label new'] = 999
gene_cluster_df.loc[(gene_cluster_df['m6A label'] == 'm6A') & (gene_cluster_df['DC1 target'] == 'target'), 'm6A label new'] = 0
gene_cluster_df.loc[gene_cluster_df['m6A label'] == 'non m6A', 'm6A label new'] = 1
gene_cluster_df

Unnamed: 0,gene,m6A label,DC1 target,m6A label new
0,DGCR11,other,other,999
1,SNHG20,other,non target,999
2,SSR2,m6A,other,999
3,TSPAN3,m6A,other,999
4,AASS,other,other,999
...,...,...,...,...
993,PTRH2,m6A,target,0
994,CBX4,m6A,target,0
995,NDUFS4,other,non target,999
996,NDUFS7,non m6A,non target,1


In [167]:
field = 'm6A label'
cluster_GO_dict = {'m6A':0, 'non m6A':1, 'other':2}

# field = 'DC1 target'
# cluster_GO_dict = {'target':0, 'non target':1, 'other':2}

gene_cluster_df[field] = gene_cluster_df[field].map(cluster_GO_dict)
gene_cluster_df[field] = gene_cluster_df[field].astype(np.int64)

In [92]:
# Iterate through each sample dir
test_dirs = sample_dirs[:1]

color_dict = {0:'#F8766D', 1:'#7CAE02', 2:'#00BFC4', 3:'#C77CFF'}
plot_field = field
# plot_field = 'cluster_GO'
linewidth = .1

for current_dir in test_dirs:

    print(f"Current sample: {current_dir}")

    # Load genes
    genes2seqs, seqs2genes = load_genes(base_path)

    # Load reads 
    bases, points = load_reads(current_dir, "merged_goodPoints_max3d_new.mat")
    bases = np.array(bases)
    points = np.array(points)
    
    structure_dict = {'whole_cell': 'cell.tif'}

    fig_out_path = base_path + 'figures' + '/' + plot_field + '/' + current_dir
    if not os.path.exists(fig_out_path):
        os.makedirs(fig_out_path)

    # Load raw image
    current_dapi_label = load_label_image(current_dir, fname='dapi_label.tiff')
    current_cell_label = load_label_image(current_dir, fname='overlay_label.tiff')
    
    current_dapi_label = current_dapi_label.astype('float')
    dapi_bnd = find_contours(current_dapi_label>0, level=.5)
    
    current_cell_label = current_cell_label.astype('float')
    cell_bnd = find_contours(current_cell_label>0, level=.5)

    bg = np.ones(current_dapi_label.shape)
    
    for current_structure in structure_dict.keys():
        print(f"====Processing: {current_structure}====")

        # Load segmentation
        current_seg = load_label_image(current_dir, fname=structure_dict[current_structure])

        points = points.astype(int)
        reads_assignment = current_seg[points[:, 2], points[:, 0], points[:, 1]]

        reads_info = pd.DataFrame({'x':points[:, 0], 'y':points[:, 1], 'z':points[:, 2], 'cell_label':reads_assignment})
        reads_info = reads_info.astype(np.int32)
        reads_info['orig_index'] = reads_info['cell_label'] - 1
        reads_info['gene'] = bases
        
    for current_cluster in sorted(gene_cluster_df[plot_field].unique()):
        if current_cluster == 999:
            continue
            
        print(f"Current field: {plot_field}, Current cluster: {current_cluster}")
        current_gene_list = gene_cluster_df.loc[gene_cluster_df[plot_field] == current_cluster, 'gene'].to_list()
        current_reads_df = reads_info.loc[reads_info['gene'].isin(current_gene_list), :]

        # remove unassigned reads
        current_reads_df = current_reads_df.loc[current_reads_df['cell_label'] != 0, :]

        plt.figure(figsize=(current_img.shape[0]/1000, current_img.shape[1]/1000), dpi=1000)
        plt.imshow(bg, cmap='gray_r')
        for contour in dapi_bnd:
            plt.plot(contour[:, 1], contour[:, 0], linewidth=linewidth, c='b')

        for contour in cell_bnd:
            plt.plot(contour[:, 1], contour[:, 0], linewidth=linewidth, c='g')
    
        plt.plot(current_reads_df['y'], current_reads_df['x'], '.', color=color_dict[current_cluster], markersize=.3, markeredgewidth=0.0)
        plt.axis('off')
        plt.tight_layout(pad=0)
        # plt.show()
        current_fig_path = f"{fig_out_path}/cluster_{current_cluster}.tiff"
        plt.savefig(current_fig_path, dpi=1000, pil_kwargs={"compression": "tiff_lzw"}, bbox_inches='tight', pad_inches=0)
        plt.clf()
        plt.close()
        
    plt.figure(figsize=(current_img.shape[0]/1000, current_img.shape[1]/1000), dpi=1000)
    plt.imshow(bg, cmap='gray_r')
    for contour in dapi_bnd:
        plt.plot(contour[:, 1], contour[:, 0], linewidth=linewidth, c='b')

    for contour in cell_bnd:
        plt.plot(contour[:, 1], contour[:, 0], linewidth=linewidth, c='g')
    
    for current_cluster in sorted(gene_cluster_df[plot_field].unique()):
        if current_cluster == 999:
            continue
            
        current_gene_list = gene_cluster_df.loc[gene_cluster_df[plot_field] == current_cluster, 'gene'].to_list()
        current_reads_df = reads_info.loc[reads_info['gene'].isin(current_gene_list), :]

        # remove unassigned reads
        current_reads_df = current_reads_df.loc[current_reads_df['cell_label'] != 0, :]

        plt.plot(current_reads_df['y'], current_reads_df['x'], '.', color=color_dict[current_cluster], markersize=.3, markeredgewidth=0.0)
    plt.axis('off')
    plt.tight_layout(pad=0)
    # plt.show()
    current_fig_path = f"{fig_out_path}/cluster_all.tiff"
    plt.savefig(current_fig_path, dpi=1000, pil_kwargs={"compression": "tiff_lzw"}, bbox_inches='tight', pad_inches=0)
    plt.clf()
    plt.close()

Current sample: 20h_labeling
Number of reads: 3083234
Total execution time: 25.328 s
====Processing: whole_cell====
Current field: DC1 target, Current cluster: 0
Current field: DC1 target, Current cluster: 1
Current field: DC1 target, Current cluster: 2
