In [1]:
import os
import random
import time
import re
from pathlib import Path
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import scanpy as sc
import seaborn as sns
from matplotlib.colors import ListedColormap
from textwrap import wrap

In [2]:
# Input, result and figure folders all sit under the current working directory.
_base_dir = Path.cwd()
data_dir, result_dir, fig_dir = (
    _base_dir / folder for folder in ('data', 'results', 'figures')
)
# `model` tags the saved figure names; `adata_name` is the file read from result_dir.
model = 'SAGE'
adata_name = 'SAGE-20201013.h5ad'

In [3]:
# Read the saved AnnData object; its `obs` table holds one row per spot.
adata = sc.read(result_dir / adata_name)
# Number of clusters is taken as the largest 'spage2vec' label plus one.
num_cluster = 1 + adata.obs['spage2vec'].max()
# Keep only the rows whose 'spage2vec' label is greater than -1.
df_heart = adata.obs.loc[adata.obs['spage2vec'].gt(-1)]
df_heart

Unnamed: 0,gene,spotX,spotY,pcw,section,louvain,spage2vec,cell_type_id
4.5_1_11,MYRF,3242.580,1736.800,4,1,7,1,
4.5_1_18,ITLN1,3217.077,1764.718,4,1,21,18,
4.5_1_14,CPE,3260.000,1745.267,4,1,7,1,
4.5_1_15,CRABP2,3269.818,1748.345,4,1,18,20,
4.5_1_12,TNNI1,3098.909,1738.303,4,1,16,0,
...,...,...,...,...,...,...,...,...
9.5_3_538710,MYH7,18068.310,24232.630,9,3,0,6,
9.5_3_538718,MYOM1,18049.890,24246.370,9,3,0,6,
9.5_3_538694,MYL2,18028.650,24213.420,9,3,0,6,
9.5_3_538712,COL1A1,18189.620,24235.260,9,3,10,9,


In [4]:
# Full 28-entry categorical palette; only the first `num_cluster` colours are used.
_zeileis_full = [
    "#023fa5", "#7d87b9", "#bec1d4", "#d6bcc0",
    "#bb7784", "#8e063b", "#4a6fe3", "#8595e1",
    "#b5bbe3", "#e6afb9", "#e07b91", "#d33f6a",
    "#11c638", "#8dd593", "#c6dec7", "#ead3c6",
    "#f0b98d", "#ef9708", "#0fcfc0", "#9cded6",
    "#d5eae7", "#f3e1eb", "#f6c4e1", "#f79cd4",
]
# these last ones were added
_zeileis_full += ['#7f7f7f', "#c7c7c7", "#1CE6FF", "#336600"]
zeileis_28 = _zeileis_full[:num_cluster]

In [5]:
# Fifteen colours used (via ListedColormap) for the cell-type panel.
_ct_colors = [
    '#F8766D', '#E58700', '#C99800',
    '#A3A500', '#6BB100', '#00BA38',
    '#00BF7D', '#00C0AF', '#00BCD8',
    '#00B0F6', '#619CFF', '#B983FF',
    '#E76BF3', '#FD61D1', '#FF67A4',
]
ct_cmap = np.asarray(_ct_colors)

In [6]:
def spatial_plot(pcw):
    """Save a one-row, three-panel scatter figure of spage2vec labels for one pcw.

    Panel ``s`` shows the spots of ``df_heart`` with the given ``pcw`` and
    ``section == s + 1``, coloured by their ``spage2vec`` label. For
    ``pcw == 6`` the third panel carries the colour legend instead of a
    section. The figure is written to
    ``fig_dir / 'spatial-{model}-{pcw}.png'`` and then closed.

    Reads the module-level ``df_heart``, ``zeileis_28``, ``num_cluster``,
    ``fig_dir`` and ``model``.

    Parameters
    ----------
    pcw : int
        Value matched against ``df_heart['pcw']``; also used in the panel
        titles and the output file name.
    """
    n_section = 3
    cluster_cmap = ListedColormap(zeileis_28)
    fig, axes = plt.subplots(1, n_section, figsize=(7 * n_section, 7))
    for s in range(n_section):
        if pcw == 6 and s == 2:
            # Third panel of pcw 6 holds the legend, built from the scatter
            # drawn on the previous panel.
            axes[s].legend(
                *scatter.legend_elements(num=None),
                loc='center left',
                bbox_to_anchor=(0, 0.5),
                ncol=2,
                fontsize=15,
                markerscale=2
            )
            axes[s].axis('off')
            continue
        subset = (df_heart['pcw'] == pcw) & (df_heart['section'] == s+1)
        spot_x = df_heart.loc[subset, 'spotX']
        spot_y = df_heart.loc[subset, 'spotY']
        # Flip the y axis so the image is drawn with its origin at the top.
        spot_y = spot_y.max() - spot_y
        scatter = axes[s].scatter(
            spot_x,
            spot_y,
            s=1,
            c=df_heart.loc[subset, 'spage2vec'],
            marker='.',
            cmap=cluster_cmap,
            # Pin the colour normalisation to the full label range. Without
            # this, matplotlib rescales to the min/max label present in each
            # section, so a section lacking the lowest or highest cluster
            # would show every cluster in a different colour than the others.
            vmin=-0.5,
            vmax=num_cluster - 0.5,
            alpha=1.0,
            linewidths=0
        )
        axes[s].set_title('{}.5_{}'.format(pcw, s+1), fontsize=20)
        axes[s].tick_params(labelsize=12)
        axes[s].set_xticks([])
        axes[s].set_yticks([])
    fig.tight_layout()
    fig.savefig(fig_dir / 'spatial-{}-{}.png'.format(model, pcw), dpi=200)
    # Close this specific figure rather than whatever is "current".
    plt.close(fig)

In [7]:
# Write one 'spatial-<model>-<pcw>.png' figure per pcw value.
for pcw in (4, 6, 9):
    spatial_plot(pcw)

In [8]:
# Build one spot-level table with cell-calling results for sections 6.5_1 and 6.5_2.
df_cell = {}
for name in ['6.5_1', '6.5_2']:
    df_cell_segmentation = pd.read_csv(data_dir / 'spots_w_cell_segmentation_PCW{}.csv'.format(name))
    df_cell_calling = pd.read_csv(data_dir / 'cell_calling_PCW{}.csv'.format(name))
    # Left join keeps every spot; spots whose parent_id matches no cell get NaN celltype.
    df_cell[name] = pd.merge(
        df_cell_segmentation, df_cell_calling[['cell', 'celltype']],
        how='left', left_on='parent_id', right_on='cell'
    )
    # `name` is '<pcw>.5_<section>': first character is the pcw, last is the section.
    df_cell[name]['pcw'] = int(name[0])
    df_cell[name]['section'] = int(name[-1])
    # Row labels become '<name>_<row number>'.
    df_cell[name] = df_cell[name].set_index('{}_'.format(name) + df_cell[name].index.astype(str))
df_heart_cell = pd.concat(df_cell.values())

# Map each cell-type label to the integer written in parentheses inside it.
cell_types = df_heart_cell['celltype'].dropna().unique()
cell_type_pairs = [
    (cell_type, int(re.search(r'\((\d+)\)', cell_type).group(1)))
    for cell_type in cell_types
]
cell_type_pairs.append(('Uncalled', -1))
# Sort by id so 'Uncalled' (-1) comes first; later cells drop it with [1:].
cell_type_pairs.sort(key=lambda x: x[1])
cell_type_id = dict(cell_type_pairs)

# Assign the filled column back instead of calling fillna(inplace=True) on the
# column selection: the chained in-place form is deprecated and does not
# update the frame under pandas Copy-on-Write.
df_heart_cell['celltype'] = df_heart_cell['celltype'].fillna('Uncalled')
df_heart_cell['cell_type_id'] = df_heart_cell['celltype'].map(cell_type_id)

# Reverse lookup (id -> label), with display labels overridden for some ids.
id_cell_type = {v: k for (k, v) in cell_type_id.items()}
id_cell_type[2] = '(2) Fibroblast-like (related to cardiac skeleton connective tissue)'
id_cell_type[3] = '(3) Epicardium-derived cells'
id_cell_type[4] = '(4) Fibroblast-like (smaller vascular development)'
id_cell_type[8] = '(8) Fibroblast-like (larger vascular development)'
id_cell_type[14] = '(14) Cardiac neural crest cells & Schwann progenitor cells'

In [9]:
# Render the cell-type labels (all but the first entry of id_cell_type) as a
# block of text wrapped at 30 characters, and save it as 'ct.png'.
fig, ax = plt.subplots()
labels = []
for cell_type_label in list(id_cell_type.values())[1:]:
    wrapped_lines = wrap(cell_type_label, 30)
    labels.append('\n'.join(wrapped_lines))
legend_text = '\n'.join(labels)
ax.text(0, 0.5, legend_text, va='center')
ax.axis('off')
fig.tight_layout()
fig.savefig(fig_dir / 'ct.png')
plt.close()

In [10]:
# Three-panel figure for pcw 6, section 2: spage2vec labels (left), called
# cell types (middle) and the cell-type legend (right).
fig, axes = plt.subplots(1, 3, figsize=(21, 7))

# --- Left panel: spage2vec labels -------------------------------------------
subset = (df_heart['pcw'] == 6) & (df_heart['section'] == 2)
spot_x = df_heart.loc[subset, 'spotX']
spot_y = df_heart.loc[subset, 'spotY']
# Flip the y axis so the image is drawn with its origin at the top.
spot_y = spot_y.max() - spot_y
axes[0].scatter(
    spot_x,
    spot_y,
    s=1,
    c=df_heart.loc[subset, 'spage2vec'],
    marker='.',
    cmap=ListedColormap(zeileis_28),
    # Fixed normalisation: label k always maps to the same palette entry, even
    # if this section lacks the lowest or highest label.
    vmin=-0.5,
    vmax=num_cluster - 0.5,
    alpha=1.0,
    linewidths=0
)
axes[0].set_title('spage2vec', fontsize=20)
axes[0].set_xticks([])
axes[0].set_yticks([])

# --- Middle panel: called cell types (ids >= 0, i.e. without 'Uncalled') ----
subset = (df_heart_cell['section'] == 2) & (df_heart_cell['cell_type_id'] >= 0)
cell_type_ids = df_heart_cell.loc[subset, 'cell_type_id']
# legend_elements(num=None) returns one handle per unique plotted value, in
# sorted order. Build the labels from the same unique values so handles and
# labels stay paired even when a cell type is absent from this section.
labels = [
    '\n'.join(wrap(id_cell_type[int(ct_id)], 30))
    for ct_id in np.unique(cell_type_ids)
]
# Largest known id + 1, so the colour scale spans every cell type.
n_cell_type = max(id_cell_type) + 1
spot_x = df_heart_cell.loc[subset, 'spotX']
spot_y = df_heart_cell.loc[subset, 'spotY']
spot_y = spot_y.max() - spot_y
scatter = axes[1].scatter(
    spot_x,
    spot_y,
    s=1,
    c=cell_type_ids,
    marker='.',
    cmap=ListedColormap(ct_cmap),
    vmin=-0.5,
    vmax=n_cell_type - 0.5,
    alpha=1.0,
    linewidths=0
)
axes[1].set_title('pciSeq', fontsize=20)
axes[1].set_xticks([])
axes[1].set_yticks([])

# --- Right panel: legend only -----------------------------------------------
axes[2].legend(
    handles=scatter.legend_elements(num=None)[0],
    labels=labels,
    loc='center left',
    bbox_to_anchor=(0, 0.5),
    fontsize=12
)
axes[2].axis('off')
fig.tight_layout()
fig.savefig(fig_dir / 'spatial-{}-0.png'.format(model), dpi=200)
plt.close(fig)

In [11]:
# Three-panel figure for pcw 6, section 2: selected spage2vec clusters (left)
# and selected cell types (middle) highlighted over a gray background, with
# the cell-type legend on the right.
fig, axes = plt.subplots(1, 3, figsize=(21, 7))

# --- Left panel: highlighted spage2vec clusters -----------------------------
subset = (df_heart['pcw'] == 6) & (df_heart['section'] == 2)
spot_x = df_heart.loc[subset, 'spotX']
spot_y = df_heart.loc[subset, 'spotY']
# Flip the y axis so the image is drawn with its origin at the top.
spot_y = spot_y.max() - spot_y
# Labels of the plotted spots only. Masks are built on this subset once,
# instead of on the whole frame and re-aligned at every indexing call.
spot_cluster = df_heart.loc[subset, 'spage2vec']
clusters = [2, 19, 20, 21]
colors = ['tab:orange', 'blue', 'tab:blue', 'tab:green']
background = ~spot_cluster.isin(clusters)
axes[0].scatter(
    spot_x[background],
    spot_y[background],
    s=1,
    c='gray',
    marker='.',
    alpha=0.2,
    linewidths=0
)
for cluster, color in zip(clusters, colors):
    selected = spot_cluster == cluster
    axes[0].scatter(
        spot_x[selected],
        spot_y[selected],
        label=cluster,
        c=color,
        s=2, marker='.', alpha=1.0, linewidths=0
    )
axes[0].legend(loc='best', markerscale=15, fontsize=15)
axes[0].set_title('spage2vec', fontsize=20)
axes[0].set_xticks([])
axes[0].set_yticks([])

# --- Middle panel: highlighted cell types -----------------------------------
subset = (df_heart_cell['section'] == 2) & (df_heart_cell['cell_type_id'] >= 0)
spot_x = df_heart_cell.loc[subset, 'spotX']
spot_y = df_heart_cell.loc[subset, 'spotY']
spot_y = spot_y.max() - spot_y
spot_cell_type = df_heart_cell.loc[subset, 'cell_type_id']
clusters = [9, 0, 14]
colors = ['tab:orange', 'tab:blue', 'tab:green']
background = ~spot_cell_type.isin(clusters)
axes[1].scatter(
    spot_x[background],
    spot_y[background],
    s=1,
    c='gray',
    marker='.',
    alpha=0.2,
    linewidths=0
)
for cluster, color in zip(clusters, colors):
    selected = spot_cell_type == cluster
    axes[1].scatter(
        spot_x[selected],
        spot_y[selected],
        c=color,
        label='\n'.join(wrap(id_cell_type[cluster], 30)),
        s=2, marker='.', alpha=1.0, linewidths=0
    )
axes[1].set_title('pciSeq', fontsize=20)
axes[1].set_xticks([])
axes[1].set_yticks([])

# --- Right panel: legend for the middle panel -------------------------------
axes[2].legend(
    *axes[1].get_legend_handles_labels(),
    loc='center left',
    bbox_to_anchor=(0, 0.5),
    markerscale=15,
    fontsize=18
)
axes[2].axis('off')
fig.tight_layout()
fig.savefig(fig_dir / 'spatial-{}-1.png'.format(model), dpi=200)
plt.close(fig)

In [12]:
# One panel per pcw (section 2 of each): spage2vec clusters 13-17 highlighted
# over a gray background. The legend is drawn on the middle panel only.
fig, axes = plt.subplots(1, 3, figsize=(21, 7))
pcws = [4, 6, 9]
clusters = [13, 14, 15, 16, 17]
for i in range(3):
    subset = (df_heart['pcw'] == pcws[i]) & (df_heart['section'] == 2)
    spot_x = df_heart.loc[subset, 'spotX']
    spot_y = df_heart.loc[subset, 'spotY']
    # Flip the y axis so the image is drawn with its origin at the top.
    spot_y = spot_y.max() - spot_y
    # Labels of the plotted spots only. Masks are built on this subset once,
    # instead of on the whole frame and re-aligned at every indexing call.
    spot_cluster = df_heart.loc[subset, 'spage2vec']
    background = ~spot_cluster.isin(clusters)
    axes[i].scatter(
        spot_x[background],
        spot_y[background],
        s=1,
        c='gray',
        marker='.',
        alpha=0.2,
        linewidths=0
    )
    for cluster in clusters:
        selected = spot_cluster == cluster
        # No explicit colour: each call takes the next colour of the axes' cycle.
        axes[i].scatter(
            spot_x[selected],
            spot_y[selected],
            label=cluster,
            s=1, marker='.', alpha=1.0, linewidths=0
        )
    axes[i].set_title('{}.5_2'.format(pcws[i]), fontsize=20)
    axes[i].set_xticks([])
    axes[i].set_yticks([])
axes[1].legend(loc='best', markerscale=15, fontsize=15)
fig.tight_layout()
fig.savefig(fig_dir / 'spatial-{}-2.png'.format(model), dpi=200)
plt.close(fig)

In [13]:
# Two panels (pcw 6, sections 1 and 2): spage2vec clusters 16, 18 and 2
# highlighted over a gray background. The legend is drawn on the second panel.
fig, axes = plt.subplots(1, 2, figsize=(14, 7))
clusters = [16, 18, 2]
for s in range(2):
    subset = (df_heart['pcw'] == 6) & (df_heart['section'] == s+1)
    spot_x = df_heart.loc[subset, 'spotX']
    spot_y = df_heart.loc[subset, 'spotY']
    # Flip the y axis so the image is drawn with its origin at the top.
    spot_y = spot_y.max() - spot_y
    # Labels of the plotted spots only. Masks are built on this subset once,
    # instead of on the whole frame and re-aligned at every indexing call.
    spot_cluster = df_heart.loc[subset, 'spage2vec']
    background = ~spot_cluster.isin(clusters)
    axes[s].scatter(
        spot_x[background],
        spot_y[background],
        s=1,
        c='gray',
        marker='.',
        alpha=0.2,
        linewidths=0
    )
    for cluster in clusters:
        selected = spot_cluster == cluster
        # No explicit colour: each call takes the next colour of the axes' cycle.
        axes[s].scatter(
            spot_x[selected],
            spot_y[selected],
            label=cluster,
            s=2, marker='.', alpha=1.0, linewidths=0
        )
    axes[s].set_title('6.5_{}'.format(s+1), fontsize=20)
    axes[s].set_xticks([])
    axes[s].set_yticks([])
axes[1].legend(loc='best', markerscale=15, fontsize=15)
fig.tight_layout()
fig.savefig(fig_dir / 'spatial-{}-3.png'.format(model), dpi=200)
plt.close(fig)