In [None]:
import os

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import scipy.spatial

import anndata
import scanpy as sc
import umap

import torch
from scmg.model.contrastive_embedding import CellEmbedder, embed_adata, decode_cell_state_embedding

from scmg.preprocessing.data_standardization import GeneNameMapper
# Instantiate the scmg GeneNameMapper imported on the previous line.
# NOTE(review): this object is not referenced again in the visible part of the notebook.
gene_name_mapper = GeneNameMapper()


In [None]:
import matplotlib
import matplotlib.pyplot as plt
import seaborn as sns

# Global figure configuration used by every plot in this notebook.
plt.rcParams["figure.autolayout"] = False
# fonttype 42 stores text as TrueType in saved PDFs (keeps labels editable).
matplotlib.rc('pdf', fonttype=42)
# NOTE(review): assumes the 'FreeSans' font is installed on this machine — confirm.
plt.rcParams['font.family'] = 'FreeSans'
sc.set_figure_params(vector_friendly=True, dpi_save=300)
# Set after sc.set_figure_params, presumably so that call cannot switch the
# grid back on — TODO confirm.
plt.rcParams['axes.grid'] = False

In [None]:
# Directory that receives the figures saved by this notebook; created if absent.
plot_output_path = 'classify_genes_plots/'
os.makedirs(name=plot_output_path, exist_ok=True)

In [None]:
# Load the reference AnnData and standardise every variable with scanpy's
# scale(), clipping the scaled values at 100.
reference_h5ad = 'adata_decoded_human_10x.h5ad'
adata_normal_all = sc.read_h5ad(reference_h5ad)
sc.pp.scale(adata_normal_all, max_value=100)
adata_normal_all

In [None]:
# Number of genes that do NOT pass the detection filter (max_ct_exp_measured > 0.2).
detected_mask = adata_normal_all.var['max_ct_exp_measured'] > 0.2
adata_normal_all.n_vars - np.sum(detected_mask)

In [None]:
# Number of genes that pass the detection filter but fail the entropy filter
# (entropy_decoded < 6.4).
is_detected = adata_normal_all.var['max_ct_exp_measured'] > 0.2
is_low_entropy = adata_normal_all.var['entropy_decoded'] < 6.4
np.sum(is_detected) - np.sum(is_detected & is_low_entropy)

In [None]:
# Transpose so that the former variables (genes) become observations, then keep
# only the genes that pass both the detection and the entropy thresholds.
adata_normal = adata_normal_all.T.copy()
passes_detection = adata_normal.obs['max_ct_exp_measured'] > 0.2
passes_entropy = adata_normal.obs['entropy_decoded'] < 6.4
gene_mask = passes_detection & passes_entropy
adata_normal = adata_normal[gene_mask].copy()
adata_normal

In [None]:
# All perturbation datasets live in one directory; keeping that directory in a
# single variable means relocating the data only requires editing one line.
pert_data_dir = '/GPUData_xingjie/SCMG/perturbation_data'
pert_data_file_names = [
    'AdamsonWeissman2016_GSM2406681_10X010.h5ad',
    'FrangiehIzar2021_RNA.h5ad',
    'hESC_TF_screen.h5ad',
    'JiangSatija2024_IFNB.h5ad',
    'JiangSatija2024_IFNG.h5ad',
    'JiangSatija2024_INS.h5ad',
    'JiangSatija2024_TGFB.h5ad',
    'JiangSatija2024_TNFA.h5ad',
    # Currently disabled datasets in the same directory:
    #   'Joung_TFScreen_HS_2023.h5ad',
    #   'knockTF_human.h5ad',
    #   'knockTF_mouse.h5ad',
    #   'omnipath.h5ad',
    #   'PertOrg.h5ad',
    'ReplogleWeissman2022_K562_essential.h5ad',
    'ReplogleWeissman2022_K562_gwps.h5ad',
    'ReplogleWeissman2022_rpe1.h5ad',
    'TianKampmann2021_CRISPRa.h5ad',
    'TianKampmann2021_CRISPRi.h5ad',
    # Disabled test input (different directory):
    #   '/GPUData_xingjie/SCMG/hESC_perturb_seq/pseudo_bulk.h5ad'
]
# Full paths, in the same order as before (the first entry supplies 'gene_name' below).
pert_data_files = [os.path.join(pert_data_dir, fn) for fn in pert_data_file_names]

# Load every dataset and report how many observations each one contributes.
adata_pert_list = []
for pert_file in pert_data_files:
    adata_pert_ds = sc.read_h5ad(pert_file)
    adata_pert_list.append(adata_pert_ds)
    print(os.path.basename(pert_file), adata_pert_ds.shape[0])

# Stack the datasets along the observation axis (anndata.concat defaults apply
# to the variable axis).
adata_pert_all = anndata.concat(adata_pert_list, axis=0)
# Gene names are taken from the first dataset; pandas aligns them on the var index.
adata_pert_all.var['gene_name'] = adata_pert_list[0].var['gene_name']

In [None]:
# Mask out the direct target genes: for every row whose 'perturbed_gene' is also
# one of the measured variables, zero the entry in that gene's column.
# Vectorised replacement for a per-row Python loop.
# NOTE(review): get_indexer requires unique var_names — confirm.
target_cols = adata_pert_all.var_names.get_indexer(
    np.asarray(adata_pert_all.obs['perturbed_gene'])
)
has_target = target_cols >= 0  # get_indexer yields -1 for names absent from var_names
adata_pert_all.X[np.flatnonzero(has_target), target_cols[has_target]] = 0

In [None]:
# Per gene (column), count the rows whose absolute value exceeds 0.5.
strong_entries = np.abs(adata_pert_all.X) > 0.5
adata_pert_all.var['n_strong_perts'] = np.sum(strong_entries, axis=0)

# Filter out genes without strong perturbation effects
adata_pert = adata_pert_all.T.copy()
has_strong_perts = adata_pert.obs['n_strong_perts'] > 5
adata_pert = adata_pert[has_strong_perts].copy()
adata_pert

In [None]:
from sklearn.metrics import pairwise_distances

# Gene-by-gene similarity: scikit-learn returns the correlation *distance*,
# so subtracting it from 1 yields the correlation itself.
sim_mtx_normal = np.subtract(
    1,
    pairwise_distances(adata_normal.X, metric='correlation', n_jobs=24),
)

In [None]:
normal_obs = adata_normal.obs

# Second-largest similarity per row; the largest is expected to be the gene's
# similarity to itself.
normal_obs['top_sim'] = np.sort(sim_mtx_normal, axis=1)[:, -2]
normal_obs['top_sim'].hist(bins=100, range=(0, 1))
plt.show()

# Number of entries above 0.4 in each row, minus one for the gene itself.
normal_obs['n_high_corrs'] = (sim_mtx_normal > 0.4).sum(axis=1) - 1
normal_obs['n_high_corrs'].hist(bins=100, range=(1, 100))

In [None]:
# Gene-by-gene similarity of the perturbation profiles (1 - correlation distance).
sim_mtx_pert = np.subtract(
    1,
    pairwise_distances(adata_pert.X, metric='correlation', n_jobs=24),
)

In [None]:
pert_obs = adata_pert.obs

# Second-largest similarity per row; the largest is expected to be the gene's
# similarity to itself.
pert_obs['top_sim'] = np.sort(sim_mtx_pert, axis=1)[:, -2]
pert_obs['top_sim'].hist(bins=100, range=(0, 1))
plt.show()

# Number of entries above 0.4 in each row, minus one for the gene itself.
pert_obs['n_high_corrs'] = (sim_mtx_pert > 0.4).sum(axis=1) - 1
pert_obs['n_high_corrs'].hist(bins=100, range=(1,100))

In [None]:
# Spot-check the per-gene statistics for three genes.
genes_to_inspect = ['INSIG1', 'MVD', 'FDPS']
adata_pert.obs.loc[adata_pert.obs['gene_name'].isin(genes_to_inspect)]

In [None]:
# Keep the genes whose 'n_high_corrs' count is greater than 4.
normal_keep = adata_normal.obs['n_high_corrs'] > 4
adata_normal_selected = adata_normal[normal_keep].copy()
adata_normal_selected

In [None]:
# Keep the genes whose 'n_high_corrs' count is greater than 4.
pert_keep = adata_pert.obs['n_high_corrs'] > 4
adata_pert_selected = adata_pert[pert_keep].copy()
adata_pert_selected

In [None]:
from sklearn.decomposition import PCA

# 50-component PCA fitted on the selected genes (rows) of the reference data.
# random_state is fixed because scikit-learn may pick the randomized SVD solver
# for large inputs, which would make the components differ between runs.
pca_normal = PCA(n_components=50, random_state=0)
pca_normal.fit(adata_normal_selected.X)
# Overall standard deviation of the projected data; used later to put the two
# PCA spaces on a common scale.
pca_normal_std = np.std(pca_normal.transform(adata_normal_selected.X))
pca_normal_std

In [None]:
# 50-component PCA fitted on the selected genes (rows) of the perturbation data.
# random_state is fixed because scikit-learn may pick the randomized SVD solver
# for large inputs, which would make the components differ between runs.
pca_pert = PCA(n_components=50, random_state=0)
pca_pert.fit(adata_pert_selected.X)
# Overall standard deviation of the projected data; used later to put the two
# PCA spaces on a common scale.
pca_pert_std = np.std(pca_pert.transform(adata_pert_selected.X))
pca_pert_std

In [None]:
# Ratio of the two overall PCA standard deviations; multiplying the perturbation
# PCs by it gives them the same overall spread as the reference PCs.
pca_scale_factor = pca_normal_std / pca_pert_std
pca_scale_factor

In [None]:
# Union of the genes selected from the reference data and from the perturbation data.
selected_genes = np.unique(
    list(adata_normal_selected.obs.index)
    + list(adata_pert_selected.obs.index)
)

# Boolean mask over adata_normal_all's variables, plus the matching gene ids in
# that same order.
gene_selection_mask = adata_normal_all.var.index.isin(selected_genes)
selected_gene_ids = adata_normal_all.var_names[gene_selection_mask]

pcs_normal = pca_normal.transform(adata_normal_all[:, gene_selection_mask].X.T)
# Index the perturbation data by gene id rather than by the positional mask
# built from adata_normal_all: the rows of pcs_pert then line up with pcs_normal
# even if the two objects order their variables differently, and a missing gene
# raises instead of being silently misaligned.
pcs_pert = pca_pert.transform(adata_pert_all[:, selected_gene_ids].X.T)

In [None]:
# Per-gene metadata for the selected genes, flagged by which selection each gene came from.
adata_obs = adata_normal_all[:, gene_selection_mask].var.copy()
adata_obs['is_normal_program'] = adata_obs.index.isin(adata_normal_selected.obs.index)
adata_obs['is_pert_program'] = adata_obs.index.isin(adata_pert_selected.obs.index)

# Column vectors of the flags; multiplying by them zeroes the PCs of genes that
# were not selected in the corresponding dataset.
normal_flag = adata_obs['is_normal_program'].values[:, None]
pert_flag = adata_obs['is_pert_program'].values[:, None]

# Joint embedding: reference PCs next to the rescaled perturbation PCs.
joint_embedding = np.concatenate(
    [normal_flag * pcs_normal, pca_scale_factor * pert_flag * pcs_pert],
    axis=1,
)
adata = anndata.AnnData(X=joint_embedding, obs=adata_obs)

In [None]:
# kNN graph on the joint embedding using correlation distance
# (alternatives left disabled in the original cell: 'cosine', 'euclidean').
neighbor_kwargs = dict(n_neighbors=5, use_rep='X', metric='correlation')
sc.pp.neighbors(adata, **neighbor_kwargs)
sc.tl.umap(adata, random_state=0)

In [None]:
# Leiden clustering of the gene graph at resolution 10.
sc.tl.leiden(
    adata,
    resolution=10,
    flavor="igraph",
    n_iterations=20,
    random_state=0,
)

In [None]:
# The cluster ids annotated by hand further down refer to one specific clustering
# result, so that result is kept on disk. Write it only when no saved copy exists
# (so a fresh Restart & Run All works), then always continue from the file.
cluster_cache_path = 'adata_exp_programs_cluster.h5ad'
if not os.path.exists(cluster_cache_path):
    adata.write_h5ad(cluster_cache_path)
adata = sc.read_h5ad(cluster_cache_path)
adata

In [None]:
# UMAP of the genes coloured by Leiden cluster; the cell output is the cluster count.
fig, ax = plt.subplots(figsize=(8, 8), dpi=100)
sc.pl.umap(
    adata,
    color='leiden',
    ax=ax,
    legend_loc='on data',
    legend_fontsize=10,
    legend_fontoutline=2,
)
leiden_categories = adata.obs['leiden'].cat.categories
len(leiden_categories)

In [None]:
# Marker genes to look up; any that are absent from the clustered object are dropped.
candidate_genes = [
    'DDIT3', 'ATF3', 'ATF4', 'ATF5', 'POU5F1', 'SOX2', 'NANOG',
    'SOX10', 'ELOB', 'BAX', 'MDM2', 'INSIG1', 'MVD', 'FDPS',
]
present_gene_names = adata.obs['gene_name'].values
genes_to_show = [gene for gene in candidate_genes if gene in present_gene_names]

adata.obs.set_index('gene_name').loc[genes_to_show]

In [None]:
gene_of_interest = 'BBC3'

# Row of the gene in the neighbour-distance matrix (raises ValueError if the gene is absent).
gene_row = list(adata.obs['gene_name']).index(gene_of_interest)
neighbor_row = adata.obsp['distances'][gene_row]

# Stored distances first, then the metadata of the genes they belong to.
print(neighbor_row.data)
adata.obs.iloc[neighbor_row.nonzero()[1]]


In [None]:
# First 200 gene names of one Leiden cluster, for manual inspection.
cluster_to_inspect = '71'
genes_in_cluster = adata[adata.obs['leiden'] == cluster_to_inspect].obs['gene_name']
np.array(genes_in_cluster)[0:200]

In [None]:
# Label for every Leiden cluster id (ids are strings '0'..'186'). Several ids
# share one label, and 'mixed cell types' marks clusters without a specific label.
# NOTE(review): presumably hand-curated against the clustering stored in
# adata.obs['leiden']; the ids are only meaningful for that clustering — confirm.
cluster_name_map = {
    '0' : 'Hox genes',
    '1' : 'DNA replication/repair',
    '2' : 'myeloid',
    '3' : 'mixed cell types',
    '4' : 'cilia',
    '5' : 'cilia',
    '6' : 'mixed cell types',
    '7' : 'pancreatic',
    '8' : 'mixed cell types',
    '9' : 'mixed cell types',
    '10' : 'interferon signaling',
    '11' : 'epithelial',
    '12' : 'neuronal',
    '13' : 'macrophage',
    '14' : 'mixed cell types',
    '15' : 'neuronal',
    '16' : 'mast cell',
    '17' : 'lysosome/autophagy',
    '18' : 'endothelial',
    '19' : 'neuronal',
    '20' : 'retinal epithelium',
    '21' : 'endothelial',
    '22' : 'immune system',
    '23' : 'ribosome biogenesis',
    '24' : 'proliferation',
    '25' : 'muscle',
    '26' : 'neural development',
    '27' : 'muscle',
    '28' : 'erythroid',
    '29' : 'cilia',
    '30' : 'pancreatic islet',
    '31' : 'mixed cell types',
    '32' : 'epithelial',
    '33' : 'spliceosome',
    '34' : 'immune system',
    '35' : 'endothelial',
    '36' : 'mesenchymal',
    '37' : 'adipocyte',
    '38' : 'cilia',
    '39' : 'myeloid',
    '40' : 'glia',
    '41' : 'immune system',
    '42' : 'epithelial',
    '43' : 'megakaryocyte',
    '44' : 'mesenchymal',
    '45' : 'intestine',
    '46' : 'mixed cell types',
    '47' : 'neuronal',
    '48' : 'neuronal',
    '49' : 'neuronal',
    '50' : 'neural development',
    '51' : 'mitochondrial ribosome',
    '52' : 'mixed cell types',
    '53' : 'epithelial',
    '54' : 'mixed cell types',
    '55' : 'mixed cell types',
    '56' : 'kidney',
    '57' : 'mixed cell types',
    '58' : 'kidney',
    '59' : 'epidermal',
    '60' : 'cilia',
    '61' : 'mesenchymal',
    '62' : 'natural killer',
    '63' : 'B cell',
    '64' : 'mixed cell types',
    '65' : 'respiratory epithelium',
    '66' : 'mixed cell types',
    '67' : 'visual perception',
    '68' : 'mixed cell types',
    '69' : 'mesothelial',
    '70' : 'epithelial',
    '71' : 'chromatin structure',
    '72' : 'neuronal',
    '73' : 'mixed cell types',
    '74' : 'peripheral neurons',
    '75' : 'mixed cell types',
    '76' : 'mixed cell types',
    '77' : 'neuronal',
    '78' : 'mixed cell types',
    '79' : 'intestine',
    '80' : 'epithelial',
    '81' : 'mixed cell types',
    '82' : 'neuronal',
    '83' : 'Golgi vesicle transport',
    '84' : 'ribosomal protein genes',
    '85' : 'liver',
    '86' : 'proliferation',
    '87' : 'mixed cell types',
    '88' : 'visual perception',
    '89' : 'cell cycle (M phase)',
    '90' : 'macrophage',
    '91' : 'p53 signaling',
    '92' : 'macrophage',
    '93' : 'cilia',
    '94' : 'immune system',
    '95' : 'mixed cell types',
    '96' : 'T cell',
    '97' : 'mixed cell types',
    '98' : 'kidney',
    '99' : 'mixed cell types',
    '100' : 'mixed cell types',
    '101' : 'macrophage',
    '102' : 'cell cycle (G1/S)',
    '103' : 'B cell',
    '104' : 'immune system',
    '105' : 'mixed cell types',
    '106' : 'mixed cell types',
    '107' : 'mixed cell types',
    '108' : 'T cell',
    '109' : 'DNA replication/repair',
    '110' : 'mixed cell types',
    '111' : 'ribosome biogenesis',
    '112' : 'epithelial',
    '113' : 'epithelial',
    '114' : 'endothelial',
    '115' : 'glia',
    '116' : 'epithelial',
    '117' : 'mixed cell types',
    '118' : 'mixed cell types',
    '119' : 'cholesterol biosynthesis',
    '120' : 'mesenchymal',
    '121' : 'neuronal',
    '122' : 'mesenchymal',
    '123' : 'mixed cell types',
    '124' : 'melanin biosynthesis',
    '125' : 'glia',
    '126' : 'epithelial',
    '127' : 'liver',
    '128' : 'mixed cell types',
    '129' : 'TNF signaling',
    '130' : 'cilia',
    '131' : 'T cell',
    '132' : 'bone',
    '133' : 'integrated stress response',
    '134' : 'neuronal',
    '135' : 'mixed cell types',
    '136' : 'cell cycle (prometaphase)',
    '137' : 'heart',
    '138' : 'muscle',
    '139' : 'visual perception',
    '140' : 'neuronal',
    '141' : 'neuronal',
    '142' : 'mixed cell types',
    '143' : 'unfolded protein response',
    '144' : 'visual perception',
    '145' : 'bone',
    '146' : 'visual perception',
    '147' : 'smooth muscle',
    '148' : 'mixed cell types',
    '149' : 'immune system',
    '150' : 'interferon signaling',
    '151' : 'neuronal',
    '152' : 'mixed cell types',
    '153' : 'mixed cell types',
    '154' : 'melanin biosynthesis',
    '155' : 'mixed cell types',
    '156' : 'mixed cell types',
    '157' : 'mixed cell types',
    '158' : 'epithelial',
    '159' : 'cell cycle (prometaphase)',
    '160' : 'mixed cell types',
    '161' : 'immune system',
    '162' : 'mixed cell types',
    '163' : 'mixed cell types',
    '164' : 'muscle',
    '165' : 'intestine',
    '166' : 'neuronal',
    '167' : 'muscle',
    '168' : 'neuronal',
    '169' : 'mixed cell types',
    '170' : 'mixed cell types',
    '171' : 'epidermal',
    '172' : 'pluripotency',
    '173' : 'erythroid',
    '174' : 'mixed cell types',
    '175' : 'mixed cell types',
    '176' : 'mixed cell types',
    '177' : 'mixed cell types',
    '178' : 'mixed cell types',
    '179' : 'mixed cell types',
    '180' : 'mixed cell types',
    '181' : 'mixed cell types',
    '182' : 'mixed cell types',
    '183' : 'mixed cell types',
    '184' : 'mixed cell types',
    '185' : 'pluripotency',
    '186' : 'mitochondrial encoded',
}

# Translate each gene's Leiden id into its label; ids missing from the map
# become NaN through Series.map.
adata.obs['initial_name'] = adata.obs['leiden'].map(cluster_name_map)

In [None]:
from scipy.spatial.distance import cdist

# Compute the mean embedding for each cluster
cluster_names = np.unique(adata.obs['initial_name'])
initial_cluster_embeddings = np.array([
    np.mean(adata[adata.obs['initial_name'] == cn].X, axis=0)
    for cn in cluster_names
])

# Find the closest initial cluster for each gene
# (correlation distance between each gene's embedding and every cluster mean).
dist_mtx = cdist(adata.X, initial_cluster_embeddings, metric='correlation')
closest_initial_clusters = np.argmin(dist_mtx, axis=1)
adata.obs['closest_initial_cluster'] = [
    cluster_names[idx] for idx in closest_initial_clusters
]
adata.obs['closest_initial_cluster_dist'] = np.min(dist_mtx, axis=1)

In [None]:
# Distribution of each gene's distance to its nearest cluster mean.
adata.obs['closest_initial_cluster_dist'].hist(bins=100, range=(0, 2))

# Final label: the nearest cluster, except that genes farther than 0.3 from
# every cluster mean are labelled 'mixed cell types'.
adata.obs['cluster_name'] = adata.obs['closest_initial_cluster']
too_far = adata.obs['closest_initial_cluster_dist'] > 0.3
adata.obs.loc[too_far, 'cluster_name'] = 'mixed cell types'

In [None]:
# Ten genes closest to the mean of one cluster.
# NOTE(review): 'actin cytoskeleton' is not one of the labels in cluster_name_map,
# so this selection looks empty — confirm which label was intended.
in_cluster = adata.obs['closest_initial_cluster'] == 'actin cytoskeleton'
adata.obs[in_cluster].sort_values('closest_initial_cluster_dist')[:10]

In [None]:
# Number of distinct final labels, followed by the number of genes per label.
final_labels = adata.obs['cluster_name']
print(len(np.unique(final_labels)))
final_labels.value_counts()

In [None]:
# Gene names of the 'p53 signaling' cluster, nearest to the cluster mean first.
p53_rows = adata.obs[adata.obs['cluster_name'] == 'p53 signaling']
np.array(p53_rows.sort_values('closest_initial_cluster_dist')['gene_name'])

In [None]:
# Plotting copy in which 'mixed cell types' is replaced by NaN in both label columns.
adata_show = adata.copy()
for label_col in ('initial_name', 'cluster_name'):
    adata_show.obs[label_col] = adata_show.obs[label_col].replace('mixed cell types', np.nan)

fig, ax = plt.subplots(figsize=(8, 8), dpi=100)
sc.pl.umap(
    adata_show,
    color='cluster_name',
    ax=ax,
    legend_loc='on data',
    legend_fontsize=10,
    legend_fontoutline=2,
)

umap_with_mixed_pdf = os.path.join(
    plot_output_path, 'exp_programs_umap_clusters_with_mixed_cell_types.pdf')
fig.savefig(umap_with_mixed_pdf)

In [None]:
# Restrict to genes with a specific label, then rebuild the graph and UMAP on that subset.
is_annotated = adata.obs['cluster_name'] != 'mixed cell types'
adata_anno = adata[is_annotated].copy()

# Correlation-distance kNN graph (alternatives left disabled in the original
# cell: 'cosine', 'euclidean').
sc.pp.neighbors(adata_anno, n_neighbors=10, use_rep='X', metric='correlation')
sc.tl.umap(adata_anno, min_dist=1, random_state=0)

fig, ax = plt.subplots(figsize=(8, 8), dpi=100)
sc.pl.umap(
    adata_anno,
    color='cluster_name',
    ax=ax,
    legend_loc='on data',
    legend_fontsize=10,
    legend_fontoutline=2,
)

umap_clusters_pdf = os.path.join(plot_output_path, 'exp_programs_umap_clusters.pdf')
fig.savefig(umap_clusters_pdf)

In [None]:
# Annotate every gene of the reference object. Later assignments override
# earlier ones: default -> low detection -> uniformly expressed -> cluster label.
gene_table = adata_normal_all.var
gene_table['annotation'] = 'others'

low_detection = gene_table['max_ct_exp_measured'] <= 0.2
gene_table.loc[low_detection, 'annotation'] = 'low detection'

uniformly_expressed = gene_table['entropy_decoded'] >= 6.4
gene_table.loc[uniformly_expressed, 'annotation'] = 'uniformly expressed'

# Clustered genes take their final cluster label (pandas aligns on the gene index).
is_clustered = gene_table.index.isin(adata.obs.index)
gene_table.loc[is_clustered, 'annotation'] = adata.obs['cluster_name']

gene_table.to_csv('gene_exp_cluster_annotation_all.csv')

In [None]:
# Genes per final cluster label among the annotated genes
# (disabled alternative in the original cell: counts of adata.obs['initial_name']).
annotated_labels = adata_anno.obs['cluster_name']
gene_cluster_counts = annotated_labels.value_counts()

print(gene_cluster_counts.sum())
gene_cluster_counts

In [None]:
# Export the per-gene table of the annotated genes.
annotated_gene_table = adata_anno.obs
annotated_gene_table.to_csv('gene_exp_cluster_annotation.csv')

In [None]:
# Programs drawn in orange in the cluster-size bar chart (all others are blue).
essential_programs = [
    'chromatin structure',
    'DNA replication/repair',
    'cell cycle (G1/S)',
    'cell cycle (prometaphase)',
    'cell cycle (M phase)',
    'spliceosome',
    'proliferation',
    'ribosome biogenesis',
    'ribosomal protein genes',
    'mitochondrial encoded',
    'cholesterol biosynthesis',
    'mitochondrial ribosome',
    'Golgi vesicle transport',
    'unfolded protein response',
    'integrated stress response',
    'p53 signaling',
    'lysosome/autophagy',
]

# Reverse the value_counts order for the horizontal bar chart.
program_names = gene_cluster_counts.index.astype(str)[::-1]
program_counts = gene_cluster_counts.values[::-1]
colors = [
    'orange' if name in essential_programs else 'blue'
    for name in program_names
]

In [None]:
# Horizontal bar chart of the number of genes per program.
fig, ax = plt.subplots(figsize=(3, 14))

ax.barh(program_names, program_counts, color=colors)
#ax.set_xscale('log')
ax.set(
    xlabel='Number of Genes',
    ylabel='Gene Cluster',
    title='Gene Exp Cluster Size',
)

# Manual legend entries, because the bars carry colours but no labels.
legend_handles = [
    matplotlib.lines.Line2D([], [], marker='o', color=marker_color,
                            linestyle='None', markersize=5, label=marker_label)
    for marker_color, marker_label in (('orange', 'Essential'), ('blue', 'Differentiation'))
]
ax.legend(handles=legend_handles, loc='lower right', fontsize=12)

cluster_size_pdf = os.path.join(plot_output_path, 'exp_programs_cluster_size.pdf')
fig.savefig(cluster_size_pdf)

In [None]:
# Fresh, unscaled copy of the reference object for the expression plots below.
adata_ref_path = 'adata_decoded_human_10x.h5ad'
adata_ref = sc.read_h5ad(adata_ref_path)

In [None]:
# Scaled copy of the reference (values clipped at 10); adata_ref itself stays untouched.
scaled_adata = sc.pp.scale(adata_ref, max_value=10, copy=True)

In [None]:
# Leiden ids whose initial label is 'mixed cell types'.
is_mixed = adata.obs['initial_name'] == 'mixed cell types'
cluster_labels = np.unique(adata.obs[is_mixed]['leiden'])

# For every such cluster, store the mean scaled value of its genes as an obs
# column named after the cluster id.
for cluster_label in cluster_labels:
    selected_genes = np.array(adata[adata.obs['leiden'] == cluster_label].obs.index)
    in_cluster = scaled_adata.var.index.isin(selected_genes)
    scaled_adata.obs[cluster_label] = scaled_adata[:, in_cluster].X.mean(axis=1)

fig = sc.pl.umap(
    scaled_adata,
    color=cluster_labels,
    ncols=7,
    cmap='coolwarm',
    vmin=-3,
    vmax=3,
    return_fig=True,
)

In [None]:
# Final cluster labels, taken from the categories of the annotated object.
cluster_labels = np.array(adata_anno.obs['cluster_name'].cat.categories)

# For every label, store the mean scaled value of its genes as an obs column
# named after the label.
for cluster_label in cluster_labels:
    label_rows = adata_anno.obs['cluster_name'] == cluster_label
    selected_genes = np.array(adata_anno[label_rows].obs.index)
    in_cluster = scaled_adata.var.index.isin(selected_genes)
    scaled_adata.obs[cluster_label] = scaled_adata[:, in_cluster].X.mean(axis=1)

len(cluster_labels)

In [None]:
# Display order of the programs in the multi-panel UMAP, assembled from groups.
_core_programs = [
    'chromatin structure', 'DNA replication/repair', 'cell cycle (G1/S)',
    'cell cycle (prometaphase)', 'cell cycle (M phase)',
    'spliceosome', 'proliferation',
    'ribosome biogenesis', 'ribosomal protein genes', 'mitochondrial encoded',
    'cholesterol biosynthesis', 'mitochondrial ribosome',
    'Golgi vesicle transport', 'unfolded protein response',
    'integrated stress response', 'p53 signaling', 'lysosome/autophagy',
]
_tissue_programs = [
    'pluripotency', 'Hox genes', 'glia', 'neural development', 'neuronal',
    'peripheral neurons',
    'visual perception', 'retinal epithelium', 'melanin biosynthesis',
    'epithelial', 'respiratory epithelium', 'kidney', 'intestine', 'pancreatic',
    'pancreatic islet', 'liver', 'epidermal',
    'mesothelial', 'adipocyte', 'mesenchymal', 'smooth muscle', 'endothelial', 'bone',
]
_immune_blood_programs = [
    'interferon signaling', 'TNF signaling', 'immune system', 'myeloid',
    'macrophage', 'B cell', 'T cell', 'natural killer',
    'mast cell', 'erythroid', 'megakaryocyte',
]
_other_programs = ['muscle', 'heart', 'cilia']

cluster_labels = [
    *_core_programs,
    *_tissue_programs,
    *_immune_blood_programs,
    *_other_programs,
]
len(cluster_labels)

In [None]:
# One UMAP panel per program, coloured by the per-program obs columns of scaled_adata.
fig = sc.pl.umap(
    scaled_adata,
    color=cluster_labels,
    ncols=7,
    cmap='coolwarm',
    vmin=-3,
    vmax=3,
    return_fig=True,
)

module_umap_pdf = os.path.join(plot_output_path, 'gene_module_exp_umap.pdf')
fig.savefig(module_umap_pdf)

In [None]:
#fig = sc.pl.umap(scaled_adata[scaled_adata.obs['cell_type'].isin([
#        #'inflammatory cell', 
#        #'Brown adipocyte cells',
#        #'Adipocyte cells (Cyp2e1+)',
#        'cortical thymic epithelial cell'
#        ])], 
#                 color='109', vmax=3, vmin=-3, cmap='coolwarm', return_fig=True)
#

In [None]:
# Copy of the reference whose variables are indexed by gene name, so genes can
# be plotted by name; duplicate names are made unique by anndata.
adata_named = adata_ref.copy()

gene_name_index = list(adata_named.var['gene_name'])
adata_named.var.index = gene_name_index
adata_named.var_names_make_unique()

In [None]:
# First 30 gene names of the 'endothelial' cluster.
endothelial_rows = adata_anno.obs['cluster_name'] == 'endothelial'
np.array(adata_anno[endothelial_rows].obs['gene_name'])[0:30]

In [None]:
# Genes to show on the reference UMAP; names missing from adata_named are skipped.
requested_genes = [
    'TFPI', 'PLXND1', 'MEOX1', 'SELE', 'SEMA3G', 'RIPOR1', 'PREX2',
    'CALCRL', 'SLC9A3R2', 'TIE1', 'KIF26A', 'HYAL2', 'ADGRF5', 'LMCD1',
    'LIMS2', 'NUAK1', 'SCARF1', 'EDN1', 'ANGPT2', 'MADCAM1', 'PPM1F',
    'CARD10', 'PDGFB', 'ACR', 'CCM2L', 'BMX', 'FLT1', 'EHD4', 'PRX',
    'RASIP1',
]
genes_to_plot = [gene for gene in requested_genes if gene in adata_named.var_names]

sc.pl.umap(adata_named, color=genes_to_plot, ncols=6, cmap='inferno_r')