In [None]:
import numpy as np
import pandas as pd
import scanpy as sc
import matplotlib.pyplot as plt
import matplotlib.colors as mcolors
from matplotlib.backends.backend_pdf import PdfPages
import seaborn as sns
from matplotlib.collections import PathCollection

In [2]:
import os

# Move the working directory one level up from where the notebook was started,
# so that the relative paths used by later cells (e.g. './data/...') resolve.
#
# The start directory is recorded the first time this cell runs and the target
# is derived from it, which makes the cell idempotent: the original
# `os.chdir('../')` climbed one directory further on every re-execution.
if "NOTEBOOK_START_DIR" not in globals():
    NOTEBOOK_START_DIR = os.getcwd()
os.chdir(os.path.join(NOTEBOOK_START_DIR, os.pardir))

In [None]:
# Load the integrated AnnData object used by every figure cell below.
# (The original line was missing the closing quote and raised a SyntaxError.)
adata = sc.read_h5ad('./data/integrated/47.integrated.h5ad')

In [None]:
## Figure 1B; Figure S1D
# UMAP embeddings of the integrated dataset: one panel per entry of
# `colors_to_plot`, all drawn on a single figure.
import matplotlib.pyplot as plt
import scanpy as sc
from matplotlib import colormaps

# Panel grid; slots that stay unused are removed from the figure further down.
nrows, ncols = 2, 2  # Adjust to your desired number of rows and columns

# Annotations to colour the embedding by. Alternatives that were toggled in
# this list before: "CD52", "tissue.origin".
colors_to_plot = ["celltype_B",
                  "celltype_C"
                 ]

# zip() below would silently drop panels that do not fit, so fail loudly instead.
if len(colors_to_plot) > nrows * ncols:
    raise ValueError(
        f"{len(colors_to_plot)} panels requested but the grid only has "
        f"{nrows * ncols} slots; increase nrows/ncols."
    )

# Colormap forwarded to scanpy (presumably only relevant for continuous
# colourings such as gene expression). `plt.cm.get_cmap` was removed in
# Matplotlib 3.9, so the colormap registry is used instead.
cmap = colormaps['inferno_r']

# squeeze=False always returns a 2-D array of axes, so flattening also works
# for a 1x1 grid.
fig, axs = plt.subplots(nrows=nrows, ncols=ncols, figsize=(13, 5), squeeze=False)
axs = axs.flatten()

# Draw one embedding per requested annotation on its own axes.
for color, ax in zip(colors_to_plot, axs):
    sc.pl.embedding(
        adata,
        basis="X_umap.rpca",
        color=color,
        sort_order=False,
        add_outline=False,
        legend_loc="right margin",
        cmap=cmap,
        legend_fontsize=10,
        frameon=False,
        title='cell type',
        size=1,
        alpha=0.8,
        ax=ax,
        show=False
    )

# Remove grid slots that received no panel.
for unused_ax in axs[len(colors_to_plot):]:
    fig.delaxes(unused_ax)

plt.tight_layout()
#plt.savefig('./outs/all_umap_cell.pdf', dpi=300, transparent=True, bbox_inches='tight')
plt.show()

In [None]:
##Figure 1D
# Stacked bar plot of the 'celltype_C' composition of every sample
# ('cancer.id'), with a colour strip above the bars marking each sample's
# 'cancer' type.
import scanpy as sc
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from matplotlib.patches import Rectangle

# Per-cell annotations needed for this figure.
df = adata.obs[['cancer.id', 'celltype_B', 'celltype_C', 'cancer']]

# Fraction of each 'celltype_C' category within every sample (rows sum to 1).
celltype_C_counts = pd.crosstab(df['cancer.id'], df['celltype_C'])
celltype_C_fractions = celltype_C_counts.div(celltype_C_counts.sum(axis=1), axis=0)

# Rank samples by their number of 'Mo/Mφ/cDC/OC' cells in 'celltype_B'.
# NOTE(review): this ranks by raw cell COUNT, not by fraction, although the
# original comment spoke of a fraction — confirm which one the figure needs.
celltype_B_counts = pd.crosstab(df['cancer.id'], df['celltype_B'])
sample_ranking = celltype_B_counts['Mo/Mφ/cDC/OC'].sort_values(ascending=False).index

# Reorder the 'celltype_C' fractions to follow that ranking.
ranked_celltype_C_fractions = celltype_C_fractions.loc[sample_ranking]

# One 'cancer' label per sample, in ranking order. Deduplicating first keeps
# the first label seen for each sample (as before) while avoiding a per-cell
# Series with a heavily duplicated index.
cancer_info = (
    df.drop_duplicates(subset='cancer.id')
      .set_index('cancer.id')['cancer']
      .loc[sample_ranking]
)

# One colour per 'celltype_C' category (tab20 repeats beyond 20 categories).
n_celltypes = len(celltype_C_fractions.columns)
celltype_colors = sns.color_palette("tab20", n_colors=n_celltypes)

# One colour per 'cancer' type, taken from the Dark2 palette.
unique_cancers = cancer_info.unique()
cancer_colors = sns.color_palette("Dark2", n_colors=len(unique_cancers))
cancer_color_map = dict(zip(unique_cancers, cancer_colors))

# Stacked bars, one per sample.
fig, ax = plt.subplots(figsize=(7, 4))
ranked_celltype_C_fractions.plot(kind='bar', stacked=True, color=celltype_colors, width=1, ax=ax)

# Colour strip just above the bars (which top out at 1.0) showing the cancer
# type of each sample; clip_on=False lets it draw outside the axes area.
for i, cancer_type in enumerate(cancer_info):
    ax.add_patch(
        Rectangle((i - 0.45, 1.01), 0.9, 0.05,
                  color=cancer_color_map[cancer_type], clip_on=False)
    )

# Titles and axis labels.
ax.set_title("Fraction of Cell Types per Sample (Ranked by 'Myeloid')", fontsize=10)
ax.set_xlabel('Sample ID (cancer.id)', fontsize=4)
ax.set_ylabel('Fraction of Cells (celltype_C)', fontsize=8)

# First legend: cell types.
handles, labels = ax.get_legend_handles_labels()
celltype_legend = ax.legend(handles, labels, title='Cell Types (celltype_C)', bbox_to_anchor=(1.05, 1), loc='upper left', fontsize='small')

# Second legend: cancer types. Creating it replaces the first legend on the
# axes, so the cell-type legend is re-attached as a plain artist afterwards.
cancer_legend_patches = [Rectangle((0, 0), 1, 1, color=cancer_color_map[cancer]) for cancer in unique_cancers]
cancer_legend = ax.legend(cancer_legend_patches, unique_cancers, title="Cancer Types", bbox_to_anchor=(1.05, 0.5), loc='center left', fontsize='small')
ax.add_artist(celltype_legend)

plt.tight_layout()

#plt.savefig('./outs/cell_fraction.pdf', dpi=100, transparent=True, bbox_inches='tight')
plt.show()

In [None]:
##Figure 1E; Figure S1N
# Clustered heatmap of row-z-scored 'celltype_C' frequencies per sample,
# restricted to selected cancer types and cell types, with a colour strip for
# the cancer type of each sample.
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
from scipy.cluster.hierarchy import linkage, fcluster
from matplotlib import colormaps

# Assuming adata is loaded

# Per-cell annotations.
celltypes = adata.obs['celltype_C'].values
samples = adata.obs['cancer.id'].values
cancer_types = adata.obs['cancer'].values

# Sample -> cancer type lookup (the last cell seen for a sample wins).
sample_to_cancer = dict(zip(samples, cancer_types))

# Cell counts per (cell type, sample), including categories with no cells.
df = pd.crosstab(celltypes, samples)
all_celltypes = adata.obs['celltype_C'].cat.categories
df = df.reindex(all_celltypes, fill_value=0)

# Counts -> per-sample proportions (each column sums to 1).
df_freq = df.divide(df.sum(axis=0), axis=1)

# Row-wise z-score, computed across ALL samples before any filtering.
df_zscore = df_freq.sub(df_freq.mean(axis=1), axis=0).div(df_freq.std(axis=1), axis=0)

# Cancer types and cell types shown in the figure.
selected_cancer_types = ['BC', 'KC', 'LC', 'CC', 'EC', 'TC','BDC','PC','SKCM']
selected_celltypes = ['CD14hi Mono','CD16hi Mono', 'Mφ', 'OC', 'CD4 Treg','CD8 Tex']

# Keep only the selected rows (cell types) and columns (samples).
df_zscore = df_zscore.loc[selected_celltypes]
samples_to_keep = [sample for sample in df_zscore.columns if sample_to_cancer[sample] in selected_cancer_types]
df_zscore = df_zscore[samples_to_keep]

# One colour per cancer type. The order follows `selected_cancer_types`
# (de-duplicated) so the colour assignment is identical on every run;
# iterating a set() gave a different mapping from run to run.
unique_cancers = list(dict.fromkeys(selected_cancer_types))
palette = colormaps['Paired'].colors
if len(unique_cancers) > len(palette):
    raise ValueError(
        f"'Paired' provides {len(palette)} colours but {len(unique_cancers)} "
        "cancer types were selected."
    )
cancer_to_color = dict(zip(unique_cancers, palette))

# Flat clustering of the samples, used only to pre-order the columns;
# clustermap below (col_cluster=True) computes its own column ordering.
link = linkage(df_zscore.T, method='average')
num_clusters = 4
cluster_assignments = fcluster(link, t=num_clusters, criterion='maxclust')

# Order samples by cluster id, then by sample name.
sorted_samples = [sample for _, sample in sorted(zip(cluster_assignments, df_zscore.columns))]

# Apply that order to the data and build the matching colour strip.
df_zscore = df_zscore[sorted_samples]
cancer_ordered_colors = [cancer_to_color[sample_to_cancer[sample]] for sample in sorted_samples]

# Global seaborn font scaling (affects later cells as well).
sns.set(font_scale=2.0)

# Colour-scale range and annotation font size.
color_vmin = -1
color_vmax = 1
annot_fontsize = 8  # Adjust this value as needed

# Figure size in inches (an earlier data-dependent size was immediately
# overwritten by these values and has been dropped).
num_unique_cells = df_zscore.shape[0]
figsize_width = 40  # Increase as needed
figsize_height = 8  # Increase as needed

# Clustered heatmap with the z-scores printed in every cell.
g = sns.clustermap(df_zscore, cmap='Spectral_r', col_cluster=True, row_cluster=True,
                   figsize=(figsize_width, figsize_height), vmin=color_vmin, vmax=color_vmax,
                   col_colors=cancer_ordered_colors, linewidths=0, linecolor='white',
                   annot=True, fmt=".2f", annot_kws={"size": annot_fontsize})

# Thicken the dendrogram lines.
cluster_linewidth = 1.5
for line in g.ax_col_dendrogram.lines:
    line.set_linewidth(cluster_linewidth)
for line in g.ax_row_dendrogram.lines:
    line.set_linewidth(cluster_linewidth)

# Legend mapping colours to cancer types.
legend_patches = [mpatches.Patch(color=cancer_to_color[cancer], label=cancer) for cancer in unique_cancers]
plt.legend(handles=legend_patches, bbox_to_anchor=(1, 1), loc='upper right')

#plt.savefig("./patients_clustering_based_on_T_and_Mye_Z-scored.pdf", format="pdf", dpi=300)

plt.show()

In [None]:
##Figure S1K
# Clustered heatmap of row-z-scored 'rpca_clusters' frequencies per sample,
# annotated with cancer type, tissue of origin and a flat sample clustering.
import pandas as pd
import numpy as np
import scanpy as sc
import seaborn as sns
import matplotlib.pyplot as plt
from scipy.stats import zscore
from scipy.cluster.hierarchy import linkage, fcluster
import matplotlib.patches as mpatches
import matplotlib.cm as cm
from matplotlib import colormaps

# Assuming adata is loaded

# Per-cell annotations.
celltypes = adata.obs['rpca_clusters'].values  # unbiased: integrated_snn_res.1.5 #celltype_C
samples = adata.obs['cancer.id'].values
cancer_types = adata.obs['cancer'].values
tissues = adata.obs['tissue.origin'].values

# Cancer types to leave out of the figure (empty list = keep everything).
excluded_cancer_types = [
    #'NB',  # Replace or add cancer types to exclude
    #'cancer_type_to_exclude_2'
]
mask = ~np.isin(cancer_types, excluded_cancer_types)

# Apply the exclusion mask to every per-cell vector.
samples = samples[mask]
cancer_types = cancer_types[mask]
celltypes = celltypes[mask]
tissues = tissues[mask]

# Sample -> cancer type / tissue lookups (the last cell seen for a sample wins).
sample_to_cancer = dict(zip(samples, cancer_types))
sample_to_tissue = dict(zip(samples, tissues))

# One colour per cancer type ('Paired'). Values are taken in order of first
# appearance so the colour assignment is reproducible; iterating a set() gave
# a run-dependent mapping. `cm.get_cmap` was removed in Matplotlib 3.9, hence
# the colormap registry plus `.resampled()`.
unique_cancers = list(pd.unique(cancer_types))
cmap_cancer = colormaps['Paired'].resampled(len(unique_cancers))
cancer_to_color = dict(zip(unique_cancers, cmap_cancer(np.linspace(0, 1, len(unique_cancers)))))
cancer_to_color['NA'] = 'gray'  # colour reserved for a literal 'NA' label

# One colour per tissue of origin ('Accent'), same scheme.
unique_tissues = list(pd.unique(tissues))
cmap_tissue = colormaps['Accent'].resampled(len(unique_tissues))
tissue_to_color = dict(zip(unique_tissues, cmap_tissue(np.linspace(0, 1, len(unique_tissues)))))
tissue_to_color['NA'] = 'gray'  # colour reserved for a literal 'NA' label

# Cell counts per (cluster, sample), including clusters with no cells.
df = pd.crosstab(celltypes, samples)
all_celltypes = adata.obs['rpca_clusters'].cat.categories
df = df.reindex(all_celltypes, fill_value=0)

# Counts -> per-sample proportions (each column sums to 1).
df_freq = df.divide(df.sum(axis=0), axis=1)

# Row-wise z-score (zscore is applied per column of the transposed frame).
# Rows with zero variance yield NaN/inf; these are set to 0 so that the
# clustering below receives finite values only.
df_zscored = (
    df_freq.transpose().apply(zscore).transpose()
    .replace([np.inf, -np.inf], np.nan)
    .fillna(0)
)

# Make sure every cluster category is present as a row.
df_zscored = df_zscored.reindex(all_celltypes, fill_value=0)

# Flat clustering of the samples (columns) for the 'Cluster' colour strip.
# NOTE(review): this uses complete linkage while the clustermap below uses
# method='average', so the strip need not agree with the drawn dendrogram —
# confirm this is intended.
row_linkage = linkage(df_zscored.transpose(), method="complete")
desired_clusters = 4
cluster_assignments = fcluster(row_linkage, desired_clusters, criterion='maxclust')
cluster_colors = sns.color_palette('Set3', n_colors=desired_clusters)
column_cluster_colors = [cluster_colors[i-1] for i in cluster_assignments]

# From here on `samples` holds one entry per heatmap column, in column order.
samples = df_zscored.columns.tolist()

# Per-sample colours, aligned with the heatmap columns.
cancer_ordered_colors = [cancer_to_color[sample_to_cancer[sample]] for sample in samples]
tissue_ordered_colors = [tissue_to_color[sample_to_tissue[sample]] for sample in samples]

# Column annotation strips for the clustermap.
color_df = pd.DataFrame({
    'Cancer_Type': cancer_ordered_colors,
    'Tissue': tissue_ordered_colors,
    'Cluster': column_cluster_colors
}, index=samples)

# Colour-scale range.
vmin = -1.5
vmax = 1.5

# Clustered heatmap of the z-scored frequencies.
g = sns.clustermap(df_zscored, method='average', cmap='Spectral_r', center=0, col_cluster=True, row_cluster=True,
                   figsize=(30, max(10, len(all_celltypes) * 0.5)), col_colors=color_df, linewidths=0, linecolor='white',
                   vmin=vmin, vmax=vmax)

# Single legend listing cancer types followed by tissues.
legend_patches_cancer = [mpatches.Patch(color=cancer_to_color[cancer], label=cancer) for cancer in unique_cancers]
legend_patches_tissue = [mpatches.Patch(color=tissue_to_color[tissue], label=tissue) for tissue in unique_tissues]
plt.legend(handles=legend_patches_cancer + legend_patches_tissue, bbox_to_anchor=(1, 1), loc='center left')

# Show or save the plot
#plt.savefig("./outs/patients_clustering_imm_for_circular_hirchical.pdf", format="pdf", dpi=300)
plt.show()


In [None]:
##Figure S1L
# Clustered heatmap of row-z-scored 'celltype_C' frequencies per sample,
# annotated with cancer type and a flat sample clustering.
import pandas as pd
import numpy as np
import scanpy as sc
import seaborn as sns
import matplotlib.pyplot as plt
from scipy.stats import zscore
from scipy.cluster.hierarchy import linkage, fcluster
import matplotlib.patches as mpatches
from matplotlib import colormaps

# Assuming adata is loaded

# Per-cell annotations.
celltypes = adata.obs['celltype_C'].values  # unbiased: integrated_snn_res.1.5 #celltype_C
samples = adata.obs['cancer.id'].values
cancer_types = adata.obs['cancer'].values

# Cancer types to leave out of the figure (empty list = keep everything).
excluded_cancer_types = [#'CC', 'TC', 'EC',
   # 'ctrl'
]
mask = ~np.isin(cancer_types, excluded_cancer_types)

# Apply the exclusion mask to every per-cell vector.
samples = samples[mask]
cancer_types = cancer_types[mask]
celltypes = celltypes[mask]

# Sample -> cancer type lookup (the last cell seen for a sample wins).
sample_to_cancer = dict(zip(samples, cancer_types))

# One colour per cancer type ('Paired'). Values are taken in order of first
# appearance so the colour assignment is reproducible; iterating a set() gave
# a run-dependent mapping. `plt.cm.get_cmap` was removed in Matplotlib 3.9,
# hence the colormap registry plus `.resampled()`.
unique_cancers = list(pd.unique(cancer_types))
cmap = colormaps['Paired'].resampled(len(unique_cancers))
cancer_to_color = dict(zip(unique_cancers, cmap.colors))

# Cell counts per (cell type, sample), including categories with no cells.
df = pd.crosstab(celltypes, samples)
all_celltypes = adata.obs['celltype_C'].cat.categories
df = df.reindex(all_celltypes, fill_value=0)

# Counts -> per-sample proportions (each column sums to 1).
df_freq = df.divide(df.sum(axis=0), axis=1)

# Row-wise z-score (zscore is applied per column of the transposed frame).
# Rows with zero variance yield NaN/inf, which would make linkage() fail;
# they are set to 0, matching the Figure S1K cell.
df_zscored = (
    df_freq.transpose().apply(zscore).transpose()
    .replace([np.inf, -np.inf], np.nan)
    .fillna(0)
)

# Make sure every cell-type category is present as a row.
df_zscored = df_zscored.reindex(all_celltypes, fill_value=0)

# Flat clustering of the samples (columns) for the 'Cluster' colour strip.
row_linkage = linkage(df_zscored.transpose(), method="average")
desired_clusters = 4
cluster_assignments = fcluster(row_linkage, desired_clusters, criterion='maxclust')
cluster_colors = sns.color_palette('Set3', n_colors=desired_clusters)
column_cluster_colors = [cluster_colors[i-1] for i in cluster_assignments]

# From here on `samples` holds one entry per heatmap column, in column order.
samples = df_zscored.columns.tolist()

# Per-sample cancer colours, aligned with the heatmap columns.
cancer_ordered_colors = [cancer_to_color[sample_to_cancer[sample]] for sample in samples]

# Column annotation strips for the clustermap.
color_df = pd.DataFrame({
    'Cancer_Type': cancer_ordered_colors,
    'Cluster': column_cluster_colors
}, index=samples)

# Colormap for the heatmap (any valid matplotlib colormap name).
custom_cmap = 'Spectral_r'

# Colour-scale range.
color_scale_min = -1.5
color_scale_max = 1.5

# Font scaling is global seaborn state (affects later cells as well).
# clustermap creates its own figure, so no plt.figure() call is made here;
# the original one only produced an extra empty figure.
num_unique_cells = df_zscored.shape[0]
sns.set(font_scale=0.8)

# Clustered heatmap of the z-scored frequencies.
g = sns.clustermap(df_zscored, method='average', cmap=custom_cmap, center=0, col_cluster=True, row_cluster=True,
                   figsize=(20, max(5, num_unique_cells * 0.3)), col_colors=color_df, linewidths=0, linecolor='white',
                   vmin=color_scale_min, vmax=color_scale_max)

# Legend mapping colours to cancer types.
legend_patches = [mpatches.Patch(color=cancer_to_color[cancer], label=cancer) for cancer in unique_cancers]
plt.legend(handles=legend_patches, bbox_to_anchor=(1, 1), loc='center left')
#plt.savefig("./outs/patients clustering_only_imm_(celltype_C)_ unbiased_Z-scored.pdf", format="pdf", dpi=300)

plt.show()

In [None]:
##Figure 1F
# Per-sample scatter plot of the combined fraction of two groups of
# 'celltype_C' labels, with a regression line and the Pearson correlation.
from scipy.stats import pearsonr
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np

# Assuming the data has been loaded already:
# adata = sc.read_h5ad('./pan_Mets_2/data/integrated_till_0712/h5ad/adata_merged.h5ad')

# 1. Cell-type groups (x axis = group 1, y axis = group 2) and cancer types
#cell_type1_names = ["CD14hi Mono","CD16hi Mono", "Mφ", "OC"]
cell_type1_names = ["exhausting CD8 T","CD4 Treg", "CD8 Tex"]
cell_type2_names = ["CD14hi Mono","CD16hi Mono", "Mφ", "OC"]

cancer_types = ["LC","BC","KC","CC"]#"BC",

# Optional restriction to specific samples (empty list = use all samples).
selected_samples = []

# The restriction goes into a new variable: rebinding `adata` here would
# silently shrink the dataset for every cell executed afterwards.
if selected_samples:
    adata_selected = adata[adata.obs["cancer.id"].isin(selected_samples)]
else:
    adata_selected = adata

# 2. Keep only cells from the requested cancer types
adata_filtered = adata_selected[adata_selected.obs["cancer"].isin(cancer_types)]

# 3. Fraction of cells per sample that belong to each cell-type group.
# observed=True restricts the result to samples that actually have cells.
obs_filtered = adata_filtered.obs
sample_ids = obs_filtered["cancer.id"]
cell_type1_percentage = obs_filtered["celltype_C"].isin(cell_type1_names).groupby(sample_ids, observed=True).mean()
cell_type2_percentage = obs_filtered["celltype_C"].isin(cell_type2_names).groupby(sample_ids, observed=True).mean()

# One row per sample.
df = pd.DataFrame({
    "cell_type1": cell_type1_percentage,
    "cell_type2": cell_type2_percentage
})

# 4. Scatter plot with regression line and per-sample labels.
# The style is set before the axes are created so that it applies to them.
sns.set_style("white")
fig, ax = plt.subplots(figsize=(5, 5))
sns.regplot(x="cell_type1", y="cell_type2", data=df, scatter_kws={'s': 50, 'alpha': 0.5}, line_kws={"color": "green"}, ax=ax)

for sample_id, x_val, y_val in zip(df.index, df["cell_type1"], df["cell_type2"]):
    ax.annotate(sample_id, (x_val, y_val), fontsize=8, alpha=0.7)

ax.set_xlabel(f"Percentage of {' & '.join(cell_type1_names)}")
ax.set_ylabel(f"Percentage of {' & '.join(cell_type2_names)}")
ax.set_title(f"Correlation in {' & '.join(cancer_types)}")

# 5. Pearson correlation coefficient and p-value; pearsonr needs at least two
# samples (same guard as in the Figure S1M cell).
if len(df) > 1:
    corr_coeff, p_value = pearsonr(df["cell_type1"], df["cell_type2"])
    ax.text(0.95, 0.05, f"Pearson Correlation: {corr_coeff:.2f}\nP-value: {p_value:.2e}",
            transform=ax.transAxes, fontsize=12, verticalalignment='bottom', horizontalalignment='right')

sns.despine()
#plt.savefig("./corelation_mye-t_all_cancer.pdf", format="pdf", dpi=300)

plt.show()

In [None]:
##Figure S1M
# One scatter panel per cancer type: per-sample fraction of cell-type group 1
# versus group 2 ('celltype_C'), with regression line and Pearson correlation.
from scipy.stats import pearsonr
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np

# Assuming the data has been loaded already:
# adata = sc.read_h5ad('./pan_Mets_2/data/integrated_till_0712/h5ad/adata_merged.h5ad')

# 1. Cell-type groups (x axis = group 1, y axis = group 2) and cancer types
cell_type1_names = ["CD14hi Mono",
                    "CD16hi Mono"
                   ]


cell_type2_names = ["Mφ", "OC"
                   ]

#cell_type2_names = ["CD4 Treg", "CD8 Tex"
#                    , "exhausting CD8 T"
#                   ]
cancer_types = ["LC", "BC", "KC"]  # List of cancer types

# Optional restriction to specific samples (empty list = use all samples).
selected_samples = []

# Sample IDs to drop. As in the original code only the empty-string ID is
# excluded, which most likely removes nothing.
# NOTE(review): the original comment and the output file name refer to
# removing "BC12"; add "BC12" here if that exclusion is intended.
excluded_samples = [""]

# The restriction goes into a new variable: rebinding `adata` here would
# silently shrink the dataset for every cell executed afterwards.
if selected_samples:
    adata_selected = adata[adata.obs["cancer.id"].isin(selected_samples)]
else:
    adata_selected = adata

# 2. Keep only cells from the requested cancer types, minus excluded samples
adata_filtered = adata_selected[adata_selected.obs["cancer"].isin(cancer_types)]
adata_filtered = adata_filtered[~adata_filtered.obs["cancer.id"].isin(excluded_samples)]

# One panel per cancer type (5 x 5 inches each). The style is set before the
# axes are created so that it applies to every panel, including the first.
sns.set_style("white")
n_panels = len(cancer_types)
fig, axes = plt.subplots(1, n_panels, figsize=(5 * n_panels, 5), squeeze=False)

for ax, cancer_type in zip(axes[0], cancer_types):

    # Cells of this cancer type only.
    obs_cancer_type = adata_filtered.obs[adata_filtered.obs["cancer"] == cancer_type]

    # 3. Fraction of cells per sample that belong to each cell-type group.
    # observed=True restricts the result to samples that actually have cells.
    sample_ids = obs_cancer_type["cancer.id"]
    cell_type1_percentage = obs_cancer_type["celltype_C"].isin(cell_type1_names).groupby(sample_ids, observed=True).mean()
    cell_type2_percentage = obs_cancer_type["celltype_C"].isin(cell_type2_names).groupby(sample_ids, observed=True).mean()

    # One row per sample.
    df = pd.DataFrame({
        "cell_type1": cell_type1_percentage,
        "cell_type2": cell_type2_percentage
    })

    # 4. Scatter plot with regression line and per-sample labels
    # (skipped when this cancer type has no samples).
    if not df.empty:
        sns.regplot(x="cell_type1", y="cell_type2", data=df, scatter_kws={'s': 50, 'alpha': 0.5}, line_kws={"color": "blue"}, ax=ax)

        for sample_id, x_val, y_val in zip(df.index, df["cell_type1"], df["cell_type2"]):
            ax.annotate(sample_id, (x_val, y_val), fontsize=8, alpha=0.7)

    ax.set_xlabel(f"Percentage of {' & '.join(cell_type1_names)}")
    ax.set_ylabel(f"Percentage of {' & '.join(cell_type2_names)}")
    ax.set_title(f"{cancer_type} Correlation")

    # 5. Pearson correlation coefficient and p-value (needs >= 2 samples).
    if len(df) > 1:
        corr_coeff, p_value = pearsonr(df["cell_type1"], df["cell_type2"])
        ax.text(0.95, 0.05, f"Pearson Correlation: {corr_coeff:.2f}\nP-value: {p_value:.2e}",
                transform=ax.transAxes, fontsize=10, verticalalignment='bottom', horizontalalignment='right')

# Adjust spacing between subplots
fig.tight_layout()

# Show or save the plot
#plt.savefig("./correlation_split_by_cancer_type_no_BC12.pdf", format="pdf", dpi=300)
plt.show()


In [None]:
##prepare Figure S1G
# calculate score: option and prepare the violin plot
#pre_plasma
gene_list=['MDM4', 'ELK3', 'HLA-DPA1', 'ERG', 'SH3TC1', 'NPY', 'LINC00114', 'SEMA6A', 'PPFIBP1', 'PSD3', 'BLNK', 'F2RL3', 'SSBP2', 'TCF4', 'RPS4Y2', 'UBE2J1', 'GAB1', 'SMIM3', 'EBF1', 'LCN6', 'RCSD1', 'EGFL7', 'P2RY14', 'MAGED1', 'ZNF22-AS1', 'MYB', 'H2AC6', 'IRF1-AS1', 'ESD', 'GPSM1', 'GBP4', 'SMAD1', 'BCL11A', 'LAT2', 'MSI2', 'TMEM243', 'MPPED2', 'TMEM217', 'BTBD3', 'CDCA7L', 'BLACE', 'GABPB1', 'SIAH2', 'MTF2', 'LHPP', 'TP53INP1', 'P4HA2', 'SCHIP1', 'HHIP', 'SNX2', 'SOCS2', 'CCDC81', 'CLEC14A', 'MYO5C', 'ITM2C', 'VPREB1', 'TIFA', 'RIMKLB', 'LRFN2', 'MDK', 'DEPP1', 'H1-2', 'LINC01507', 'ATP1B3', 'ZNF22', 'IGLL1', 'LINC01374', 'PTPRE', 'CMTM7', 'TKT', 'CD79A', 'ABHD17B', 'DNTT', 'RAG1', 'LY6H', 'SLC44A1', 'CD74', 'CD24', 'BAHCC1', 'STMN1', 'MPP1', 'MLXIP', 'TSPAN14', 'SPTBN1', 'NAV1', 'NFIA-AS2', 'PXDN', 'TMEM263', 'SOD2', 'XXYLT1-AS2', 'ARPP21', 'HLA-DMA', 'VDAC1', 'LDLRAD4', 'HLA-DRA', 'TOP2B', 'NEIL1', 'CD9', 'PRCD', 'CAV1', 'DUSP26', 'AHDC1', 'RAG2', 'VPREB3', 'FAM241A', 'HLA-DPB1', 'CMTM8', 'CD99', 'DOK4', 'JUP', 'LINC00305', 'ADGRF1', 'MYL6B', 'CD79B', 'SPANXB1', 'TCF3', 'COL5A1', 'POU2AF1', 'XPNPEP2', 'SPANXC', 'GPR176', 'GABPB1-AS1', 'ZCCHC7', 'POLA2', 'TLE4', 'GLRX', 'STARD4-AS1', 'CKMT2', 'HPS4', 'VASN', 'SYNE3', 'SH2D4B', 'LINC01013', 'ADA', 'FXYD2', 'DDAH2', 'LAPTM5', 'QRSL1', 'ERGIC1', 'MT1X', 'GH1', 'FAM30A', 'YBX3', 'PPP1R14B', 'CYGB', 'HMHB1', 'MYLK', 'AKAP12', 'GNG7', 'LINC00544', 'PHACTR1', 'FHIT', 'MZB1', 'CD34', 'ALPG', 'MEF2C', 'SLC16A2', 'PPP2R2C', 'CD38', 'CALHM6', 'SLC8A1-AS1', 'SOX4', 'XBP1', 'GNG11', 'DBN1', 'SARDH', 'HHIP-AS1', 'COBL', 'MAP1LC3B', 'WASHC4', 'LINC00426', 'NKX6-3', 'MDM2', 'MME', 'NPPC', 'RNF152', 'AQP5']
sc.tl.score_genes(adata, gene_list, score_name='pre_plasma') 
#pro_B
gene_list=['PRDX1', 'LRR1', 'CCDC34', 'ERG', 'PAX5', 'NPY', 'RACGAP1', 'BLNK', 'CENPU', 'SSBP2', 'TCF4', 'SYNGR1', 'KIF11', 'HAUS1', 'EBF1', 'LCN6', 'SMIM3', 'NUSAP1', 'EGFL7', 'PAG1', 'H4C3', 'KIFC1', 'CDCA5', 'VSIG10', 'MYB', 'CBX5', 'GGH', 'SPC24', 'CENPM', 'BCL11A', 'TCTEX1D2', 'MKI67', 'LAT2', 'MSI2', 'TSPAN7', 'SAC3D1', 'CDCA7L', 'GALNT14', 'SOCS2', 'TUBA1B', 'IGFBP7', 'E2F1', 'CENPW', 'CCDC81', 'PCLAF', 'CCNA2', 'CARHSP1', 'CLSPN', 'MYO5C', 'CLEC14A', 'MAD2L2', 'ITM2C', 'VPREB1', 'NRM', 'MXD3', 'MDK', 'DHFR', 'PAQR4', 'HELLS', 'CDKN3', 'UHRF1', 'CCN2', 'IGLL1', 'PARP1', 'H2AX', 'CMTM7', 'BIRC5', 'ZWINT', 'CENPF', 'DNTT', 'ESCO2', 'CDCA7', 'E2F2', 'DNAJC9', 'CD24', 'SGO1', 'CKS1B', 'PAICS', 'BAHCC1', 'SAE1', 'STMN1', 'GINS2', 'FBXO5', 'PKMYT1', 'UBE2T', 'MPP1', 'HMGA1', 'GMNN', 'HDAC2', 'NFIA-AS2', 'PXDN', 'RNASEH2B', 'NASP', 'TMEM263', 'AURKB', 'TOP2A', 'SKA3', 'ARPP21', 'CRMP1', 'TOP2B', 'CDK1', 'CD9', 'ASF1B', 'PXMP2', 'RAG2', 'KIF15', 'VPREB3', 'CMTM8', 'CHEK1', 'JUP', 'BAALC', 'NKD2', 'MYL6B', 'CD79B', 'PHGDH', 'UGT3A2', 'MCM7', 'COL5A1', 'CDT1', 'CDC45', 'CEP55', 'POU2AF1', 'PTTG1', 'GPR176', 'SMC2', 'ZCCHC7', 'FEN1', 'CENPH', 'MCM3', 'TMEM106C', 'PCNA', 'SMARCA4', 'RAD51AP1', 'UBE2C', 'LMNB1', 'SCCPDH', 'FABP5', 'C2orf48', 'CCNB2', 'USP1', 'HPS4', 'LINC01013', 'ADA', 'TPX2', 'NUF2', 'EZH2', 'NCAPG', 'RRM1', 'QRSL1', 'RRM2', 'ERGIC1', 'CDK6', 'PIMREG', 'TUBB', 'IDH2', 'TYMS', 'RMI2', 'CRNDE', 'FAM30A', 'FHL1', 'SSRP1', 'MACROH2A1', 'UBASH3B', 'ZNF704', 'CYGB', 'HMHB1', 'MYLK', 'AKAP12', 'GNG7', 'RAD51', 'MCM4', 'SMC4', 'TK1', 'MZB1', 'CENPN', 'GTSE1', 'CD34', 'MND1', 'PPP2R2C', 'CKS2', 'MEG9', 'FAM111B', 'SLC8A1-AS1', 'SOX4', 'CTNNAL1', 'SPC25', 'HADH', 'TCF19', 'LINC00426', 'MAD2L1', 'MME', 'STK32B', 'HMGB3', 'KAZALD1', 'RNF152', 'AQP5']
sc.tl.score_genes(adata, gene_list, score_name='pro_B') 
#immature_B
gene_list=['MDM4', 'BAZ2A', 'RAPGEF5', 'RASAL1', 'FCRLA', 'LINC00472', 'RUBCNL', 'PAX5', 'HS3ST1', 'DDX54', 'SSBP2', 'PLD4', 'TCF4', 'EBF1', 'SAG', 'TTC21A', 'RCSD1', 'CFAP251', 'MYB', 'FOXD3-AS1', 'H2AC6', 'GUCY2C', 'SMAD1', 'RIMS3', 'DNMT3B', 'SBSPON', 'HCK', 'MSI2', 'TMEM243', 'MPPED2', 'BTBD3', 'IGLC3', 'CDHR3', 'NCF1', 'TRABD', 'LHPP', 'IGLC2', 'P4HA2', 'IGLC6', 'MYO1C', 'HHIP', 'FAM3B', 'AEBP1', 'STX7', 'ITM2C', 'VPREB1', 'TIFA', 'CDC40', 'TAX1BP3', 'H1-2', 'MYBPC2', 'CDC25B', 'ORAI2', 'H2BC21', 'IGLL1', 'P2RX5', 'LINC01374', 'POU4F1', 'CCDC112', 'NSMCE1', 'IGKC', 'TM7SF2', 'CMTM7', 'EMP2', 'C7orf50', 'CD79A', 'H2BC8', 'KREMEN2', 'KLHL14', 'BEST3', 'RAG1', 'CD74', 'PCDH9', 'CD24', 'BMP3', 'SYVN1', 'STMN1', 'ENTPD8', 'LIMD2', 'IGF2', 'CD19', 'MLXIP', 'TCL1A', 'SPTBN1', 'RGS16', 'ARPP21', 'HLA-DQB1', 'LDLRAD4', 'HLA-DMA', 'ATP8A1', 'H3-3B', 'HLA-DRA', 'NEIL1', 'CD9', 'KIF12', 'RAG2', 'TFDP2', 'VPREB3', 'NTS', 'HLA-DPB1', 'HRK', 'CMTM8', 'CRYM', 'REXO2', 'CD37', 'CD79B', 'BACH2', 'TCOF1', 'CD72', 'CCDC191', 'TCF3', 'CD81', 'WASF1', 'POU2AF1', 'IGLL5', 'CECR2', 'PLEKHA2', 'ZCCHC7', 'LAMP5', 'CCDC69', 'SYK', 'IGLC5', 'ZEB2', 'MPZL1', 'STARD4-AS1', 'MIR181A1HG', 'SPINT2', 'NOXA1', 'NFATC4', 'RGS2', 'LAMA5', 'TLE1', 'CXXC5', 'TOX2', 'QRSL1', 'IGHM', 'PPP1R14A', 'STIM2', 'ACSM3', 'H2BC5', 'SLC9A7', 'SNHG7', 'TMEM38A', 'CSNK1E', 'IRF4', 'C3orf52', 'HIP1', 'NKAIN4', 'YBX3', 'AFF3', 'ROR1', 'AKAP12', 'SHOX2', 'GNG7', 'TCL1B', 'BCL7A', 'PHACTR1', 'KANK2', 'MZB1', 'AMBP', 'DNASE1L3', 'CEMIP', 'ABHD15', 'HLA-DMB', 'ADAM23', 'BTK', 'PTMA', 'CD38', 'CFAP73', 'DTX1', 'SOX4', 'STRBP', 'RNGTT', 'SERHL2', 'HHIP-AS1', 'C16orf74', 'NIBAN3', 'APBB2', 'SPIB', 'MME', 'NPPC', 'MCTP2']
sc.tl.score_genes(adata, gene_list, score_name='immature_B') 
#B_mem
gene_list=['CPNE5', 'ADAM19', 'RALGPS2', 'SP140', 'AIM2', 'FCRL5', 'BIRC3', 'FCRLA', 'HLA-DPA1', 'RUBCNL', 'PAX5', 'EEF2', 'ARHGAP24', 'CRACR2B', 'BLNK', 'EBI3', 'TCF4', 'NXPH4', 'HLA-DRB1', 'FCRL1', 'CCDC50', 'SYNGR2', 'EBF1', 'NFKBID', 'HERPUD1', 'FCRL2', 'TRAF5', 'JCHAIN', 'TMEM156', 'COL4A4', 'EEF1B2', 'COTL1', 'IGHA1', 'SYNPO', 'SNX3', 'OSBPL10', 'IFT57', 'BCL11A', 'ARID5B', 'IRF8', 'TAGLN2', 'CD22', 'CD1C', 'GAPT', 'IGLC3', 'NCF1', 'IGHA2', 'WDFY4', 'IGLC2', 'AP1S3', 'SNX2', 'PLP2', 'VOPP1', 'ADAM28', 'PDLIM1', 'GPR183', 'ALOX5', 'IGHG3', 'COBLL1', 'BTNL9', 'PKHD1L1', 'AK8', 'PMAIP1', 'ORAI2', 'HVCN1', 'P2RX5', 'SEMA4B', 'LSP1', 'PRDM2', 'PKIG', 'IGKC', 'BAIAP3', 'PTPN6', 'CAPG', 'CD79A', 'RNASET2', 'TMEM273', 'PTCHD1', 'MTARC2', 'IGHE', 'SNX22', 'CD74', 'HLA-DOB', 'CD24', 'CD52', 'BANK1', 'CXCR5', 'SEL1L3', 'SMARCB1', 'CLECL1', 'CD19', 'CD70', 'SDR16C5', 'TMEM154', 'CD83', 'HLA-DMA', 'HLA-DQB1', 'HLA-DRB5', 'LTB', 'CTSH', 'HLA-DRA', 'FGD2', 'FCMR', 'LY86', 'TBC1D9', 'VPREB3', 'HLA-DPB1', 'CYB561A3', 'DRAM2', 'OSTN-AS1', 'RIC3', 'TLR10', 'ARPC3', 'CD37', 'FCER2', 'RPL13A', 'CD79B', 'MS4A1', 'TCOF1', 'SWAP70', 'BACE2', 'METTL8', 'POU2AF1', 'MARCHF1', 'BHLHE41', 'OSBPL10-AS1', 'TPD52', 'RASGRP3', 'RRAS2', 'SCIMP', 'EVI2B', 'POU2F2', 'PNOC', 'RASSF6', 'EZR', 'SYPL1', 'TEX9', 'GPM6A', 'LINC00494', 'RHBDF2', 'MARCKS', 'BLK', 'CD82', 'FXYD1', 'SETBP1', 'PLEKHG7', 'DDAH2', 'CXXC5', 'LAPTM5', 'COL4A3', 'HLA-DQA1', 'QRSL1', 'IGHG2', 'IGHM', 'ACP5', 'PPP1R14A', 'CD40', 'ARHGAP25', 'CNFN', 'SMIM14', 'TNFRSF13C', 'FAM30A', 'ISG20', 'CHCHD10', 'RIPOR2', 'GNG7', 'LINC00926', 'FCGR2B', 'EML6', 'TFEB', 'MEF2C', 'HLA-DMB', 'ZNF860', 'DOK7', 'COCH', 'ZBTB32', 'LYPLAL1', 'CNR2', 'PRKCB', 'KLK1', 'CD180', 'TNFRSF13B', 'TSC22D3', 'SSPN', 'NIBAN3', 'DNAJC5B', 'IGHG1', 'SPIB', 'ISCU', 'BCAS4', 'CYSLTR1']
sc.tl.score_genes(adata, gene_list, score_name='B_mem') 
#B_naive
gene_list=['ADAM19', 'STAP1', 'PDE4B', 'RALGPS2', 'C12orf42', 'FCRL5', 'BIRC3', 'FCRLA', 'ICOSLG', 'HLA-DPA1', 'RPS17', 'RUBCNL', 'PAX5', 'RPL19', 'ARHGAP24', 'BLNK', 'SYT17', 'P2RY10', 'MICAL3', 'HLA-DRB1', 'FCRL1', 'MACROD2', 'RPL21', 'EBF1', 'NFKBID', 'HLA-DQA2', 'RCSD1', 'HERPUD1', 'FCRL2', 'TRAF5', 'RPL10A', 'JCHAIN', 'P2RY14', 'TMEM156', 'BTG1', 'CD200', 'TCTN1', 'RPL13', 'IFT57', 'BCL11A', 'LCN8', 'IRF8', 'RPL18A', 'GGA2', 'CD22', 'RPSA', 'FCRL3', 'GAPT', 'IGLC3', 'RPL12', 'CDCA7L', 'NCF1', 'STAG3', 'IGLC2', 'HHEX', 'SNX2', 'STX7', 'ADAM28', 'PDLIM1', 'COBLL1', 'ORAI2', 'HVCN1', 'REL', 'BTLA', 'P2RX5', 'PRDM2', 'PKIG', 'IGKC', 'PLEKHG1', 'NOP53', 'ABCB4', 'CD79A', 'RPL11', 'SNX22', 'RPS27', 'CD74', 'SPON1', 'PCDH9', 'HLA-DOB', 'CD24', 'FAU', 'RPL23A', 'RPS27A', 'RPL18', 'RPL29', 'CD52', 'BANK1', 'CXCR5', 'SLC38A11', 'RPL30', 'SEL1L3', 'CD19', 'TCL1A', 'MTSS1', 'TMEM154', 'CD83', 'EAF2', 'RPL10', 'HLA-DMA', 'HLA-DQB1', 'HLA-DRB5', 'LTB', 'FGD2', 'HLA-DRA', 'NEIL1', 'FCMR', 'LY86', 'VPREB3', 'HLA-DPB1', 'CYB561A3', 'DRAM2', 'TLR10', 'CD37', 'FCER2', 'IL4R', 'UGT8', 'RPL8', 'RPL13A', 'CD79B', 'MS4A1', 'TCOF1', 'CD72', 'SWAP70', 'BACH2', 'POU2AF1', 'MARCHF1', 'IGLL5', 'OSBPL10-AS1', 'RPS19', 'GABPB1-AS1', 'HLA-DOA', 'FAM177B', 'ZCCHC7', 'TPD52', 'TSPAN13', 'RASGRP3', 'RRAS2', 'RPS8', 'POU2F2', 'PNOC', 'ZDHHC19', 'EZR', 'RHBDF2', 'BLK', 'SH3BP5', 'COL19A1', 'RPLP2', 'LARGE2', 'RPS23', 'LAPTM5', 'RPL15', 'HLA-DQA1', 'TAGAP', 'SERPINB9P1', 'IGHM', 'LINC01480', 'CD40', 'CNFN', 'SMIM14', 'TNFRSF13C', 'FAM30A', 'ISG20', 'CHPT1', 'LINC01215', 'AFF3', 'PEG10', 'GNG7', 'KCNG1', 'RAB30', 'LINC00926', 'KMO', 'FCGR2B', 'PHACTR1', 'BCL7A', 'RPS11', 'CXCR4', 'MEF2C', 'HLA-DMB', 'SNX29', 'CALHM6', 'STRBP', 'CNR2', 'RPS5', 'CD180', 'TSC22D3', 'C16orf74', 'IGHD', 'NIBAN3', 'PLPP5', 'LARGE1', 'SPIB', 'CNTNAP2']
sc.tl.score_genes(adata, gene_list, score_name='B_naive') 
# plasma gene signature; sc.tl.score_genes stores the per-cell score in adata.obs['plasma'].
gene_list = [
    'CPNE5', 'CLDN3', 'CCPG1', 'CRELD2', 'FCRL5', 'CCR10', 'FBXO16', 'C2orf88', 'CRACR2B', 'TTLL7',
    'GPR160', 'LINC00309', 'CHAC1', 'ARSA', 'UBE2J1', 'DNAJC1', 'HM13', 'MANEA', 'DNAJB9', 'HERPUD1',
    'ARX', 'JCHAIN', 'TMEM156', 'NXPE4', 'CADPS2', 'IGHA1', 'LMAN2', 'ASS1', 'MANF', 'MAN1A1',
    'RAMP2', 'SELENOS', 'SDF2L1', 'IGHGP', 'IGKV3-20', 'BIK', 'IGHV1-24', 'CST6', 'IGLC3', 'TMEM52',
    'IGHA2', 'TP53INP1', 'IGLC2', 'TNFRSF17', 'TUBB2B', 'ST6GALNAC4', 'SCNN1B', 'SPCS3', 'SRPRB', 'BRSK1',
    'AMPD1', 'ITM2C', 'IGHG3', 'COBLL1', 'MDK', 'PKHD1L1', 'ELL2', 'TPM4', 'LMTK3', 'DUSP5',
    'IGLV3-1', 'ABCB9', 'IGKV1D-39', 'IGLC7', 'ALDH1L2', 'JSRP1', 'MIXL1', 'IGHG4', 'SSR4', 'WNT10A',
    'IGKC', 'SLAMF7', 'IGKV1-39', 'CLIC4', 'MYDGF', 'SLC1A4', 'MOXD1', 'LMNA', 'LINC00582', 'KCNMA1',
    'KDELR3', 'ZBP1', 'SEL1L3', 'PDK1', 'ATF5', 'SPAG4', 'LYPD6B', 'DERL3', 'IGHV3-30', 'EAF2',
    'IGKV3-15', 'PSAT1', 'SEC11C', 'MYL2', 'PDIA6', 'CD9', 'ERLEC1', 'CAV1', 'CD59', 'PLA2G2D',
    'KCNN3', 'ITGA8', 'IGLV6-57', 'CADM1', 'HSP90B1', 'DPEP1', 'CLDN14', 'CTHRC1', 'SEPTIN10', 'TXNDC15',
    'SPATS2', 'LMAN1', 'TMEM45A', 'POU2AF1', 'BHLHE41', 'KDELR2', 'GAS6', 'IGLL5', 'SMOC1', 'TMED9',
    'CHPF', 'TPD52', 'RASGRP3', 'SCAMP5', 'WFS1', 'IGKV4-1', 'GPRC5D', 'FRGCA', 'PARM1', 'IGKV1-5',
    'TXNDC11', 'SEL1L', 'CIBAR2', 'TXNDC5', 'SSR3', 'IGF1', 'IGHG2', 'DNAAF1', 'B9D1', 'PIK3CD-AS2',
    'PRSS16', 'HID1', 'PRDX4', 'MEI1', 'SLC17A9', 'KDELR1', 'GMPPB', 'TGFBR3L', 'RRBP1', 'GNG7',
    'ANKRD28', 'FKBP11', 'RAB30', 'LMF1', 'SELENOM', 'PERP', 'SIL1', 'MZB1', 'SDC1', 'CLPTM1L',
    'HYOU1', 'QPRT', 'IL5RA', 'PRDM1', 'SPCS2', 'FKBP2', 'UGT2B17', 'CD38', 'CD27', 'PAIP2B',
    'XBP1', 'IGKV1-12', 'PELI1', 'IGLV5-48', 'TNFRSF13B', 'TAS1R3', 'PLPP5', 'TRIB1', 'IGHG1', 'HDLBP',
    'TENT5C', 'FNDC3B', 'PDIA4',
]
sc.tl.score_genes(adata, gene_list=gene_list, score_name='plasma')
# pre_B gene signature; sc.tl.score_genes stores the per-cell score in adata.obs['pre_B'].
gene_list = [
    'CCDC34', 'DTYMK', 'RUBCNL', 'PAX5', 'GCSAM', 'RACGAP1', 'PLK1', 'VAT1L', 'CENPU', 'SSBP2',
    'PLD4', 'TCF4', 'CDKN2C', 'KIF11', 'CDC6', 'EBF1', 'NUSAP1', 'H4C3', 'KIFC1', 'CDCA5',
    'CENPA', 'RFC5', 'MYBL2', 'GGH', 'SPC24', 'CENPM', 'MKI67', 'LAT2', 'MSI2', 'BIK',
    'SLC7A3', 'SAC3D1', 'CDCA7L', 'GALNT14', 'TMSB15A', 'ORC6', 'P4HA2', 'TUBA1B', 'CCDC81', 'E2F1',
    'CENPW', 'PCLAF', 'CCNA2', 'CARHSP1', 'CLSPN', 'AEBP1', 'PDLIM1', 'MAD2L2', 'NDC80', 'CDCA3',
    'VPREB1', 'TIFA', 'NRM', 'MXD3', 'PAQR4', 'DHFR', 'HELLS', 'CDKN3', 'UHRF1', 'IGLL1',
    'CDKN2A', 'PARP1', 'H2AX', 'BIRC5', 'ZWINT', 'CENPF', 'ESCO2', 'CDCA7', 'E2F2', 'DNAJC9',
    'BEST3', 'KNL1', 'NREP', 'PCDH9', 'CD24', 'CDK2', 'SGO1', 'CKS1B', 'SYVN1', 'TROAP',
    'BAHCC1', 'SAE1', 'STMN1', 'GINS2', 'FBXO5', 'UBE2T', 'GMNN', 'CD19', 'TCL1A', 'NASP',
    'AURKB', 'EAF2', 'TOP2A', 'SKA3', 'ARPP21', 'LDLRAD4', 'VDAC1', 'N4BP3', 'ASF1B', 'CDK1',
    'CD9', 'PXMP2', 'TFDP2', 'C21orf58', 'KIF15', 'VPREB3', 'RAG2', 'CMTM8', 'CHEK1', 'MYL6B',
    'CD79B', 'PHGDH', 'CD72', 'MCM7', 'CD81', 'TCF3', 'CDT1', 'ASPM', 'WASF1', 'POU2AF1',
    'CDC45', 'CEP55', 'PTTG1', 'CDCA8', 'CHAF1A', 'SMC2', 'FEN1', 'HMMR', 'CENPH', 'MCM3',
    'PCNA', 'CCNE2', 'SMARCA4', 'RAD51AP1', 'UBE2C', 'IGLC5', 'LMNB1', 'FABP5', 'C2orf48', 'H3C8',
    'CCNB2', 'USP1', 'MIR181A1HG', 'TPX2', 'NUF2', 'H1-5', 'EZH2', 'NCAPG', 'RRM1', 'QRSL1',
    'IGHM', 'RRM2', 'CDC20', 'FAIM', 'PIMREG', 'TUBB', 'IDH2', 'IRF4', 'DLEU2', 'TYMS',
    'CRNDE', 'RMI2', 'TMPO', 'AFF3', 'MYO3A', 'AKAP12', 'CAPSL', 'RAD51', 'MCM4', 'SMC4',
    'RPL39L', 'BCL7A', 'TK1', 'MZB1', 'CENPN', 'GTSE1', 'ACY3', 'MND1', 'ABHD15', 'GLDC',
    'CKS2', 'CD38', 'CFAP73', 'FAM111B', 'SOX4', 'STRBP', 'CKAP2L', 'DBN1', 'SERHL2', 'SPC25',
    'EIF4A3', 'HADH', 'TCF19', 'NIBAN3', 'SHCBP1', 'APBB2', 'SPIB', 'MAD2L1', 'MME', 'HMGB3',
]
sc.tl.score_genes(adata, gene_list=gene_list, score_name='pre_B')
# erythropoietic_Basophilic_Erythroblast gene signature; sc.tl.score_genes stores the
# per-cell score in adata.obs['erythropoietic_Basophilic_Erythroblast'].
gene_list = [
    'TST', 'CNRIP1', 'PSMG1', 'CCDC34', 'FKBP4', 'PPT2', 'FAM178B', 'LCA5', 'DHRS11', 'DES',
    'CENPU', 'SYNGR1', 'HACD1', 'NECTIN1', 'CDC6', 'SPTA1', 'EGFL7', 'CCNB1', 'CDCA5', 'A4GALT',
    'MYB', 'STEAP3', 'AMT', 'ENG', 'GAL', 'HES6', 'DYNC2I2', 'CYTL1', 'PCCB', 'ALDH1A1',
    'FXN', 'ISOC2', 'TUBG1', 'HLTF', 'MYH10', 'HEBP1', 'FECH', 'MYL4', 'PRKAR2B', 'ORC6',
    'LDB1', 'CCDC26', 'GATA1', 'GPT', 'AKR1C3', 'CENPW', 'PCLAF', 'CCNA2', 'CLSPN', 'FADS2',
    'TRAP1', 'RAB13', 'ECI2', 'CDC42BPA', 'ANKLE1', 'UBXN10', 'CDK4', 'TFR2', 'DHFR', 'MCM2',
    'NMU', 'FAM124B', 'NET1', 'WDR12', 'SIGMAR1', 'ADD2', 'BIRC5', 'AKR1C1', 'ZWINT', 'MFSD2B',
    'XK', 'CENPF', 'CDH1', 'PPIL1', 'LTBP1', 'TAL1', 'ICA1', 'RHAG', 'PAICS', 'LRRCC1',
    'GINS2', 'UBE2T', 'PKLR', 'GMNN', 'MBOAT2', 'RFESD', 'TRIP6', 'MNS1', 'MCM10', 'LPCAT3',
    'LXN', 'PIR', 'HMGN5', 'TPM1', 'FHL2', 'NECAB1', 'KCNH2', 'CCNE1', 'EPOR', 'NOP16',
    'B3GALNT1', 'GAD1', 'KEL', 'PXMP2', 'HPDL', 'GCSH', 'GFI1B', 'CHEK1', 'SLC27A2', 'ARV1',
    'HMBS', 'TTLL12', 'CSF1', 'GMPR', 'PGAP4', 'CDT1', 'PLTP', 'AHSP', 'ATAD3A', 'RHOBTB3',
    'CMBL', 'CASP3', 'HSD3B7', 'SMC2', 'EEF1AKNMT', 'FEN1', 'MCM3', 'FSCN1', 'CA2', 'SLC38A5',
    'HBD', 'SORD', 'CXADR', 'CCNB2', 'HBQ1', 'ICAM4', 'MTHFD1', 'ELOVL6', 'TPX2', 'SLC48A1',
    'ANK1', 'DLGAP5', 'CDC20', 'PYCR1', 'APOE', 'TLCD4', 'SLC29A1', 'PARVB', 'TGM2', 'POLE2',
    'PRC1', 'SLC40A1', 'TYMS', 'SMIM10', 'STRADB', 'KEAP1', 'SERPINE2', 'APOC1', 'EPCAM', 'MEST',
    'SLC25A21', 'PVT1', 'PLA2G12A', 'KLF1', 'FAM89A', 'GBGT1', 'MACROD1', 'CASP6', 'LINC01133', 'CAVIN1',
    'SLC16A9', 'MINPP1', 'MCM4', 'DNAJA4', 'SMIM1', 'UBAC1', 'SELENBP1', 'TMEM97', 'MND1', 'GRTP1',
    'NMNAT3', 'TCEAL9', 'DLEU1', 'CKS2', 'NFIA', 'UNG', 'CA1', 'MAP7', 'CTNNAL1', 'PMP22',
    'SNCA', 'SLC39A8', 'GCAT', 'HADH', 'TMOD1', 'EPDR1', 'PNMT', 'SLC43A3',
]
sc.tl.score_genes(adata, gene_list=gene_list, score_name='erythropoietic_Basophilic_Erythroblast')
# erythropoietic_CD34_Pro_erythroblast gene signature; sc.tl.score_genes stores the
# per-cell score in adata.obs['erythropoietic_CD34_Pro_erythroblast'].
gene_list = [
    'PRSS57', 'CPB1', 'HTR1F', 'CNRIP1', 'MPC2', 'PSMG1', 'STXBP6', 'GGCT', 'FKBP4', 'TPSB2',
    'ST6GAL2', 'C2orf88', 'NMI', 'DHRS11', 'DNPH1', 'ABO', 'CENPU', 'EMID1', 'ZNF385D', 'HACD1',
    'HDAC7', 'EGFL7', 'MYB', 'MPST', 'TRIB2', 'IGSF10', 'HSPG2', 'LAPTM4B', 'PSTPIP2', 'CYTL1',
    'ALDH1A1', 'FXN', 'CREG1', 'ISOC2', 'EHD2', 'TM7SF3', 'MGST2', 'IL1B', 'ATIC', 'NPM3',
    'PRKAR2B', 'NPR3', 'CCDC42', 'GATA1', 'IGFBP7', 'EBPL', 'PCLAF', 'PDLIM1', 'RAB13', 'CLEC11A',
    'CDC42BPA', 'PLOD2', 'ABCC4', 'CDK4', 'TFR2', 'RNF24', 'ARL2', 'FAM124B', 'NET1', 'BEX3',
    'PCAT18', 'PBX1', 'GATA2', 'PKIG', 'MYCT1', 'SVOPL', 'CDCA7', 'DEPTOR', 'ISYNA1', 'TAL1',
    'NREP', 'MPIG6B', 'HNRNPAB', 'PAICS', 'STMN1', 'TPSAB1', 'P2RX1', 'MPP1', 'HMGA1', 'DNAJC12',
    'HDAC2', 'LXN', 'TPM1', 'HMGN5', 'FHL2', 'NECAB1', 'AMHR2', 'CPXM1', 'UROD', 'MEX3B',
    'PXMP2', 'EPSTI1', 'CENPV', 'GCSH', 'CSF2RB', 'REC8', 'SLC27A2', 'TFPI', 'CRYM', 'FAM117A',
    'TPGS2', 'CSF1', 'CAT', 'PAFAH1B3', 'GMPR', 'CTNNBL1', 'PLTP', 'TSC22D1', 'CMBL', 'CASP3',
    'CYTOR', 'TMEM14C', 'TUBA1C', 'MIR4435-2HG', 'NIPSNAP3A', 'LYL1', 'FSCN1', 'SAMD13', 'HOXA10', 'SCCPDH',
    'FABP5', 'HBD', 'SORD', 'FCER1A', 'CXADR', 'IMPDH2', 'ICAM4', 'CD82', 'RYR3', 'CCDC28B',
    'SLC39A3', 'NPW', 'PYCR1', 'PIK3CD-AS2', 'PPP1R14A', 'APOE', 'DCTPP1', 'ACSM3', 'CDK6', 'HYAL3',
    'NME4', 'ZNF711', 'MARCKSL1', 'SLC40A1', 'TYMS', 'SMIM10', 'STRADB', 'SLC39A4', 'SERPINE2', 'APOC1',
    'FAM30A', 'MEST', 'HSPD1', 'PVT1', 'EPCAM', 'TESMIN', 'HOXA9', 'RBPMS2', 'BZW2', 'KLF1',
    'STAT5A', 'STON2', 'LEPROT', 'MACROD1', 'CASP6', 'ZNF521', 'CCT2', 'CAVIN1', 'ANKRD28', 'GAR1',
    'SMIM1', 'CD34', 'DPPA4', 'AHCY', 'TCEAL9', 'PHTF1', 'CKS2', 'NFIA', 'HPGDS', 'RNF130',
    'YARS1', 'ATP2C1', 'SOX4', 'PDZD8', 'MAP7', 'CCDC181', 'SLC39A8', 'NME1', 'HADH', 'ITGA2B',
    'PNMT', 'ARHGAP22', 'DKC1', 'DLC1',
]
sc.tl.score_genes(adata, gene_list=gene_list, score_name='erythropoietic_CD34_Pro_erythroblast')
# erythropoietic_Erythrocyte gene signature; sc.tl.score_genes stores the
# per-cell score in adata.obs['erythropoietic_Erythrocyte'].
gene_list = [
    'RPL22L1', 'PA2G4', 'CNRIP1', 'MPC2', 'GYPA', 'PSMG1', 'VDAC3', 'FKBP4', 'DTYMK', 'HBG2',
    'ACKR1', 'FAM178B', 'RHD', 'CTSE', 'RANBP1', 'TSPAN17', 'CENPU', 'PRDX3', 'SYNGR1', 'ALAS2',
    'SLC25A37', 'NUSAP1', 'CCNB1', 'SPTA1', 'A4GALT', 'MPST', 'PSMB5', 'SPC24', 'HES6', 'RHCE',
    'TXN', 'ALDH1A1', 'TMEM14B', 'ISOC2', 'TUBG1', 'MRPL51', 'HEBP1', 'TUBB2A', 'MYL4', 'FECH',
    'AK1', 'GATA1', 'TUBA1B', 'BLVRA', 'CPOX', 'AKR1C3', 'CENPW', 'PCLAF', 'CCNA2', 'CCT6A',
    'CDK4', 'HBZ', 'HEMGN', 'DHFR', 'CDKN3', 'NMU', 'SLC25A39', 'GLRX5', 'GYPE', 'IFIT1B',
    'SLC4A1', 'GCLM', 'YWHAE', 'ADD2', 'BIRC5', 'NARF', 'ZWINT', 'DNAJC9', 'MRPL12', 'TMCC2',
    'DUT', 'LMNA', 'H1-0', 'RHAG', 'OAT', 'CKS1B', 'HNRNPAB', 'PAICS', 'HBA2', 'STMN1',
    'GINS2', 'H2AZ1', 'UBE2T', 'MPP1', 'HMGA1', 'PRDX2', 'ATP5IF1', 'RFESD', 'GMNN', 'MBOAT2',
    'MCM5', 'LXN', 'TUBB4B', 'NASP', 'HMGN5', 'TPM1', 'KCNH2', 'TOP2A', 'CCNE1', 'EPOR',
    'FAM210B', 'BLVRB', 'UROD', 'CDK1', 'CD59', 'HBG1', 'BOLA3', 'CR1L', 'SLIRP', 'GFI1B',
    'HSP90AA1', 'HMBS', 'TPGS2', 'UROS', 'REXO2', 'CD36', 'FBXO7', 'CAT', 'MCM7', 'GMPR',
    'CDT1', 'AHSP', 'PTTG1', 'TMEM14C', 'FEN1', 'HMMR', 'CENPH', 'TUBA1C', 'PCNA', 'CA2',
    'SLC38A5', 'UBE2C', 'HBA1', 'HBD', 'CCNB2', 'HBQ1', 'TRIM10', 'RPA3', 'ANK1', 'HMGB1',
    'DLGAP5', 'CDC20', 'PYCR1', 'RRM2', 'TLCD4', 'HBM', 'PARVB', 'NME4', 'TUBB', 'IDH2',
    'PRC1', 'TYMS', 'GYPB', 'STRADB', 'SMIM10', 'TESC', 'SLC25A21', 'APOC1', 'EPCAM', 'HSPD1',
    'PVT1', 'SNRPG', 'IFI27', 'KLF1', 'PBK', 'LINC01133', 'HBB', 'CCT2', 'GAR1', 'TSPO2',
    'MINPP1', 'DNAJA4', 'SMIM1', 'UBAC1', 'EPB42', 'SELENBP1', 'UQCC2', 'TK1', 'CA3', 'DMTN',
    'ATP5MC1', 'CKS2', 'HAGH', 'NFIA', 'NFE2', 'GYPC', 'ALAD', 'TFDP1', 'CA1', 'SNCA',
    'NME1', 'RAN', 'MAD2L1', 'SOX6', 'HMGB3', 'METAP2',
]
sc.tl.score_genes(adata, gene_list=gene_list, score_name='erythropoietic_Erythrocyte')
# erythropoietic_Polychromatic_Erythroblast gene signature; sc.tl.score_genes stores the
# per-cell score in adata.obs['erythropoietic_Polychromatic_Erythroblast'].
gene_list = [
    'TST', 'SLC22A16', 'CNRIP1', 'GYPA', 'CCDC34', 'PPT2', 'FAM178B', 'RHD', 'CTSE', 'TSPAN17',
    'CENPU', 'SYNGR1', 'ALAS2', 'ACHE', 'CDC6', 'SPTA1', 'NUSAP1', 'CCNB1', 'A4GALT', 'STEAP3',
    'SPC24', 'ENG', 'HES6', 'RHCE', 'DYNC2I2', 'PCCB', 'MKI67', 'ALDH1A1', 'AMMECR1', 'TUBG1',
    'HLTF', 'MYH10', 'ORC1', 'HEBP1', 'PLPP4', 'TUBB2A', 'MYL4', 'FECH', 'PRKAR2B', 'ORC6',
    'AK1', 'GATA1', 'CPOX', 'C16orf95', 'AKR1C3', 'CENPW', 'PCLAF', 'CCNA2', 'AURKA', 'CLSPN',
    'SEC14L4', 'FAM83D', 'CDC42BPA', 'ANKLE1', 'ZFYVE21', 'MCM2', 'TFR2', 'HEMGN', 'DHFR', 'CDKN3',
    'C17orf99', 'GYPE', 'SLC4A1', 'GCLM', 'ADD2', 'BIRC5', 'ZWINT', 'MFSD2B', 'XK', 'KIF23',
    'AQP1', 'OSBP2', 'CENPF', 'ESCO2', 'PPIL1', 'PIGQ', 'LTBP1', 'TAL1', 'SLC37A4', 'RHAG',
    'H1-0', 'TROAP', 'NCAPH', 'GAS2L1', 'LRRCC1', 'GINS2', 'UBE2T', 'POMGNT2', 'PKLR', 'RFESD',
    'MBOAT2', 'GMNN', 'MCM10', 'DHRS13', 'LPCAT3', 'LXN', 'PIR', 'HMGN5', 'TPM1', 'FHL2',
    'KCNH2', 'TOP2A', 'CCNE1', 'ERMAP', 'EPOR', 'SMIM5', 'FAM210B', 'CDK1', 'UROD', 'B3GALNT1',
    'TUBB6', 'KEL', 'CR1L', 'FN3K', 'APOL4', 'GFI1B', 'ARV1', 'HMBS', 'CENPE', 'ABCB10',
    'GMPR', 'CDT1', 'ASPM', 'AHSP', 'RHOBTB3', 'ATP1B2', 'SMC2', 'FEN1', 'HMMR', 'DEPDC1',
    'CHAC2', 'SPTB', 'CA2', 'RAB3IL1', 'SLC38A5', 'RCL1', 'HBD', 'SPHK1', 'CCNB2', 'HBQ1',
    'ICAM4', 'TRIM10', 'TPX2', 'SLC48A1', 'NUF2', 'FUT1', 'ANK1', 'KRT13', 'NCAPG', 'DLGAP5',
    'MYZAP', 'CDC20', 'PYCR1', 'RRM2', 'TLCD4', 'SLC29A1', 'HBM', 'POLE2', 'PRC1', 'TYMS',
    'GYPB', 'STRADB', 'SMIM10', 'SLC25A21', 'APOC1', 'EPCAM', 'FAM13A', 'IFI27', 'KLF1', 'PBK',
    'LINC01133', 'TSPO2', 'SLC43A1', 'MINPP1', 'MCM4', 'DNAJA4', 'UBAC1', 'SMIM1', 'SELENBP1', 'TANGO2',
    'TK1', 'CA3', 'DMTN', 'NMNAT3', 'NFIA', 'NFE2', 'RFC4', 'SLC6A8', 'ALAD', 'CA1',
    'CTNNAL1', 'SNCA', 'TMOD1', 'PRKAB1', 'EPDR1', 'SOX6', 'SLC43A3',
]
sc.tl.score_genes(adata, gene_list=gene_list, score_name='erythropoietic_Polychromatic_Erythroblast')
# Hematopoietic progenitors, CD34 HSC gene signature; sc.tl.score_genes stores the per-cell
# score in adata.obs['hematopoetic_progenitors_CD34_HSC'] (key keeps its original spelling).
gene_list = [
    'PRDX1', 'PRSS57', 'RBPMS', 'HTR1F', 'ERG', 'LOX', 'ROBO4', 'ELMO1', 'GCSAML', 'ZNF385D',
    'HACD1', 'TCEAL2', 'HOXA6', 'PRSS2', 'EGFL7', 'NAP1L3', 'HLF', 'ADGRG6', 'MYB', 'CASC15',
    'HOXA3', 'IGSF10', 'SPINK2', 'LAPTM4B', 'PREX2', 'PTMS', 'CYTL1', 'BCL11A', 'SH3GL3', 'SERPINB1',
    'ALDH1A1', 'CBX2', 'SCN9A', 'CALN1', 'IL12A-AS1', 'SMIM6', 'MSI2', 'EHD2', 'LINC01122', 'NPR3',
    'CCDC42', 'HOXA7', 'CDH7', 'SOCS2', 'EBPL', 'C3orf80', 'RAB13', 'ITM2C', 'PLOD2', 'HOXA10-AS',
    'MDK', 'HEMGN', 'UBR5-AS1', 'HOPX', 'BEX3', 'PBX1', 'GUCY1B1', 'HOXB-AS3', 'IL18', 'CTSF',
    'FAM133A', 'GATA2', 'MYCT1', 'SVOPL', 'SH3D21', 'TCEAL4', 'CRYGD', 'DEPTOR', 'BST2', 'ATP1B1',
    'HMGA2', 'MBOAT7', 'DLK1', 'NKX2-3', 'ARMCX1', 'H1-0', 'PCDH9', 'CLEC9A', 'CCNB1IP1', 'PRDM16-DT',
    'STMN1', 'HMGA1', 'PCBD1', 'MFAP2', 'NYNRIN', 'LIMCH1', 'TCTEX1D1', 'MED12L', 'XXYLT1-AS2', 'PROM1',
    'CPXM1', 'TMEM98', 'SERPING1', 'EMCN', 'SMIM24', 'GNAI1', 'TFPI', 'NKAIN2', 'GNA15', 'CLU',
    'BAALC', 'PGM5', 'MYL6B', 'RAB34', 'DPYSL3', 'BAALC-AS2', 'CYYR1', 'NRIP1', 'CHRM3', 'SLC1A6',
    'TSC22D1', 'MLLT3', 'CMBL', 'PCDH17', 'BEX2', 'BEX1', 'LYL1', 'SAMD13', 'MMP2', 'HOXA10',
    'ANGPT1', 'CFH', 'SYPL1', 'NDN', 'ETS2', 'SPINT2', 'IMPDH2', 'TLCD5', 'AIF1', 'NPM2',
    'GUCY1A1', 'MEG3', 'NPDC1', 'DDAH2', 'CAVIN2', 'C1QTNF4', 'SPOCK3', 'DSG2', 'CD164', 'CDK6',
    'MSRB3', 'MEIS1', 'ITGA9', 'CCDC175', 'H2AW', 'MMRN1', 'SERPINE2', 'FAM30A', 'FHL1', 'MEST',
    'MACROH2A1', 'HOXA9', 'GSTM5', 'AVP', 'UCHL1', 'ARNTL2-AS1', 'BZW2', 'IFITM3', 'LEPROT', 'C9orf43',
    'ZNF521', 'CAVIN1', 'ANKRD28', 'KRT18', 'MPDZ', 'SELENOP', 'CRHBP', 'SERPINB6', 'CD34', 'MECOM',
    'PRKG2', 'DPPA4', 'HOXB5', 'TCEAL9', 'HOXB6', 'TM4SF1', 'MIR1915HG', 'ZBTB8A', 'CSF3R', 'ATP2C1',
    'TALDO1', 'SOX4', 'MAP7', 'KRT8', 'RASSF9', 'LIMS1', 'MGLL', 'SULT1C4', 'TMEM163',
]
sc.tl.score_genes(adata, gene_list=gene_list, score_name='hematopoetic_progenitors_CD34_HSC')
# Hematopoietic progenitors, CD34 MEP_1 gene signature; sc.tl.score_genes stores the per-cell
# score in adata.obs['hematopoetic_progenitors_CD34_MEP_1'] (key keeps its original spelling).
gene_list = [
    'COMT', 'PRSS57', 'FADS1', 'CPB1', 'HTR1F', 'CNRIP1', 'STXBP6', 'GGCT', 'TPSB2', 'ST6GAL2',
    'ERG', 'C2orf88', 'GCSAML', 'EMID1', 'ZNF385D', 'HACD1', 'EGFL7', 'FBN1', 'MYB', 'NUDT11',
    'ADRA2A', 'SLC10A5', 'IGSF10', 'LAPTM4B', 'PSTPIP2', 'RAC3', 'CYTL1', 'SERPINB1', 'ALDH1A1', 'PTGS1',
    'CREG1', 'EHD2', 'MSI2', 'FBXW9', 'TRIM24', 'IL1B', 'LINC01122', 'NPR3', 'HOXA7', 'GATA1',
    'SOCS2', 'IGFBP7', 'EBPL', 'NTRK1', 'PDLIM1', 'INKA1', 'RAB13', 'CLEC11A', 'PLOD2', 'HOXA10-AS',
    'ZNF804A', 'ABCC4', 'MDK', 'RNF24', 'GNAQ', 'BEX3', 'PBX1', 'GUCY1B1', 'IL18', 'GATA2',
    'PKIG', 'YWHAE', 'MYCT1', 'SVOPL', 'EREG', 'TCEAL4', 'SNHG19', 'CRYGD', 'DEPTOR', 'HMGA2',
    'ISYNA1', 'ATP1B1', 'NREP', 'ARMCX1', 'H1-0', 'MPIG6B', 'CCNB1IP1', 'ATP6V0A2', 'STMN1', 'TPSAB1',
    'P2RX1', 'MPP1', 'HMGA1', 'HDAC2', 'TPM1', 'MED12L', 'AMHR2', 'CPXM1', 'MEX3B', 'SERPING1',
    'CSF2RB', 'SLC27A2', 'SMIM24', 'GNAI1', 'TFPI', 'KLHL13', 'NKAIN2', 'FAM117A', 'SLC18A2', 'GNA15',
    'CLU', 'RTN3', 'RHEX', 'MYL6B', 'CAT', 'GMPR', 'PGAP4', 'CTNNBL1', 'CHRM3', 'TSC22D1',
    'TAFA2', 'MLLT3', 'CMBL', 'DTD1', 'AMD1', 'BEX2', 'EFHC2', 'TUBA1C', 'FSCN1', 'NIPSNAP3A',
    'LYL1', 'SAMD13', 'HOXA10', 'ANGPT1', 'SCCPDH', 'SYPL1', 'FCER1A', 'FABP5', 'ETS2', 'NDN',
    'IMPDH2', 'ICAM4', 'MLLT11', 'SLC8A3', 'DDAH2', 'SLC39A3', 'NPW', 'FREM1', 'MSRB3', 'CDK6',
    'HYAL3', 'MEIS1', 'ITGA9', 'ZNF711', 'SLC40A1', 'H2AW', 'MMRN1', 'SERPINE2', 'FAM30A', 'FHL1',
    'MEST', 'HOXA9', 'GSTM5', 'ARNTL2-AS1', 'BZW2', 'CPA3', 'STAT5A', 'STON2', 'LEPROT', 'C9orf43',
    'ZNF521', 'SPN', 'CCT2', 'CAVIN1', 'ANKRD28', 'MINPP1', 'CRHBP', 'CD34', 'PRKG2', 'DPPA4',
    'TCEAL9', 'PHTF1', 'HPGDS', 'RNF130', 'TESPA1', 'TIMP3', 'SLC45A3', 'ATP2C1', 'TALDO1', 'SOX4',
    'PDZD8', 'MAP7', 'LIMS1', 'MGLL', 'SPINK4', 'ZBTB16', 'ITGA2B', 'ST8SIA6', 'STXBP5', 'ARHGAP22',
    'DKC1',
]
sc.tl.score_genes(adata, gene_list=gene_list, score_name='hematopoetic_progenitors_CD34_MEP_1')
# Hematopoietic progenitors, CD34 MEP_2 gene signature; sc.tl.score_genes stores the per-cell
# score in adata.obs['hematopoetic_progenitors_CD34_MEP_2'] (key keeps its original spelling).
gene_list = [
    'LINC00535', 'PRSS57', 'CFAP126', 'CNRIP1', 'EXD3', 'STXBP6', 'TPSB2', 'RHBDD2', 'SCG2', 'TTLL7',
    'GCSAML', 'KRT19', 'SND1', 'SIGLEC8', 'EMID1', 'P4HB', 'HDC', 'PTGER3', 'MGST1', 'MYOZ1',
    'NUCB2', 'MYB', 'SLC10A5', 'PRG2', 'HSPG2', 'EPX', 'IL1RL1', 'PLD1', 'ZSCAN1', 'AFF2',
    'CAV2', 'TPSG1', 'TCN1', 'RFLNB', 'ORAI3', 'EBPL', 'PCLAF', 'NTRK1', 'FAM81B', 'CALR',
    'CLEC11A', 'ASRGL1', 'HOXA10-AS', 'ZNF804A', 'CEBPA', 'UGT2B11', 'TTC27', 'HGF', 'PLIN2', 'BEX3',
    'PBX1', 'GATA2', 'STAR', 'PKD2', 'WSCD2', 'MIR202HG', 'SVOPL', 'OSTC', 'MYDGF', 'FAM83A',
    'LRP4', 'DUOXA2', 'SH3D21', 'EREG', 'CRYGD', 'DEPTOR', 'LTBP1', 'GALC', 'PPM1H', 'ALOX5AP',
    'CTSG', 'STMN1', 'TPSAB1', 'ANXA1', 'P2RX1', 'LUM', 'HOXA5', 'HMGA1', 'PAQR5', 'ADCYAP1',
    'MORN5', 'CPXM1', 'PDIA6', 'CCNA1', 'ATP8B4', 'CRPPA', 'CSF2RB', 'SLC27A2', 'SMIM24', 'LIF',
    'SLC18A2', 'FAM117A', 'ANGPT2', 'KIT', 'GNA15', 'CLU', 'RNASE3', 'RHEX', 'MYL6B', 'CSF1',
    'CD63', 'RNASE2', 'VAT1', 'BACE2', 'LMAN1', 'ITGA4', 'LPCAT2', 'NLK', 'RASGRP3', 'MIR4435-2HG',
    'FAM83F', 'MS4A3', 'SCCPDH', 'FCER1A', 'FABP5', 'MS4A2', 'DHRS9', 'RGS13', 'RAB27B', 'LINC01140',
    'SLC39A3', 'SSR3', 'CMA1', 'NPW', 'MSRB3', 'CDK6', 'HYAL3', 'CRB1', 'NT5DC2', 'TTC29',
    'ZNF711', 'SLC40A1', 'TYMS', 'IL4', 'APOC1', 'MEST', 'CHPT1', 'HOXA9', 'ENPP3', 'ZNF704',
    'TPSD1', 'ITGB8', 'IRF2BP2', 'BZW2', 'CPA3', 'IKZF2', 'ARMH1', 'ZNF521', 'DTWD2', 'ANKRD28',
    'LMO4', 'ALB', 'TIE1', 'IL5RA', 'CD34', 'SEMA3C', 'CAMLG', 'CDK15', 'CLC', 'FKBP2',
    'HPGDS', 'FOXJ1', 'GLUL', 'RNF130', 'SLC45A3', 'TENT5A', 'TIMP3', 'SLC24A3', 'ATP2C1', 'SOX4',
    'PDZD8', 'LTC4S', 'MGAM2', 'NAALADL1', 'AARD', 'CCDC181', 'ST8SIA6', 'ITGA2B', 'HPGD', 'LINC00323',
    'MBOAT4', 'VWA5A',
]
sc.tl.score_genes(adata, gene_list=gene_list, score_name='hematopoetic_progenitors_CD34_MEP_2')
# Hematopoietic progenitors, CD34 MPP gene signature; sc.tl.score_genes stores the per-cell
# score in adata.obs['hematopoetic_progenitors_CD34_MPP'] (key keeps its original spelling).
gene_list = [
    'COMT', 'PRDX1', 'PRSS57', 'PRIM1', 'RBPMS', 'HTR1F', 'GGCT', 'FKBP4', 'ERG', 'HENMT1',
    'SPARC', 'HACD1', 'CLDN10', 'SMIM3', 'PRSS2', 'EGFL7', 'CDH26', 'MGST1', 'IGF2BP2', 'NUCB2',
    'MYB', 'IGSF10', 'SPINK2', 'GGH', 'LAPTM4B', 'RAC3', 'MAP1A', 'NUDT8', 'BCL11A', 'CYTL1',
    'SERPINB1', 'CBX2', 'RCN1', 'CALN1', 'LAT2', 'MSI2', 'TM7SF3', 'MGST2', 'NPM3', 'CDCA7L',
    'TCEAL5', 'MAPKAPK3', 'LINC01122', 'NPR3', 'HOXA7', 'AIF1L', 'IGFBP7', 'EBPL', 'PCLAF', 'MYO5C',
    'RAB13', 'ITM2C', 'CLEC11A', 'HOXA10-AS', 'CDK4', 'MDK', 'TPM4', 'HELLS', 'GPR12', 'UHRF1',
    'BEX3', 'IGLL1', 'HOXB-AS3', 'IL18', 'RAMP1', 'TCEAL4', 'SNHG19', 'CDCA7', 'CRYGD', 'DEPTOR',
    'SLC2A5', 'ISYNA1', 'ATP1B1', 'MBOAT7', 'NREP', 'ARMCX1', 'ALDH7A1', 'CCNB1IP1', 'TRH', 'PAICS',
    'CTSG', 'STMN1', 'IFTAP', 'HMGA1', 'DNAJC12', 'HDAC2', 'CRISPLD1', 'TNFRSF1A', 'NASP', 'AKR1A1',
    'MPO', 'FAM216A', 'EAF2', 'TCTEX1D1', 'PROM1', 'CPXM1', 'CD59', 'ATP8B4', 'PXMP2', 'SERPING1',
    'GCSH', 'SMIM24', 'GNAI1', 'TFPI', 'FAH', 'KIT', 'GNA15', 'BAALC', 'GAMT', 'MYL6B',
    'RAB34', 'RHEX', 'BAALC-AS2', 'PHGDH', 'CAT', 'MCM7', 'CDT1', 'CHRM3', 'CDH2', 'SUCNR1',
    'TSC22D1', 'ETV6', 'CENPH', 'MCM3', 'BEX1', 'MMP2', 'HOXA10', 'PRXL2A', 'ANGPT1', 'SCCPDH',
    'FABP5', 'PYGL', 'NDN', 'MIR181A1HG', 'SPNS3', 'IMPDH2', 'LRMDA', 'LKAAEAR1', 'CLGN', 'ADA',
    'GUCY1A1', 'NPDC1', 'DDAH2', 'FLT3', 'NPW', 'C1QTNF4', 'PLAGL1', 'DCTPP1', 'MSRB3', 'CDK6',
    'MEIS1', 'DSG2', 'JAM2', 'TYMS', 'BCAT1', 'MMRN1', 'SERPINE2', 'FAM30A', 'FHL1', 'MEST',
    'HSPD1', 'HOXA9', 'GSTM5', 'AVP', 'UCHL1', 'BZW2', 'CPA3', 'ARMH1', 'C9orf43', 'ZNF521',
    'CCT2', 'CAVIN1', 'ANKRD28', 'KRT18', 'SELENOP', 'CRHBP', 'CD34', 'KCNE5', 'DPPA4', 'AHCY',
    'TCEAL9', 'CKS2', 'HPGDS', 'TM4SF1', 'UNG', 'CSF3R', 'ATP2C1', 'SOX4', 'MAP7', 'MGLL',
    'NAALADL1', 'EMILIN1', 'ST3GAL4', 'NME1', 'HADH',
]
sc.tl.score_genes(adata, gene_list=gene_list, score_name='hematopoetic_progenitors_CD34_MPP')
# megakaryocytic_CD34_Megakaryoblast gene signature; sc.tl.score_genes stores the
# per-cell score in adata.obs['megakaryocytic_CD34_Megakaryoblast'].
gene_list = [
    'PLCB4', 'ZFPM2', 'FADS1', 'CPB1', 'PTGES3L', 'CTDSPL', 'GP1BA', 'GSN', 'SELP', 'ST6GAL2',
    'LOX', 'CAPN11', 'C2orf88', 'ASAP2', 'NECTIN2', 'GCSAML', 'PDE3A', 'SPX', 'TEAD4', 'CXCL3',
    'HACD1', 'SPARC', 'SOS1', 'TREML1', 'PTGER3', 'CDH26', 'DNAAF3', 'ADRA2A', 'KALRN', 'MALL',
    'IGSF10', 'PDIA5', 'LCN2', 'LAPTM4B', 'PSTPIP2', 'RAC3', 'EVA1B', 'PTGS1', 'ECM1', 'GJA4',
    'PROS1', 'GIPC3', 'TMSB15A', 'STUM', 'MYCN', 'PRKAR2B', 'ANXA3', 'NPR3', 'ITGB3', 'GATA1',
    'XRCC2', 'CMTM5', 'FADS2', 'PDLIM1', 'INKA1', 'RAB13', 'PLOD2', 'STAC', 'ABCC4', 'TTC27',
    'PBX1', 'GUCY1B1', 'DNM3', 'TTC7B', 'GATA2', 'NOS1AP', 'PLXDC2', 'MYCT1', 'SVOPL', 'FKBP10',
    'GP9', 'RAMP1', 'LTBP1', 'SLC44A1', 'MPIG6B', 'CALD1', 'CLEC1B', 'GAS2L1', 'ESAM', 'LPAR4',
    'SLC37A1', 'SV2A', 'ALOX12', 'PLAAT1', 'BDNF', 'TPM1', 'FHL2', 'SLC35D3', 'TCTEX1D1', 'MED12L',
    'CPXM1', 'NT5M', 'ABHD4', 'VWF', 'SERPING1', 'SCARF1', 'CAMK1', 'GNAI1', 'KLHL13', 'EXOC3L4',
    'PIEZO2', 'EGFL6', 'SLC18A2', 'CLU', 'ITPKA', 'RYR2', 'NRGN', 'GMPR', 'ARHGAP6', 'PGAP4',
    'PLTP', 'OVOS2', 'EFNB2', 'TMEM45A', 'CDH6', 'MORC1', 'EFHC2', 'RAB3C', 'PDE5A', 'UBE2C',
    'ANGPT1', 'GNAZ', 'FCER1A', 'NDN', 'WFDC1', 'RAB38', 'PLS3', 'NFIB', 'ADCY6', 'LGALSL',
    'RAB27B', 'OLFM3', 'HTR2A', 'TGFB1I1', 'RTN2', 'CAVIN2', 'RTL8B', 'CKB', 'MEIS1', 'HYAL3',
    'COL2A1', 'PF4', 'CCDC175', 'ELOVL7', 'MMRN1', 'SERPINE2', 'RBPMS2', 'MEST', 'GSTM5', 'KAZN',
    'RGS18', 'MYLK', 'MAPK12', 'MYH7', 'TRPC6', 'CAVIN1', 'GSTT2B', 'CD34', 'SAMD14', 'KIFC3',
    'PPBP', 'DPPA4', 'TCEAL9', 'GP6', 'HPGDS', 'ITGA2', 'TUBA8', 'TIMP3', 'SLC24A3', 'LY6G6F',
    'SH2D4A', 'DKK1', 'MLC1', 'PRTFDC1', 'CKAP2L', 'LTC4S', 'LHFPL6', 'MGLL', 'PRKG1', 'EMILIN1',
    'GATA2-AS1', 'ITGA2B', 'CIB3', 'STXBP5', 'SEPTIN5', 'CLEC2L', 'FAR2', 'TMEM163', 'RSPH1', 'DLC1',
]
sc.tl.score_genes(adata, gene_list=gene_list, score_name='megakaryocytic_CD34_Megakaryoblast')
# megakaryocytic_Megakaryocyte gene signature; sc.tl.score_genes stores the
# per-cell score in adata.obs['megakaryocytic_Megakaryocyte'].
gene_list = [
    'COX5A', 'ACTB', 'RPS17', 'HNRNPDL', 'RPS26', 'ATP5MC2', 'RPL19', 'RPS28', 'NAA38', 'EEF2',
    'RANBP1', 'RPL22', 'RPL9', 'SNRPB', 'COX8A', 'RPL21', 'RPS12', 'TPI1', 'RPS7', 'CFL1',
    'NACA', 'TMA7', 'COX6C', 'RPL10A', 'LSM3', 'PFN1', 'EEF1B2', 'NDUFB2', 'COX7C', 'RPS3',
    'HNRNPA1', 'TPT1', 'RPL13', 'RPS16', 'CHCHD2', 'PTMS', 'RPS6', 'TXN', 'LDHA', 'RPL18A',
    'RPL14', 'RPS21', 'S100A4', 'RPS9', 'RPSA', 'S100A6', 'SNRPD1', 'COX7B', 'RPL12', 'CYBA',
    'HNRNPC', 'COX7A2', 'SOD1', 'RPLP0', 'TUBA1B', 'BANF1', 'PCLAF', 'ATP5F1B', 'ATP5F1E', 'ANP32B',
    'NDUFA4', 'HSPB1', 'CALR', 'COX5B', 'HSP90AB1', 'POLR2L', 'PSME1', 'RPL41', 'UQCRB', 'PFDN5',
    'BIRC5', 'RPS2', 'HNRNPA2B1', 'RPL11', 'DUT', 'RPS27', 'RPS15A', 'GSTP1', 'ATP5MC3', 'FAU',
    'RPL23A', 'RPS27A', 'RPL29', 'RPL18', 'RPS29', 'HINT1', 'SNRPF', 'RPL30', 'STMN1', 'RPL27A',
    'SELENOH', 'RPS14', 'H2AZ1', 'RPL34', 'SNRPE', 'TMED2', 'DBI', 'RPL35', 'PSMA7', 'RPL36AL',
    'RPL7A', 'RPL7', 'RPL35A', 'RPS25', 'SLC25A6', 'RPL10', 'ENO1', 'COX4I1', 'RPL23', 'HMGN2',
    'RPL39', 'HMGB2', 'SEC61G', 'RPS27L', 'RPS15', 'YBX1', 'UQCRQ', 'PPIB', 'ABRACL', 'RPL3',
    'HSP90AA1', 'ATP5MF', 'CAMK1', 'SLC25A5', 'ARPC3', 'RBM3', 'RPL8', 'RPL13A', 'RPL37', 'ATP5MPL',
    'HSPA8', 'TTK', 'RPLP1', 'RPL38', 'PTTG1', 'RPL36', 'DTD1', 'RPS19', 'SUMO2', 'RPL24',
    'RPL6', 'NDUFB1', 'IFITM2', 'RPS8', 'ATP5ME', 'ERH', 'ATP5MD', 'RPS20', 'SRSF3', 'UBE2C',
    'RPS10', 'ACTG1', 'RPL32', 'RPL26', 'PSMA4', 'HMGN1', 'RPLP2', 'RPS23', 'RPL28', 'RPL15',
    'EEF1A1', 'RPL37A', 'TUBB', 'IDH2', 'MIF', 'PPIA', 'TYMS', 'TESC', 'RPL4', 'ATP5PO',
    'SNRPG', 'RPS4X', 'UBA52', 'ACTR3', 'RACK1', 'NPM1', 'RPS24', 'RPL31', 'SEC61B', 'RPS13',
    'GAPDH', 'RPS3A', 'RPL5', 'RPL27', 'RPS11', 'PLEK', 'PTMA', 'RPL36A', 'BTF3', 'TMSB10',
    'RPS5', 'ENY2', 'EEF1D', 'SNRPD2', 'RAN', 'ATP5MG', 'RPS18', 'EIF3E', 'LDHB', 'CYCS',
]
sc.tl.score_genes(adata, gene_list=gene_list, score_name='megakaryocytic_Megakaryocyte')
# megakaryocytic_Platelet gene signature; sc.tl.score_genes stores the
# per-cell score in adata.obs['megakaryocytic_Platelet'].
gene_list = [
    'GP1BA', 'SELP', 'ABCC3', 'SPOCD1', 'C2orf88', 'HSD17B3', 'ASAP2', 'ARG2', 'LEFTY1', 'NCOA4',
    'SPX', 'F2RL3', 'SPARC', 'CTTN', 'NEXN', 'AQP10', 'IRX3', 'TREML1', 'FERMT3', 'F13A1',
    'H2AC6', 'TUBB1', 'LINC01151', 'MYL9', 'LCN2', 'PTPN18', 'H2BC11', 'ENKUR', 'PARD3', 'SH3BGRL2',
    'TDRP', 'MFAP3L', 'RNF212B', 'PTGS1', 'RGS6', 'TSPAN9', 'CAV2', 'TRAPPC3L', 'LINC01088', 'PROS1',
    'SPDYC', 'PRKAR2B', 'ANXA3', 'ITGB3', 'CMTM5', 'CABP5', 'PDLIM1', 'C15orf54', 'SCGB1C1', 'TPM4',
    'GRAP2', 'GSTA1', 'ARMC3', 'H2BC21', 'BEX3', 'GUCY1B1', 'DNM3', 'EGF', 'TMEM140', 'AP1M2',
    'GP9', 'PVALB', 'ZNF438', 'RUFY1', 'GRB14', 'LINC00853', 'CLDN5', 'PF4V1', 'MPIG6B', 'MMD',
    'CALD1', 'TBXA2R', 'TNNC2', 'CLEC1B', 'GAS2L1', 'ILK', 'SCN1B', 'ESAM', 'MAP3K7CL', 'MPP1',
    'ALOX12', 'PLAAT1', 'DAB2', 'TPM1', 'BEND2', 'HRAT92', 'LINC00642', 'OR2B6', 'SMIM5', 'NT5C3A',
    'CD9', 'C19orf33', 'SEC14L5', 'VIL1', 'GFI1B', 'VEPH1', 'ATP9A', 'DCLRE1A', 'AVPR1A', 'CLU',
    'NRGN', 'GMPR', 'ARHGAP6', 'PGRMC1', 'OVOS2', 'TSC22D1', 'CFAP161', 'THEM5', 'SCGB1C2', 'H3C10',
    'FAXDC2', 'IGF2BP3', 'MLH3', 'PDE5A', 'UPK1A-AS1', 'GNAZ', 'HEXIM2', 'LINC00989', 'SMOX', 'MYEOV',
    'RNF11', 'LGALSL', 'MAX', 'RAB27B', 'INKA2-AS1', 'P2RY12', 'MARCHF2', 'CAVIN2', 'TUBA4A', 'HGD',
    'PDGFA', 'MEIS1', 'SENCR', 'PF4', 'ACRBP', 'PTCRA', 'ELOVL7', 'TPTEP1', 'LURAP1L', 'YWHAH',
    'CHST8', 'PLA2G12A', 'RGS18', 'CTSA', 'MYLK', 'PCP2', 'STON2', 'ITGB5', 'LINC00534', 'GRHL1',
    'PEAR1', 'WDR11-AS1', 'DAPP1', 'TNFSF4', 'MTURN', 'FRMD3', 'TRIM40', 'DMTN', 'PPBP', 'KIFC3',
    'GP6', 'PDGFRA', 'TUBA8', 'LY6G6F', 'TMEM40', 'LGALS12', 'PDZK1IP1', 'MMP1', 'GNG11', 'CXCL5',
    'LIMS1', 'LIPH', 'SNCA', 'PTPRN', 'ITGA2B', 'SEPTIN5', 'ABLIM3', 'RD3L', 'MINDY1', 'VIM-AS1',
]
sc.tl.score_genes(adata, gene_list=gene_list, score_name='megakaryocytic_Platelet')
# myeloid_CD14_Monocyte gene signature; sc.tl.score_genes stores the
# per-cell score in adata.obs['myeloid_CD14_Monocyte'].
gene_list = [
    'PYCARD', 'CPVL', 'BASP1', 'FCGR1A', 'ACTB', 'S100A8', 'NCF2', 'NINJ1', 'BCL2A1', 'FPR1',
    'DMXL2', 'ATP6V0B', 'OSCAR', 'ASAH1', 'S100A10', 'TYMP', 'KLF4', 'ATP2B1', 'BRI3', 'GNAI2',
    'GPX1', 'RAB31', 'MGST1', 'VIM', 'TYROBP', 'S100A12', 'PSAP', 'LILRA5', 'CTSS', 'COTL1',
    'SH3BGRL3', 'MNDA', 'ASGR1', 'TMEM176B', 'S100A6', 'PLBD1', 'S100A4', 'CD302', 'TIMP2', 'IL1B',
    'CYBA', 'NUP214', 'ITGAM', 'MXD1', 'CD68', 'PILRA', 'VCAN', 'CES1', 'SCO2', 'CD93',
    'THBS1', 'PRAM1', 'FOSL2', 'PLSCR1', 'KCTD12', 'ATP2B1-AS1', 'TIMP1', 'HIF1A', 'MCEMP1', 'FGR',
    'PTPRE', 'SPI1', 'PLXDC2', 'SERPINA1', 'IFI30', 'RNASE6', 'TKT', 'G0S2', 'CAPG', 'LGALS3',
    'FTL', 'CLEC4A', 'RHOB', 'IRAK3', 'S100A11', 'CKAP4', 'AP2S1', 'SLC11A1', 'GSTP1', 'TNFSF13B',
    'NFIL3', 'LILRB2', 'ANXA2', 'SLC2A3', 'ANXA1', 'KLF10', 'LRP1', 'RETN', 'CLEC12A', 'CSTA',
    'CEBPB', 'CFD', 'ELOB', 'MT-CO1', 'NPC2', 'LGALS2', 'C5AR1', 'CLEC4E', 'SRGN', 'VSIR',
    'HLA-DRA', 'CTSH', 'BLVRB', 'LST1', 'GRN', 'LY86', 'TMEM170B', 'CD86', 'MPEG1', 'PELATON',
    'CYP1B1', 'CFP', 'CREB5', 'GCA', 'S100A9', 'IGSF6', 'ANPEP', 'CST3', 'CD36', 'EMP3',
    'ATP5MPL', 'RNASE2', 'TNFAIP2', 'JUND', 'CLEC7A', 'LYZ', 'MCL1', 'DUSP1', 'TGFBI', 'STAB1',
    'NAMPT', 'HNMT', 'FCN1', 'SAT1', 'ADA2', 'C19orf38', 'ANXA5', 'CTSZ', 'PLAUR', 'ZFAND5',
    'MARCKS', 'FCGRT', 'AIF1', 'SLC7A7', 'ZNF385A', 'CCR1', 'RGS2', 'C4orf48', 'FTH1', 'APOBEC3A',
    'FCER1G', 'TMEM176A', 'FGL2', 'ZFP36L1', 'CD14', 'MTMR11', 'FOS', 'TSPO', 'HBEGF', 'CDA',
    'RBP7', 'NEAT1', 'APLP2', 'AP1S2', 'MAFB', 'LY96', 'QPCT', 'ODF3B', 'GAPDH', 'DUSP6',
    'CXCL8', 'CYBB', 'LGALS1', 'OTUD1', 'BST1', 'CTSD', 'AGTRAP', 'GLUL', 'KLF6', 'RNF130',
    'CSF3R', 'IER5', 'CEBPD', 'LRRC25', 'MS4A6A', 'IQGAP1', 'STX11', 'LRRK2', 'TRIB1', 'IER3',
    'OAZ1', 'TREM1', 'CD300E', 'SGK1', 'STXBP2', 'LCP1',
]
sc.tl.score_genes(adata, gene_list=gene_list, score_name='myeloid_CD14_Monocyte')
# myeloid_CD16_Monocyte gene signature; sc.tl.score_genes stores the
# per-cell score in adata.obs['myeloid_CD16_Monocyte'].
gene_list = [
    'PYCARD', 'SMPDL3A', 'C1QB', 'NCF2', 'NINJ1', 'SLC43A2', 'RNF144B', 'ABCC3', 'HLA-DPA1', 'BCL2A1',
    'ASAH1', 'MRAS', 'CPPED1', 'TYMP', 'ZDHHC1', 'CARD16', 'BRI3', 'MTRNR2L1', 'SLC2A6', 'CD300C',
    'CMTM6', 'TYROBP', 'RXRA', 'PSAP', 'LILRA5', 'CTSS', 'COTL1', 'PTP4A3', 'LGALS9', 'NEURL1',
    'TMEM176B', 'HCK', 'FAM110A', 'CHST7', 'PTGES', 'LINC00877', 'DOCK5', 'SFTPD', 'MSR1', 'MAPKAPK3',
    'NAAA', 'TPPP3', 'FCGR2A', 'CD68', 'PILRA', 'NFKBIZ', 'HSBP1', 'CXCL16', 'MS4A4A', 'PRAM1',
    'HK3', 'ARRB2', 'C5AR2', 'TIMP1', 'RHOC', 'VASP', 'FGR', 'SIGLEC10', 'SPI1', 'SERPINA1',
    'IFI30', 'TCF7L2', 'HES4', 'MS4A7', 'PTPN6', 'TKT', 'LGALS3', 'G0S2', 'LYPD2', 'FTL',
    'LYN', 'NR4A1', 'RNASET2', 'RHOB', 'C3AR1', 'C1QA', 'BATF3', 'PLXNB2', 'S100A11', 'SLC11A1',
    'ZMYND15', 'LILRB2', 'ANXA2', 'CD300LF', 'WARS1', 'INSIG1', 'NUDT16', 'CLEC12A', 'CSTA', 'CEBPB',
    'SNX18', 'CFD', 'SLC31A2', 'MTSS1', 'PECAM1', 'NPC2', 'FCGR3A', 'SOD2', 'C5AR1', 'CSF1R',
    'TBXAS1', 'VSIR', 'SLC24A4', 'RRAS', 'ABI3', 'LST1', 'CD86', 'SECTM1', 'PELATON', 'MPEG1',
    'CEACAM3', 'PPP1R17', 'FIBCD1', 'CFP', 'RHOG', 'CAMK1', 'GRK2', 'TCIRG1', 'CST3', 'LYNX1',
    'LILRB1', 'FMNL2', 'CLEC7A', 'THEMIS2', 'LYZ', 'LILRA1', 'NAMPT', 'ADGRE2', 'FCN1', 'SCIMP',
    'SAT1', 'CYRIA', 'POU2F2', 'C19orf38', 'CUX1', 'ANXA5', 'SH3BP2', 'HOTAIRM1', 'ZFAND5', 'PLAUR',
    'MARCKS', 'ITGAX', 'CDH23', 'TNFRSF8', 'FCGRT', 'AIF1', 'SLC7A7', 'GPBAR1', 'MEG3', 'RGS2',
    'FTH1', 'APOBEC3A', 'MYD88', 'FCER1G', 'PRELID1', 'LTA4H', 'CKB', 'FGL2', 'VMO1', 'CASP5',
    'ZNF703', 'CASP1', 'BID', 'SLC16A3', 'TBC1D8', 'TESC', 'OAS1', 'IFITM3', 'LPL', 'NEAT1',
    'AP1S2', 'MAFB', 'LY96', 'CDKN1C', 'LILRA2', 'CYBB', 'LGALS1', 'HMOX1', 'TNFRSF1B', 'LILRB3',
    'PDK4', 'CSTB', 'CALHM6', 'UNC119', 'FKBP1A', 'LRRC25', 'CEBPD', 'ADGRE1', 'CTSL', 'PAPSS2',
    'KNDC1', 'STX11', 'TAGLN', 'CD300E', 'STXBP2', 'IFITM2',
]
sc.tl.score_genes(adata, gene_list=gene_list, score_name='myeloid_CD16_Monocyte')
# myeloid_CD34_Monoblast gene signature; sc.tl.score_genes stores the
# per-cell score in adata.obs['myeloid_CD34_Monoblast'].
gene_list = [
    'PYCARD', 'DPYSL2', 'PRSS57', 'CITED4', 'S100A8', 'H1-10', 'TFEC', 'GGCT', 'ATP6V0B', 'SH2B3',
    'NAA38', 'CCL23', 'RANBP1', 'NUCKS1', 'TYMP', 'PRDX3', 'PLD4', 'COX8A', 'KLF4', 'ALYREF',
    'SPARC', 'SERPINB8', 'TPI1', 'GNAI2', 'GPX1', 'MGST1', 'RAB31', 'VIM', 'F13A1', 'TYROBP',
    'RNASEH2C', 'MT-ND4L', 'GGH', 'MNDA', 'TXN', 'MKI67', 'CEACAM4', 'SERPINB1', 'FES', 'PKM',
    'RPN1', 'S100A4', 'PHPT1', 'CD302', 'MGST2', 'KIF22', 'LSM5', 'TIMP2', 'RFLNB', 'BLOC1S1',
    'CYBA', 'LRP3', 'HSBP1', 'TUBA1B', 'IGFBP7', 'RAB3D', 'VCAN', 'SPCS3', 'PCLAF', 'CES1',
    'CENPW', 'MS4A4A', 'PRAM1', 'CLEC11A', 'IFI27L2', 'ASRGL1', 'PGAM1', 'CEBPA', 'DEK', 'POLR2L',
    'KCTD12', 'H2AJ', 'UHRF1', 'HGF', 'MCEMP1', 'SPI1', 'EIF4EBP1', 'YWHAE', 'BIRC5', 'RNASE6',
    'TKT', 'CAPG', 'ZWINT', 'PRLR', 'CDCA7', 'IRAK3', 'PLAC8', 'SLC44A1', 'DUT', 'NREP',
    'AP2S1', 'GSTP1', 'TNFSF13B', 'ANXA2', 'CKS1B', 'HNRNPAB', 'CTSG', 'ANXA1', 'STMN1', 'H2AZ1',
    'RETN', 'DBI', 'QSOX1', 'CSTA', 'CFD', 'PRTN3', 'MPO', 'ENO1', 'TOP2A', 'SRGN',
    'HMGN2', 'HDGF', 'HMGB2', 'HLA-DRA', 'SEC61G', 'LST1', 'GRN', 'LY86', 'ATP8B4', 'UQCRQ',
    'MICOS10', 'KBTBD11', 'NDUFS6', 'GRK2', 'P2RY2', 'S100A9', 'RNASE3', 'CST3', 'HSP90B1', 'AZU1',
    'ATP5MPL', 'RNASE2', 'CD63', 'JUND', 'NRGN', 'EMILIN2', 'LAMTOR2', 'RPLP1', 'ATOX1', 'LYZ',
    'NUDT1', 'MCL1', 'CEP55', 'PTTG1', 'VAMP8', 'NDUFB1', 'PCNA', 'ATP5ME', 'MS4A3', 'ANXA5',
    'UBE2C', 'FABP5', 'CTSZ', 'AIF1', 'RPA3', 'C4orf48', 'FLT3', 'FCER1G', 'SAMHD1', 'FLNA',
    'MRPL23', 'ELANE', 'NT5DC2', 'TUBB', 'TYMS', 'MTMR11', 'RECK', 'FOS', 'TSPO', 'YWHAH',
    'MACROH2A1', 'TMPO', 'IRF2BP2', 'SERPINB10', 'APLP2', 'ANKRD28', 'CAPNS1', 'EGR1', 'TK1', 'GAPDH',
    'KCNE5', 'HNRNPD', 'KIAA0930', 'LGALS1', 'CTSD', 'DEFB1', 'RNF130', 'C20orf27', 'CEBPD', 'MS4A6A',
    'IQGAP1', 'RAB32', 'HMGB3', 'STXBP2', 'LCP1',
]
sc.tl.score_genes(adata, gene_list=gene_list, score_name='myeloid_CD34_Monoblast')
# myeloid_CD34_Myeloblast gene signature; sc.tl.score_genes stores the
# per-cell score in adata.obs['myeloid_CD34_Myeloblast'].
gene_list = [
    'RPL22L1', 'COMT', 'PRSS57', 'GGCT', 'PCBP3', 'NAA38', 'DNPH1', 'RANBP1', 'PKP2', 'EMID1',
    'SLPI', 'SYNGR1', 'SPARC', 'P4HB', 'CLDN10', 'COL17A1', 'EGFL7', 'CDH26', 'MGST1', 'MTARC1',
    'FARSA', 'NUCB2', 'MYB', 'RNASEH2C', 'SPINK2', 'GGH', 'MANF', 'RAC3', 'PRRT4', 'CYTL1',
    'TXN', 'DENND10', 'CEACAM4', 'SERPINB1', 'VAMP5', 'AFF2', 'B4GALT6', 'SDF2L1', 'AK4', 'RPN1',
    'PHPT1', 'MGST2', 'MRPL51', 'NENF', 'NPM3', 'RFLNB', 'AIF1L', 'HSBP1', 'IGFBP7', 'EBPL',
    'PCLAF', 'IGFBP2', 'PRAM1', 'CALR', 'HSPB1', 'CLEC11A', 'ITM2C', 'CEACAM6', 'CEBPA', 'ARL2',
    'TMED3', 'GPR12', 'HGF', 'BEX3', 'IGLL1', 'SEM1', 'STAR', 'EIF4EBP1', 'OSTC', 'HSPA5',
    'SH3D21', 'EREG', 'SNHG19', 'TCEAL4', 'RUVBL2', 'CDCA7', 'CRYGD', 'SLC2A5', 'MRPL12', 'PLAC8',
    'TIMM13', 'DUT', 'NREP', 'GSTP1', 'TNFSF13B', 'TRH', 'CTSG', 'STMN1', 'DEFA4', 'H2AZ1',
    'SNRPE', 'HMGA1', 'DNAJC12', 'CFD', 'PRTN3', 'AK2', 'HMGN5', 'MPO', 'GPI', 'RAB44',
    'ENO1', 'SRGN', 'PROM1', 'ERLIN1', 'CPXM1', 'PDIA6', 'ATP8B4', 'TSPOAP1', 'GCSH', 'PRSS21',
    'SMIM24', 'KBTBD11', 'FAH', 'RNASE3', 'KIT', 'GNA15', 'HSP90B1', 'CLU', 'AZU1', 'MYL6B',
    'RAB34', 'RNASE2', 'CAT', 'PHGDH', 'UGT3A2', 'VAT1', 'RPLP1', 'ATOX1', 'SUCNR1', 'VAMP8',
    'LPCAT2', 'BEX1', 'PCNA', 'HOXA10', 'MS4A3', 'PPP1R27', 'SCCPDH', 'FABP5', 'TRIM28', 'PLPPR3',
    'SPNS3', 'IMPDH2', 'LRMDA', 'LKAAEAR1', 'AIF1', 'RPA3', 'NPDC1', 'NPW', 'CEBPE', 'C1QTNF4',
    'MSRB3', 'ELANE', 'CDK6', 'HYAL3', 'MIF', 'TYMS', 'VKORC1', 'BCAT1', 'YBX3', 'HSPD1',
    'MEST', 'DPM3', 'SERPINB10', 'CPA3', 'ARMH1', 'MAPK12', 'C9orf43', 'AREG', 'ANKRD28', 'SELENOP',
    'CD34', 'KCNE5', 'SPNS2', 'TCEAL9', 'DEFB1', 'ATP5MC1', 'FKBP2', 'CKS2', 'HPGDS', 'TENT5A',
    'CSF3R', 'CANX', 'DYNLL1', 'SOX4', 'MLC1', 'SNORC', 'KRT8', 'CST7', 'NAALADL1', 'RAB32',
    'NME1', 'CHID1', 'MLEC', 'PDIA4',
]
sc.tl.score_genes(adata, gene_list=gene_list, score_name='myeloid_CD34_Myeloblast')
#myeloid_cDC
gene_list=['PEBP1', 'COX5A', 'CPVL', 'RAB7A', 'GSN', 'ENSA', 'EIF3I', 'HLA-DPA1', 'EIF2A', 'EFNB1', 'PKIB', 'IDO1', 'EEF2', 'ARL5A', 'WDR33', 'PLD4', 'SNRPB', 'HLA-DRB1', 'BUD23', 'SYNGR2', 'HLA-DQA2', 'HERPUD1', 'EIF3D', 'ATP5F1A', 'CD1E', 'DUSP2', 'RAB5C', 'ACAA1', 'EEF1B2', 'MAP3K8', 'ITGB2', 'LGALS9', 'TUFM', 'EIF4A1', 'IL13RA1', 'ARF6', 'C12orf75', 'CD2', 'TCTN3', 'YIF1B', 'TAGLN2', 'PTRHD1', 'CD1C', 'ITGB7', 'APH1A', 'DDIT4', 'ADAM8', 'TUBA1B', 'NDUFA12', 'IGFBP7', 'ATP5F1B', 'DENND1B', 'ADAM28', 'PDLIM1', 'GPR183', 'EIF3H', 'TMEM109', 'EVL', 'HSP90AB1', 'EIF3F', 'PMAIP1', 'BRK1', 'IL18', 'DUSP4', 'LSP1', 'HAVCR2', 'C1QBP', 'AP2M1', 'NDRG2', 'C7orf50', 'RGS10', 'NDUFV2', 'PHLDA2', 'FBLN2', 'RNASET2', 'TMEM273', 'ATP1B1', 'IL1R2', 'BIN1', 'LMNA', 'CD74', 'TCEA3', 'ATP5MC3', 'HLA-DOB', 'CLEC9A', 'HINT1', 'COL9A2', 'ANXA2', 'SNRPF', 'RNH1', 'PFDN2', 'HLA-DQB2', 'LIMD2', 'SLC25A3', 'IFT20', 'HMGA1', 'GPAT3', 'CSF2RA', 'CNN2', 'PON2', 'PAK1', 'CD83', 'SERBP1', 'UPK3A', 'CCT3', 'SEMA4A', 'LGALS2', 'CIITA', 'HLA-DQB1', 'VDAC1', 'HLA-DMA', 'SMDT1', 'HLA-DRB5', 'CTSH', 'HLA-DRA', 'CYRIB', 'ATP5F1C', 'C19orf33', 'CD86', 'C1orf162', 'LY86', 'RGS1', 'NR4A3', 'CRIP3', 'OXA1L', 'SEPTIN6', 'HLA-DPB1', 'PPIB', 'CREM', 'SLC25A5', 'SRSF2', 'CIB1', 'CADM1', 'CCDC12', 'HSPA8', 'FBL', 'TCOF1', 'MDH2', 'UFC1', 'ARPC1B', 'HLA-DOA', 'PPP1R16A', 'TMEM14C', 'CYC1', 'EZR', 'DECR1', 'FCER1A', 'MAP4K1', 'EIF3L', 'SPINT2', 'SNHG32', 'PARK7', 'HADHA', 'GADD45B', 'HMGN1', 'HNRNPA0', 'JAML', 'HLA-DQA1', 'SAMHD1', 'PRCP', 'PPP1R14A', 'HEXA', 'BID', 'TMEM9B', 'ARL4C', 'CD33', 'UBE2L3', 'PPA1', 'ALDH2', 'PSMB6', 'ARF5', 'ST13', 'AREG', 'APEX1', 'CLEC10A', 'ENHO', 'FCGR2B', 'PHACTR1', 'BCL7A', 'CLIC2', 'PPT1', 'CXCR4', 'TOB1', 'PSMB1', 'HLA-DMB', 'KCNK6', 'CSTB', 'PHB2', 'GDI2', 'IL2RG', 'RBPJ', 'CCSER1', 'FAM162A', 'CD48', 'MTDH', 'EIF3E', 'CACNA2D3', 'LDHB']
sc.tl.score_genes(adata, gene_list, score_name='myeloid_cDC') 
#myeloid_Macs
gene_list=['CPVL', 'FEZ1', 'PRDX1', 'C1QB', 'SMPDL3A', 'SCD', 'GPNMB', 'C11orf96', 'FGF13', 'NINJ1', 'FRMD4B', 'CXCL12', 'PLA2G7', 'MRC1', 'CCL15', 'EBI3', 'ZBTB7C', 'PCDHB7', 'TSPAN4', 'CCL14', 'TMEM37', 'WNT5A', 'EGFL7', 'GPX1', 'MARCO', 'TYROBP', 'PSAP', 'CCL18', 'EPB41L2', 'FXYD6', 'SIGLEC1', 'VCAM1', 'PTMS', 'TMIGD3', 'CREG1', 'LGMN', 'SMTNL2', 'MSR1', 'PPIC', 'CCL8', 'CD68', 'RAB42', 'SPIC', 'MS4A4A', 'HSD17B14', 'RENBP', 'LILRB5', 'SPSB4', 'RGL1', 'EPHX1', 'SDS', 'CCL2', 'FOLR2', 'BEX3', 'C2', 'SPATS2L', 'NRP1', 'MS4A7', 'C1orf54', 'FTL', 'PLD3', 'GPX3', 'MRO', 'NPL', 'FABP3', 'ATP1B1', 'HS3ST2', 'C1QA', 'ADI1', 'SLC46A1', 'SOX17', 'S100A11', 'CD74', 'GPR137B', 'KCNMA1', 'ADGRL4', 'IFT74', 'CD163L1', 'SCN1B', 'PCSK2', 'SLC7A8', 'DAB2', 'LINC01503', 'RBP1', 'RND3', 'NPC2', 'TPD52L1', 'FCGR3A', 'LINC01356', 'CHCHD6', 'ME1', 'PPARG', 'HLA-DRB5', 'HLA-DRA', 'BLVRB', 'TRDN', 'MMP19', 'CD59', 'NXF3', 'GNG12', 'IGSF6', 'CST3', 'FPR3', 'SDC3', 'IGSF21', 'LYVE1', 'PLA2G15', 'SLC1A3', 'CD63', 'SRPX2', 'PLTP', 'ADORA3', 'SDC2', 'FCN2', 'TGFBI', 'C1QC', 'HNMT', 'SAT1', 'CD5L', 'OTOA', 'FABP5', 'CTSZ', 'MCOLN1', 'NQO1', 'SULT1A1', 'PDZRN3', 'MARCKS', 'VSIG4', 'GATM', 'KCNAB1', 'FCGRT', 'AIF1', 'NUPR1', 'A2M', 'NR1H3', 'SLC48A1', 'FTH1', 'DDAH2', 'FCER1G', 'GPR34', 'GFRA2', 'LGALS3BP', 'ETV5', 'IGF1', 'APOE', 'ACP5', 'VMO1', 'LIPA', 'SIGLEC11', 'PKD2L1', 'KCNJ10', 'SLC40A1', 'FABP4', 'LINC00635', 'APOC1', 'IFI27', 'TM4SF18', 'GABRA2', 'EDNRB', 'USP2', 'CTSB', 'CRYBB1', 'LY96', 'SELENOP', 'QPRT', 'CETP', 'PDCD1LG2', 'ELF5', 'NAGK', 'HMOX1', 'CTSD', 'CCL13', 'MYO7A', 'HPGDS', 'GLUL', 'CSTB', 'PLPP3', 'RNASE1', 'SLCO2B1', 'ADAP2', 'AXL', 'TIMD4', 'MS4A6A', 'CTSL', 'DNASE2B', 'RAB32', 'NRP2', 'GPIHBP1', 'CD163', 'ATP6V0D2', 'TMSB4X', 'FUCA1', 'CTSC']
sc.tl.score_genes(adata, gene_list, score_name='myeloid_Macs') 
#myeloid_pDC
gene_list=['GSN', 'CCDC183', 'BLNK', 'PLD4', 'TCF4', 'UBE2J1', 'SLC20A1', 'APP', 'CCDC50', 'CLN8', 'SMIM3', 'HERPUD1', 'GPX1', 'P2RY14', 'SERPINF2', 'JCHAIN', 'KRT5', 'TLR7', 'SERPINF1', 'TNFRSF21', 'A1BG', 'MYBL2', 'TNNI2', 'PHB', 'UGCG', 'PTMS', 'MAP1A', 'PLXNA4', 'C12orf75', 'TXN', 'BCL11A', 'PTGDS', 'IRF8', 'CCDC189', 'PALD1', 'FAM160A1', 'SELENOS', 'SCN9A', 'LGMN', 'SMIM6', 'GAPT', 'PACSIN1', 'WDFY4', 'SFT2D2', 'CD68', 'PLP2', 'CXCR3', 'NUDT17', 'SCT', 'GPR183', 'ITM2C', 'COBLL1', 'C12orf45', 'DUSP5', 'RUBCN', 'ARID3A', 'SUSD1', 'LINC01374', 'PTPRE', 'CADM4', 'NRP1', 'IGKC', 'SEMA7A', 'TTC39A', 'RNASE6', 'CAPG', 'TMEM210', 'VASH2', 'RNASET2', 'CYP46A1', 'RASD1', 'PLAC8', 'SIDT1', 'NTM', 'RPS6KA4', 'DRD4', 'ALOX5AP', 'UNC93B1', 'OPN3', 'CLEC4C', 'ANKRD53', 'STMN1', 'SEL1L3', 'SLC7A11', 'HIGD1A', 'PROC', 'PLVAP', 'DAB2', 'DERL3', 'EPHA2', 'COL26A1', 'NPC2', 'RUNX2', 'KCNK17', 'TP53I11', 'HLA-DMA', 'LDLRAD4', 'SMIM5', 'CRYM-AS1', 'HLA-DRA', 'TPM2', 'PTPRS', 'SLC4A3', 'NEK8', 'MPEG1', 'PFKFB2', 'SLC7A5', 'FUT7', 'NLRP7', 'CSF2RB', 'CYB561A3', 'EPHB1', 'LHFPL2', 'SPCS1', 'ATG101', 'CRYM', 'LILRB4', 'SMPD3', 'IRF7', 'CST3', 'GNA15', 'LINC00996', 'RHEX', 'SLC35F3', 'LRRC26', 'MAPKAPK2', 'GAS6', 'LILRA4', 'TGFBI', 'GPRC5C', 'LAMP5', 'SCAMP5', 'TSPAN13', 'SLC12A3', 'CLIC3', 'ZFAT', 'PRXL2A', 'FCER1A', 'IDH3A', 'CDH23', 'TMEM8B', 'ST14', 'FLNB', 'SLC15A4', 'RAB11FIP1', 'FCER1G', 'JAML', 'ZDHHC17', 'EGLN3', 'CCDC88A', 'IRF4', 'KIRREL3', 'PTCRA', 'GZMB', 'PPP1R14B', 'CUX2', 'CUEDC1', 'NOTCH4', 'RRBP1', 'PHEX', 'CTSB', 'CDK2AP2', 'PPM1K', 'GNG7', 'SEC61B', 'LCN12', 'IL3RA', 'PHACTR1', 'KCNK10', 'MZB1', 'DNASE1L3', 'HMSD', 'FKBP2', 'CD2AP', 'CBFA2T3', 'FAM221B', 'LRRC36', 'SOX4', 'ASIP', 'LINC00865', 'PPM1J', 'KCNA5', 'NIBAN3', 'LCNL1', 'CIB2', 'SPIB', 'TRAF4', 'SHD', 'P3H2', 'VEGFB']
sc.tl.score_genes(adata, gene_list, score_name='myeloid_pDC') 
#myeloid_stromal
gene_list=['PCDH18', 'C11orf96', 'OLFML3', 'KITLG', 'NOTCH3', 'ANTXR1', 'CXCL12', 'FMO3', 'ALPL', 'SIX1', 'ALDH1A3', 'NTRK2', 'ADAMTS9', 'GHR', 'RSPO1', 'OSR2', 'TNS2', 'SFRP1', 'SERTAD4', 'SPARCL1', 'VEGFC', 'ITIH5', 'PDE1A', 'TMEM37', 'ACSM5', 'FBN1', 'PLEKHS1', 'APOD', 'PDGFRL', 'PLIN1', 'VCAM1', 'EGFLAM', 'TEAD1', 'FBLN1', 'BGN', 'CAV2', 'ADIRF', 'SCARA3', 'LEPR', 'ADIPOQ', 'DLX5', 'PPIC', 'MT1M', 'OSMR', 'TENM2', 'CRH', 'FRZB', 'ADAMTS1', 'NCAM2', 'TF', 'IGFBP2', 'ABCA9', 'SVEP1', 'COL16A1', 'RBMS3', 'DEPP1', 'LGI4', 'CCL2', 'CCN2', 'OLFML2B', 'TNFRSF11B', 'C1R', 'GLDN', 'SULF1', 'COL28A1', 'FSTL1', 'IL34', 'COL6A1', 'LAMA4', 'PGF', 'MXRA5', 'SRPX', 'COL3A1', 'C7', 'PRELP', 'LINC00640', 'CALD1', 'EBF3', 'BCAR1', 'NNMT', 'LRATD1', 'LUM', 'MGP', 'IGF2', 'TUSC3', 'APOB', 'DCLK1', 'ZFHX4', 'C1QTNF1', 'FST', 'RBP1', 'RND3', 'RARRES2', 'SOD3', 'ABCA8', 'PPARG', 'YAP1', 'CYP4X1', 'MXRA8', 'C1S', 'BICC1', 'INHBB', 'TRABD2B', 'SERPING1', 'FNDC1', 'NAV2', 'GNG12', 'IL1R1', 'ANGPTL2', 'SNAI2', 'TIMP4', 'ANK2', 'CP', 'FGF7', 'GPX8', 'SLC7A10', 'GAS6', 'PDLIM4', 'WASF3', 'TMEM108', 'DCN', 'PLAC9', 'SEMA6D', 'BMP5', 'KCNE4', 'WWTR1', 'GGT5', 'MMP2', 'LBP', 'AMOTL2', 'EFEMP1', 'SLC7A2', 'EPB41L4B', 'COL14A1', 'RAI14', 'NUPR1', 'CRYAB', 'FRMD6', 'GJA1', 'COL1A2', 'COX7A1', 'GFRA1', 'FAM13C', 'BMPER', 'FOXC1', 'CDH11', 'MEIS2', 'CHRDL1', 'LIFR', 'APOE', 'ESM1', 'TMEM132C', 'TDO2', 'ERRFI1', 'CCN1', 'CCDC80', 'IGFBP5', 'TMEM47', 'MATN3', 'EDNRB', 'LPL', 'SLC22A3', 'FMOD', 'SOX9', 'SELENOP', 'FERMT2', 'NR2F2', 'MRGPRF', 'TWIST2', 'KIRREL1', 'PCOLCE', 'C5orf38', 'VGLL3', 'EGFR', 'FMO2', 'CHL1', 'PDGFRA', 'VWA1', 'DENND2B', 'ENPEP', 'ITGBL1', 'PLPP3', 'MDFI', 'CHST3', 'ADGRL2', 'TNC', 'LHFPL6', 'ISLR', 'THY1', 'EYA1', 'ID4', 'LTBP2', 'DDR2', 'TAGLN', 'TNFAIP6', 'ANGPTL4', 'EPAS1', 'PAPPA', 'ECM2']
sc.tl.score_genes(adata, gene_list, score_name='myeloid_stromal') 
#T_NK_cells_CD4_Naive_T_cell
gene_list=['LEPROTL1', 'LAT', 'ETS1', 'LINC01550', 'RPS17', 'ITPKB', 'RPL19', 'RPS28', 'RHOH', 'RPL22', 'RPL9', 'RPL21', 'RPS12', 'RPS7', 'NACA', 'RPL10A', 'GIMAP7', 'SH3YL1', 'EEF1B2', 'ICOS', 'NOSIP', 'RPS3', 'RPS16', 'TPT1', 'RPL13', 'MAL', 'MALAT1', 'ANXA2R', 'PIK3IP1', 'TRAT1', 'TOMM7', 'CD2', 'RPS6', 'BCL11B', 'KLF2', 'APBB1', 'RPL18A', 'RPL14', 'RPS21', 'LDLRAP1', 'RPSA', 'EPHX2', 'FLT3LG', 'RPL12', 'SNHG8', 'IL32', 'PDE3B', 'RPLP0', 'CD6', 'SOCS3', 'TRAC', 'CD40LG', 'IFITM1', 'STAT3', 'TRABD2A', 'SFXN1', 'RGL4', 'SLFN5', 'EVL', 'NELL2', 'VAMP2', 'RPL41', 'TSHZ2', 'GRAP2', 'CHRM3-AS2', 'LRRN3', 'TNFRSF25', 'JUNB', 'MDS2', 'DGKA', 'ACAP1', 'CAMK4', 'NOP53', 'RGS10', 'FOXP1', 'RPS2', 'RCAN3', 'AQP3', 'RPL11', 'RPS27', 'RPS15A', 'CD3D', 'TCEA3', 'PRMT2', 'TRAF3IP3', 'RPL23A', 'CD52', 'RPS27A', 'RPL18', 'RPS29', 'RPL29', 'CD7', 'ITM2A', 'RPL30', 'IL7R', 'CPA5', 'RPL27A', 'APBA2', 'RPS14', 'LCK', 'RPL34', 'RPL35A', 'RPL7', 'RPS25', 'PLAAT4', 'SPOCK2', 'RPL10', 'PASK', 'NDFIP1', 'GIMAP1', 'LTB', 'RPL39', 'PABPC1', 'SEPTIN1', 'CD28', 'PRKCA', 'RPS15', 'RPL3', 'ADTRP', 'TCF7', 'LINC01089', 'RPL13A', 'RPL37', 'AK5', 'SERINC5', 'GCNT4', 'RPL38', 'MGAT4A', 'RPL36', 'RPS19', 'RPL24', 'RPL6', 'RPS8', 'RPS20', 'RPS10', 'RPL32', 'CLEC2D', 'ZFP36L2', 'SNHG32', 'INPP4B', 'RPLP2', 'RPS23', 'RPL17', 'CCR7', 'EEF1A1', 'CD8B', 'RPL4', 'ATM', 'TMEM123', 'RPS4X', 'SATB1', 'COMMD6', 'CD5', 'ATP6V0E2', 'RACK1', 'PLEKHB1', 'TLE5', 'NPM1', 'RPL31', 'AAK1', 'LINC00861', 'TXK', 'S100B', 'OXNAD1', 'RPS13', 'TRBC2', 'PCSK1N', 'SELENOM', 'FHIT', 'ITK', 'STMN3', 'RPL5', 'RPS3A', 'RPL27', 'CD3E', 'RPL36A', 'TESPA1', 'CD27', 'SARAF', 'GYPC', 'RPS5', 'TRBC1', 'ABLIM1', 'LEF1', 'SUSD3', 'FYB1', 'AP3M2', 'CD3G', 'TSC22D3', 'CD48', 'DNAJB1', 'RPS18', 'EIF3E', 'LDHB']
sc.tl.score_genes(adata, gene_list, score_name='T_NK_cells_CD4_Naive_T_cell') 
#T_NK_cells_CD56bright_NK_cell
gene_list=['FEZ1', 'ID2', 'GSN', 'TGFA', 'MYL12A', 'RAC2', 'KLRF1', 'PILRB', 'BHLHE40', 'ADGRE5', 'FYN', 'DOK2', 'CAPN12', 'PDGFD', 'MYO1F', 'GNLY', 'B2M', 'GPR68', 'TYROBP', 'RGS3', 'PFN1', 'KRT86', 'ARHGAP9', 'DUSP2', 'BTG1', 'SIGLEC7', 'KIR2DL4', 'MAP3K8', 'XCL2', 'ITGB2', 'KLRB1', 'RNF165', 'UBB', 'CD2', 'GZMK', 'SYTL3', 'XCL1', 'LAT2', 'APMAP', 'TNFSF11', 'EIF3G', 'DDIT4', 'CCL4', 'ADAM8', 'PDE6G', 'RIN3', 'TNFRSF18', 'CD244', 'CD160', 'CXCR3', 'IFITM1', 'PAXX', 'EVL', 'HOPX', 'RHOC', 'CLDND1', 'FGR', 'BIN2', 'SELL', 'SH2D1A', 'PRKCH', 'ACAP1', 'WIPF1', 'GATA3', 'KLRC2', 'RAMP1', 'ZMAT4', 'IL18RAP', 'BST2', 'PLAC8', 'MBP', 'ABCB1', 'APOBEC3G', 'GSTP1', 'GZMM', 'CD7', 'NCALD', 'CDHR1', 'PLA2G6', 'ZFP36', 'IL2RB', 'TRDC', 'TPST2', 'IL12RB2', 'MMP23B', 'CMC1', 'CCNJL', 'PRSS33', 'JAK1', 'CTSW', 'RUNX3', 'FAM43A', 'IL18R1', 'COX6A2', 'SRGN', 'PRF1', 'H3-3B', 'MYBL1', 'FASLG', 'NCR3', 'ATP8B4', 'KLRD1', 'AGK', 'FUT7', 'HCST', 'CCL25', 'ARPC5L', 'TBX21', 'TIGIT', 'LIF', 'NCR1', 'IRF1', 'LINC00996', 'CXCR6', 'LPCAT1', 'CD63', 'HLA-E', 'MIB2', 'KRT81', 'OSTF1', 'TMIGD2', 'IGFBP4', 'GPR65', 'HSH2D', 'LY6E', 'CCL5', 'CLIC3', 'NCAM1', 'SUN2', 'LDB2', 'TNFRSF11A', 'EOMES', 'NKG7', 'SPTSSB', 'DMKN', 'AOAH', 'TOX', 'CCL3', 'SH2D1B', 'ADGRG3', 'MATK', 'PTGDR', 'FCER1G', 'B3GNT7', 'TOX2', 'CLNK', 'YPEL1', 'SPRY2', 'GFOD1', 'TRGC1', 'HLA-A', 'CD300A', 'STAT4', 'GZMB', 'FHL3', 'CD247', 'STK17A', 'PIP4K2A', 'MAFF', 'IFITM3', 'TXK', 'SLFN13', 'AREG', 'PYHIN1', 'KLRC1', 'DRAP1', 'SAMD3', 'TNFSF14', 'CHST12', 'IER2', 'ABHD15', 'PIK3R1', 'IL2RG', 'DLL1', 'ZAP70', 'HLA-B', 'CST7', 'MMP25-AS1', 'NFKBIA', 'NAALADL1', 'GZMA', 'TRGV9', 'SEPTIN7', 'CD96', 'SKAP1', 'ZBTB16', 'PPP1R9A', 'TMSB4X', 'HLA-C', 'BCO2', 'IFITM2', 'MCTP2']
sc.tl.score_genes(adata, gene_list, score_name='T_NK_cells_CD56bright_NK_cell') 
#T_NK_cells_CD56dim_NK_cell
gene_list=['FEZ1', 'ID2', 'MYL12A', 'RAC2', 'KLRF1', 'ADGRG1', 'CX3CR1', 'TGFBR3', 'KLRG1', 'DHRS7', 'DOK2', 'PDGFD', 'CAPN12', 'SH2D2A', 'MYO1F', 'GNLY', 'B2M', 'TYROBP', 'RGS3', 'PFN1', 'DUSP2', 'IRF1-AS1', 'SIGLEC7', 'MAP3K8', 'XCL2', 'ITGB2', 'KLRB1', 'RNF165', 'MALAT1', 'UBB', 'C12orf75', 'SYTL3', 'UBE2F', 'PTGDS', 'AKNA', 'APMAP', 'CYBA', 'DDIT4', 'CCL4', 'ADAM8', 'ABHD17A', 'TNFRSF18', 'IGFBP7', 'AKR1C3', 'CD160', 'CD244', 'IFITM1', 'PAXX', 'CCL4L2', 'LITAF', 'NMUR1', 'EVL', 'HOPX', 'GZMH', 'RHOC', 'KIR2DL1', 'FGR', 'HAVCR2', 'BIN2', 'LYAR', 'LAIR2', 'PRKCH', 'SLAMF7', 'WIPF1', 'PLEKHG3', 'KLRC2', 'IL18RAP', 'CEP78', 'OSBPL5', 'GTF3C1', 'PLAC8', 'SYNE1', 'APOBEC3G', 'GZMM', 'CD7', 'ALOX5AP', 'NCALD', 'ZBP1', 'CDC42SE1', 'C1orf21', 'ASCL2', 'IL2RB', 'TRDC', 'TPST2', 'MMP23B', 'GNG2', 'PPP1R18', 'CMC1', 'DTHD1', 'MYOM2', 'PLAAT4', 'CTSW', 'JAK1', 'RUNX3', 'FCGR3A', 'LLGL2', 'SRGN', 'PRF1', 'MYBL1', 'FASLG', 'CYRIB', 'ERBB2', 'NCR3', 'ABI3', 'SLC1A7', 'KLRD1', 'SYTL1', 'HCST', 'TBX21', 'CD99', 'PLAAT3', 'NCR1', 'IRF1', 'GK5', 'S1PR5', 'LPCAT1', 'CD63', 'HLA-E', 'SYNE2', 'MIB2', 'OSTF1', 'TMIGD2', 'IGF2R', 'GPR65', 'HSH2D', 'BNC2', 'PRSS23', 'CCL5', 'CLIC3', 'VIPR2', 'CHST2', 'NCAM1', 'SUN2', 'LGALS9C', 'PRR5L', 'NKG7', 'AOAH', 'KIR2DL3', 'CCL3', 'SCLT1', 'SH2D1B', 'MATK', 'PTGDR', 'FCER1G', 'B3GNT7', 'YPEL1', 'KIR3DL1', 'FLNA', 'KIR3DL2', 'RAP1B', 'MAPK1', 'TRGC1', 'GFOD1', 'LIM2', 'HLA-A', 'EFHD2', 'ARL4C', 'CD300A', 'STAT4', 'GZMB', 'ITGAL', 'FHL3', 'CD247', 'FCRL6', 'PIP4K2A', 'TTC38', 'IFITM3', 'GNGT2', 'TXK', 'ARPC2', 'CDK2AP2', 'PYHIN1', 'KLRC1', 'SAMD3', 'CHST12', 'PLEK', 'KIFC3', 'PLEKHF1', 'PTPN4', 'FGFBP2', 'IL2RG', 'BOK', 'ZAP70', 'HLA-B', 'CST7', 'TRBC1', 'GZMA', 'TRGV9', 'LGALS9B', 'SPON2', 'ZBTB16', 'TMSB4X', 'HLA-C', 'CTSC', 'IFITM2']
sc.tl.score_genes(adata, gene_list, score_name='T_NK_cells_CD56dim_NK_cell') 
#T_NK_cells_CD8_Effector_T_cell
gene_list=['ID2', 'RNF125', 'LEPROTL1', 'LAT', 'MPZL3', 'PARP8', 'DDX24', 'ADGRE5', 'GABARAPL1', 'KLRG1', 'FYN', 'SH2D2A', 'MYO1F', 'B2M', 'CSRNP1', 'MAF', 'DUSP2', 'BTG1', 'CLEC2B', 'RPS3', 'TRGC2', 'KLRB1', 'XCL2', 'VCAM1', 'MALAT1', 'UBB', 'CD2', 'GZMK', 'C12orf75', 'SYTL3', 'JAKMIP1', 'CRTAM', 'XCL1', 'S100A4', 'PTPN22', 'DNAJB6', 'PPP2R2B', 'MT-CYB', 'PPP2R5C', 'DDIT4', 'CCL4', 'TNFRSF9', 'IL32', 'CD6', 'CD160', 'TRAC', 'CXCR3', 'LAG3', 'NR4A2', 'CCNH', 'CCL4L2', 'LITAF', 'UBC', 'IFRD1', 'GBP5', 'DKK3', 'PMAIP1', 'HOPX', 'DUSP4', 'JUNB', 'F2R', 'LYAR', 'SH2D1A', 'PRKCH', 'ACAP1', 'WIPF1', 'GATA3', 'EIF1', 'HSPA5', 'ATG2A', 'PCAT29', 'MT2A', 'RPS27', 'CD3D', 'GZMM', 'CD7', 'CCL3L3', 'ITM2A', 'RPS29', 'ZFP36', 'TNFAIP3', 'IL7R', 'ATXN1', 'LCK', 'PTGER4', 'JMJD6', 'JUN', 'CALM1', 'CMC1', 'DTHD1', 'PLAAT4', 'CTSW', 'RORA', 'SPOCK2', 'RUNX3', 'SRGN', 'CBLB', 'TSPYL2', 'H3-3B', 'CD69', 'HERPUD2', 'RGS1', 'KLRD1', 'CDC42SE2', 'HCST', 'CREM', 'TIGIT', 'CD99', 'SBDS', 'PLAAT3', 'IRF1', 'WHRN', 'YPEL5', 'CXCR6', 'HLA-E', 'MT-ND2', 'SLA', 'IFNG', 'SYNE2', 'DUSP1', 'SRSF7', 'GUK1', 'IDS', 'CCL5', 'VIPR2', 'ZFP36L2', 'PDCD4', 'PITPNC1', 'EOMES', 'NKG7', 'ZC3H12A', 'PDCD1', 'LINC00987', 'GADD45B', 'TOX', 'CCL3', 'MT1E', 'EML4', 'MATK', 'SELENOK', 'CEMIP2', 'TUBA4A', 'CD8B', 'TRGC1', 'PHF1', 'HLA-A', 'ARL4C', 'STAT4', 'FCRL6', 'HLA-F', 'STK17A', 'RNF19A', 'TC2N', 'MAFF', 'TNIP3', 'SLC4A10', 'TPRG1', 'FKBP11', 'A2M-AS1', 'PYHIN1', 'TRBC2', 'CD8A', 'SELENOM', 'SAMD3', 'PERP', 'CXCR4', 'IER2', 'GPR171', 'CD3E', 'PRDM1', 'PIK3R1', 'IDI1', 'IL2RG', 'SARAF', 'HLA-B', 'FAM177A1', 'LINC-PINT', 'CST7', 'TRBC1', 'CNOT6L', 'GZMA', 'CD96', 'CD3G', 'TSC22D3', 'PTPN7', 'P2RY8', 'TMSB4X', 'PTPRC', 'ZNF331', 'DNAJB1', 'TENT5C', 'HLA-C', 'ARAP2']
sc.tl.score_genes(adata, gene_list, score_name='T_NK_cells_CD8_Effector_T_cell') 
#T_NK_cells_CD8_Naive_T_cell
gene_list=['RASAL3', 'ID2', 'RNF125', 'ZNF683', 'LAT', 'ETS1', 'MYL12A', 'GIMAP4', 'ARHGAP30', 'RAC2', 'KLRF1', 'MIAT', 'ADGRG1', 'TGFBR3', 'KLRG1', 'TGFB1', 'DHRS7', 'FYN', 'DOK2', 'SH2D2A', 'MYO1F', 'GNLY', 'B2M', 'CD53', 'GIMAP7', 'ARHGAP9', 'DUSP2', 'IRF1-AS1', 'CLEC2B', 'XCL2', 'ITGB2', 'KLRB1', 'TRGC2', 'C12orf75', 'CD2', 'ITGB1', 'SYTL3', 'MYL12B', 'AKNA', 'PCSK7', 'APMAP', 'DSTN', 'ITGB7', 'PPP2R5C', 'DDIT4', 'CCL4', 'ABHD17A', 'IL32', 'CD160', 'TRAC', 'IFITM1', 'PAXX', 'LAG3', 'ANXA6', 'LITAF', 'EVL', 'FAM107B', 'HOPX', 'GZMH', 'RHOC', 'LSP1', 'BIN2', 'LYAR', 'SH2D1A', 'PRKCH', 'ACAP1', 'LAIR2', 'WIPF1', 'KLRC2', 'RNF213', 'CEP78', 'MT2A', 'PLAC8', 'BIN1', 'SYNE1', 'MBP', 'CD3D', 'APOBEC3G', 'GZMM', 'PRMT2', 'TRAF3IP3', 'CD7', 'ZBP1', 'TNFAIP3', 'ANXA1', 'LCK', 'C1orf21', 'IL2RB', 'TRDC', 'TPST2', 'CALM1', 'MMP23B', 'GNG2', 'CMC1', 'PLAAT4', 'CTSW', 'JAK1', 'TBC1D10C', 'RORA', 'SPOCK2', 'RUNX3', 'FCGR3A', 'PRF1', 'MYBL1', 'SEPTIN1', 'NCR3', 'ABI3', 'KLRD1', 'SYTL1', 'HCST', 'ARPC5L', 'TBX21', 'CD99', 'BTN3A2', 'NCR1', 'IRF1', 'S1PR5', 'HSPA8', 'HLA-E', 'IFNG', 'SYNE2', 'MIB2', 'OSTF1', 'GPR65', 'LY6E', 'PRSS23', 'CCL5', 'CLIC3', 'CLEC2D', 'SUN2', 'NKG7', 'TSEN54', 'STK4', 'SH2D1B', 'EML4', 'MATK', 'CEMIP2', 'PTGDR', 'FLNA', 'PSMB9', 'KIR3DL2', 'RAP1B', 'POLR2J3', 'CD8B', 'TRGC1', 'EFHD2', 'ARL6IP5', 'ARL4C', 'STAT4', 'GZMB', 'ITGAL', 'SIGIRR', 'CD247', 'ATM', 'FCRL6', 'HLA-F', 'STK17A', 'PIP4K2A', 'RASGRP1', 'TTC38', 'TLE5', 'AAK1', 'LINC00861', 'ARPC2', 'TXK', 'FKBP11', 'PYHIN1', 'TRBC2', 'CD8A', 'KLRC1', 'SAMD3', 'CHST12', 'CD3E', 'PLEKHF1', 'CD226', 'PTPN4', 'PIK3R1', 'FGFBP2', 'IL2RG', 'ZAP70', 'CST7', 'TRBC1', 'GZMA', 'FYB1', 'SEPTIN7', 'CD3G', 'SPON2', 'SKAP1', 'CD96', 'PTPN7', 'OPTN', 'PTPRC', 'CD48', 'HLA-C', 'IFITM2']
sc.tl.score_genes(adata, gene_list, score_name='T_NK_cells_CD8_Naive_T_cell') 
#T_NK_cells_NKT_cell
gene_list=['ID2', 'ZNF683', 'RGS9', 'LAT', 'TRG-AS1', 'MYL12A', 'MIAT', 'KLRF1', 'ADGRG1', 'PROK2', 'S100A10', 'TGFBR3', 'BHLHE40', 'KLRG1', 'FYN', 'DOK2', 'SH2D2A', 'MYO1F', 'GNLY', 'B2M', 'MRPL10', 'MAF', 'PFN1', 'DUSP2', 'CD320', 'IRF1-AS1', 'CLEC2B', 'XCL2', 'ITGB2', 'KLRB1', 'TRGC2', 'MALAT1', 'UBB', 'SH3BGRL3', 'C12orf75', 'CD2', 'ITGB1', 'SYTL3', 'JAKMIP1', 'NUAK1', 'S100A6', 'S100A4', 'COL6A2', 'MYL12B', 'AKNA', 'APMAP', 'DSTN', 'ITGB7', 'PPP2R5C', 'PPP2R2B', 'CCL4', 'IL32', 'CD6', 'TRAC', 'IL12RB1', 'IFITM1', 'PAXX', 'LAG3', 'CCL4L2', 'LITAF', 'GBP5', 'HOPX', 'GZMH', 'RHOC', 'F2R', 'BIN2', 'LYAR', 'SH2D1A', 'PRKCH', 'LAIR2', 'WIPF1', 'TSPAN2', 'KLRC2', 'ATG2A', 'ADRB2', 'CEP78', 'MT2A', 'GTF3C1', 'SYNE1', 'CD3D', 'APOBEC3G', 'GZMM', 'CD52', 'NCALD', 'ZBP1', 'ANXA1', 'LCK', 'C1orf21', 'ASCL2', 'TRDC', 'TPST2', 'CALM1', 'GNG2', 'CMC1', 'MT-CO1', 'PLAAT4', 'CTSW', 'RORA', 'SPOCK2', 'RUNX3', 'FCGR3A', 'SRGN', 'PRF1', 'MYBL1', 'ZBTB38', 'SLC1A7', 'MSC', 'KLRD1', 'TTC16', 'HCST', 'ARPC5L', 'TBX21', 'CD99', 'PLAAT3', 'IRF1', 'WHRN', 'CCDC12', 'S1PR5', 'HLA-E', 'MYO6', 'IFNG', 'SYNE2', 'GUK1', 'CYTOR', 'RAB11FIP5', 'YWHAQ', 'GPR65', 'B3GAT1', 'PRSS23', 'CCL5', 'CLIC3', 'ZEB2', 'SUN2', 'EOMES', 'NKG7', 'TSEN54', 'KIR2DL3', 'MT1E', 'MATK', 'TUBA4A', 'FLNA', 'KIR3DL2', 'CD8B', 'LINC00944', 'TRGC1', 'PATL2', 'ENC1', 'HLA-A', 'EFHD2', 'ARL4C', 'ITGAL', 'STAT4', 'GZMB', 'CD247', 'SYTL2', 'FCRL6', 'HLA-F', 'TTC38', 'TPRG1', 'EPHX4', 'ARPC2', 'FKBP11', 'A2M-AS1', 'PYHIN1', 'TRBC2', 'CD8A', 'SAMD3', 'PERP', 'CHST12', 'LGALS1', 'CD3E', 'PLEKHF1', 'PRDM1', 'PIK3R1', 'RNF166', 'FGFBP2', 'IL2RG', 'ZAP70', 'HLA-B', 'RPS4Y1', 'CST7', 'TRBC1', 'OASL', 'APOBEC3H', 'GZMA', 'SEPTIN7', 'CD3G', 'SPON2', 'KIF19', 'PTPN7', 'TMSB4X', 'PTPRC', 'HLA-C', 'IFITM2']
sc.tl.score_genes(adata, gene_list, score_name='T_NK_cells_NKT_cell') 
#T_NK_cells_Regulatory_T_cell
gene_list=['PBXIP1', 'LEPROTL1', 'LAT', 'BIRC3', 'ETS1', 'GPSM3', 'RPS17', 'RPL19', 'RPS28', 'EEF2', 'S100A10', 'USP10', 'RPL9', 'ARHGDIB', 'CD44', 'RPS12', 'B2M', 'PAG1', 'RPL10A', 'VIM', 'GIMAP7', 'MAF', 'BTG1', 'SH3YL1', 'EEF1B2', 'ICOS', 'FBLN7', 'NOSIP', 'RPS3', 'RPS16', 'TPT1', 'RPL13', 'KLRB1', 'MAL', 'NEFL', 'PIK3IP1', 'TRAT1', 'TOMM7', 'CD2', 'RPS6', 'ITGB1', 'KLF2', 'ARID5B', 'BCL11B', 'CCL20', 'RPL14', 'S100A4', 'RPSA', 'FLT3LG', 'TRADD', 'DDIT4', 'DPP4', 'IL32', 'RPLP0', 'CD6', 'SOCS3', 'PLP2', 'TNFRSF18', 'TRAC', 'CD40LG', 'IFITM1', 'GPR183', 'RORC', 'PTPN13', 'EVL', 'RPL41', 'TSHZ2', 'TNFRSF25', 'ANKRD12', 'JUNB', 'CLDND1', 'ACAP1', 'CAMK4', 'CFAP36', 'NOP53', 'GATA3', 'RNASET2', 'RCAN3', 'AQP3', 'RPL11', 'RPS27', 'CD3D', 'RPS15A', 'HINT1', 'RPL23A', 'RPS27A', 'CD52', 'ITM2A', 'RPS29', 'RPL30', 'IL7R', 'ANXA1', 'RPL27A', 'TNFAIP3', 'RPS14', 'LCK', 'RPL34', 'SAMSN1', 'SIRPG', 'CALM1', 'RPL36AL', 'RPL35A', 'PLAAT4', 'RPS25', 'RORA', 'SPOCK2', 'RPL10', 'PASK', 'CRIP2', 'IL2RA', 'LTB', 'RPL39', 'CD69', 'SEPTIN1', 'CRIP1', 'CD28', 'CORO1B', 'RPL3', 'CREM', 'TCF7', 'CCR6', 'RPL13A', 'HSPA8', 'ITM2B', 'RPL38', 'ARHGAP15', 'SYNE2', 'MGAT4A', 'RPL36', 'RPS19', 'ODF2L', 'RPL6', 'RPS8', 'GPRIN3', 'RPS20', 'RPS10', 'FXYD5', 'RPL32', 'ZFP36L2', 'GSTK1', 'TTC39C-AS1', 'INPP4B', 'RPLP2', 'EML4', 'TNFAIP8', 'NPDC1', 'RPL17', 'EEF1A1', 'RGCC', 'HLA-A', 'ARL4C', 'RPL4', 'ISG20', 'TMEM123', 'RPS4X', 'TC2N', 'CD5', 'DUSP16', 'TLE5', 'NPM1', 'RPL31', 'AAK1', 'ZC3H12D', 'FKBP11', 'TRBC2', 'PERP', 'RPL5', 'CXCR4', 'RPL27', 'CD3E', 'PIK3R1', 'TTC39C', 'RPL36A', 'IL2RG', 'CD27', 'SARAF', 'TNFRSF4', 'TRBC1', 'NFKBIA', 'FYB1', 'AP3M2', 'CD96', 'CD3G', 'TSC22D3', 'SKAP1', 'TMSB4X', 'PTPRC', 'CD48', 'DNAJB1', 'RPS18', 'HLA-C', 'EIF3E', 'LDHB', 'HAPLN3']
sc.tl.score_genes(adata, gene_list, score_name='T_NK_cells_Regulatory_T_cell') 
#CTRL_Myeloid_Neutrophil
gene_list=['BASP1', 'LINC02596', 'CSGALNACT1', 'ICAM3', 'HSD11B1-AS1', 'PTGS2', 'NEAT1', 'SLC2A3', 'ZNF467', 'ARAP3', 'RIPOR2', 'PRDM8', 'RUBCNL', 'FCAR', 'G0S2', 'TREML2', 'C3orf86', 'ZFP36', 'PLXNC1', 'RASSF2', 'APOBEC3A', 'CSF2RB', 'TSPAN2', 'IL1B', 'ARHGAP15', 'PXN', 'CD55', 'PRKCB', 'PTEN', 'LITAF', 'CXCL8', 'RNF149', 'IFITM2', 'ACTB', 'DYSF', 'VNN3', 'CHI3L1', 'NFE2', 'OSM', 'CCR3', 'TNFAIP6', 'FFAR2', 'ADGRG3', 'PELI2', 'PFKFB3', 'CYRIB', 'GLT1D1', 'NEDD9', 'H2AC6', 'XPO6', 'FAM157A', 'COL18A1', 'TRAF3IP3', 'FFAR3', 'ARHGAP26', 'MCTP2', 'LINC01366', 'CFP', 'HCAR2', 'SAT1', 'VNN2', 'CYTH4', 'ST3GAL4', 'KCNJ2', 'DENND3', 'TAGAP', 'FOS', 'LCP2', 'BTNL8', 'EGR3', 'PADI4', 'MGAM', 'MXD1', 'SELL', 'RGS2', 'CYP4F3', 'ECE1', 'ACSL1', 'DOCK5', 'CEACAM3', 'SYNE2', 'CRISPLD2', 'TMEM154', 'ZFP36L1', 'SLC25A37', 'MNDA', 'ZEB1', 'HCAR3', 'PLEK', 'RNF24', 'ADM', 'LRRK2', 'PPP1R3B', 'CHSY1', 'DGAT2', 'IL18RAP', 'HRH2', 'LINC01506', 'PGGHG', 'PTPRC', 'FCGR3B', 'CPD', 'FAM86B3P', 'LIMK2', 'PREX1', 'TECPR2', 'FBXL13', 'LYN', 'AQP9', 'BCL6', 'MX2', 'SMCHD1', 'TNFSF14', 'S100A8', 'TLR2', 'CCNJL', 'TNFRSF10C', 'ST8SIA4', 'EPHB1', 'PROK2', 'MALAT1', 'BCL2A1', 'PGLYRP1', 'NIBAN1', 'BTG2', 'CAMK1D', 'SORL1', 'C4BPA', 'ANTXR2', 'SRGN', 'MT-RNR2', 'CXCR2', 'ALPL', 'CMTM2', 'ADAM8', 'SOCS3', 'FPR2', 'H3-3B', 'CSF3R', 'NAMPT', 'TMEM71', 'IL1RN', 'S100A9', 'IL1RAP', 'CXCL1', 'DUSP1', 'LUCAT1', 'CHST15', 'CREB5', 'EHD1', 'STEAP4', 'PDE4B', 'TNFRSF1B', 'LINC02218', 'TRPM6', 'CNTNAP3', 'CXCR1', 'CLDN9', 'NHSL2', 'IL1R2', 'MEFV', 'PELI1', 'CASS4', 'CDA', 'MMP25', 'S100A12', 'ISG20', 'KCNJ15', 'FPR1', 'MBOAT7', 'PHOSPHO1', 'NFKBIA']
sc.tl.score_genes(adata, gene_list, score_name='CTRL_Myeloid_Neutrophil') 

In [None]:
##Figure S1G
import scanpy as sc
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.preprocessing import scale
import pandas as pd

# Heatmap of per-cluster mean signature scores.
# Assumes 'adata' is the AnnData object and that every score listed below is
# already present as a column of adata.obs (the sc.tl.score_genes calls write
# one obs column per score_name).

# obs column holding the cluster assignment used to aggregate the scores
cluster_key = 'integrated_snn_res.5.1'

# List of all score names
score_names = [
    'pre_plasma', 'pro_B', 'immature_B',
    'B_mem', 'B_naive', 'plasma',
    'pre_B', 'erythropoietic_Basophilic_Erythroblast', 'erythropoietic_CD34_Pro_erythroblast',
    'erythropoietic_Erythrocyte', 'erythropoietic_Polychromatic_Erythroblast', 'hematopoetic_progenitors_CD34_HSC',
    'hematopoetic_progenitors_CD34_MEP_1', 'hematopoetic_progenitors_CD34_MEP_2', 'hematopoetic_progenitors_CD34_MPP',
    'megakaryocytic_CD34_Megakaryoblast', 'megakaryocytic_Megakaryocyte', 'megakaryocytic_Platelet',
    'myeloid_CD14_Monocyte', 'myeloid_CD16_Monocyte', 'myeloid_CD34_Monoblast',
    'myeloid_CD34_Myeloblast', 'myeloid_cDC', 'myeloid_Macs',
    'myeloid_pDC', 'myeloid_stromal', 'T_NK_cells_CD4_Naive_T_cell',
    'T_NK_cells_CD56bright_NK_cell', 'T_NK_cells_CD56dim_NK_cell', 'T_NK_cells_CD8_Effector_T_cell',
    'T_NK_cells_CD8_Naive_T_cell', 'T_NK_cells_NKT_cell', 'T_NK_cells_Regulatory_T_cell', 'CTRL_Myeloid_Neutrophil',
]

# Per-cell scores together with the cluster label and the 'cancer' annotation
scores_sample_df = adata.obs[score_names + [cluster_key, 'cancer']]

# Mean score per cluster. observed=True keeps only clusters that actually
# contain cells: if the cluster column is categorical, an unused category would
# otherwise yield an all-NaN row, which breaks the scaling and the hierarchical
# clustering below (and the implicit default raises a FutureWarning in recent
# pandas versions).
agg_scores = scores_sample_df.groupby(cluster_key, observed=True)[score_names].mean()

# Transpose so that scores are rows and clusters are columns
agg_scores_transposed = agg_scores.T

# Row-wise z-score: each score is standardized across clusters (axis=1)
scaled_agg_scores = pd.DataFrame(scale(agg_scores_transposed, axis=1),
                                 index=agg_scores_transposed.index,
                                 columns=agg_scores_transposed.columns)

# Initial column order: one representative cell per cluster (the first one
# encountered), sorted by its 'cancer' value. The clustermap below re-orders
# the columns by hierarchical clustering, so this only fixes the input order.
ordered_samples = (scores_sample_df
                   .drop_duplicates(subset=[cluster_key])
                   .sort_values(by='cancer')[cluster_key])

# Reorder the columns of scaled_agg_scores based on ordered_samples
scaled_agg_scores = scaled_agg_scores[ordered_samples]

# Colour scale limits, in z-score units; values outside are clipped to the
# ends of the colormap
color_scale_min = -3  # Adjust this value
color_scale_max = 3  # Adjust this value

# NOTE(review): sns.set() changes the global seaborn/matplotlib style for every
# later figure in the session; the figsize given here is overridden by the
# explicit figsize passed to clustermap below. Kept to preserve the appearance
# of downstream figures.
sns.set(rc={'figure.figsize': (15, 5)})

# Clustermap with hierarchical clustering on both rows and columns.
# xticklabels are supplied in the input column order; seaborn re-orders them
# together with the columns.
g = sns.clustermap(scaled_agg_scores, cmap='bwr', vmin=color_scale_min, vmax=color_scale_max,
                   xticklabels=ordered_samples, figsize=(30, 10), method='average', metric='euclidean')

# Keep tick labels horizontal on both axes
plt.setp(g.ax_heatmap.get_xticklabels(), rotation=0)
plt.setp(g.ax_heatmap.get_yticklabels(), rotation=0)

# Customize the title and axis labels
# NOTE(review): the title says "Model" but the columns are clusters from
# `cluster_key` — confirm the intended wording.
g.ax_heatmap.set_title('Heatmap of Aggregated Gene Scores Clustered by Model')
g.ax_heatmap.set_xlabel('integrated_snn_res.5.1')
g.ax_heatmap.set_ylabel('Aggregated Gene Scores')

# Uncomment the line below to save the clustermap
#plt.savefig("AUCell_score_cellmarker_clustered_res.5.1.pdf", dpi=600, format='pdf', transparent=True)

plt.show()


In [None]:
##Figure S1H
# Reversed 'inferno' colormap.
# plt.get_cmap is used instead of plt.cm.get_cmap: the latter was deprecated in
# matplotlib 3.7 and removed in 3.9.
cmap = plt.get_cmap('inferno_r')

# Use rc_context to temporarily set matplotlib's default figure size to 4x4
with plt.rc_context({'figure.figsize': (4, 4)}):
    sc.pl.embedding(adata,
                    basis='X_umap.rpca',  # Set basis to 'X_umap.rpca'
                    color=['TNFRSF11A', 'OSCAR', 'OCSTAMP',
                           'SIGLEC15', 'CTSK', 'ACP5', 'DCSTAMP', 'MMP9'],  # Genes to color the panels by (one panel each)
                    cmap=cmap,
                    size=2,
                    ncols=4,
                    show=False  # Disable automatic display if saving later
                    # Uncomment the following line to save the plot
                    #,save='OC_feature_plot_updated.pdf'
                   )

# Display the plot
plt.show()

In [None]:
##preparing for Figure 6A-D
##subset by archetype
#subset_values = ["#41", "#21", "#48", "#12", "#18", "#29", "#35", "#42", "#44", "#47", "#50"]
subset_values = ["Mono"]
# NOTE(review): the obs column is spelled 'archetyp' here — confirm that this
# matches the column name stored in adata.obs.
# .copy() materializes the subset as an independent AnnData object, so that
# sc.tl.pca writes its results into the subset itself rather than into a view
# of `adata` (writing to a view triggers an implicit copy and an
# ImplicitModificationWarning).
adata_subset = adata[adata.obs['archetyp'].isin(subset_values), :].copy()
# Stores the PCA embedding in adata_subset.obsm['X_pca'], which the next cell reads
sc.tl.pca(adata_subset)

In [None]:
import scanpy as sc
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from matplotlib.lines import Line2D
from scipy.spatial import ConvexHull, QhullError
import itertools

# Pseudo-bulk PCA plot: one point per (cancer.id, celltype_C) combination,
# placed at the mean PC1/PC2 of its cells, with a convex hull drawn around the
# points of each cell type. The hull area per cell type is kept in
# `celltype_areas` for the bar plot that follows.
#
# Requires 'adata_subset' with 'cancer.id' and 'celltype_C' in .obs and PCA
# results in .obsm['X_pca'].

# Filter out specific cancer types if needed
excluded_cancer_ids = []  # Add other types to exclude if necessary
# Copy only the filtered data to avoid ImplicitModificationWarning
filtered_adata = adata_subset[~adata_subset.obs['cancer.id'].isin(excluded_cancer_ids)].copy()

# Extract the first two principal components
filtered_adata.obs['pca_1'] = filtered_adata.obsm['X_pca'][:, 0]
filtered_adata.obs['pca_2'] = filtered_adata.obsm['X_pca'][:, 1]

# Mean PC1/PC2 per cancer ID and cell type (ignoring the sample).
# observed=False keeps every category combination; combinations without cells
# get NaN coordinates and are dropped before hull construction and plotting.
agg_pca = (filtered_adata.obs.groupby(['cancer.id', 'celltype_C'], observed=False)
           .agg({'pca_1': 'mean', 'pca_2': 'mean'})
           .reset_index())

# List of marker shapes
shapes = ['o', 's', '^', 'D', 'p', '*', '+', 'x']  # Add more shapes as needed

# Cycle through the shapes so that more cancer types than shapes still works
# (shapes are then reused)
shape_cycle = itertools.cycle(shapes)

# Extract unique cancer types from the filtered data
unique_cancer_ids = filtered_adata.obs['cancer.id'].unique()

# Assign a marker shape to each cancer type
cancer_id_markers = {cancer_id: next(shape_cycle) for cancer_id in unique_cancer_ids}

# Color palette for the highlighted cell types
specified_celltypes = ["Mφ", "OC", "exhausting CD8 T", "CD8 Tex", "CD4 Treg", "Mono", "pro Mono"]
specified_colors = plt.get_cmap('Set2')(np.linspace(0, 1, len(specified_celltypes)))

# Map colors to specified cell types
celltype_color_map = dict(zip(specified_celltypes, specified_colors))

# Every other cell type present in the data is drawn in light grey
light_grey = (0.8, 0.8, 0.8, 1.0)  # RGBA for light grey
for celltype in agg_pca['celltype_C'].unique():
    celltype_color_map.setdefault(celltype, light_grey)

# Create the PCA plot
fig, ax = plt.subplots(figsize=(5.5, 4))

# Draw the convex hull of each cell type and record its area
celltype_areas = {}

for celltype in agg_pca['celltype_C'].unique():
    # Points of the current cell type, without empty (NaN) combinations
    celltype_points = (agg_pca.loc[agg_pca['celltype_C'] == celltype, ['pca_1', 'pca_2']]
                       .dropna()
                       .to_numpy())

    # Area stays 0 when no hull can be built
    area = 0
    hull = None

    # A 2-D hull needs at least three points
    if len(celltype_points) >= 3:
        try:
            hull = ConvexHull(celltype_points)
        except (QhullError, ValueError):
            # Degenerate input (e.g. collinear or coincident points) makes
            # Qhull raise QhullError, which is a RuntimeError subclass and was
            # therefore not caught by a bare `except ValueError`.
            hull = None

    if hull is not None:
        for simplex in hull.simplices:
            ax.plot(celltype_points[simplex, 0], celltype_points[simplex, 1],
                    color=celltype_color_map[celltype])
        # For 2-D input, ConvexHull.volume is the enclosed area
        # (ConvexHull.area would be the perimeter)
        area = hull.volume

    celltype_areas[celltype] = area

# Plot each cell type-cancer combination that actually has cells
for _, row in agg_pca.dropna(subset=['pca_1', 'pca_2']).iterrows():
    ax.scatter(
        row['pca_1'],
        row['pca_2'],
        marker=cancer_id_markers[row['cancer.id']],
        color=celltype_color_map[row['celltype_C']],
        label=f"{row['cancer.id']}_{row['celltype_C']}"
    )

# Set labels and title
ax.set_xlabel('PCA 1')
ax.set_ylabel('PCA 2')
ax.set_title('PCA Plot by celltype_C and Cancer')

# Legend handles for cell types (color)
celltype_patches = [Line2D([0], [0], marker='o', color='w', label=celltype,
                           markerfacecolor=color, markersize=5)
                    for celltype, color in celltype_color_map.items()]

# Legend handles for cancer types (marker shape, from cancer_id_markers)
tissue_patches = [Line2D([0], [0], marker=marker, color='w', label=cancer_id,
                         markerfacecolor='grey', markersize=5)
                  for cancer_id, marker in cancer_id_markers.items()]

# Two legends on one axes: the first must be re-added as an artist, because the
# second ax.legend() call would otherwise replace it
first_legend = ax.legend(handles=celltype_patches, title='Cell Type', bbox_to_anchor=(1.05, 1), loc='upper left')
ax.add_artist(first_legend)
ax.legend(handles=tissue_patches, title='Cancer ID', bbox_to_anchor=(1.05, 0.5), loc='center left')

plt.tight_layout()
#plt.savefig('./outs/ctrl_pca_area.svg', dpi=300, transparent=True, bbox_inches='tight')
plt.show()

# Section break ----------------------------------------------------------------------

# Horizontal bar plot of the convex-hull area of each cell type.
# Uses `celltype_areas` and `celltype_color_map` from the PCA plot above.

# Sort cell types by area, largest first
sorted_celltype_areas = sorted(celltype_areas.items(), key=lambda x: x[1], reverse=True)
if not sorted_celltype_areas:
    # zip(*[]) below would otherwise fail with an opaque unpacking error
    raise ValueError("celltype_areas is empty: there are no cell types to plot")

# Extracting labels and values
labels, areas = zip(*sorted_celltype_areas)

# Create the horizontal bar plot for cell type areas
fig, ax = plt.subplots(figsize=(4, 7))  # Adjust the figure size as needed
bars = ax.barh(labels, areas, color=[celltype_color_map[label] for label in labels])
ax.set_xlabel('Area')
ax.set_title('Area of Each Cell Type in PCA Plot')

# x-axis limit: keep the fixed 0-200 range, but widen it when the largest bar
# (plus 10 % headroom) would not fit, so that no bar or label gets clipped
min_x_limit = 200
max_x_value = max(areas) * 1.1  # Set a bit larger than the max area
ax.set_xlim(0, max(min_x_limit, max_x_value))

# Invert the y-axis to have the largest bar on top
ax.invert_yaxis()

# Annotate each bar with the cell type name, just right of the bar end
for bar, label in zip(bars, labels):
    label_x_pos = bar.get_width() + max_x_value * 0.01  # Adjust this value to move the label to the right
    ax.text(label_x_pos, bar.get_y() + bar.get_height() / 2, label, va='center')

# Keep x-axis tick labels horizontal
plt.xticks(rotation=0)

# Remove y-axis labels (since they are redundant with bar annotations)
ax.set_yticklabels([])

# Show the plot
plt.tight_layout()
#plt.savefig('./outs/ctrl_pca_barplot.svg', dpi=300, transparent=True, bbox_inches='tight')
plt.show()
