In [None]:
from platform import release
import sys
import os,csv,re
import pandas as pd
import numpy as np
import scanpy as sc
import math
from scipy.sparse import issparse
from scipy.sparse import csr_matrix
import random, torch
import warnings
warnings.filterwarnings("ignore")
import matplotlib.colors as clr
import matplotlib.pyplot as plt
import cv2 #In order to read in image data, pip3 install opencv-python
from scipy.spatial import distance
import random
from utils import getDefaultColors


# ---- Command-line setup and data loading ----
# Usage: <script> <input.h5ad> <outdir>
args = sys.argv
if len(args) < 3:
    sys.exit("usage: %s <input.h5ad> <outdir>" % args[0])
# Resolve the input path BEFORE changing directory: once os.chdir() has run,
# a relative input path would be looked up inside outdir instead of the
# directory the script was launched from.
indata = os.path.abspath(args[1])
outdir = args[2]
os.chdir(outdir)
adata = sc.read_h5ad(indata)
# Keep only cells annotated as 'DCIS cells' or 'IBC cells' and materialize the
# subset so that adata.obs can be modified below.
adata = adata[adata.obs['annotated_cluster'].isin(['DCIS cells','IBC cells'])].copy()
adata.obs['annotated_cluster'] = adata.obs['annotated_cluster'].astype('category')

## Calculate signature scores (for Fig5b)
# Gene-symbol lists, one per signature; each list is handed to
# sc.tl.score_genes further down to produce one score column in adata.obs.
# NOTE(review): the variable names suggest MSigDB Hallmark gene sets --
# confirm the source and release before editing any list.
HALLMARK_OXIDATIVE_PHOSPHORYLATION = ["ABCB7","ACAA1","ACAA2","ACADM","ACADSB","ACADVL","ACAT1","ACO2","AFG3L2","AIFM1","ALAS1","ALDH6A1","ATP1B1","ATP5F1A","ATP5F1B","ATP5F1C","ATP5F1D","ATP5F1E","ATP5MC1","ATP5MC2","ATP5MC3","ATP5ME","ATP5MF","ATP5MG","ATP5PB","ATP5PD","ATP5PF","ATP5PO","ATP6AP1","ATP6V0B","ATP6V0C","ATP6V0E1","ATP6V1C1","ATP6V1D","ATP6V1E1","ATP6V1F","ATP6V1G1","ATP6V1H","BAX","BCKDHA","BDH2","CASP7","COX10","COX11","COX15","COX17","COX4I1","COX5A","COX5B","COX6A1","COX6B1","COX6C","COX7A2","COX7A2L","COX7B","COX7C","COX8A","CPT1A","CS","CYB5A","CYB5R3","CYC1","CYCS","DECR1","DLAT","DLD","DLST","ECH1","ECHS1","ECI1","ETFA","ETFB","ETFDH","FDX1","FH","FXN","GLUD1","GOT2","GPI","GPX4","GRPEL1","HADHA","HADHB","HCCS","HSD17B10","HSPA9","HTRA2","IDH1","IDH2","IDH3A","IDH3B","IDH3G","IMMT","ISCA1","ISCU","LDHA","LDHB","LRPPRC","MAOB","MDH1","MDH2","MFN2","MGST3","MPC1","MRPL11","MRPL15","MRPL34","MRPL35","MRPS11","MRPS12","MRPS15","MRPS22","MRPS30","MTRF1","MTRR","MTX2","NDUFA1","NDUFA2","NDUFA3","NDUFA4","NDUFA5","NDUFA6","NDUFA7","NDUFA8","NDUFA9","NDUFAB1","NDUFB1","NDUFB2","NDUFB3","NDUFB4","NDUFB5","NDUFB6","NDUFB7","NDUFB8","NDUFC1","NDUFC2","NDUFS1","NDUFS2","NDUFS3","NDUFS4","NDUFS6","NDUFS7","NDUFS8","NDUFV1","NDUFV2","NNT","NQO2","OAT","OGDH","OPA1","OXA1L","PDHA1","PDHB","PDHX","PDK4","PDP1","PHB2","PHYH","PMPCA","POLR2F","POR","PRDX3","RETSAT","RHOT1","RHOT2","SDHA","SDHB","SDHC","SDHD","SLC25A11","SLC25A12","SLC25A20","SLC25A3","SLC25A4","SLC25A5","SLC25A6","SUCLA2","SUCLG1","SUPV3L1","SURF1","TCIRG1","TIMM10","TIMM13","TIMM17A","TIMM50","TIMM8B","TIMM9","TOMM22","TOMM70","UQCR10","UQCR11","UQCRB","UQCRC1","UQCRC2","UQCRFS1","UQCRH","UQCRQ","VDAC1","VDAC2","VDAC3"]
HALLMARK_EPITHELIAL_MESENCHYMAL_TRANSITION = ["ABI3BP","ACTA2","ADAM12","ANPEP","APLP1","AREG","BASP1","BDNF","BGN","BMP1","CADM1","CALD1","CALU","CAP2","CAPG","CCN1","CCN2","CD44","CD59","CDH11","CDH2","CDH6","COL11A1","COL12A1","COL16A1","COL1A1","COL1A2","COL3A1","COL4A1","COL4A2","COL5A1","COL5A2","COL5A3","COL6A2","COL6A3","COL7A1","COL8A2","COLGALT1","COMP","COPA","CRLF1","CTHRC1","CXCL1","CXCL12","CXCL6","CXCL8","DAB2","DCN","DKK1","DPYSL3","DST","ECM1","ECM2","EDIL3","EFEMP2","ELN","EMP3","ENO2","FAP","FAS","FBLN1","FBLN2","FBLN5","FBN1","FBN2","FERMT2","FGF2","FLNA","FMOD","FN1","FOXC2","FSTL1","FSTL3","FUCA1","FZD8","GADD45A","GADD45B","GAS1","GEM","GJA1","GLIPR1","GPC1","GPX7","GREM1","HTRA1","ID2","IGFBP2","IGFBP3","IGFBP4","IL15","IL32","IL6","INHBA","ITGA2","ITGA5","ITGAV","ITGB1","ITGB3","ITGB5","JUN","LAMA1","LAMA2","LAMA3","LAMC1","LAMC2","LGALS1","LOX","LOXL1","LOXL2","LRP1","LRRC15","LUM","MAGEE1","MATN2","MATN3","MCM7","MEST","MFAP5","MGP","MMP1","MMP14","MMP2","MMP3","MSX1","MXRA5","MYL9","MYLK","NID2","NNMT","NOTCH2","NT5E","NTM","OXTR","P3H1","PCOLCE","PCOLCE2","PDGFRB","PDLIM4","PFN2","PLAUR","PLOD1","PLOD2","PLOD3","PMEPA1","PMP22","POSTN","PPIB","PRRX1","PRSS2","PTHLH","PTX3","PVR","QSOX1","RGS4","RHOB","SAT1","SCG2","SDC1","SDC4","SERPINE1","SERPINE2","SERPINH1","SFRP1","SFRP4","SGCB","SGCD","SGCG","SLC6A8","SLIT2","SLIT3","SNAI2","SNTB1","SPARC","SPOCK1","SPP1","TAGLN","TFPI2","TGFB1","TGFBI","TGFBR3","TGM2","THBS1","THBS2","THY1","TIMP1","TIMP3","TNC","TNFAIP3","TNFRSF11B","TNFRSF12A","TPM1","TPM2","TPM4","VCAM1","VCAN","VEGFA","VEGFC","VIM","WIPF1","WNT5A"]
HALLMARK_GLYCOLYSIS = ["ABCB6","ADORA2B","AGL","AGRN","AK3","AK4","AKR1A1","ALDH7A1","ALDH9A1","ALDOA","ALDOB","ALG1","ANG","ANGPTL4","ANKZF1","ARPP19","ARTN","AURKA","B3GALT6","B3GAT1","B3GAT3","B3GNT3","B4GALT1","B4GALT2","B4GALT4","B4GALT7","BIK","BPNT1","CACNA1H","CAPN5","CASP6","CD44","CDK1","CENPA","CHPF","CHPF2","CHST1","CHST12","CHST2","CHST4","CHST6","CITED2","CLDN3","CLDN9","CLN6","COG2","COL5A1","COPB2","CTH","CXCR4","CYB5A","DCN","DDIT4","DEPDC1","DLD","DPYSL4","DSC2","ECD","EFNA3","EGFR","EGLN3","ELF3","ENO1","ENO2","ERO1A","EXT1","EXT2","FAM162A","FBP2","FKBP4","FUT8","G6PD","GAL3ST1","GALE","GALK1","GALK2","GAPDHS","GCLC","GFPT1","GFUS","GLCE","GLRX","GMPPA","GMPPB","GNE","GNPDA1","GOT1","GOT2","GPC1","GPC3","GPC4","GPR87","GUSB","GYS1","GYS2","HAX1","HDLBP","HK2","HMMR","HOMER1","HS2ST1","HS6ST2","HSPA5","IDH1","IDUA","IER3","IGFBP3","IL13RA1","IRS2","ISG20","KDELR3","KIF20A","KIF2A","LCT","LDHA","LDHC","LHPP","LHX9","MDH1","MDH2","ME1","ME2","MED24","MERTK","MET","MIF","MIOX","MPI","MXI1","NANP","NASP","NDST3","NDUFV3","NOL3","NSDHL","NT5E","P4HA1","P4HA2","PAM","PAXIP1","PC","PDK3","PFKFB1","PFKP","PGAM1","PGAM2","PGK1","PGLS","PGM2","PHKA2","PKM","PKP2","PLOD1","PLOD2","PMM2","POLR3K","PPFIA4","PPIA","PPP2CB","PRPS1","PSMC4","PYGB","PYGL","QSOX1","RARS1","RBCK1","RPE","RRAGD","SAP30","SDC1","SDC2","SDC3","SDHC","SLC16A3","SLC25A10","SLC25A13","SLC35A3","SLC37A4","SOD1","SOX9","SPAG4","SRD5A3","STC1","STC2","STMN1","TALDO1","TFF3","TGFA","TGFBI","TKTL1","TPBG","TPI1","TPST1","TXN","UGP2","VCAN","VEGFA","VLDLR","XYLT2","ZNF292"]
HALLMARK_HYPOXIA = ["ACKR3","ADM","ADORA2B","AK4","AKAP12","ALDOA","ALDOB","ALDOC","AMPD3","ANGPTL4","ANKZF1","ANXA2","ATF3","ATP7A","B3GALT6","B4GALNT2","BCAN","BCL2","BGN","BHLHE40","BNIP3L","BRS3","BTG1","CA12","CASP6","CAV1","CAVIN1","CAVIN3","CCN1","CCN2","CCN5","CCNG2","CDKN1A","CDKN1B","CDKN1C","CHST2","CHST3","CITED2","COL5A1","CP","CSRP2","CXCR4","DCN","DDIT3","DDIT4","DPYSL4","DTNA","DUSP1","EDN2","EFNA1","EFNA3","EGFR","ENO1","ENO2","ENO3","ERO1A","ERRFI1","ETS1","EXT1","F3","FAM162A","FBP1","FOS","FOSL2","FOXO3","GAA","GALK1","GAPDH","GAPDHS","GBE1","GCK","GCNT2","GLRX","GPC1","GPC3","GPC4","GPI","GRHPR","GYS1","HAS1","HDLBP","HEXA","HK1","HK2","HMOX1","HOXB9","HS3ST1","HSPA5","IDS","IER3","IGFBP1","IGFBP3","IL6","ILVBL","INHA","IRS2","ISG20","JMJD6","JUN","KDELR3","KDM3A","KIF5A","KLF6","KLF7","KLHL24","LALBA","LARGE1","LDHA","LDHC","LOX","LXN","MAFF","MAP3K1","MIF","MT1E","MT2A","MXI1","MYH9","NAGK","NCAN","NDRG1","NDST1","NDST2","NEDD4L","NFIL3","NOCT","NR3C1","P4HA1","P4HA2","PAM","PCK1","PDGFB","PDK1","PDK3","PFKFB3","PFKL","PFKP","PGAM2","PGF","PGK1","PGM1","PGM2","PHKG1","PIM1","PKLR","PKP1","PLAC8","PLAUR","PLIN2","PNRC1","PPARGC1A","PPFIA4","PPP1R15A","PPP1R3C","PRDX5","PRKCA","PYGM","RBPJ","RORA","RRAGD","S100A4","SAP30","SCARB1","SDC2","SDC3","SDC4","SELENBP1","SERPINE1","SIAH2","SLC25A1","SLC2A1","SLC2A3","SLC2A5","SLC37A4","SLC6A6","SRPX","STBD1","STC1","STC2","SULT2B1","TES","TGFB3","TGFBI","TGM2","TIPARP","TKTL1","TMEM45A","TNFAIP3","TPBG","TPD52","TPI1","TPST2","UGP2","VEGFA","VHL","VLDLR","WSB1","XPNPEP1","ZFP36","ZNF292"]
# Score every signature in turn; the mapping key becomes the name of the
# adata.obs column that receives the score.
signature_gene_sets = {
    'Oxidative_phosphorylation': HALLMARK_OXIDATIVE_PHOSPHORYLATION,
    'EMT': HALLMARK_EPITHELIAL_MESENCHYMAL_TRANSITION,
    'Glycolysis': HALLMARK_GLYCOLYSIS,
    'Hypoxia': HALLMARK_HYPOXIA,
}
for score_column, signature_genes in signature_gene_sets.items():
    sc.tl.score_genes(adata, signature_genes, score_name=score_column)

# Persist the scored object and its per-cell metadata table.
# The working directory is already outdir (os.chdir earlier in this script),
# so both outputs use bare file names. Prefixing outdir again would point to
# the wrong place for a relative outdir, or for one given without a trailing
# path separator.
adata.write_h5ad("cellbin_raw_score.h5ad")
adata.obs.to_csv("cellbin_raw_metadata_score.txt",sep = '\t')

In [None]:
## Plot signature scores (for Fig5b). NOTE: this cell is R code, not Python.

# ---- Session setup for the plotting cell ----
options(stringsAsFactors = FALSE)
library(tidyverse)
library(Matrix)
library(cowplot)
library(ggplot2)
library(Cairo)
library(ggpubr)
options(bitmapType = "cairo")

# Directory that receives the PDF figures written below.
# NOTE(review): the cell header mentions Fig5b while this path says Fig4 -- confirm.
outdir = './Fig4/'
# pdf() cannot open a file inside a missing directory, so create it up front;
# this is a silent no-op when the directory already exists.
dir.create(outdir, showWarnings = FALSE, recursive = TRUE)
# Per-cell metadata table (tab-separated, header row) holding the signature
# score columns and the annotated_cluster label used below.
metadata <- read.table('cellbin_raw_metadata_score.txt', sep='\t', header = TRUE)
#head(metadata)
#head(metadata)

# Per-cluster summary of the oxidative phosphorylation score
# (upper quartile, median, mean, lower quartile); printed at top level.
summarize(
  group_by(metadata, annotated_cluster),
  Q3 = quantile(Oxidative_phosphorylation, probs = 0.75),
  median = median(Oxidative_phosphorylation),
  mean = mean(Oxidative_phosphorylation),
  Q1 = quantile(Oxidative_phosphorylation, probs = 0.25)
)
#-------------------------------- Violin + box plot--------------------------------
# Rows labelled 'Immune' are excluded from the figure.
oxphos_cells <- metadata[metadata$annotated_cluster != 'Immune', ]
p <- ggviolin(
  oxphos_cells,
  x = "annotated_cluster", y = "Oxidative_phosphorylation",
  fill = "annotated_cluster", color = "annotated_cluster", palette = "jco",
  alpha = 1, size = 0.5, width = 0.8,
  add = "boxplot", add.params = list(fill = "white"),
  ylim = c(-0.1, 0.6),
  legend = "top", legend.title = "Cell type",
  font.legend = c(12, "plain", "black"),
  font.tickslab = c(15, "plain", "black"),
  font.x = 18, font.y = 18
) +
  labs(x = "Cell Types", y = "Oxidative phosphorylation score")

# Add the t-test comparison between clusters, then write the figure to PDF.
p2 <- p + stat_compare_means(aes(group = annotated_cluster), method = "t.test")
pdf(file = paste0(outdir, "/Oxidative_phosphorylation_score_boxplot.pdf"), width = 5, height = 5)
print(p2)
dev.off()

# Reload the per-cell metadata table before the EMT section.
metadata <- read.table('cellbin_raw_metadata_score.txt', sep = '\t', header = TRUE)
#head(metadata)

# Per-cluster summary of the EMT score
# (upper quartile, median, mean, lower quartile); printed at top level.
summarize(
  group_by(metadata, annotated_cluster),
  Q3 = quantile(EMT, probs = 0.75),
  median = median(EMT),
  mean = mean(EMT),
  Q1 = quantile(EMT, probs = 0.25)
)
#-------------------------------- Violin + box plot--------------------------------
# Rows labelled 'Immune' are excluded from the figure.
emt_cells <- metadata[metadata$annotated_cluster != 'Immune', ]
p <- ggviolin(
  emt_cells,
  x = "annotated_cluster", y = "EMT",
  fill = "annotated_cluster", color = "annotated_cluster", palette = "jco",
  alpha = 1, size = 0.5, width = 0.8,
  add = "boxplot", add.params = list(fill = "white"),
  ylim = c(-0.1, 0.4),
  legend = "top", legend.title = "Cell type",
  font.legend = c(12, "plain", "black"),
  font.tickslab = c(15, "plain", "black"),
  font.x = 18, font.y = 18
) +
  labs(x = "Cell Types", y = "EMT score")

# Add the t-test comparison between clusters, then write the figure to PDF.
p2 <- p + stat_compare_means(aes(group = annotated_cluster), method = "t.test")
pdf(file = paste0(outdir, "/EMT_score_boxplot.pdf"), width = 5, height = 5)
print(p2)
dev.off()

# Reload the per-cell metadata table before the glycolysis section.
metadata <- read.table('cellbin_raw_metadata_score.txt', sep = '\t', header = TRUE)
#head(metadata)

# Per-cluster summary of the glycolysis score
# (upper quartile, median, mean, lower quartile); printed at top level.
summarize(
  group_by(metadata, annotated_cluster),
  Q3 = quantile(Glycolysis, probs = 0.75),
  median = median(Glycolysis),
  mean = mean(Glycolysis),
  Q1 = quantile(Glycolysis, probs = 0.25)
)