In [None]:
library(tidyr)
library(dplyr)
library(tibble)
library(ggplot2)
library(grid)
library(RColorBrewer)
library(ComplexHeatmap)
library(tximport)
library(enrichplot)

### Figure 2c

In [None]:
# Load the cell-type proportion table; the first CSV column supplies the
# row names and the first line is the header.
proportion <- read.csv(
  "./100samples_proportion.csv",
  header = TRUE,
  row.names = 1
)


In [None]:
# Figure 2c, proportion panel: stacked bar chart with one bar per sample,
# filled by cell type. Row names of `proportion` are used as the cell types and
# every column is treated as one sample.
proportion_df <- as.data.frame(proportion)

# Fix the stacking order of the cell types. (`levels` is spelled out in full;
# the abbreviated `level` only worked through partial argument matching.)
proportion_df$CellType <- factor(
  rownames(proportion_df),
  levels = c("Malignant", "Fibroblast", "NK", "T", "B", "Neutrophil",
             "Macrophage", "DC", "Mast", "Endothelial", "Others")
)

proportion_long_data <- pivot_longer(
  proportion_df,
  cols = -CellType,
  names_to = "Sample",
  values_to = "Proportion"
)

# Keep the samples in the column order of the input table. The helper column
# `CellType` is excluded so it does not become an empty factor level.
proportion_long_data$Sample <- factor(
  proportion_long_data$Sample,
  levels = setdiff(colnames(proportion_df), "CellType")
)
proportion_long_data <- proportion_long_data %>%
  arrange(Sample)

# Palette names must match the `CellType` levels exactly: the key is
# "Malignant" (not "Malignant/Epithelial"), otherwise scale_fill_manual()
# cannot assign this colour to the malignant fraction.
colors <- c(
  "NK" = "#CDCE6B", "T" = "#FFFF99", "B" = "#00CC99",
  "Neutrophil" = "#3366CC", "Macrophage" = "#990066", "DC" = "#FF9999",
  "Mast" = "#6699FF", "Endothelial" = "#FF3366", "Fibroblast" = "#008080",
  "Malignant" = "#0D5886", "Others" = "lightgray"
)

# Axis text, grid lines and the legend are all switched off in the theme.
cell_proportion <- ggplot(
  proportion_long_data,
  aes(x = Sample, y = Proportion, fill = CellType)
) +
  geom_bar(stat = "identity", position = "stack", width = 0.9) +
  scale_fill_manual(values = colors) +
  labs(title = NULL,
       x = NULL,
       y = NULL,
       fill = "Cell Type") +
  theme_minimal() +
  theme(axis.text.x = element_blank(),
        axis.text.y = element_blank(),
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        legend.position = "none")


In [None]:
# Load the cell-count table; the first CSV column supplies the row names
# and the first line is the header.
count <- read.csv(
  "./100samples_cell_count.csv",
  header = TRUE,
  row.names = 1
)


In [None]:
# Figure 2c, count panel: stacked bar chart with one bar per sample, filled by
# cell type. Row names of `count` are used as the cell types and every column
# is treated as one sample.

# Fix the stacking order of the cell types. (`levels` is spelled out in full;
# the abbreviated `level` only worked through partial argument matching.)
count$CellType <- factor(
  rownames(count),
  levels = c("Malignant", "Fibroblast", "NK", "T", "B", "Neutrophil",
             "Macrophage", "DC", "Mast", "Endothelial", "Others")
)

# NOTE: the value column keeps its original name "Proportion" although it is
# filled from the cell-count table.
count_long_data <- pivot_longer(
  count,
  cols = -CellType,
  names_to = "Sample",
  values_to = "Proportion"
)

# Keep the samples in the column order of the input table. The helper column
# `CellType` is excluded so it does not become an empty factor level.
count_long_data$Sample <- factor(
  count_long_data$Sample,
  levels = setdiff(colnames(count), "CellType")
)
count_long_data <- count_long_data %>%
  arrange(Sample)

# Palette names must match the `CellType` levels exactly: the key is
# "Malignant" (not "Malignant/Epithelial"), otherwise scale_fill_manual()
# cannot assign this colour to the malignant fraction.
colors <- c(
  "NK" = "#CDCE6B", "T" = "#FFFF99", "B" = "#00CC99",
  "Neutrophil" = "#3366CC", "Macrophage" = "#990066", "DC" = "#FF9999",
  "Mast" = "#6699FF", "Endothelial" = "#FF3366", "Fibroblast" = "#008080",
  "Malignant" = "#0D5886", "Others" = "lightgray"
)

# Sample labels, grid lines and the legend are switched off; the value axis
# text is kept.
cell_count <- ggplot(
  count_long_data,
  aes(x = Sample, y = Proportion, fill = CellType)
) +
  geom_bar(stat = "identity", position = "stack", width = 0.9) +
  scale_fill_manual(values = colors) +
  labs(title = NULL,
       x = NULL,
       y = NULL,
       fill = "Cell Type") +
  theme_minimal() +
  theme(axis.text.x = element_blank(),
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        legend.position = "none")


In [None]:
# Load the clinical metadata table; the first CSV column supplies the row
# names.
clinical_data <- read.csv(
  "./HD100_clinical_info.csv",
  row.names = 1
)


In [None]:
# Figure 2c, clinical annotation tracks: one colour bar per clinical variable,
# with one entry per row of `clinical_data`.

# Keep only the annotated variables. The source is the `clinical_data` table
# loaded from "./HD100_clinical_info.csv"; the previous code read from
# `clini_df`, which is not defined anywhere in the visible script.
# NOTE(review): confirm that `clini_df` was not a differently filtered table.
clinical_data <- clinical_data[, c(
  "Pathology_ID", "Tumor_Type", "Tumor_SubType", "Age_state",
  "Recurrence_Status", "purity_state", "Tumor_Grade", "Tumor_Stage",
  "MSI_state"
)]

# Re-key the rows by pathology ID, then drop the ID column so it is not drawn
# as an annotation track.
row.names(clinical_data) <- clinical_data$Pathology_ID
clinical_data$Pathology_ID <- NULL

# Missing and empty values are shown as an explicit "Unknown" category.
# The NA replacement must run first so the `== ""` mask contains no NA.
clinical_data[is.na(clinical_data)] <- "Unknown"
clinical_data[clinical_data == ""] <- "Unknown"

top_annotations <- HeatmapAnnotation(
  df = clinical_data,
  col = list(
    Tumor_Type = c("OC" = "#ab526a", "EC" = "#d68081", "CC" = "#e46b72",
                   "Unknown" = "#dcdcdc"),
    # "Unknown" is added because the replacement above can introduce that
    # value in any column; "Others" is kept for existing data.
    Tumor_SubType = c("Mucinous" = "#b1bcf4", "Serous" = "#65a56d",
                      "Clear cell" = "#f4e88d", "Squ" = "#f85adb",
                      "Ade" = "#a94684", "Ade-Squ" = "#ff1e16",
                      "Endometrial" = "#eab768", "Sarcoma" = "#cf5b5d",
                      "Others" = "#dcdcdc", "Unknown" = "#dcdcdc"),
    Age_state = c("High" = "#d87559", "Medium" = "#e6a76c", "Low" = "#e3c577",
                  "Unknown" = "#dcdcdc"),
    Recurrence_Status = c("1" = "#785190", "0" = "#6f87bb",
                          "Unknown" = "#dcdcdc"),
    purity_state = c("High" = "#345b7c", "Medium" = "#5896aa",
                     "Low" = "#91cfe3", "Unknown" = "#dcdcdc"),
    Tumor_Grade = c("1" = "#a1a5bf", "2" = "#656e96", "3" = "#475482",
                    "Unknown" = "#dcdcdc"),
    Tumor_Stage = c("I" = "#a6c692", "II" = "#92b17f", "III" = "#7e9d6b",
                    "IV" = "#6b8959", "Unknown" = "#dcdcdc"),
    MSI_state = c("MSI-low" = "#62b58f", "MSI-high" = "#e7cc5e",
                  "Unknown" = "#dcdcdc")
  ),
  annotation_name_side = "left",
  # Each legend title now names the variable it belongs to; previously several
  # titles were copy-pasted onto the wrong track (e.g. Tumor_Type was titled
  # "Purity" and MSI_state was titled "Tumor_Grade").
  annotation_legend_param = list(
    Tumor_Type = list(title = "Tumor_Type", title_gp = gpar(fontsize = 13)),
    Tumor_SubType = list(title = "Tumor_SubType",
                         title_gp = gpar(fontsize = 13)),
    Age_state = list(title = "Age", title_gp = gpar(fontsize = 13)),
    Recurrence_Status = list(title = "Recurrence",
                             title_gp = gpar(fontsize = 13)),
    purity_state = list(title = "Purity", title_gp = gpar(fontsize = 13)),
    Tumor_Grade = list(title = "Tumor_Grade", title_gp = gpar(fontsize = 13)),
    Tumor_Stage = list(title = "Tumor_Stage", title_gp = gpar(fontsize = 13)),
    MSI_state = list(title = "MSI", title_gp = gpar(fontsize = 13))
  )
)

In [None]:
# Draw the clinical annotation tracks on their own: the heatmap body is a
# zero-row matrix with 100 columns, so only `top_annotations` is rendered.
# Row and column names are hidden.
heatmap_with_annotations <- Heatmap(
  matrix(nrow = 0, ncol = 100),
  show_column_names = FALSE,
  show_row_names = FALSE,
  top_annotation = top_annotations
)

# Render with the annotation legends placed on the right-hand side.
annotation <- draw(
  heatmap_with_annotations,
  annotation_legend_side = "right"
)


### Figure 2d

In [None]:
# Figure 2d input: long table with one row per (sample, cell type) proportion
# plus a cancer-type group label.
# NOTE(review): this is an absolute, machine-specific path, while the other
# cells read "./100samples_proportion.csv" — confirm whether it is the same
# file and whether the path can be made relative.
proportion <- read.csv(
  "/home/xutian/projects/Spatial_OV/figures/figure2/cell_type_proportion/100samples_proportion.csv",
  header = TRUE,
  row.names = 1
)

# Everything below needs samples in rows and cell types in columns ("Others"
# is removed by column name and the row names become `sample`). The Figure 2c
# cell reads a same-named file whose row names are treated as cell types, so
# transpose when the cell-type labels are found on the rows rather than on the
# columns. A table that is already sample-by-cell-type is left untouched.
known_cell_types <- c("NK", "T", "B", "DC", "Mast", "Neutrophil",
                      "Macrophage", "Endothelial", "Fibroblast", "Malignant",
                      "Others")
if (any(rownames(proportion) %in% known_cell_types) &&
    !any(colnames(proportion) %in% known_cell_types)) {
  proportion <- as.data.frame(t(proportion))
}

# Drop the "Others" category; `drop = FALSE` keeps a data frame even if only
# one column remains.
proportion <- proportion[, colnames(proportion) != "Others", drop = FALSE]

proportion_t <- as_tibble(proportion, rownames = "sample")
proportion_long <- proportion_t %>%
  pivot_longer(
    cols = -sample,
    names_to = "cell_type",
    values_to = "proportion"
  )

# Label every sample with its cancer type. `OC_sample_id`, `EC_sample_id` and
# `CC_sample_id` are not defined in the visible part of this script and must
# exist before this cell runs. Samples found in none of them get NA.
proportion_long <- proportion_long %>%
  mutate(group = case_when(
    sample %in% OC_sample_id ~ "OC",
    sample %in% EC_sample_id ~ "EC",
    sample %in% CC_sample_id ~ "CC"
  ))

In [None]:
# Figure 2d: violin plots with an inner box plot of cell-type proportions,
# compared between cancer types, one facet per cell type.

# Facet order of the cell types and left-to-right order of the groups.
# (`levels` is spelled out; `level` relied on partial argument matching.)
proportion_long$cell_type <- factor(
  proportion_long$cell_type,
  levels = c("NK", "T", "B", "DC", "Mast", "Neutrophil", "Macrophage",
             "Endothelial", "Fibroblast", "Malignant")
)
proportion_long$group <- factor(
  proportion_long$group,
  levels = c("OC", "EC", "CC")
)

group_colors <- c("OC" = "#ab526a", "EC" = "#d68081", "CC" = "#2878b5")

p <- ggplot(
  proportion_long,
  aes(x = group, y = proportion, color = group, fill = group)
) +
  # Both geoms set a constant outline colour, so only the fill shows the group.
  geom_violin(alpha = 0.9, position = position_dodge(width = 0.7),
              trim = FALSE, color = "grey90", linewidth = 0.5) +
  # `linewidth` replaces `size` for the box outline width; `size` is deprecated
  # for lines in the ggplot2 versions that accept `linewidth`, which this
  # script already uses for the violins and theme elements.
  geom_boxplot(width = 0.1, color = "white", linewidth = 0.5,
               outlier.shape = NA) +
  # Each cell type gets its own y range; 10 facets are laid out as 2 x 5.
  facet_wrap(~cell_type, nrow = 2, scales = "free_y", ncol = 5) +
  theme_minimal() +
  labs(title = "Cell Type Proportion", x = "", y = "Proportion") +
  # Fixed lower limit just below zero; the upper limit is left to the data.
  scale_y_continuous(limits = c(-0.01, NA)) +
  scale_color_manual(values = group_colors) +
  scale_fill_manual(values = group_colors) +
  theme(
    axis.text.x = element_blank(),
    axis.ticks.y = element_line(color = "black", linewidth = 0.8),
    axis.text.y = element_text(size = 14, color = "black"),
    panel.border = element_rect(color = "black", linewidth = 0.8, fill = NA),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    strip.text = element_text(size = 14, face = "bold")
  )

print(p)

### Figure S3i

In [None]:
# Figure S3i input: read the per-cancer-type table, transpose it, and reshape
# it to long form with one row per (cancer type, cell type) value.
cell_proportion_df <- as.data.frame(
  t(read.csv("./cancer_type_total_cell_proportion.csv", row.names = 1))
)

# After the transpose the row names identify the cancer type.
cell_proportion_df$Cancer_Type <- rownames(cell_proportion_df)

cell_proportion_long_df <- pivot_longer(
  cell_proportion_df,
  cols = -Cancer_Type,
  names_to = "cell_type",
  values_to = "Percentage"
)

# Fix the facet order of the cancer types and the stacking order of the cell
# types.
cell_proportion_long_df$Cancer_Type <- factor(
  cell_proportion_long_df$Cancer_Type,
  levels = c("OC_proportion", "EC_proportion", "CC_proportion")
)
cell_proportion_long_df$cell_type <- factor(
  cell_proportion_long_df$cell_type,
  levels = c("Others", "Fibroblast", "Malignant", "Macrophage",
             "Endothelial", "T", "DC", "B", "Neutrophil", "NK", "Mast")
)


In [None]:
# Figure S3i: one horizontal, 100%-stacked bar per cancer type showing its
# cell-type composition.

# Notebook display size for the rendered figure.
options(repr.plot.width = 7, repr.plot.height = 5)

condition_colors <- c(
  "NK" = "#CDCE6B", "T" = "#FFFF99", "B" = "#00CC99",
  "Neutrophil" = "#3366CC", "Macrophage" = "#990066", "DC" = "#FF9999",
  "Mast" = "#6699FF", "Endothelial" = "#FF3366", "Fibroblast" = "#008080",
  "Malignant" = "#0D5886", "Others" = "#d3d3d3"
)

p <- ggplot(
  cell_proportion_long_df,
  aes(x = "", y = Percentage, fill = cell_type)
) +
  # position = "fill" rescales every bar to a total of 1.
  geom_bar(stat = "identity", position = "fill") +
  facet_wrap(~ Cancer_Type, ncol = 1, scales = "free_x") +
  coord_flip() +
  scale_y_continuous(breaks = seq(0, 1, 0.25)) +
  scale_fill_manual(values = condition_colors) +
  # The fill encodes cell types, so the legend is titled "Cell Type" (it was
  # "Condition"). The value label now describes the 0-1 cell proportion; it is
  # blanked by the theme below.
  labs(x = NULL, y = "Proportion of Cells", fill = "Cell Type") +
  theme_minimal(base_size = 14) +
  theme(axis.text.x = element_blank(),
        axis.ticks.x = element_blank(),
        axis.title.x = element_blank(),
        legend.position = "bottom",
        panel.border = element_blank(),
        panel.grid = element_blank())

print(p)


### Figure S3k

In [None]:
# Figure S3k input: load the cell-count table (first CSV column = row names).
# NOTE: the object keeps the name `proportion` although it is read from the
# cell-count file.
proportion <- read.csv(
  "./100samples_cell_count.csv",
  header = TRUE,
  row.names = 1
)

# Remove the row named "Others".
proportion <- proportion[!(rownames(proportion) %in% "Others"), ]


In [None]:
# Reshape the table to long form (one row per cell type and sample) and compute
# per-cell-type summary statistics.
proportion$CellType <- rownames(proportion)
long_df <- proportion %>%
  tidyr::pivot_longer(
    cols = -CellType,
    names_to = "Sample",
    values_to = "Count"
  )

# Mean and first/third quartile of the counts for every cell type.
stats_df <- summarise(
  group_by(long_df, CellType),
  Mean = mean(Count),
  LowerQuartile = quantile(Count, 0.25),
  UpperQuartile = quantile(Count, 0.75)
)

# Sort cell types from the highest to the lowest mean.
stats_df <- arrange(stats_df, desc(Mean))

# Apply that ordering to the long table as well.
cell_type_order <- stats_df$CellType
long_df$CellType <- factor(long_df$CellType, levels = cell_type_order)


In [None]:
# Figure S3k: bar chart of the mean count per cell type, overlaid with the
# individual per-sample values (jittered points) and an interquartile-range
# error bar.

# Palette names must match the cell-type labels exactly: the key is
# "Malignant" (not "Malignant/Epithelial"), the same key used by
# `condition_colors` for Figure S3i; otherwise scale_fill_manual() cannot
# assign this colour to the malignant bar.
colors <- c(
  "NK" = "#CDCE6B", "T" = "#FFFF99", "B" = "#00CC99",
  "Neutrophil" = "#3366CC", "Macrophage" = "#990066", "DC" = "#FF9999",
  "Mast" = "#6699FF", "Endothelial" = "#FF3366", "Fibroblast" = "#008080",
  "Malignant" = "#0D5886", "Others" = "lightgray"
)

# Freeze the current row order of `stats_df` as the x-axis order.
stats_df$CellType <- factor(
  stats_df$CellType,
  levels = unique(stats_df$CellType)
)

p <- ggplot(stats_df, aes(x = CellType, y = Mean, fill = CellType)) +
  # `linewidth` replaces `size` for the bar outline width; `size` is deprecated
  # for lines in ggplot2 >= 3.4.0.
  geom_col(color = "black", linewidth = 1.5, width = 0.7) +
  # Individual values come from `long_df`, not from the summary table.
  geom_jitter(data = long_df,
              aes(x = CellType, y = Count, group = CellType),
              size = 1,
              position = position_jitter(width = 0.3),
              color = "#2c2c2c") +
  # The error bar spans the first to the third quartile.
  geom_errorbar(aes(ymin = LowerQuartile, ymax = UpperQuartile),
                width = 0.2) +
  scale_fill_manual(values = colors) +
  labs(title = "Distribution of Cell Types Across Samples",
       x = "Cell Type",
       y = "Average Count") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1),
        legend.position = "none",
        panel.border = element_blank(),
        panel.grid.major = element_blank(),
        axis.ticks = element_line(color = "black"),
        panel.grid.minor = element_blank())

print(p)