Cancer evolution analysis in single-cell resolution with both genomic and transcriptomic information
devtools::install_github("jaewon-cho/canvolution",ref = "master")
- library(Seurat)
- library(igraph)
- #version >= 1.3.4
- library(reshape)
- library(ggplot2)
Please see "preprocess.md" file while making the "required files"
- 1: Seurat object with clonotype for each cancer cell (name of metadata should be “cancer_clone”) ** Example of metadata
# Per-cell metadata table taken directly from the Seurat object.
meta_info <- Seurat_object@meta.data
# Alternative: read a previously exported, tab-separated metadata table.
#meta_info <- read.table("data/meta_info_lung", sep = "\t")
- 2: patient_clone_path (see 5. Evolution tree generation) ** Usage
# Read the clone path table of one patient (here AZ003).
clone_path <- read.table("data/robustclone/AZ003_clone_path")
- 3: sample_clone_mut_list (see 6. Mutation list object generation) ** Usage
# Restore the saved mutation list object of sample LT_S21 into the workspace.
load("data/clone_mutation_info/LT_S21_clone_mut_list")
- 4: Clonal abundance (see 7. Clonal (cluster) abundance) [for selection analysis only]
** Usage
# Read the clonal abundance table (CSV); only needed for selection analysis.
a <- read.csv("data/lung_clone_ratio.csv")
- 5: cell-cell interaction results (see 8. Inferring cell-cell interaction) [for ligand-receptor analysis only]
** Output: data/cellchat/lung_source_gn, data/cellchat/lung_target_gn ** source_gn: source from cancer cell ** target_gn: target from cancer cell ** List object name: res
** Usage
# Restore the saved list object `res` from the source_gn file.
load("data/cellchat/lung_source_gn")
# The names of `res` are the target cell types.
names(res)
# Entry for one target cell type (Dendritic): mutation profiles of each cancer
# cluster (by gene expression).
res$Dendritic
- 6: Preparation of gmt format for geneset (9. Preparation of gmt format for geneset) ** Usage
# GSEABase is loaded for getGmt(), which reads the geneset file in gmt format.
library(GSEABase)
gmt <- getGmt("data/cancersea_gmt")
library(canvolution)
library(GSEABase)
Generation of signature scores for each clone. Users may also generate their own signature scores according to their preference.
The signature score is the Jaccard index between the mutation profile of a clone and a given geneset.
# Metadata can be taken from the Seurat object or read from an exported table.
#meta_info <- seurat_obj@meta.data
meta_info <- read.table("data/meta_info_lung", sep = "\t")
sample_list <- unique(meta_info$sample_name)
# Geneset collections to score, keyed by collection name.
gmt_list <- list(
  cancersea = getGmt("data/cancersea_gmt"),
  core_fitness = getGmt("data/core_fitness_gmt")
)
# Prerequisite: the mutation profile of each clone must already be prepared
# for every sample (see 3: sample_clone_mut_list).
mut_gmt(sample_list, gmt_list, "data/clone_mutation_info/", "_clone_mut_list")
- Output: data/feature/sample_mut_gmt_jacc (sample: LT_S01, LT_S80 ...) ** column: clonotype, row: feature
The signature score is generated by the AddModuleScore function in the Seurat package.
# Metadata can be taken from the Seurat object or read from an exported table.
#meta_info <- seurat_obj@meta.data
meta_info <- read.table("data/meta_info_lung", sep = "\t")
sample_list <- unique(meta_info$sample_name)
# Geneset collections to score, keyed by collection name.
gmt_list <- list(
  cancersea = getGmt("data/cancersea_gmt"),
  core_fitness = getGmt("data/core_fitness_gmt")
)
# seurat_object must contain cancer cells only.
addmodulescore_multiple_pathway(seurat_object, gmt_list, "lung")
- Output: data/feature/lung_cell_addmodule ** column: single-cell, row: feature
The signature score is the Jaccard index between the mutation profile of a clone and a given ligand or receptor of the cell-cell interactions of each clone_cluster (see 8. Inferring cell-cell interaction).
# Metadata can be taken from the Seurat object or read from an exported table.
#meta_info <- seurat_obj@meta.data
meta_info <- read.table("data/meta_info_lung", sep = "\t")
# One entry per distinct value of the sample_name metadata column.
sample_list <- unique(meta_info$sample_name)
# Arguments after sample_list are presumably: directory of the cell-cell
# interaction results, dataset name, and the prefix/suffix of the per-sample
# clone mutation list files — confirm against the function documentation.
mut_gmt_lr(sample_list, "data/cellchat/", "lung", "data/clone_mutation_info/", "_clone_mut_list")
- Output: sample_mut_cellchat_jacc.csv (sample: AZ005) ** column: sample_clonotype, row: (target: receptor of cancer cluster, source: ligand of cancer cluster)(cancer cluster)(other celltypes)
Mean expression of mutated genes for each signature (feature)
# Geneset collections to score, keyed by collection name.
gmt_list <- list()
# Same geneset file location as in the other examples.
gmt <- getGmt("data/cancersea_gmt")
gmt_list$cancersea <- gmt
# Restores the Seurat object; in this example the restored object is named d3.
# It should contain cancer cells only.
load("your_seurat_object")
sample_info <- "sample_name"
# Named expr_mat so the variable does not reuse the name of base::exp().
expr_mat <- GetAssayData(d3)
meta_info <- d3@meta.data
name <- "lung"
# Prefix and suffix of the per-sample clone mutation list files.
clone_info_prefix <- "data/clone_mutation_info/"
clone_info_suffix <- "_clone_mut_list"
mut_gmt_module(expr_mat, meta_info, sample_info, gmt_list, clone_info_prefix, clone_info_suffix, name)
- Output: name_mut_gmt_addmodule (name: lung) ** column: single-cell, row: feature (same as Transcriptome signature analysis)
Correlation analysis (Spearman correlation) between each evolution path and the given signature scores
- Information of clonotype for each cancer cell (name of metadata should be “cancer_clone”)
- Output format: column: sample_path (ex: LT_S66__6_2_13_7_11_9), row: features
# Metadata can be taken from the Seurat object or read from an exported table.
#meta_info <- seurat_obj@meta.data
meta_info <- read.table("data/meta_info_lung", sep = "\t", check.names = FALSE)
name <- "lung"
pat_info <- "pat_collapse"
sample_info <- "sample_name"
clone_path_file_prefix <- "data/robustclone/"
# Feature files with suffix _mut_gmt_jacc
# (see Signature generation – Mutation signature).
feature_file_prefix <- "data/feature/"
output_file_prefix <- "./"
evolution_mut_corr_samplewise(meta_info, name, pat_info, sample_info, clone_path_file_prefix, feature_file_prefix, output_file_prefix)
- Output: name_sample_clone_evolution_mut_scc_sample (name: lung, sample: LT_S66)
- Ex) lung_LT_S66_clone_evolution_mut_scc_sample
# Metadata can be taken from the Seurat object or read from an exported table.
#meta_info <- seurat_obj@meta.data
meta_info <- read.table("data/meta_info_lung", sep = "\t", check.names = FALSE)
name <- "lung"
pat_info <- "pat_collapse"
sample_info <- "sample_name"
clone_path_file_prefix <- "data/robustclone/"
# Feature file suffix: _cell_addmodule
# (see Signature generation – Transcriptome signature).
feature_file_prefix <- "data/feature/"
output_file_prefix <- "./"
evolution_signature_cellwise_samplewise(meta_info, name, pat_info, sample_info, clone_path_file_prefix, feature_file_prefix, output_file_prefix)
- Output: name_sample_clone_evolution_signature_scc_sample (name: lung, sample: LT_S66)
- Ex) lung_LT_S66_clone_evolution_signature_scc_sample
# Metadata can be taken from the Seurat object or read from an exported table.
#meta_info <- seurat_obj@meta.data
meta_info <- read.table("data/meta_info_lung", sep = "\t", check.names = FALSE)
name <- "lung"
pat_info <- "pat_collapse"
sample_info <- "sample_name"
# Name of the metadata column used as cluster assignment.
cluster_info <- "RNA_snn_res.1"
clone_path_file_prefix <- "data/robustclone/"
# Feature file suffix: _mut_LR_jacc
# (see Signature generation – ligand-receptor signature).
feature_file_prefix <- "data/cellchat/"
output_file_prefix <- "./"
clone_cluster_weighted_lr(meta_info, name, pat_info, sample_info, cluster_info, clone_path_file_prefix, feature_file_prefix, output_file_prefix)
- Output: name_sample_clone_cluster_weighted_lr_step_scc (name: lung, sample: LT_S66)
- Ex) lung_LT_S66_clone_cluster_weighted_lr_step_scc
# Metadata can be taken from the Seurat object or read from an exported table.
#meta_info <- seurat_obj@meta.data
meta_info <- read.table("data/meta_info_lung", sep = "\t", check.names = FALSE)
name <- "lung"
pat_info <- "pat_collapse"
sample_info <- "sample_name"
clone_path_file_prefix <- "data/robustclone/"
# Please run mut_gmt_module first and save its output in this directory
# (see Signature generation – Mutated gene expression).
# Feature file suffix: _mut_gmt_addmodule
feature_file_prefix <- "data/feature/"
output_file_prefix <- "./"
evolution_mut_exp_cellwise_samplewise(meta_info, name, pat_info, sample_info, clone_path_file_prefix, feature_file_prefix, output_file_prefix)
- Output: name_sample_clone_evolution_mut_exp_scc_sample (name: lung, sample: LT_S66)
- Ex) lung_LT_S66_clone_evolution_mut_exp_scc_sample
Correlation analysis (Spearman correlation) between the abundance of clones and the given signature scores in each evolution path
- Information of clonotype for each cancer cell (name of metadata should be “cancer_clone”)
- Output format: column: sample_path (ex: LT_S66__6_2_13_7_11_9), row: features
# Metadata can be taken from the Seurat object or read from an exported table.
#meta_info <- seurat_obj@meta.data
meta_info <- read.table("data/meta_info_lung", sep = "\t", check.names = FALSE)
name <- "lung"
pat_info <- "pat_collapse"
sample_info <- "sample_name"
clone_path_file_prefix <- "data/robustclone/"
# Feature files with suffix _mut_gmt_jacc
# (see Signature generation – Mutation signature).
feature_file_prefix <- "data/feature/"
# Directory of the clonal abundance file.
abundance_file_prefix <- "data/"
output_file_prefix <- "./"
evolution_clone_abundance_mut_corr_samplewise(meta_info, name, pat_info, sample_info, clone_path_file_prefix, feature_file_prefix, abundance_file_prefix, output_file_prefix)
- Output: name_sample_clone_abundance_mut_scc_sample (name: lung, sample: LT_S66)
- Ex) lung_LT_S66_clone_abundance_mut_scc_sample
# Metadata can be taken from the Seurat object or read from an exported table.
#meta_info <- seurat_obj@meta.data
meta_info <- read.table("data/meta_info_lung", sep = "\t", check.names = FALSE)
name <- "lung"
pat_info <- "pat_collapse"
sample_info <- "sample_name"
clone_path_file_prefix <- "data/robustclone/"
# NOTE(review): this prefix and the suffix note below look copied from the
# other examples; a transcriptomic analysis presumably reads the
# _cell_addmodule files under data/feature/ — confirm before running.
feature_file_prefix <- "data/cellchat/"
#suffix: _mut_gmt_jacc (see Signature generation – Mutation signature)
# Directory of the clonal abundance file.
abundance_file_prefix <- "data/"
output_file_prefix <- "./"
evolution_clone_abundance_transcriptomic_corr_samplewise(meta_info, name, pat_info, sample_info, clone_path_file_prefix, feature_file_prefix, abundance_file_prefix, output_file_prefix)
- Output: name_sample_clone_abundance_transcriptomic_scc_sample (name: lung, sample: LT_S66)
- Ex) lung_LT_S66_clone_abundance_transcriptomic_scc_sample
# Metadata can be taken from the Seurat object or read from an exported table.
#meta_info <- seurat_obj@meta.data
meta_info <- read.table("data/meta_info_lung", sep = "\t", check.names = FALSE)
name <- "lung"
pat_info <- "pat_collapse"
sample_info <- "sample_name"
# Name of the metadata column used as cluster assignment.
cluster_info <- "RNA_snn_res.1"
clone_path_file_prefix <- "data/robustclone/"
# Feature file suffix: _mut_LR_jacc
# (see Signature generation – ligand-receptor signature).
feature_file_prefix <- "data/cellchat/"
# Directory of the clonal abundance file.
abundance_file_prefix <- "data/"
output_file_prefix <- "./"
clone_cluster_weighted_lr_abundance(meta_info, name, pat_info, sample_info, cluster_info, clone_path_file_prefix, feature_file_prefix, abundance_file_prefix, output_file_prefix)
- Output: name_sample_clone_cluster_weighted_lr_abundance_scc (name: lung, sample: LT_S66)
- Ex) lung_LT_S66_clone_cluster_weighted_lr_abundance_scc
# Metadata can be taken from the Seurat object or read from an exported table.
#meta_info <- seurat_obj@meta.data
meta_info <- read.table("data/meta_info_lung", sep = "\t", check.names = FALSE)
name <- "lung"
pat_info <- "pat_collapse"
sample_info <- "sample_name"
clone_path_file_prefix <- "data/robustclone/"
# Please run mut_gmt_module first and save its output in this directory
# (see Signature generation – Mutated gene expression).
# Feature file suffix: _mut_gmt_addmodule
feature_file_prefix <- "data/feature/"
# Directory of the clonal abundance file.
abundance_file_prefix <- "data/"
output_file_prefix <- "./"
# abundance_file_prefix is passed before output_file_prefix, in the same
# position as in the other clone-abundance examples.
evolution_clone_abundance_mut_exp_corr_samplewise(meta_info, name, pat_info, sample_info, clone_path_file_prefix, feature_file_prefix, abundance_file_prefix, output_file_prefix)
- Output: name_sample_clone_abundance_mut_exp_scc_sample (name: lung, sample: LT_S66)
- Ex) lung_LT_S66_clone_abundance_mut_exp_scc_sample
Statistical test between two groups (Wilcoxon rank-sum test) or multiple groups (Kruskal–Wallis test) using the results of the evolution path analysis (clonal selection analysis). A horizontal barplot shows, for each feature, the group with the highest value among the groups.
- meta_info_list: names of the metadata columns to analyze (ex: "primary_or_metastaic", "biopsy_site", "smokingHx", ...)
# Metadata can be taken from the Seurat object or read from an exported table.
#meta_info <- seurat_obj@meta.data
meta_info <- read.table("data/meta_info_lung", sep = "\t", check.names = FALSE)
name <- "lung"
# NOTE(review): the evolution path examples use "pat_collapse"; confirm which
# column name exists in your metadata.
pat_info <- "pat_collapsed"
sample_info <- "sample_name"
# Metadata columns to carry into the analysis table.
meta_info_list <- c("primary_or_metastaic", "biopsy_site", "smokingHx", "histolgy", "best_rxn_status")
# Location and suffix of the evolution path analysis results to read.
file_prefix <- "./"
file_suffix <- "_clone_evolution_mut_scc_sample"
result_suffix <- "evolution_mut_scc_analysis"
result <- generate_table_for_analysis(name, meta_info, pat_info, sample_info, meta_info_list, file_prefix, file_suffix)
total_table <- result[[1]]
feature_size <- result[[2]]
# Two groups: Wilcoxon test.
statistic_res <- statistic_test(total_table, feature_size, "primary_or_metastaic", "wilcox", name, result_suffix, TRUE)
# Multiple groups: Kruskal-Wallis test.
statistic_res <- statistic_test(total_table, feature_size, "best_rxn_status", "kruskal", name, result_suffix, TRUE)
Features are clustered to see whether there are co-evolving features. Starting from the distance matrix, edges are filtered out by a given threshold (ex: Z-score). Connected components are then used as the first clustering. If the result is too crowded, Louvain clustering is also provided for further clustering. With the clustering information, it is possible to conduct a statistical test on the average value of the features within each cluster.
- Clustering plot
# Metadata can be taken from the Seurat object or read from an exported table.
#meta_info <- seurat_obj@meta.data
meta_info <- read.table("data/meta_info_lung", sep = "\t", check.names = FALSE)
name <- "lung"
# NOTE(review): the evolution path examples use "pat_collapse"; confirm which
# column name exists in your metadata.
pat_info <- "pat_collapsed"
sample_info <- "sample_name"
meta_info_list <- c("primary_or_metastaic", "biopsy_site", "smokingHx", "histolgy", "best_rxn_status")
file_prefix <- "data/"
file_suffix <- "_clone_evolution_mut_scc_sample"
result_suffix <- "evolution_mut_scc_analysis"
result <- generate_table_for_analysis(name, meta_info, pat_info, sample_info, meta_info_list, file_prefix, file_suffix)
total_table_pre <- result[[1]]
feature_size_pre <- result[[2]]
g3 <- feature_graph(total_table_pre, feature_size_pre)
# [1: plotting connected components]
new_result <- feature_clustering_connected_comp(g3, total_table_pre, feature_size_pre, z_thr = 1.645)
# Elements 3 and 4 of the result are passed to clustering_plot() below.
g6 <- new_result[[3]]
clustering_res <- new_result[[4]]
clustering_plot(g6, clustering_res, "myplot.png")
# [2: plotting louvain clustering]
new_result1 <- feature_clustering_louvain(g6, total_table_pre, feature_size_pre, resolution = 1.1)
# Optional: plot the Louvain result the same way. Left disabled here; as
# written it would reuse the file name myplot.png from step 1.
# g3 <- new_result1[[3]]
# g4 <- new_result1[[4]]
# clustering_plot(g3, g4, "myplot.png")
- Statistical test
# Metadata can be taken from the Seurat object or read from an exported table.
#meta_info <- seurat_obj@meta.data
meta_info <- read.table("data/meta_info_lung", sep = "\t", check.names = FALSE)
name <- "lung"
# NOTE(review): the evolution path examples use "pat_collapse"; confirm which
# column name exists in your metadata.
pat_info <- "pat_collapsed"
sample_info <- "sample_name"
meta_info_list <- c("primary_or_metastaic", "biopsy_site", "smokingHx", "histolgy", "best_rxn_status")
file_prefix <- "data/"
file_suffix <- "_clone_evolution_mut_scc_sample"
result_suffix <- "evolution_mut_scc_analysis"
result <- generate_table_for_analysis(name, meta_info, pat_info, sample_info, meta_info_list, file_prefix, file_suffix)
total_table_pre <- result[[1]]
feature_size_pre <- result[[2]]
g3 <- feature_graph(total_table_pre, feature_size_pre)
# [1: Connected components]
new_result <- feature_clustering_connected_comp(g3, total_table_pre, feature_size_pre)
# Element 1 is the table used for testing, element 2 the module information;
# the number of modules becomes the feature size for the test.
total_table <- new_result[[1]]
module_info_new <- new_result[[2]]
feature_size <- length(module_info_new)
g6 <- new_result[[3]]
clustering_res <- new_result[[4]]
statistic_res <- statistic_test(total_table, feature_size, "primary_or_metastaic", "wilcox", name, result_suffix, TRUE)
# [2: Louvain clustering]
new_result1 <- feature_clustering_louvain(g6, total_table_pre, feature_size_pre, resolution = 1.1)
total_table <- new_result1[[1]]
module_info <- new_result1[[2]]
feature_size <- length(module_info)
# The metadata name must match exactly (no leading or trailing spaces).
statistic_res <- statistic_test(total_table, feature_size, "primary_or_metastaic", "wilcox", name, result_suffix, TRUE)
Select evolution paths by a given threshold on a feature.
# Metadata can be taken from the Seurat object or read from an exported table.
#meta_info <- seurat_obj@meta.data
meta_info <- read.table("data/meta_info_lung", sep = "\t", check.names = FALSE)
# name must be defined: it is passed to generate_table_for_analysis() below.
name <- "lung"
# NOTE(review): the evolution path examples use "pat_collapse"; confirm which
# column name exists in your metadata.
pat_info <- "pat_collapsed"
sample_info <- "sample_name"
meta_info_list <- c("primary_or_metastaic", "biopsy_site", "smokingHx", "histolgy", "best_rxn_status")
file_prefix <- "data/"
file_suffix <- "_clone_evolution_mut_scc_sample"
result_suffix <- "evolution_mut_scc_analysis"
result <- generate_table_for_analysis(name, meta_info, pat_info, sample_info, meta_info_list, file_prefix, file_suffix)
total_table <- result[[1]]
# Select paths of sample LT_S21 for the feature "Angiogenesis", using
# threshold 0 and direction "high".
path_list <- select_path_by_thr(total_table, "LT_S21", "Angiogenesis", 0, "high")
- To extract mutation profile from given evolution paths.
# Restores sample_clone_mut_list for sample LT_S21.
load("data/clone_mutation_info/LT_S21_clone_mut_list")
# extract all the mutation
path_mut_gn_list <- mutation_from_path("LT_S21__29_23_27_25", "LT_S21", sample_clone_mut_list, "all", FALSE)
# extract only genes related to the given geneset
# NOTE: feature_gn_list is not defined in this example; assign the genes of
# the geneset of interest to it before running the next line.
path_mut_gn_list <- mutation_from_path("LT_S21__29_23_27_25", "LT_S21", sample_clone_mut_list, feature_gn_list, FALSE)
- Extracting common genes from multiple paths
# Collect, for every path, its sample name and that sample's mutation list.
a <- c("LT_S21__29_23_27_25", "LT_S53__3_1_2")
path_list_info <- vector("list", length(a))
names(path_list_info) <- a
for (j in seq_along(a)) {
  path <- a[j]
  # The sample name is the part of the current path before "__"
  # (derived from the current path, not from the first one).
  sample <- strsplit(path, "__", fixed = TRUE)[[1]][1]
  # Same directory as the other examples that load a clone mutation list;
  # adjust if your files are stored elsewhere.
  file_name <- paste0("data/clone_mutation_info/", sample, "_clone_mut_list")
  # load() restores sample_clone_mut_list for this sample.
  load(file_name)
  path_list_info[[j]] <- list(sample, sample_clone_mut_list)
}
#names(path_list_info): path
#path_list_info[[1]][[1]]: sample
#path_list_info[[1]][[2]]: sample_clone_mut_list
genes <- common_mutation_from_path(path_list_info, "all", FALSE)
Joint analysis of mutational and transcriptional landscapes in human cancer reveals key perturbations during cancer evolution Genome Biol. 2024; 25: 65 doi: 10.1186/s13059-024-03201-1