Skip to content

yanglq-bioinfo/canvolution

 
 

Folders and files

NameName
Last commit message
Last commit date

Latest commit

 

History

43 Commits
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 

Repository files navigation

canvolution

Cancer evolution analysis at single-cell resolution using both genomic and transcriptomic information

Installation

devtools::install_github("jaewon-cho/canvolution",ref = "master")

required libraries

  • library(Seurat)
  • library(igraph) (version >= 1.3.4)
  • library(reshape)
  • library(ggplot2)

Preprocessing

Please see the "preprocess.md" file when preparing the required files listed below.

Required files (must match the format)

  • 1: Seurat object with clonotype for each cancer cell (name of metadata should be “cancer_clone”) ** Example of metadata
meta_info <- Seurat_object@meta.data
#meta_info <- read.table("data/meta_info_lung", sep = "\t")

image

  • 2: patient_clone_path (see 5. Evolution tree generation) ** Usage
clone_path <- read.table("data/robustclone/AZ003_clone_path")

image

  • 3: sample_clone_mut_list (see 6. Mutation list object generation) ** Usage
load("data/clone_mutation_info/LT_S21_clone_mut_list")

image

  • 4: Clonal abundance (see 7. Clonal (cluster) abundance) [for selection analysis only]

** Usage

a <- read.csv("data/lung_clone_ratio.csv")

image

  • 5: cell-cell interaction results (see 8. Inferring cell-cell interaction) [for ligand-receptor analysis only]

** Output: data/cellchat/lung_source_gn, data/cellchat/lung_target_gn ** source_gn: source from cancer cell ** target_gn: target from cancer cell ** List object name: res

** Usage

load("data/cellchat/lung_source_gn")
names(res)
# target celltypes
res$Dendritic
#mutation profiles of each cancer cluster (by gene expression)

image

  • 6: Preparation of gmt format for geneset (9. Preparation of gmt format for geneset) ** Usage
library(GSEABase)
gmt <- getGmt("data/cancersea_gmt")

Start!

library(canvolution)
library(GSEABase)

Evolution analysis

Signature generation

Generate signature scores for each clone. Users may also generate their own signature scores if they prefer.

Mutation signature

The signature score is the Jaccard index between the mutation profile of a clone and a given gene set.

#meta_info <- seurat_obj@meta.data
meta_info <- read.table("data/meta_info_lung", sep = "\t")
sample_list <- unique(meta_info$sample_name)
gmt_list <- list()
gmt <- getGmt("data/cancersea_gmt")
gmt_list$cancersea<- gmt
gmt <- getGmt("data/core_fitness_gmt")
gmt_list$core_fitness <- gmt
mut_gmt(sample_list, gmt_list, "data/clone_mutation_info/", "_clone_mut_list")
#should prepare mutation profile each clone, see 3: sample_clone_mut_list
  • Output: data/feature/sample_mut_gmt_jacc (sample: LT_S01, LT_S80 ...) ** column: clonotype, row: feature

image

Transcriptome signature

Signature score is generated by AddModuleScore function in Seurat package.

#meta_info <- seurat_obj@meta.data
meta_info <- read.table("data/meta_info_lung", sep = "\t")
sample_list <- unique(meta_info$sample_name)
gmt_list <- list()
gmt <- getGmt("data/cancersea_gmt")
gmt_list$cancersea<- gmt
gmt <- getGmt("data/core_fitness_gmt")
gmt_list$core_fitness <- gmt
addmodulescore_multiple_pathway(seurat_object, gmt_list, "lung")
#seurat_object: only cancer cells
  • Output: data/feature/lung_cell_addmodule ** column: single-cell, row: feature

image

ligand-receptor signature

The signature score is the Jaccard index between the mutation profile of a clone and the ligands or receptors of the cell-cell interactions of each clone_cluster (see 8. Inferring cell-cell interaction).

#meta_info <- seurat_obj@meta.data
meta_info <- read.table("data/meta_info_lung", sep = "\t")
sample_list <- unique(meta_info$sample_name)
mut_gmt_lr(sample_list, "data/cellchat/", "lung", "data/clone_mutation_info/", "_clone_mut_list") 
  • Output: sample_mut_cellchat_jacc.csv (sample: AZ005) ** column: sample_clonotype, row: (target: receptor of cancer cluster, source: ligand of cancer cluster)(cancer cluster)(other celltypes)

image

Mutated gene expression

Mean expression of mutated genes for each signature (feature)

gmt_list <- list()
gmt <- getGmt("cancersea_gmt")
gmt_list$cancersea<- gmt
load("your_seurat_object")
#object name: d3
sample_info <- "sample_name"
exp <- GetAssayData(d3)
meta_info <- d3@meta.data
name <- "lung"
clone_info_prefix <- "data/clone_mutation_info/"
clone_info_suffix <- "_clone_mut_list"
mut_gmt_module(exp, meta_info, sample_info, gmt_list, clone_info_prefix,clone_info_suffix, name)#seurat_object: only cancer cells
  • Output: name_mut_gmt_addmodule (name: lung) ** column: single-cell, row: feature (same as Transcriptome signature analysis)

Evolution path analysis

Correlation analysis (Spearman correlation) between each evolution path and the given signature scores

  • Information of clonotype for each cancer cell (name of metadata should be “cancer_clone”)

image

  • Output format: column: sample_path (ex: LT_S66__6_2_13_7_11_9), row: features

image

Mutation signature

#meta_info <- seurat_obj@meta.data
meta_info <- read.table("data/meta_info_lung", sep = "\t", check.names = F)
name <- "lung"
pat_info <- "pat_collapse"
sample_info <- "sample_name"
clone_path_file_prefix <- "data/robustclone/"
feature_file_prefix <- "data/feature/"
#suffix: _mut_gmt_jacc (see Signature generation – Mutation signature)
output_file_prefix <- "./"
evolution_mut_corr_samplewise(meta_info, name, pat_info, sample_info, clone_path_file_prefix, feature_file_prefix, output_file_prefix)
  • Output: name_sample_clone_evolution_mut_scc_sample (name: lung, sample: LT_S66)
  • Ex) lung_LT_S66_clone_evolution_mut_scc_sample

Transcriptome signature

#meta_info <- seurat_obj@meta.data
meta_info <- read.table("data/meta_info_lung", sep = "\t", check.names = F)
name <- "lung"
pat_info <- "pat_collapse"
sample_info <- "sample_name"
clone_path_file_prefix <- "data/robustclone/"
feature_file_prefix <- "data/feature/"
#feature file suffix: _cell_addmodule (see Signature generation – Transcriptome signature)
output_file_prefix <- "./"
evolution_signature_cellwise_samplewise(meta_info, name, pat_info, sample_info, clone_path_file_prefix, feature_file_prefix, output_file_prefix)
  • Output: name_sample_clone_evolution_signature_scc_sample (name: lung, sample: LT_S66)
  • Ex) lung_LT_S66_clone_evolution_signature_scc_sample

Ligand-receptor

#meta_info <- seurat_obj@meta.data
meta_info <- read.table("data/meta_info_lung", sep = "\t", check.names = F)
name <- "lung"
pat_info <- "pat_collapse"
sample_info <- "sample_name"
cluster_info <- "RNA_snn_res.1"
clone_path_file_prefix <- "data/robustclone/"
feature_file_prefix <- "data/cellchat/"
#feature file suffix: _mut_LR_jacc (see Signature generation – ligand-receptor signature)
output_file_prefix <- "./"
clone_cluster_weighted_lr(meta_info, name, pat_info, sample_info, cluster_info, clone_path_file_prefix, feature_file_prefix, output_file_prefix) 
  • Output: name_sample_clone_cluster_weighted_lr_step_scc (name: lung, sample: LT_S66)
  • Ex) lung_LT_S66_clone_cluster_weighted_lr_step_scc

Mutated gene expression

#meta_info <- seurat_obj@meta.data
meta_info <- read.table("data/meta_info_lung", sep = "\t", check.names = F)
name <- "lung"
pat_info <- "pat_collapse"
sample_info <- "sample_name"
clone_path_file_prefix <- "data/robustclone/"
feature_file_prefix <- "data/feature/"
# please run mut_gmt_module and save in the right directory (see Signature generation – Mutated gene expression)
# feature file suffix: _mut_gmt_addmodule
output_file_prefix <- "./"
evolution_mut_exp_cellwise_samplewise(meta_info, name, pat_info, sample_info, clone_path_file_prefix, feature_file_prefix, output_file_prefix)
  • Output: name_sample_clone_evolution_mut_exp_scc_sample (name: lung, sample: LT_S66)
  • Ex) lung_LT_S66_clone_evolution_mut_exp_scc_sample

Clonal selection analysis

Correlation analysis (Spearman correlation) between clone abundance and the given signature scores in each evolution path

  • Information of clonotype for each cancer cell (name of metadata should be “cancer_clone”)

image

  • Output format: column: sample_path (ex: LT_S66__6_2_13_7_11_9), row: features

Mutation signature

#meta_info <- seurat_obj@meta.data
meta_info <- read.table("data/meta_info_lung", sep = "\t", check.names = F)
name <- "lung"
pat_info <- "pat_collapse"
sample_info <- "sample_name"
clone_path_file_prefix <- "data/robustclone/"
feature_file_prefix <- "data/feature/"
#suffix: _mut_gmt_jacc (see Signature generation – Mutation signature)
abundance_file_prefix <- "data/"
output_file_prefix <- "./"
evolution_clone_abundance_mut_corr_samplewise(meta_info, name, pat_info, sample_info, clone_path_file_prefix, feature_file_prefix, abundance_file_prefix, output_file_prefix)
  • Output: name_sample_clone_abundance_mut_scc_sample (name: lung, sample: LT_S66)
  • Ex) lung_LT_S66_clone_abundance_mut_scc_sample

Transcriptome signature

#meta_info <- seurat_obj@meta.data
meta_info <- read.table("data/meta_info_lung", sep = "\t", check.names = F)
name <- "lung"
pat_info <- "pat_collapse"
sample_info <- "sample_name"
clone_path_file_prefix <- "data/robustclone/"
feature_file_prefix <- "data/feature/"
#feature file suffix: _cell_addmodule (see Signature generation – Transcriptome signature)
abundance_file_prefix <- "data/"
output_file_prefix <- "./"
evolution_clone_abundance_transcriptomic_corr_samplewise(meta_info, name, pat_info, sample_info, clone_path_file_prefix, feature_file_prefix, abundance_file_prefix, output_file_prefix)
  • Output: name_sample_clone_abundance_transcriptomic_scc_sample (name: lung, sample: LT_S66)
  • Ex) lung_LT_S66_clone_abundance_transcriptomic_scc_sample

Ligand-receptor

#meta_info <- seurat_obj@meta.data
meta_info <- read.table("data/meta_info_lung", sep = "\t", check.names = F)
name <- "lung"
pat_info <- "pat_collapse"
sample_info <- "sample_name"
cluster_info <- "RNA_snn_res.1"
clone_path_file_prefix <- "data/robustclone/"
feature_file_prefix <- "data/cellchat/"
#feature file suffix: _mut_LR_jacc (see Signature generation – ligand-receptor signature)
abundance_file_prefix <- "data/"
output_file_prefix <- "./"
clone_cluster_weighted_lr_abundance (meta_info, name, pat_info, sample_info, cluster_info, clone_path_file_prefix, feature_file_prefix, abundance_file_prefix, output_file_prefix) 
  • Output: name_sample_clone_cluster_weighted_lr_abundance_scc (name: lung, sample: LT_S66)
  • Ex) lung_LT_S66_clone_cluster_weighted_lr_abundance_scc

Mutated gene expression

#meta_info <- seurat_obj@meta.data
meta_info <- read.table("data/meta_info_lung", sep = "\t", check.names = F)
name <- "lung"
pat_info <- "pat_collapse"
sample_info <- "sample_name"
clone_path_file_prefix <- "data/robustclone/"
feature_file_prefix <- "data/feature/"
# please run mut_gmt_module and save in the right directory (see Signature generation – Mutated gene expression)
# feature file suffix: _mut_gmt_addmodule
abundance_file_prefix <- "data/"
output_file_prefix <- "./"
evolution_clone_abundance_mut_exp_corr_samplewise(meta_info, name, pat_info, sample_info, clone_path_file_prefix, feature_file_prefix, abundance_file_prefix, output_file_prefix)
  • Output: name_sample_clone_abundance_mut_exp_scc_sample (name: lung, sample: LT_S66)
  • Ex) lung_LT_S66_clone_abundance_mut_exp_scc_sample

Statistical test

Statistical test between two groups (Wilcoxon rank-sum test) or among multiple groups (Kruskal–Wallis test), using the results from the evolution path analysis (or the clonal selection analysis). The horizontal bar plot shows which group is the highest among the groups.

  • meta_info_list: names of the metadata columns to analyze (ex: c("primary_or_metastaic", "biopsy_site", "smokingHx", ...))
#meta_info <- seurat_obj@meta.data
meta_info <- read.table("data/meta_info_lung", sep = "\t", check.names = F)
name <- "lung"
pat_info <- "pat_collapsed"
sample_info <- "sample_name"
meta_info_list <- c("primary_or_metastaic","biopsy_site","smokingHx","histolgy","best_rxn_status")
file_prefix <- "./"
file_suffix <- "_clone_evolution_mut_scc_sample"
result_suffix <- "evolution_mut_scc_analysis"
result <- generate_table_for_analysis(name, meta_info, pat_info, sample_info, meta_info_list,file_prefix, file_suffix)
total_table <- result[[1]]
feature_size <- result[[2]]

statistic_res<-statistic_test(total_table, feature_size, "primary_or_metastaic", "wilcox", "lung", result_suffix, T)
# two groups

statistic_res<-statistic_test(total_table, feature_size, "best_rxn_status", "kruskal", "lung", result_suffix, T)
# multiple groups
  • total_table image

  • statistic_res ** the value for each group is the mean value of that group

image

  • statistic_test plot image

Feature clustering

Cluster features to see whether there are co-evolving features. Based on the distance matrix, edges are first filtered by a threshold (ex: Z-score). Connected components of the filtered graph are then used as the initial clustering. If a component is too crowded, Louvain clustering is also provided for further clustering. With the clustering information, statistical tests can be conducted on the average value of the features within each cluster.

  • Clustering plot
#meta_info <- seurat_obj@meta.data
meta_info <- read.table("data/meta_info_lung", sep = "\t", check.names = F)
name <- "lung"
pat_info <- "pat_collapsed"
sample_info <- "sample_name"
meta_info_list <- c("primary_or_metastaic","biopsy_site","smokingHx","histolgy","best_rxn_status")
file_prefix <- "data/"
file_suffix <- "_clone_evolution_mut_scc_sample"
result_suffix <- "evolution_mut_scc_analysis"
result <- generate_table_for_analysis(name, meta_info, pat_info, sample_info, meta_info_list,file_prefix, file_suffix)
total_table_pre <- result[[1]]
feature_size_pre <- result[[2]]
g3 <-feature_graph(total_table_pre,  feature_size_pre)

# [1: plotting connected components]
new_result <- feature_clustering_connected_comp(g3, total_table_pre, feature_size_pre, z_thr = 1.645)
g6<-new_result[[3]]
clustering_res <- new_result[[4]]
clustering_plot(g6,clustering_res,"myplot.png")

# [2: plotting louvain clustering]
new_result1 <- feature_clustering_louvain(g6, total_table_pre, feature_size_pre, resolution = 1.1)
#' g3<-new_result1[[3]]
#' g4<-new_result1[[4]]
#' clustering_plot(g3,g4,"myplot.png")

image

  • Statistical test
#meta_info <- seurat_obj@meta.data
meta_info <- read.table("data/meta_info_lung", sep = "\t", check.names = F)
name <- "lung"
pat_info <- "pat_collapsed"
sample_info <- "sample_name"
meta_info_list <- c("primary_or_metastaic","biopsy_site","smokingHx","histolgy","best_rxn_status")
file_prefix <- "data/"
file_suffix <- "_clone_evolution_mut_scc_sample"
result_suffix <- "evolution_mut_scc_analysis"
result <- generate_table_for_analysis(name, meta_info, pat_info, sample_info, meta_info_list,file_prefix, file_suffix)
total_table_pre <- result[[1]]
feature_size_pre <- result[[2]]
g3 <-feature_graph(total_table_pre,  feature_size_pre)

# [1: Connected components]
new_result <- feature_clustering_connected_comp(g3, total_table_pre, feature_size_pre)
total_table<-new_result[[1]]
module_info_new <- new_result[[2]]
feature_size <- length(module_info_new)
g6<-new_result[[3]]
clustering_res <- new_result[[4]]
statistic_res<-statistic_test(total_table, feature_size, "primary_or_metastaic", "wilcox", name, result_suffix, T)

# [2: Louvain clustering]
new_result1 <- feature_clustering_louvain(g6, total_table_pre, feature_size_pre, resolution = 1.1)
total_table<-new_result1[[1]]
module_info <- new_result1[[2]]
feature_size <- length(module_info)
statistic_res<-statistic_test(total_table, feature_size, "primary_or_metastaic", "wilcox", name, result_suffix, T)

Path selection

Select evolution paths by applying a threshold to a given feature.

#meta_info <- seurat_obj@meta.data
meta_info <- read.table("data/meta_info_lung", sep = "\t", check.names = F)
name <- "lung"
pat_info <- "pat_collapsed"
sample_info <- "sample_name"
meta_info_list <- c("primary_or_metastaic","biopsy_site","smokingHx","histolgy","best_rxn_status")
file_prefix <- "data/"
file_suffix <- "_clone_evolution_mut_scc_sample"
result_suffix <- "evolution_mut_scc_analysis"
result <- generate_table_for_analysis(name, meta_info, pat_info, sample_info, meta_info_list,file_prefix, file_suffix)
total_table <- result[[1]]

path_list <- select_path_by_thr(total_table,  "LT_S21", "Angiogenesis",0, "high")

image

Mutation extraction from cancer clones

  • To extract mutation profile from given evolution paths.
load("data/clone_mutation_info/LT_S21_clone_mut_list")
# extract all the mutation
path_mut_gn_list <- mutation_from_path("LT_S21__29_23_27_25", "LT_S21", sample_clone_mut_list, "all", F)

# extract only genes related to the given geneset
path_mut_gn_list <- mutation_from_path("LT_S21__29_23_27_25", "LT_S21", sample_clone_mut_list, feature_gn_list, F)

image

  • Extracting common genes from multiple paths
path_list_info<-list()
a<- c("LT_S21__29_23_27_25", "LT_S53__3_1_2")
for (j in c(1:length(a))){
     path <- a[j]
     sample<-strsplit(path, "__")[[1]][1]
     file_name <- paste(sample, "_clone_mut_list",sep = "")
     load(file_name)
     path_list_info[[j]] <- list(sample, sample_clone_mut_list)
     names(path_list_info)[j] <- path
}

#names(path_list_info): path
#path_list_info[[1]][[1]]: sample
#path_list_info[[1]][[2]]: sample_clone_mut_list

genes <- common_mutation_from_path(path_list_info, "all", F)

image

Citation

Joint analysis of mutational and transcriptional landscapes in human cancer reveals key perturbations during cancer evolution Genome Biol. 2024; 25: 65 doi: 10.1186/s13059-024-03201-1

About

No description, website, or topics provided.

Resources

License

Stars

Watchers

Forks

Releases

No releases published

Packages

No packages published

Languages

  • R 98.5%
  • Shell 1.5%