In [None]:
library(GenomicRanges)
library(dplyr)
library(data.table)
library(ggplot2)
library(viridis)
library(readxl)
library(pheatmap)
library(RColorBrewer)
library(tidyr)
library(gridExtra)
library(corrplot)
library(tibble)
library(ggrepel)
library(ggpubr)
# library(DESeq2)
library(DEFormats)
library(BiocParallel)
register(MulticoreParam(4))
library(chromVAR)
## library(motifmatchr) ##not used by far
library(GenomicFeatures)
library(BSgenome.Hsapiens.UCSC.hg38)
library(ChIPseeker)
library(TxDb.Hsapiens.UCSC.hg38.knownGene)
library(org.Hs.eg.db)
library(pheatmap)
library(RColorBrewer)
library(tidyr)
library(gridExtra)
library(ggrepel)
library(limma)
library(ggdendro)
library(ggfortify)
library(ggpubr)
library(magrittr)
library(Seurat)

In [None]:
## Patient metadata: keep the infusion products whose CD8+ T cells were
## isolated as "CD8TCM" (CM-derived), and collect their lower-cased X-numbers.
patientRaw <- read_xlsx(
  "CART_CUTRUN_Project/report/CART_CUTRUN/analysis/20201028_NHL_CUTandRUN_PatientData.xlsx"
)
# Row indices of the CM-derived products.
cm_ind <- which(patientRaw$`CD8+ T cell isolation` == "CD8TCM")
# Lower-cased IDs are used downstream to build the RNA-seq sample folder names.
patient_id_list <- tolower(patientRaw$Xnumber[cm_ind])


In [None]:
## Patient RNA-seq (infusion product of CM-derived cells):
## build a genes x samples table of rounded RSEM expected counts.
## Result: `pt_expCount` (one "<sample>_PT" column per loaded sample plus a
## trailing `gene_name` column holding the RSEM gene_id).
inPath <- "CART_CUTRUN_Project/results/RNAseq/process/NHL_RSEM_gencode_v21/"

# Candidate sample folders derived from the patient IDs; only those that
# actually have an RSEM result file are loaded.
pt_samples <- paste0("Sample_", patient_id_list, "_IP_CD8-CAR")
pt_files <- paste0(inPath, pt_samples, "/RSEM.genes.results")
pt_found <- file.exists(pt_files)
if (!any(pt_found)) {
  stop("No patient RSEM.genes.results file found under ", inPath, call. = FALSE)
}
pt_name_tmp_list <- pt_samples[pt_found]

# Read each available sample once (gene IDs and counts together) instead of
# re-reading a file afterwards just to recover the gene IDs.
pt_tables <- lapply(pt_name_tmp_list, function(sample_name) {
  print(sample_name)
  fread(
    paste0(inPath, sample_name, "/RSEM.genes.results"),
    select = c("gene_id", "expected_count")
  )
})

# Gene IDs come from a sample that is known to exist; the column-wise binding
# below is only valid if every sample lists the same genes in the same order.
geneID <- pt_tables[[1]]$gene_id
pt_same_genes <- vapply(
  pt_tables,
  function(tbl) identical(tbl$gene_id, geneID),
  logical(1)
)
if (!all(pt_same_genes)) {
  stop(
    "gene_id order differs between patient samples: ",
    paste(pt_name_tmp_list[!pt_same_genes], collapse = ", "),
    call. = FALSE
  )
}

pt_counts <- lapply(pt_tables, function(tbl) round(tbl$expected_count))
names(pt_counts) <- paste0(pt_name_tmp_list, "_PT")
# check.names = FALSE keeps the sample names verbatim (they contain "-").
pt_expCount <- data.frame(pt_counts, check.names = FALSE)
pt_expCount$gene_name <- geneID


In [None]:
## Healthy donor RNA-seq:
## build a genes x samples table of rounded RSEM expected counts.
## Result: `hd_expCount` (one "<expr>_<cell>_<donor>" column per loaded sample
## plus a trailing `gene_name` column holding the RSEM gene_id).
cellList <- "CM" # c("N", "CM", "EM", "EMRA")
hdList <- paste0("HD", c(1:3, 5:7))
exprList <- "Product" # c("Input", "Product", "Stim1", "Stim2", "Stim3")

inPath <- "CART_CUTRUN_Project/results/RNAseq/process/HD_RSEM_gencode_v21/" # "CART_CUTRUN_Project/results/RNAseq/process/RSEM/"

# Enumerate every cell x expr x donor combination with the donor varying
# fastest and the cell type slowest, so columns are ordered by cell type,
# then expr, then donor.
hd_grid <- expand.grid(
  hd = hdList,
  expr = exprList,
  cell = cellList,
  stringsAsFactors = FALSE
)
hd_samples <- paste0("RNA_CD8_", hd_grid$cell, "_", hd_grid$expr, "_", hd_grid$hd)
hd_labels <- paste(hd_grid$expr, hd_grid$cell, hd_grid$hd, sep = "_")
hd_files <- paste0(inPath, hd_samples, "/RSEM.genes.results")

# Only samples with an RSEM result file are loaded. Column labels are taken
# from the loaded samples only, so a missing file can no longer shift or
# break the column names.
hd_found <- file.exists(hd_files)
if (!any(hd_found)) {
  stop("No healthy-donor RSEM.genes.results file found under ", inPath, call. = FALSE)
}
if (!all(hd_found)) {
  warning(
    "Skipping healthy-donor samples without RSEM results: ",
    paste(hd_samples[!hd_found], collapse = ", "),
    call. = FALSE
  )
}

hd_tables <- lapply(which(hd_found), function(idx) {
  print(hd_samples[idx])
  fread(hd_files[idx], select = c("gene_id", "expected_count"))
})

# Gene IDs come from a sample that is known to exist; the column-wise binding
# below is only valid if every sample lists the same genes in the same order.
geneID <- hd_tables[[1]]$gene_id
hd_same_genes <- vapply(
  hd_tables,
  function(tbl) identical(tbl$gene_id, geneID),
  logical(1)
)
if (!all(hd_same_genes)) {
  stop(
    "gene_id order differs between healthy-donor samples: ",
    paste(hd_samples[hd_found][!hd_same_genes], collapse = ", "),
    call. = FALSE
  )
}

hd_counts <- lapply(hd_tables, function(tbl) round(tbl$expected_count))
names(hd_counts) <- hd_labels[hd_found]
hd_expCount <- data.frame(hd_counts, check.names = FALSE)
hd_expCount$gene_name <- geneID



In [None]:
## Sample annotation: one row per count column, patients (PT) first, then
## healthy donors (HD). The "- 1" discounts the trailing gene_name column of
## each count table.
groupList <- c("PT", "HD")
target <- data.frame(
  group = rep(groupList, times = c(ncol(pt_expCount) - 1, ncol(hd_expCount) - 1))
)
target$group <- factor(target$group, levels = groupList)

## Experimental design: group-means parameterisation (no intercept), with a
## single contrast comparing patients against healthy donors.
treat <- factor(target$group)
design <- model.matrix(~ 0 + treat)
colnames(design) <- levels(treat)
contrast <- makeContrasts(PT_HD = PT - HD, levels = design)


In [None]:
## Differential expression (patient vs healthy donor) with limma-voom.
## Steps: drop low-count genes, voom-transform with cyclic-loess
## normalisation, fit the linear model, test the PT - HD contrast, label each
## gene Up / Down / notDE, and write the table plus an RData snapshot.
##
## NOTE(review): `countMat` is not defined in the visible part of this
## notebook. It is presumably the genes x samples count matrix combining
## pt_expCount and hd_expCount (PT columns first, gene IDs as row names) --
## confirm against the cell that builds it.
outPath <- "CART_CUTRUN_Project/results/RNAseq/analysis/RSEM/"

# Fail early with a readable message if the counts and design are out of sync.
if (ncol(countMat) != nrow(design)) {
  stop(
    "countMat has ", ncol(countMat), " columns but the design has ",
    nrow(design), " samples",
    call. = FALSE
  )
}

## Filter and delete low expressed genes
selectR <- which(rowSums(countMat) > 10) ## remove low count genes
dataS <- countMat[selectR, ]
voomDDS <- voom(counts = dataS, design = design, normalize.method = "cyclicloess", plot = FALSE)
## option 1 using voomDDS option2 using normDDS as normalized input.
inputDDS <- voomDDS
## corfit$consensus
fit <- lmFit(inputDDS, design)
fitContrast <- contrasts.fit(fit, contrast)
fitBayes <- eBayes(fitContrast, robust = TRUE)

ncol(contrast)
i <- 1
# for(i in 1:ncol(contrast)){
## Results: all tested genes, FDR-adjusted, ordered by p-value.
## `sort.by` is spelled out in full rather than relying on partial matching
## of `sort`.
res <- topTable(fit = fitBayes, adjust.method = "fdr", coef = i, number = nrow(inputDDS), sort.by = "P")
res <- data.table(GeneName = rownames(res), res)
## DE call: adjusted p <= 0.1 and |logFC| >= 1 (logFC is log2, i.e. at least
## a 2-fold change), split by direction.
res[, Significance := ifelse(
  adj.P.Val <= 0.1 & logFC >= 1, "Up",
  ifelse(adj.P.Val <= 0.1 & logFC <= -1, "Down", "notDE")
)]
## Append the filtered raw counts of every sample to the statistics.
results <- left_join(res, dataS %>% data.frame %>% dplyr::mutate(GeneName = rownames(dataS)), by = "GeneName")
## Output
write.table(results, file = paste0(outPath, '/RNA_limma_tables/RNA-seq_DE_cmPatient_vs_cmHealthyDonor_InfusionProduct_adj0.1_logFC2.csv'), quote = FALSE, row.names = FALSE, sep = ",")

save(results, voomDDS, file = paste0(outPath, "/RData/results_RNA-seq_DE_cmPatient_vs_cmHealthyDonor_InfusionProduct.RData"))
# load(file = paste0(outPath, "/RData/results_RNA-seq_DE_cmPatient_vs_cmHealthyDonor_InfusionProduct.RData"))
# load(file = paste0(outPath, "/RData/results_RNA-seq_DE_cmPatient_vs_cmHealthyDonor_InfusionProduct.RData"))



In [None]:
## PCA of the RNA-seq samples (patients vs healthy donors) via Seurat, drawn
## twice: once using every retained gene as a "variable feature", once using
## the top 2000. Each plot is written to a PDF and also shown in the notebook.
## After this cell, `rna_obj` / `rna_plot` hold the 2000-feature versions.

# Build a Seurat object from a count matrix, log-normalise, select
# `n_features` variable features (vst), scale, and run PCA with 10 components.
run_rna_pca <- function(counts, n_features) {
  obj <- CreateSeuratObject(counts = counts, project = "cart", min.cells = 0, min.features = 10) %>%
    NormalizeData(normalization.method = "LogNormalize", scale.factor = 1000000) %>%
    FindVariableFeatures(selection.method = "vst", nfeatures = n_features) %>%
    ScaleData()
  RunPCA(obj, features = VariableFeatures(object = obj), npcs = 10)
}

# Scatter plot of PC1 vs PC2, coloured by `sample_group` (one label per sample).
plot_rna_pca <- function(obj, sample_group) {
  pcs <- data.frame(Embeddings(obj, reduction = "pca")[, 1:2])
  # Guard against silently mislabelling points if Seurat's min.features
  # filter dropped a sample or the counts are not in the expected order.
  if (nrow(pcs) != length(sample_group)) {
    stop(
      "PCA has ", nrow(pcs), " samples but ", length(sample_group),
      " group labels were supplied",
      call. = FALSE
    )
  }
  pcs %>%
    mutate(group = sample_group) %>%
    ggplot(aes(x = PC_1, y = PC_2, color = group)) +
    geom_point() +
    theme_bw(base_size = 20) +
    scale_color_viridis(discrete = TRUE, begin = 0.2, end = 0.8, option = "magma") +
    xlab("PC 1") +
    ylab("PC 2") +
    ggtitle("RNA-seq") +
    rremove("legend.title")
}

# Write a plot to a 7x6 inch PDF; the device is closed even if drawing fails.
save_rna_pca_pdf <- function(plot_obj, pdf_path) {
  pdf(pdf_path, width = 7, height = 6)
  on.exit(dev.off(), add = TRUE)
  print(plot_obj)
  invisible(pdf_path)
}

options(repr.plot.width = 7, repr.plot.height = 6)
selectR <- which(rowSums(countMat) > 10) ## remove low count genes

# Group labels assume the count columns are ordered patients first, then
# healthy donors; "- 1" discounts the gene_name column of each count table.
# NOTE(review): countMat is built outside the visible cells -- confirm its
# column order matches this assumption.
rna_pca_group <- rep(
  c("PT", "HD"),
  times = c(ncol(pt_expCount) - 1, ncol(hd_expCount) - 1)
)

## PCA on all retained genes
rna_obj <- run_rna_pca(countMat[selectR, ], n_features = length(selectR))
rna_plot <- plot_rna_pca(rna_obj, rna_pca_group)
save_rna_pca_pdf(rna_plot, "CART_CUTRUN_Project/results/paper_figure/patient-related/CMpatient_CMhealthyDonor_PCA_on_RNA-seq_allfeatures.pdf")
rna_plot

## PCA on the top 2000 variable genes
rna_obj <- run_rna_pca(countMat[selectR, ], n_features = 2000)
rna_plot <- plot_rna_pca(rna_obj, rna_pca_group)
save_rna_pca_pdf(rna_plot, "CART_CUTRUN_Project/results/paper_figure/patient-related/CMpatient_CMhealthyDonor_PCA_on_RNA-seq_2000features.pdf")
rna_plot