## 1.1 Upstream analysis by cellranger

In [None]:
# Put the Cell Ranger 7.1.0 install directory first on PATH so the bare
# `cellranger` command below resolves to that version.
export PATH=/storage/public/home/2008116199/yxd/soft/cellranger/cellranger-7.1.0:$PATH
# Installation self-test; the run is written under the id "tiny".
cellranger testrun --id=tiny
# Same self-test, invoked through the absolute path of the 8.0.1 binary.
/storage/public/home/2008116199/yxd/soft/cellranger/cellranger-8.0.1/bin/cellranger testrun --id=tiny
mkdir ~/yard/run_cellranger_mkfastq
# Filter the pig GTF before building the reference.
# NOTE(review): `key:allowable_value` looks like a template placeholder rather
# than a real attribute filter - confirm the intended --attribute value.
cellranger mkgtf Sus_scrofa.Sscrofa11.1.109.gtf Sus_scrofa.Sscrofa11.1.109.filtered.gtf --attribute=key:allowable_value
# Build the Sus_scrofa reference.
# NOTE(review): --genes points at "...109.chr.filtered.gtf", while the mkgtf
# call above writes "...109.filtered.gtf" - confirm which file is intended.
cellranger mkref --genome=Sus_scrofa --fasta=/home/yixudong/REF/Sus_scrofa.Sscrofa11.1.dna.toplevel.fa --genes=/home/yixudong/REF/cellranger/Sus_scrofa.Sscrofa11.1.109.chr.filtered.gtf
# Build a zebrafish (GRCz11) reference from files in the current directory.
cellranger mkref --genome=Danio.rerio_genome --fasta=Danio_rerio.GRCz11.dna.primary_assembly.fa \
  --genes=Danio_rerio.GRCz11.105.filtered.gtf
# Count the LD libraries (samples LD-1, LD-2, LD-3) against the Sus_scrofa reference.
cellranger count --id=run_count_LD --fastqs=/home/yixudong/sn/LD --sample=LD-1,LD-2,LD-3 --transcriptome=/home/yixudong/sn/SOL/Sus_scrofa
# Count a second FASTQ set with the 7.1.0 binary called by absolute path.
# NOTE(review): this reuses --id=run_count_LD from the previous command -
# confirm the two runs are launched from different working directories.
/home/yixudong/software/cellranger/cellranger-7.1.0/bin/cellranger count --id=run_count_LD --fastqs=/home/yixudong/Data/muscle --sample=WHB5CDNAPEP00008519 --transcriptome=/home/yixudong/sn/SOL/Sus_scrofa

## 1.2 QC and Cell annotation

In [None]:
library(Seurat)
library(SeuratData)
library(patchwork)
library(limma)
library(dplyr)
library(magrittr)
library(clustree)

#' Load, normalise and QC-plot several 10x Genomics count matrices
#'
#' For every entry of `data_dirs` the function reads the 10x matrix, builds a
#' Seurat object (features seen in at least 3 cells, cells with at least 200
#' features), adds a `percent.mt` metadata column, log-normalises the counts
#' and selects 2000 variable features with the "vst" method. Despite its name,
#' the function does not integrate the datasets; it only prepares them.
#'
#' @param data_dirs Named character vector of `filtered_feature_bc_matrix`
#'   directories. Each name is used as the Seurat project name and as the
#'   element name in the returned lists.
#' @param mt_pattern Regular expression passed to `PercentageFeatureSet()` to
#'   select mitochondrial features.
#'   NOTE(review): the default only matches symbols prefixed with "MT-";
#'   confirm the reference annotation names mitochondrial genes that way.
#' @return A list with, in this order:
#'   `datasets` (named list of Seurat objects),
#'   `plots` (named list of labelled variable-feature plots),
#'   `colors` (36-colour palette), and
#'   `qc_plots` (named list of violin plots of nFeature_RNA, nCount_RNA and
#'   percent.mt). Plots are returned, not printed; print them at the call site.
IntegrateAndProcessData <- function(
  data_dirs = c(
    Sol = "Z:\\A\\ST\\Analysis\\SnRNAseq\\run_count_SOL\\outs\\filtered_feature_bc_matrix",
    EDL = "Z:\\A\\ST\\Analysis\\SnRNAseq\\run_count_EDL\\outs\\filtered_feature_bc_matrix",
    LD  = "Z:\\A\\ST\\Analysis\\SnRNAseq\\run_count_LD\\outs\\filtered_feature_bc_matrix"
  ),
  mt_pattern = "^MT-"
) {
  library(Seurat)

  dataset_names <- names(data_dirs)
  # Without names nothing would be processed and an empty result would be
  # returned silently, so fail loudly instead.
  if (is.null(dataset_names) || anyNA(dataset_names) || any(dataset_names == "")) {
    stop("`data_dirs` must be a fully named character vector.", call. = FALSE)
  }

  # Read one 10x directory and run the per-sample preprocessing.
  process_single_dataset <- function(data_dir, project_name) {
    counts <- Read10X(data.dir = data_dir)
    obj <- CreateSeuratObject(
      counts = counts, project = project_name,
      min.cells = 3, min.features = 200
    )
    obj[["percent.mt"]] <- PercentageFeatureSet(obj, pattern = mt_pattern)
    obj <- NormalizeData(obj, normalization.method = "LogNormalize", scale.factor = 10000)
    FindVariableFeatures(obj, selection.method = "vst", nfeatures = 2000)
  }

  # Pair every path with its own name positionally, so duplicated names can
  # no longer make two entries resolve to the same directory.
  datasets <- Map(process_single_dataset, data_dirs, dataset_names)

  # ggplot objects created inside a loop are not auto-printed; the original
  # loop therefore discarded these plots. Keep them and hand them back.
  qc_plots <- lapply(datasets, function(obj) {
    VlnPlot(
      obj,
      features = c("nFeature_RNA", "nCount_RNA", "percent.mt"),
      pt.size = 0, cols = "#CCC9E6", ncol = 3
    )
  })

  my36colors <- c('#CE0317', '#009959', '#3AD7CC', '#993591', '#D6E7A3', '#57C3F3', '#476D87',
                  '#E95C59', '#E59CC4', '#AB3282', '#23452F', '#BD956A', '#8C549C', '#585658',
                  '#9FA3A8', '#E0D4CA', '#5F3D69', '#C5DEBA', '#58A4C3', '#E4C755', '#F7F398',
                  '#AA9A59', '#E63863', '#E39A35', '#C1E6F3', '#6778AE', '#91D0BE', '#B53E2B',
                  '#712820', '#DCC1DD', '#CCE0F5', '#CCC9E6', '#625D9E', '#68A180', '#3A6963',
                  '#968175')

  # Variable-feature scatter plot per dataset, labelling the first 20
  # variable features.
  plot_list <- lapply(datasets, function(obj) {
    top20 <- head(VariableFeatures(obj), 20)
    p <- VariableFeaturePlot(obj, pt.size = 3)
    LabelPoints(plot = p, points = top20, repel = TRUE)
  })

  # `qc_plots` is appended last so positional access to the first three
  # elements keeps working for existing callers.
  list(
    datasets = datasets,
    plots = plot_list,
    colors = my36colors,
    qc_plots = qc_plots
  )
}

# result <- IntegrateAndProcessData()


# Integrate the three muscle datasets, cluster the integrated object and draw
# the UMAP overviews. Sol_Sc, EDL_Sc and LD_Sc must already exist in the
# session as Seurat objects.
MuScle.list.Sc <- list(Sol_Sc, EDL_Sc, LD_Sc)

# Per-sample normalisation and variable-feature selection before anchoring.
MuScle.list.Sc <- lapply(X = MuScle.list.Sc, FUN = function(x) {
  x <- NormalizeData(x)
  FindVariableFeatures(x, selection.method = "vst", nfeatures = 2000)
})

features.Sc <- SelectIntegrationFeatures(object.list = MuScle.list.Sc)
MuScle.Sc.anchors <- FindIntegrationAnchors(
  object.list = MuScle.list.Sc,
  anchor.features = features.Sc
)
# From here on MuScle.list.Sc is the single integrated Seurat object, not a list.
MuScle.list.Sc <- IntegrateData(anchorset = MuScle.Sc.anchors)

DefaultAssay(MuScle.list.Sc) <- "integrated"

# FindClusters() needs a neighbour graph and the DimPlot() calls below need a
# "umap" reduction; neither exists straight after IntegrateData().
# NOTE(review): 30 PCs is an assumed value - confirm the dimensionality that
# was used for the published clustering.
MuScle.list.Sc <- ScaleData(MuScle.list.Sc, verbose = FALSE)
MuScle.list.Sc <- RunPCA(MuScle.list.Sc, npcs = 30, verbose = FALSE)
MuScle.list.Sc <- RunUMAP(MuScle.list.Sc, reduction = "pca", dims = 1:30)
MuScle.list.Sc <- FindNeighbors(MuScle.list.Sc, reduction = "pca", dims = 1:30)
# The result has to be assigned, otherwise the clustering is thrown away.
MuScle.list.Sc <- FindClusters(MuScle.list.Sc, resolution = seq(0.1, 0.5, 0.05))

# The palette is otherwise only created inside IntegrateAndProcessData() and
# returned as `$colors`; define it here when the session does not have it yet.
if (!exists("my36colors")) {
  my36colors <- c('#CE0317', '#009959', '#3AD7CC', '#993591', '#D6E7A3', '#57C3F3', '#476D87',
                  '#E95C59', '#E59CC4', '#AB3282', '#23452F', '#BD956A', '#8C549C', '#585658',
                  '#9FA3A8', '#E0D4CA', '#5F3D69', '#C5DEBA', '#58A4C3', '#E4C755', '#F7F398',
                  '#AA9A59', '#E63863', '#E39A35', '#C1E6F3', '#6778AE', '#91D0BE', '#B53E2B',
                  '#712820', '#DCC1DD', '#CCE0F5', '#CCC9E6', '#625D9E', '#68A180', '#3A6963',
                  '#968175')
}

# UMAP coloured by sample of origin, by cluster, and by cluster split per sample.
DimPlot(MuScle.list.Sc, reduction = "umap", group.by = "orig.ident",
        cols = c("#CE0317", "#EFD06A", "#009959"), pt.size = 0.5)
DimPlot(MuScle.list.Sc, reduction = "umap", label = TRUE, repel = TRUE,
        cols = my36colors, pt.size = 0.5)
DimPlot(MuScle.list.Sc, reduction = "umap", label = TRUE, repel = TRUE,
        cols = my36colors, pt.size = 0.5, split.by = "orig.ident")

# Spearman correlation between clusters, based on per-cluster average
# expression of the RNA assay, shown as two heatmaps.

# Compute the averages once and reuse them for the previews and the correlation.
AverageExp <- AverageExpression(object = MuScle.list.Sc)
head(AverageExp)
typeof(AverageExp)
head(AverageExp$RNA)

# The correlation has to exist before it can be reordered; the original code
# indexed `coorda` one line before creating it. The packages are referenced
# explicitly because the file's library() block does not attach them.
coorda <- psych::corr.test(AverageExp$RNA, AverageExp$RNA, method = "spearman")

# Manual display order of the clusters; rows and columns share the same order.
custom_row_order <- c("0", "3", "1", "9", "12", "7", "19", "20", "2", "4", "14",
                      "16", "17", "13", "15", "8", "10", "5", "11", "6", "18")
custom_col_order <- custom_row_order

# Give a readable error instead of "subscript out of bounds" when a label in
# the manual order is not present in the correlation matrix.
unknown_labels <- union(
  setdiff(custom_row_order, rownames(coorda$r)),
  setdiff(custom_col_order, colnames(coorda$r))
)
if (length(unknown_labels) > 0) {
  stop(
    "Cluster labels missing from the correlation matrix: ",
    paste(unknown_labels, collapse = ", "),
    call. = FALSE
  )
}
ordered_matrix <- coorda$r[custom_row_order, custom_col_order]

colors <- colorRampPalette(c("white", "red"))(100)
# Fixed manual order (no clustering), then the default clustered version.
pheatmap::pheatmap(ordered_matrix, color = colors, cluster_rows = FALSE, cluster_cols = FALSE)
pheatmap::pheatmap(coorda$r, color = colors)

# Fraction of cells per active identity class, drawn as one stacked bar, and
# the identity-by-sample contingency table printed afterwards.
total.prop <- as.data.frame(prop.table(table(Idents(MuScle.list.Sc))))

ggplot(data = total.prop, mapping = aes(x = "", y = Freq, fill = Var1)) +
  geom_col() +
  scale_fill_manual(values = my36colors) +
  labs(x = NULL, y = "Ratio", fill = "Sample Type", title = "Sample Ratio by Type") +
  theme_minimal() +
  theme(
    axis.ticks.x = element_blank(),
    axis.text.x = element_blank(),
    axis.title.x = element_blank()
  )

# Cell counts: identity class (rows) by sample of origin (columns).
table(Idents(object = MuScle.list.Sc), MuScle.list.Sc$orig.ident)


## 1.3 SingleR

In [None]:
# Annotate cells with SingleR, using the SOL5M object as the labelled reference.
# NOTE(review): SingleR() and SingleCellExperiment() must already be attached;
# the library() block visible in this file does not load them at top level.

# Reference: raw RNA counts plus their log1p transform, labelled with the
# active identities of SOL5M.
expr_matrix <- GetAssayData(object = SOL5M, assay = "RNA", slot = "counts")
logcounts <- log1p(expr_matrix)
sce_ref <- SingleCellExperiment(
  assays = list(counts = expr_matrix, logcounts = logcounts)
)
labels <- as.character(Idents(SOL5M))

# NOTE(review): the test input is `MuScle.listSingleR`, while the results are
# compared with and stored on `MuScle.list` - confirm both hold the same cells
# in the same order.
MuScle.list.SOL5M <- SingleR(
  test = MuScle.listSingleR,
  ref = sce_ref,
  labels = labels
)

# Predicted label versus Seurat cluster.
table(MuScle.list.SOL5M$labels, MuScle.list$seurat_clusters)

# Store the predictions as a metadata column and plot both groupings on the PCA.
MuScle.list@meta.data$labels <- MuScle.list.SOL5M$labels
DimPlot(
  MuScle.list,
  group.by = c("seurat_clusters", "labels"),
  reduction = "pca"
)

#' Turn a Seurat object into a labelled SingleCellExperiment reference
#'
#' Extracts the raw RNA counts, upper-cases the feature names, and wraps the
#' counts together with their `log1p()` transform in a SingleCellExperiment.
#' The log transform is applied to the raw counts as they are; no library-size
#' normalisation is performed here.
#'
#' @param seurat_obj A Seurat object with an "RNA" assay.
#' @return A list with `sce` (SingleCellExperiment holding `counts` and
#'   `logcounts` assays) and `labels` (character vector of the object's active
#'   identities, one per cell).
process_seurat_object <- function(seurat_obj) {
  library(Seurat)
  library(SingleCellExperiment)
  stopifnot(inherits(seurat_obj, "Seurat"))

  expr_matrix <- GetAssayData(seurat_obj, assay = "RNA", slot = "counts")

  # Use the rownames<- generic rather than writing to the @Dimnames slot, so
  # the rename also works when the counts are not an S4 Matrix object.
  rownames(expr_matrix) <- toupper(rownames(expr_matrix))
  # Upper-casing can collapse names that differed only by case.
  if (anyDuplicated(rownames(expr_matrix)) > 0) {
    warning(
      "Upper-casing produced duplicated feature names in the reference.",
      call. = FALSE
    )
  }

  sce_ref <- SingleCellExperiment(
    assays = list(counts = expr_matrix, logcounts = log1p(expr_matrix))
  )

  list(sce = sce_ref, labels = as.character(Idents(seurat_obj)))
}
