In [None]:
# The cross-species integration was performed with SelectIntegrationFeatures(), FindIntegrationAnchors(), and IntegrateData(); that workflow is not described in further detail here.

## 7.1 Comparative Analysis of GNG FAPs

In [None]:
# Bring a single sample into the state expected by the integration step:
# update the object, convert its RNA assay to Assay5, join the assay layers,
# prefix every cell name with the sample label and overwrite orig.ident with
# that same label.
prepare_sample <- function(obj, sample_id) {
  obj <- UpdateSeuratObject(obj)
  obj[["RNA"]] <- as(obj[["RNA"]], "Assay5")
  obj <- JoinLayers(obj)
  obj <- RenameCells(obj, add.cell.id = sample_id)
  obj$orig.ident <- sample_id
  obj
}

H.GAS <- prepare_sample(H.GAS, "H.GAS")
H.TA <- prepare_sample(H.TA, "H.TA")
H.VL <- prepare_sample(H.VL, "H.VL")

# Normalise each sample and pick its 2000 most variable features ("vst").
All.list <- lapply(
  list(H.GAS, H.TA, H.VL),
  function(obj) {
    FindVariableFeatures(
      NormalizeData(obj),
      selection.method = "vst",
      nfeatures = 2000
    )
  }
)

# Anchor-based integration of the three samples.
features <- SelectIntegrationFeatures(object.list = All.list)
anchors <- FindIntegrationAnchors(
  object.list = All.list,
  anchor.features = features
)
All.integrated <- IntegrateData(anchorset = anchors)

# Everything below operates on the "integrated" assay.
DefaultAssay(All.integrated) <- "integrated"

# Scale, reduce to 30 principal components, then embed / cluster on all 30.
All.integrated <- ScaleData(All.integrated, verbose = FALSE)
All.integrated <- RunPCA(All.integrated, npcs = 30, verbose = FALSE)
All.integrated <- RunUMAP(
  All.integrated,
  reduction = "pca",
  dims = seq_len(30)
)
All.integrated <- FindNeighbors(
  All.integrated,
  reduction = "pca",
  dims = seq_len(30)
)
All.integrated <- FindClusters(All.integrated, resolution = 0.3)

# UMAP coloured by sample of origin.
DimPlot(
  All.integrated,
  reduction = "umap",
  group.by = "orig.ident",
  pt.size = 0.5
)

# Marker genes for every cluster of the integrated object, followed by a
# per-cluster volcano plot.
#
# The original call ran FindAllMarkers() on `pbmc3k`, an object this analysis
# never creates; the clusters computed above live in `All.integrated`, so that
# is the object tested here. The result keeps its original name
# (`pbmc.markers`) so any later code that reads it still works.
# only.pos = FALSE keeps both up- and down-regulated genes and
# logfc.threshold = 0 disables the fold-change pre-filter, so the volcano plot
# sees the full range of effect sizes.
pbmc.markers <- FindAllMarkers(All.integrated, only.pos = FALSE,
                               min.pct = 0.25,
                               logfc.threshold = 0)

# One label colour per cluster. The NPG palette only provides 10 colours, so
# asking it for more yields NA entries; interpolate instead when there are
# more clusters than palette colours.
n_clusters <- length(unique(pbmc.markers$cluster))
npg_colors <- ggsci::pal_npg()(10)
label_colors <- if (n_clusters <= length(npg_colors)) {
    npg_colors[seq_len(n_clusters)]
} else {
    grDevices::colorRampPalette(npg_colors)(n_clusters)
}

markerVolcano(
    markers = pbmc.markers,
    topn = 5,
    labelCol = label_colors
)

## 7.2 CORR analysis

In [None]:
library(ggplot2)

# Compare per-gene mean scaled expression between the human and the reference
# cell subsets and report their Pearson correlation.
#
# `layer =` replaces the `slot =` argument, which is deprecated in
# SeuratObject v5; the rest of this file already addresses assay data by
# layer.

# Per-gene mean of the integrated assay's scale.data layer, human subset.
scaled_data_human <- GetAssayData(type1_2a_cells_human, assay = 'integrated', layer = 'scale.data')
avg_expr_human <- rowMeans(scaled_data_human)
avg_exp_human_df <- data.frame(gene = names(avg_expr_human), avg_expr_human = avg_expr_human)

# Same summary for the reference subset.
scaled_data_refer <- GetAssayData(type1_2a_cells_refer, assay = 'integrated', layer = 'scale.data')
avg_expr_refer <- rowMeans(scaled_data_refer)
avg_exp_refer_df <- data.frame(gene = names(avg_expr_refer), avg_expr_refer = avg_expr_refer)

# Inner join on gene: only genes present in both subsets are compared.
merged_df <- merge(avg_exp_refer_df, avg_exp_human_df, by = 'gene')


correlation <- cor(merged_df$avg_expr_refer, merged_df$avg_expr_human, method = "pearson")

print(paste("Pearson correlation between Type 1/2a myofibers in human and refer:", correlation))

# Scatter of the two per-gene means with a linear fit; the rounded
# correlation is shown in the title.
ggplot(merged_df, aes(x = avg_expr_refer, y = avg_expr_human)) +
  geom_point(alpha = 0.5) +
  geom_smooth(method = 'lm', color = 'blue') +
  xlab('Average Expression in refer') +
  ylab('Average Expression in human') +
  ggtitle(paste('Correlation between Type 1/2a myofibers in human and refer:', round(correlation, 2)))

## 7.3 Gene correlation analysis of myopathy

In [None]:
# Figure 7 O-Q
# Pull the RNA-assay expression matrices of the human and mouse objects and
# put the mouse gene symbols into upper case so they can be compared with the
# human symbols by name.
DefaultAssay(human_ann) <- 'RNA'
DefaultAssay(mice_ann) <- 'RNA'

genes_of_interest <- c(
    "DMD", "PKP2", "SNTA1", "DAG1", "BEST1", "VDR", "TNFRSF11B", "BTF3P11",
    "UTRN", "ESR1", "PTH", "CLCN1", "TNF", "TNFSF11", "SGCA", "IL1B", "TGFB1",
    "LRP5", "CTSK", "PRKDC", "DNM2", "FGF23", "ACP5", "PPARG",
    "BGLAP", "NOS1", "MTHFR", "GH1", "LEP"
) ## Myopathy genes

# `human_default_layer` / `mice_default_layer` are defined outside this
# section; they name the layer to read from each RNA assay.
# NOTE(review): as.data.frame() produces a dense genes x cells table; if the
# layer is stored as a sparse matrix this can be memory-hungry - confirm the
# objects are small enough.
human_data <- GetAssayData(human_ann, assay = 'RNA', layer = human_default_layer)
human_data <- as.data.frame(human_data)

mice_data <- GetAssayData(mice_ann, assay = 'RNA', layer = mice_default_layer)
mice_data <- as.data.frame(mice_data)

# Upper-casing can make two distinct mouse symbols collide, and a data.frame
# refuses duplicated row names. make.unique() leaves unique names untouched
# and appends ".1", ".2", ... to repeats; the later clean_gene_names() step
# strips everything from the first '.' onwards and the duplicate filter then
# keeps the first occurrence.
rownames(mice_data) <- make.unique(toupper(rownames(mice_data)))

# Normalise gene identifiers so they can be matched across data sets.
#
# gene_names: character vector of gene identifiers.
# Returns a character vector of the same length in which
#   * the first '.' and everything after it has been removed,
#   * every character other than A-Z, a-z and 0-9 has been removed,
#   * identifiers that end up empty are replaced by NA.
clean_gene_names <- function(gene_names) {
    # Inner gsub(): cut at the first '.'; outer gsub(): drop non-alphanumerics.
    cleaned <- gsub("[^A-Za-z0-9]", "", gsub("\\..*", "", gene_names))
    # An empty result carries no identifier, so mark it as missing.
    cleaned[!nzchar(cleaned)] <- NA
    cleaned
}

# Match the myopathy genes between the human and mouse expression tables by
# cleaned symbol, average each gene over all cells per species, and plot the
# human mean against the mouse mean.

# Attach the cleaned symbol of every row as a temporary `gene` column.
human_genes <- rownames(human_data)
human_genes_clean <- clean_gene_names(human_genes)
human_data$gene <- human_genes_clean

mice_genes <- rownames(mice_data)
mice_genes_clean <- clean_gene_names(mice_genes)
mice_data$gene <- mice_genes_clean

# Drop rows whose symbol cleaned down to nothing.
human_data <- human_data[!is.na(human_data$gene), ]
mice_data <- mice_data[!is.na(mice_data$gene), ]

# When several rows share a cleaned symbol, keep only the first one.
human_data <- human_data[!duplicated(human_data$gene), ]
mice_data <- mice_data[!duplicated(mice_data$gene), ]

# Promote the cleaned symbol to the row name and remove the helper column so
# that only expression columns remain.
rownames(human_data) <- human_data$gene
human_data$gene <- NULL

rownames(mice_data) <- mice_data$gene
mice_data$gene <- NULL

# Clean the query genes the same way so they are compared like with like.
genes_of_interest_clean <- clean_gene_names(genes_of_interest)

genes_in_human <- intersect(genes_of_interest_clean, rownames(human_data))
genes_in_mouse <- intersect(genes_of_interest_clean, rownames(mice_data))
common_genes <- intersect(genes_in_human, genes_in_mouse)

# Genes that cannot be matched in both species are left out of the plot;
# report them instead of dropping them silently.
missing_genes <- setdiff(genes_of_interest_clean, common_genes)
if (length(missing_genes) > 0) {
    message(
        "Myopathy genes absent from one or both species: ",
        paste(missing_genes, collapse = ", ")
    )
}

human_data_subset <- human_data[common_genes, ]
mice_data_subset <- mice_data[common_genes, ]

# Mean expression of each gene across all columns of the subset.
human_gene_avg <- rowMeans(human_data_subset)

mice_gene_avg <- rowMeans(mice_data_subset)

expression_df <- data.frame(
    gene = common_genes,
    human_expression = human_gene_avg[common_genes],
    mice_expression = mice_gene_avg[common_genes]
)

# One labelled point per gene: human mean on x, mouse mean on y.
ggplot(expression_df, aes(x = human_expression, y = mice_expression, label = gene)) +
    geom_point(color = "blue", size = 3) +
    geom_text(vjust = -1, hjust = 0.5, size = 3) +
    xlab("Human") +
    ylab("Mice") +
    ggtitle("CORR") +
    theme_minimal()