-
Notifications
You must be signed in to change notification settings - Fork 0
/
run_singleR.R
88 lines (75 loc) · 2.98 KB
/
run_singleR.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
library(Seurat)
library(SingleR)
# 1. load facs and droplet data
facs_cell_ids <- readRDS('facs_cell_ids.rds')
droplet_cell_types <- readRDS('droplet_cell_types.rds')
facs_merged <- readRDS('facs_merged.rds')
facs_merged <- NormalizeData(facs_merged)
facs_merged <- as.SingleCellExperiment(facs_merged)
trained <- trainSingleR(facs_merged, facs_cell_ids, genes="de", sd.thresh=0.5, aggr.ref=TRUE)
# TODO: try using facs data, and also try using an existing reference.
all_predictions <- c()
for (cell in droplet_cell_types) {
print(cell)
query_data <- readRDS(paste('droplet_seurat', cell, '.rds'))
query_data <- NormalizeData(query_data)
query_data <- as.SingleCellExperiment(query_data)
predic <- classifySingleR(test=query_data, trained=trained)
p_table <- table(predic$labels)
print('table:')
print(p_table)
data_table <- as.data.frame(p_table)
data_table <- data_table[order(data_table$Freq),]
print(data_table)
all_predictions[[cell]] <- lapply(data_table[,'Var1'], as.character)
}
saveRDS(all_predictions, 'droplet_to_facs_predictions_singleR.rds')
for (cell in names(all_predictions)) {
print(cell)
results = rev(all_predictions[[cell]])
outstr <- paste(c(cell, results), collapse="\t")
write(outstr, 'droplet_to_facs_predictions_singleR.txt', append=TRUE)
write("\n", 'droplet_to_facs_predictions_singleR.txt', append=TRUE)
}
# try prediction using an existing reference
gene_intersection <- function(data1, data2) {
genes1 <- rownames(data1)
genes2 <- rownames(data2)
int <- intersect(genes1, genes2)
m1 <- match(int, genes1)
m2 <- match(int, genes2)
return(list('m1'=m1, 'm2'=m2))
}
# 1. load ref from celldex
library(celldex)
ref <- MouseRNAseqData()
gene_subsets <- gene_intersection(ref, query_data)
m1 <- gene_subsets$m1
m2 <- gene_subsets$m2
ref <- ref[m1]
ref_trained <- trainSingleR(ref, ref$label.main, genes="de", sd.thresh=0.5, aggr.ref=FALSE)
ref_all_predictions <- c()
for (cell in droplet_cell_types) {
print(cell)
query_data <- readRDS(paste('droplet_seurat', cell, '.rds'))
query_data <- query_data[m2]
query_data <- NormalizeData(query_data)
query_data <- as.SingleCellExperiment(query_data)
predic <- classifySingleR(test=query_data, trained=ref_trained)
p_table <- table(predic$labels)
print('table:')
print(p_table)
data_table <- as.data.frame(p_table)
data_table <- data_table[order(data_table$Freq),]
print(data_table)
ref_all_predictions[[cell]] <- lapply(data_table[,'Var1'], as.character)
}
saveRDS(ref_all_predictions, 'droplet_to_bulk_ref_predictions_singleR.rds')
ref_all_predictions <- readRDS('droplet_to_bulk_ref_predictions_singleR.rds')
for (cell in names(ref_all_predictions)) {
print(cell)
results = rev(ref_all_predictions[[cell]])
outstr <- paste(c(cell, results), collapse="\t")
write(outstr, 'droplet_to_bulk_ref_predictions_singleR.txt', append=TRUE)
write("\n", 'droplet_to_bulk_ref_predictions_singleR.txt', append=TRUE)
}