Skip to content

Commit

Permalink
incorporating utility function from base image
Browse files Browse the repository at this point in the history
  • Loading branch information
Brian Lawney committed Feb 15, 2024
1 parent 076736e commit fa806d4
Show file tree
Hide file tree
Showing 2 changed files with 7 additions and 62 deletions.
2 changes: 1 addition & 1 deletion docker/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
FROM ghcr.io/web-mev/base-spatialge-docker:sha-09c68030bc197fbdc91b712b3e54597b4896eb91
FROM ghcr.io/web-mev/base-spatialge-docker:sha-b9c9d80863af47a4ac85f420b96fd1ff57e2cde7

ADD stnormalize.R /usr/local/bin
67 changes: 6 additions & 61 deletions docker/stnormalize.R
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
suppressMessages(suppressWarnings(library('spatialGE')))
suppressMessages(suppressWarnings(library('optparse')))
source('/usr/local/bin/prep_stlist.R')

# args from command line:
args <- commandArgs(TRUE)
Expand Down Expand Up @@ -59,72 +60,16 @@ if (is.null(opt$normalization)){
working_dir <- dirname(opt$input_file)
setwd(working_dir)

# STlist expects that the first column is the gene names- so we don't use row.names arg
rnacounts <- read.table(opt$input_file, sep='\t', header=T, check.names=F)
# call the utility function which will return a list with the necessary items:
spat_list <- prep_stlist(opt$input_file, opt$coordinates_file, opt$sample_name)

# Same as for the counts, the expectation is that the coordinates file does not
# have row.names and instead has the barcodes in the first column. For now, however,
# we set the row names and then later alter.
spotcoords <- read.table(opt$coordinates_file, sep='\t', row.names=1, header=T, check.names=T)

# the barcodes in coords dataframe can be a superset of the count matrix columns.
# For example, if the matrix is filtered for QC, there may be poor quality spots
# that were filtered out.
# The opposite is not the case since we cannot have a barcode without a position.
barcodes_from_counts <- colnames(rnacounts)[2:dim(rnacounts)[2]]
diff_set <- setdiff(barcodes_from_counts, rownames(spotcoords))
num_invalid_barcodes <- length(diff_set)
if (num_invalid_barcodes > 0) {
max_print <- 5
if (num_invalid_barcodes < max_print) {
invalid_barcodes <- paste(diff_set[1:num_invalid_barcodes], collapse=', ')
} else {
invalid_barcodes <- sprintf('%s, and %d others.', paste(diff_set[1:max_print], collapse=', '), num_invalid_barcodes-max_print)
}
message(sprintf('The set of barcodes in your count matrix must be a subset of those in your coordinates file. Problems include: %s', invalid_barcodes))
quit(status=1)
} else {
# the STList constructor below will not accept coordinate files which are a superset of the
# count matrix barcodes. We handle that here:
spotcoords <- spotcoords[barcodes_from_counts,]

# we need the barcodes in the first col, not the row names. We used the rownames for indexing
# convenience above, but need to change that now:
spotcoords <- cbind(rownames(spotcoords), data.frame(spotcoords, row.names=NULL))
}

# Now, to avoid any unexpected issues downstream, we need to conver the column names, etc.
# to preserve the barcodes/column names, we create a dataframe of the original and 'R mutated'
# names. We then run through everything with the mutated names and finally map back.
orig_col_names <- colnames(rnacounts)
proper_names <- make.names(orig_col_names)
colname_mapping = data.frame(
orig_names = orig_col_names,
row.names=proper_names,
stringsAsFactors=F)
colnames(rnacounts) <- proper_names


spotcoords[,1] <- make.names(spotcoords[,1])
rownames(spotcoords) <- make.names(rownames(spotcoords))

# We will use a list of dataframes in the call to STlist
rnacounts_list <- list()
rnacounts_list[[opt$sample_name]] <- rnacounts
spotcoords_list <- list()
spotcoords_list[[opt$sample_name]] <- spotcoords

# Import input data into spatialGE object
spat <- STlist(
rnacounts=rnacounts_list,
spotcoords=spotcoords_list,
samples=c(opt$sample_name)
)
# unpack:
colname_mapping <- spat_list$colname_mapping
spat <- spat_list$spat

# Transform the data
spat <- transform_data(spat, method=norm_scheme)


# Export of the normalized data to full matrix flat file
# Note: we could convert to a .h5
df <- data.frame(
Expand Down

0 comments on commit fa806d4

Please sign in to comment.