Based on tutorials like pachterlab-kallisto-sleuth-workshop-2016-e9cb2d3/sleuth.html

https://pachterlab.github.io/sleuth_walkthroughs/trapnell/analysis.html

https://pachterlab.github.io/sleuth_walkthroughs/pval_agg/analysis.html

In [None]:
# Set the repr plot dimensions (width 16, height 9) used for the figures below.
options(
  repr.plot.width = 16,
  repr.plot.height = 9
)

In [None]:
# Example values, kept for reference:
#path='/francislab/data1/raw/20191008_Stanford71/trimmed/unpaired'
#metadata='/francislab/data1/raw/20191008_Stanford71/metadata.csv'
#suffix='kallisto.single.hp_11'

# The three inputs are taken from environment variables:
#   datapath - directory that holds the per-sample output directories
#   metadata - comma-separated sample sheet
#   suffix   - text appended to each sample id to form its directory name
path <- Sys.getenv('datapath')
metadata <- Sys.getenv('metadata')
suffix <- Sys.getenv('suffix')

# Sys.getenv() returns "" for an unset variable. Stop here with a clear
# message rather than letting an empty file name reach read.table() later.
local({
  values <- c(datapath = path, metadata = metadata, suffix = suffix)
  unset <- names(values)[!nzchar(values)]
  if (length(unset) > 0) {
    stop('Required environment variable(s) not set: ',
         paste(unset, collapse = ', '), call. = FALSE)
  }
})

In [None]:
# Echo the data directory taken from the environment.
print(path)

In [None]:
# Echo the metadata file location taken from the environment.
print(metadata)

In [None]:
# Echo the per-sample directory suffix taken from the environment.
print(suffix)

First, we load the sleuth package. Next, we load cowplot which has some nice formatting modifications of the standard ggplot2 figures.

In [None]:
library('sleuth')
library('cowplot')
library('ggplot2')

In [None]:
# Announce which suffix this run is processing.
print(sprintf('Processing %s', suffix))

Reading metadata

In [None]:
# Load the comma-separated sample sheet, reading every column as character,
# then preview the first rows.
md <- read.table(
  file = metadata,
  header = TRUE,
  sep = ',',
  colClasses = 'character',
  stringsAsFactors = FALSE
)
head(md)

Only want the 2 columns, but metadata file may contain more.

In [None]:
# Drop everything except the sample id and the `cc` grouping column.
md <- dplyr::select(md, id, cc)
head(md)

Replace missing (NA) values with 0. Note that this does not drop any rows.

In [None]:
# Overwrite every NA cell of the table with 0 so no missing values remain.
md <- replace(md, is.na(md), 0)
head(md)

In [None]:
# Number of rows (samples) in the metadata table.
nrow(md)

Rename "id" column to "sample".

In [None]:
# The identifier column must be called `sample` from here on.
md <- dplyr::rename(md, "sample" = id)
head(md)

Really could've / should've just had the metadata file like that.

Set paths to abundance files.

In [None]:
# Each sample's abundance file lives at <path>/<sample>.<suffix>/abundance.h5.
md$path <- file.path(path, paste0(md$sample, '.', suffix), 'abundance.h5')
head(md)

In [None]:
#norm_factors(md)  # do or do not do???
#head(md)
#
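# Not sure why this errors. Most likely norm_factors() expects a numeric
# matrix of counts rather than the metadata data frame: the traceback below
# fails in round(row), and every metadata column was read as character.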
#
#Error in round(row): non-numeric argument to mathematical function
#Traceback:
#1. norm_factors(metadata)
#2. apply(mat, 1, function(row) !any(round(row) == 0))
#3. FUN(newX[, i], ...)

In [None]:
# Build the sleuth object from the sample table, also requesting the extra
# bootstrap summaries (presumably needed by plot_bootstrap() further down).
so <- sleuth_prep(
  sample_to_covariates = md,
  extra_bootstrap_summary = TRUE
)
# Example of a richer call, kept from the tutorial for reference:
# so <- sleuth_prep(stc, ~tissue + center, target_mapping = ttg, max_bootstrap = 30)
#head(so)  # do??

In [None]:
# PCA of the samples, labelled by sample name and coloured by `cc`.
plot_pca(obj = so, color_by = 'cc', text_labels = TRUE)

Fitting full

In [None]:
# Full model: expression depends on `cc`.
so <- sleuth_fit(obj = so, formula = ~cc, fit_name = 'full')

Fitting reduced

In [None]:
# Reduced model: intercept only.
so <- sleuth_fit(obj = so, formula = ~1, fit_name = 'reduced')

Performing likelihood ratio test

In [None]:
# Likelihood ratio test of the 'full' model against the 'reduced' model.
so <- sleuth_lrt(obj = so, null_model = 'reduced', alt_model = 'full')

In [None]:
# List the models that have been fitted on the sleuth object.
models(so)

In [None]:
# List the tests that have been run on the sleuth object.
tests(so)

Obtaining differential expression results

In [None]:
# Pull the LRT results for the reduced-vs-full comparison
# (show_all = FALSE) and preview 20 rows.
sleuth_table <- sleuth_results(
  obj = so,
  test = 'reduced:full',
  test_type = 'lrt',
  show_all = FALSE
)
head(sleuth_table, n = 20)

In [None]:
# The 20 results with the smallest p-values.
head(sleuth_table[order(sleuth_table[['pval']]), ], n = 20)

In [None]:
# Keep only the results whose q-value is at most 0.05, then preview 20 rows.
sleuth_table_select <- dplyr::filter(
  sleuth_table,
  qval <= 0.05
)
head(sleuth_table_select, n = 20)

In [None]:
# Draw a bootstrap plot (estimated counts, coloured by `cc`) for each of the
# 10 results with the smallest p-values.
print('Looping over top 10')

# Sort once and reuse the rows, instead of re-sorting the table for every use
# and looking each row up again by id.
top_hits <- head(sleuth_table[order(sleuth_table$pval), ], 10)
print(top_hits[['target_id']])

for (i in seq_len(nrow(top_hits))) {
  hit <- top_hits[i, ]
  ref <- as.character(hit$target_id)
  # Readable title, e.g. "<id> - pval: 1.2e-08, qval: 3.4e-05".
  plot_title <- sprintf('%s - pval: %s, qval: %s', ref,
                        format(hit$pval, digits = 3),
                        format(hit$qval, digits = 3))
  p <- plot_bootstrap(so, ref, units = 'est_counts', color_by = 'cc') +
    ggtitle(plot_title) +
    theme(plot.title = element_text(size = 30, face = 'bold', hjust = 0.5))
  # Inside a loop a ggplot object is only rendered when printed explicitly.
  print(p)
}
print('end loop over top 10')

In [None]:
# PCA coloured by `cc`, without sample labels.
plot_pca(obj = so, color_by = 'cc')

In [None]:
# The same PCA with each point labelled by sample name.
plot_pca(obj = so, color_by = 'cc', text_labels = TRUE)

In [None]:
# Density of log-transformed (offset 1) estimated counts on the filtered
# targets, grouped by every covariate column except `sample`.
plot_group_density(
  so,
  use_filtered = TRUE,
  units = "est_counts",
  trans = "log",
  grouping = setdiff(colnames(so$sample_to_covariates), "sample"),
  offset = 1
)

In [None]:

# sleuth_to_matrix() arguments:
#   obj         - a sleuth object
#   which_df    - one of "obs_norm" or "obs_raw"
#   which_units - one of "tpm" or "est_counts"

# Normalized estimated counts.
sm <- sleuth_to_matrix(obj = so, which_df = 'obs_norm', which_units = 'est_counts')
head(sm)

In [None]:
# Raw estimated counts.
sm <- sleuth_to_matrix(obj = so, which_df = 'obs_raw', which_units = 'est_counts')
head(sm)

In [None]:
# Normalized TPM.
sm <- sleuth_to_matrix(obj = so, which_df = 'obs_norm', which_units = 'tpm')
head(sm)

In [None]:
# Raw TPM.
sm <- sleuth_to_matrix(obj = so, which_df = 'obs_raw', which_units = 'tpm')
head(sm)

In [None]:
# Back to normalized estimated counts, then run PCA on the transposed matrix.
sm <- sleuth_to_matrix(obj = so, which_df = 'obs_norm', which_units = 'est_counts')
pc <- prcomp(x = t(sm))

In [None]:
# Peek at the first few PC1 scores.
head(pc$x[,1])

In [None]:
# Show the `cc` value recorded for every row of the metadata.
md$cc

In [None]:
# Start with every sample coloured red ...
md[['colors']] <- 'Red'
head(md[['colors']])

In [None]:
# ... then switch the rows whose `cc` is 'Case' to blue.
md[['colors']][md[['cc']] == 'Case'] <- 'Blue'
head(md[['colors']])

In [None]:
# Scatter plot of PC1 against PC2, one point per sample, coloured by md$colors.
#
# md$colors is in metadata row order, while the rows of pc$x follow the column
# order of the expression matrix, which is not guaranteed to be the same.
# Look colours up by sample name so each point gets its own sample's colour.
pc_samples <- rownames(pc$x)
point_colors <- if (is.null(pc_samples)) {
  md$colors  # no names to align on; fall back to positional order
} else {
  md$colors[match(pc_samples, md$sample)]
}
plot(pc$x[,1], pc$x[,2],
  col = point_colors,
  main = 'PCA of normalized and est counts')


From https://www.biostars.org/p/282685


In [None]:
# Reuse the PCA computed above under the name the following cells refer to.
project.pca <- pc #prcomp(t(MyReadCountMatrix))

In [None]:
# Summarise the PCA result.
summary(project.pca)

In [None]:
# Percentage of the total variance carried by each principal component:
# (PC stdev^2) / (sum of all PC stdev^2), times 100.
project.pca.proportionvariances <- local({
  pc_var <- project.pca$sdev^2
  (pc_var / sum(pc_var)) * 100
})

In [None]:
# Scree plot: one bar per principal component, height = % of variance.
barplot(
  height = project.pca.proportionvariances,
  main = "Scree plot",
  xlab = paste("Principal component (PC), 1-", length(project.pca$sdev)),
  ylab = "Proportion of variation (%)",
  ylim = c(0, 100),
  cex.names = 1
)

In [None]:
# Pairwise scatter plots of the PC scores in two groups: PCs 1-5 and PCs 6-10.
par(cex=1.0, cex.axis=0.8, cex.main=0.8)

# md$colors is in metadata row order, while the rows of project.pca$x follow
# the column order of the expression matrix, which is not guaranteed to be the
# same. Look colours up by sample name so every point is coloured correctly.
pc_samples <- rownames(project.pca$x)
point_colors <- if (is.null(pc_samples)) {
  md$colors  # no names to align on; fall back to positional order
} else {
  md$colors[match(pc_samples, md$sample)]
}

# There are only as many PCs as the data allow, so clamp each group to the
# components that exist instead of failing on a hard-coded 1:5 / 6:10.
n_pcs <- ncol(project.pca$x)
for (first_pc in c(1, 6)) {
  last_pc <- min(first_pc + 4, n_pcs)
  if (last_pc <= first_pc) {
    next  # pairs() needs at least two columns
  }
  pairs(project.pca$x[, first_pc:last_pc], col=point_colors,
        main=paste0("Principal components analysis bi-plot\nPCs ",
                    first_pc, "-", last_pc),
        pch=16)
}

In [None]:
# Three side-by-side scatter plots of the PC scores: PC1 vs PC2, PC1 vs PC3 and
# PC2 vs PC3. Axis labels carry each component's percentage of variance.
par(mar=c(4,4,4,4), mfrow=c(1,3), cex=1.0, cex.main=0.8, cex.axis=0.8)

# md$colors is in metadata row order, while the rows of project.pca$x follow
# the column order of the expression matrix, which is not guaranteed to be the
# same. Look colours up by sample name so every point is coloured correctly.
pc_samples <- rownames(project.pca$x)
point_colors <- if (is.null(pc_samples)) {
  md$colors  # no names to align on; fall back to positional order
} else {
  md$colors[match(pc_samples, md$sample)]
}

# Draw one panel: empty axes first (type "n"), then the coloured points.
plot_pc_pair <- function(i, j) {
  if (max(i, j) > ncol(project.pca$x)) {
    return(invisible(NULL))  # that component does not exist; skip the panel
  }
  plot(project.pca$x[, i], project.pca$x[, j], type="n",
       main="Principal components analysis bi-plot",
       xlab=paste(paste0("PC", i, ", "),
                  round(project.pca.proportionvariances[i], 2), "%"),
       ylab=paste(paste0("PC", j, ", "),
                  round(project.pca.proportionvariances[j], 2), "%"))
  points(project.pca$x[, i], project.pca$x[, j],
         col=point_colors, pch=16, cex=1)
}

#Plots scatter plot for PC 1 and 2
plot_pc_pair(1, 2)

#Plots scatter plot for PC 1 and 3
plot_pc_pair(1, 3)

#Plots scatter plot for PC 2 and 3
plot_pc_pair(2, 3)