Based on tutorials like pachterlab-kallisto-sleuth-workshop-2016-e9cb2d3/sleuth.html

https://pachterlab.github.io/sleuth_walkthroughs/trapnell/analysis.html

https://pachterlab.github.io/sleuth_walkthroughs/pval_agg/analysis.html

In [None]:
# Set the notebook's plot display size options (width 16, height 9).
options(
  repr.plot.width = 16,
  repr.plot.height = 9
)

In [None]:
# Input locations for this run:
#   path     - directory holding one kallisto output directory per sample
#   metadata - CSV file describing the samples
#   suffix   - suffix appended to each sample id to form its output directory name
path <- "/francislab/data1/raw/20191008_Stanford71/trimmed/unpaired"
metadata <- "/francislab/data1/raw/20191008_Stanford71/metadata.csv"
suffix <- "kallisto.single.hp_11"

First, we load the sleuth package. Next, we load cowplot which has some nice formatting modifications of the standard ggplot2 figures.

In [None]:
library('sleuth')
library('cowplot')

In [None]:
# Announce which kallisto run (identified by its suffix) is being processed.
print(sprintf("Processing %s", suffix))

Reading metadata

In [None]:
# Load the sample metadata CSV. Every column is read as character, so ids
# and group labels are kept as text rather than converted to numbers or factors.
md <- read.table(
  file = metadata,
  header = TRUE,
  sep = ",",
  colClasses = "character",
  stringsAsFactors = FALSE
)
head(md)

Keep only the two columns we need (`id` and `cc`); the metadata file may contain more.

In [None]:
# Keep just the sample identifier and the `cc` covariate.
md <- dplyr::select(md, id, cc)
head(md)

Replace missing (NA) values in the metadata with 0.

In [None]:
# Overwrite every NA cell with 0. The columns were read as character, so
# the replacement is stored as the string "0".
# NOTE(review): the surrounding text talks about keeping non-empty rows, but
# this fills NA values rather than dropping rows — confirm which is intended.
md <- replace(md, is.na(md), 0)
head(md)

In [None]:
# Report how many samples (rows) the metadata table holds at this point.
nrow(md)

Rename "id" column to "sample".

In [None]:
# Rename the identifier column from `id` to `sample`; the later
# plot_group_density call refers to this column by the name "sample".
md <- dplyr::rename(md, sample = "id")
head(md)

Ideally, the metadata file would already name this column `sample`, making the rename unnecessary.

Set paths to abundance files.

In [None]:
# Add a `path` column pointing at each sample's kallisto abundance file:
#   <path>/<sample>.<suffix>/abundance.h5
# Inside mutate(), `sample` is the metadata column; `path` and `suffix` on the
# right-hand side resolve to the variables defined earlier, since `md` has no
# such columns yet.
md <- dplyr::mutate(
  md,
  path = file.path(path, paste0(sample, ".", suffix), "abundance.h5")
)
head(md)

In [None]:
# Disabled experiment: calling norm_factors() here fails, so it is left
# commented out together with the error it produced.
#norm_factors(md)  # do or do not do???
#head(md)
#
# Not sure why this errors. Wrong order?
# NOTE(review): the traceback below shows norm_factors() applying round() to
# each row of its input, so it presumably expects a numeric matrix (e.g. a
# count matrix) rather than the character-only sample metadata. The traceback
# also shows it being called with `metadata`, not `md` — confirm against the
# sleuth documentation.
#
#Error in round(row): non-numeric argument to mathematical function
#Traceback:
#1. norm_factors(metadata)
#2. apply(mat, 1, function(row) !any(round(row) == 0))
#3. FUN(newX[, i], ...)

In [None]:
# Build the sleuth object from the metadata table (columns: sample, cc, path).
# extra_bootstrap_summary = TRUE is requested here; the bootstrap plots of
# est_counts further down presumably rely on it — confirm in the sleuth docs.
so <- sleuth_prep(
  sample_to_covariates = md,
  extra_bootstrap_summary = TRUE
)
# so <- sleuth_prep(stc, ~tissue + center, target_mapping = ttg, max_bootstrap = 30)
head(so)  # do??

In [None]:
# PCA of the samples before model fitting, labelled by sample and coloured by `cc`.
plot_pca(so, color_by = "cc", text_labels = TRUE)

Fitting full

In [None]:
# Fit the full model, in which expression depends on `cc`; stored under the name "full".
so <- sleuth_fit(so, formula = ~cc, fit_name = "full")

Fitting reduced

In [None]:
# Fit the intercept-only model; stored under the name "reduced".
so <- sleuth_fit(so, formula = ~1, fit_name = "reduced")

Performing likelihood ratio test

In [None]:
# Likelihood ratio test comparing the "reduced" (null) model against the "full" model.
so <- sleuth_lrt(so, null_model = "reduced", alt_model = "full")

In [None]:
# Show the models stored on the sleuth object (the "full" and "reduced" fits from above).
models(so)

In [None]:
# Show the tests stored on the sleuth object (the reduced-vs-full likelihood ratio test from above).
tests(so)

Obtaining differential expression results

In [None]:
# Pull the likelihood-ratio-test results ("reduced:full") into a table.
sleuth_table <- sleuth_results(
  so,
  test = "reduced:full",
  test_type = "lrt",
  show_all = FALSE
)
head(sleuth_table, n = 20)

In [None]:
# Show the 20 rows with the smallest p-values (order() places NA p-values last).
sleuth_table[head(order(sleuth_table$pval), n = 20), ]

In [None]:
# Keep only the targets whose q-value is at most 0.05, then preview the first 20.
sleuth_table_select <- dplyr::filter(sleuth_table, qval <= 0.05)
head(sleuth_table_select,20)

In [None]:
# Draw a bootstrap plot of est_counts (coloured by `cc`) for each of the ten
# targets with the smallest p-values.
print('Looping over top 10')
# Ten target ids with the smallest p-values, computed once and reused below.
top10_ids <- head(sleuth_table[order(sleuth_table$pval), ], 10)[['target_id']]
print(top10_ids)
for (ref in top10_ids) {
  print(ref)
  # print() is required: plots are not auto-displayed inside a for loop.
  print(plot_bootstrap(so, ref, units = 'est_counts', color_by = 'cc'))
}
print('end loop over top 10')

In [None]:
# PCA of the samples drawn as points, coloured by `cc`.
plot_pca(obj = so, color_by = "cc")

In [None]:
# Same PCA, with sample names drawn as text labels and coloured by `cc`.
plot_pca(so, color_by = "cc", text_labels = TRUE)

In [None]:
# Density of log-transformed est_counts (offset 1), grouped by every metadata
# column except `sample`.
plot_group_density(
  so,
  use_filtered = TRUE,
  units = "est_counts",
  trans = "log",
  grouping = setdiff(colnames(so$sample_to_covariates), "sample"),
  offset = 1
)