# VGAM

This shows how to apply VGAM to on a sample scRNA-seq dataset which contains 17 clusters and each cluster contains 20 genes. It cannot return results for most genes.

In [None]:
# install.packages("VGAM")

In [1]:
library(VGAM)
library(matrixStats)
library(magrittr)
library(ggplot2)
library(biomaRt)
library(data.table)

Loading required package: stats4

Loading required package: splines



In [2]:
df_g = read.csv(file = '../data/model_sel_genes.csv')
df_m = read.csv(unz('../data/meta.zip', "meta.tsv"),sep ="\t")
df = read.csv(unz('../data/model_sel_count.zip', "model_sel_count.csv"))

In [3]:
setnames(df_m, 
         old = c('post.mortem.interval..hours.', 'RNA.Integrity.Number', 'RNA.mitochondr..percent', 'RNA.ribosomal.percent'), 
         new = c('PMI', 'RIN', 'ribo_pct', 'mito_pct')
        )

In [4]:
df_m['UMIs_log']=log(df_m['UMIs'])
df_m['genes_log']=log(df_m['genes'])

In [5]:
normFunc <- function(x){(x-mean(x, na.rm = T))/sd(x, na.rm = T)}
features = c("UMIs",
    "genes",
    "UMIs_log",
    "genes_log",
    "sex",
    "age",
    "Capbatch",
    "PMI",
    "RIN",
    "ribo_pct",
    "mito_pct")

f_to_norm  = c('UMIs_log',
 'age',
 'PMI',
 'RIN',
 'ribo_pct',
 'mito_pct')

clusters = unique(df_m[,'cluster'])

In [6]:
formula_base =paste('~',paste(features,collapse='+'),sep='')

In [7]:
df_r = NULL
for(cluster in clusters) {
    print(cluster)
    gene_ids=df_g[df_g$cluster==cluster,'gene_id']
    df_f = df_m[df_m$cluster==cluster,features]
    df_f[f_to_norm] <- apply(df_f[f_to_norm], 2, normFunc)
    Y = t(df[df$cell %in% df_m[df_m$cluster==cluster,'cell'],names(df) %in% gene_ids])
    X=with(df_f,model.matrix(as.formula(formula_base)))
    
    # VGAM
    data = cbind(df_f,t(Y))
    ts = c()
    llfs = c()
    for(gene_id in rownames(Y)) {
        rrzinb = NULL
        
        start_time <- Sys.time()
        try({
              rrzinb <- rrvglm(as.formula(paste(gene_id,formula_base)), 
                             zinegbinomial(zero = NULL), data = data,
                             Index.corner = 2, str0 = 3, control = rrvglm.control(trace = F,noWarning=T))
        })
        end_time <- Sys.time()
        t = difftime(end_time, start_time, units = "secs")
        
        ts = c(ts,t)
        if(is.null(rrzinb) || class(rrzinb) == 'character') {
            llfs = c(llfs,NA)
        }
        else {
            llfs = c(llfs,logLik(rrzinb))
        }
    }
    df_t <- data.frame("gene_id"=rownames(Y))
    df_t['cpu_time']=ts
    df_t['llf']=llfs
    
    if(is.null(df_r)) {
        df_r = df_t
    }
    else {
        df_r = rbind(df_r,df_t)
    }
    write.csv(df_r,'VGAM.csv')
}

[1] "Neu-NRGN-II"


“from the initial values only, the data appears to have little or no 0-inflation, and possibly 0-deflation.”


Error in checkwz(wz, M = M, trace = trace, wzepsilon = control$wzepsilon) : 
  NAs found in the working weights variable 'wz'


“from the initial values only, the data appears to have little or no 0-inflation, and possibly 0-deflation.”


Error in checkwz(wz, M = M, trace = trace, wzepsilon = control$wzepsilon) : 
  NAs found in the working weights variable 'wz'


“1744 diagonal elements of the working weights variable 'wz' have been replaced by 1.819e-12”
“1896 diagonal elements of the working weights variable 'wz' have been replaced by 1.819e-12”
“404 diagonal elements of the working weights variable 'wz' have been replaced by 1.819e-12”
“437 diagonal elements of the working weights variable 'wz' have been replaced by 1.819e-12”
“1300 diagonal elements of the working weights variable 'wz' have been replaced by 1.819e-12”
“1284 diagonal elements of the working weights variable 'wz' have been replaced by 1.819e-12”
“690 diagonal elements of the working weights variable 'wz' have been replaced by 1.819e-12”
“707 diagonal elements of the working weights variable 'wz' have been replaced by 1.819e-12”
“1069 diagonal elements of the working weights variable 'wz' have been replaced by 1.819e-12”
“1067 diagonal elements of the working weights variable 'wz' have been replaced by 1.819e-12”
“822 diagonal elements of the working weights variable 'wz' have

Error in checkwz(wz, M = M, trace = trace, wzepsilon = control$wzepsilon) : 
  NAs found in the working weights variable 'wz'


“from the initial values only, the data appears to have little or no 0-inflation, and possibly 0-deflation.”


Error in checkwz(wz, M = M, trace = trace, wzepsilon = control$wzepsilon) : 
  NAs found in the working weights variable 'wz'
