# Matrix eQTL

In [6]:
library("MatrixEQTL");

In [7]:
# Root directory of the Matrix eQTL inputs. The trailing slash is required:
# file names are appended to it by plain string concatenation.
base.dir <- "/data_volume/memento/lupus/mateqtl_input/"

In [8]:
# Association model handed to Matrix_eQTL_main:
# one of modelANOVA, modelLINEAR, or modelLINEAR_CROSS.
useModel <- modelLINEAR

# Genotype (SNP) matrix file.
SNP_file_name <- paste0(base.dir, "filtered_genos.tsv")

# Covariates file; set to character() to run without covariates.
covariates_file_name <- paste0(base.dir, "cm_cg.mateqtl_cov.txt")

# Cis-only output file (currently disabled).
# output_file_name_cis = paste(base.dir, "cm_cg_matqtl_cis.out", sep="");

# P-value threshold: only associations significant at this level are saved.
pvOutputThreshold <- 1
# pvOutputThreshold_tra = 0;

# Error covariance matrix; numeric() stands for the identity matrix.
errorCovariance <- numeric()

In [9]:
## Load genotype data

snps <- SlicedData$new()
snps$fileDelimiter <- "\t"       # columns are TAB-separated
snps$fileOmitCharacters <- "NA"  # token that denotes a missing value
snps$fileSkipRows <- 1           # skip one header row of column labels
snps$fileSkipColumns <- 1        # skip one leading column of row labels
snps$fileSliceSize <- 100000     # read the file in slices of 100,000 rows
snps$LoadFile(SNP_file_name)


## Load covariates

cvrt <- SlicedData$new()
cvrt$fileDelimiter <- "\t"       # columns are TAB-separated
cvrt$fileOmitCharacters <- "NA"  # token that denotes a missing value
cvrt$fileSkipRows <- 1           # skip one header row of column labels
cvrt$fileSkipColumns <- 1        # skip one leading column of row labels
# An empty file name (character()) means "no covariates": cvrt is left empty.
if (length(covariates_file_name) > 0) {
  cvrt$LoadFile(covariates_file_name)
}

Rows read: 7917 done.

Rows read: 21 done.



In [12]:
# snps_location_file_name = paste(base.dir, "cm_cg.snpspos.txt", sep="");
# gene_location_file_name = paste(base.dir, "../geneloc.txt", sep="");

In [13]:
# cisDist = 1e5;


In [14]:
# snpspos = read.table(snps_location_file_name, header = TRUE, stringsAsFactors = FALSE);


In [15]:
# genepos = read.table(gene_location_file_name, header = TRUE, stringsAsFactors = FALSE, sep=",");

In [16]:
# Run Matrix eQTL once per cell type, reusing the genotype (`snps`) and
# covariate (`cvrt`) objects and the settings defined in earlier cells.
# Each iteration reads that cell type's expression file and passes a
# per-cell-type output file name to Matrix_eQTL_main.
# Note: `me` is reassigned on every iteration, so after the loop it holds only
# the result object for the last cell type in the vector.
for (ct in c("B", "T8", "T4", "NK", "ncM", "cM")) {

    #     ct <- 'cM'
    # Gene expression file name
    # NOTE(review): the file has a .csv extension but is parsed as TAB-delimited
    # below -- confirm the pseudobulk files really are tab-separated.
    expression_file_name <- paste0(base.dir, "../pseudobulk/", ct, "_filtered.csv")
    #     expression_file_name = paste(base.dir, "cm_cg.expr",sep="")
    output_file_name <- paste0(base.dir, "../mateqtl_output/", ct, "_filtered.out")

    # Make sure the output directory exists before any work is done, so a
    # missing directory does not surface only when the results are written.
    output_dir <- dirname(output_file_name)
    if (!dir.exists(output_dir)) {
        dir.create(output_dir, recursive = TRUE)
    }

    ## Load gene expression data

    gene <- SlicedData$new()
    gene$fileDelimiter <- "\t"       # the TAB character
    gene$fileOmitCharacters <- "NA"  # denote missing values
    gene$fileSkipRows <- 1           # one row of column labels
    gene$fileSkipColumns <- 1        # one column of row labels
    gene$fileSliceSize <- 2000       # read file in slices of 2,000 rows
    gene$LoadFile(expression_file_name)

    # Only the single-output arguments are supplied; the cis/trans-specific
    # arguments are left commented out below.
    me <- Matrix_eQTL_main(
        snps = snps,
        gene = gene,
        cvrt = cvrt,
        output_file_name = output_file_name,
        pvOutputThreshold = pvOutputThreshold,
        useModel = useModel,
        errorCovariance = errorCovariance,
        verbose = TRUE,
    #     output_file_name.cis = output_file_name_cis,
    #     pvOutputThreshold.cis = pvOutputThreshold_cis,
    #     snpspos = snpspos,
    #     genepos = genepos,
    #     cisDist = cisDist,
        pvalue.hist = "qqplot",
        min.pv.by.genesnp = FALSE,
        noFDRsaveMemory = FALSE
    )

}

Rows read: 502 done.

Processing covariates

Task finished in 0.003 seconds

Processing gene expression data (imputation, residualization)

Task finished in 0.04 seconds

Creating output file(s)

Expected number of findings > 3974334”
Task finished in 0.097 seconds

Performing eQTL analysis

100.00% done, 3,974,334 eQTLs

Task finished in 21.541 seconds



Rows read: 836 done.

Processing covariates

Task finished in 0.002 seconds

Processing gene expression data (imputation, residualization)

Task finished in 0.055 seconds

Creating output file(s)

Expected number of findings > 6618612”
Task finished in 0.072 seconds

Performing eQTL analysis

100.00% done, 6,618,612 eQTLs

Task finished in 34.779 seconds



Rows read: 966 done.

Processing covariates

Task finished in 0.002 seconds

Processing gene expression data (imputation, residualization)

Task finished in 0.013 seconds

Creating output file(s)

Expected number of findings > 7647822”
Task finished in 0.09 seconds

Performing eQT