# Matrix eQTL

In [1]:
library("MatrixEQTL");

In [2]:
# Root directory holding the MatrixEQTL input files; the trailing slash is
# required because paths are built from it by plain string concatenation.
base.dir <- "/data_volume/memento/lupus/full_analysis/mateqtl/inputs/"

In [3]:
# Population label; used as the file-name prefix for every per-population
# input and output file built from it.
pop <- "asian"

In [4]:
# ---- Analysis settings ----

# Association model handed to Matrix_eQTL_main; the package also provides
# modelANOVA and modelLINEAR_CROSS.
useModel <- modelLINEAR

# Values forwarded unchanged to Matrix_eQTL_main as pvOutputThreshold.cis,
# errorCovariance and cisDist respectively.
pvOutputThreshold_cis <- 1
errorCovariance <- numeric()
cisDist <- 100000  # NOTE(review): presumably base pairs around the gene position -- confirm

# ---- Input file paths (all relative to base.dir) ----

SNP_file_name <- paste0(base.dir, pop, "_genos.tsv")
snps_location_file_name <- paste0(base.dir, pop, "_snpspos.txt")
covariates_file_name <- paste0(base.dir, pop, "_mateqtl_cov.txt")
gene_location_file_name <- paste0(base.dir, "geneloc_tss_hg19.txt")


# ---- Load genotype data ----

snps <- SlicedData$new()
snps$fileDelimiter <- "\t"        # tab-separated values
snps$fileOmitCharacters <- "NA"   # token that marks a missing value
snps$fileSkipRows <- 1            # skip one header row of column labels
snps$fileSkipColumns <- 1         # skip one leading column of row labels
snps$fileSliceSize <- 100000      # read the file in slices of 100,000 rows
snps$LoadFile(SNP_file_name)


# ---- Load covariates ----

cvrt <- SlicedData$new()
cvrt$fileDelimiter <- "\t"        # tab-separated values
cvrt$fileOmitCharacters <- "NA"   # token that marks a missing value
cvrt$fileSkipRows <- 1            # skip one header row of column labels
cvrt$fileSkipColumns <- 1         # skip one leading column of row labels
# The file is only read when a covariate path is set; with the path built
# above this is always the case, so cvrt is always populated here.
if (length(covariates_file_name) > 0) {
  cvrt$LoadFile(covariates_file_name)
}



Rows read: 100,000

Rows read: 200,000

Rows read: 300,000

Rows read: 400,000

Rows read: 500,000

Rows read: 600,000

Rows read: 700,000

Rows read: 800,000

Rows read: 900,000

Rows read: 1,000,000

Rows read: 1,100,000

Rows read: 1,200,000

Rows read: 1,300,000

Rows read: 1,400,000

Rows read: 1,500,000

Rows read: 1,600,000

Rows read: 1,700,000

Rows read: 1,800,000

Rows read: 1,900,000

Rows read: 2,000,000

Rows read: 2,100,000

Rows read: 2,200,000

Rows read: 2,300,000

Rows read: 2,400,000

Rows read: 2,500,000

Rows read: 2,600,000

Rows read: 2,700,000

Rows read: 2,800,000

Rows read: 2,900,000

Rows read: 3,000,000

Rows read: 3,100,000

Rows read: 3,200,000

Rows read: 3285470 done.

Rows read: 35 done.



In [5]:
# SNP and gene position tables, passed to Matrix_eQTL_main as snpspos/genepos.
# The gene table is read with an explicit tab separator.
snpspos <- read.table(
  snps_location_file_name,
  header = TRUE,
  stringsAsFactors = FALSE
)
genepos <- read.table(
  gene_location_file_name,
  header = TRUE,
  stringsAsFactors = FALSE,
  sep = "\t"
)

# Run the analysis once per cell type, reusing the genotypes and covariates
# loaded earlier. `me` is overwritten on every iteration, so after the loop it
# holds only the result for the last cell type; the per-cell-type cis results
# are the files written to output_file_name.
for (ct in c("T8", "T4", "NK", "ncM", "cM", "B")) {
  # Per-cell-type pseudobulk expression input and cis-eQTL output paths.
  expression_file_name <- paste0(base.dir, "../pseudobulk/", pop, "_", ct, ".csv")
  output_file_name <- paste0(base.dir, "../outputs/", pop, "_", ct, "_all_hg19.csv")

  # ---- Load gene expression data ----
  # The file is parsed as tab-delimited even though its extension is .csv.
  gene <- SlicedData$new()
  gene$fileDelimiter <- "\t"        # tab-separated values
  gene$fileOmitCharacters <- "NA"   # token that marks a missing value
  gene$fileSkipRows <- 1            # skip one header row of column labels
  gene$fileSkipColumns <- 1         # skip one leading column of row labels
  gene$fileSliceSize <- 2000        # read the file in slices of 2,000 rows
  gene$LoadFile(expression_file_name)

  # NOTE(review): output_file_name and pvOutputThreshold (the non-cis
  # arguments) are deliberately not passed, so the package defaults apply.
  # The captured run log still reports trans-eQTL counts, i.e. a trans scan
  # runs at the default threshold (1e-5 per the MatrixEQTL docs -- confirm).
  # If only cis results are wanted, passing pvOutputThreshold = 0 would skip it.
  me <- Matrix_eQTL_main(
    snps = snps,
    gene = gene,
    cvrt = cvrt,
    useModel = useModel,
    errorCovariance = errorCovariance,
    verbose = FALSE,
    output_file_name.cis = output_file_name,
    pvOutputThreshold.cis = pvOutputThreshold_cis,
    snpspos = snpspos,
    genepos = genepos,
    cisDist = cisDist,
    pvalue.hist = "qqplot",
    min.pv.by.genesnp = FALSE,
    noFDRsaveMemory = FALSE
  )
}

Rows read: 2,000

Rows read: 4,000

Rows read: 6,000

Rows read: 7531 done.

7531 of 7531 genes matched

3285470 of 3285470 SNPs matched


 0.75% done, 40,622 cis-eQTLs, 2,780 trans-eQTLs

 1.51% done, 6,003 trans-eQTLs

 2.27% done, 9,949 trans-eQTLs

 3.03% done, 12,110 trans-eQTLs

 3.78% done, 102,495 cis-eQTLs, 14,197 trans-eQTLs

 4.54% done, 16,099 trans-eQTLs

 5.30% done, 18,420 trans-eQTLs

 6.06% done, 19,972 trans-eQTLs

 6.81% done, 164,339 cis-eQTLs, 24,562 trans-eQTLs

 7.57% done, 29,099 trans-eQTLs

 8.33% done, 32,208 trans-eQTLs

 9.09% done, 36,089 trans-eQTLs

 9.84% done, 209,051 cis-eQTLs, 38,931 trans-eQTLs

10.60% done, 41,076 trans-eQTLs

11.36% done, 43,724 trans-eQTLs

12.12% done, 45,724 trans-eQTLs

12.87% done, 281,226 cis-eQTLs, 48,017 trans-eQTLs

13.63% done, 49,639 trans-eQTLs

14.39% done, 51,576 trans-eQTLs

15.15% done, 52,555 trans-eQTLs

15.90% done, 326,690 cis-eQTLs, 53,936 trans-eQTLs

16.66% done, 55,312 trans-eQTLs

17.42% done, 56,864 trans