# Matrix eQTL

In [1]:
library("MatrixEQTL");

In [2]:
base.dir = '/data_volume/memento/lupus/full_analysis/mateqtl/inputs/'

In [3]:
pop <- 'eur'

In [4]:
useModel = modelLINEAR; # modelANOVA, modelLINEAR, or modelLINEAR_CROSS
SNP_file_name = paste(base.dir, pop, '_genos.tsv', sep="");
snps_location_file_name = paste(base.dir, pop, "_snpspos.txt", sep="");
covariates_file_name = paste(base.dir,  pop, '_mateqtl_cov.txt', sep="");
pvOutputThreshold_cis = 1;
errorCovariance = numeric();
cisDist = 100000;

gene_location_file_name = paste(base.dir, "geneloc_tss_hg19.txt", sep="");


## Load genotype data

snps = SlicedData$new();
snps$fileDelimiter = "\t";      # the TAB character
snps$fileOmitCharacters = "NA"; # denote missing values;
snps$fileSkipRows = 1;          # one row of column labels
snps$fileSkipColumns = 1;       # one column of row labels
snps$fileSliceSize = 100000;      # read file in slices of 2,000 rows
snps$LoadFile(SNP_file_name);


## Load covariates

cvrt = SlicedData$new();
cvrt$fileDelimiter = "\t";      # the TAB character
cvrt$fileOmitCharacters = "NA"; # denote missing values;
cvrt$fileSkipRows = 1;          # one row of column labels
cvrt$fileSkipColumns = 1;       # one column of row labels
if(length(covariates_file_name)>0) {
cvrt$LoadFile(covariates_file_name);
}



Rows read: 100,000

Rows read: 200,000

Rows read: 300,000

Rows read: 400,000

Rows read: 500,000

Rows read: 600,000

Rows read: 700,000

Rows read: 800,000

Rows read: 900,000

Rows read: 1,000,000

Rows read: 1,100,000

Rows read: 1,200,000

Rows read: 1,300,000

Rows read: 1,400,000

Rows read: 1,500,000

Rows read: 1,600,000

Rows read: 1,700,000

Rows read: 1,800,000

Rows read: 1,900,000

Rows read: 2,000,000

Rows read: 2,100,000

Rows read: 2,200,000

Rows read: 2,300,000

Rows read: 2,400,000

Rows read: 2,500,000

Rows read: 2,600,000

Rows read: 2,700,000

Rows read: 2,800,000

Rows read: 2,900,000

Rows read: 3,000,000

Rows read: 3,100,000

Rows read: 3,200,000

Rows read: 3285470 done.

Rows read: 35 done.



In [5]:
snpspos = read.table(snps_location_file_name, header = TRUE, stringsAsFactors = FALSE);
genepos = read.table(gene_location_file_name, header = TRUE, stringsAsFactors = FALSE, sep='\t');

for (ct in c('B','T8', 'T4', 'NK', 'ncM', 'cM')){


    expression_file_name = paste(base.dir, "../pseudobulk/", pop, '_', ct, '.csv',sep="");
    output_file_name = paste(base.dir, "../outputs/", pop, '_', ct, '_all_hg19.csv', sep="");
    ## Load gene expression data

    gene = SlicedData$new();
    gene$fileDelimiter = "\t";      # the TAB character
    gene$fileOmitCharacters = "NA"; # denote missing values;
    gene$fileSkipRows = 1;          # one row of column labels
    gene$fileSkipColumns = 1;       # one column of row labels
    gene$fileSliceSize = 2000;      # read file in slices of 2,000 rows
    gene$LoadFile(expression_file_name);


    me = Matrix_eQTL_main(
        snps = snps,
        gene = gene,
        cvrt = cvrt,
#         output_file_name     = output_file_name,
#         pvOutputThreshold     = pvOutputThreshold,
        useModel = useModel,
        errorCovariance = errorCovariance,
        verbose = FALSE,
        output_file_name.cis = output_file_name,
        pvOutputThreshold.cis = pvOutputThreshold_cis,
        snpspos = snpspos,
        genepos = genepos,
        cisDist = cisDist,
        pvalue.hist = "qqplot",
        min.pv.by.genesnp = FALSE,
        noFDRsaveMemory = FALSE);
    
    }

Rows read: 2,000

Rows read: 4,000

Rows read: 6,000

Rows read: 7681 done.

7681 of 7681 genes matched

3285470 of 3285470 SNPs matched


 0.75% done, 44,834 cis-eQTLs, 2,338 trans-eQTLs

 1.51% done, 4,685 trans-eQTLs

 2.27% done, 6,692 trans-eQTLs

 3.03% done, 8,790 trans-eQTLs

 3.78% done, 109,452 cis-eQTLs, 10,730 trans-eQTLs

 4.54% done, 12,928 trans-eQTLs

 5.30% done, 15,141 trans-eQTLs

 6.06% done, 17,236 trans-eQTLs

 6.81% done, 172,533 cis-eQTLs, 19,388 trans-eQTLs

 7.57% done, 21,493 trans-eQTLs

 8.33% done, 23,669 trans-eQTLs

 9.09% done, 25,721 trans-eQTLs

 9.84% done, 218,145 cis-eQTLs, 28,287 trans-eQTLs

10.60% done, 30,120 trans-eQTLs

11.36% done, 32,109 trans-eQTLs

12.12% done, 33,963 trans-eQTLs

12.87% done, 294,641 cis-eQTLs, 36,516 trans-eQTLs

13.63% done, 38,131 trans-eQTLs

14.39% done, 39,999 trans-eQTLs

15.15% done, 41,537 trans-eQTLs

15.90% done, 343,971 cis-eQTLs, 43,705 trans-eQTLs

16.66% done, 45,786 trans-eQTLs

17.42% done, 47,603 trans-