# Matrix eQTL

In [1]:
library("MatrixEQTL");

In [2]:
base.dir = '/data_volume/memento/lupus/full_analysis/mateqtl/inputs/'

In [3]:
pop <- 'eur'

In [4]:
useModel = modelLINEAR; # modelANOVA, modelLINEAR, or modelLINEAR_CROSS
SNP_file_name = paste(base.dir, pop, '_genos.tsv', sep="");
snps_location_file_name = paste(base.dir, pop, "_snpspos.txt", sep="");
covariates_file_name = paste(base.dir,  pop, '_mateqtl_cov.txt', sep="");
pvOutputThreshold_cis = 1;
errorCovariance = numeric();
cisDist = 100000;

gene_location_file_name = paste(base.dir, "geneloc_tss_hg19.txt", sep="");


## Load genotype data

snps = SlicedData$new();
snps$fileDelimiter = "\t";      # the TAB character
snps$fileOmitCharacters = "NA"; # denote missing values;
snps$fileSkipRows = 1;          # one row of column labels
snps$fileSkipColumns = 1;       # one column of row labels
snps$fileSliceSize = 100000;      # read file in slices of 2,000 rows
snps$LoadFile(SNP_file_name);


## Load covariates

cvrt = SlicedData$new();
cvrt$fileDelimiter = "\t";      # the TAB character
cvrt$fileOmitCharacters = "NA"; # denote missing values;
cvrt$fileSkipRows = 1;          # one row of column labels
cvrt$fileSkipColumns = 1;       # one column of row labels
if(length(covariates_file_name)>0) {
cvrt$LoadFile(covariates_file_name);
}



Rows read: 100,000

Rows read: 200,000

Rows read: 300,000

Rows read: 400,000

Rows read: 500,000

Rows read: 600,000

Rows read: 700,000

Rows read: 800,000

Rows read: 900,000

Rows read: 1,000,000

Rows read: 1,100,000

Rows read: 1,200,000

Rows read: 1,300,000

Rows read: 1,400,000

Rows read: 1,500,000

Rows read: 1,600,000

Rows read: 1,700,000

Rows read: 1,800,000

Rows read: 1,900,000

Rows read: 2,000,000

Rows read: 2,100,000

Rows read: 2,200,000

Rows read: 2,300,000

Rows read: 2,400,000

Rows read: 2,500,000

Rows read: 2,600,000

Rows read: 2,700,000

Rows read: 2,800,000

Rows read: 2,900,000

Rows read: 3,000,000

Rows read: 3,100,000

Rows read: 3,200,000

Rows read: 3285470 done.

Rows read: 35 done.



In [10]:
snpspos = read.table(snps_location_file_name, header = TRUE, stringsAsFactors = FALSE);
genepos = read.table(gene_location_file_name, header = TRUE, stringsAsFactors = FALSE, sep='\t');

for (ct in c('B','T8', 'T4', 'NK', 'ncM', 'cM')){


    expression_file_name = paste(base.dir, "../pseudobulk/", pop, '_', ct, '.csv',sep="");
    output_file_name = paste(base.dir, "../outputs/", pop, '_', ct, '_all_hg19.csv', sep="");
    ## Load gene expression data

    gene = SlicedData$new();
    gene$fileDelimiter = "\t";      # the TAB character
    gene$fileOmitCharacters = "NA"; # denote missing values;
    gene$fileSkipRows = 1;          # one row of column labels
    gene$fileSkipColumns = 1;       # one column of row labels
    gene$fileSliceSize = 2000;      # read file in slices of 2,000 rows
    gene$LoadFile(expression_file_name);


    me = Matrix_eQTL_main(
        snps = snps,
        gene = gene,
        cvrt = cvrt,
#         output_file_name     = output_file_name,
#         pvOutputThreshold     = pvOutputThreshold,
        useModel = useModel,
        errorCovariance = errorCovariance,
        verbose = FALSE,
        output_file_name.cis = output_file_name,
        pvOutputThreshold.cis = pvOutputThreshold_cis,
        snpspos = snpspos,
        genepos = genepos,
        cisDist = cisDist,
        pvalue.hist = "qqplot",
        min.pv.by.genesnp = FALSE,
        noFDRsaveMemory = FALSE);
    
    }

Rows read: 2,000

Rows read: 3945 done.

3886 of 3945 genes matched

3285470 of 3285470 SNPs matched


 1.51% done, 20,341 cis-eQTLs, 2,128 trans-eQTLs

 3.03% done, 4,230 trans-eQTLs

 4.54% done, 44,741 cis-eQTLs, 6,513 trans-eQTLs

 6.06% done, 9,052 trans-eQTLs

 7.57% done, 76,495 cis-eQTLs, 11,117 trans-eQTLs

 9.09% done, 13,242 trans-eQTLs

10.60% done, 97,091 cis-eQTLs, 15,417 trans-eQTLs

12.12% done, 17,120 trans-eQTLs

13.63% done, 141,077 cis-eQTLs, 19,088 trans-eQTLs

15.15% done, 20,606 trans-eQTLs

16.66% done, 162,484 cis-eQTLs, 22,618 trans-eQTLs

18.18% done, 24,292 trans-eQTLs

19.69% done, 172,970 cis-eQTLs, 25,576 trans-eQTLs

21.21% done, 26,925 trans-eQTLs

22.72% done, 186,519 cis-eQTLs, 28,586 trans-eQTLs

24.24% done, 30,254 trans-eQTLs

25.75% done, 205,461 cis-eQTLs, 31,766 trans-eQTLs

27.27% done, 33,922 trans-eQTLs

28.78% done, 223,983 cis-eQTLs, 35,975 trans-eQTLs

30.30% done, 37,382 trans-eQTLs

31.81% done, 254,279 cis-eQTLs, 38,884 trans-eQTLs

33.