In [None]:
import numpy
import bayes_factors
import trait_simulation
import itertools

def is_colocalised(X, LD_matrix, trait1, trait2, db=0):
    """
    Score the evidence that trait1 and trait2 are colocalised, i.e. that they
    share a genetic basis, with respect to a shared genotype matrix X.

    Per-variant linear models are fitted for each trait, their slopes are
    rescaled by sqrt(n) and handed to ``bayes_factors`` to obtain variant-set
    Bayes factors and posteriors.  For every variant set the smaller of the two
    traits' posteriors is taken, and these minima are summed.

    Parameters
    ----------
    X : array-like exposing ``shape``
        Shared genotype matrix; ``X.shape[0]`` is used as the sample count n.
    LD_matrix : object
        LD matrix, passed through unchanged to
        ``bayes_factors.calc_variant_set_BFs``.
    trait1, trait2 : object
        Trait values, each passed with X to
        ``trait_simulation.build_linear_models``.
    db : int, optional
        Debug switch.  When 1, the first ten Bayes factors and posteriors of
        each trait are printed and, just before returning, lookup tables are
        built and ``pdb.set_trace()`` is entered.

    Returns
    -------
    number
        Sum over variant sets of ``min(posterior1, posterior2)``
        (0 when there are no variant sets).

    Raises
    ------
    ValueError
        If the two traits do not produce the same list of variant sets, in
        which case a position-wise comparison would be meaningless.
    """

    ### Get number of samples
    n = X.shape[0]
    root_n = numpy.sqrt(n)

    ### generate individual linear models
    models1 = trait_simulation.build_linear_models(X, trait1)
    models2 = trait_simulation.build_linear_models(X, trait2)

    ### z scores are taken to be slope * sqrt(n)
    ### NOTE(review): presumably relies on X and the traits being standardised
    ### so that stderr ~ 1/sqrt(n); the models' own stderr is not used - confirm.
    z_scores1 = [model.slope * root_n for model in models1]
    z_scores2 = [model.slope * root_n for model in models2]

    simulated_effectsize_data1 = (z_scores1, LD_matrix, n)
    simulated_effectsize_data2 = (z_scores2, LD_matrix, n)

    ### generate the gene set Bayes Factors
    ### (k and v are forwarded as-is; their meaning is defined by bayes_factors)
    gene_set_BFs1 = bayes_factors.calc_variant_set_BFs(simulated_effectsize_data1, k=4, v=0.01)
    gene_set_BFs2 = bayes_factors.calc_variant_set_BFs(simulated_effectsize_data2, k=4, v=0.01)

    ### calculate the posteriors: sequences of (gene_set, posterior) entries
    gene_set_posteriors1 = bayes_factors.calc_posterior(gene_set_BFs1)
    gene_set_posteriors2 = bayes_factors.calc_posterior(gene_set_BFs2)

    if db == 1:
        ### single-argument print(...) behaves the same on Python 2 and 3
        print(gene_set_BFs1[0:10])
        print(gene_set_BFs2[0:10])

        print(gene_set_posteriors1[0:10])
        print(gene_set_posteriors2[0:10])

    ### sort by gene set (NOT by posterior) so that position i refers to the
    ### same gene set for both traits
    gene_set_posteriors1 = sorted(gene_set_posteriors1, key=lambda x: x[0])
    gene_set_posteriors2 = sorted(gene_set_posteriors2, key=lambda x: x[0])

    ### pull out sorted set list
    sorted_setlist1 = [x[0] for x in gene_set_posteriors1]
    sorted_setlist2 = [x[0] for x in gene_set_posteriors2]

    ### the position-wise comparison below is only valid when both traits
    ### were scored on exactly the same gene sets
    if sorted_setlist1 != sorted_setlist2:
        raise ValueError("trait1 and trait2 produced different gene sets; "
                         "their posteriors cannot be compared position-wise")

    ### select just the posteriors
    posteriors1 = [x[1] for x in gene_set_posteriors1]
    posteriors2 = [x[1] for x in gene_set_posteriors2]

    ### colocalisation score per gene set: the smaller of the two posteriors.
    ### Only matching gene sets contribute to the result, so the full
    ### len1 x len2 cartesian product is not needed here.
    matched_colocalisations = [min(p1, p2) for p1, p2 in zip(posteriors1, posteriors2)]

    if db == 1:
        ### pdb is only needed when debugging
        import pdb

        ### full pairwise matrix of min(posterior1[i], posterior2[j]), for inspection
        colocalisations = numpy.minimum.outer(posteriors1, posteriors2)

        ### map from gene_set to position in the colocalisation array
        ### (the reverse direction is sorted_setlist1 / sorted_setlist2 itself)
        setlist_1map = dict((gene_set, i) for i, gene_set in enumerate(sorted_setlist1))
        setlist_2map = dict((gene_set, i) for i, gene_set in enumerate(sorted_setlist2))

        bf_1map = dict(gene_set_BFs1)
        bf_2map = dict(gene_set_BFs2)

        posterior1_map = dict(gene_set_posteriors1)
        posterior2_map = dict(gene_set_posteriors2)
        pdb.set_trace()

    ### output total evidence for colocalisation
    return sum(matched_colocalisations)

In [4]:
if __name__ == '__main__':
    %reset -f
    import numpy
    from sklearn import preprocessing
    import sys
    sys.path.append('/Users/fonz/Projects/Notebooks/Fine-mapping/src')
    import models
    

    gene_ratio_sets = [({8:1},{8:1}),
                 ({8:1},{8:2}),
                 ({8:1},{10:1}),
                 ({8:1, 10:1},{8:1, 10:1}),
                 ({8:1, 10:1},{8:1, 10:2}),
                 ({8:1, 10:1},{8:1, 15:1}),
                 ({8:1, 10:1, 12:1},{8:1, 10:1, 12:1}),
                 ({8:1, 10:1, 12:1},{8:1, 10:1, 12:2}),
                 ({8:1, 10:1, 12:1},{8:1, 10:1, 15:1}),
                ]
    ### set sample size
    n = 10000

    ### simulate genotypes and scale columns
    X = preprocessing.scale(models.trait_simulation.simulate_genotype(n, 30, (0.85, 0.1, 0.05)))

    ### calculate LD matrix
    LD_matrix = numpy.corrcoef(X,rowvar=0)

    for gr in gene_ratio_sets:        

        ### simulate two traits and scale columns
        y1 = preprocessing.scale(models.trait_simulation.simulate_traits(X, snp_ratios=gr[0], beta_var=0.2))
        y2 = preprocessing.scale(models.trait_simulation.simulate_traits(X, snp_ratios=gr[1], beta_var=0.2))

        print gr, models.colocalisation.is_colocalised(X,LD_matrix,y1,y2)

({8: 1}, {8: 1}) 0.985738914623
({8: 1}, {8: 2}) 0.967978797588
({8: 1}, {10: 1}) 0.00367235528804
({8: 1, 10: 1}, {8: 1, 10: 1}) 0.980846736048
({8: 1, 10: 1}, {8: 1, 10: 2}) 0.981668034489
({8: 1, 10: 1}, {8: 1, 15: 1}) 0.00308870570135
({8: 1, 10: 1, 12: 1}, {8: 1, 10: 1, 12: 1}) 1.0
({8: 1, 10: 1, 12: 1}, {8: 1, 10: 1, 12: 2}) 1.0
({8: 1, 10: 1, 12: 1}, {8: 1, 10: 1, 15: 1}) 0.0
