# Introduction

In this notebook I implement a basic fine-mapping method.

### Load modules and data

In [6]:
import numpy
import itertools as it
import math


s_tss_1=numpy.load('summary_stats_g1_tss60.npy')[0]
s_tss_2=numpy.load('summary_stats_g2_tss60.npy')[0]
LD_tss_1=numpy.load('LD_g1_TSS60.npy')
LD_tss_2=numpy.load('LD_g2_TSS60.npy')

### Generate z arrays

In [2]:
z1 = numpy.array(numpy.divide(s_tss_1['beta'],numpy.sqrt(s_tss_1['var_beta'])))
z2 = numpy.array(numpy.divide(s_tss_2['beta'],numpy.sqrt(s_tss_1['var_beta'])))
z1 = numpy.ndarray.flatten(z1)
z2 = numpy.ndarray.flatten(z2)

### Create selection of SNPs

In [3]:
def select_snps(z, subset):
    return [z[i] for i in subset]

#example
# for subset in it.combinations(range(len(z1)),3):
#     print subset, select_snps(z1, subset)    

### Select covariance submatrix

In [4]:
def select_cov(cov, subset):
    return cov[numpy.ix_(subset,subset)]

#example   
#select_cov(LD_tss_1, (0,1,5))

### Calculate Bayes Factor

In [22]:
def calc_BF(z, cov, v,n):
    z = numpy.matrix(z)
    z = z.T
    v = numpy.matrix(v)
    coeff = 1. / math.sqrt(numpy.linalg.det((numpy.matrix(numpy.eye(len(z))) + n * numpy.matrix(v) * numpy.matrix(cov)).I))
    exponent = 0.5* z.T * numpy.matrix((n*v.I + cov)).I * z
    return numpy.array(coeff * numpy.exp(exponent))[0][0]

# example
subset = (0,1,5,8)
cov = select_cov(LD_tss_1, subset)
z = select_snps(z1, subset)
v = numpy.eye(len(z))/1000
n = 1000
calc_BF(z,cov,v,n)


    
    

3.5062534142909358

In [28]:
numpy.matrix(v).I

matrix([[ 1000.,     0.,     0.,     0.],
        [    0.,  1000.,     0.,     0.],
        [    0.,     0.,  1000.,     0.],
        [    0.,     0.,     0.,  1000.]])

In [30]:
def choose_variant_set(data,k,v):
    results = []
    for i in range(1,k):
        for subset in it.combinations(range(len(data[0])),i):
            z = select_snps(data[0], subset)
            cov = select_cov(data[1],subset)
            n = data[2]
            v_matrix = numpy.eye(len(z)) * v
            results.append((subset, calc_BF(z, cov,v_matrix,n)))
    results.sort(key=lambda x: x[1], reverse=True)
    return results[0:10]

k=5
data1 = (z1, LD_tss_1,10000)
data2 = (z2, LD_tss_2,1000)
v = 0.000001
    
set1 = choose_variant_set(data1,k,v)
set2 = choose_variant_set(data2,k,v)

In [31]:
set1

[((5, 14, 20, 29), 1.0201000102521578),
 ((0, 14, 20, 29), 1.0200999980606142),
 ((2, 11, 18, 33), 1.0200999968504421),
 ((10, 14, 20, 29), 1.0200999954886145),
 ((7, 11, 15, 32), 1.0200999945245564),
 ((5, 14, 20, 32), 1.0200999920514213),
 ((7, 12, 15, 32), 1.0200999892529128),
 ((2, 11, 18, 29), 1.0200999864872113),
 ((5, 14, 20, 33), 1.0200999864414928),
 ((5, 14, 27, 32), 1.0200999842289329)]

In [32]:
set2

[((26, 28, 29, 33), 1.0020052234096173),
 ((26, 27, 29, 33), 1.0020052201331757),
 ((26, 29, 31, 33), 1.0020051899574445),
 ((9, 26, 29, 33), 1.0020051721291141),
 ((2, 26, 29, 33), 1.002005168638189),
 ((26, 29, 30, 33), 1.0020051654666102),
 ((6, 26, 29, 33), 1.002005163724669),
 ((3, 26, 29, 33), 1.0020051631158506),
 ((8, 26, 29, 33), 1.0020051594602768),
 ((4, 26, 29, 33), 1.0020051560124581)]