In [1]:
import numpy as np
import pandas as pd
import torch

In [2]:
def make_square(df):
    """Return `df` re-indexed so rows and columns carry the same labels.

    The shared label set is the union of the row index and the column
    labels. Cells that did not exist in the input come back as NaN. A new
    frame is returned; the input is not modified.
    """
    labels = df.index.union(df.columns)
    return df.reindex(index=labels, columns=labels)

def make_symmetric(df):
    """Mirror a square frame across its main diagonal.

    Missing cells are first replaced by 0. The result is the zero-filled
    frame plus its transpose, with the diagonal subtracted once so that it
    is not counted twice. An off-diagonal value that is present in both
    triangles of the input is therefore added to its mirror image rather
    than kept as is. The input frame is not modified.
    """
    filled = df.fillna(0)
    # Matrix holding only the main diagonal of `filled`, zeros elsewhere.
    diagonal_only = np.diag(np.diag(filled))
    return filled + filled.T - diagonal_only

def fill_diag(df, value):
    """Return a copy of `df` with its main diagonal set to `value`.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose diagonal should be overwritten. It is not modified.
    value : scalar
        Value written to every position (i, i).

    Returns
    -------
    pandas.DataFrame
        New frame with the same index and columns as `df`.

    Notes
    -----
    The diagonal is written into an explicit, writable NumPy copy of the
    data rather than into `df.values`. `df.values` is not guaranteed to be
    a writable view of the frame: for frames with mixed column dtypes it is
    a freshly built array (so the write would be lost), and with pandas
    Copy-on-Write it is read-only (so the write would raise). As a
    consequence, a frame with mixed column dtypes comes back with the single
    common dtype NumPy chose for the copy.
    """
    filled = df.to_numpy(copy=True)
    np.fill_diagonal(filled, value)
    return pd.DataFrame(filled, index=df.index, columns=df.columns)

In [3]:
# Load the tab-separated table of phenotype pairs (one row per code_1/code_2 pair).
coheritability_df = pd.read_csv(
    '../data/phenotypes_meta/coheritability.tsv',
    sep='\t',
)

coheritability_df.head(2)

Unnamed: 0,code_1,code_2,h2_liability_1,h2_liability_2,rg,rp,coheritability
0,A09,C18,-0.029687,0.120329,-1.013,0.022819,
1,A09,C34,-0.029687,0.116966,-0.07082,0.023801,


In [4]:
# Load only the phenotype code and its h2_liability value from the heritability table.
h2_df = pd.read_csv(
    '../data/phenotypes_meta/heritability.tsv',
    sep='\t',
    usecols=['code', 'h2_liability'],
)

h2_df.head(2)

Unnamed: 0,code,h2_liability
0,A09,-0.029687
1,C18,0.120329


In [5]:
# Heritabilities are the main diagonal of the genetic covariance matrix.
# Build one (code, code) row per phenotype straight from h2_df. The value
# column is named 'rg' only so that these rows share a column name with the
# off-diagonal rows they are later concatenated with.
# (No self-merge: it would add nothing for unique codes and would multiply
# rows if a code ever appeared more than once.)
diagonal_df = (
    h2_df
    .rename(columns={'code': 'code_1', 'h2_liability': 'rg'})
    .assign(code_2=lambda df: df['code_1'])
    .filter(items=['code_1', 'code_2', 'rg'])
)

diagonal_df.head(2)

Unnamed: 0,code_1,code_2,rg
0,A09,A09,-0.029687
1,C18,C18,0.120329


In [6]:
# Stack the pairwise 'rg' rows with the diagonal rows, pivot to a
# code_1 x code_2 table, give rows and columns the same labels, then mirror
# across the diagonal (make_symmetric replaces missing cells with 0).
# NOTE(review): off-diagonal cells come from the 'rg' column while the
# diagonal comes from h2_liability - confirm that mixing these two
# quantities in one matrix is intended.
genetic_covariance_matrix_df = make_symmetric(
    make_square(
        pd.concat(
            [coheritability_df[['code_1', 'code_2', 'rg']], diagonal_df],
            ignore_index=True,
        )
        .pivot(index='code_1', columns='code_2', values='rg')
    )
)

# Persist the matrix as a tab-separated file.
genetic_covariance_matrix_df.to_csv(
    '../data/qtphenproxy_data/genetic_covariance_matrix.tsv',
    sep='\t',
)

genetic_covariance_matrix_df.iloc[:5, :5]

Unnamed: 0,A09,C18,C34,C43,C44
A09,-0.029687,-1.013,-0.07082,-0.08575,0.4585
C18,-1.013,0.120329,0.2104,-0.1396,-0.02444
C34,-0.07082,0.2104,0.116966,-0.3188,0.05157
C43,-0.08575,-0.1396,-0.3188,0.081283,0.5355
C44,0.4585,-0.02444,0.05157,0.5355,0.141495


In [7]:
# Pivot the pairwise 'rp' values to a code_1 x code_2 table, give rows and
# columns the same labels, mirror across the diagonal (missing cells become
# 0), and finally set every diagonal entry to 1.
phenotypic_covariance_matrix_df = fill_diag(
    make_symmetric(
        make_square(
            coheritability_df.pivot(index='code_1', columns='code_2', values='rp')
        )
    ),
    1,
)

# Persist the matrix as a tab-separated file.
phenotypic_covariance_matrix_df.to_csv(
    '../data/qtphenproxy_data/phenotypic_covariance_matrix.tsv',
    sep='\t',
)

phenotypic_covariance_matrix_df.iloc[:5, :5]

Unnamed: 0,A09,C18,C34,C43,C44
A09,1.0,0.022819,0.023801,0.002891,-0.003809
C18,0.022819,1.0,0.012019,0.000307,-0.002211
C34,0.023801,0.012019,1.0,0.005983,-0.004887
C43,0.002891,0.000307,0.005983,1.0,0.055742
C44,-0.003809,-0.002211,-0.004887,0.055742,1.0
