In [1]:
import pathlib

import pandas as pd

In [2]:
# Load the chosen ICD codes. The file is read with header=None, so its first
# line is treated as data and the single column is named 'code' explicitly.
codes_df = pd.read_csv(
    '../data/chosen_icd_codes.csv',
    header=None,
    names=['code'],
)

codes_df.head(2)

Unnamed: 0,code
0,A09
1,C18


# Heritability information

In [3]:
# Load the gzip-compressed, tab-separated heritability table.
# NOTE(review): the saved output of this cell contains a truncated warning
# fragment. If it is a pandas DtypeWarning (mixed-type columns), consider
# passing explicit dtypes or low_memory=False -- confirm on a fresh run.
h2_df = pd.read_csv(
    '../data/ukb31063_h2_all.02Oct2019.tsv.gz',
    sep='\t',
    compression='gzip',
)

h2_df.head(2)

  has_raised = await self.run_ast_nodes(code_ast.body, cell_name,


Unnamed: 0,phenotype,sex,dilute,gwas_file,variable_type,n_missing,source,description,n,n_cases,...,isBadPower,isLowNeff,isMidNeff,isExtremeSE,isHighSE,isSexBias,isBadOrdinal,isNumericOrdinal,conf_simple,h2_sig
0,100001_irnt,both_sexes,,100001_irnt.gwas.imputed_v3.both_sexes.tsv.bgz,continuous_irnt,309741,phesant,Food weight,51453,,...,False,False,False,False,False,False,False,False,high,z4
1,100001_raw,both_sexes,,100001_raw.gwas.imputed_v3.both_sexes.tsv.bgz,continuous_raw,309741,phesant,Food weight,51453,,...,False,False,False,False,False,False,False,False,,


In [4]:
# Columns carried over into the exported heritability table.
h2_columns = ['phenotype', 'n_cases', 'n_controls', 'h2_liability',
              'h2_liability_se', 'h2_z', 'h2_p']

# Restrict to the both-sexes rows first, then join against the chosen codes
# (merge defaults to an inner join, so only phenotypes present in codes_df
# survive) and keep just the columns listed above.
h2_both_sexes_df = h2_df.query('sex == "both_sexes"')
relevant_h2_df = (
    h2_both_sexes_df
    .merge(codes_df, left_on='phenotype', right_on='code')
    .filter(items=h2_columns)
)

# Persist the subset as a tab-separated file without the row index.
relevant_h2_df.to_csv('../data/relevant_h2.tsv', sep='\t', index=False)

relevant_h2_df.head(2)

Unnamed: 0,phenotype,n_cases,n_controls,h2_liability,h2_liability_se,h2_z,h2_p
0,A09,2161.0,359033.0,-0.029687,0.043285,-0.685859,0.753599
1,C18,2226.0,358968.0,0.120329,0.042461,2.833887,0.002299


# Genetic correlation information

In [5]:
# Load the whitespace-delimited genetic-correlation table.
# The separator is a regular expression, so it is written as a raw string:
# in a normal string literal '\s' is an invalid escape sequence, which emits
# a DeprecationWarning (SyntaxWarning on Python 3.12+) even though the
# resulting value happens to be the same.
rg_df = pd.read_csv('../data/geno_correlation.r2', sep=r'\s+')

rg_df.head(2)

Unnamed: 0,p1,p2,rg,se,z,p,h2_obs,h2_obs_se,h2_int,h2_int_se,gcov_int,gcov_int_se
0,../../results/UKbb/ldsc-additive-export/sumsta...,../../results/UKbb/ldsc-additive-export/sumsta...,0.9998,0.002135,468.4,0.0,0.06278,0.009665,1.006,0.006722,0.9886,0.006696
1,../../results/UKbb/ldsc-additive-export/sumsta...,../../results/UKbb/ldsc-additive-export/sumsta...,0.6925,0.09116,7.597,3.041e-14,0.04259,0.009526,1.007,0.006756,0.5438,0.00528


In [6]:
def _file_name_parts(path):
    """Return the '.'-separated pieces of the final component of `path`."""
    return pathlib.Path(path).name.split('.')


# Keep the correlation statistics and decode both file paths: piece 0 of the
# file name replaces p1/p2, and piece 3 is stored as the sex label that the
# query below compares against "both_sexes".
rg_decoded_df = rg_df.filter(items=['p1', 'p2', 'rg', 'se', 'z', 'p']).assign(
    parts_1=lambda df: df['p1'].apply(_file_name_parts),
    sex_1=lambda df: df['parts_1'].apply(lambda parts: parts[3]),
    p1=lambda df: df['parts_1'].apply(lambda parts: parts[0]),

    parts_2=lambda df: df['p2'].apply(_file_name_parts),
    sex_2=lambda df: df['parts_2'].apply(lambda parts: parts[3]),
    p2=lambda df: df['parts_2'].apply(lambda parts: parts[0]),
)

# Only pairs where both sides are labelled "both_sexes" are kept.
rg_both_sexes_df = rg_decoded_df.query(
    '(sex_1 == "both_sexes") & (sex_2 == "both_sexes")'
)

# Inner-join twice against the chosen codes so that both p1 and p2 must be
# chosen codes. The second merge collides on 'code', which pandas
# disambiguates as code_x / code_y; those and the helper columns are dropped.
relevant_rg_df = (
    rg_both_sexes_df
    .merge(codes_df, left_on='p1', right_on='code')
    .merge(codes_df, left_on='p2', right_on='code')
    .drop(columns=['parts_1', 'sex_1', 'parts_2', 'sex_2', 'code_x', 'code_y'])
)

# Persist the subset as a tab-separated file without the row index.
relevant_rg_df.to_csv('../data/relevant_rg.tsv', sep='\t', index=False)

relevant_rg_df.head(2)

Unnamed: 0,p1,p2,rg,se,z,p
0,A09,C18,-1.013,1.092,-0.9279,0.3535
1,A09,C34,-0.07082,0.6842,-0.1035,0.9176


# Phenotypic correlation information