In [1]:
import numpy as np
import pandas as pd
import torch
import tqdm

from plotnine import *

In [2]:
import sys

# Extend the module search path before importing `model`; presumably `model`
# lives in ../qtphenproxy_h2/ (relative to the notebook's working directory),
# so this append must run before the import below -- TODO confirm location.
sys.path.append('../qtphenproxy_h2/')

import model

In [3]:
# Lookup from target phenotype label to the ICD-10 code used for it in the tables below.
target_to_icd10 = dict(stroke='I63', MI='I21')

In [4]:
# Read the tab-separated heritability table and preview its first two rows.
heritability_df = pd.read_csv(
    '../data/phenotypes_meta/heritability.tsv',
    delimiter='\t',
)

heritability_df.head(n=2)

Unnamed: 0,code,n_cases,n_controls,h2_liability,h2_liability_se,h2_z,h2_p
0,A09,2161.0,359033.0,-0.029687,0.043285,-0.685859,0.753599
1,C18,2226.0,358968.0,0.120329,0.042461,2.833887,0.002299


In [5]:
# Read the tab-separated genetic covariance matrix; the file's first column
# becomes the row index.
genetic_covariance_matrix_df = pd.read_csv(
    '../data/qtphenproxy_data/genetic_covariance_matrix.tsv',
    delimiter='\t',
    index_col=0,
)

# Preview only the top-left 3x3 corner.
genetic_covariance_matrix_df.iloc[0:3, 0:3]

Unnamed: 0,A09,C18,C34
A09,-0.029687,-1.013,-0.07082
C18,-1.013,0.120329,0.2104
C34,-0.07082,0.2104,0.116966


In [6]:
# Read the tab-separated phenotypic covariance matrix; the file's first column
# becomes the row index.
phenotypic_covariance_matrix_df = pd.read_csv(
    '../data/qtphenproxy_data/phenotypic_covariance_matrix.tsv',
    delimiter='\t',
    index_col=0,
)

# Preview only the top-left 3x3 corner.
phenotypic_covariance_matrix_df.iloc[0:3, 0:3]

Unnamed: 0,A09,C18,C34
A09,1.0,0.022819,0.023801
C18,0.022819,1.0,0.012019
C34,0.023801,0.012019,1.0


In [7]:
# Read the wide, tab-separated phenotype occurrence table; the file's first
# column becomes the row index.
phenotypes_data_df = pd.read_csv(
    '../data/phenotypes/relevant_occurrences_wide.tsv',
    delimiter='\t',
    index_col=0,
)

# Show the column headers only (zero rows).
phenotypes_data_df.head(n=0)

Unnamed: 0_level_0,A09,C18,C34,C43,C44,C50,C67,D12,D17,D22,...,Z01,Z03,Z08,Z09,Z12,Z42,Z43,Z45,Z47,Z53
IID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1


# Train model

In [8]:
def train(code, learning_rate=0.001, n_iter=5000, name=None, seed=0, output_dir='../data'):
    """Build, fit and save a `model.CombinationFitter` for one phenotype code.

    The fitter is constructed from the notebook-level tables
    `genetic_covariance_matrix_df`, `phenotypic_covariance_matrix_df` and
    `phenotypes_data_df`, so those cells must have been run first.

    Parameters
    ----------
    code : str
        Phenotype code handed to `CombinationFitter.from_tables` (e.g. 'I21').
    learning_rate : float, default 0.001
        Forwarded unchanged to `fitter.fit`.
    n_iter : int, default 5000
        Forwarded unchanged to `fitter.fit`.
    name : str, optional
        File name (inside `output_dir`) passed to `fitter.save_fit`.
        Defaults to ``f'saved_qtphenproxy_{code}'``.
    seed : int, default 0
        Forwarded unchanged to `fitter.fit`. The default matches the value
        that used to be hard-coded, so existing calls behave the same.
    output_dir : str, default '../data'
        Directory prefix for the saved fit. The default matches the
        previously hard-coded location.

    Returns
    -------
    The fitter object, after `fit` and `save_fit` have been called on it.
    """
    fitter = model.CombinationFitter.from_tables(
        code,
        genetic_covariance_matrix_df,
        phenotypic_covariance_matrix_df,
        phenotypes_data_df,
    )
    # verbose stays off: this helper is meant to be called repeatedly.
    fitter.fit(n_iter=n_iter, seed=seed, learning_rate=learning_rate, verbose=False)

    if name is None:
        name = f'saved_qtphenproxy_{code}'
    # The row index of the phenotype table is passed along as the person IDs.
    fitter.save_fit(f'{output_dir}/{name}', person_ids=phenotypes_data_df.index.tolist())
    return fitter

In [24]:
# Fit and save the model for code I21 (the 'MI' entry of target_to_icd10).
mod1 = train(code='I21')

  0%|          | 0/10 [00:00<?, ?it/s]

In [25]:
# Fit and save the model for code I63 (the 'stroke' entry of target_to_icd10).
mod2 = train(code='I63')

  0%|          | 0/10 [00:00<?, ?it/s]