In [1]:
import pathlib
import shlex
import subprocess
from string import Template

import numpy as np
import pandas as pd
import polars as pl
import tqdm.notebook as tqdm

# Format simulated data

## Recode the chromosome in genotypes

In [2]:
# Overwrite the first .bim column (the chromosome code) with the constant 1 and
# write the table back to the same path, still tab-separated and header-less.
bim_path = "data/simulated-phenotypes/Genotypes.bim"
bim_table = pl.read_csv(bim_path, separator="\t", has_header=False)
bim_recoded = bim_table.with_columns(pl.lit(1).alias("column_1"))
bim_recoded.write_csv(bim_path, separator="\t", include_header=False)

## Add names to the phenotype file

In [3]:
# Give the header-less phenotype table explicit column names: the first two
# columns become FID/IID and columns 3-12 become feature_01 .. feature_10.
phenotype_column_names = {"column_1": "FID", "column_2": "IID"}
for feature_number in range(1, 11):
    phenotype_column_names[f"column_{feature_number + 2}"] = f"feature_{feature_number:02}"

unnamed_phenotypes = pl.read_csv(
    "data/simulated-phenotypes/Ysim_plink.txt", separator="\t", has_header=False
)
unnamed_phenotypes.rename(phenotype_column_names).write_csv(
    "data/simulated-phenotypes/Ysim_plink_names.txt", separator="\t"
)

## Combine phenotypes and covariates into a single file

SAIGE reads the phenotype and covariate columns from a single file, so the two tables are joined here.

In [4]:
# Join the named feature phenotypes with the covariates on (FID, IID) and save
# the combined table that the SAIGE feature runs use as their --phenoFile.
named_phenotypes = pl.read_csv("data/simulated-phenotypes/Ysim_plink_names.txt", separator="\t")
feature_covariates = pl.read_csv("data/simulated-phenotypes/Covs_plink.txt", separator="\t")
saige_feature_table = named_phenotypes.join(feature_covariates, on=["FID", "IID"])
saige_feature_table.write_csv("data/simulated-phenotypes/Ysim_saige.txt", separator="\t")

# Set up directories

In [5]:
# Create the output directory tree used by the rest of the notebook.
# parents=True makes every call independent of which parent directories already
# exist; exist_ok=True keeps the cell safe to re-run.
pathlib.Path("data/grm/").mkdir(parents=True, exist_ok=True)

pathlib.Path("data/projected-phenotypes/").mkdir(parents=True, exist_ok=True)

gwas_root = pathlib.Path("data/gwas/")
gwas_root.mkdir(parents=True, exist_ok=True)

# Each GWAS method gets one results directory for the raw features and one for
# the random projections of those features.
gwas_methods = ["ols_univariate", "ols_multivariate", "fastgwa", "saige", "regenie"]
for gwas_method in gwas_methods:
    for phenotype_kind in ("features", "projections"):
        gwas_root.joinpath(gwas_method, phenotype_kind).mkdir(parents=True, exist_ok=True)

# Random linear combinations

In [6]:
# Draw a seeded Gaussian matrix of projection coefficients, rescale each
# projection to unit variance over the sample, project the features, and write
# both the coefficient matrix and the projected phenotypes to disk.
np.random.seed(0)

n_features = 10
n_projections = 10

feature_names = [f"feature_{i:02}" for i in range(1, n_features + 1)]
projection_names = [f"projection_{i:03}" for i in range(1, n_projections + 1)]
projection_matrix = np.random.normal(size=(n_features, n_projections))
projection_df = pd.DataFrame(projection_matrix, index=feature_names, columns=projection_names)

feature_table = pl.read_csv("data/simulated-phenotypes/Ysim_plink_names.txt", separator="\t")
feature_df = feature_table.to_pandas().set_index(["FID", "IID"])

# The variance of each projection is the matching diagonal entry of P' Cov(X) P;
# dividing every column of P by its standard deviation gives unit variance.
projection_variances = np.diag(projection_df.T @ feature_df.cov() @ projection_df)
projection_df /= np.sqrt(projection_variances)

projected_df = feature_df @ projection_df

# Sanity checks: both the features and their projections are expected to be
# centered with unit variance.
for checked_df in (feature_df, projected_df):
    assert np.allclose(checked_df.mean(), 0)
    assert np.allclose(np.diag(checked_df.cov()), 1)

pl.DataFrame(projection_df.reset_index()).write_csv(
    "data/projected-phenotypes/projection_matrix.tsv", separator="\t"
)

pl.DataFrame(projected_df.reset_index()).write_csv(
    "data/projected-phenotypes/phenotypes.tsv", separator="\t"
)

In [7]:
# Join the projected phenotypes with the covariates on (FID, IID) and save the
# combined table that the SAIGE projection runs use as their --phenoFile.
projected_phenotypes = pl.read_csv("data/projected-phenotypes/phenotypes.tsv", separator="\t")
projection_covariates = pl.read_csv("data/simulated-phenotypes/Covs_plink.txt", separator="\t")
saige_projection_table = projected_phenotypes.join(projection_covariates, on=["FID", "IID"])
saige_projection_table.write_csv("data/projected-phenotypes/phenotypes_saige.tsv", separator="\t")

# Make GRM

## GCTA

In [8]:
# Build the genetic relationship matrix (GRM) with GCTA from the chromosome-1
# genotypes and store it under the data/grm/gcta prefix.
command = """
gcta64 \
    --bfile data/simulated-phenotypes/Genotypes \
    --chr 1 \
    --make-grm \
    --out data/grm/gcta
"""
# check=True raises CalledProcessError when gcta64 exits non-zero, so the
# notebook stops here instead of continuing with a missing or partial GRM.
result = subprocess.run(shlex.split(command), check=True)

*******************************************************************
* GCTA (Genome-wide Complex Trait Analysis)
* Version v1.94.1 Mac
* (C) 2010-present, Yang Lab, Westlake University
* Please report bugs to Jian Yang jian.yang@westlake.edu.cn
* MIT License
*******************************************************************
Analysis started at 16:46:09 PST on Wed Nov 13 2024.
Hostname: awlpdcg540mz

Options: 
 
--bfile data/simulated-phenotypes/Genotypes 
--chr 1 
--make-grm 
--out data/grm/gcta 

Note: GRM is computed using the SNPs on the autosomes.
Reading PLINK FAM file from [data/simulated-phenotypes/Genotypes.fam]...
10000 individuals to be included from FAM file.
10000 individuals to be included. 0 males, 0 females, 10000 unknown.
Reading PLINK BIM file from [data/simulated-phenotypes/Genotypes.bim]...
10000 SNPs to be included from BIM file(s).
Computing the genetic relationship matrix (GRM) v2 ...
Subset 1/1, no. subject 1-10000
  10000 samples, 10000 markers, 50005000 GRM ele

In [9]:
# Prune the GRM at data/grm/gcta to a sparse matrix with a 0.05 cutoff and
# store it under the data/grm/gcta-sparse prefix.
command = """
gcta64 \
    --grm data/grm/gcta \
    --make-bK-sparse 0.05 \
    --out data/grm/gcta-sparse
"""
# check=True raises CalledProcessError when gcta64 exits non-zero, so a failed
# run is not silently ignored.
result = subprocess.run(shlex.split(command), check=True)

*******************************************************************
* GCTA (Genome-wide Complex Trait Analysis)
* Version v1.94.1 Mac
* (C) 2010-present, Yang Lab, Westlake University
* Please report bugs to Jian Yang jian.yang@westlake.edu.cn
* MIT License
*******************************************************************
Analysis started at 16:46:12 PST on Wed Nov 13 2024.
Hostname: awlpdcg540mz

Options: 
 
--grm data/grm/gcta 
--make-bK-sparse 0.05 
--out data/grm/gcta-sparse 

Pruning the GRM to a sparse matrix with a cutoff of 0.050000...
Total number of parts to be processed: 2
    Saving 10000 individual IDs
    Processing part 1
Saving the sparse GRM (10018 pairs) to [data/grm/gcta-sparse.grm.sp]
finished generating a sparse GRM Success:

Analysis finished at 16:46:12 PST on Wed Nov 13 2024
Overall computational time: 0.18 sec.


# GWAS

## OLS univariate

### Features

In [10]:
# PLINK2 --glm on the simulated feature phenotypes with no covariates
# (allow-no-covars); results go under data/gwas/ols_univariate/features/.
command = """
plink2 \
    --bfile data/simulated-phenotypes/Genotypes \
    --pheno data/simulated-phenotypes/Ysim_plink.txt \
    --glm allow-no-covars \
    --out data/gwas/ols_univariate/features/result
"""
# check=True raises CalledProcessError when plink2 exits non-zero, so a failed
# run is not silently ignored.
result = subprocess.run(shlex.split(command), check=True)

PLINK v2.00a6 M1 (18 Aug 2024)                 www.cog-genomics.org/plink/2.0/
(C) 2005-2024 Shaun Purcell, Christopher Chang   GNU General Public License v3
Logging to data/gwas/ols_univariate/features/result.log.
Options in effect:
  --bfile data/simulated-phenotypes/Genotypes
  --glm allow-no-covars
  --out data/gwas/ols_univariate/features/result
  --pheno data/simulated-phenotypes/Ysim_plink.txt

Start time: Wed Nov 13 16:46:12 2024
65536 MiB RAM detected; reserving 32768 MiB for main workspace.
Using up to 16 threads (change this with --threads).
10000 samples (0 females, 0 males, 10000 ambiguous; 10000 founders) loaded from
data/simulated-phenotypes/Genotypes.fam.
10000 variants loaded from data/simulated-phenotypes/Genotypes.bim.
10 quantitative phenotypes loaded.
Calculating allele frequencies... done.
--glm linear regression on quantitative phenotypes #1-10: done.
Results written to data/gwas/ols_univariate/features/result.<phenotype
name>.glm.linear .
End time: Wed Nov 13 16

### Projections

In [11]:
# PLINK2 --glm on the projected phenotypes with no covariates
# (allow-no-covars); results go under data/gwas/ols_univariate/projections/.
command = """
plink2 \
    --bfile data/simulated-phenotypes/Genotypes \
    --pheno data/projected-phenotypes/phenotypes.tsv \
    --glm allow-no-covars \
    --out data/gwas/ols_univariate/projections/result
"""
# check=True raises CalledProcessError when plink2 exits non-zero, so a failed
# run is not silently ignored.
result = subprocess.run(shlex.split(command), check=True)

PLINK v2.00a6 M1 (18 Aug 2024)                 www.cog-genomics.org/plink/2.0/
(C) 2005-2024 Shaun Purcell, Christopher Chang   GNU General Public License v3
Logging to data/gwas/ols_univariate/projections/result.log.
Options in effect:
  --bfile data/simulated-phenotypes/Genotypes
  --glm allow-no-covars
  --out data/gwas/ols_univariate/projections/result
  --pheno data/projected-phenotypes/phenotypes.tsv

Start time: Wed Nov 13 16:46:12 2024
65536 MiB RAM detected; reserving 32768 MiB for main workspace.
Using up to 16 threads (change this with --threads).
10000 samples (0 females, 0 males, 10000 ambiguous; 10000 founders) loaded from
data/simulated-phenotypes/Genotypes.fam.
10000 variants loaded from data/simulated-phenotypes/Genotypes.bim.
10 quantitative phenotypes loaded.
Calculating allele frequencies... done.
--glm linear regression on quantitative phenotypes #1-10: done.
Results written to data/gwas/ols_univariate/projections/result.<phenotype
name>.glm.linear .
End time: Wed 

## OLS multivariate

### Features

In [12]:
# PLINK2 --glm on the simulated feature phenotypes, adjusting for the
# covariates in Covs_plink.txt; hide-covar omits the covariate rows from the
# output. Results go under data/gwas/ols_multivariate/features/.
command = """
plink2 \
    --bfile data/simulated-phenotypes/Genotypes \
    --pheno data/simulated-phenotypes/Ysim_plink.txt \
    --covar data/simulated-phenotypes/Covs_plink.txt \
    --glm hide-covar \
    --out data/gwas/ols_multivariate/features/result
"""
# check=True raises CalledProcessError when plink2 exits non-zero, so a failed
# run is not silently ignored.
result = subprocess.run(shlex.split(command), check=True)

PLINK v2.00a6 M1 (18 Aug 2024)                 www.cog-genomics.org/plink/2.0/
(C) 2005-2024 Shaun Purcell, Christopher Chang   GNU General Public License v3
Logging to data/gwas/ols_multivariate/features/result.log.
Options in effect:
  --bfile data/simulated-phenotypes/Genotypes
  --covar data/simulated-phenotypes/Covs_plink.txt
  --glm hide-covar
  --out data/gwas/ols_multivariate/features/result
  --pheno data/simulated-phenotypes/Ysim_plink.txt

Start time: Wed Nov 13 16:46:12 2024
65536 MiB RAM detected; reserving 32768 MiB for main workspace.
Using up to 16 threads (change this with --threads).
10000 samples (0 females, 0 males, 10000 ambiguous; 10000 founders) loaded from
data/simulated-phenotypes/Genotypes.fam.
10000 variants loaded from data/simulated-phenotypes/Genotypes.bim.
10 quantitative phenotypes loaded.
5 covariates loaded from data/simulated-phenotypes/Covs_plink.txt.
Calculating allele frequencies... done.
--glm linear regression on quantitative phenotypes #1-10: do

### Projections

In [13]:
# PLINK2 --glm on the projected phenotypes, adjusting for the covariates in
# Covs_plink.txt; hide-covar omits the covariate rows from the output.
# Results go under data/gwas/ols_multivariate/projections/.
command = """
plink2 \
    --bfile data/simulated-phenotypes/Genotypes \
    --pheno data/projected-phenotypes/phenotypes.tsv \
    --covar data/simulated-phenotypes/Covs_plink.txt \
    --glm hide-covar \
    --out data/gwas/ols_multivariate/projections/result
"""
# check=True raises CalledProcessError when plink2 exits non-zero, so a failed
# run is not silently ignored.
result = subprocess.run(shlex.split(command), check=True)

PLINK v2.00a6 M1 (18 Aug 2024)                 www.cog-genomics.org/plink/2.0/
(C) 2005-2024 Shaun Purcell, Christopher Chang   GNU General Public License v3
Logging to data/gwas/ols_multivariate/projections/result.log.
Options in effect:
  --bfile data/simulated-phenotypes/Genotypes
  --covar data/simulated-phenotypes/Covs_plink.txt
  --glm hide-covar
  --out data/gwas/ols_multivariate/projections/result
  --pheno data/projected-phenotypes/phenotypes.tsv

Start time: Wed Nov 13 16:46:13 2024
65536 MiB RAM detected; reserving 32768 MiB for main workspace.
Using up to 16 threads (change this with --threads).
10000 samples (0 females, 0 males, 10000 ambiguous; 10000 founders) loaded from
data/simulated-phenotypes/Genotypes.fam.
10000 variants loaded from data/simulated-phenotypes/Genotypes.bim.
10 quantitative phenotypes loaded.
5 covariates loaded from data/simulated-phenotypes/Covs_plink.txt.
Calculating allele frequencies... done.
--glm linear regression on quantitative phenotypes #1-

## Regenie

### Features

In [14]:
# REGENIE step 1 (null-model fit) on the named feature phenotypes, run inside
# the regenie Docker image with ./data mounted at /data.
command = """
docker run \
    -v ./data:/data -w / \
    ghcr.io/rgcgithub/regenie/regenie:v4.0.gz regenie \
    --step 1 \
    --bed data/simulated-phenotypes/Genotypes \
    --covarFile data/simulated-phenotypes/Covs_plink.txt \
    --phenoFile data/simulated-phenotypes/Ysim_plink_names.txt \
    --bsize 100 \
    --qt \
    --out data/gwas/regenie/features/result
"""
# check=True raises CalledProcessError on a non-zero exit, so step 2 is never
# attempted against predictions that step 1 failed to produce.
result = subprocess.run(shlex.split(command), check=True)



Start time: Thu Nov 14 00:46:15 2024

              |      REGENIE v4.0.gz      |

Copyright (c) 2020-2024 Joelle Mbatchou, Andrey Ziyatdinov and Jonathan Marchini.
Distributed under the MIT License.
Compiled with Boost Iostream library.
Compiled with HTSlib.
Using Intel MKL with Eigen.

Log of output saved in file : data/gwas/regenie/features/result.log

Options in effect:
  --step 1 \
  --bed data/simulated-phenotypes/Genotypes \
  --covarFile data/simulated-phenotypes/Covs_plink.txt \
  --phenoFile data/simulated-phenotypes/Ysim_plink_names.txt \
  --bsize 100 \
  --qt \
  --out data/gwas/regenie/features/result

Fitting null model
 * bim              : [data/simulated-phenotypes/Genotypes.bim] n_snps = 10000
 * fam              : [data/simulated-phenotypes/Genotypes.fam] n_samples = 10000
 * bed              : [data/simulated-phenotypes/Genotypes.bed]
 * phenotypes       : [data/simulated-phenotypes/Ysim_plink_names.txt] n_pheno = 10
   -keeping and mean-imputing missing observatio

In [15]:
# REGENIE step 2 (association testing) on the named feature phenotypes, using
# the step-1 predictions listed in features/result_pred.list.
command = """
docker run \
    -v ./data:/data -w / \
    ghcr.io/rgcgithub/regenie/regenie:v4.0.gz regenie \
    --step 2 \
    --bed data/simulated-phenotypes/Genotypes \
    --covarFile data/simulated-phenotypes/Covs_plink.txt \
    --phenoFile data/simulated-phenotypes/Ysim_plink_names.txt \
    --bsize 100 \
    --qt \
    --pred data/gwas/regenie/features/result_pred.list \
    --out data/gwas/regenie/features/result
"""
# check=True raises CalledProcessError on a non-zero exit, so a failed run is
# not silently ignored.
result = subprocess.run(shlex.split(command), check=True)



Start time: Thu Nov 14 00:46:23 2024

              |      REGENIE v4.0.gz      |

Copyright (c) 2020-2024 Joelle Mbatchou, Andrey Ziyatdinov and Jonathan Marchini.
Distributed under the MIT License.
Compiled with Boost Iostream library.
Compiled with HTSlib.
Using Intel MKL with Eigen.

Log of output saved in file : data/gwas/regenie/features/result.log

Options in effect:
  --step 2 \
  --bed data/simulated-phenotypes/Genotypes \
  --covarFile data/simulated-phenotypes/Covs_plink.txt \
  --phenoFile data/simulated-phenotypes/Ysim_plink_names.txt \
  --bsize 100 \
  --qt \
  --pred data/gwas/regenie/features/result_pred.list \
  --out data/gwas/regenie/features/result

Association testing mode with fast multithreading using OpenMP
 * bim              : [data/simulated-phenotypes/Genotypes.bim] n_snps = 10000
 * fam              : [data/simulated-phenotypes/Genotypes.fam] n_samples = 10000
 * bed              : [data/simulated-phenotypes/Genotypes.bed]
 * phenotypes       : [data/simul

### Projections

In [16]:
# REGENIE step 1 (null-model fit) on the projected phenotypes, run inside the
# regenie Docker image with ./data mounted at /data.
command = """
docker run \
    -v ./data:/data -w / \
    ghcr.io/rgcgithub/regenie/regenie:v4.0.gz regenie \
    --step 1 \
    --bed data/simulated-phenotypes/Genotypes \
    --covarFile data/simulated-phenotypes/Covs_plink.txt \
    --phenoFile data/projected-phenotypes/phenotypes.tsv \
    --bsize 100 \
    --qt \
    --out data/gwas/regenie/projections/result
"""
# check=True raises CalledProcessError on a non-zero exit, so step 2 is never
# attempted against predictions that step 1 failed to produce.
result = subprocess.run(shlex.split(command), check=True)



Start time: Thu Nov 14 00:46:25 2024

              |      REGENIE v4.0.gz      |

Copyright (c) 2020-2024 Joelle Mbatchou, Andrey Ziyatdinov and Jonathan Marchini.
Distributed under the MIT License.
Compiled with Boost Iostream library.
Compiled with HTSlib.
Using Intel MKL with Eigen.

Log of output saved in file : data/gwas/regenie/projections/result.log

Options in effect:
  --step 1 \
  --bed data/simulated-phenotypes/Genotypes \
  --covarFile data/simulated-phenotypes/Covs_plink.txt \
  --phenoFile data/projected-phenotypes/phenotypes.tsv \
  --bsize 100 \
  --qt \
  --out data/gwas/regenie/projections/result

Fitting null model
 * bim              : [data/simulated-phenotypes/Genotypes.bim] n_snps = 10000
 * fam              : [data/simulated-phenotypes/Genotypes.fam] n_samples = 10000
 * bed              : [data/simulated-phenotypes/Genotypes.bed]
 * phenotypes       : [data/projected-phenotypes/phenotypes.tsv] n_pheno = 10
   -keeping and mean-imputing missing observations (do

In [17]:
# REGENIE step 2 (association testing) on the projected phenotypes, using the
# step-1 predictions listed in projections/result_pred.list.
command = """
docker run \
    -v ./data:/data -w / \
    ghcr.io/rgcgithub/regenie/regenie:v4.0.gz regenie \
    --step 2 \
    --bed data/simulated-phenotypes/Genotypes \
    --covarFile data/simulated-phenotypes/Covs_plink.txt \
    --phenoFile data/projected-phenotypes/phenotypes.tsv \
    --bsize 100 \
    --qt \
    --pred data/gwas/regenie/projections/result_pred.list \
    --out data/gwas/regenie/projections/result
"""
# check=True raises CalledProcessError on a non-zero exit, so a failed run is
# not silently ignored.
result = subprocess.run(shlex.split(command), check=True)



Start time: Thu Nov 14 00:46:33 2024

              |      REGENIE v4.0.gz      |

Copyright (c) 2020-2024 Joelle Mbatchou, Andrey Ziyatdinov and Jonathan Marchini.
Distributed under the MIT License.
Compiled with Boost Iostream library.
Compiled with HTSlib.
Using Intel MKL with Eigen.

Log of output saved in file : data/gwas/regenie/projections/result.log

Options in effect:
  --step 2 \
  --bed data/simulated-phenotypes/Genotypes \
  --covarFile data/simulated-phenotypes/Covs_plink.txt \
  --phenoFile data/projected-phenotypes/phenotypes.tsv \
  --bsize 100 \
  --qt \
  --pred data/gwas/regenie/projections/result_pred.list \
  --out data/gwas/regenie/projections/result

Association testing mode with fast multithreading using OpenMP
 * bim              : [data/simulated-phenotypes/Genotypes.bim] n_snps = 10000
 * fam              : [data/simulated-phenotypes/Genotypes.fam] n_samples = 10000
 * bed              : [data/simulated-phenotypes/Genotypes.bed]
 * phenotypes       : [data/pr

## SAIGE

### Features

In [18]:
# SAIGE step 1 (null GLMM fit), run once per feature phenotype inside the SAIGE
# Docker image. $PHENOTYPE is substituted with feature_01 .. feature_NN and is
# used both as the phenotype column and as the output prefix.
command_template = Template("""
docker run \
    -v ./data:/data -w / \
    wzhou88/saige:1.3.0 step1_fitNULLGLMM.R \
    --plinkFile=data/simulated-phenotypes/Genotypes \
    --useSparseGRMtoFitNULL=FALSE \
    --covarColList=sharedConfounder_norm1,sharedConfounder_norm2,sharedConfounder_norm3,independentConfounder_norm1,independentConfounder_norm2 \
    --sampleIDColinphenoFile=IID \
    --invNormalize=TRUE \
    --traitType=quantitative \
    --nThreads=16 \
    --IsOverwriteVarianceRatioFile=TRUE \
    --phenoCol=$PHENOTYPE \
    --phenoFile=data/simulated-phenotypes/Ysim_saige.txt \
    --outputPrefix=data/gwas/saige/features/$PHENOTYPE
""")

for feature_idx in tqdm.tnrange(1, n_features + 1):
    command = command_template.substitute(PHENOTYPE=f"feature_{feature_idx:02}")
    result = subprocess.run(shlex.split(command), capture_output=True)
    if result.returncode != 0:
        # The output is captured, so show SAIGE's stderr before failing;
        # otherwise the traceback would not say why the run failed.
        print(result.stderr.decode(errors="replace"))
    # Fail fast: raises CalledProcessError on a non-zero exit status.
    result.check_returncode()

  0%|          | 0/10 [00:00<?, ?it/s]

In [19]:
# SAIGE step 2 (association tests), run once per feature phenotype. It reads
# the model (.rda) and variance-ratio files written under
# data/gwas/saige/features/ for the same phenotype name.
command_template = Template("""
docker run \
    -v ./data:/data -w / \
    wzhou88/saige:1.3.0 step2_SPAtests.R \
    --bedFile=data/simulated-phenotypes/Genotypes.bed \
    --bimFile=data/simulated-phenotypes/Genotypes.bim \
    --famFile=data/simulated-phenotypes/Genotypes.fam \
    --LOCO=FALSE \
    --GMMATmodelFile=data/gwas/saige/features/$PHENOTYPE.rda \
    --varianceRatioFile=data/gwas/saige/features/$PHENOTYPE.varianceRatio.txt \
    --SAIGEOutputFile=data/gwas/saige/features/$PHENOTYPE.txt
""")

for feature_idx in tqdm.tnrange(1, n_features + 1):
    phenotype = f"feature_{feature_idx:02}"
    command = command_template.substitute(PHENOTYPE=phenotype)
    result = subprocess.run(shlex.split(command), capture_output=True)
    if result.returncode != 0:
        # Best-effort: report which phenotype failed and why, then carry on
        # with the remaining phenotypes. Only a failed SAIGE run is handled
        # here; any other error propagates.
        print(f"SAIGE step 2 failed for {phenotype} (exit status {result.returncode}):")
        print(result.stderr.decode(errors="replace"))

  0%|          | 0/10 [00:00<?, ?it/s]

### Projections

In [22]:
# SAIGE step 1 (null GLMM fit), run once per projected phenotype inside the
# SAIGE Docker image. $PHENOTYPE is substituted with projection_001 ..
# projection_NNN and is used both as the phenotype column and as the output
# prefix.
command_template = Template("""
docker run \
    -v ./data:/data -w / \
    wzhou88/saige:1.3.0 step1_fitNULLGLMM.R \
    --plinkFile=data/simulated-phenotypes/Genotypes \
    --useSparseGRMtoFitNULL=FALSE \
    --covarColList=sharedConfounder_norm1,sharedConfounder_norm2,sharedConfounder_norm3,independentConfounder_norm1,independentConfounder_norm2 \
    --sampleIDColinphenoFile=IID \
    --invNormalize=TRUE \
    --traitType=quantitative \
    --nThreads=16 \
    --IsOverwriteVarianceRatioFile=TRUE \
    --phenoCol=$PHENOTYPE \
    --phenoFile=data/projected-phenotypes/phenotypes_saige.tsv \
    --outputPrefix=data/gwas/saige/projections/$PHENOTYPE
""")

for projection_idx in tqdm.tnrange(1, n_projections + 1):
    phenotype = f"projection_{projection_idx:03}"
    command = command_template.substitute(PHENOTYPE=phenotype)
    result = subprocess.run(shlex.split(command), capture_output=True)
    if result.returncode != 0:
        # Best-effort: report which projection failed and why, then carry on
        # with the remaining projections. Only a failed SAIGE run is handled
        # here; any other error propagates.
        print(f"SAIGE step 1 failed for {phenotype} (exit status {result.returncode}):")
        print(result.stderr.decode(errors="replace"))

  0%|          | 0/10 [00:00<?, ?it/s]

Loading required package: optparse
Error in glmmkin.ai_PCG_Rcpp_Quantitative(bedFile, bimFile, famFile, Xorig,  : 
  ERROR! The first variance component parameter estimate is 0
Calls: fitNULLGLMM -> system.time -> glmmkin.ai_PCG_Rcpp_Quantitative
Timing stopped at: 10.55 0.107 1.695
Execution halted



In [23]:
# SAIGE step 2 (association tests), run once per projected phenotype. It reads
# the model (.rda) and variance-ratio files written under
# data/gwas/saige/projections/ for the same phenotype name, so a projection
# whose step-1 fit failed will fail here as well.
command_template = Template("""
docker run \
    -v ./data:/data -w / \
    wzhou88/saige:1.3.0 step2_SPAtests.R \
    --bedFile=data/simulated-phenotypes/Genotypes.bed \
    --bimFile=data/simulated-phenotypes/Genotypes.bim \
    --famFile=data/simulated-phenotypes/Genotypes.fam \
    --LOCO=FALSE \
    --GMMATmodelFile=data/gwas/saige/projections/$PHENOTYPE.rda \
    --varianceRatioFile=data/gwas/saige/projections/$PHENOTYPE.varianceRatio.txt \
    --SAIGEOutputFile=data/gwas/saige/projections/$PHENOTYPE.txt
""")

for projection_idx in tqdm.tnrange(1, n_projections + 1):
    phenotype = f"projection_{projection_idx:03}"
    command = command_template.substitute(PHENOTYPE=phenotype)
    result = subprocess.run(shlex.split(command), capture_output=True)
    if result.returncode != 0:
        # Best-effort: report which projection failed and why, then carry on
        # with the remaining projections. Only a failed SAIGE run is handled
        # here; any other error propagates.
        print(f"SAIGE step 2 failed for {phenotype} (exit status {result.returncode}):")
        print(result.stderr.decode(errors="replace"))

  0%|          | 0/10 [00:00<?, ?it/s]

Loading required package: RhpcBLASctl
Error in load(GMMATmodelFile) : empty (zero-byte) input file
Calls: SPAGMMATtest -> ReadModel -> load
Execution halted



## FastGWA

### Features

In [24]:
# GCTA fastGWA-mlm with the sparse GRM, run once per feature phenotype.
# $mpheno selects the phenotype via --mpheno and also names the output prefix.
command_template = Template("""
gcta64 \
    --bfile data/simulated-phenotypes/Genotypes \
    --grm-sparse data/grm/gcta-sparse \
    --pheno data/simulated-phenotypes/Ysim_plink.txt \
    --mpheno $mpheno \
    --qcovar data/simulated-phenotypes/Covs_plink.txt \
    --fastGWA-mlm \
    --thread-num 16 \
    --est-vg HE \
    --save-fastGWA-mlm-residual \
    --out data/gwas/fastgwa/features/result_$mpheno
""")

for feature_idx in tqdm.tnrange(1, n_features + 1):
    command = command_template.substitute(mpheno=feature_idx)
    result = subprocess.run(shlex.split(command), capture_output=True)
    if result.returncode != 0:
        # The output is captured, so show GCTA's stderr before failing;
        # otherwise the traceback would not say why the run failed.
        print(result.stderr.decode(errors="replace"))
    # Fail fast: raises CalledProcessError on a non-zero exit status.
    result.check_returncode()

  0%|          | 0/10 [00:00<?, ?it/s]

### Projections

In [25]:
# GCTA fastGWA-mlm with the sparse GRM, run once per projected phenotype.
# $mpheno selects the phenotype via --mpheno and also names the output prefix.
command_template = Template("""
gcta64 \
    --bfile data/simulated-phenotypes/Genotypes \
    --grm-sparse data/grm/gcta-sparse \
    --pheno data/projected-phenotypes/phenotypes.tsv \
    --mpheno $mpheno \
    --qcovar data/simulated-phenotypes/Covs_plink.txt \
    --fastGWA-mlm \
    --thread-num 16 \
    --est-vg HE \
    --save-fastGWA-mlm-residual \
    --out data/gwas/fastgwa/projections/result_$mpheno
""")

# The loop index counts projections here, so it is named accordingly.
for projection_idx in tqdm.tnrange(1, n_projections + 1):
    command = command_template.substitute(mpheno=projection_idx)
    result = subprocess.run(shlex.split(command), capture_output=True)
    if result.returncode != 0:
        # The output is captured, so show GCTA's stderr before failing;
        # otherwise the traceback would not say why the run failed.
        print(result.stderr.decode(errors="replace"))
    # Fail fast: raises CalledProcessError on a non-zero exit status.
    result.check_returncode()

  0%|          | 0/10 [00:00<?, ?it/s]