In [1]:
import pandas as pd
import numpy as np
import subprocess
import os


In [5]:
# Load the gzip-compressed, tab-separated expression table.
phenotypes = pd.read_csv(
    '../data/std_GD462.GeneQuantRPKM.50FN.samplename.resk10.txt.gz',
    sep='\t',
    compression='gzip',
)

# Drop every row whose chromosome is X, Y or M.
filtered_phenotypes = phenotypes.loc[~phenotypes['Chr'].isin(['X', 'Y', 'M'])]

# Work list: gene ID + chromosome for chromosome 19 only, starting at row
# label 6532 (label-based slice, inclusive).
# NOTE(review): 6532 looks like a resume point from an earlier partial run —
# confirm before re-running from scratch.
to_calc = (
    filtered_phenotypes
    .loc[filtered_phenotypes['Chr'].isin(['19']), ['TargetID', 'Chr']]
    .loc[6532:]
)
to_calc

Unnamed: 0,TargetID,Chr
6532,ENSG00000105492.10,19
6553,ENSG00000105499.8,19
6605,ENSG00000186230.5,19
6606,ENSG00000126934.6,19
6628,ENSG00000186806.3,19
...,...,...
23604,ENSG00000132017.5,19
23627,ENSG00000167676.3,19
23638,ENSG00000051128.12,19
23643,ENSG00000142396.4,19


In [6]:
# Sanity check on the starting row of the work list: looking the gene up by
# its TargetID and looking row label 6532 up directly should show the same gene.
# Only the last expression of a cell is rendered automatically, so the first
# lookup is passed to display() explicitly; otherwise its result is discarded.
display(to_calc[to_calc['TargetID'] == 'ENSG00000105492.10'])
to_calc.loc[6532]

TargetID    ENSG00000105492.10
Chr                         19
Name: 6532, dtype: object

In [None]:
# --- Configuration for the FUSION weight-computation runs -------------------

# Executables and scripts
RSCRIPT_PATH = "Rscript"
FUSION_SCRIPT = "fusion_twas-master/FUSION.compute_weights.R"
PLINK_PATH = "plink.exe"
GCTA_PATH = "fusion_twas-master/gcta_nr_robust.exe"

# Input locations; "{chr}" is substituted with the chromosome via str.format
BFILE_TEMPLATE = "../data/LDREF_filtered/1000G.EUR.{chr}"
PHENO_DIR = "../data/gene_expressions/chr_{chr}/"  # one "<gene>.txt" file per gene

# Output / scratch locations
OUTPUT_DIR = "compute_weights_out/chr_{chr}"
TMP_DIR = "temp_files/"

# Run parameters
MODELS = "enet"  # value passed to --models; ",lasso" is currently left out
VERBOSE = 2      # value passed to --verbose

def _clear_tmp_dir(tmp_dir):
    """Delete everything inside ``tmp_dir`` but keep the directory itself.

    Pure-Python stand-in for ``rm -rf <tmp_dir>/*``: entries whose name starts
    with a dot are left alone (a shell ``*`` would not match them either), and
    failures to delete an individual entry are reported instead of raised.
    """
    import shutil  # local import so this block does not depend on the notebook's import cell

    if not os.path.isdir(tmp_dir):
        return

    # Materialise the listing first so entries are not deleted mid-iteration.
    with os.scandir(tmp_dir) as listing:
        entries = [entry for entry in listing if not entry.name.startswith('.')]

    for entry in entries:
        try:
            if entry.is_dir(follow_symlinks=False):
                shutil.rmtree(entry.path)
            else:
                os.remove(entry.path)
        except OSError as e:
            print(f"Could not remove temporary entry {entry.path}: {e}")


def compute_weights(gene, chromosome):
    """Run the FUSION compute-weights R script for one gene on one chromosome.

    Builds the per-chromosome paths from the module-level templates, makes sure
    the output and temporary directories exist, runs the R script through
    ``Rscript`` and finally empties the temporary directory.

    Args:
        gene: Gene identifier; used in the temp-file prefix, in the output name
            and to locate the phenotype file ``<PHENO_DIR>/<gene>.txt``.
        chromosome: Chromosome label substituted for ``{chr}`` in
            ``BFILE_TEMPLATE``, ``OUTPUT_DIR`` and ``PHENO_DIR``.

    Returns:
        None. A non-zero exit status of the R script is printed, not raised,
        so a loop over many genes keeps going.

    Raises:
        FileNotFoundError: If the ``Rscript`` executable itself cannot be
            started (this is not a per-gene failure, so it is not swallowed).
    """
    # Construct file paths
    bfile = BFILE_TEMPLATE.format(chr=chromosome)
    output_dir = OUTPUT_DIR.format(chr=chromosome)
    pheno_dir = PHENO_DIR.format(chr=chromosome)

    os.makedirs(output_dir, exist_ok=True)
    # The --tmp prefix points inside TMP_DIR, so the directory has to exist.
    os.makedirs(TMP_DIR, exist_ok=True)

    tmp_file_prefix = os.path.join(TMP_DIR, f"test_chr{chromosome}_{gene}")
    output_path = os.path.join(output_dir, f"{gene}_chr{chromosome}")
    pheno_file = os.path.join(pheno_dir, f"{gene}.txt")

    # Construct the command (argument list, executed without a shell)
    command = [
        RSCRIPT_PATH, FUSION_SCRIPT,
        "--bfile", bfile,
        "--tmp", tmp_file_prefix,
        "--out", output_path,
        "--models", MODELS,
        "--PATH_plink", PLINK_PATH,
        "--PATH_gcta", GCTA_PATH,
        "--verbose", str(VERBOSE),
        "--pheno", pheno_file
    ]

    # Print command for debugging
    print(f"Running: {' '.join(command)}")

    # Run the command
    try:
        subprocess.run(command, check=True)
    except subprocess.CalledProcessError as e:
        print(f"Error running command for gene {gene}, chromosome {chromosome}: {e}")
    finally:
        # Previously ['rm', '-rf', 'temp_files/*'] was run without a shell, so
        # the '*' was never expanded (nothing was deleted) and 'rm' is not
        # available on a stock Windows install. Clean up in Python instead,
        # and do it even when the R script failed so leftovers do not pile up.
        _clear_tmp_dir(TMP_DIR)



# One FUSION run per row of the work list, in row order.
for target_id, chrom in zip(to_calc['TargetID'], to_calc['Chr']):
    compute_weights(target_id, chrom)

Running: Rscript fusion_twas-master/FUSION.compute_weights.R --bfile ../data/LDREF_filtered/1000G.EUR.19 --tmp temp_files/test_chr19_ENSG00000105492.10 --out compute_weights_out/chr_19\ENSG00000105492.10_chr19 --models enet --PATH_plink plink.exe --PATH_gcta fusion_twas-master/gcta_nr_robust.exe --verbose 2 --pheno ../data/gene_expressions/chr_19/ENSG00000105492.10.txt
Running: Rscript fusion_twas-master/FUSION.compute_weights.R --bfile ../data/LDREF_filtered/1000G.EUR.19 --tmp temp_files/test_chr19_ENSG00000105499.8 --out compute_weights_out/chr_19\ENSG00000105499.8_chr19 --models enet --PATH_plink plink.exe --PATH_gcta fusion_twas-master/gcta_nr_robust.exe --verbose 2 --pheno ../data/gene_expressions/chr_19/ENSG00000105499.8.txt
Running: Rscript fusion_twas-master/FUSION.compute_weights.R --bfile ../data/LDREF_filtered/1000G.EUR.19 --tmp temp_files/test_chr19_ENSG00000186230.5 --out compute_weights_out/chr_19\ENSG00000186230.5_chr19 --models enet --PATH_plink plink.exe --PATH_gcta fu