In [1]:
import glob
import pandas as pd
import scanpy as sc

import argparse
import sys

from arboreto.algo import grnboost2
from distributed import LocalCluster, Client

In [2]:
data_name = 'AdamsonWeissman2016_GSM2406677_2'
data_name_lower = data_name.lower()

In [3]:
parser = argparse.ArgumentParser()
split_dir =f'../{data_name_lower}/splits' ## Generate dataloader+splits first using GEARS dataloader
split_id =1
n_workers = 10
adata_path='None'
method='grnboost'
adata = sc.read_h5ad(f"../{data_name_lower}/perturb_processed.h5ad")

In [4]:
def get_split_adata(adata, split_dir, split_id):
    split_files = [f for f in glob.glob(split_dir + '/*') if 'subgroup' not in f]
    split_fname = [f for f in split_files if 'simulation_'+str(split_id) in f][0]
    split_dict = pd.read_pickle(split_fname)
    
    return adata[adata.obs['condition'].isin(split_dict['train'])]
    

def get_grnboost_GRN(split_adata, n_workers):
    TF_names = pd.read_csv('../TF_names_v_1.01.txt', delimiter='\t', header=None)
    TF_names = TF_names[0].values

    all_conds = [c.split('+') for c in adata.obs['condition'].values ]
    all_conds = [item for sublist in all_conds for item in sublist]
    all_conds = set(all_conds)

    aug_TF_names = list(TF_names) + list(all_conds)
    
    exp_matrix = split_adata.X.toarray()
    gene_names = split_adata.var['gene_name'].values
    
    local_cluster = LocalCluster(n_workers=n_workers, 
                             threads_per_worker=1)
    custom_client = Client(local_cluster)
    adjacencies = grnboost2(exp_matrix, tf_names=aug_TF_names, 
                        verbose=True, gene_names=gene_names,
                        client_or_address=custom_client)
    
    return adjacencies

def save_split_GRN(adata, split_dir, split_id, n_workers):
    
    split_adata = get_split_adata(adata, split_dir, split_id)
    
    if method == 'grnboost':
        adjacencies = get_grnboost_GRN(split_adata, n_workers)
    
    adjacencies.to_csv(f'{data_name_lower}_adjacencies_'+str(split_id)+'.csv')

if __name__ == '__main__':
    save_split_GRN(adata, split_dir, split_id, n_workers)

    for split_id in range(2, 6):
        save_split_GRN(adata, split_dir, split_id, n_workers)


distributed.diskutils - INFO - Found stale lock file and directory '/home/share/huadjyin/home/zhoumin3/zhoumin/benchmark_data/01A_total_re/07grn/01scenic_adj/dask-worker-space/worker-9oxr4qln', purging
distributed.diskutils - INFO - Found stale lock file and directory '/home/share/huadjyin/home/zhoumin3/zhoumin/benchmark_data/01A_total_re/07grn/01scenic_adj/dask-worker-space/worker-v7gr0rf0', purging
distributed.diskutils - INFO - Found stale lock file and directory '/home/share/huadjyin/home/zhoumin3/zhoumin/benchmark_data/01A_total_re/07grn/01scenic_adj/dask-worker-space/worker-21dj5rt4', purging
distributed.diskutils - INFO - Found stale lock file and directory '/home/share/huadjyin/home/zhoumin3/zhoumin/benchmark_data/01A_total_re/07grn/01scenic_adj/dask-worker-space/worker-24rllqae', purging
distributed.diskutils - INFO - Found stale lock file and directory '/home/share/huadjyin/home/zhoumin3/zhoumin/benchmark_data/01A_total_re/07grn/01scenic_adj/dask-worker-space/worker-qqe_3m4g'

preparing dask client
parsing input
creating dask graph
10 partitions
computing dask graph
not shutting down client, client was created externally
finished


Perhaps you already have a cluster running?
Hosting the HTTP server on port 41483 instead
  f"Port {expected} is already in use.\n"


preparing dask client
parsing input
creating dask graph
10 partitions
computing dask graph
not shutting down client, client was created externally
finished


Perhaps you already have a cluster running?
Hosting the HTTP server on port 44689 instead
  f"Port {expected} is already in use.\n"


preparing dask client
parsing input
creating dask graph
10 partitions
computing dask graph
not shutting down client, client was created externally
finished


Perhaps you already have a cluster running?
Hosting the HTTP server on port 41213 instead
  f"Port {expected} is already in use.\n"


preparing dask client
parsing input
creating dask graph
10 partitions
computing dask graph
not shutting down client, client was created externally
finished


Perhaps you already have a cluster running?
Hosting the HTTP server on port 36135 instead
  f"Port {expected} is already in use.\n"


preparing dask client
parsing input
creating dask graph
10 partitions
computing dask graph
not shutting down client, client was created externally
finished


