In [19]:
import json
import os
import pandas as pd
import logging
import pickle
import argparse

# Root logger setup: timestamps are rendered as day-month-year followed by the time.
# "%m" is the month directive; "%M" (minutes) is only correct inside the time part.
logging.basicConfig(format="%(asctime)s:%(levelname)s:%(message)s", datefmt="%d-%m-%Y %H:%M:%S", level=logging.DEBUG)
def merge_tax(file_list, name_list, format='json'):
    """Merge the ``tax_value_dict`` section of several per-sample JSON result files.

    For the DataFrame output, each ``tax_value_dict`` is loaded with
    ``pd.DataFrame.from_dict``; the resulting frame must contain a ``tax_level``
    column, and its ``relative_abundance`` column is renamed to the sample name.
    (Assumes the section is a column -> {row key: value} mapping -- TODO confirm
    against the producer of the ``*.res.json`` files.)

    Args:
        file_list: Paths of the JSON result files.
        name_list: Sample names, paired positionally with ``file_list``
            (``zip`` semantics: surplus entries on either side are ignored).
        format: ``'json'`` returns a dict; any other value returns a DataFrame.

    Returns:
        ``format == 'json'``: dict mapping sample name -> raw ``tax_value_dict``.
        Otherwise: DataFrame indexed by ``(tax_level, index)`` holding the
        per-sample frames joined column-wise (outer join on the index), or an
        empty DataFrame when there are no input files.

    Raises:
        KeyError: if a file has no ``tax_value_dict`` entry (or, for DataFrame
            output, no ``tax_level`` column).
    """
    logging.info('merge_tax...')
    want_json = format == 'json'
    total_json = {}
    frames = []
    for f, sample_name in zip(file_list, name_list):
        with open(f, 'rt', encoding='utf-8') as h:
            tax_values = json.load(h)['tax_value_dict']
        if want_json:
            # Only the raw mapping is needed; skip the DataFrame construction.
            total_json[sample_name] = tax_values
            continue
        frames.append(
            pd.DataFrame.from_dict(tax_values)
            .reset_index()
            .set_index(['tax_level', 'index'])
            .rename(columns={'relative_abundance': sample_name})
        )
    if want_json:
        return total_json
    if not frames:
        # pd.concat raises on an empty list; keep returning an empty frame.
        return pd.DataFrame()
    # One concat over all frames instead of re-copying the accumulated frame per sample.
    return pd.concat(frames, axis=1)

def merge_json(file_list, name_list, key='alpha_dict', format='json'):
    """Merge one top-level section (``key``) of several per-sample JSON result files.

    For the DataFrame output, each section is loaded with
    ``pd.DataFrame.from_dict(..., orient='index')`` and its column ``0`` is
    renamed to the sample name. (Assumes the section is a flat
    name -> scalar mapping, which yields that single column -- TODO confirm.)

    Args:
        file_list: Paths of the JSON result files.
        name_list: Sample names, paired positionally with ``file_list``
            (``zip`` semantics: surplus entries on either side are ignored).
        key: Name of the top-level JSON entry to extract from every file.
        format: ``'json'`` returns a dict; any other value returns a DataFrame.

    Returns:
        ``format == 'json'``: dict mapping sample name -> raw section.
        Otherwise: DataFrame holding the per-sample frames joined column-wise
        (outer join on the index), or an empty DataFrame when there are no
        input files.

    Raises:
        KeyError: if a file has no ``key`` entry.
    """
    logging.info(f'merge_{key}...')
    want_json = format == 'json'
    total_json = {}
    frames = []
    for f, sample_name in zip(file_list, name_list):
        with open(f, 'rt', encoding='utf-8') as h:
            section = json.load(h)[key]
        if want_json:
            # Only the raw mapping is needed; skip the DataFrame construction.
            total_json[sample_name] = section
            continue
        frames.append(
            pd.DataFrame.from_dict(section, orient='index').rename(columns={0: sample_name})
        )
    if want_json:
        return total_json
    if not frames:
        # pd.concat raises on an empty list; keep returning an empty frame.
        return pd.DataFrame()
    # One concat over all frames instead of re-copying the accumulated frame per sample.
    return pd.concat(frames, axis=1)

def get_file_list(res_dir):
    """Collect per-sample result files laid out as ``<res_dir>/<dir>/<sample>/<sample>.res.json``.

    Every entry found inside a first-level directory is treated as a sample
    name. Samples whose result file is missing are reported by logging the
    expected path. Non-directory entries directly under ``res_dir`` are skipped.
    Directory entries are visited in sorted order so the returned lists are
    deterministic (``os.listdir`` order is arbitrary).

    Args:
        res_dir: Root directory to scan.

    Returns:
        Tuple ``(res_file_list, sample_name_list)`` of equal-length lists:
        paths of the result files found and the matching sample names.

    Raises:
        OSError: if ``res_dir`` cannot be listed (e.g. it does not exist).
    """
    res_file_list = []
    sample_name_list = []
    for fir_d in sorted(os.listdir(res_dir)):
        sec_d = os.path.join(res_dir, fir_d)
        if not os.path.isdir(sec_d):
            # A stray file next to the first-level directories would make os.listdir fail.
            continue
        for sample in sorted(os.listdir(sec_d)):
            res_file = os.path.join(sec_d, sample, f"{sample}.res.json")
            if os.path.isfile(res_file):
                res_file_list.append(res_file)
                sample_name_list.append(sample)
            else:
                # Expected result file is missing for this sample.
                logging.info(res_file)
    logging.info(f"total sample: {len(res_file_list)}")
    return res_file_list, sample_name_list

def merge_res(file_list, sample_list, outdir):
    """Merge all per-sample results and write them as tab-separated tables in ``outdir``.

    Files written (tab-separated despite the ``.csv`` extension, missing values
    filled with 0):
      * ``tax_<level>.csv`` -- one per distinct ``tax_level`` in the merged
        taxonomy table, without the ``tax_level`` column and without the row index.
      * ``alpha.csv``, ``rgi.csv``, ``vf.csv``, ``pathway.csv`` -- one per merged
        JSON section (named after the part of the key before the first ``_``).

    Args:
        file_list: Paths of the per-sample JSON result files.
        sample_list: Sample names, paired positionally with ``file_list``.
        outdir: Output directory; created if it does not exist.

    Returns:
        None. When there are no samples, a warning is logged and nothing is written.
    """
    tax_df = merge_tax(file_list, sample_list, format='df').reset_index()
    if tax_df.empty:
        # Without samples there is no 'tax_level' column to split on.
        logging.warning('no samples to merge; nothing written')
        return

    # to_csv does not create missing directories.
    os.makedirs(outdir, exist_ok=True)

    for level in tax_df['tax_level'].unique():
        level_df = tax_df[tax_df['tax_level'] == level].fillna(0).drop('tax_level', axis=1)
        level_df.to_csv(os.path.join(outdir, f'tax_{level}.csv'), sep='\t', index=False)

    for k in ['alpha_dict', 'rgi_res_dict', 'vf_res_dict', 'pathway_res_dict']:
        section_df = merge_json(file_list, sample_list, key=k, format='df')
        section_df.fillna(0).to_csv(os.path.join(outdir, f'{k.split("_")[0]}.csv'), sep='\t')


if __name__ == "__main__":
    # Command-line entry point: scan the result directory and write the merged tables.
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
        # %(prog)s is filled in by argparse; __file__ is undefined when this code
        # is run from a notebook or interactive session and would raise NameError.
        usage='python %(prog)s -d /home/hdd/gmxMeta/db/M2023/ -o /home/hdd/gmxMeta/db/Merge/',
    )
    # `required` is a boolean flag; the string 'True' only worked because it is truthy.
    parser.add_argument('-d', '--res_dir', required=True, help='result dir path')
    parser.add_argument('-o', '--out_dir', required=True, help='merge result dir')
    args = parser.parse_args()

    file_list, sample_list = get_file_list(args.res_dir)
    merge_res(file_list, sample_list, args.out_dir)




In [4]:
# Ad-hoc scan of <res_dir>/<dir>/<sample>/<sample>.res.json; same walk as
# get_file_list, but missing result files are printed instead of logged.
# NOTE(review): the absolute path below is machine-specific -- adjust before re-running.
res_dir = '/mnt/c/work/metagenome/gmxMeta_276/gmxMeta_data/'
n = 0  # number of result files found
res_file_list = []
sample_name_list = []
for fir_d in sorted(os.listdir(res_dir)):  # sorted: os.listdir order is arbitrary
    sec_d = os.path.join(res_dir, fir_d)
    if not os.path.isdir(sec_d):
        # A stray file next to the first-level directories would make os.listdir fail.
        continue
    for sample in sorted(os.listdir(sec_d)):
        res_file = os.path.join(sec_d, sample, f"{sample}.res.json")
        if os.path.isfile(res_file):
            n+=1
            res_file_list.append(res_file)
            sample_name_list.append(sample)
        else:
            print(res_file)