# Trophic model for gut data processing
This file pre-processes all data (in particular the Chia network and the Thai children data) into a format that is convenient for simulations.

In [1]:
########### Self-customized setting
import pandas as pd
import numpy as np

In [2]:
########### Load the Chia network as net (metabolite consumption and production edges)
net = pd.read_csv('pruned_chia_network.csv')

# Average every remaining column per microbe. Column 1 of the grouped frame is
# read positionally below; in the printed header it is the edge-type column
# (2 = intake, 3 = secretion, 5 = both).
mean_net = net.groupby('microbes_ID').mean()

# A mean edge type of exactly 2 means every edge of that microbe is an intake edge.
selfish_net = mean_net.loc[mean_net.iloc[:, 1].eq(2)]
i_selfish = selfish_net.index.values   #### IDs of microbes that do not generate byproducts

print(net.head())
print('###################################################################################################')

########### Load names of all nodes in the Chia network
# ': ' is a multi-character separator, which pandas only supports through its
# Python parsing engine. Requesting that engine explicitly yields the same
# table but avoids the ParserWarning emitted by the silent C-engine fallback.
names = pd.read_csv('names_ID.txt', sep=': ', engine='python')
names.set_index('IDs', inplace=True)   #### use the 'IDs' column as the row index
print(names.head())
print('###################################################################################################')

########### Load IDs of the nutrient-intake nodes
# ': ' is a multi-character separator, which pandas only supports through its
# Python parsing engine. Requesting that engine explicitly yields the same
# table but avoids the ParserWarning emitted by the silent C-engine fallback.
i_intake = pd.read_csv('nutrient_intake_ID.txt', sep=': ', engine='python')
i_intake = i_intake['IDs'].values   #### keep only the ID column, as a NumPy array
print(i_intake)
print('###################################################################################################')

########### Load all gut metagenomic data of all 41 individuals
thai_metagenome_all = pd.read_csv('abundance_matched_thai.txt', sep='\t')

# Sum the rows that share a Chia_id, drop the first group (lowest Chia_id after
# the sorted groupby) and turn Chia_id back into a regular column.
thai_metagenome_all = thai_metagenome_all.groupby('Chia_id').sum().iloc[1:,].reset_index()
thai_metagenome_ID = thai_metagenome_all['Chia_id']

# Keep rows whose Chia_id is non-zero (presumably 0 marks entries without a
# match in the Chia network -- TODO confirm) and skip the first three columns
# so that only the per-sample columns remain.
thai_metagenome = thai_metagenome_all[thai_metagenome_ID!=0].iloc[:,3:]
thai_metagenome_ID = thai_metagenome_ID[thai_metagenome_ID!=0]
print(thai_metagenome.head())
print('###################################################################################################')

########### Load all gut metabolome data of all 41 individuals
thai_metabolome_all = pd.read_excel('metabolome_matched_thai_modified_by_Tong.xlsx')

# Sum the rows that share a Chia_id, drop the first group (lowest Chia_id after
# the sorted groupby) and turn Chia_id back into a regular column.
thai_metabolome_all = thai_metabolome_all.groupby('Chia_id').sum().iloc[1:, :].reset_index()
thai_metabolome_ID = thai_metabolome_all['Chia_id']

# Keep rows whose Chia_id is non-zero and skip the first two columns so that
# only the per-sample columns remain.
thai_metabolome = thai_metabolome_all.loc[thai_metabolome_ID.ne(0)].iloc[:, 2:]
thai_metabolome_ID = thai_metabolome_ID.loc[thai_metabolome_ID.ne(0)]

print(thai_metabolome.head())
print('###################################################################################################')

# Restrict both tables to the columns they have in common.
# np.intersect1d returns the shared column labels sorted and de-duplicated,
# so both frames end up with the same columns in the same order.
intersected_names = np.intersect1d(
    thai_metagenome.columns.values,
    thai_metabolome.columns.values,
)
thai_metagenome = thai_metagenome.loc[:, intersected_names]
thai_metabolome = thai_metabolome.loc[:, intersected_names]

print('Intersection between metagenome and metabolome:')
print(thai_metagenome.head())
print(thai_metabolome.head())

   metabolites_ID  microbes_ID  \
0            2001          896   
1            2001          832   
2            2001          831   
3            2001          600   
4            2001          571   

   edge_types (2 represents intake, 3 represents secretion and 5 represents intake and secretion)  
0                                                  2                                               
1                                                  2                                               
2                                                  2                                               
3                                                  3                                               
4                                                  3                                               
###################################################################################################
                          Names
IDs                            
3    Acetivibrio cellulolyticus

  # Remove the CWD from sys.path while we load stuff.
  app.launch_new_instance()


    BK301  BK303   BK304  BK305   BK306   BK307    BK308   BK309    BK310  \
0  2746.0   23.0  1019.0  780.0   313.0   377.0    292.0  1058.0   1094.0   
1  3196.0  205.0  1543.0    0.0   688.0  1369.0      0.0   454.0    827.0   
2     0.0   15.0     0.0    0.0    16.0    20.0      0.0    24.0      0.0   
3   153.0   43.0    96.0  154.0  3433.0   370.0   5987.0   112.0   5122.0   
4   662.0  555.0   347.0  454.0  1335.0  9326.0  18560.0   255.0  30338.0   

    BK311  ...   BR318   BR319   BR320   BR321   BR322   BR323   BR325  \
0   125.0  ...    74.0   178.0    44.0    46.0    36.0   227.0    42.0   
1  4834.0  ...  1753.0   854.0  1123.0  3711.0  2013.0   270.0  3588.0   
2     0.0  ...    14.0     0.0     0.0    17.0     0.0     0.0     0.0   
3    86.0  ...   449.0   222.0    69.0    91.0  1524.0    60.0   308.0   
4  2067.0  ...   728.0  5806.0   105.0   707.0   430.0  1221.0   292.0   

    BR327   BR328    BR329  
0    68.0    93.0   2108.0  
1   898.0  1415.0      0.0  
2    

In [5]:
########### pickle all processed data which are useful for simulations
import pickle

# Each file is written inside a `with` block so the handle is closed even if
# pickle.dump raises. protocol=2 is kept from the original (presumably so the
# pickles stay loadable from Python 2 -- TODO confirm with the consumers).

# Chia network: edge table, selfish-microbe IDs, nutrient-intake IDs, node names.
with open("Chia_network.pickle", "wb") as pickle_out:
    pickle.dump([net, i_selfish, i_intake, names], pickle_out, protocol=2)

# Thai data: metagenome IDs and table, then metabolome IDs and table.
with open("Thai_data.pickle", "wb") as pickle_out:
    pickle.dump([thai_metagenome_ID, thai_metagenome, thai_metabolome_ID, thai_metabolome], pickle_out, protocol=2)