# Get Taxonomy DBs, and associate with function counts by md5
 Taxonomy is associated with the function counts by merging on md5 with the KO - md5 count data.

In [2]:
import requests  # to make the GET request 
import json  # to parse the JSON response to a Python dictionary
import csv  # to write our data to a CSV
import pandas as pd # to see our CSV
import numpy as np
import itertools
import re
import string

## Get MG RAST m5nr taxonomy DB
The download step below was converted to a raw cell to avoid re-downloading the DB; the table is reopened from file afterwards.
The work is broken into sections because the steps are slow to repeat after an error.

## Reimport Taxonomy table 

In [2]:
# Load the previously saved m5nr taxonomy table (tab-separated) from disk.
m5nr_Taxonomy = pd.read_csv("MG_RAST_m5nr_Taxonomy_ncbiIDs_ranks", sep="\t")

# Keep only rows whose ncbi_tax_id is not 0; a 0 ID is not useful for identification.
m5nr_TaxonomyCl = m5nr_Taxonomy.loc[m5nr_Taxonomy["ncbi_tax_id"].ne(0)]
m5nr_TaxonomyCl.head()

Unnamed: 0,ncbi_tax_id,domain,phylum,class,order,family,genus,species,organism
0,1128364.0,Eukaryota,Streptophyta,Liliopsida,Zingiberales,Marantaceae,Calathea,Calathea hagbergii,Calathea hagbergii
1,1252311.0,Eukaryota,Arthropoda,Insecta,Hymenoptera,Ichneumonidae,unclassified (derived from Ichneumonidae),Campopleginae sp. BOLD:AAM6952,Campopleginae sp. BOLD:AAM6952
2,179380.0,Eukaryota,Arthropoda,Insecta,Hymenoptera,Agaonidae,Apocryptophagus,Apocryptophagus sp. B73,Apocryptophagus sp. B73
3,1239215.0,Viruses,unclassified (derived from Viruses),unclassified (derived from Viruses),unclassified (derived from Viruses),Caliciviridae,Norovirus,Norwalk virus,Norovirus FE128/15-04-10/TUN
4,287018.0,Viruses,unclassified (derived from Viruses),unclassified (derived from Viruses),Picornavirales,Picornaviridae,Enterovirus,Human rhinovirus 263 Berlin 2004,Human rhinovirus 263 Berlin 2004


## Function: Get taxonomy IDs for each md5 from  MG RAST API  

The md5s are queried one ID at a time and the results are then concatenated. The entire list can be sent to the API in a single query,
but the API returns only 10 lines at a time, and an md5 with multiple hits can overrun that 10-line limit.
This prevents reliably chunking the list 10 IDs at a time.

In [None]:
def md5_get_ncbi(test_list, source):
    """Look up the NCBI taxonomy ID and organism for each md5, one API call per md5.

    Every md5 is POSTed on its own to the m5nr ``/m5nr/md5`` endpoint and only
    the first hit returned for it is kept. An md5 with no hit gets a placeholder
    row (``ncbi_tax_id`` = 0, ``organism`` = NaN) so it stays visible downstream.

    Parameters
    ----------
    test_list : iterable of str
        md5 checksums to query.
    source : str
        Value sent as the ``source`` field of the request (e.g. "RefSeq").

    Returns
    -------
    pandas.DataFrame
        One row per queried md5, in input order, with columns
        ``md5``, ``ncbi_tax_id`` and ``organism``. Empty (but with those
        columns) when ``test_list`` is empty.

    Raises
    ------
    requests.HTTPError
        If the API answers with an HTTP error status.
    KeyError
        If a response has no ``data`` field or a hit lacks an expected column.
    """
    columns = ['md5', 'ncbi_tax_id', "organism"]                     # columns of interest

    # Nothing to query: return an empty table instead of letting pd.concat fail on an empty list.
    md5s = list(test_list)
    if not md5s:
        return pd.DataFrame(columns=columns)

    rows = []
    # One session for all calls so the HTTP connection is reused across the per-md5 requests.
    with requests.Session() as session:
        for md5 in md5s:
            # json.dumps guarantees valid quoting/escaping of the request body.
            payload = json.dumps({"source": source, "data": [md5]})
            API_return = session.post('http://api.metagenomics.anl.gov/m5nr/md5', data=payload, timeout=60)
            API_return.raise_for_status()                            # surface HTTP errors at the call site
            hits = pd.DataFrame(API_return.json()['data'])           # first data layer as a DataFrame

            if hits.empty:
                # No hit: placeholder row carrying the md5 and tax ID 0.
                row = pd.Series([md5, 0, float('nan')], index=columns, dtype=object)
            else:
                # Keep only the first hit, reduced to the columns of interest.
                row = hits[columns].iloc[0, :]
            rows.append(row)

    # Each row is a Series; concatenate them side by side, then transpose to one row per md5.
    return pd.concat(rows, axis=1, ignore_index=True).T

## Import KO- md5 sample count data from file 

In [3]:
# Load the wide KO - md5 count table (tab-separated): MD5 and KO columns plus one count column per sample.
KO_md5samples = pd.read_csv(
    "MG_RAST_md5_WIDE_sample_count_KO_DB_133.txt",
    sep="\t",
)
KO_md5samples.head()
# KO_md5samples.shape

Unnamed: 0,MD5,KO,Browns_ThreeSqA_D1,Browns_ThreeSqA_D2,Browns_ThreeSqB_D1,Browns_ThreeSqB_D2,Browns_ThreeSqC_D1,Browns_ThreeSqC_D2,Browns_TuleA_D1,Browns_TuleA_D2,...,WestPond_TuleC_D1,WestPond_TuleC_D2,White_CordA_D2,White_CordB_D2,White_ThreeSqA_D1,White_ThreeSqA_D2,White_ThreeSqB_D1,White_ThreeSqB_D2,White_ThreeSqC_D1,White_ThreeSqC_D2
0,00001508eba3f78863a4f9cb2463810d,K00566,4.0,1.0,3.0,2.0,3.0,1.0,0.0,0.0,...,3.0,2.0,0.0,1.0,1.0,1.0,2.0,1.0,0.0,1.0
1,00001a757949ba4df5f1a9f8f6ba6c09,K01687,2.0,4.0,0.0,3.0,3.0,2.0,0.0,1.0,...,3.0,5.0,10.0,9.0,1.0,1.0,3.0,6.0,1.0,1.0
2,00001aba8aee0c90a80969ea8da059f8,K03688,28.0,13.0,16.0,6.0,19.0,7.0,11.0,19.0,...,14.0,4.0,21.0,34.0,12.0,19.0,5.0,36.0,54.0,22.0
3,00002ee0efb6f4ef77f1a53bbeb207d0,K02013,2.0,1.0,8.0,3.0,4.0,2.0,3.0,4.0,...,6.0,3.0,1.0,3.0,1.0,2.0,2.0,3.0,1.0,2.0
4,00003a8575ab2461c908a808ffe2002a,K00066,33.0,31.0,39.0,22.0,43.0,28.0,30.0,29.0,...,28.0,15.0,19.0,15.0,27.0,33.0,24.0,18.0,14.0,33.0


## Import KO functions of interest table, ontology

In [148]:
# Load the KO functions-of-interest table (tab-separated).
# NOTE: this version only has NPSch4, but with updated mcrA2.
KO_CNPsCH4_DB = pd.read_csv(
    "CNPSch4_KOs_whh12_17.txt",
    sep="\t",
)

# Alternative: the full ontology table (uncomment to use it instead of the file above).
#KO_CNPsCH4_DB = pd.read_csv("Ontology_KO_CNPSch4_Fm_whh_12.17v0.txt", sep ='\t' )
#KO_CNPsCH4_DB_cl = KO_CNPsCH4_DB[['Index', 'L1', 'L2', 'KO','gene','fxn','EC']]
#KO_CNPsCH4_DB_cl

In [155]:
# No subsetting here: KO_subset is the whole KO table (NPSch4 functions).
# This is an alias of KO_CNPsCH4_DB, not a copy.
KO_subset = KO_CNPsCH4_DB

# Get vector of KOs of interest (unused alternative):
#KOs_CNPsCH4 = pd.DataFrame(KO_CNPsCH4_DB['KO'])
#KOs_CNPsCH4.head()

# Note: too many md5s to run fast enough, so they can be chunked out by function.
# Candidate subsets for a future function (uncomment one to use it as KO_subset):
#KO_subset0 = KO_CNPsCH4_DB_cl[KO_CNPsCH4_DB_cl.L1!="Fermentation"]
#KO_subset = KO_subset0[KO_subset0.L1 != ""]
# KO_subset = KO_CNPsCH4_DB[KO_CNPsCH4_DB.L1=="CH4_cycling"]
# KO_subset = KO_CNPsCH4_DB[KO_CNPsCH4_DB.L2=="CH4_oxidation"]
# KO_subset

## Get md5s corresponding to KOs of interest

In [156]:
# Map the KOs of interest to their md5s and build the unique md5 list for the API query.

# MD5 / KO pairs from the count table.
md5_KO_rast133 = KO_md5samples[['MD5', 'KO']]

# Inner join: only KOs present in both tables survive (KOs with no counts drop out).
KO_subset_md5 = KO_subset.merge(md5_KO_rast133, on="KO", how="inner")

# Single-column frame of the matching md5s, then its unique values (first occurrence kept).
subset_md5 = KO_subset_md5['MD5'].to_frame()
subset_md5u = subset_md5.drop_duplicates()

# Plain Python list of md5s for the API query; show how many there are.
subset_md5_list = subset_md5u['MD5'].tolist()
len(subset_md5_list)

31462

### Limit List size

In [158]:
# No size limit: every md5 is queried (slice subset_md5_list here, e.g. [0:5000], for a quick test run).
test_list = subset_md5_list
test_listU = set(test_list)          # unique md5 values

# Join the md5s with '", "' so the string fits between the opening and closing quotes of a
# JSON array, and strip carriage returns so the result is one flat string.
md5_list_paste = '", "'.join(test_listU).replace('\r', '')

## API query for md5 - taxID mapping

In [159]:
# Query the m5nr API once for the whole md5 list (RefSeq source) and keep md5 / tax ID / organism.

# Build the request body with json.dumps so quoting and escaping are always valid JSON.
# Carriage returns are stripped from each md5; sorting makes the request reproducible
# (set iteration order is arbitrary).
md5_query = sorted(md5.replace('\r', '') for md5 in test_listU)
payload = json.dumps({"source": "RefSeq", "limit": 500000, "data": md5_query})

# API call with the md5 payload. The timeout (seconds) is generous because the response is
# large; tune it if needed -- without one, a stalled connection would hang the notebook forever.
API_return = requests.post('http://api.metagenomics.anl.gov/m5nr/md5', data=payload, timeout=600)
API_return.raise_for_status()                                    # stop here on an HTTP error
out = API_return.json()                                          # JSON response as a dict
md5_2tax = out['data']                                           # first data layer
md5_2taxd = pd.DataFrame(md5_2tax)                               # make DF
if md5_2taxd.empty:
    # Without this check an empty response fails below with an unhelpful KeyError.
    raise ValueError("m5nr API returned no hits for the md5 list")

columns = ['md5','ncbi_tax_id',"organism"]                       # columns to keep
md5_2taxdr = md5_2taxd[columns]                                  # md5_2taxdr.head()

# There are many duplicated md5 IDs; each looks like just a different strain, which is not
# important for taxonomy, so keep the first row per md5.
#  md5_2taxdrDups = md5_2taxdr[md5_2taxdr.duplicated('md5', keep = False)]
md5u_2taxdr = md5_2taxdr.drop_duplicates('md5', keep='first')    # md5u_2taxdr.shape

In [160]:
# md5u_2taxdr  # 
#md5u_2taxdr.shape

### Test merge with full Taxonomy

In [161]:
# Abstraction variable for the md5 output from the API (append .head(5) for a quick test).
md5_GB = md5_2taxdr

# Left-join the full taxonomy onto the md5 hits by NCBI tax ID. Both tables have an
# "organism" column, so the merge names them organism_x (API) and organism_y (taxonomy DB).
md5_GB_tax = pd.merge(md5_GB, m5nr_TaxonomyCl, how='left', on="ncbi_tax_id")

# Drop the taxonomy DB's strain-level organism_y BY NAME: a positional iloc[:, :-1] would
# silently remove the wrong column if the taxonomy file's column order ever changed.
md5_GB_tax0 = md5_GB_tax.drop('organism_y', axis=1)

# Drop exact duplicate rows, keeping the first occurrence.
md5_GB_taxU = md5_GB_tax0.drop_duplicates(keep='first')
# md5_GB_taxU.head()   # md5_GB_taxU.shape

In [162]:
# Several rows can share an md5; they appear to differ only by strain, which does not matter
# for taxonomy, so keep just the first row per md5.
# md5d_GB_taxU = md5_GB_taxU[md5_GB_taxU.duplicated('md5', keep = False)]    # to inspect the duplicates
md5u_GB_taxU = md5_GB_taxU.drop_duplicates(subset=['md5'], keep='first')
# md5u_GB_taxU
# md5u_2taxdr  # md5u_2taxdr.shape

In [163]:
md5u_GB_taxU.shape # (rows, columns) after de-duplicating by md5 -- a few extras, hmmm...
# TODO: send md5s with tax ID = 0 back for an alternative lookup.
# TODO: possibly ambiguous calls -- send to another DB?

# TODO: after the merge, remove any human / mouse / fly hits (filter by domain?).


(31223, 10)

## merge md5 taxonomy with KO hierarchy

In [164]:
# Build the KO - md5 mapping for the KO subset.
# Note: when running with the full ontology the columns differ:
#   ['Index', 'L1', 'L2','KO','gene', 'fxn', 'EC', 'MD5']

KO_subset_md5 = KO_subset.merge(md5_KO_rast133, how='inner', on="KO")

# Keep the descriptive columns and rename MD5 -> md5 so the key matches the taxonomy table.
KO_subset_md5_cl = (
    KO_subset_md5[['L1', 'L3','KO','gene', 'function', 'EC', 'MD5']]
    .rename(columns={'MD5': 'md5'})
)

# Exact duplicate rows removed (first occurrence kept); show the resulting size.
KO_subset_md5_clU = KO_subset_md5_cl.drop_duplicates()
KO_subset_md5_clU.shape

(32655, 7)

In [165]:
# Attach the per-md5 taxonomy to the KO - md5 table; the inner join keeps only md5s found in both.
# NOTE(review): this joins KO_subset_md5_cl, not the de-duplicated KO_subset_md5_clU built
# in the previous cell -- confirm which one is intended.
# Alternatives tried:
#   pd.merge(KO_subset_md5_cl, md5u_GB_taxU, left_on=['md5','KO'], right_on=['md5','KO'], how='inner')
#   pd.merge(KO_subset_md5_cl, md5_GB_taxU, on="md5", how='inner')
KO_subset_md5Tax = KO_subset_md5_cl.merge(md5u_GB_taxU, how='inner', on="md5")
KO_subset_md5Tax   # .shape

Unnamed: 0,L1,L3,KO,gene,function,EC,md5,ncbi_tax_id,organism_x,domain,phylum,class,order,family,genus,species
0,Nitrogen,N_DNT,K00368,nirK,nitrite reductase (NO-forming),[EC:1.7.2.1],00f21a1f7b33ec7e1862ac168bae479d,412021.0,Burkholderia mallei ATCC 10399,Bacteria,Proteobacteria,Betaproteobacteria,Burkholderiales,Burkholderiaceae,Burkholderia,Burkholderia mallei
1,Nitrogen,N_DNT,K00368,nirK,nitrite reductase (NO-forming),[EC:1.7.2.1],090a06e6f790f4175d55a94ef0af0565,375451.0,Roseobacter denitrificans OCh 114,Bacteria,Proteobacteria,Alphaproteobacteria,Rhodobacterales,Rhodobacteraceae,Roseobacter,Roseobacter denitrificans
2,Nitrogen,N_DNT,K00368,nirK,nitrite reductase (NO-forming),[EC:1.7.2.1],094c7dee12562cd34d6dfac9e8627306,583345.0,Methylotenera mobilis JLW8,Bacteria,Proteobacteria,Betaproteobacteria,Methylophilales,Methylophilaceae,Methylotenera,Methylotenera mobilis
3,Nitrogen,N_DNT,K00368,nirK,nitrite reductase (NO-forming),[EC:1.7.2.1],0a66c0f71baf62f146da7450750ffae1,318161.0,Shewanella denitrificans OS217,Bacteria,Proteobacteria,Gammaproteobacteria,Alteromonadales,Shewanellaceae,Shewanella,Shewanella denitrificans
4,Nitrogen,N_DNT,K00368,nirK,nitrite reductase (NO-forming),[EC:1.7.2.1],0a877b9f940692206489cc8d5f25175c,485914.0,Halomicrobium mukohataei DSM 12286,Archaea,Euryarchaeota,Halobacteria,Halobacteriales,Halobacteriaceae,Halomicrobium,Halomicrobium mukohataei
5,Nitrogen,N_DNT,K00368,nirK,nitrite reductase (NO-forming),[EC:1.7.2.1],128dc50e0803e13f47f5a84b401a1c67,416269.0,Actinobacillus pleuropneumoniae serovar 5b str...,Bacteria,Proteobacteria,Gammaproteobacteria,Pasteurellales,Pasteurellaceae,Actinobacillus,Actinobacillus pleuropneumoniae
6,Nitrogen,N_DNT,K00368,nirK,nitrite reductase (NO-forming),[EC:1.7.2.1],13a67f6021ca34f30fb5cb4686a5bf3b,1169230.0,Brucella ovis 63/96,Bacteria,Proteobacteria,Alphaproteobacteria,Rhizobiales,Brucellaceae,Brucella,Brucella ovis
7,Nitrogen,N_DNT,K00368,nirK,nitrite reductase (NO-forming),[EC:1.7.2.1],13fb139a0ace3097c1bf60169ab995f5,456481.0,Leptospira biflexa serovar Patoc strain 'Patoc...,Bacteria,Spirochaetes,Spirochaetia,Spirochaetales,Leptospiraceae,Leptospira,Leptospira biflexa
8,Nitrogen,N_DNT,K00368,nirK,nitrite reductase (NO-forming),[EC:1.7.2.1],14178c32b846869981e50acb42770e5a,439375.0,Ochrobactrum anthropi ATCC 49188,Bacteria,Proteobacteria,Alphaproteobacteria,Rhizobiales,Brucellaceae,Ochrobactrum,Ochrobactrum anthropi
9,Nitrogen,N_DNT,K00368,nirK,nitrite reductase (NO-forming),[EC:1.7.2.1],16841c9e1c07f851faab01e1ff3213fe,272569.0,Haloarcula marismortui ATCC 43049,Archaea,Euryarchaeota,Halobacteria,Halobacteriales,Halobacteriaceae,Haloarcula,Haloarcula marismortui


## merge KO-md5 taxonomy with KO-md5 count data 

In [166]:
# The count table names its key column "MD5", so rename "md5" back before joining.
# Rebinding the name (instead of inplace=True) avoids mutating the frame an earlier cell displayed.
KO_subset_md5Tax = KO_subset_md5Tax.rename(columns={'md5': 'MD5'})

# Merge KO-md5 taxonomy with the per-sample counts on BOTH keys, so the result has a single
# KO column. (An earlier merge on 'MD5' alone was immediately overwritten by this one and
# only cost time, so it has been removed.)
KOsubset_md5Tax_counts133 = pd.merge(KO_subset_md5Tax, KO_md5samples, on=['MD5', 'KO'], how='inner')

In [None]:
# Put the merged table's column names in a DataFrame so they can be listed and inspected.
cols = pd.DataFrame(KOsubset_md5Tax_counts133.columns)
# cols
# TODO: drop the KO_y column if one is present.

### note SITE ORDERING now alphabetical! 

In [168]:
# Display the merged taxonomy + counts table (append .head() for a short preview).
KOsubset_md5Tax_counts133#.head()

Unnamed: 0,L1,L3,KO,gene,function,EC,MD5,ncbi_tax_id,organism_x,domain,...,WestPond_TuleC_D1,WestPond_TuleC_D2,White_CordA_D2,White_CordB_D2,White_ThreeSqA_D1,White_ThreeSqA_D2,White_ThreeSqB_D1,White_ThreeSqB_D2,White_ThreeSqC_D1,White_ThreeSqC_D2
0,Nitrogen,N_DNT,K00368,nirK,nitrite reductase (NO-forming),[EC:1.7.2.1],00f21a1f7b33ec7e1862ac168bae479d,412021.0,Burkholderia mallei ATCC 10399,Bacteria,...,29.0,20.0,3.0,5.0,10.0,12.0,8.0,3.0,8.0,6.0
1,Nitrogen,N_DNT,K00368,nirK,nitrite reductase (NO-forming),[EC:1.7.2.1],090a06e6f790f4175d55a94ef0af0565,375451.0,Roseobacter denitrificans OCh 114,Bacteria,...,4.0,2.0,2.0,9.0,14.0,8.0,10.0,11.0,10.0,6.0
2,Nitrogen,N_DNT,K00368,nirK,nitrite reductase (NO-forming),[EC:1.7.2.1],094c7dee12562cd34d6dfac9e8627306,583345.0,Methylotenera mobilis JLW8,Bacteria,...,37.0,27.0,4.0,1.0,9.0,13.0,13.0,6.0,4.0,10.0
3,Nitrogen,N_DNT,K00368,nirK,nitrite reductase (NO-forming),[EC:1.7.2.1],0a66c0f71baf62f146da7450750ffae1,318161.0,Shewanella denitrificans OS217,Bacteria,...,1.0,1.0,4.0,0.0,4.0,7.0,6.0,5.0,9.0,6.0
4,Nitrogen,N_DNT,K00368,nirK,nitrite reductase (NO-forming),[EC:1.7.2.1],0a877b9f940692206489cc8d5f25175c,485914.0,Halomicrobium mukohataei DSM 12286,Archaea,...,1.0,2.0,1.0,3.0,1.0,2.0,1.0,0.0,1.0,3.0
5,Nitrogen,N_DNT,K00368,nirK,nitrite reductase (NO-forming),[EC:1.7.2.1],128dc50e0803e13f47f5a84b401a1c67,416269.0,Actinobacillus pleuropneumoniae serovar 5b str...,Bacteria,...,2.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
6,Nitrogen,N_DNT,K00368,nirK,nitrite reductase (NO-forming),[EC:1.7.2.1],13a67f6021ca34f30fb5cb4686a5bf3b,1169230.0,Brucella ovis 63/96,Bacteria,...,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
7,Nitrogen,N_DNT,K00368,nirK,nitrite reductase (NO-forming),[EC:1.7.2.1],13fb139a0ace3097c1bf60169ab995f5,456481.0,Leptospira biflexa serovar Patoc strain 'Patoc...,Bacteria,...,8.0,6.0,0.0,1.0,5.0,7.0,2.0,1.0,1.0,0.0
8,Nitrogen,N_DNT,K00368,nirK,nitrite reductase (NO-forming),[EC:1.7.2.1],14178c32b846869981e50acb42770e5a,439375.0,Ochrobactrum anthropi ATCC 49188,Bacteria,...,25.0,27.0,1.0,2.0,9.0,12.0,10.0,12.0,10.0,14.0
9,Nitrogen,N_DNT,K00368,nirK,nitrite reductase (NO-forming),[EC:1.7.2.1],16841c9e1c07f851faab01e1ff3213fe,272569.0,Haloarcula marismortui ATCC 43049,Archaea,...,3.0,2.0,5.0,3.0,8.0,4.0,3.0,6.0,6.0,6.0


In [169]:
## Not a terrible amount of duplication now. 32k vs. 31k

## Write table Out

In [170]:
# Save the merged taxonomy + counts table as a tab-separated text file.
KOsubset_md5Tax_counts133.to_csv("KOsubset_md5Tax_counts133.txt", sep ='\t')

In [182]:
# Look at some tables for taxonomy

In [185]:
# Pull out the rows for individual KOs of interest.
KOmd5_TaxCounts = KOsubset_md5Tax_counts133      # shorter alias (same object, not a copy)

# K00399 rows, stored as mcrA.
mcrA = KOmd5_TaxCounts.loc[KOmd5_TaxCounts["KO"].eq("K00399")]
# mcrA.iloc[:,:15]
# mcrA2 = KOmd5_TaxCounts[KOmd5_TaxCounts.KO=="K00400"]
# mcrABG['K00399','K00400']

# K00625 rows: phosphate acetyltransferase.
pta = KOmd5_TaxCounts.loc[KOmd5_TaxCounts["KO"].eq("K00625")]

# Only the last expression of a cell is displayed, so this cell shows pta.
pta

Unnamed: 0,L1,L3,KO,gene,function,EC,MD5,ncbi_tax_id,organism_x,domain,...,WestPond_TuleC_D1,WestPond_TuleC_D2,White_CordA_D2,White_CordB_D2,White_ThreeSqA_D1,White_ThreeSqA_D2,White_ThreeSqB_D1,White_ThreeSqB_D2,White_ThreeSqC_D1,White_ThreeSqC_D2
28961,CH4_cycling,CH4_Acetat,K00625,"E2.3.1.8, pta",phosphate acetyltransferase,[EC:2.3.1.8],010f88a1d61735ec240f42192bfefbf5,322710.0,Azotobacter vinelandii DJ,Bacteria,...,42.0,32.0,8.0,6.0,15.0,12.0,9.0,12.0,8.0,9.0
28962,CH4_cycling,CH4_Acetat,K00625,"E2.3.1.8, pta",phosphate acetyltransferase,[EC:2.3.1.8],01bbc98683a756c5ccded64b0d7ee6c2,692420.0,Bacillus amyloliquefaciens DSM 7,Bacteria,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0
28963,CH4_cycling,CH4_Acetat,K00625,"E2.3.1.8, pta",phosphate acetyltransferase,[EC:2.3.1.8],026827ed8f5bd18447cec3b158f77e60,395019.0,Burkholderia multivorans ATCC 17616,Bacteria,...,6.0,2.0,3.0,1.0,8.0,5.0,5.0,4.0,3.0,5.0
28964,CH4_cycling,CH4_Acetat,K00625,"E2.3.1.8, pta",phosphate acetyltransferase,[EC:2.3.1.8],029c63babdebe99902f12b2414cfad70,335992.0,Candidatus Pelagibacter ubique HTCC1062,Bacteria,...,0.0,0.0,1.0,0.0,2.0,1.0,0.0,1.0,2.0,3.0
28965,CH4_cycling,CH4_Acetat,K00625,"E2.3.1.8, pta",phosphate acetyltransferase,[EC:2.3.1.8],035cc291a84bf46ea822420bea895d04,257314.0,Lactobacillus johnsonii NCC 533,Bacteria,...,0.0,0.0,2.0,2.0,1.0,0.0,2.0,0.0,1.0,0.0
28966,CH4_cycling,CH4_Acetat,K00625,"E2.3.1.8, pta",phosphate acetyltransferase,[EC:2.3.1.8],04b2217d45744d1af4b4b9e4b5723723,390236.0,Borrelia afzelii PKo,Bacteria,...,0.0,2.0,0.0,2.0,0.0,1.0,0.0,0.0,1.0,0.0
28967,CH4_cycling,CH4_Acetat,K00625,"E2.3.1.8, pta",phosphate acetyltransferase,[EC:2.3.1.8],05221076dc302e7ca1a2543a92391bf9,62928.0,Azoarcus sp. BH72,Bacteria,...,34.0,62.0,16.0,11.0,8.0,8.0,6.0,15.0,12.0,7.0
28968,CH4_cycling,CH4_Acetat,K00625,"E2.3.1.8, pta",phosphate acetyltransferase,[EC:2.3.1.8],054d3042e2b720bfb81e007f815415e3,315749.0,Bacillus cytotoxicus NVH 391-98,Bacteria,...,3.0,0.0,3.0,2.0,1.0,3.0,1.0,2.0,0.0,1.0
28969,CH4_cycling,CH4_Acetat,K00625,"E2.3.1.8, pta",phosphate acetyltransferase,[EC:2.3.1.8],05dd6c595647411e2ed21f8101e1b134,264198.0,Ralstonia eutropha JMP134,Bacteria,...,5.0,6.0,3.0,5.0,10.0,9.0,15.0,7.0,6.0,10.0
28970,CH4_cycling,CH4_Acetat,K00625,"E2.3.1.8, pta",phosphate acetyltransferase,[EC:2.3.1.8],0808e4f6f6dcd43193ebca68f3d00549,272562.0,Clostridium acetobutylicum ATCC 824,Bacteria,...,20.0,27.0,11.0,10.0,7.0,19.0,10.0,10.0,5.0,14.0
