In [1]:
import biom
import skbio

In [14]:
import os
import pandas as pd

In [15]:
def alpha_diversity_pre(otu_table,metric,tree=None):
    """Compute one alpha-diversity value per sample of a BIOM table.

    Args:
        otu_table: path of the BIOM table, passed to ``biom.load_table``.
        metric: name of the metric handed to
            ``skbio.diversity.alpha_diversity``.
        tree: path of the tree file, read with ``skbio.TreeNode.read``.
            Only used, and required, when ``metric`` is ``'faith_pd'``.

    Returns:
        A pandas DataFrame with a single ``'alpha_div'`` column, indexed by
        the ids taken from the columns of the loaded table.

    Raises:
        ValueError: if ``metric`` is ``'faith_pd'`` and no tree was given.
    """
    # Fail fast with a clear message, before any file is read, instead of
    # letting TreeNode.read(None) fail obscurely after the table was loaded.
    if metric == 'faith_pd' and tree is None:
        raise ValueError(
            "metric 'faith_pd' requires the path of a tree file (tree=...)")

    df = biom.load_table(otu_table).to_dataframe()

    # Columns of df supply the sample ids and its index the OTU ids, so the
    # transpose puts one sample per row of the counts matrix.
    if metric == 'faith_pd':
        result = skbio.diversity.alpha_diversity(
            counts=df.T.values, ids=df.columns, metric='faith_pd',
            tree=skbio.TreeNode.read(tree), otu_ids=df.index)
    else:
        result = skbio.diversity.alpha_diversity(
            counts=df.T.values, ids=df.columns, metric=metric)

    # to_frame names the column explicitly; pd.DataFrame(series, columns=[...])
    # only yields the values while the Series happens to be unnamed.
    return result.to_frame(name='alpha_div')

In [26]:
# Inputs of the analysis: the metric to compute and the three uploaded files
# (feature table, tree, sample metadata), all relative to the working directory.
metric = 'faith_pd'
otu_table, tree, metadata = (
    os.path.join('', relative_path)
    for relative_path in (
        'upload_files/feature-table.biom',
        'upload_files/tree.nwk',
        'upload_files/demo_metadata.tsv',
    )
)

In [27]:
# Per-sample alpha diversity for the configured table, metric and tree.
alpha_pre = alpha_diversity_pre(otu_table, metric, tree=tree)

In [34]:
def alpha_diversity(alpha_table, metadata,label_col):
    """Split the alpha table into several groups according to the metadata.

    Args:
        alpha_table: a pandas DataFrame indexed by sample id with an
            'alpha_div' column, as returned by the 'alpha_diversity_pre'
            function.
        metadata: tab-separated file recording the macro features of every
            sample (file name, string). It must contain a '#SampleID'
            column and the ``label_col`` column.
        label_col: name of the metadata column whose values become the keys
            of the result.

    Returns:
        A dict mapping every label to the list of alpha-diversity values of
        its samples, e.g.
        {'class0': [5.2, 6.5, 6.3], 'class1': [20.7, 18.1]}
        Labels and values are listed in metadata row order. Samples that are
        missing from either input are left out.

    Raises:
        KeyError: if the metadata lacks '#SampleID' or ``label_col``.
    """
    meta_df = pd.read_csv(metadata, sep='\t')

    # Report a bad column name as an error instead of printing a message and
    # then failing later on an undefined variable.
    missing = [col for col in ('#SampleID', label_col)
               if col not in meta_df.columns]
    if missing:
        raise KeyError('Wrong column name: %s not found in metadata'
                       % ', '.join(repr(col) for col in missing))

    # Inner join on the sample id. The merged frame keeps the metadata row
    # labels as its index but follows the row order of alpha_table.
    merged = alpha_table.merge(
        meta_df, left_index=True, right_on='#SampleID')
    # Restore metadata row order (stable sort keeps ties in place).
    merged = merged.sort_index(kind='mergesort')

    # Pair labels and values positionally: the index labels are not
    # guaranteed to be the contiguous range 1..n, so they must not be used
    # as loop offsets.
    result_dict = {}
    for key, value in zip(merged[label_col], merged['alpha_div']):
        result_dict.setdefault(key, []).append(value)
    return result_dict


In [35]:
# Group the per-sample diversity values by the 'BodySite' metadata column.
result_dict = alpha_diversity(alpha_pre, metadata, label_col='BodySite')

    alpha_div #SampleID BarcodeSequence LinkerPrimerSequence    BodySite  \
4    7.416346    L1S105    AGTGCGATGCGT  GTGCCAGCMGCCGCGGTAA         gut   
19   7.146462    L1S140    ATGGCAGCTCTA  GTGCCAGCMGCCGCGGTAA         gut   
20   8.624969    L1S208    CTGAGATACGCG  GTGCCAGCMGCCGCGGTAA         gut   
21   8.289504    L1S257    CCGACTGAGATG  GTGCCAGCMGCCGCGGTAA         gut   
22   7.335356    L1S281    CCTCTCGTGATC  GTGCCAGCMGCCGCGGTAA         gut   
2    6.539852     L1S57    ACACACTATGGC  GTGCCAGCMGCCGCGGTAA         gut   
3    6.293609     L1S76    ACTACGTGTGGT  GTGCCAGCMGCCGCGGTAA         gut   
1    5.277165      L1S8    AGCTGACTAGTC  GTGCCAGCMGCCGCGGTAA         gut   
5   20.786049    L2S155    ACGATGCGACCA  GTGCCAGCMGCCGCGGTAA   left palm   
6   18.108938    L2S175    AGCTATCCACGA  GTGCCAGCMGCCGCGGTAA   left palm   
7   19.295724    L2S204    ATGCAGCTCAGT  GTGCCAGCMGCCGCGGTAA   left palm   
8   26.790348    L2S222    CACGTGACATGT  GTGCCAGCMGCCGCGGTAA   left palm   
23   8.95095

In [36]:
# Display the dict returned by alpha_diversity (label -> list of values).
result_dict

{'gut': [5.2771648994999998,
  6.5398520304999996,
  6.2936087504999998,
  7.4163461304999991,
  7.1464624804999994,
  8.6249694955000002,
  8.2895044665000004,
  7.3353559524999996],
 'left palm': [20.786049410499999,
  18.1089381735,
  19.295724058499999,
  26.790347861500003,
  8.9509539074999989,
  16.429068346499999,
  9.7161246094999996,
  14.487626460500001],
 'right palm': [4.0155023404999994,
  11.363373767499999,
  8.0971416964999996,
  10.0834229135,
  14.539094142500002,
  4.5003362645000005,
  38.689280145000005,
  30.329040524,
  15.898265447500002],
 'tongue': [6.0202577174999998,
  4.4268051605000007,
  5.6827088784999997,
  6.3302466284999994,
  5.8878515514999998,
  5.4009182935000002,
  7.9605999584999996,
  7.2916169345000004,
  7.2647129904999996]}