In [2]:
# Import Python packages
import pandas as pd
import numpy as np
import biom
import matplotlib.pyplot as plt
import seaborn as sns
from itertools import cycle
import os
from matplotlib.colors import ListedColormap
from matplotlib.colors import to_rgba
from biom import Table

In [6]:
# Function to load a BIOM table into a DataFrame with rows sorted by descending row sum
def view_biom_table(biom_path):
    """Load a BIOM table into a DataFrame ordered by descending row sum.

    Parameters
    ----------
    biom_path : str
        Location of the BIOM file; passed unchanged to ``biom.load_table``.

    Returns
    -------
    pandas.DataFrame
        Dense copy of the table with observation IDs as the index and sample
        IDs as the columns. Rows are ordered so that the row with the largest
        sum across all samples comes first.
    """
    # Load BIOM table and densify it
    table = biom.load_table(biom_path)
    df = pd.DataFrame(table.matrix_data.toarray(),
                      index=table.ids(axis='observation'),
                      columns=table.ids(axis='sample'))

    # Rank rows by position instead of through a temporary 'row_sum' column:
    # a helper column would overwrite (and then drop) a sample whose ID is
    # literally 'row_sum'. Positional indexing also stays correct when
    # observation IDs are duplicated.
    row_totals = df.sum(axis=1).reset_index(drop=True)
    order = row_totals.sort_values(ascending=False).index.to_numpy()

    return df.iloc[order]

In [4]:
def load_process_save_biom_table(biom_path, output_biom_path,
                                 columns_to_remove=None, sample_prefix="15564."):
    """Load a BIOM table, sort and filter it, and write it back out as BIOM JSON.

    Steps, in order:

    1. Load ``biom_path`` and densify it into a DataFrame
       (observations x samples).
    2. Sort rows by their sum across all samples, largest first.
    3. Strip ``sample_prefix`` from the start of every sample ID.
    4. Drop the samples listed in ``columns_to_remove``; IDs that are not
       present are ignored.
    5. Build a new ``Table`` from the result and write it to
       ``output_biom_path`` via ``Table.to_json``.

    Parameters
    ----------
    biom_path : str
        Input BIOM file; passed unchanged to ``biom.load_table``.
    output_biom_path : str
        Destination file, opened in text write mode (overwritten if present).
    columns_to_remove : iterable of str, optional
        Sample IDs (as they read *after* prefix stripping) to drop. Defaults
        to the built-in exclusion list below.
    sample_prefix : str, optional
        Leading text removed from sample IDs. Only a match at the very start
        of an ID is removed, and it is matched literally. Pass ``""`` or
        ``None`` to leave the IDs untouched.

    Returns
    -------
    None
        The result is written to disk and a confirmation line is printed.
    """
    if columns_to_remove is None:
        # Default list of columns to remove
        columns_to_remove = [
            'Ca007NKPN', '9003932', '9004022', '9003992', 'Ca007NKNL', 'Co005SNNL',
            'Co001SMPN', '9003912', 'Ca006ONPN2', 'Ca008HNPN', 'Ca010EBL', '9003962',
            '9004032', 'Co004LNNL', 'Co003LTNL', 'Co003LTPN', 'Co002MPPN', 'Ca009STNL',
            'Ca006ONPN', 'Ca009STPN', '9003972', 'Ca010EBNL', 'Ca007NKL', 'Ca006ONNL2',
            'Ca008HNL', 'Ca008HNNL', '9003922', '9003982', 'Ca009STL', 'Co004LNPN',
            '9004002', 'Ca006ONL2', 'Co002MPNL', 'Co001SMNL', '9003952', 'Ca006ONL',
            'Co005SNPN', '9004012', 'Ca006ONNL', 'Ca010EBPN'
        ]

    # Load BIOM table and densify it
    table = biom.load_table(biom_path)
    df = pd.DataFrame(table.matrix_data.toarray(),
                      index=table.ids(axis='observation'),
                      columns=table.ids(axis='sample'))

    # Sort rows by row sum in descending order. This is done by position
    # rather than through a temporary 'row_sum' column, which would overwrite
    # (and then drop) a sample whose ID is literally 'row_sum'.
    row_totals = df.sum(axis=1).reset_index(drop=True)
    order = row_totals.sort_values(ascending=False).index.to_numpy()
    df = df.iloc[order]

    # Remove the prefix from sample IDs. `Index.str.replace("15564.", "")`
    # is avoided on purpose: it rewrites a match anywhere in the ID, and on
    # pandas < 2.0 it is interpreted as a regex where '.' matches any
    # character. A plain startswith check is anchored, literal and
    # independent of the pandas version.
    if sample_prefix:
        df.columns = [
            name[len(sample_prefix):] if name.startswith(sample_prefix) else name
            for name in map(str, df.columns)
        ]

    # Drop specified columns; unknown IDs are skipped deliberately
    df = df.drop(columns=list(columns_to_remove), errors='ignore')

    # Convert DataFrame back to BIOM table
    new_table = Table(
        df.values,
        observation_ids=df.index.tolist(),
        sample_ids=df.columns.tolist()
    )

    # Save the new BIOM table to the specified path
    with open(output_biom_path, 'w') as biom_out:
        new_table.to_json("Generated by BIOM-Format", biom_out)

    print(f"Processed BIOM table saved to {output_biom_path}")

In [7]:
# Display the genus-level relative-abundance table, rows ordered by descending row sum.
# NOTE(review): absolute, machine-specific path; consider a configurable data directory.
GENUS_RELATIVE_ABUNDANCE_BIOM = '/Users/yangchen/PhD/Gallo_lab/16S_AD_South-Africa/Data/Tables/Relative_Abundance_Tables/df_16S_filtered_feature_table_rare_Genus_relative_abundance.biom'
view_biom_table(GENUS_RELATIVE_ABUNDANCE_BIOM)

Unnamed: 0,900221,900570,900092,900466,9003932,900091,900556,900301,900245,900581,...,900547,900263,Ca008HNL,900081,900501,900279,900304,900580,900484,9003972
g__Streptococcus,0.000667,0.120454,0.973333,0.971667,0.700333,0.017391,0.632667,0.001667,0.006333,0.049000,...,0.209333,0.153000,0.000333,0.242000,0.539513,0.021667,0.904667,0.934667,0.703000,0.225343
g__Staphylococcus,0.058333,0.236904,0.000000,0.001000,0.000000,0.081271,0.001667,0.980667,0.017667,0.517333,...,0.004000,0.103000,0.977667,0.072333,0.058686,0.251667,0.006667,0.043333,0.000000,0.127048
g__Haemophilus_D_734546,0.000000,0.000667,0.000000,0.000000,0.283333,0.000000,0.252000,0.000000,0.000000,0.008000,...,0.750000,0.001667,0.000000,0.154333,0.014672,0.015667,0.022667,0.000333,0.132333,0.015714
g__,0.002333,0.175843,0.006667,0.000333,0.000000,0.411371,0.022333,0.003000,0.280667,0.090667,...,0.011667,0.106667,0.003667,0.147000,0.093698,0.093000,0.008000,0.000333,0.007333,0.041792
g__Corynebacterium,0.002333,0.012346,0.015333,0.000333,0.006000,0.090301,0.036333,0.012333,0.170000,0.010000,...,0.003000,0.132333,0.016333,0.027667,0.005335,0.182000,0.023333,0.014000,0.039000,0.015045
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
g__Pseudomonas_K,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
g__Herbaspirillum,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
g__Marinomonas,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
g__Tetragenococcus,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
