# Beta Diversity Analysis

This code will run the beta diversity analysis on the GCMP tissue compartment samples using phylogenetic trees generated with the greengenes and Silva databases.

First we need to load all the plugins we will use for this analysis

In [1]:
import qiime2
from qiime2 import Artifact
try:
    from qiime2.plugins.diversity.methods import beta
except:
    from qiime2.plugins.diversity.pipelines import beta
from qiime2.plugins.diversity.pipelines import beta_phylogenetic
from qiime2.plugins.diversity.visualizers import beta_group_significance
from qiime2.plugins.diversity.methods import pcoa
from qiime2.plugins import diversity
from qiime2.plugins.emperor.visualizers import plot
from qiime2.metadata import Metadata
from os.path import abspath,exists,join
import shutil


# Load Data

Load the tissue compartment feature tables and merge them into one dictionary loop.

In [2]:
#Check that required files exist
phylo_tree = Artifact.load("../input/insertion-tree_silva_GCMP.qza")
phylo_tree_physeq = Artifact.load("../input/physeq.noncton-rooted-tree.qza")
metadata = Metadata.load("../input/GCMP_EMP_map_r28_no_empty_samples.txt")

#these files will be merged as a dictionary for future analysis
feature_table_decon_all_1000 = Artifact.load("../input/feature_tables/feature_table_decon_all_1000.qza")
feature_table_decon_mucus_1000 = Artifact.load("../input/feature_tables/feature_table_decon_mucus_1000.qza")
feature_table_decon_skeleton_1000 = Artifact.load("../input/feature_tables/feature_table_decon_skeleton_1000.qza")
feature_table_decon_tissue_1000 = Artifact.load("../input/feature_tables/feature_table_decon_tissue_1000.qza")
#feature_table_decon_all_1000 = Artifact.load("../input/feature_tables/feature_table_decon_all_1000.qza")

In [3]:
#need to define dictionary for the loop
#feature_tables_silva_m2_1000 = {"mucus":feature_table_silva_m2_mucus_1000, "tissue":feature_table_silva_m2_tissue_1000, "skeleton":feature_table_silva_m2_skeleton_1000}
#feature_tables_silva_m2_1000 = {"all":feature_table_silva_m2_all_1000}
feature_tables_decon_1000 = {"mucus":feature_table_decon_mucus_1000, "tissue":feature_table_decon_tissue_1000, "skeleton":feature_table_decon_skeleton_1000, "all":feature_table_decon_all_1000}

# Beta Diversity Loop

This code runs beta diversity using the phylogenetic tree generated using the greengenes or silva database. The feature table and phylogenetic tree should match the database used.

Note: This code appears to work for "sample_type_EMP" but not "reef_name" when pairwise is set to true. If I use reef name I get an error saying that I have some single values which I can't seem to find.

In [4]:
metrics=['weighted_unifrac', 'unweighted_unifrac']
column_names=['Huang_Roy_tree_name']

for compartment, table in feature_tables_decon_1000.items():
    for metric in metrics:
        print(f"Calculating beta diversity for {compartment} using {metric}")
        beta_results = beta_phylogenetic(table=table, phylogeny=phylo_tree, metric=metric)
        beta_dm = beta_results.distance_matrix

        #calculate diversity on multiple columns as a second loop
        #change pairwise to true if want to get pairwise comparison
        #I think this still needs to be indented
        #for compartment, column in beta_results.distance_matrix
        for column in column_names:
            print(f"Calculating beta diversity metrics for {compartment} using {column}")
            pairwise_beta_diversity = diversity.actions.beta_group_significance\
                    (distance_matrix=beta_dm, metadata=metadata.get_column(column),\
                    method='permanova', pairwise=False)
    
            #visualize and save data
            beta_pairwise_visualization = pairwise_beta_diversity.visualization
            output_filename = f"beta_phylo_permanova_{compartment}_silva_m2_{metric}_{column}.qzv"
            output_filepath = join("../output",output_filename)
            print(f"Saveing significant results to {output_filepath}")
            beta_pairwise_visualization.save(output_filepath)
            
            #create and save a pcoa from the distance matrix
            beta_pairwise_pcoa = pcoa(beta_dm)
            beta_pairwise_pcoa_out = beta_pairwise_pcoa.pcoa
            output_filename = f"beta_phylo_pcoa_{compartment}_silva_m2_{metric}_{column}.qza"
            output_filepath =join("../output", output_filename)
            print(f"Saveing the raw pcoa results to {output_filepath}")
            beta_pairwise_pcoa_out.save(output_filepath)
            
            #save a plot of the pcoa
            emperor_plot = plot(pcoa=beta_pairwise_pcoa_out,metadata=metadata)
            emperor_visualization = emperor_plot.visualization
            output_filename = f"beta_phylo_emperor_pcoa_{compartment}_silva_m2_{metric}_{column}.qzv"
            output_filepath = join("../output", output_filename)
            print(f"Saveing the pcoa emperor plot to {output_filepath}")
            emperor_visualization.save(output_filepath)
            
      

Calculating beta diversity for mucus using weighted_unifrac
Calculating beta diversity metrics for mucus using Huang_Roy_tree_name
Saveing significant results to ../output/beta_phylo_permanova_mucus_silva_m2_weighted_unifrac_Huang_Roy_tree_name.qzv


  warn(


Saveing the raw pcoa results to ../output/beta_phylo_pcoa_mucus_silva_m2_weighted_unifrac_Huang_Roy_tree_name.qza
Saveing the pcoa emperor plot to ../output/beta_phylo_emperor_pcoa_mucus_silva_m2_weighted_unifrac_Huang_Roy_tree_name.qzv
Calculating beta diversity for mucus using unweighted_unifrac
Calculating beta diversity metrics for mucus using Huang_Roy_tree_name
Saveing significant results to ../output/beta_phylo_permanova_mucus_silva_m2_unweighted_unifrac_Huang_Roy_tree_name.qzv


  warn(


Saveing the raw pcoa results to ../output/beta_phylo_pcoa_mucus_silva_m2_unweighted_unifrac_Huang_Roy_tree_name.qza
Saveing the pcoa emperor plot to ../output/beta_phylo_emperor_pcoa_mucus_silva_m2_unweighted_unifrac_Huang_Roy_tree_name.qzv
Calculating beta diversity for tissue using weighted_unifrac
Calculating beta diversity metrics for tissue using Huang_Roy_tree_name
Saveing significant results to ../output/beta_phylo_permanova_tissue_silva_m2_weighted_unifrac_Huang_Roy_tree_name.qzv


  warn(


Saveing the raw pcoa results to ../output/beta_phylo_pcoa_tissue_silva_m2_weighted_unifrac_Huang_Roy_tree_name.qza
Saveing the pcoa emperor plot to ../output/beta_phylo_emperor_pcoa_tissue_silva_m2_weighted_unifrac_Huang_Roy_tree_name.qzv
Calculating beta diversity for tissue using unweighted_unifrac
Calculating beta diversity metrics for tissue using Huang_Roy_tree_name
Saveing significant results to ../output/beta_phylo_permanova_tissue_silva_m2_unweighted_unifrac_Huang_Roy_tree_name.qzv


  warn(


Saveing the raw pcoa results to ../output/beta_phylo_pcoa_tissue_silva_m2_unweighted_unifrac_Huang_Roy_tree_name.qza
Saveing the pcoa emperor plot to ../output/beta_phylo_emperor_pcoa_tissue_silva_m2_unweighted_unifrac_Huang_Roy_tree_name.qzv
Calculating beta diversity for skeleton using weighted_unifrac
Calculating beta diversity metrics for skeleton using Huang_Roy_tree_name
Saveing significant results to ../output/beta_phylo_permanova_skeleton_silva_m2_weighted_unifrac_Huang_Roy_tree_name.qzv


  warn(


Saveing the raw pcoa results to ../output/beta_phylo_pcoa_skeleton_silva_m2_weighted_unifrac_Huang_Roy_tree_name.qza
Saveing the pcoa emperor plot to ../output/beta_phylo_emperor_pcoa_skeleton_silva_m2_weighted_unifrac_Huang_Roy_tree_name.qzv
Calculating beta diversity for skeleton using unweighted_unifrac
Calculating beta diversity metrics for skeleton using Huang_Roy_tree_name
Saveing significant results to ../output/beta_phylo_permanova_skeleton_silva_m2_unweighted_unifrac_Huang_Roy_tree_name.qzv
Saveing the raw pcoa results to ../output/beta_phylo_pcoa_skeleton_silva_m2_unweighted_unifrac_Huang_Roy_tree_name.qza
Saveing the pcoa emperor plot to ../output/beta_phylo_emperor_pcoa_skeleton_silva_m2_unweighted_unifrac_Huang_Roy_tree_name.qzv
Calculating beta diversity for all using weighted_unifrac
Calculating beta diversity metrics for all using Huang_Roy_tree_name
Saveing significant results to ../output/beta_phylo_permanova_all_silva_m2_weighted_unifrac_Huang_Roy_tree_name.qzv


  warn(


Saveing the raw pcoa results to ../output/beta_phylo_pcoa_all_silva_m2_weighted_unifrac_Huang_Roy_tree_name.qza
Saveing the pcoa emperor plot to ../output/beta_phylo_emperor_pcoa_all_silva_m2_weighted_unifrac_Huang_Roy_tree_name.qzv
Calculating beta diversity for all using unweighted_unifrac
Calculating beta diversity metrics for all using Huang_Roy_tree_name
Saveing significant results to ../output/beta_phylo_permanova_all_silva_m2_unweighted_unifrac_Huang_Roy_tree_name.qzv


  warn(


Saveing the raw pcoa results to ../output/beta_phylo_pcoa_all_silva_m2_unweighted_unifrac_Huang_Roy_tree_name.qza
Saveing the pcoa emperor plot to ../output/beta_phylo_emperor_pcoa_all_silva_m2_unweighted_unifrac_Huang_Roy_tree_name.qzv


In [5]:
#save only the distance matrix for later
metrics=['weighted_unifrac', 'unweighted_unifrac']

for compartment, table in feature_tables_decon_1000.items():
    for metric in metrics:
        print(f"Calculating beta diversity for {compartment} using {metric}")
        beta_results = beta_phylogenetic(table=table, phylogeny=phylo_tree, metric=metric)
        beta_dm = beta_results.distance_matrix
        
        #save distance matrix for each compartment and metric used
        output_filename= f"beta_dist_matrix_{compartment}_silva_m2_decon_1000_{metric}.qza"
        output_filepath=join("../output",output_filename)
        print(f"Saving distance matrix to {output_filepath}")
        beta_dm.save(output_filepath)
        


Calculating beta diversity for mucus using weighted_unifrac
Saving distance matrix to ../output/beta_dist_matrix_mucus_silva_m2_decon_1000_weighted_unifrac.qza
Calculating beta diversity for mucus using unweighted_unifrac
Saving distance matrix to ../output/beta_dist_matrix_mucus_silva_m2_decon_1000_unweighted_unifrac.qza
Calculating beta diversity for tissue using weighted_unifrac
Saving distance matrix to ../output/beta_dist_matrix_tissue_silva_m2_decon_1000_weighted_unifrac.qza
Calculating beta diversity for tissue using unweighted_unifrac
Saving distance matrix to ../output/beta_dist_matrix_tissue_silva_m2_decon_1000_unweighted_unifrac.qza
Calculating beta diversity for skeleton using weighted_unifrac
Saving distance matrix to ../output/beta_dist_matrix_skeleton_silva_m2_decon_1000_weighted_unifrac.qza
Calculating beta diversity for skeleton using unweighted_unifrac
Saving distance matrix to ../output/beta_dist_matrix_skeleton_silva_m2_decon_1000_unweighted_unifrac.qza
Calculating 

In [15]:
#set up 
import pandas as pd
from os.path import abspath,exists,join
from qiime2 import Artifact
import shutil

#set up the directories
input_dir = "../output/beta_diversity/"
output_dir = "../output/beta_diversity/"

#load the distance matrix files
dm_weighted_mucus = Artifact.load(input_dir + "beta_dist_matrix_mucus_silva_m2_decon_1000_weighted_unifrac.qza")
dm_weighted_skeleton = Artifact.load(input_dir + "beta_dist_matrix_skeleton_silva_m2_decon_1000_weighted_unifrac.qza")
dm_weighted_tissue = Artifact.load(input_dir + "beta_dist_matrix_tissue_silva_m2_decon_1000_weighted_unifrac.qza")
dm_weighted_all = Artifact.load(input_dir + "beta_dist_matrix_all_silva_m2_decon_1000_weighted_unifrac.qza")

dm_unweighted_mucus = Artifact.load(input_dir + "beta_dist_matrix_mucus_silva_m2_decon_1000_unweighted_unifrac.qza")
dm_unweighted_skeleton = Artifact.load(input_dir + "beta_dist_matrix_skeleton_silva_m2_decon_1000_unweighted_unifrac.qza")
dm_unweighted_tissue = Artifact.load(input_dir + "beta_dist_matrix_tissue_silva_m2_decon_1000_unweighted_unifrac.qza")
dm_unweighted_all = Artifact.load(input_dir + "beta_dist_matrix_all_silva_m2_decon_1000_unweighted_unifrac.qza")

In [16]:
distance_matrix_weighted = {'mucus': dm_weighted_mucus, 'skeleton': dm_weighted_skeleton,\
                           'tissue': dm_weighted_tissue, 'all': dm_weighted_all}
distance_matrix_unweighted ={'mucus': dm_unweighted_mucus, 'skeleton': dm_unweighted_skeleton,\
                            'tissue': dm_unweighted_tissue, 'all': dm_unweighted_all}

Run weighted and unweighted unfrac before proceeding.

In [18]:
#run loop to extract the artifact distance matrix files so that the coordinates can be used 
#need to pass in the distance matrix file rather than the pcoa file
for compartment,distance_matrix in distance_matrix_unweighted.items():
    print(f"Ploting the distance matrix to a Pcoa for {compartment}!")
    pcoa_matrix = pcoa(distance_matrix)
    pcoa_matrix_out = pcoa_matrix.pcoa
    
    #save the pcoa matrix
    output_file = f"pcoa_dm_{compartment}_unweighted_unifrac.qza"
    output_filepath = join(output_dir,output_file)
    print(f"Saving results to: {output_filepath}")
    pcoa_matrix_out.save(output_filepath)
    
    #export the pcoa qza file as a text file for downstream analysis
    #note: this exports a folder with differing names but the file of interest inside the folder is always
    #called ordination.txt
    print(f"Exporting the pcoa distance matrix for {compartment}.")
    output_file = f"pcoa_dm_{compartment}_unweighted_unifrac"
    output_filepath = join(output_dir,output_file)
    pcoa_matrix_out.export_data(output_filepath)

Ploting the distance matrix to a Pcoa for mucus!


  warn(


Saving results to: ../output/beta_diversity/pcoa_dm_mucus_unweighted_unifrac.qza
Exporting the pcoa distance matrix for mucus.
Ploting the distance matrix to a Pcoa for skeleton!
Saving results to: ../output/beta_diversity/pcoa_dm_skeleton_unweighted_unifrac.qza
Exporting the pcoa distance matrix for skeleton.
Ploting the distance matrix to a Pcoa for tissue!


  warn(


Saving results to: ../output/beta_diversity/pcoa_dm_tissue_unweighted_unifrac.qza
Exporting the pcoa distance matrix for tissue.
Ploting the distance matrix to a Pcoa for all!


  warn(


Saving results to: ../output/beta_diversity/pcoa_dm_all_unweighted_unifrac.qza
Exporting the pcoa distance matrix for all.


In [19]:
#rename file files in the new txt folders so that they are unique like their folder name
#not just ordination.txt
import os
import ntpath
beta_diversity_dir = "../output/beta_diversity"
for root, dirs, files in os.walk(beta_diversity_dir):
    for file in files:
        if file == 'ordination.txt':
            dirname = ntpath.basename(root)
            #original location
            ori = root + '/' + file
            #From location, adding the dirname ("FolderA") before the file name
            dest =  root + '/' + dirname  + "_" + file
            os.rename(ori, dest)
print("Files are renamed!")

Files are renamed!


In [20]:
import pandas as pd

def add_pcoa_to_metadata(path_to_pcoa_tsv: str, path_to_metadata_tsv: str, \
                         final_merge_data_column = "Huang_Roy_tree_name", column_prefix: str = "PC") -> pd.DataFrame:
    """Returns metadata as DataFrame with PC axes added.
    
    path_to_pcoa_tsv -- A string representing the path to an exported Pcoa from Qiime2
    path_to_metadata_tsv -- A string representing the path to a metadata tsv file
    column_prefix -- A string to start the column names. The number of the PC axis will be added. 
      (eg 'weighted_unifrac_PC1')
    """
    mapping_df = pd.read_csv(path_to_metadata_tsv, sep="\t")
    sampleid_column_name = mapping_df.columns[0]
    
    pcoa_df = pd.read_csv(path_to_pcoa_tsv, skiprows=9, skipfooter=4,\
                          header=None, sep="\t", engine='python')
    
    #determine the number of columns in the mapping file and pcoa file
    columns = [sampleid_column_name]
    n_columns = len(pcoa_df.columns)
    #extend the column names for the pcoa
    columns.extend([column_prefix+str(x+1) for x in range(n_columns-1)])
    
    pcoa_df = pd.read_csv(path_to_pcoa_tsv, skiprows=9, skipfooter=4,\
                          header=None, names=columns, sep="\t", engine='python')
    
    #create a small dataframe using the pcoa DataFrame that contains the sample ID and 
    #the first 3 PC coordinates.
    pcoa_df_small = pcoa_df[[f'{sampleid_column_name}', column_prefix + "1", column_prefix + "2", column_prefix +"3"]]
    
    #merge the mapping and small pcoa dataframe into one by the SampleID column.
    mapping_df.loc[:,f'{sampleid_column_name}'] = \
    mapping_df.loc[:,f'{sampleid_column_name}'].astype(str)
    pcoa_df_small.loc[:,f'{sampleid_column_name}'] =\
    pcoa_df_small.loc[:,f'{sampleid_column_name}'].astype(str)
    merged_df = pcoa_df_small.merge(mapping_df, on = f'{sampleid_column_name}', how="left")
    
    #use a selected column to create a smaller DataFrame to run downstream analysis
    pcoa_df_small = merged_df[[final_merge_data_column, column_prefix + "1", column_prefix + "2", column_prefix +"3"]]
    #take the mean of the same samples in the new DataFrame 
    pcoa_df_small = pcoa_df_small.groupby([final_merge_data_column]).mean() 
    
    return pcoa_df_small

In [21]:
#call the function
#mapping file is easy I think
#path to pcoa will be harder- need to get to the .txt file in each of the folders and save with the folder name
#metadata file
import os
beta_diversity_dir = "../output/beta_diversity"
pcoa_df_small = {}
path_to_metadata = "../input/GCMP_EMP_map_r28_no_empty_samples.txt"
metadata_file = join(beta_diversity_dir, "combined_metadata.tsv")
#combined_metadata = pd.read_csv(metadata_file)
#with open("metadata_file", "w") as outfile:
#get all the pcoa data
for root, dirs, files in os.walk(beta_diversity_dir):
    for file in files:
        if file.endswith('ordination.txt'):
            print(file)
            print(root)
            #pcoa_df_small.head
            
            input_file = os.path.join(root,file)
            print("input file path:", input_file)
            #get only part of the filename to not include the file extention and the pcoa_dm_ beginning
            file_str = str(file)
            file_str = file.rsplit('.txt')[0]
            file_str = file_str.rsplit("pcoa_dm_")[1]
            column_prefix = file_str+"_PC"
            print(column_prefix)
            current_metadata = add_pcoa_to_metadata(input_file, path_to_metadata, column_prefix=column_prefix)
            output_file_prefix = file_str.rsplit("ordination")[0]
            output_file= os.path.join(root,f"{output_file_prefix}_metadata_ordination.tsv")
            current_metadata.to_csv(output_file, sep='\t')
            

pcoa_dm_mucus_unweighted_unifrac_ordination.txt
../output/beta_diversity/pcoa_dm_mucus_unweighted_unifrac
input file path: ../output/beta_diversity/pcoa_dm_mucus_unweighted_unifrac/pcoa_dm_mucus_unweighted_unifrac_ordination.txt
mucus_unweighted_unifrac_ordination_PC
pcoa_dm_all_unweighted_unifrac_ordination.txt
../output/beta_diversity/pcoa_dm_all_unweighted_unifrac
input file path: ../output/beta_diversity/pcoa_dm_all_unweighted_unifrac/pcoa_dm_all_unweighted_unifrac_ordination.txt
all_unweighted_unifrac_ordination_PC
pcoa_dm_mucus_weighted_unifrac_ordination.txt
../output/beta_diversity/pcoa_dm_mucus_weighted_unifrac
input file path: ../output/beta_diversity/pcoa_dm_mucus_weighted_unifrac/pcoa_dm_mucus_weighted_unifrac_ordination.txt
mucus_weighted_unifrac_ordination_PC
pcoa_dm_skeleton_unweighted_unifrac_ordination.txt
../output/beta_diversity/pcoa_dm_skeleton_unweighted_unifrac
input file path: ../output/beta_diversity/pcoa_dm_skeleton_unweighted_unifrac/pcoa_dm_skeleton_unweighte

# Merge the pcoa ordinations into one file. 

In [22]:
import os
import pathlib
from collections import defaultdict
from functools import reduce

beta_diversity_dir = "../output/beta_diversity"
metadata_file = join(beta_diversity_dir, "combined_metadata.tsv")
merged_data = pd.DataFrame(columns=["Huang_Roy_tree_name"])
data = []
for root, dirs, files in os.walk(beta_diversity_dir):
        for file in files:
            if file.endswith('metadata_ordination.tsv'):
                print(file)
                print(root)
                input_file = os.path.join(root,file)
                data_frame = pd.read_csv(input_file, sep="\t")
                data.append(data_frame)
        #print(data)
    
merge_data = reduce(lambda left,right: pd.merge(left,right, on=['Huang_Roy_tree_name'], how='outer'), data)
#delete the extra columns that end in _x and _y
merge_data_unique = merge_data[merge_data.columns[~(merge_data.columns.str.endswith('_x') | merge_data.columns.str.endswith('_y'))]]
#delete first column since its just numbers
#merge_data_unique = merge_data_unique.drop(columns=merge_data_unique.columns[0], axis=1, inplace=True)
#merged_data = bind.by.Huang_Roy_tree_name(data)
#concat_data = pd.concat(data, axis = "columns")
#merged_data = concat_data.merge(data, on = "Huang_Roy_tree_name", how = "outer")

#merge how =outer
#merged_data_unique = merged_data.loc[:,~merged_data.columns.duplicated()]
#merge_data.to_csv(metadata_file, sep="\t")
                

mucus_unweighted_unifrac__metadata_ordination.tsv
../output/beta_diversity/pcoa_dm_mucus_unweighted_unifrac
all_unweighted_unifrac__metadata_ordination.tsv
../output/beta_diversity/pcoa_dm_all_unweighted_unifrac
mucus_weighted_unifrac__metadata_ordination.tsv
../output/beta_diversity/pcoa_dm_mucus_weighted_unifrac
skeleton_unweighted_unifrac__metadata_ordination.tsv
../output/beta_diversity/pcoa_dm_skeleton_unweighted_unifrac
skeleton_weighted_unifrac__metadata_ordination.tsv
../output/beta_diversity/pcoa_dm_skeleton_weighted_unifrac
tissue_unweighted_unifrac__metadata_ordination.tsv
../output/beta_diversity/pcoa_dm_tissue_unweighted_unifrac
all_weighted_unifrac__metadata_ordination.tsv
../output/beta_diversity/pcoa_dm_all_weighted_unifrac
tissue_weighted_unifrac__metadata_ordination.tsv
../output/beta_diversity/pcoa_dm_tissue_weighted_unifrac


In [23]:
merge_data_unique.to_csv(metadata_file, sep="\t", index=False)

# Merge the pcoa axis data to the trait table

Make sure you have all the weighted and unweighted unifrac data merged in one table for this step

In [24]:
#now we can merge the combined metadata and the TIR data
trait_table_df=pd.read_csv("../input/coral_genome_trait_table.csv")
combine_pcoa_df=merge_data_unique

In [25]:
merged_df = trait_table_df.merge(combine_pcoa_df, on = "Huang_Roy_tree_name", how="left")
output_filename = "tir_pcoa_merged_trait_table.csv"
output_file = join(beta_diversity_dir,output_filename)
merged_df.to_csv(output_file)

# Create Emperor plot using the TIR Trait table only

In [8]:
#set up 
import pandas as pd
from os.path import abspath,exists,join
from qiime2 import Artifact
from qiime2.plugins.feature_table.methods import group
import shutil

#set up the directories
input_dir = "../output/beta_diversity/"
output_dir = "../output/beta_diversity/"

#load new TIR only metadata
metadata_tir = Metadata.load("../input/GCMP_TIR_genomes_mapping.txt")
metadata_tir_genome = Metadata.load("../input/GCMP_TIR_only_genomes_mapping.tsv")

#load the distance matrix files
TIR_feature_table_mucus = Artifact.load(input_dir + "TIR_mucus_feature_table_filtered.qza")
TIR_feature_table_skeleton = Artifact.load(input_dir + "TIR_skeleton_feature_table_filtered.qza")
TIR_feature_table_tissue = Artifact.load(input_dir + "TIR_tissue_feature_table_filtered.qza")
TIR_feature_table_all = Artifact.load(input_dir + "TIR_all_feature_table_filtered.qza")


In [9]:
TIR_feature_table = {'mucus': TIR_feature_table_mucus, 'skeleton': TIR_feature_table_skeleton,\
                           'tissue': TIR_feature_table_tissue, 'all': TIR_feature_table_all}

In [14]:
metrics=['weighted_unifrac', 'unweighted_unifrac']
metadata_column = metadata_tir.get_column(name='Huang_Roy_tree_name_tir')

for compartment, table in TIR_feature_table.items():
    print(f"Creating a merged feature table with only one genome value using {metadata_column} in {compartment}.")
    genome_feature_table_results = group(table=table,axis='sample', metadata=metadata_column, mode='mean-ceiling')
    genome_feature_table = genome_feature_table_results.grouped_table
    print("done filtering")
    output_filename = f"tir_genome_only_feature_table_{compartment}.qza"
    output_filepath = join("../output/beta_diversity",output_filename)
    genome_feature_table.save(output_filepath)
    for metric in metrics:
        print(f"Calculating beta diversity for {compartment} using {metric}")
        beta_results = beta_phylogenetic(table=genome_feature_table, phylogeny=phylo_tree, metric=metric)
        beta_dm = beta_results.distance_matrix
    
        #create and save a pcoa from the distance matrix
        beta_pairwise_pcoa = pcoa(beta_dm)
        beta_pairwise_pcoa_out = beta_pairwise_pcoa.pcoa
        output_filename = f"tir_genome_only_beta_phylo_pcoa_{compartment}_{metric}.qza"
        output_filepath =join("../output/beta_diversity", output_filename)
        print(f"Saveing the raw pcoa results to {output_filepath}")
        beta_pairwise_pcoa_out.save(output_filepath)
            
        #save a plot of the pcoa
        emperor_plot = plot(pcoa=beta_pairwise_pcoa_out,metadata=metadata_tir_genome, ignore_missing_samples= True)
        emperor_visualization = emperor_plot.visualization
        output_filename = f"tir_genome_only_beta_phylo_emperor_pcoa_{compartment}_{metric}.qzv"
        output_filepath = join("../output", output_filename)
        print(f"Saveing the pcoa emperor plot to {output_filepath}")
        emperor_visualization.save(output_filepath)

Creating a merged feature table with only one genome value using <CategoricalMetadataColumn name='Huang_Roy_tree_name_tir' id_count=243> in mucus.
done filtering
Calculating beta diversity for mucus using weighted_unifrac
Saveing the raw pcoa results to ../output/beta_diversity/tir_genome_only_beta_phylo_pcoa_mucus_weighted_unifrac.qza
Saveing the pcoa emperor plot to ../output/tir_genome_only_beta_phylo_emperor_pcoa_mucus_weighted_unifrac.qzv
Calculating beta diversity for mucus using unweighted_unifrac
Saveing the raw pcoa results to ../output/beta_diversity/tir_genome_only_beta_phylo_pcoa_mucus_unweighted_unifrac.qza
Saveing the pcoa emperor plot to ../output/tir_genome_only_beta_phylo_emperor_pcoa_mucus_unweighted_unifrac.qzv
Creating a merged feature table with only one genome value using <CategoricalMetadataColumn name='Huang_Roy_tree_name_tir' id_count=243> in skeleton.
done filtering
Calculating beta diversity for skeleton using weighted_unifrac
Saveing the raw pcoa results to 