This code will run beta diversity on the GCMP data.

Need to run beta diversity on GCMP microbiology data and the phylogenetic tree.

First we need to load all the plugins we will use for this analysis

In [1]:
import qiime2
from qiime2 import Artifact
try:
    from qiime2.plugins.diversity.methods import beta
except:
    from qiime2.plugins.diversity.pipelines import beta
from qiime2.plugins.diversity.pipelines import beta_phylogenetic
from qiime2.plugins.diversity.visualizers import beta_group_significance
from qiime2.plugins import diversity
from qiime2.metadata import Metadata
from os.path import abspath,exists,join
import shutil


Import compartment feature tables for future analysis

In [2]:
#these are files that will be merged (right now lets just try the greengenes files)
feature_table_gg_mucus = Artifact.load("../input/feature_table_greengenes_mucus.qza")
feature_table_gg_mucus_1000 = Artifact.load("../input/feature_table_greengenes_mucus_1000.qza")
feature_table_gg_m2_mucus = Artifact.load("../input/feature_table_greengenes_metaxa2_mucus.qza")
feature_table_gg_m2_mucus_1000 = Artifact.load("../input/feature_table_greengenes_metaxa2_mucus_1000.qza")
feature_table_gg_skeleton = Artifact.load("../input/feature_table_greengenes_skeleton.qza")
feature_table_gg_skeleton_1000 = Artifact.load("../input/feature_table_greengenes_skeleton_1000.qza")
feature_table_gg_m2_skeleton = Artifact.load("../input/feature_table_greengenes_metaxa2_skeleton.qza")
feature_table_gg_m2_skeleton_1000 =Artifact.load("../input/feature_table_greengenes_metaxa2_skeleton_1000.qza")
feature_table_gg_tissue = Artifact.load("../input/feature_table_greengenes_tissue.qza")
feature_table_gg_tissue_1000 = Artifact.load("../input/feature_table_greengenes_tissue_1000.qza")
feature_table_gg_m2_tissue = Artifact.load("../input/feature_table_greengenes_metaxa2_tissue.qza")
feature_table_gg_m2_tissue_1000 = Artifact.load("../input/feature_table_greengenes_metaxa2_tissue_1000.qza")
#Output and input directory
output_dir = abspath("../output/")
input_dir = abspath("../input")

In [3]:
#need to define my "items" for the loop
feature_tables_gg = {"mucus":feature_table_gg_mucus, "tissue":feature_table_gg_tissue, "skeleton":feature_table_gg_m2_skeleton}
feature_tables_gg_m2 = {"mucus":feature_table_gg_m2_mucus, "tissue":feature_table_gg_m2_tissue, "skeleton":feature_table_gg_m2_skeleton}
feature_tables_gg_1000 = {"mucus":feature_table_gg_mucus_1000, "tissue":feature_table_gg_tissue_1000, "skeleton":feature_table_gg_skeleton_1000}
feature_tables_gg_m2_1000 = {"mucus":feature_table_gg_m2_mucus_1000, "tissue":feature_table_gg_m2_tissue_1000, "skeleton":feature_table_gg_m2_skeleton_1000}

Load the data
The files needed for this analysis are in the next section.

In [4]:
#feature_table = Artifact.load ("../input/feature_table_greengenes_tissue.qza")
phylo_tree = Artifact.load("../input/insertion-tree_GCMP.qza")
metadata = Metadata.load("../input/GCMP_EMP_map_r28_no_empty_samples.txt")

Calculate Beta-diversity using the generated microbial phylogenetic tree. Pairwise comparisons will be
run to get the metrics

This code appears to work for "sample_type_EMP" but not "reef_name" when pairwise is set to true. If I use reef name I get an error saying that I have some single values which I can't seem to find.

In [10]:
#can run command changing the metric used: 'generalized_unifrac', 'weighted_normalized_unifrac', \
#'unweighted_unifrac', 'weighted_unifrac'
beta_results = beta_phylogenetic(table=feature_table, phylogeny=phylo_tree, metric='weighted_unifrac')
beta_dm = beta_results.distance_matrix
#calulate pairwise comparisons
pairwise_beta_diversity = diversity.actions.beta_group_significance(\
                                                                distance_matrix=beta_dm,\
                                                                metadata=metadata.get_column('reef_name'),\
                                                                method='permanova', pairwise=False)
beta_pairwise_visualization = pairwise_beta_diversity.visualization
output_filename = "beta_phylogenetic_permanova_unifrac_pairwise.qzv"
output_filepath = "../output/beta_phylogenetic_permanova_unifrac_pairwise.qzv"
print("Saving results to {output_filepath}")
beta_pairwise_visualization.save(output_filepath)

Saving results to {output_filepath}


'../output/beta_phylogenetic_permanova_unifrac_pairwise.qzv'

This code will run the beta diversity metrics as a loop

In [8]:
#can run command changing the metric used: 'generalized_unifrac', 'weighted_normalized_unifrac', \
#'unweighted_unifrac', 'weighted_unifrac'
#Also need to take into account different column names from the metadata file to compare
metrics=['weighted_unifrac', 'unweighted_unifrac']
column_names=['reef_name', 'temperature']
#column_name1=metadata.get_column('reef_name')
#column_name2=metadata.get_column('sample_type_EMP')
#column_names = [column_name1, column_name2]
#sample_type_EMP

for compartment, table in feature_tables_gg_m2.items():
    for metric in metrics:
        print(f"Calculating beta diversity for {compartment} using {metric}")
        beta_results = beta_phylogenetic(table=table, phylogeny=phylo_tree, metric=metric)
        beta_dm = beta_results.distance_matrix

        #calculate diversity on multiple columns as a second loop
        #change pairwise to true if want to get pairwise comparison
        #I think this still needs to be indented
        #for compartment, column in beta_results.distance_matrix
        for column in column_names:
            print(f"Calculating beta diversity metrics for {compartment} using {column}")
            pairwise_beta_diversity = diversity.actions.beta_group_significance\
                    (distance_matrix=beta_dm, metadata=metadata.get_column(column),\
                    method='permanova', pairwise=False)
    
            #visualize and save the data
            beta_pairwise_visualization = pairwise_beta_diversity.visualization
            output_filename = f"beta_phylo_permanova_{compartment}_gg_m2_{metric}_{column}.qza"
            output_filepath = join("../output",output_filename)
            print(f"Saveing significant results to {output_filepath}")
            beta_pairwise_visualization.save(output_filepath)


Calculating beta diversity for mucus using weighted_unifrac
Calculating beta diversity metrics for mucus using reef_name
Saveing significant results to ../output/beta_phylo_permanova_mucus_gg_m2_weighted_unifrac_reef_name.qza
Calculating beta diversity metrics for mucus using temperature
Saveing significant results to ../output/beta_phylo_permanova_mucus_gg_m2_weighted_unifrac_temperature.qza
Calculating beta diversity for mucus using unweighted_unifrac
Calculating beta diversity metrics for mucus using reef_name
Saveing significant results to ../output/beta_phylo_permanova_mucus_gg_m2_unweighted_unifrac_reef_name.qza
Calculating beta diversity metrics for mucus using temperature
Saveing significant results to ../output/beta_phylo_permanova_mucus_gg_m2_unweighted_unifrac_temperature.qza
Calculating beta diversity for tissue using weighted_unifrac
Calculating beta diversity metrics for tissue using reef_name
Saveing significant results to ../output/beta_phylo_permanova_tissue_gg_m2_weig