In [1]:
import os
import subprocess

import pandas as pd
from qiime2 import Artifact, Metadata, Visualization

from qiime2.plugins import (
    metadata,
    feature_table,
    diversity,
    emperor
)

%matplotlib inline

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Root directory of the analysis. The notebook changes into it in a later cell,
# so every relative path used afterwards is resolved against this location.
# Set the PICRUST_WORKDIR environment variable to run the notebook on another
# machine without editing the hard-coded default below.
workdir = os.environ.get(
    "PICRUST_WORKDIR",
    "/storage/TomaszLab/vbez/sinus-microbiota/picrust-analysis",
)

In [9]:
# Change into the analysis directory so that the relative paths used in the
# following cells resolve against `workdir`.
%cd $workdir

/storage/TomaszLab/vbez/sinus-microbiota/picrust-analysis


In [3]:
# Make sure every output folder exists before anything is written to it;
# exist_ok=True keeps the cell safe to re-run.
for output_folder in ("visualizations", "results", "intermediate_artifacts"):
    os.makedirs(output_folder, exist_ok=True)

In [4]:
# Relative locations used by the cells below:
#   feature_dir - PICRUSt2 tables (.tsv inputs and the converted .biom files)
#   vis_dir     - Emperor PCoA visualizations (.qzv)
#   res_dir     - PERMANOVA results, one sub-folder per PICRUSt2 output type
feature_dir, vis_dir, res_dir = (
    "./picrust_artifacts/",
    "./visualizations/",
    "./results/",
)

In [18]:
# Convert every PICRUSt2 .tsv table found under feature_dir into an HDF5 BIOM
# file written next to the source table (same base name, .biom extension).
for dirpath, _, filenames in os.walk(feature_dir):
    for filename in filenames:
        if not filename.endswith(".tsv"):
            continue

        # os.walk descends into sub-folders, so build paths from dirpath
        # rather than from feature_dir.
        tsv_path = os.path.join(dirpath, filename)
        # splitext strips only the final extension, so names that contain
        # additional dots are handled as well.
        biom_path = os.path.splitext(tsv_path)[0] + ".biom"

        # An argument list avoids shell quoting problems with unusual paths,
        # and check=True surfaces a failed conversion instead of ignoring it.
        subprocess.run(
            ["biom", "convert", "-i", tsv_path, "-o", biom_path, "--to-hdf5"],
            check=True,
        )

In [6]:
# Beta-diversity metric used for the distance matrices
metric = "braycurtis"

# Metadata columns of interest for the group comparisons
cat_cols = ["host_body_site", "maxillary_ostium_size"]

# Sample metadata; in this file "not collected" and "not applicable"
# were replaced with NA
sample_metadata = Metadata.load('metadata_types_corrected.tsv')

In [None]:
# Folder for the saved distance matrices and PCoA results. It was referenced
# below without ever being defined; the value matches the
# "intermediate_artifacts" directory created earlier in the notebook.
intermediate_dir = "./intermediate_artifacts/"

# For every BIOM feature table: compute the beta-diversity distance matrix,
# run PCoA, save an Emperor plot, and test each metadata column of interest
# with pairwise PERMANOVA.
for dirpath, _, filenames in os.walk(feature_dir):
    for filename in filenames:
        if not filename.endswith(".biom"):
            continue

        # The text before the first underscore names the PICRUSt2 output type
        # and is used as a prefix/sub-folder for everything saved below.
        picrust_out_type = filename.split("_")[0]
        os.makedirs(os.path.join(res_dir, picrust_out_type), exist_ok=True)

        # os.walk descends into sub-folders, so build the path from dirpath.
        feat_table = Artifact.import_data("FeatureTable[Frequency]", os.path.join(dirpath, filename))

        # Distance matrix for the chosen metric
        dist_matrix = diversity.pipelines.beta(feat_table, metric=metric)
        dist_matrix.distance_matrix.save(intermediate_dir + "_".join([picrust_out_type, metric, "dis_matrix"]) + ".qza")

        # PCoA ordination and its Emperor visualization
        pcoa_results = diversity.methods.pcoa(dist_matrix.distance_matrix)
        pcoa_results.pcoa.save(intermediate_dir + "_".join([picrust_out_type, metric, "pcoa"]) + ".qza")

        pcoa_emperor = emperor.visualizers.plot(pcoa_results.pcoa, metadata=sample_metadata)
        pcoa_emperor.visualization.save(vis_dir + "_".join([picrust_out_type, metric, "pcoa_emperor"]) + ".qzv")

        # Testing PERMANOVA for each metadata column of interest; `cat` was
        # previously used without being bound to any column.
        for cat in cat_cols:
            permanova = diversity.visualizers.beta_group_significance(dist_matrix.distance_matrix,
                                                                      metadata=sample_metadata.get_column(cat),
                                                                      method="permanova",
                                                                      pairwise=True,
                                                                      permutations=9999)

            savepath = os.path.join(res_dir, picrust_out_type, "_".join([metric, "permanova", cat]) + ".qzv")
            permanova.visualization.save(savepath)