ASSIGN TAXONOMY TO THE FEATURE TABLE

1. Read the feature table, metadata and representative sequences.
2. Filter the representative sequences artifact to keep only sequences related to gut samples.
3. Assign taxonomy to the sequences.
4. Collapse taxonomy to chosen level.

In [None]:
import pandas as pd
from qiime2 import Artifact
from qiime2 import Metadata

from qiime2.plugins.feature_classifier.methods import classify_sklearn
from qiime2.plugins import feature_classifier
from qiime2.plugins.feature_table.methods import filter_seqs, filter_samples
from qiime2.plugins.taxa.methods import collapse

### read feature table

In [77]:
# Reference-hit BIOM feature table (the path suggests Qiita study 550 raw data).
ftable_file = '/storage/zkarwowska/microbiome-interactions/datasets/processed/qiita_550/raw_data/57456_reference-hit.biom'
# Import the BIOM file as a QIIME 2 artifact of type FeatureTable[Frequency].
ftable = Artifact.import_data(
    type='FeatureTable[Frequency]',
    view=ftable_file,
)

### read metadata

In [78]:
# Sample metadata file; the loaded object drives the sample filters below.
metadata_file = '/storage/zkarwowska/microbiome-interactions/datasets/processed/qiita_550/raw_data/550_metadata.txt'
metadata = Metadata.load(filepath=metadata_file)

### read representative sequences

In [79]:
# Representative sequences are already stored as a .qza artifact, so they are
# loaded directly instead of being imported from a raw file.
seq_file = '/storage/zkarwowska/microbiome-interactions/datasets/processed/qiita_550/raw_data/rep_seqs.qza'
res_seqs = Artifact.load(filepath=seq_file)

### filter representative sequences

In [85]:
# Step 1: keep only samples whose `sample_type` metadata value is 'feces'.
feces_only = filter_samples(
    table=ftable,
    metadata=metadata,
    where="[sample_type]='feces'",
)

# Step 2: from those, keep only samples whose `mislabeled` value is 'FALSE'.
filtered_ftable = filter_samples(
    table=feces_only.filtered_table,
    metadata=metadata,
    where="[mislabeled]='FALSE'",
)

# Restrict the representative sequences to the features that remain in the
# filtered table.
filtered_seqs = filter_seqs(
    data=res_seqs,
    table=filtered_ftable.filtered_table,
)

### assign taxonomy

In [64]:
# The classifier is imported from a directory on disk rather than loaded from
# a .qza file.
# NOTE(review): the `gg_` prefix presumably means a Greengenes-trained
# classifier -- confirm against how the directory was produced.
classifier_path = '/storage/zkarwowska/classfier/'
gg_classifier = Artifact.import_data(
    type='TaxonomicClassifier',
    view=classifier_path,
)

In [65]:
# Classify the filtered representative sequences with the imported classifier.
# `classify_sklearn` is imported directly at the top of the notebook, so the
# fully qualified `feature_classifier.methods.` prefix is not needed.
taxonomy = classify_sklearn(
    reads=filtered_seqs.filtered_data,
    classifier=gg_classifier,
)

### collapse taxonomy on a chosen level

In [120]:
# Taxonomic depth to collapse to. With level 2 the resulting columns look like
# "k__Bacteria;p__Firmicutes" (kingdom;phylum).
collapse_level = 2

# Sum feature counts that share the same taxonomy string up to that depth.
collapsed_taxonomy = collapse(
    table=filtered_ftable.filtered_table,
    taxonomy=taxonomy.classification,
    level=collapse_level,
)

In [121]:
# Convert the collapsed feature table artifact into a pandas DataFrame.
collapsed_taxonomy_df = collapsed_taxonomy.collapsed_table.view(
    view_type=pd.DataFrame,
)
# Optional CSV export, left disabled.
# NOTE(review): the file name says "family", but this notebook collapses at
# level 2 -- confirm the name (or the level) before enabling the export.
#collapsed_taxonomy_df.to_csv('/storage/zkarwowska/microbiome-interactions/datasets/processed/taxonomy/550_family.csv')

In [122]:
# Bare expression: the notebook renders the DataFrame as this cell's output.
collapsed_taxonomy_df

Unnamed: 0,k__Bacteria;p__Proteobacteria,k__Bacteria;p__Fusobacteria,k__Bacteria;p__Firmicutes,k__Bacteria;p__Bacteroidetes,k__Bacteria;p__Actinobacteria,k__Bacteria;p__Tenericutes,k__Bacteria;p__Cyanobacteria,k__Bacteria;p__Verrucomicrobia,k__Bacteria;p__Synergistetes,k__Archaea;p__Euryarchaeota,k__Bacteria;__,k__Bacteria;p__Acidobacteria
550.L1S343.s.1.sequence,384.0,0.0,9904.0,15558.0,6.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0
550.L2S7.s.2.sequence,312.0,0.0,10198.0,20158.0,0.0,61.0,0.0,16.0,0.0,0.0,0.0,0.0
550.L2S81.s.2.sequence,342.0,0.0,9756.0,17441.0,2.0,181.0,0.0,17.0,0.0,0.0,0.0,0.0
550.L1S273.s.1.sequence,443.0,0.0,8809.0,22864.0,6.0,0.0,0.0,6.0,0.0,0.0,0.0,0.0
550.L1S112.s.1.sequence,600.0,0.0,10112.0,19751.0,4.0,0.0,0.0,37.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...
550.L1S10.s.1.sequence,1696.0,0.0,4314.0,18604.0,0.0,0.0,0.0,443.0,0.0,0.0,0.0,0.0
550.L2S52.s.2.sequence,239.0,0.0,6891.0,14551.0,0.0,33.0,0.0,0.0,0.0,0.0,0.0,0.0
550.L1S200.s.1.sequence,128.0,0.0,9820.0,22487.0,15.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
550.L1S161.s.1.sequence,73.0,0.0,8089.0,18777.0,0.0,0.0,0.0,98.0,0.0,0.0,0.0,0.0
