In [1]:
# Show the working directory; the relative ./data paths used in the cells below are resolved against it.
!pwd

/Volumes/GoogleDrive/My Drive/PhD/Isolation_Dorm/Dorm_analysis


In [3]:
import pandas as pd
import qiime2
from qiime2 import Artifact
import os
from qiime2.plugins import feature_table, fragment_insertion, feature_classifier, diversity, metadata, emperor, deicode, gneiss, phylogeny, taxa

## Import data/metadata

In [6]:
# Load the three inputs from ./data: the feature table and the reference-hit
# sequences as QIIME 2 artifacts, and the sample metadata as a qiime2.Metadata object.
tbl = Artifact.load('./data/137124-feature-table-KathSeqFil.qza')
seqs = Artifact.load('./data/137124-reference-hit.seqs.qza')
meta = qiime2.Metadata.load('./data/13957_metadata.txt')

## Filter out singletons/doubletons

In [6]:
# Size of the unfiltered feature table when viewed as a DataFrame, before any filtering.
tbl.view(pd.DataFrame).shape

(305, 36209)

In [7]:
# Keep only features with a total frequency of at least 3 (this drops the
# singletons and doubletons), then write the filtered table to disk.
tbl_freq3 = feature_table.actions.filter_features(
    table=tbl,
    min_frequency=3,
)
tbl_freq3.filtered_table.save('./data/feature-table_filt_singl.qza')

'./data/feature-table_filt_singl.qza'

#### Filter seqs based on feature table

In [8]:
# Drop the sequences that belonged to singleton/doubleton features, using the
# frequency-filtered table as the reference for what to keep.
filt_seqs = feature_table.actions.filter_seqs(
    data=seqs,
    table=tbl_freq3.filtered_table,
)

In [9]:
# Write the filtered sequences next to the filtered table.
filt_seqs.filtered_data.save('./data/reference-hit_singl_filt.seqs.qza')

'./data/reference-hit_singl_filt.seqs.qza'

### Filter sequences that were not inserted into the tree out of the feature table

In [10]:
# Features that are not in the tree have to be removed so that phylogenetic
# analyses don't get messed up down the line. First step: import the Newick
# insertion tree as a Phylogeny[Rooted] artifact.
!qiime tools import --input-path ./data/insertion_tree.relabelled.tre --output-path ./data/rooted-tree.qza --type 'Phylogeny[Rooted]' 

[32mImported ./data/insertion_tree.relabelled.tre as NewickDirectoryFormat to ./data/rooted-tree.qza[0m


In [11]:
# The imported qza has to be loaded through the Artifact API before it can be used.
tree_r = Artifact.load('./data/rooted-tree.qza')

In [12]:
# Filter the frequency-filtered table against the tree. The result exposes both
# .filtered_table and .removed_table, which the following cells inspect.
filt_tbl = fragment_insertion.actions.filter_features(table = tbl_freq3.filtered_table, tree = tree_r)

In [13]:
# The removed table appears to have no feature columns, i.e. it looks like
# nothing was filtered out -- did every feature align to the references?
filt_tbl.removed_table.view(pd.DataFrame).head()

13957.Apt.A.kitchen.cabinet.face.1
13957.Apt.A.bathroom.sink.handle.left
13957.Apt.A.bedroom.door.face.inside
13957.Apt.A.kitchen.cabinet.handle.7
13957.Apt.A.kitchen.fridge.floor


### Filter chloroplast and mitochondria

In [14]:
# Taxonomy artifact for the reference-hit sequences; used below to exclude
# mitochondria and chloroplast features.
tax = Artifact.load('./data/137124-reference-hit.taxonomy_gg.qza')

In [27]:
# Size of the tree-filtered table before the mitochondria/chloroplast filter.
filt_tbl.filtered_table.view(pd.DataFrame).shape

(305, 19437)

In [28]:
# Remove the features whose taxonomy matches "mitochondria" or "chloroplast"
# from the tree-filtered table.
filt_tbl_nomit_nochlor = taxa.actions.filter_table(
    table=filt_tbl.filtered_table,
    taxonomy=tax,
    exclude='mitochondria,chloroplast',
)

In [29]:
# Preview of the table BEFORE the mitochondria/chloroplast filter, for comparison with the next cell.
filt_tbl.filtered_table.view(pd.DataFrame).head()

Unnamed: 0,TACGTAGGGTGCGAGCGTTGTCCGGAATTACTGGGCGTAAAGAGCTCGTAGGTGGTTTGTCGCGTCGTTTGTGTAATACCGCAGCTTAACTGCGGGGTTGCAGGCGATACGGGCATAACTTGAGTGCTGTAGGGGAGACTGGAATTCCTG,TACGTAGGGTGCGAGCGTTGTCCGGAATTACTGGGCGTAAAGAGCTCGTAGGCGGTTTGTCACGTCGTCTGTGAAATCCTAGGGCTTAACCCTGGACGTGCAGGCGATACGGGCTGACTTGAGTACTACAGGGGAGACTGGAATTTCTGG,TACGTAGGTGGCAAGCGTTATCCGGAATTATTGGGCGTAAAGCGCGCGTAGGCGGTTTTTTAAGTCTGATGTGAAAGCCCACGGCTCAACCGTGGAGGGTCATTGGAAACTGGAAAACTTGAGTGCAGAAGAGGAAAGTGGAATTCCATG,TACGAAGGGGGCTAGCGTTGCTCGGAATCACTGGGCGTAAAGGGTGCGTAGGCGGGTCTTTAAGTCAGGGGTGAAATCCTGGAGCTCAACTCCAGAACTGCCTTTGATACTGAGGATCTTGAGTTCGGGAGAGGTGAGTGGAACTGCGAG,TACGTAGGTGGCAAGCGTTGTCCGGATTTATTGGGCGTAAAGCGAGCGCAGGCGGTCTTTTAAGTCTGATGTGAAAGCCCCCGGCTTAACCGGGGAGGGTCATTGGAAACTGGGAGACTTGAGTGCAGAAGAGGAAAGCGGAATTCCATG,TACGAAGGGTGCAAGCGTTAATCGGAATTACTGGGCGTAAAGCGCGCGTAGGTGGTTCGTTAAGTTGGATGTGAAAGCCCCGGGCTCAACCTGGGAACTGCATCCAAAACTGGCGAGCTAGAGTACGGTAGAGGGTGGTGGAATTTCCTG,TACGTAGGGTGCAAGCGTTAATCGGAATTACTGGGCGTAAAGCGTGCGCAGGCGGTTTTGTAAGACAGTGGTGAAATCCCCGGGCTCAACCTGGGAACTGCCATTGTGACTGCAAGGCTAGAGTGCGGCAGAGGGGGATGGAATTCCGCG,TACGAAGGGGGCTAGCGTTGTTCGGAATTACTGGGCGTAAAGGGCGCGTAGGCGGCGCTTCAAGTCAGATGTGAAAGCCCCGGGCTCAACCTGGGAATAGCATTTGAGACTGGAGTGCTTGAGTTCCGGAGAGGTGGGTGGAATTCCCAG,TACGTAGGGTGCGAGCGTTGTCCGGAATTACTGGGCGTAAAGGGCTCGTAGGTGGTTTGTCGCGTCGTCTGTGAAATTCCGGGGCTTAACTCCGGGCGTGCAGGCGATACGGGCATAACTTGAGTACTGTAGGGGTAACTGGAATTCCTG,TACGGAGGGAGCTAGCGTTGTTCGGAATTACTGGGCGTAAAGCGCACGTAGGCGGCTTTTTAAGTCAGGGGTGAAATCCCGGAGCTCAACTCCGGAACTGCCCTTGAAACTGGGAAGCTAGAATCTTGGAGAGGCGAGTGGAATTCCGAG,...,TACGTAGGTGGCAAGCGTTATCCGGAATTATTGGGCGTAAAGCGCGCGTAGGCGGTAAAATAAGTCTGATGTGAATGCCCCCGGCTCAACCGGGGAGGGTCATTGGAAACTGTTTTACTTGAGTACAGAAGAGGAGAGTGGAATTCCATG,TACGTAGGGTGCAAGCGTTGTCCGGAATTATTGGGCATAAAGAGCTCGTAGGCGGTTGGTCGCGTCGGCTGTGAAAACCCGGAGCTCAACTCCGGGCCTGCAGTCGATACGGGCCGACTTGAGTGTTGCAGGGGAGACTGGAATTCCTGG,TACGTAGGGTCCAAGCGTTAATCGGAATTACTGGGCGTAAAGCGTGCGCAGGCGGTTTTGTAAGCCAGATGTGAAATCCCCGGGCTTAACCTGGGAACTGCATTTGGGACTGCAAGGCTCGAGTACGGCAGAGGGAGGTGGAATTCCACG,TACGGAGGGTGCGAGCGTTATC
CGGATTCACTGGGTTTAAAGGGTGTGTAGGCGGGTCATTAAGTCAGTGGTGAAATCTCTGAGCTCTACTCAGAAACTGCCATTGATACTATTGATCTTGAATACCGTTGAGGTAGGCGGAATATGTCA,TACGTAGGTGGCAAGCGTTGTCCAGAATTATTGGGCGTAAAGCGCGCGCAGGCGGTTCCTTAAGTCTGATGTGAAAGCCCCCGGCTCAACCGGGGAGGGTCATTGGAAACTGGGGAACTTGAGTGCAGAAGAGGAGAGCGGAATTCCACG,TACGGAGGATCCGAGCGTTATCCGGATTTATTGGGTTTAAAGGGTGCGTAGGCGGCATGCTAAGTCAGGGGTGAAAGACGGTGGCTCAACCATCGCAGTGCCTTTGATACTGGTGTGCTTGAATGCGGATGAGGTAGGCGGAATATGGCA,TACGTAGGGTGCAAGCGTTAATCGGAATTACTGGGCGTAAAGCGTGCGCAGGCGGTTTTGTAAGTCTGACGTGAAATCCCCGGGCTTAACCTGGGAATTGCGTTGGAGACTGCAAGGCTAGAATCTGGCAGAGGGGGGTAGAATTCCACG,TACGTAGGGGGCAAGCGTTGTCCGGAATTATTGGGCGTAAAGCGCGCGCAGGCGGCCTATTGAGTCTGATGTGAAAGCCCACGGCTCAACCGTGGAGGGCCATTGGAAACTGGTAGGCTTGAGTGCAGGAGTGGAGAGCGGAATTCCCGG,TACGTAGGGTGCAAGCGTTGTCCGGAATTATTGGGCATAAAGAGCTCGTAGGCGGTTGGTCGCGTCGGCTGTGAAAACCCGGAGCTCAACTCCGGGCCTGCAGTCGATACGGGCCGACTTGAGTGTTGCAGGGGAGACTGGAATTCCTAG,TACGGAGGATCCGAGCGTTATCCGGATTTATTGGGTTTAAAGGGTGCGTAGGCGGCATGCTAAGTCAGGGGTGAAAGACGGTGGCTCAACCATCGCAGTGCCTTTGATACTGGTGTGCTTGAATGCGGATGAGGTAGGCGGAATGTGGCA
13957.Apt.A.kitchen.cabinet.face.1,0.0,0.0,83.0,0.0,0.0,0.0,8.0,0.0,12.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
13957.Apt.A.bathroom.sink.handle.left,0.0,54.0,1844.0,19.0,0.0,0.0,2.0,0.0,1336.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
13957.Apt.A.bedroom.door.face.inside,0.0,79.0,1338.0,18.0,0.0,0.0,0.0,0.0,928.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
13957.Apt.A.kitchen.cabinet.handle.7,0.0,100.0,671.0,28.0,0.0,24.0,0.0,0.0,600.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
13957.Apt.A.kitchen.fridge.floor,0.0,61.0,7824.0,4.0,0.0,0.0,12.0,0.0,15367.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [30]:
# Preview of the table AFTER the mitochondria/chloroplast filter.
filt_tbl_nomit_nochlor.filtered_table.view(pd.DataFrame).head()

Unnamed: 0,TACGTAGGGCGCAAGCGTTGTCCGGAATTATTGGGCGTAAAGAGCTTGTAGGTGGCTTGTCGCGTCTGCCGTGAAAACCCGAGGCTCAACCTCGGGCGTGCGGTGGGTACGGGCAGGCTAGAGTGTGGTAGGGGAGACTGGAACTCCTGG,TACAGAGGGGGCAAGCGTTATTCGGAATTATTGGGCGTAAAGGGCGCGTAGGCGGCTTTGCAAGTGACGGGTGAAATCCCTCGGCTTAACCGAGGAACTGCCTGTCAGACTGTAAGGCTTGAGACCGGGAGAGGTGAGTGGAATTCCCAG,TACAGAGGGTGCAAGCGTTAATCGGAATTACTGGGCGTAAAGCGCGCGTAGGTGGTTTGTTAAGTTGGATGTGAAAGCCCCGGGCTCAACCTGGGAACTGCATCCAAAACTGGCAAGCTAGAGTACGGTAGAGGGTGGTGGAATTTCCTG,TACGAAGGGGGCTAGCGTTGTTCGGATTTACTGGGCGTAAAGCGCACGTAGGCGGATTGTTAAGTTAGGGGTGAAATCCCAGGGCTCAACCCTGGAACTGCCTCTAATACTGGCAATCTTGAGTTCGAGAGAGGTGAGTGGAATTCCGAG,TACGTAGGGTGCAAGCGTTGTCCGGAATTATTGGGCGTAAAGGGCTCGTAGGCGGTTTGTCGCGTCGGGAGTGAAAACTCAGGGCTTAACCCTGAGCCTGCTTCCGATACGGGCAGACTAGAGGTATGCAGGGGAGAACGGAATTCCTGG,TACGGAGGGTGCAAGCGTTGTCCGGAATCATTGGGCGTAAAGAGTTCGTAGGTGGTTTGTTAAGTTTGGTGTTAAATGCAGAGGCTCAACTTCTGTTCGGCATCTGATACTGGCAGACTAGAATGCGGTAGAGGTAAAGGGAATTCCTGG,TACGAAGGGGGCTAGCGTTGTTCGGAATTACTGGGCGTAAAGCGCACGTAGGCGGATTGTTAAGTTAGGGGTGAAATCCCAGGGCTCAACCCTGGAACTGCCTTTAATACTGGCAATCTAGAGGCCGAGAGAGGTGAGTGGAATTCCGAG,TACGTAGGGAGCAAGCGTTATCCGGATTTATTGGGTGTAAAGGGTGCGTAGACGGGAAGTCAAGTTAGTTGTGAAATCCCTCGGCTCAACTGAGGAACTGCAACTAAAACTAACTTTCTTGAGTGCTGGAGAGGAAAGTGGAATTCCTAG,TACGGAGGGTGCAAGCGTTATCCGGATTCACTGGGTTTAAAGGGTGCGTAGGCGGGTGTGTAAGTCCGTGGTGAAATCTCCGAGCTTAACTCGGAAACTGCCGTGGGTACTGCATGTCTTGAATGTTGTGGAGGTGAGCGGAATATGTCA,TACAGAGAGTGCGAGCGTTAATCAGAATTACTGGGCGTAAAGCGCATGTAGGTGGATAACTAAGTCGAATGTGAAAGCCCCGGGCTTAACTTGGGAATTGCATCCGATACTGGTTGTCTAGAGTATGGTAGAGGGAAGTGGAATTTCCGG,...,TACGTAGGGTGCAAGCGTTGTCCGGAATTATTGGGCGTAAAGAGCTCGTAGGCGGCTTGTCGCGTCTGCTGTGAAAACCAGAGGCTCAACCTCTGGCCTACAGTGGGTACGGGCAAGCTAGAGTGCGGTAGGGGAGATTGGAATTCCTGG,TACGTAGGGAGCAAGCGTTGTCCGGAATCATTGGGCGTAAAGCGCGCGTAGGTGGCCATTTAAGTCCGCTGTGAAAGTCAAAGGCTCAACCTTTGAAAGCCGGTGGATACTGGATGGCTAGAGTACGGAAGAGGCGAGTGGAATTCCTGG,TACGAAGGGGGCTAGCGTTGCTCGGAATGACTGGGCGTAAAGGGCGCGTAGGCGGTTCGGACAGTCAGATGTGAAATTCCTGGGCTTAACCTGGGGGCTGCATTTGATACGTCCGGGCTTGAGTGTGGAAGAGGGTTGTGGAATTCCCAG,TACGGAGGGTGCAAGCGTTATC
CGGATTCACTGGGTTTAAAGGGTGCGTAGGTGGGTTTGTAAGTCAGTGGTGAAATCTCCGTGCTTAACATGGAAACTGCCATTGATACTACAGGTCTTGAATTATCTGGAGGTCAGCGGAATATGTCA,TACGTAGGGGGCAAGCGTTGTCCGGAATTATTGGGCGTAAAGCGCGCGCAGGCGGTTAATTAAGTCAGATGTGAAAGGCTACGGCTCAACCGTAGAGTTGCATTTGAAACTGGTTGACTTGAGTGCAGGAGAGGTAAGTGGAATTCCCGG,TACGAAGGGGGCTAGCGTTGCTCGGATTTACTGGGCGTAAAGGGCGCGTAGGCGGATGACCAAGTTGGGGGTGAAAGCCCGGGGCTCAACCTCGGAATTGCCTTCAAAACTGGTTGTCTTGAGTATGGGAGAGGTGTGTGGAACTCCGAG,TACGTAGGGTGCGAGCGTTGTCCGGAATTACTGGGCGTAAAGAGCTCGTAGGTGGTTTGTCGCGTCGTCTGTGAAATTCCGGGGCTTAACTCCGGGCGTGCAGGCGATACGGGCAATACTTGAGTGATGTAGGGGTAACTGGAATTCCTG,TACGTAGGTGGCAAGCGTTGTCCGGATTTATTGGGTTTAAAGGGTGCGTAGGTGGCTGTCTAAGTCAGTGGTGAAATATCTCAGCTTAACTGAGAGGGTGCCATTGATACTGGATAGCTTGAGTACAGATGAGGTAGGCGGAATTGACGG,TACGTAGGCAGCAAGCGTTGTTCGGAATTACTGGGCGTAAAGAGTGTGTAGGCGGTTTTCCAAGTTTGGTGTGAAATCTCCCGGCTTAACTGGGAGGGTGCGCCGAAAACTGGGAGGCTTGAGTGCCGGAGAGGATAGCGGAATTCCCGG,TACGTAGGGTGCAGGCGTTAATCGGAATTACTGGGCGTAAAGCGTGCGCAGGCGGCTCCTTAAGTCAGATGTGAAATCCCCGGGCTTAACCTGGGAACTGCGTTTGAAACTGAGGAGCTCGAGTGTGGAAGAGGGGGGTGGAATTCCAGG
13957.Apt.A.kitchen.cabinet.face.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
13957.Apt.A.bathroom.sink.handle.left,0.0,0.0,17.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
13957.Apt.A.bedroom.door.face.inside,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
13957.Apt.A.kitchen.cabinet.handle.7,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
13957.Apt.A.kitchen.fridge.floor,4.0,0.0,10.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [54]:
# Save the preprocessed table (frequency-, tree- and mitochondria/chloroplast-filtered).
filt_tbl_nomit_nochlor.filtered_table.save('./data/preprocessed-feature-table.qza')

'./data/preprocessed-feature-table.qza'

In [32]:
# Reload the preprocessed table from disk. NOTE: this rebinds filt_tbl_nomit_nochlor
# from the filter_table results object to the table artifact itself, so the cells
# below pass it directly instead of going through .filtered_table.
filt_tbl_nomit_nochlor = Artifact.load('./data/preprocessed-feature-table.qza')

### Create a feature table without the pos/neg controls

In [17]:
# Inspect the sample metadata as a DataFrame.
meta.to_dataframe()
# empo_1 is 'Control' for all controls (positive and negative), so that column
# is what identifies the control samples.

Unnamed: 0_level_0,altitude,anonymized_name,average_cq,collection_timestamp,decision_tree_result,description,dna_extracted,elevation,empo_1,empo_2,...,sample_type,scientific_name,sep_id,sex,source,surface_type,taxon_id,title,tube_id,tube_id_study_no
sample_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
13957.Apt.A.bathroom.door.face.inside,not applicable,bathroom.door.face.inside,,2021-07-14 00:00:00,Not Detected,bathroom door,True,193.0,Free-living,Non-saline,...,surface,indoor metagenome,EXC_SEP_005161,not applicable,Environmental,sealed_wood,1256227.0,COVID Isolation Dorm,359138745.0,13957.359139
13957.Apt.A.bathroom.door.face.outside,not applicable,bathroom.door.face.outside,,2021-07-14 00:00:00,Not Detected,front face of used bathroom door,True,193.0,Free-living,Non-saline,...,surface,indoor metagenome,EXC_SEP_005160,not applicable,Environmental,sealed_wood,1256227.0,COVID Isolation Dorm,359138523.0,13957.359139
13957.Apt.A.bathroom.door.knob.inside,not applicable,bathroom.door.knob.inside,34.157,2021-07-14 00:00:00,Inconclusive,bathroom door,True,193.0,Free-living,Non-saline,...,surface,indoor metagenome,EXC_SEP_005160,not applicable,Environmental,metal,1256227.0,COVID Isolation Dorm,359138729.0,13957.359139
13957.Apt.A.bathroom.door.knob.outside,not applicable,bathroom.door.knob.outside,,2021-07-14 00:00:00,Not Detected,bathroom door,True,193.0,Free-living,Non-saline,...,surface,indoor metagenome,EXC_SEP_005161,not applicable,Environmental,metal,1256227.0,COVID Isolation Dorm,359138728.0,13957.359139
13957.Apt.A.bathroom.floor.middle,not applicable,bathroom.floor.middle,31.8843333333333,2021-07-14 00:00:00,Detected,floor in the middled of the bathroom,True,193.0,Free-living,Non-saline,...,surface,indoor metagenome,EXC_SEP_005160,not applicable,Environmental,vinyl,1256227.0,COVID Isolation Dorm,359138516.0,13957.359139
13957.Apt.A.bathroom.mirror,not applicable,bathroom.mirror,,2021-07-14 00:00:00,Not Detected,mirror above the bathroom sinks,True,193.0,Free-living,Non-saline,...,surface,indoor metagenome,EXC_SEP_005161,not applicable,Environmental,glass,1256227.0,COVID Isolation Dorm,359138767.0,13957.359139
13957.Apt.A.bathroom.shower.floor,not applicable,bathroom.shower.floor,,2021-07-14 00:00:00,Not Detected,shower floor,True,193.0,Free-living,Non-saline,...,surface,indoor metagenome,EXC_SEP_005160,not applicable,Environmental,fiberglass,1256227.0,COVID Isolation Dorm,359138563.0,13957.359139
13957.Apt.A.bathroom.shower.knobs,not applicable,bathroom.shower.knobs,,2021-07-14 00:00:00,Not Detected,shower knobs,True,193.0,Free-living,Non-saline,...,surface,indoor metagenome,EXC_SEP_005161,not applicable,Environmental,metal,1256227.0,COVID Isolation Dorm,359138741.0,13957.359139
13957.Apt.A.bathroom.shower.wall,not applicable,bathroom.shower.wall,,2021-07-14 00:00:00,Not Detected,walls of the shower,True,193.0,Free-living,Non-saline,...,surface,indoor metagenome,EXC_SEP_005161,not applicable,Environmental,fiberglass,1256227.0,COVID Isolation Dorm,359138719.0,13957.359139
13957.Apt.A.bathroom.sink.bowl.left,not applicable,bathroom.sink.bowl.left,,2021-07-14 00:00:00,Not Detected,left bathroom sink bowl,True,193.0,Free-living,Non-saline,...,surface,indoor metagenome,EXC_SEP_005160,not applicable,Environmental,porcelain,1256227.0,COVID Isolation Dorm,359138712.0,13957.359139


In [33]:
# Exclude the controls: keep only samples whose empo_1 value is not 'Control'.
filt_tbl_nomit_chlor_ctrl = feature_table.actions.filter_samples(
    table=filt_tbl_nomit_nochlor,
    metadata=meta,
    where="[empo_1]!='Control'",
)

In [34]:
# Save the control-free table, then preview it.
filt_tbl_nomit_chlor_ctrl.filtered_table.save('./data/filt_tbl_nomit_chlor_ctrl.qza')
filt_tbl_nomit_chlor_ctrl.filtered_table.view(pd.DataFrame).head()

Unnamed: 0,TACGTAGGTGGCGAGCGTTATCCGGATTTACTGGGCGTAAAGGGCGTGTAGGCGGCTAGATAAGTGTGATGTTTAAATCCAAGGCTTAACCTTGGGGTTCATTACAAACTGTTTAGCTTGAGTGCTGGAGAGGATAGTGGAATTCCTAGT,TACGGAGGGTGCGAGCGTTGTCCGGATTTATTGGGTTTAAAGGGTGCGTAGGTGGCTTTATAAGTCAGTGGTGAAATACAGCCGCTCAACGGTTGAGGTGCCATTGATACTGTAGAGCTTGAAATAATTGGAGGCTGCCGGAATGGATGG,TACGTAGGGGGCAAACGTTGTCCGGATTTACTGGGTGTAAAGGGTGCTCAGGCGGTTTTGTAAGTCAGAAGTGAAATCCTAGGGCTTAACTCTGGAACTGCTTTTGATACTGCAAGGCTTGAATGTGGAAGAGGAGGATGGAATTTCTGG,TACAGAGGTGGCGAGCGTTGTTCGGATTTACTGGGCGTAAAGGGTGCGTAGGCGGTTTAGTAAGTCGGATGTGAAAGCCCAGGGCTCAACCCTGGAACTGCATTCGATACTGCTGAACTAGAGTACAGGAAGGGAGAGGGGAATTCTTGG,TACGGAGGGTGCGAGCGTTATCCGGAATCACTGGGCGTAAAGGGCGCGTAGGCGGTTCAGTAAGTCTGATGTTAAAGAGCGGGGCTCAACCCCGTCACGGCGTTGGATACTGCTGGGCTTGACGACTGGAGAGGTGAGTGGAATTACCAG,TACGTAGGGTGCAAGCGTTGTCCGGAATTATTGGGCGTAAAGAGCTCGTAGGCGGTTTGTCGCGTCGGGAGTGAAAACCTACAGCTTAACTGTGGGCTTGCTTTCGATACGGGCAGACTTGAGGCATGCAGGGGAGAACGGAATTCCTGG,TACGTATGGGGCGAGCGTTGTCCGGAGTTATTGGGCGTAAAGGGTACGTAGGCGGTTTTTTAAGTCAGGTGTCAAAGCGTGGAGCTTAACTCCATTAAGCACTTGAAACTGAAAGACTTGAGTGAAGGAGAGGAAAGTGGAATTCCTAGT,TACGGAGGATGCAAGCGTTATCCGGAATGATTGGGCGTAAAGGGTCTGCAGGTGGCAATGTAAGTCTGCTGTTAAAGAATGAGGCTCAACCTCATACCAGCAGTGGAAACTACATAGCTAGAGTGCGTTCGGGGTAGAGGGAATTCCTGG,GACGGAGGGTGCAAGCGTTGTTCGGAATTACTGGGCGTAAAGCGCGCGCAGGCGGTCTTCTAAGTCCTTTGTGAAATCCCGGAGCTCAACTCCGGTTGTGCAGAGGATACTGGAAGACTAGAGACCAGTAGAGGCTAGCGGAATTCCTGG,TACGTAGGGGGCGAGCGTTATCCGGATTCATTGGGCGTAAAGCGCTCGTAGGCGGCCCGTCTGGTCGGGAGTCAAAGCCCGGGGCTCAACCCCGGCCCGCTCCCGATACCGGCGGGCTTGAGTCGCGCAGGGGAGGCCGGAATTCCGGGG,...,TACGGAGGATGCAAGCGTTATCCGGATTTATTGGGTTTAAAGGGTACGTAGGTGGCCTGATAAGTCAGTGGTGAAAACCTGTCGCTTAACGATAGGCGTGCCATTGATACTGTTGGGCTTGAGTACAGATGAGGTAAGCGGAATGTGTAG,TACGTAGGGTGCGAGCGTTGTCCGGAATTACTGGGCGTAAAGAGCTCGTAGGTGGTTTGTCGCGTTGTCCGTGAAAACTCACAACTCAATTGTGGGCGTGCGGGCGATACGGGCAGACTGGAGTACTGCAGGGGAGACTGGAATTCCTGG,TACGTAGGGGGCAAGCGTTGTCCGGAATTATTGGGCGTAAAGCGCGCGCAGGCGGTTTCTTAAGTCTTGTGTTTAATCCCGGGGCTCAACCTCGGTTCGCATGGGAAACTGGGAAACTGGAGTGCAGGAGAGGAAAGTGGAATTCCACGT,TACGTAGGGGGCAAGCGTTATC
CGGAATCATTGGGCGTAAAGCGCGTGTAGGCGGTCTCGTAAGTCCGCTCTGAAAGCCCAAGGCTCAACCTTGGGAGGCGGGTGGATACTACGAGACTGGAGTACGGAAGAGGAGATTGGAATTCCTGG,TACGGAGGATCCGAGCGTTATCCGGAATTATTGGGTTTAAAGGGTGCGTAGGCGGCGTGCTAAGTCAGGGGTGAAAGACGGTGGCTCAACCATCGCAGTGCCTTTGATACTGGCATGCTTGAATGTACTTGAGGTAGGCGGAATGTGGCA,TACGTAGGGGTCGAGCGTTGTCCGGAGTTACTGGGCGTAAAGCGTGCGCAGGCGGCTCATTACGCCCGGCGTGAAAGCCCCCGGCTCAACCGGGGAGGGTCGTCGGGTACGGGTGAGCTTGAGGGTATCAGGGGTTGGTGGAATTCCCGG,TACGGAGGGGGCTAGCGTTGTTCGGAATTACTGGGCGTAAAGCGCACGTAGGCGGATCGGACAGTCAGAGGTGAAATCCCAGGGCTCAACCTTGGAACTGCCTTTGAAACTACTGGTCTGGAGTTCGAGAGAGGTGAGTGGAATTCCGAG,TACGTAGGGTGCAAGCGTTGTCCGGAATTATTGGGCGTAAAGAGCTCGTAGGCGGTTGGTCGCGTCGGCTGTGAAAACCCGGAGCTCAACTCCGGGCCTACAGTCGATACGGGCCGACTTGAGTGTTGCAGGGGAGACTGGAATTCCTGG,TACGTAGGGTCCGAGCGTTGTCCGGAATTATTGGGCGTAAAGGGCTCGTAGGCGGTTTGTTGCGTCGGGAGTGAAAACCCAGGGCTTAACCCTGGGCCTGCTTTCGATACGGGCAGACTAGAGGCATTCAGGGGAGAACGGAATTCCTGG,TACAGAGGGTGCGAACGTTGCTCGGAATTACTGGGCGTAAAGCGCATGTAGGCGGGTCGGCAAGTCAGATGTGAAATCCCCGGGCTCAACCCGGGAACTGCATCTGAAACTGCTGGTCTTGAGTACTGGAGAGGGTGGCGGAATTCCTGG
13957.Apt.A.kitchen.cabinet.face.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
13957.Apt.A.bathroom.sink.handle.left,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
13957.Apt.A.bedroom.door.face.inside,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
13957.Apt.A.kitchen.cabinet.handle.7,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
13957.Apt.A.kitchen.fridge.floor,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,0.0


In [2]:
# Row sums of the control-free table. Rows are samples in this DataFrame view,
# so despite its name feat_cts holds one total count per sample.
feat_cts = filt_tbl_nomit_chlor_ctrl.filtered_table.view(pd.DataFrame).sum(axis=1)

NameError: name 'filt_tbl_nomit_chlor_ctrl' is not defined

In [1]:
# Sort the per-sample totals so the shallowest samples come first.
feat_cts.sort_values()

NameError: name 'feat_cts' is not defined

In [107]:
# Alpha-rarefaction curves without a phylogeny, computed on the control-free
# table with 20 rarefaction depths up to a maximum of 20,000.
alpha_rare_no_phylo = diversity.actions.alpha_rarefaction(table = filt_tbl_nomit_chlor_ctrl.filtered_table,
                                                          max_depth = 20000,
                                                          metadata = meta,  # this comma was missing, which made the cell a SyntaxError
                                                          steps = 20)
# Based on the Shannon and observed curves that this outputs, 4000 looks like a good depth.

In [11]:
# Non-phylogenetic alpha rarefaction on tbl_filt_ur, saved as a visualization.
# NOTE(review): tbl_filt_ur is only assigned in a later cell (the
# indoor_space_classifier / fridge.face filtering), so this cell fails with a
# NameError on a top-to-bottom run. It also rebinds alpha_rare_no_phylo, which
# an earlier cell computes from filt_tbl_nomit_chlor_ctrl -- confirm which
# table the saved .qzv is meant to describe.
alpha_rare_no_phylo = diversity.actions.alpha_rarefaction(table = tbl_filt_ur.filtered_table,
                                                          max_depth = 20000,
                                                          metadata = meta,
                                                          steps = 20)
alpha_rare_no_phylo.visualization.save('./data/alpha_rare_no_phylo.qzv')

'./data/alpha_rare_no_phylo.qzv'

In [21]:
# The tree was filtered in an updated QIIME environment so that it doesn't take forever to run;
# the filtered tree loaded here comes from filter_tree_new_qiime.ipynb.
filt_tree = Artifact.load('./data/filt_tree.qza')

In [23]:
# Alpha rarefaction on the control-free table with the filtered tree supplied
# as the phylogeny (same max depth and number of steps as the run without it).
alpha_rare_phylo = diversity.actions.alpha_rarefaction(
    table=filt_tbl_nomit_chlor_ctrl.filtered_table,
    phylogeny=filt_tree,
    metadata=meta,
    max_depth=20000,
    steps=20,
)

In [118]:
# Save the non-phylogenetic rarefaction visualization (same output path as the save in the cell further up).
alpha_rare_no_phylo.visualization.save('./data/alpha_rare_no_phylo.qzv')


'./data/alpha_rare_no_phylo.qzv'

In [24]:
# Save the phylogenetic rarefaction visualization.
alpha_rare_phylo.visualization.save('./data/alpha_rare_phylo.qzv')

'./data/alpha_rare_phylo.qzv'

In [138]:
# Rarefy the control-free table to a sampling depth of 4000, the depth picked
# from the alpha-rarefaction curves above.
rarefy_result = feature_table.methods.rarefy(
    table=filt_tbl_nomit_chlor_ctrl.filtered_table,
    sampling_depth=4000,
)
rarefied_table = rarefy_result.rarefied_table

I commented out the next cell on purpose: re-saving would overwrite the rarefied table on disk, and I need to make sure I use the same rarefied table every time.

In [141]:
# Intentionally disabled: running this save again would overwrite the rarefied table on disk,
# and every later step has to keep using the same rarefied table.
#rarefied_table.save('./data/preprocessed-feature-table-4k.qza')

'./data/preprocessed-feature-table-4k.qza'

In [4]:
# tbl_all: the saved 4k-rarefied table; tbl_all_ur: the unrarefied, control-free table saved earlier.
tbl_all = Artifact.load('./data/preprocessed-feature-table-4k.qza')
tbl_all_ur = Artifact.load('./data/filt_tbl_nomit_chlor_ctrl.qza')

In [7]:
# The rarefied table (tbl_filt) and the unrarefied one (tbl_filt_ur) get the
# same two sample filters, applied one after the other:
#   1. drop samples whose indoor_space_classifier is 'dont_use'
#   2. drop the sample 13957.Apt.A.kitchen.fridge.face
usable_space_clause = "[indoor_space_classifier]!='dont_use'"
not_fridge_face_clause = "[sample_name]!='13957.Apt.A.kitchen.fridge.face'"

tbl_filt = feature_table.actions.filter_samples(
    table=tbl_all, metadata=meta, where=usable_space_clause)
tbl_filt = feature_table.actions.filter_samples(
    table=tbl_filt.filtered_table, metadata=meta, where=not_fridge_face_clause)

tbl_filt_ur = feature_table.actions.filter_samples(
    table=tbl_all_ur, metadata=meta, where=usable_space_clause)
tbl_filt_ur = feature_table.actions.filter_samples(
    table=tbl_filt_ur.filtered_table, metadata=meta, where=not_fridge_face_clause)


In [8]:
# Size of the rarefied table after the two sample filters.
tbl_filt.filtered_table.view(pd.DataFrame).shape


(224, 10776)

In [113]:
# Size of the unrarefied table after the same two sample filters.
tbl_filt_ur.filtered_table.view(pd.DataFrame).shape

(260, 13796)

In [9]:
# The unrarefied and rarefied tables do not contain the same samples: the unrarefied table
# still has the samples that fall below the rarefaction limit of 4k. I can't just use the
# feature counts from the initial qza, so I'm going to remove those samples manually.
# `diff` = sample IDs that are present in the unrarefied table but not in the rarefied one.
not_rare = tbl_filt_ur.filtered_table.view(pd.DataFrame)
rare = tbl_filt.filtered_table.view(pd.DataFrame)
diff = not_rare.index.difference(rare.index)
diff

Index(['13957.Apt.A.bedroom.ceiling.fan',
       '13957.Apt.A.bedroom.closet.door.face',
       '13957.Apt.A.bedroom.middle.floor',
       '13957.Apt.A.kitchen.cabinet.face.7',
       '13957.Apt.A.kitchen.freezer.face',
       '13957.Apt.A.kitchen.island.stool.seat',
       '13957.Apt.A.kitchen.microwave.interface',
       '13957.Apt.A.kitchen.stove.vent.switch',
       '13957.Apt.A.living.wall.windows',
       '13957.Apt.A.living.window.pane.right',
       '13957.Apt.B.bathroom.shower.knobs', '13957.Apt.B.bedroom.bed.floor',
       '13957.Apt.B.bedroom.ceiling.fan', '13957.Apt.B.bedroom.desk.top',
       '13957.Apt.B.bedroom.door.knob.inside',
       '13957.Apt.B.bedroom.door.knob.outside', '13957.Apt.B.bedroom.outlet',
       '13957.Apt.B.entrance.door.knob.inside',
       '13957.Apt.B.entrance.door.threshold.inside',
       '13957.Apt.B.entrance.door.threshold.outside',
       '13957.Apt.B.kitchen.cabinet.handle.4',
       '13957.Apt.B.kitchen.cabinet.handle.6',
       '13957.Apt.B.

In [17]:
# Pull the metadata rows of the samples that are missing from the rarefied table
# and tally them by their decision_detected_inconclusive value.
metadf=meta.to_dataframe()
meta_failed_rare = metadf.loc[diff]
meta_failed_rare.value_counts('decision_detected_inconclusive')

decision_detected_inconclusive
Not Detected    23
Detected        13
dtype: int64

In [121]:
# Print the total count of every sample that is missing from the rarefied table.
# The per-sample totals are computed once up front; the previous version rebuilt
# the DataFrame and re-summed the whole table on every loop iteration.
depth_by_sample = tbl_filt_ur.filtered_table.view(pd.DataFrame).sum(axis=1)
for sample in diff:
    print(depth_by_sample.loc[sample])

3093.0
3799.0
2987.0
3063.0
3149.0
3905.0
2763.0
3205.0
3277.0
3059.0
3427.0
3465.0
3466.0
2550.0
2748.0
3847.0
3179.0
2858.0
2816.0
3724.0
117.0
270.0
646.0
3663.0
2828.0
3004.0
3322.0
3966.0
3056.0
3390.0
2909.0
2684.0
3916.0
3607.0
3618.0
2965.0


In [116]:
# Note to future me, because this will be confusing: `diff` lists the samples
# that differ between the unrarefied and rarefied tables, and they are filtered
# out of the unrarefied table one by one. Each step's result is stored in
# tbl_filt_ur2 under its step number (key 0 is the starting table). In
# hindsight I could have just opened the qza of the unrarefied samples and
# copied the feature count number -- whatever, this is keeping me sharp.
tbl_filt_ur2 = {0: tbl_filt_ur}
i = 0
for i, sample in enumerate(diff, start=1):
    tbl_filt_ur2[i] = feature_table.actions.filter_samples(
        table=tbl_filt_ur2[i - 1].filtered_table,
        metadata=meta,
        where="[sample_name]!='%s'" % sample,
    )
print(i)

36


In [118]:
# Sanity check: after the last filtering step (key 36, the count printed above) the
# unrarefied table should contain no sample that is absent from the rarefied table,
# so this difference is expected to be empty.
not_rare_check = tbl_filt_ur2[36].filtered_table.view(pd.DataFrame)
rare_check = tbl_filt.filtered_table.view(pd.DataFrame)
not_rare_check.index.difference(rare_check.index)

Index([], dtype='object')

In [123]:
# Save the matched pair of tables: the rarefied one and the unrarefied one after the last filtering step.
tbl_filt.filtered_table.save('./data/preprocessed-feature-table-4k-only-apt.qza')
tbl_filt_ur2[36].filtered_table.save('./data/preprocessed-feature-table-unrare-only-apt.qza')

'./data/preprocessed-feature-table-unrare-only-apt.qza'

I need to create tables filtered down to the host_subject_id level (Apt A vs. B vs. C). I also need to filter out samples that are not associated with a room (researcher's shoes, laptop, etc.).

In [124]:
# One rarefied table per apartment, selected on host_subject_id.
apartment_clauses = (
    "[host_subject_id]='Apt.A'",
    "[host_subject_id]='Apt.B'",
    "[host_subject_id]='Apt.C'",
)
tbl_a, tbl_b, tbl_c = [
    feature_table.actions.filter_samples(table=tbl_filt.filtered_table,
                                         metadata=meta,
                                         where=clause)
    for clause in apartment_clauses
]

In [125]:
# Save the three per-apartment rarefied tables.
tbl_a.filtered_table.save('./data/a-apt-preprocessed-4k.qza')
tbl_b.filtered_table.save('./data/b-apt-preprocessed-4k.qza')
tbl_c.filtered_table.save('./data/c-apt-preprocessed-4k.qza')

'./data/c-apt-preprocessed-4k.qza'

In [126]:
# One rarefied table per room, selected on indoor_space_classifier.
room_clauses = (
    "[indoor_space_classifier]='kitchen'",
    "[indoor_space_classifier]='bedroom'",
    "[indoor_space_classifier]='bathroom'",
    "[indoor_space_classifier]='living_room'",
)
tbl_kit, tbl_bed, tbl_bath, tbl_liv = [
    feature_table.actions.filter_samples(table=tbl_filt.filtered_table,
                                         metadata=meta,
                                         where=clause)
    for clause in room_clauses
]

In [127]:
# Save the four per-room rarefied tables.
tbl_kit.filtered_table.save('./data/kitchen-preprocessed-4k.qza')
tbl_bed.filtered_table.save('./data/bedroom-preprocessed-4k.qza')
tbl_bath.filtered_table.save('./data/bathroom-preprocessed-4k.qza')
tbl_liv.filtered_table.save('./data/living-preprocessed-4k.qza')

'./data/living-preprocessed-4k.qza'

In [131]:
# One unrarefied table per apartment, taken from the last filtering step
# (key 36) of tbl_filt_ur2 and selected on host_subject_id.
apartment_clauses = (
    "[host_subject_id]='Apt.A'",
    "[host_subject_id]='Apt.B'",
    "[host_subject_id]='Apt.C'",
)
tbl_a_ur, tbl_b_ur, tbl_c_ur = [
    feature_table.actions.filter_samples(table=tbl_filt_ur2[36].filtered_table,
                                         metadata=meta,
                                         where=clause)
    for clause in apartment_clauses
]

In [132]:
# Save the three per-apartment unrarefied tables.
tbl_a_ur.filtered_table.save('./data/a-apt-preprocessed-unrare.qza')
tbl_b_ur.filtered_table.save('./data/b-apt-preprocessed-unrare.qza')
tbl_c_ur.filtered_table.save('./data/c-apt-preprocessed-unrare.qza')

'./data/c-apt-preprocessed-unrare.qza'

In [133]:
# One unrarefied table per room, taken from the last filtering step (key 36)
# of tbl_filt_ur2 and selected on indoor_space_classifier.
room_clauses = (
    "[indoor_space_classifier]='kitchen'",
    "[indoor_space_classifier]='bedroom'",
    "[indoor_space_classifier]='bathroom'",
    "[indoor_space_classifier]='living_room'",
)
tbl_kit_ur, tbl_bed_ur, tbl_bath_ur, tbl_liv_ur = [
    feature_table.actions.filter_samples(table=tbl_filt_ur2[36].filtered_table,
                                         metadata=meta,
                                         where=clause)
    for clause in room_clauses
]

In [134]:
# Save the four per-room unrarefied tables.
tbl_kit_ur.filtered_table.save('./data/kitchen-preprocessed-unrare.qza')
tbl_bed_ur.filtered_table.save('./data/bedroom-preprocessed-unrare.qza')
tbl_bath_ur.filtered_table.save('./data/bathroom-preprocessed-unrare.qza')
tbl_liv_ur.filtered_table.save('./data/living-preprocessed-unrare.qza')

'./data/living-preprocessed-unrare.qza'