In [1]:
import qiime2 as q2
import pandas as pd
import numpy as np
from skbio import OrdinationResults
from RDA.step_wise_anova import run_stepwise_anova
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline

In [4]:
ord_ = q2.Artifact.load('./analysis/bdiv/deicode/deicode_ordination.qza').view(OrdinationResults).samples
meta = q2.Metadata.load('./data/13957_metadata.txt').to_dataframe()

In [6]:
ord_.head() # if doing this for other ordinations will need to manually subset to first three
# principal components

Unnamed: 0,0,1,2
13957.Apt.A.kitchen.cabinet.face.1,-0.018515,0.08785,-0.071923
13957.Apt.A.bathroom.sink.handle.left,-0.025551,0.067995,0.009486
13957.Apt.A.bedroom.door.face.inside,-0.017336,-0.014227,-0.002685
13957.Apt.A.kitchen.cabinet.handle.7,-0.062113,-0.016133,-0.009407
13957.Apt.A.kitchen.fridge.floor,-0.040126,0.040872,0.103806


In [8]:
meta.columns.sort_values()
# apt_space_classifier
# decision_detected_inconclusive
# indoor_space_classifier
# host_subject_id
# surface_type
# orientation
#['apt_space_classifier', 'decision_detected_inconclusive', 'indoor_space_classifier', 'host_subject_id', 'surface_type', 'orientation']

Index(['altitude', 'anonymized_name', 'apt_space', 'apt_space_classifier',
       'average_cq', 'collection_timestamp', 'decision_detected_inconclusive',
       'decision_tree_result', 'description', 'dna_extracted', 'elevation',
       'empo_1', 'empo_2', 'empo_3', 'env_biome', 'env_feature',
       'env_material', 'env_package', 'final_result', 'geo_loc_name',
       'host_age', 'host_age_units', 'host_body_habitat',
       'host_body_mass_index', 'host_body_product', 'host_body_site',
       'host_common_name', 'host_height', 'host_height_units',
       'host_life_stage', 'host_scientific_name', 'host_subject_id',
       'host_taxid', 'host_weight', 'host_weight_units', 'indoor_space',
       'indoor_space_classifier', 'latitude', 'longitude', 'ms2_ct_value',
       'n_ct_value', 'notes', 'orf1ab_ct_value', 'orientation',
       'physical_specimen_location', 'physical_specimen_remaining',
       'qiita_study_id', 'rep_id', 'rp_ct_value', 's_ct_value', 'sample_code',
       'sample_t

In [19]:
md_cols_used = ['apt_space_classifier', 'decision_detected_inconclusive', 'indoor_space_classifier', 'host_subject_id']
#this matches the metadata and ordination files 
orddf = ord_.copy()
meta_rda = meta.copy().dropna(subset=md_cols_used)

ind_ = list(set(orddf.index) & set(meta_rda.index))
orddf = orddf.reindex(ind_)
meta_rda = meta_rda.reindex(ind_)

In [15]:
meta_rda.apt_space_classifier.value_counts()

Apt.C.kitchen        45
Apt.A.kitchen        34
Apt.C.bathroom       31
Apt.A.bathroom       31
Apt.A.bedroom        23
Apt.C.bedroom        21
Apt.A.living_room    16
Apt.C.dont_use       14
Apt.B.bathroom       13
Apt.B.kitchen        12
Apt.C.living_room    12
Apt.B.bedroom        11
Apt.B.living_room     8
Apt.A.dont_use        8
Apt.B.dont_use        8
Name: apt_space_classifier, dtype: int64

In [16]:
set(meta_rda.apt_space_classifier)

{'Apt.A.bathroom',
 'Apt.A.bedroom',
 'Apt.A.dont_use',
 'Apt.A.kitchen',
 'Apt.A.living_room',
 'Apt.B.bathroom',
 'Apt.B.bedroom',
 'Apt.B.dont_use',
 'Apt.B.kitchen',
 'Apt.B.living_room',
 'Apt.C.bathroom',
 'Apt.C.bedroom',
 'Apt.C.dont_use',
 'Apt.C.kitchen',
 'Apt.C.living_room'}

In [18]:
meta_rda[meta_rda.apt_space_classifier=='Apt.C.living_room']

Unnamed: 0_level_0,altitude,anonymized_name,average_cq,collection_timestamp,decision_tree_result,decision_detected_inconclusive,description,dna_extracted,elevation,empo_1,...,sample_type,scientific_name,sep_id,sex,source,surface_type,taxon_id,title,tube_id,tube_id_study_no
sample_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
13957.Apt.C.living.fan.switch,not applicable,living.fan.switch,33.403,8/24/21 0:00,Inconclusive,Detected,fan switch in the living room,True,193.0,Free-living,...,surface,indoor metagenome,EXC_SEP_005598,not applicable,Environmental,plastic,1256227.0,COVID Isolation Dorm,363237760.0,13957.36324
13957.Apt.C.living.floor.island,not applicable,living.floor.island,32.81833333,8/24/21 0:00,Detected,Detected,floor near island on carpeted (living room side),True,193.0,Free-living,...,surface,indoor metagenome,EXC_SEP_005598,not applicable,Environmental,carpet,1256227.0,COVID Isolation Dorm,363147024.0,13957.36315
13957.Apt.C.living.ceiling.fan,not applicable,living.ceiling.fan,,8/24/21 0:00,Not Detected,Not Detected,living room ceiling fan blades,True,193.0,Free-living,...,surface,indoor metagenome,EXC_SEP_005598,not applicable,Environmental,unsure,1256227.0,COVID Isolation Dorm,363147087.0,13957.36315
13957.Apt.C.entrance.switch,not applicable,entrance.switch,30.75533333,8/24/21 0:00,Detected,Detected,light switch near entrance to apartment,True,193.0,Free-living,...,surface,indoor metagenome,EXC_SEP_005598,not applicable,Environmental,plastic,1256227.0,COVID Isolation Dorm,363237734.0,13957.36324
13957.Apt.C.living.window.handle,not applicable,living.window.handle,,8/24/21 0:00,Not Detected,Not Detected,living room window handles,True,193.0,Free-living,...,surface,indoor metagenome,EXC_SEP_005598,not applicable,Environmental,plastic,1256227.0,COVID Isolation Dorm,363237738.0,13957.36324
13957.Apt.C.living.wall.left,not applicable,living.wall.left,,8/24/21 0:00,Not Detected,Not Detected,"western wall in living room, unsure",True,193.0,Free-living,...,surface,indoor metagenome,EXC_SEP_005598,not applicable,Environmental,painted_wall,1256227.0,COVID Isolation Dorm,363147030.0,13957.36315
13957.Apt.C.living.window.sill,not applicable,living.window.sill,,8/24/21 0:00,Not Detected,Not Detected,living room wall window sill,True,193.0,Free-living,...,surface,indoor metagenome,EXC_SEP_005598,not applicable,Environmental,painted_wall,1256227.0,COVID Isolation Dorm,363237801.0,13957.36324
13957.Apt.C.entrance.door.knob.inside,not applicable,entrance.door.knob.inside,27.52,8/24/21 0:00,Detected,Detected,front door,True,193.0,Free-living,...,surface,indoor metagenome,EXC_SEP_005598,not applicable,Environmental,metal,1256227.0,COVID Isolation Dorm,363237770.0,13957.36324
13957.Apt.C.entrance.door.knob.outside,not applicable,entrance.door.knob.outside,28.84766667,8/24/21 0:00,Detected,Detected,front door,True,193.0,Free-living,...,surface,indoor metagenome,EXC_SEP_005598,not applicable,Environmental,metal,1256227.0,COVID Isolation Dorm,363237821.0,13957.36324
13957.Apt.C.entrance.door.threshold.outside,not applicable,entrance.door.threshold.outside,32.591,8/24/21 0:00,Detected,Detected,front door,True,193.0,Free-living,...,surface,indoor metagenome,EXC_SEP_005598,not applicable,Environmental,carpet,1256227.0,COVID Isolation Dorm,363147038.0,13957.36315


In [20]:
orddf.head()

Unnamed: 0,0,1,2
13957.Apt.C.living.fan.switch,0.026098,-0.006062,-0.032396
13957.Apt.C.kitchen.island.countertop.left,0.061449,0.047752,0.03803
13957.Apt.C.building.northern.door.threshold.inside,-0.007055,-0.074605,-0.045049
13957.Apt.A.living.window.handle.left,-0.031742,0.122708,0.05057
13957.Apt.A.building.northern.door.handle.outside,-0.014139,-0.020459,0.019172


In [21]:
# now we use the handy wrapper script in the assets folder.
effect_size = run_stepwise_anova(orddf, meta_rda, md_cols_used)
effect_size = effect_size.sort_values('R2.adj', ascending=False)
# now I just save the dataframe that is returned.
effect_size

Running external command line application(s). This may print messages to stdout and/or stderr.
The command(s) being run are below. These commands cannot be manually re-run as they will depend on temporary files that no longer exist.

Command: /Volumes/GoogleDrive/My Drive/PhD/Isolation_Dorm/Dorm_analysis/RDA/stepwise-rda.R /var/folders/4y/4cpz9m8d70l80zdcqlxq6p4c0000gp/T/tmp39cwnvwb/ord_.tsv /var/folders/4y/4cpz9m8d70l80zdcqlxq6p4c0000gp/T/tmp39cwnvwb/mf_.txt /var/folders/4y/4cpz9m8d70l80zdcqlxq6p4c0000gp/T/tmp39cwnvwb/output.effect.size.tsv

R version 4.0.5 (2021-03-31) 


Loading required package: permute
Loading required package: lattice
This is vegan 2.5-7


Call: rda(formula = Y_16S ~ 1, data = X_16S, scale = TRUE)

              Inertia Rank
Total               3     
Unconstrained       3    3
Inertia is correlations 

Eigenvalues for unconstrained axes:
PC1 PC2 PC3 
  1   1   1 

Call: rda(formula = Y_16S ~ apt_space_classifier +
decision_detected_inconclusive + indoor_space_classifier +
host_subject_id, data = X_16S, scale = TRUE)

              Inertia Proportion Rank
Total          3.0000     1.0000     
Constrained    1.1503     0.3834    3
Unconstrained  1.8497     0.6166    3
Inertia is correlations 
Some constraints were aliased because they were collinear (redundant)

Eigenvalues for constrained axes:
  RDA1   RDA2   RDA3 
0.7407 0.3951 0.0145 

Eigenvalues for unconstrained axes:
   PC1    PC2    PC3 
0.9855 0.6049 0.2593 

Step: R2.adj= 0 
Call: Y_16S ~ 1 
 
                                 R2.adjusted
+ apt_space_classifier            0.32133514
+ host_subject_id                 0.26908290
+ decision_detected_inconclusive  0

Unnamed: 0,R2.adj,Df,AIC,F,Pr(>F)
+ apt_space_classifier,0.321335,14,220.071212,10.740187,0.0002
+ decision_detected_inconclusive,0.02821,1,208.745086,12.883092,0.0002


In [None]:
#adonis on distance you just report effect size and then look at significance using adonis 
#adonis and RDA should be done on the subsets.