# Import vaginal samples

In [3]:
import pandas as pd
import qiime2 as q2
from biom import Table, load_table
from biom.util import biom_open

## Redbiom fetch study
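Fetch all samples from study 10894 with redbiom. The goal is vaginal samples from the USA only; the sample-type and country filters are applied in the "Additional filtering" section below.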

In [43]:
%%script bash
# Fetch the feature table and the sample metadata for Qiita study 10894 with redbiom.
source ~/.bash_profile
conda activate qiime2-2020.2

# Values shared by both fetches are defined once so the table and the metadata
# are always requested with the same query and the same context.
out_dir=/projects/cmi_proj/seed_grants/Infant_Studies/Jae_Kim/nature_med_consolidated_analyses/sw_notebooks/data_10894
context=Deblur-Illumina-16S-V4-100nt-fbc5b2
query="where qiita_study_id==10894"

# Quote every expansion so the paths survive word-splitting.
mkdir -p "$out_dir"

# Download biom data
redbiom search metadata "$query" |
    redbiom fetch samples --context "$context" --output "$out_dir/10894.biom"

# Download metadata
redbiom search metadata "$query" |
    redbiom fetch sample-metadata --all-columns --context "$context" --output "$out_dir/10894_md.txt"

10770 sample ambiguities observed. Writing ambiguity mappings to: 10894.biom.ambiguities


# Additional filtering
Redbiom only seems to allow two filtering terms in a metadata search, so the remaining filters are applied here with pandas. (If you are reading this and know how to use more than two, please let me know.)

In [45]:
# --- Metadata -------------------------------------------------------------
# Tab-separated metadata written by the redbiom fetch; first column is the index.
data_dir = "/projects/cmi_proj/seed_grants/Infant_Studies/Jae_Kim/nature_med_consolidated_analyses/sw_notebooks/data_10894/"
md = pd.read_csv(data_dir + "10894_md.txt", sep='\t', index_col=0)

# --- Sample selection -----------------------------------------------------
# Each selection is a pandas query over the metadata columns.
vaginal_query = 'sample_type=="vaginal mucus" & mom_baby=="Mom" & country=="USA"'
baby_query = (
    'sample_type in ["feces","saliva","skin"] & mom_baby=="Baby" & country=="USA"'
    ' & current_abx=="No"'
    ' & date_sampling_category in ["Day_0","Day_1","Day_2-3","Day_7","Day_14"]'
    ' & birth_mode!="CSseed"'
)

# Index labels of the matching rows, as sets for membership tests below.
vaginal_keep = set(md.query(vaginal_query).index)
baby_keep = set(md.query(baby_query).index)

# Metadata restricted to the selected index labels.
vaginal_md = md.query('index in @vaginal_keep')
baby_md = md.query('index in @baby_keep')

# --- Feature table --------------------------------------------------------
bt = load_table(data_dir + "10894.biom")

# Keep only the selected IDs on the sample axis; inplace=False leaves `bt` intact.
v_bt = bt.filter(vaginal_keep, axis='sample', inplace=False)
b_bt = bt.filter(baby_keep, axis='sample', inplace=False)
#Save tables
def save_biom(biom_filename, biom_table, author='default'):
    """Write ``biom_table`` to ``biom_filename`` via ``to_hdf5`` and report the path.

    Parameters
    ----------
    biom_filename : str
        Destination path; opened for writing with ``biom_open``.
    biom_table
        Object exposing ``to_hdf5`` (the tables loaded with ``load_table``
        in this notebook).
    author : str, optional
        Passed as the second positional argument to ``to_hdf5``.
    """
    with biom_open(biom_filename, 'w') as out_handle:
        biom_table.to_hdf5(out_handle, author)

    # Confirmation is printed only after the file handle has been closed.
    message = "Saved to " + biom_filename
    print(message)
# Write the vaginal table first, then the baby table, into data_dir.
for out_name, out_table in (("10894_vaginal.biom", v_bt), ("10894_baby.biom", b_bt)):
    save_biom(data_dir + out_name, out_table)

Saved to /projects/cmi_proj/seed_grants/Infant_Studies/Jae_Kim/nature_med_consolidated_analyses/sw_notebooks/data_10894/10894_vaginal.biom
Saved to /projects/cmi_proj/seed_grants/Infant_Studies/Jae_Kim/nature_med_consolidated_analyses/sw_notebooks/data_10894/10894_baby.biom


# Summarize samples

In [46]:
# The second entry of each filtered table's shape is reported as its sample count.
n_vaginal = v_bt.shape[1]
n_baby = b_bt.shape[1]
print(f"Vaginal samples: {n_vaginal}")
print(f"Baby samples samples: {n_baby}")
print()
print("Baby sample type breakdown")
# Last expression of the cell: rendered as the cell output.
baby_md["sample_type"].value_counts()

Vaginal samples: 427
Baby samples samples: 499

Baby sample type breakdown


skin      262
feces     144
saliva     93
Name: sample_type, dtype: int64

In [48]:
# Number of selected baby metadata rows per birth_mode value
baby_md["birth_mode"].value_counts()

Vag       436
CS         40
CSself     23
Name: birth_mode, dtype: int64