In [1]:
import numpy as np
import os
import pandas as pd
import shutil
import urllib.request

from qiime2 import Artifact
from qiime2.plugins.dada2.methods import denoise_single

In [2]:
def download_file(url, local_filepath):
    """Fetch the resource at a url and write it to a local file.

    url -- the web address of the file to download, as a string
    local_filepath -- the path the downloaded bytes are written to
                      (opened in binary write mode, so an existing file is replaced)
    """
    print(f"Downloading file: {url}")
    with urllib.request.urlopen(url) as response:
        with open(local_filepath, 'wb') as out_file:
            # copy the response body to disk in chunks
            shutil.copyfileobj(response, out_file)

    print(f"Saved to local filepath: {local_filepath}")

In [3]:
def make_directory(path):
    """Make a directory (and any missing parents), but proceed without errors if it fails

    path -- the path to the directory (e.g. "../output/taxonomy_references")

    Nothing is returned. An OSError raised while creating the directory is
    caught and reported with print instead of being propagated.
    """
    if os.path.isdir(path):
        # nothing to do; report it as such instead of as a failure
        print(f"Directory {path} already exists")
        return

    try:
        # makedirs also creates missing intermediate directories, so this works
        # when a parent such as "../output" has not been created yet;
        # exist_ok covers the directory appearing between the check above and here
        os.makedirs(path, exist_ok=True)
    except OSError:
        print(f"Creation of directory {path} failed")
    else:
        print(f"Created the directory {path}")

In [4]:
# absolute path of the parent of the current working directory
working_dir = os.path.abspath('../')
# directory that receives the per-mock folders and the downloaded metadata tables
mock_dir = f'{working_dir}/output/mock_communities'
# common prefix of the mockrobiota URLs; the mock number and a file name are appended to it
metadata_url_base = 'https://github.com/caporaso-lab/mockrobiota/raw/master/data/mock-'

In [5]:
# numbers of the mockrobiota mock communities to process (17 is not in the list)
mocks = [12, 13, 14, 15, 16, 18, 19, 20, 21, 22]

In [6]:
# Download the forward reads of every mock community, import them into qiime2,
# and create sequence and feature table artifacts.
# This loop could take a while, depending on the power of your
# cpu and your download speed.
if not os.path.exists(mock_dir):
    print(f'Creating mock directory {mock_dir}')
    make_directory(mock_dir)

# Both metadata tables are always saved to these same two paths in mock_dir, so
# every iteration overwrites the previous mock's copy. They are read back
# immediately after being downloaded.
metadata_filepath = mock_dir + '/metadata.tsv'
sample_metadata_filepath = mock_dir + '/sample_metadata.tsv'

for mock in mocks:
    mock_folder = f'{mock_dir}/mock_{mock}'
    if not os.path.exists(mock_folder):
        print(f'Creating mock folder {mock_folder}')
        make_directory(mock_folder)

    # fetch and load the dataset-level and sample-level metadata tables
    metadata_url = f'{metadata_url_base}{mock}/dataset-metadata.tsv'
    sample_metadata_url = f'{metadata_url_base}{mock}/sample-metadata.tsv'
    download_file(metadata_url, metadata_filepath)
    download_file(sample_metadata_url, sample_metadata_filepath)
    metadata = pd.read_csv(metadata_filepath, sep='\t')
    sample_metadata = pd.read_csv(sample_metadata_filepath, sep='\t')
    # first entry of the SampleID column; not used again within this cell
    sample_name = sample_metadata['SampleID'][0]

    # The forward-read URL is picked by position: third entry of the 'value' column.
    # NOTE(review): assumes every dataset-metadata.tsv lists its rows in the same order -- confirm.
    # Only the forward reads are downloaded; the reverse and index reads (entries
    # 3 and 4 of the same column in the code that used to be commented out here) are not.
    forward_url = metadata['value'].values[2]
    # presumably named to match the fastq file-name pattern qiime2 expects on import -- confirm
    forward_filepath = mock_folder + '/1_1_L001_R1_001.fastq.gz'
    download_file(forward_url, forward_filepath)

    # importing a fastq file into qiime2 requires a manifest file and a metadata.yml file
    manifest_filepath = mock_folder + '/MANIFEST'
    if not os.path.exists(manifest_filepath):
        with open(manifest_filepath, 'w') as file:
            file.write(f'sample-id,filename,direction\nsample1,{forward_filepath},forward')
    yml_filepath = mock_folder + '/metadata.yml'
    if not os.path.exists(yml_filepath):
        # the file only has to exist; it is created empty
        with open(yml_filepath, 'w') as file:
            pass

    # Now import the folder into qiime. mock_folder (absolute) is used instead of
    # rebuilding a path relative to the current working directory, so the import
    # always reads the directory the files above were just written to.
    seqs = Artifact.import_data('SampleData[SequencesWithQuality]', mock_folder)
    # NOTE(review): the .qza files below are saved into mock_folder, the same
    # directory import_data reads -- verify that re-running this cell with those
    # files present still imports cleanly.
    seqs.save(f'{mock_folder}/mock_{mock}_sequences.qza')

    # Second positional argument is the truncation length; per the q2-dada2
    # documentation 0 disables truncation and n_threads=0 uses all available cores.
    # The denoising stats are returned but not saved.
    table, sequences, stats = denoise_single(seqs, 0, n_threads=0)
    table.save(f'{mock_folder}/mock_{mock}_ft.qza')
    sequences.save(f'{mock_folder}/mock_{mock}_rep_seqs.qza')
    print(f'mock {mock} imported\n')

Creating mock directory /mnt/c/Users/Dylan/Documents/zaneveld/2_14_gcmp/GCMP_Global_Disease/analysis/organelle_removal/output/mock_communities
Created the directory /mnt/c/Users/Dylan/Documents/zaneveld/2_14_gcmp/GCMP_Global_Disease/analysis/organelle_removal/output/mock_communities
Creating mock folder /mnt/c/Users/Dylan/Documents/zaneveld/2_14_gcmp/GCMP_Global_Disease/analysis/organelle_removal/output/mock_communities/mock_12
Created the directory /mnt/c/Users/Dylan/Documents/zaneveld/2_14_gcmp/GCMP_Global_Disease/analysis/organelle_removal/output/mock_communities/mock_12
Downloading file: https://github.com/caporaso-lab/mockrobiota/raw/master/data/mock-12/dataset-metadata.tsv
Saved to local filepath: /mnt/c/Users/Dylan/Documents/zaneveld/2_14_gcmp/GCMP_Global_Disease/analysis/organelle_removal/output/mock_communities/metadata.tsv
Downloading file: https://github.com/caporaso-lab/mockrobiota/raw/master/data/mock-12/sample-metadata.tsv
Saved to local filepath: /mnt/c/Users/Dylan/Docum

mock 16 imported

Creating mock folder /mnt/c/Users/Dylan/Documents/zaneveld/2_14_gcmp/GCMP_Global_Disease/analysis/organelle_removal/output/mock_communities/mock_18
Created the directory /mnt/c/Users/Dylan/Documents/zaneveld/2_14_gcmp/GCMP_Global_Disease/analysis/organelle_removal/output/mock_communities/mock_18
Downloading file: https://github.com/caporaso-lab/mockrobiota/raw/master/data/mock-18/dataset-metadata.tsv
Saved to local filepath: /mnt/c/Users/Dylan/Documents/zaneveld/2_14_gcmp/GCMP_Global_Disease/analysis/organelle_removal/output/mock_communities/metadata.tsv
Downloading file: https://github.com/caporaso-lab/mockrobiota/raw/master/data/mock-18/sample-metadata.tsv
Saved to local filepath: /mnt/c/Users/Dylan/Documents/zaneveld/2_14_gcmp/GCMP_Global_Disease/analysis/organelle_removal/output/mock_communities/sample_metadata.tsv
Downloading file: https://s3-us-west-2.amazonaws.com/mockrobiota/latest/mock-18/mock-forward-read.fastq.gz
Saved to local filepath: /mnt/c/Users/Dylan/