# Setup
**Environment:** qiime2-2020.11

## How to use this notebook:
1. Activate the `qiime2-2020.11` conda environment.
    ```
   source $HOME/miniconda3/bin/activate # adjust this path to where miniconda is installed on your machine
   conda activate qiime2-2020.11 # activate qiime2 conda environment
    ```
2. Launch Jupyter notebook:
    ```
   jupyter notebook
    ```  

In [None]:
## change working directory to the project root directory
## (moves one level up so the relative data/... paths used in the cells below resolve;
##  assumes this notebook is stored one directory below the project root -- TODO confirm)
%cd ..

##  Download raw sequence data from NCBI SRA database

###  Download sequence using grabseqs

####  Run1

In [None]:
# Fetch the run-1 reads from NCBI SRA with grabseqs into
# data/raw/casava-18-paired-end-demultiplexed-run1/, with sample metadata in metadata.csv.
# The accessions appear to be masked here; substitute the real project/run IDs before running.
# NOTE(review): per the grabseqs usage text, -r looks like the retry count and -l looks like
# "list samples without downloading" -- confirm that -l is intended for an actual download.
grabseqs sra PRJNA****** ERR****** -m metadata.csv -o data/raw/casava-18-paired-end-demultiplexed-run1/ -r 3 -l 

####  Run2

In [None]:
# Fetch the run-2 reads from NCBI SRA with grabseqs into
# data/raw/casava-18-paired-end-demultiplexed-run2/, with sample metadata in metadata.csv.
# The accessions appear to be masked here; substitute the real project/run IDs before running.
# NOTE(review): per the grabseqs usage text, -r looks like the retry count and -l looks like
# "list samples without downloading" -- confirm that -l is intended for an actual download.
grabseqs sra PRJNA****** ERR****** -m metadata.csv -o data/raw/casava-18-paired-end-demultiplexed-run2/ -r 3 -l 

### Rename downloaded fastq files

####  Run1

In [None]:
# Rename the run-1 SRA fastq files (<Run>_<1|2>.fastq.gz) to the original
# Casava-style names (<SampleName>_R<1|2>_001.fastq.gz) using the SRA metadata.

# folder holding the run-1 fastq files and metadata.csv
dir1 <- "data/raw/casava-18-paired-end-demultiplexed-run1"

# get the paths (relative to the working directory) of the files in the run folder
fq1 <- list.files(dir1, full.names = TRUE)

# read in SRA metadata
mtd1 <- read.csv(file.path(dir1, "metadata.csv"))

# make a lookup table for renaming fastq files
names1 <- mtd1[, c("Run", "SampleName")]

## duplicate samples (rows) to match paired-end reads; the result has to be
## assigned back, otherwise the table keeps a single row per sample
names1 <- names1[rep(seq_len(nrow(names1)), each = 2), ]

## forward and reverse read id (1, 2, 1, 2, ...), one pair per sample
names1$Index <- rep(1:2, times = nrow(mtd1))

## SRA fastq file names, built from the duplicated lookup table so that every
## sample yields both a _1 and a _2 path
names1$Run <- file.path(
  dir1, paste0(names1$Run, "_", names1$Index, ".fastq.gz")
)

## original fastq file names used for the data analysis
names1$SampleName <- file.path(
  dir1, paste0(names1$SampleName, "_R", names1$Index, "_001.fastq.gz")
)

## stop before touching anything if an expected SRA file is absent, so the
## folder is never left half-renamed
missing1 <- setdiff(names1$Run, fq1)
if (length(missing1) > 0) {
  stop(
    "SRA fastq files not found: ", paste(missing1, collapse = ", "),
    call. = FALSE
  )
}

# replace SRA fastq file names with original fastq file names
file.rename(from = names1[["Run"]], to = names1[["SampleName"]])

####  Run2

In [None]:
# Rename the run-2 SRA fastq files (<Run>_<1|2>.fastq.gz) to the original
# Casava-style names (<SampleName>_R<1|2>_001.fastq.gz) using the SRA metadata.

# folder holding the run-2 fastq files and metadata.csv
dir2 <- "data/raw/casava-18-paired-end-demultiplexed-run2"

# get the paths (relative to the working directory) of the files in the run folder
fq2 <- list.files(dir2, full.names = TRUE)

# read in SRA metadata
mtd2 <- read.csv(file.path(dir2, "metadata.csv"))

# make a lookup table for renaming fastq files
names2 <- mtd2[, c("Run", "SampleName")]

## duplicate samples (rows) to match paired-end reads; the result has to be
## assigned back, otherwise the table keeps a single row per sample
names2 <- names2[rep(seq_len(nrow(names2)), each = 2), ]

## forward and reverse read id (1, 2, 1, 2, ...), one pair per sample
names2$Index <- rep(1:2, times = nrow(mtd2))

## SRA fastq file names, built from the duplicated lookup table so that every
## sample yields both a _1 and a _2 path
names2$Run <- file.path(
  dir2, paste0(names2$Run, "_", names2$Index, ".fastq.gz")
)

## original fastq file names used for the data analysis
names2$SampleName <- file.path(
  dir2, paste0(names2$SampleName, "_R", names2$Index, "_001.fastq.gz")
)

## stop before touching anything if an expected SRA file is absent, so the
## folder is never left half-renamed
missing2 <- setdiff(names2$Run, fq2)
if (length(missing2) > 0) {
  stop(
    "SRA fastq files not found: ", paste(missing2, collapse = ", "),
    call. = FALSE
  )
}

# replace SRA fastq file names with original fastq file names
file.rename(from = names2[["Run"]], to = names2[["SampleName"]])

##  Download SILVA132 reference sequences and taxonomy

In [None]:
# Download SILVA132
# (fetches the Silva_132_release.zip QIIME archive into data/reference)
wget -P data/reference https://www.arb-silva.de/fileadmin/silva_databases/qiime/Silva_132_release.zip

# Decompress and delete the downloaded zip file 
# (the zip is removed only if unzip exits successfully, because of the &&)
unzip data/reference/Silva_132_release.zip -d data/reference/silva_132 && rm -f data/reference/Silva_132_release.zip

# Copy and rename the reference sequence and taxonomy file
# (sequences come from rep_set_16S_only/99 and the taxonomy from taxonomy/16S_only/99;
#  the taxonomy file is renamed to silva_132_consensus_taxonomy_l7.txt, the sequences keep their name)
cp data/reference/silva_132/SILVA_132_QIIME_release/rep_set/rep_set_16S_only/99/silva_132_99_16S.fna data/reference
cp data/reference/silva_132/SILVA_132_QIIME_release/taxonomy/16S_only/99/consensus_taxonomy_7_levels.txt data/reference
mv data/reference/consensus_taxonomy_7_levels.txt data/reference/silva_132_consensus_taxonomy_l7.txt

# Delete data to free up disk space
# (removes the whole extracted release; only the two copied files remain in data/reference)
rm -rf data/reference/silva_132

##  Download SILVA128 reference phylogenetic tree

In [None]:
# Download the sepp-refs-silva-128.qza artifact from the QIIME 2 2020.11 data server
# into data/reference
wget -P data/reference https://data.qiime2.org/2020.11/common/sepp-refs-silva-128.qza