In [60]:
"""
Created on Mon Nov 27 14:05:26 2019

@author: usingh
This notebook covers a few example use cases of pyrpipe.
"""

# Import pyrpipe's sra module; it provides the SRA class used to represent a run.
from pyrpipe import sra

In [61]:
# Working environment: all downloads and results are placed under this directory.
testDir = "/home/usingh/work/urmi/hoap/test"

# HISAT2 index prefix, used later in the notebook for read mapping.
hisatInd = "/home/usingh/work/urmi/hoap/test/hisatYeast/S288C_reference_genome_R64-2-1_20150113/yeastIndex"

# A small set of yeast RNA-Seq run accessions.
yeastList = [
    'SRR1583780',
    'SRR5507495',
    'SRR5507442',
    'SRR5507362',
    'SRR5507343',
    'SRR5507356',
    'SRR5507413',
]

## Creating SRA objects

We will use the SRA class to create objects, each of which refers to an "SRR" sample (also known as a Run object; see https://www.ncbi.nlm.nih.gov/books/NBK56913/#search.what_do_the_different_sra_accessi). We need to create an SRA object for each run we want to analyze. We will specify a directory (testDir) which will be used to store all the data. By default, this directory is the current working directory. To keep things consistent, for each run
all data will be downloaded under the sub-directory &lt;SRRaccession&gt; (this is enforced behaviour).

In [62]:
# Work with a single run: SRR1583780.
myRun = 'SRR1583780'

# Create the SRA object. The second argument (testDir) specifies where to put
# all the data; by default that directory is the current working directory.
# To keep things consistent, each run's data is downloaded under the
# sub-directory <SrrAccession>.
sraOb = sra.SRA(myRun, testDir)

# Ask the object for its accession (the return value is not used here).
sraOb.getSrrAccession()

# Show where this run's data lives.
print("Location is: " + sraOb.location)

# Final expression of the cell, so its value is displayed:
# does the .sra file already exist locally?
sraOb.sraFileExistsLocally()


[94mCreating SRA: SRR1583780[0m
Location is: /home/usingh/work/urmi/hoap/test/SRR1583780


False

## Downloading SRA file
In the above chunk we have created an SRA object. ```sraFileExistsLocally()``` checks whether the .sra files exist locally (&lt;SRRaccession>.sra). To download .sra files using ```prefetch``` we can use ```downloadSRAFile()```

In [63]:
# Download the .sra file for sraOb (the recorded output shows this runs `prefetch`).
sraOb.downloadSRAFile()

# Verify that the file is actually on disk before moving on.
if not sraOb.sraFileExistsLocally():
    raise Exception("Error Downloading")

# Separator added so the path is not glued to the word "to" in the message.
print("sra file downloaded to: " + sraOb.localSRAFilePath)

Downloading SRR1583780 ...
Executing:prefetch -O /home/usingh/work/urmi/hoap/test/SRR1583780 SRR1583780


2019-11-27T21:59:29 prefetch.2.9.3: 1) Downloading 'SRR1583780'...

2019-11-27T21:59:29 prefetch.2.9.3:  Downloading via https...

2019-11-27T21:59:29 prefetch.2.9.3: 1) 'SRR1583780' was downloaded successfully

Downloaded file: /home/usingh/work/urmi/hoap/test/SRR1583780/SRR1583780.sra 215.3 KB 
sra file downloaded to/home/usingh/work/urmi/hoap/test/SRR1583780/SRR1583780.sra


## Converting sra to fastq
Once the .sra file is downloaded, we can use ```runFasterQDump()``` to convert the .sra file to .fastq files. The fastq files will be created in the same directory (```sraOb.location```).


In [64]:
# Run fasterq-dump.
#  * deleteSRA=True asks for the .sra file to be removed once the .fastq
#    files have been produced.
#  * Additional program arguments are passed through **kwargs as key-value
#    pairs, just like regular Linux command-line arguments.
#  * Flags (options without arguments) are passed with an empty value, i.e. ""
#    (e.g. "-f" below).
#  * Integer values are passed as strings, e.g. "-e":"10" (number of cores).
sraOb.runFasterQDump(
    deleteSRA=True,
    **{"-f": "", "-t": testDir, "-e": "10"}
)

# Show the fastq files, or stop if they were not produced.
if not sraOb.fastqFilesExistsLocally():
    raise Exception("Failed fasterq-dump")
print("fq1: " + sraOb.localfastq1Path)
print("fq2: " + sraOb.localfastq2Path)


Executing:fasterq-dump -f -t /home/usingh/work/urmi/hoap/test -e 10 -O /home/usingh/work/urmi/hoap/test/SRR1583780 -o SRR1583780.fastq /home/usingh/work/urmi/hoap/test/SRR1583780/SRR1583780.sra
spots read      : 4,881

reads read      : 9,762

reads written   : 9,762

Deleting file: /home/usingh/work/urmi/hoap/test/SRR1583780/SRR1583780.sra rm /home/usingh/work/urmi/hoap/test/SRR1583780/SRR1583780.sra
fq1: /home/usingh/work/urmi/hoap/test/SRR1583780/SRR1583780_1.fastq
fq2: /home/usingh/work/urmi/hoap/test/SRR1583780/SRR1583780_2.fastq


## Quality control and adapter filtering
To remove adapters and filter reads from fastq files we can use the ```performQC()``` function. The ```performQC()``` function takes an object of type ```RNASeqQC```; such objects represent programs that perform quality filtering on RNA-Seq data. So far ```trim_galore``` and ```BBMap``` are implemented in pyrpipe. To run ```performQC()```, we first have to create an ```RNASeqQC``` object.

**Important to note that after running ```RNASeqQC``` the ```localfastqPath``` variables are updated to point to the qc-corrected files.**

In [65]:
# The qc module contains the RNASeqQC objects.
from pyrpipe import qc

# Create a Trimgalore object in order to use trim_galore (per the original
# notes, Trimgalore inherits from the RNASeqQC class). Options for
# trim_galore are passed through **kwargs.
trimGaloreObj = qc.Trimgalore(**{"-j": "8", "--length": "1"})

# Perform QC on the run; deleteRawFastq=False asks to keep the raw fastq files.
sraOb.performQC(trimGaloreObj, deleteRawFastq=False)

# Check the fastq paths again after QC.
# The failure message names the QC step: this cell runs trim_galore,
# not fasterq-dump.
if not sraOb.fastqFilesExistsLocally():
    raise Exception("Failed QC with trim_galore")
print("fq1: " + sraOb.localfastq1Path)
print("fq2: " + sraOb.localfastq2Path)

[94mChecking trim_galore...[0m
[92mFound trim_galore[0m
[94mChecking cutadapt...[0m
[92mFound cutadapt[0m
['-j', '8', '--length', '1']
Performing QC using trim_galore
Running trim_galore paired
Executing: trim_galore -o /home/usingh/work/urmi/hoap/test/SRR1583780 -j 8 --length 1 --paired /home/usingh/work/urmi/hoap/test/SRR1583780/SRR1583780_1.fastq /home/usingh/work/urmi/hoap/test/SRR1583780/SRR1583780_2.fastq
Executing:mv /home/usingh/work/urmi/hoap/test/SRR1583780/SRR1583780_1_val_1.fq /home/usingh/work/urmi/hoap/test/SRR1583780/SRR1583780_1_trimGalore.fastq
(True, '/home/usingh/work/urmi/hoap/test/SRR1583780/SRR1583780_1_trimGalore.fastq', '/home/usingh/work/urmi/hoap/test/SRR1583780/SRR1583780_2_trimGalore.fastq')
fq1: /home/usingh/work/urmi/hoap/test/SRR1583780/SRR1583780_1_trimGalore.fastq
fq2: /home/usingh/work/urmi/hoap/test/SRR1583780/SRR1583780_2_trimGalore.fastq


## Mapping RNA-Seq reads to genome
One can create instances of the ```Mapping``` class from pyrpipe to perform read mapping. An object of the ```Mapping``` class is equivalent to an alignment program like ```Hisat2``` or ```STAR```. A mapping object can then take an SRA object and produce the alignment. Each mapping object should be initialized with specific parameters (different parameters, genome index and so on).

In [66]:
from pyrpipe import mapping

# Path prefix of the HISAT2 index to map against.
hisatInd = "/home/usingh/work/urmi/hoap/test/hisatYeast/S288C_reference_genome_R64-2-1_20150113/yeastIndex"

# Build a Hisat2 object for that index.
hisat2Obj = mapping.Hisat2(hisatInd)

# Run HISAT2 on the SRA object with a few extra parameters; the return value
# is stored in hsStatus.
hsStatus = hisat2Obj.runHisat2(
    sraOb,
    **{"-p": "10", "--dta-cufflinks": ""}
)

print(hsStatus)

# hsStatus[0] is tested as the success flag and hsStatus[1] is printed as the
# SAM file path.
if hsStatus[0]:
    print("Sam file:" + hsStatus[1])
else:
    raise Exception("Hisat2 failed")

[94mChecking hisat2...[0m
[92mFound hisat2[0m
Found HISAT2 index files.
Executing:hisat2 -p 10 --dta-cufflinks -x /home/usingh/work/urmi/hoap/test/hisatYeast/S288C_reference_genome_R64-2-1_20150113/yeastIndex -1 /home/usingh/work/urmi/hoap/test/SRR1583780/SRR1583780_1_trimGalore.fastq -2 /home/usingh/work/urmi/hoap/test/SRR1583780/SRR1583780_2_trimGalore.fastq -S /home/usingh/work/urmi/hoap/test/SRR1583780/SRR1583780_hisat2.sam
(True, '/home/usingh/work/urmi/hoap/test/SRR1583780/SRR1583780_hisat2.sam')
Sam file:/home/usingh/work/urmi/hoap/test/SRR1583780/SRR1583780_hisat2.sam


# Converting sam to bam
Sam files can be converted to bam or sorted bam using samtools.

In [67]:
# Create the samtools object.
samtOb = mapping.Samtools()

# Convert the SAM file to a sorted BAM, deleting the original SAM and the
# intermediate (unsorted) BAM. The recorded output shows the second argument
# (10) being passed to samtools as "-@ 10".
sortedBam = samtOb.samToSortedBam(
    hsStatus[1],
    10,
    deleteSam=True,
    deleteOriginalBam=True
)

print(sortedBam)

[94mChecking samtools...[0m
[92mFound samtools[0m
Executing: samtools view -@ 10 -b -o /home/usingh/work/urmi/hoap/test/SRR1583780/SRR1583780_hisat2.bam /home/usingh/work/urmi/hoap/test/SRR1583780/SRR1583780_hisat2.sam
sam2bam finished
Deleting sam file...
Executing: samtools sort -o /home/usingh/work/urmi/hoap/test/SRR1583780/SRR1583780_hisat2_sorted.bam -@ 10 /home/usingh/work/urmi/hoap/test/SRR1583780/SRR1583780_hisat2.bam
SamSort finished
Deleting unsorted bam file...
/home/usingh/work/urmi/hoap/test/SRR1583780/SRR1583780_hisat2_sorted.bam


# Assemble transcripts
```Assembly``` objects can be used to assemble transcripts.


In [68]:
from pyrpipe import assembly

# Create a Stringtie object.
stieOb = assembly.Stringtie()

# Run stringtie on the sorted BAM; deleteInputBam=True asks for the input BAM
# to be removed afterwards.
# NOTE(review): the recorded output of this cell is (False, ''), and the
# result is only printed, never checked - confirm whether the run succeeded.
gtf = stieOb.runStringtie(
    sortedBam,
    deleteInputBam=True,
    proc=10
)

print(gtf)

[94mChecking stringtie...[0m
[92mFound stringtie[0m
Executing: stringtie /home/usingh/work/urmi/hoap/test/SRR1583780/SRR1583780_hisat2_sorted.bam -p 10 -o /home/usingh/work/urmi/hoap/test/SRR1583780/SRR1583780_hisat2_sorted.bam_stie.gtf
Deleting file: /home/usingh/work/urmi/hoap/test/SRR1583780/SRR1583780_hisat2_sorted.bam rm /home/usingh/work/urmi/hoap/test/SRR1583780/SRR1583780_hisat2_sorted.bam
(False, '')


# Simple RNA-Seq mapping pipeline
We can use pyrpipe's classes and functions to process multiple RNA-Seq runs.