-
Notifications
You must be signed in to change notification settings - Fork 0
/
Full_script_PVX.sh
154 lines (109 loc) · 5.97 KB
/
Full_script_PVX.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
#Script 1
#Remove - from file names
#!/bin/bash
# NOTE: a shebang only takes effect on the first line of a file; at this
# position it is an ordinary comment, so run this section with bash explicitly.

#######################################
# Rename every entry in the current directory whose name contains "-" to the
# same name with all "-" characters removed (e.g. "PVX-2d_1.fq.gz" becomes
# "PVX2d_1.fq.gz").
# Arguments: none (operates on the current working directory)
# Outputs:   one warning line on stderr for every entry that is skipped
# Returns:   status of the last command run in the loop; 0 when nothing matched
#######################################
strip_hyphens_from_names() {
  local file target
  for file in *-*; do
    # When nothing matches, bash leaves the pattern "*-*" as a literal word;
    # skip it instead of handing it to mv. -L keeps dangling symlinks eligible.
    [[ -e "$file" || -L "$file" ]] || continue

    target=${file//-/}

    # A name made only of "-" would collapse to the empty string.
    if [[ -z "$target" ]]; then
      printf 'skipping %s: name consists only of "-"\n' "$file" >&2
      continue
    fi

    # Never overwrite (or move into) something that already has the new name.
    if [[ -e "$target" || -L "$target" ]]; then
      printf 'skipping %s: %s already exists\n' "$file" "$target" >&2
      continue
    fi

    # "--" stops option parsing so names that start with "-" are treated as
    # files; quoting keeps names containing whitespace in one piece.
    mv -- "$file" "$target"
  done
}

strip_hyphens_from_names
#convert to unix before executing (presumably Unix line endings, e.g. via dos2unix - confirm)
#Script 2
#FastQC Quality Check
#BSUB -L /bin/bash # uses the bash login shell to initialize the job's execution environment.
#BSUB -J fastqc # job name
#BSUB -n 2 # assigns 2 cores for execution
#BSUB -R "span[ptile=2]" # assigns 2 cores per node
#BSUB -R "rusage[mem=2700]" # reserves 2700MB memory per core
#BSUB -M 2700 # sets to 2,700MB (~2.7GB) per process enforceable memory limit. Total job memory = (M * n)
#BSUB -W 24:00 # sets to 24 hours the job's runtime wall-clock limit.
#BSUB -o stdout.%J # directs the job's standard output to stdout.jobid
#BSUB -e stderr.%J # directs the job's standard error to stderr.jobid
#BSUB -u venura.herath@tamu.edu #Send all emails to email_address
#BSUB -B -N #Send email on job begin (-B) and end (-N)
module load FastQC/0.11.9-Java-11
########## INPUTS ##########
# Sample name prefixes. For each one the loop below reads the paired files
# <sample>_1.fq.gz and <sample>_2.fq.gz from the current directory.
SAMPLES=(
  PVX_2d_1 PVX_2d_2 PVX_2d_3
  PVX_3d_1 PVX_3d_2 PVX_3d_3
  VMOCK_2d_1 VMOCK_2d_2 VMOCK_2d_3
  VMOCK_3d_1 VMOCK_3d_2 VMOCK_3d_3
)
######## PARAMETERS ########
threads=2 # make sure this is <= your BSUB -n value
outdir=./fastQC # directory that receives the FastQC reports
################################### COMMANDS ###################################
# use -o <directory> to save results to <directory> instead of directory where reads are located
# <directory> must already exist before using -o <directory> option, so it is
# created here (-p: no error if it is already present)
# --nogroup (not used below) will calculate average at each base instead of bins after the first 50 bp
# fastqc runs one thread per file; using 20 threads for 2 files does not speed up the processing
mkdir -p -- "$outdir"
for sample in "${SAMPLES[@]}"; do
  fastqc -t "$threads" -o "$outdir" "${sample}_1.fq.gz" "${sample}_2.fq.gz"
done
#convert to unix before executing
#Script 3
#HISAT2 alignment and SAM to BAM conversion
#BSUB -L /bin/bash # uses the bash login shell to initialize the job's execution environment.
#BSUB -J hisat2 # job name
#BSUB -n 20 # assigns 20 cores for execution
#BSUB -R "span[ptile=20]" # assigns 20 cores per node
#BSUB -R "rusage[mem=2700]" # reserves 2700MB memory per core
#BSUB -M 2700 # sets to 2,700MB (~2.7GB) per process enforceable memory limit. Total job memory = (M * n)
#BSUB -W 24:00 # sets to 24 hours the job's runtime wall-clock limit.
#BSUB -o stdout.%J # directs the job's standard output to stdout.jobid
#BSUB -e stderr.%J # directs the job's standard error to stderr.jobid
module load HISAT2/2.2.0-foss-2018b
module load Python/3.6.6-foss-2018b

# Step 1 (hisat2): align each sample's paired <sample>_1.fq.gz / <sample>_2.fq.gz
# reads against the indexed reference genome, writing <sample>.sam
SAMPLES=(
  PVX_2d_1 PVX_2d_2 PVX_2d_3
  PVX_3d_1 PVX_3d_2 PVX_3d_3
  VMOCK_2d_1 VMOCK_2d_2 VMOCK_2d_3
  VMOCK_3d_1 VMOCK_3d_2 VMOCK_3d_3
)
threads=20
# Basename of the HISAT2 index passed to -x
genome_index=/scratch/datasets/genome_indexes/other_genomes/potato/hisat2/DM_1-3_516_R44_potato_genome_assembly.v6.1

for sample in "${SAMPLES[@]}"; do
  hisat2 -p "$threads" --dta --rna-strandness RF \
    -x "$genome_index" \
    -1 "${sample}_1.fq.gz" -2 "${sample}_2.fq.gz" \
    -S "${sample}.sam"
done

# Unload the alignment modules first to avoid conflicts with SAMtools
module purge

# Step 2 (samtools sort): turn every <sample>.sam into a sorted <sample>.bam
module load SAMtools/1.9-intel-2018b
for sample in "${SAMPLES[@]}"; do
  samtools sort -@ "$threads" -o "${sample}.bam" "${sample}.sam"
done

# Step 3 (samtools index): write a <sample>.bam.bai index next to each BAM
for sample in "${SAMPLES[@]}"; do
  samtools index "${sample}.bam" "${sample}.bam.bai"
done
#convert to unix before executing
#Script 4
#Stringtie to DESEQ2
#BSUB -L /bin/bash # uses the bash login shell to initialize the job's execution environment.
#BSUB -J stringtie # job name (was "hisat2", copied over from the alignment script)
#BSUB -n 20 # assigns 20 cores for execution
#BSUB -R "span[ptile=20]" # assigns 20 cores per node
#BSUB -R "rusage[mem=2700]" # reserves 2700MB memory per core
#BSUB -M 2700 # sets to 2,700MB (~2.7GB) per process enforceable memory limit. Total job memory = (M * n)
#BSUB -W 24:00 # sets to 24 hours the job's runtime wall-clock limit.
#BSUB -o stdout.%J # directs the job's standard output to stdout.jobid
#BSUB -e stderr.%J # directs the job's standard error to stderr.jobid
#BSUB -u venura.herath@tamu.edu #Send all emails to email_address
#BSUB -B -N #Send email on job begin (-B) and end (-N)
#Loading New modules
module load StringTie/2.1.4-GCC-9.3.0
module load Python/2.7.18-GCCcore-9.3.0

# Sample name prefixes; every step below reads <sample>.bam from the current
# directory. Defined once and reused by both StringTie passes.
SAMPLES=(
  PVX_2d_1 PVX_2d_2 PVX_2d_3
  PVX_3d_1 PVX_3d_2 PVX_3d_3
  VMOCK_2d_1 VMOCK_2d_2 VMOCK_2d_3
  VMOCK_3d_1 VMOCK_3d_2 VMOCK_3d_3
)
threads=20 # keep this <= the BSUB -n value above
annotation=DM_1-3_516_R44_potato.v6.1.working_models.gff3 # reference annotation given to -G / -r
merge_list=mergelist.txt # list of per-sample GTFs given to stringtie --merge

#bash script for stringtie; assemble transcripts using stringtie
# Writes one <sample>.gtf per sample, labelled with the sample name (-l).
for sample in "${SAMPLES[@]}"; do
  stringtie --rf -p "$threads" -G "$annotation" \
    -o "${sample}.gtf" -l "$sample" "${sample}.bam"
done

#merge transcripts
# The merge step reads the GTF paths to combine from $merge_list, one per line.
# Nothing else in this script creates that file, so build it from the sample
# list when it is missing or empty; an existing list is left untouched.
if [[ ! -s "$merge_list" ]]; then
  printf '%s.gtf\n' "${SAMPLES[@]}" > "$merge_list"
fi
stringtie --merge -p "$threads" -G "$annotation" -o stringtie_merged.gtf "$merge_list"

#purge modules
module purge
#Loading GFFcompare
module load GffCompare/0.10.6-GCCcore-7.3.0
#Using Gffcompare
# Compares the merged transcripts against the reference annotation; output
# files use the prefix "merged".
gffcompare -r "$annotation" -G -o merged stringtie_merged.gtf
#purge modules
module purge

#bash script for stringtie; second pass against the merged transcripts
# Per the StringTie manual, -e limits estimation to the transcripts given with
# -G and -B writes Ballgown table files next to the -o output.
module load StringTie/2.1.4-GCC-9.3.0
module load Python/2.7.18-GCCcore-9.3.0
for sample in "${SAMPLES[@]}"; do
  # -p creates ballgown/ itself when needed and does not fail on a re-run.
  mkdir -p "ballgown/${sample}"
  stringtie --rf -e -B -p "$threads" -G stringtie_merged.gtf \
    -o "ballgown/${sample}/${sample}.gtf" "${sample}.bam"
done
#converting data to DESEQ2 compatible format
prepDE.py -i ballgown
#end