-
Notifications
You must be signed in to change notification settings - Fork 1
/
Preprocessing.sh
72 lines (59 loc) · 1.92 KB
/
Preprocessing.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
#!/bin/bash
####################################################################
####################################################################
# Script for building the three data structures (ebwt, cda and lcp)
# from a collection of fasta files.
#
# Usage: Preprocessing.sh <PathDataset> <fastaName>
#   PathDataset  directory containing the input fasta files
#   fastaName    prefix of the output files:
#                  <fastaName>.fasta  (FastaDataset)
#                  <fastaName>.txt    (InfoFile)
####################################################################
####################################################################

# Both arguments are mandatory. An empty PathDataset would make the later
# "$PathDataset/*" glob expand to "/*", and an empty fastaName would write
# to the files ".txt" and ".fasta".
if [[ $# -lt 2 || -z "$1" || -z "$2" ]]; then
  echo "Usage: $0 <PathDataset> <fastaName>" >&2
  exit 1
fi

# Tool selector for the eBWT/LCP/DA step: 1 runs BCR_LCP_GSA, any other
# value runs gsufsort.
SHORT=1
#Fasta files path
PathDataset=$1
#Output files name
fastaName=$2

if [[ ! -d "$PathDataset" ]]; then
  echo "Error: '$PathDataset' is not a directory" >&2
  exit 1
fi
############################
InfoFile="${fastaName}.txt"
FastaDataset="${fastaName}.fasta"
############################
#Paths for installed tools (relative to the current working directory)
############################
pathseqtk="./Preprocessing/seqtk"
pathBCR="./Preprocessing/BCR_LCP_GSA"
pathgsufsort="./Preprocessing/gsufsort"
############################
############################
# Create the two output files, or empty them if they already exist.
: > "$InfoFile"
: > "$FastaDataset"
echo -e "\nComputing a single fasta file..."
# Merge every file of $PathDataset into $FastaDataset and write one
# "<name><TAB><number of sequences>" line per input file to $InfoFile.
for file in "$PathDataset"/*
do
  # Skip sub-directories and the literal pattern left by an unmatched glob.
  [[ -f "$file" ]] || continue
  # Never feed the output files back in when they live in the input directory.
  if [[ "$file" -ef "$FastaDataset" || "$file" -ef "$InfoFile" ]]; then
    continue
  fi

  NAME=$(basename "$file" .fasta)
  echo "$NAME"

  #Count string number: one header line (starting with '>') per sequence
  nReads=$(grep -c '^>' -- "$file")

  #Append F+RC (the second call adds -r to the first one)
  # Abort on failure: continuing would leave $InfoFile inconsistent with
  # the content of $FastaDataset.
  if ! "$pathseqtk/seqtk" seq -U "$file" >> "$FastaDataset" ||
     ! "$pathseqtk/seqtk" seq -r -U "$file" >> "$FastaDataset"; then
    echo "Error: seqtk failed on '$file'" >&2
    exit 1
  fi

  #Write fileInfo: every sequence was appended twice
  nReads=$((nReads * 2))
  printf '%s\t%s\n' "$NAME" "$nReads" >> "$InfoFile"
done
echo -e "\nComputing eBWT/DA/LCP..."
# Name shared by the log files that capture the tool's stdout and stderr.
logBase=$(basename "$FastaDataset" .fasta)
if [[ "$SHORT" -eq 1 ]]; then
  #BCR for eBWT/LCP/DA
  if [[ ! -x "$pathBCR/BCR_LCP_GSA" ]]; then
    echo "Error: '$pathBCR/BCR_LCP_GSA' not found or not executable" >&2
    exit 1
  fi
  # $FastaDataset is passed twice (presumably input file and output name).
  # NOTE(review): the exit status is not checked; confirm the tool's
  # exit-code convention before adding a check.
  /usr/bin/time -v "$pathBCR/BCR_LCP_GSA" "$FastaDataset" "$FastaDataset" \
    > "BCR_${logBase}.stdout" 2> "BCR_${logBase}.stderr"
  # NOTE(review): these globs remove every .len/.info file of the current
  # directory, not only those of this run. -f keeps rm quiet when nothing
  # matches.
  rm -f -- ./*.len
  rm -f -- ./*.info
else
  #gsufsort for eBWT/LCP/DA
  if [[ ! -x "$pathgsufsort/gsufsort-64" ]]; then
    echo "Error: '$pathgsufsort/gsufsort-64' not found or not executable" >&2
    exit 1
  fi
  # NOTE(review): exit status not checked, same caveat as the BCR branch.
  /usr/bin/time -v "$pathgsufsort/gsufsort-64" "$FastaDataset" --bwt --lcp 4 --da 4 \
    > "gsufsort_${logBase}.stdout" 2> "gsufsort_${logBase}.stderr"
  # Rename the gsufsort outputs to the .ebwt/.da/.lcp suffixes.
  mv -- "${FastaDataset}.bwt" "${FastaDataset}.ebwt"
  mv -- "${FastaDataset}.4.da" "${FastaDataset}.da"
  mv -- "${FastaDataset}.4.lcp" "${FastaDataset}.lcp"
fi
echo "Done."
echo -e "\nComputing CDA..."
# create_cda is looked up in the current working directory. Stop here when
# it is missing instead of printing a misleading "Done.".
if [[ ! -x ./create_cda ]]; then
  echo "Error: './create_cda' not found or not executable" >&2
  exit 1
fi
# NOTE(review): the exit status of create_cda is not checked; confirm its
# exit-code convention before adding a check.
./create_cda "$FastaDataset" "$InfoFile"
echo "Done."
####
####