-
Notifications
You must be signed in to change notification settings - Fork 0
/
print-n50.awk
103 lines (93 loc) · 1.76 KB
/
print-n50.awk
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
#### Splits JBAT scaffolds into contigs at runs of N, then reports the contig N50 and the whole-scaffold N50 ####
# Per-record rule: accumulates contig and scaffold lengths.
#
# A line containing '>' is treated as the start of a new scaffold; every other
# line is treated as sequence. Within sequence, an uppercase 'N' ends the
# current contig, so a contig is a maximal run of non-N characters and may span
# several lines.
#
# Globals written here and read by the END rule:
#   gapless_counter   length of the contig currently being accumulated
#   s, clength[]      number of finished contigs and their lengths
#   total_length      length (N included) of the scaffold being accumulated
#   scaffold_counter, slength[]   number of finished scaffolds and their lengths
{
    if ($0 ~ />/) {
        # A header closes whatever contig was still open.
        if (gapless_counter > 0)
            clength[++s] = gapless_counter
        gapless_counter = 0

        # Close the previous scaffold. Nothing is recorded when the header is
        # the very first record, because no scaffold precedes it.
        if (NR > 1) {
            slength[++scaffold_counter] = total_length
            total_length = 0
        }
        next
    }

    total_length += length($0)

    if (index($0, "N") == 0) {
        # No gap on this line: the whole line extends the open contig.
        gapless_counter += length($0)
    } else {
        # Cut the line at each run of N. The first piece extends the contig
        # carried over from earlier lines; the last piece stays open so that
        # the following line can extend it.
        nseg = split($0, seg, /N+/)
        for (k = 1; k <= nseg; k++) {
            gapless_counter += length(seg[k])
            if (k < nseg) {
                # An N run follows this piece, so the contig ends here.
                if (gapless_counter > 0)
                    clength[++s] = gapless_counter
                gapless_counter = 0
            }
        }
    }
}
# END rule: closes the last open contig and scaffold, then prints total length,
# count, N50 and largest element, first for contigs and then for scaffolds.
#
# Requires gawk: asort() is a gawk extension. The ' flag in %'d asks for
# thousands grouping and only has an effect in locales that define a separator.
END{
    # Sequence that runs to end-of-input has no following header or N to close it.
    if (gapless_counter > 0)
        clength[++s] = gapless_counter

    # Record the final scaffold only when at least one record was read;
    # otherwise empty input would be reported as one zero-length scaffold.
    if (NR > 0)
        slength[++scaffold_counter] = total_length

    # print contig stuff
    asort(clength)                      # ascending: the largest ends up at clength[s]
    totallength = sum_lengths(clength, s)
    N50 = n50_of_sorted(clength, s, totallength)
    printf("Total contig length:\t%'d\n", totallength)
    printf("No of contigs:\t%'d\n", s)
    printf("contig N50:\t%'d\n", N50)
    printf("Largest contig:\t%'d\n", clength[s])
    print "---------"

    # print scaffold stuff
    asort(slength)
    s = scaffold_counter + 0            # +0 so an unset counter is the number 0
    totallength = sum_lengths(slength, s)
    N50 = n50_of_sorted(slength, s, totallength)
    printf("Total scaffold length:\t%'d\n", totallength)
    printf("No of scaffolds:\t%'d\n", s)
    printf("scaffold N50:\t%'d\n", N50)
    printf("Largest scaffold:\t%'d\n", slength[s])
}

# Returns the sum of len[1..n] (0 when n < 1).
function sum_lengths(len, n,    idx, total)
{
    total = 0
    for (idx = 1; idx <= n; idx++)
        total += len[idx]
    return total
}

# Returns the N50 of len[1..n], which must be sorted ascending and sum to total:
# walking down from the largest entry, the entry at which the running sum first
# reaches half of total. Returns 0 when n < 1.
function n50_of_sorted(len, n, total,    idx, running)
{
    if (n < 1)
        return 0
    idx = n
    running = len[idx]
    # idx > 1 keeps the walk inside the array even for degenerate input.
    while (running < total / 2 && idx > 1)
        running += len[--idx]
    return len[idx]
}