-
Notifications
You must be signed in to change notification settings - Fork 7
/
dadiBoot.pl
executable file
·66 lines (52 loc) · 1.46 KB
/
dadiBoot.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
#!/usr/bin/env perl
$usage="
dadiBoot.pl: generates bootstrapped dadi data
Arguments:
in=[filename] : file generated by vcf2dadi.pl. Bootstrap will be done with replacement
over entries in the \"Gene\" column.
boot=[integer]: number of bootstrap replicates. Default 100.
Output:
Files named as the infile with \".boot[repnum]\" appended
(the last digit in the coordinates of SNPs in the bootstrapped files is the repeat
number for that gene; this is to make all the last column entries unique)
Mikhail Matz, UT Austin, matz\@utexas.edu
";
my $inname;
my $nboot=100;
if ("@ARGV"=~/in=(\S+)/) { $inname=$1; } else { die $usage;}
if ("@ARGV"=~/boot=(\d+)/) { $nboot=$1; }
my %bygene={};
open IN, "$inname" or die "cannot open $inname\n";
my $g=0;
my $header;
while (<IN>) {
if ($g==0) {
$header=$_;
my @h=split('\s',$header);
for ($g=0;$h[$g] ne "Gene";$g++){}
print "genecolumn: ",$g+1,"\n";
next;
}
my @l=split('\s',$_);
$bygene{$l[$g]}.=$_;
}
my @genes=keys(%bygene);
my $ngenes=$#genes+1;
print "$ngenes genes detected\n";
my $b=0;
for ($b=1;$b<=$nboot;$b++){
my $bootfile =$inname.".boot".$b;
print "boot $b\n";
open BOOT, ">$bootfile" or die "cannot create $bootfile\n";
print {BOOT} $header;
my %seen;
for ($g=0;$g<$ngenes;$g++){
my $gene=int(rand($#genes));
$seen{$gene}++;
my $gname=$genes[$gene];
my $line=$bygene{$gname};
$line=~s/\n/$seen{$gene}\n/g;
print {BOOT} $bygene{$genes[$gene]};
}
close BOOT;
}