Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with
or
.
Download ZIP
Browse files

Use new Statistics::CaseResampling

This adds calculation of median confidence intervals and improves
the performance of some other statistics.
  • Loading branch information...
commit 331ded45096803ffe22b976aba0d4c822f6bed92 1 parent 683e13c
@tsee authored
Showing with 14 additions and 17 deletions.
  1. +1 −1  Makefile.PL
  2. +13 −16 lib/Dumbbench/Stats.pm
View
2  Makefile.PL
@@ -13,7 +13,7 @@ WriteMakefile(
'Capture::Tiny' => '0',
'Params::Util' => '0',
'parent' => '0',
- 'Statistics::CaseResampling' => '0.04',
+ 'Statistics::CaseResampling' => '0.06',
}, # e.g., Module::Name => 1.1
($] >= 5.005 ? ## Add these new keywords supported since 5.005
(ABSTRACT_FROM => 'lib/Dumbbench.pm', # retrieve abstract from module
View
29 lib/Dumbbench/Stats.pm
@@ -20,21 +20,10 @@ sub sorted_data {
return $sorted;
}
-sub first_quartile {
- my $self = shift;
- my $n = $self->n;
- my $k = int($n/4) + 1;
- return Statistics::CaseResampling::select_kth($self->data, $k);
-}
-
+sub first_quartile { Statistics::CaseResampling::first_quartile($_[0]->data) }
sub second_quartile { return $_[0]->median }
+sub third_quartile { Statistics::CaseResampling::third_quartile($_[0]->data) }
-sub third_quartile {
- my $self = shift;
- my $n = $self->n;
- my $k = int($n*3/4) + 1;
- return Statistics::CaseResampling::select_kth($self->data, $k);
-}
sub n { scalar(@{$_[0]->data}) }
@@ -58,16 +47,24 @@ sub mean {
return $self->sum / $self->n;
}
-sub median {
+sub median { Statistics::CaseResampling::median($self->data) } # O(n)!
+
+sub median_confidence_limits {
my $self = shift;
- return Statistics::CaseResampling::median($self->data); # O(n)
+ my $nsigma = shift;
+ my $alpha = Statistics::CaseResampling::nsigma_to_alpha($nsigma);
+ # note: The 1000 here is kind of a lower limit for reasonable accuracy.
+ # But if the data set is small, that's more significant. If the data
+ # set is VERY large, then running much more than 1k resamplings
+ # is VERY expensive. So 1k is probably a reasonable default.
+ return Statistics::CaseResampling::median_simple_confidence_limits($self->data, 1-$alpha, 1000) }
}
sub mad {
my $self = shift;
my $median = $self->median;
my @val = map {abs($_ - $median)} @{$self->data};
- return ref($self)->new(data => \@val)->median();
+ return ref($self)->new(data => \@val)->median;
}
sub mad_dev {
Please sign in to comment.
Something went wrong with that request. Please try again.