Skip to content

Commit 47c0ca8

Browse files
authored
math.stats: support int/i64 arrays, fix tests (fix #23245) (#23249)
1 parent f089ba9 commit 47c0ca8

File tree

2 files changed

+525
-178
lines changed

2 files changed

+525
-178
lines changed

vlib/math/stats/stats.v

Lines changed: 86 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ pub fn mean[T](data []T) T {
3131
for v in data {
3232
sum += v
3333
}
34-
return sum / T(data.len)
34+
return T(sum / data.len)
3535
}
3636

3737
// geometric_mean calculates the central tendency
@@ -42,11 +42,16 @@ pub fn geometric_mean[T](data []T) T {
4242
if data.len == 0 {
4343
return T(0)
4444
}
45-
mut sum := 1.0
45+
mut sum := T(1)
4646
for v in data {
4747
sum *= v
4848
}
49-
return math.pow(sum, 1.0 / T(data.len))
49+
$if T is f64 {
50+
return math.pow(sum, f64(1.0) / data.len)
51+
} $else {
52+
// use f32 for f32/int/...
53+
return T(math.powf(sum, f32(1.0) / data.len))
54+
}
5055
}
5156

5257
// harmonic_mean calculates the reciprocal of the average of reciprocals
@@ -57,11 +62,20 @@ pub fn harmonic_mean[T](data []T) T {
5762
if data.len == 0 {
5863
return T(0)
5964
}
60-
mut sum := T(0)
61-
for v in data {
62-
sum += 1.0 / v
65+
$if T is f64 {
66+
mut sum := f64(0)
67+
for v in data {
68+
sum += f64(1.0) / v
69+
}
70+
return f64(data.len / sum)
71+
} $else {
72+
// use f32 for f32/int/...
73+
mut sum := f32(0)
74+
for v in data {
75+
sum += f32(1.0) / f32(v)
76+
}
77+
return T(data.len / sum)
6378
}
64-
return T(data.len) / sum
6579
}
6680

6781
// median returns the middlemost value of the given input array ( input array is assumed to be sorted )
@@ -106,11 +120,21 @@ pub fn rms[T](data []T) T {
106120
if data.len == 0 {
107121
return T(0)
108122
}
109-
mut sum := T(0)
110-
for v in data {
111-
sum += math.pow(v, 2)
123+
124+
$if T is f64 {
125+
mut sum := f64(0)
126+
for v in data {
127+
sum += math.pow(v, 2)
128+
}
129+
return math.sqrt(sum / data.len)
130+
} $else {
131+
// use f32 for f32/int/...
132+
mut sum := f32(0)
133+
for v in data {
134+
sum += math.powf(v, 2)
135+
}
136+
return T(math.sqrtf(sum / data.len))
112137
}
113-
return math.sqrt(sum / T(data.len))
114138
}
115139

116140
// population_variance is the Measure of Dispersion / Spread
@@ -134,11 +158,12 @@ pub fn population_variance_mean[T](data []T, mean T) T {
134158
if data.len == 0 {
135159
return T(0)
136160
}
161+
137162
mut sum := T(0)
138163
for v in data {
139-
sum += (v - mean) * (v - mean)
164+
sum += T((v - mean) * (v - mean))
140165
}
141-
return sum / T(data.len)
166+
return T(sum / data.len)
142167
}
143168

144169
// sample_variance calculates the spread of dataset around the mean
@@ -162,9 +187,9 @@ pub fn sample_variance_mean[T](data []T, mean T) T {
162187
}
163188
mut sum := T(0)
164189
for v in data {
165-
sum += (v - mean) * (v - mean)
190+
sum += T((v - mean) * (v - mean))
166191
}
167-
return sum / T(data.len - 1)
192+
return T(sum / (data.len - 1))
168193
}
169194

170195
// population_stddev calculates how spread out the dataset is
@@ -175,7 +200,11 @@ pub fn population_stddev[T](data []T) T {
175200
if data.len == 0 {
176201
return T(0)
177202
}
178-
return math.sqrt(population_variance[T](data))
203+
$if T is f64 {
204+
return math.sqrt(population_variance[T](data))
205+
} $else {
206+
return T(math.sqrtf(population_variance[T](data)))
207+
}
179208
}
180209

181210
// population_stddev_mean calculates how spread out the dataset is, with the provide mean
@@ -186,7 +215,11 @@ pub fn population_stddev_mean[T](data []T, mean T) T {
186215
if data.len == 0 {
187216
return T(0)
188217
}
189-
return T(math.sqrt(f64(population_variance_mean[T](data, mean))))
218+
$if T is f64 {
219+
return math.sqrt(population_variance_mean[T](data, mean))
220+
} $else {
221+
return T(math.sqrtf(population_variance_mean[T](data, mean)))
222+
}
190223
}
191224

192225
// Measure of Dispersion / Spread
@@ -198,7 +231,11 @@ pub fn sample_stddev[T](data []T) T {
198231
if data.len == 0 {
199232
return T(0)
200233
}
201-
return T(math.sqrt(f64(sample_variance[T](data))))
234+
$if T is f64 {
235+
return math.sqrt(sample_variance[T](data))
236+
} $else {
237+
return T(math.sqrtf(sample_variance[T](data)))
238+
}
202239
}
203240

204241
// Measure of Dispersion / Spread
@@ -210,7 +247,11 @@ pub fn sample_stddev_mean[T](data []T, mean T) T {
210247
if data.len == 0 {
211248
return T(0)
212249
}
213-
return T(math.sqrt(f64(sample_variance_mean[T](data, mean))))
250+
$if T is f64 {
251+
return math.sqrt(sample_variance_mean[T](data, mean))
252+
} $else {
253+
return T(math.sqrtf(sample_variance_mean[T](data, mean)))
254+
}
214255
}
215256

216257
// absdev calculates the average distance between each data point and the mean
@@ -236,7 +277,7 @@ pub fn absdev_mean[T](data []T, mean T) T {
236277
for v in data {
237278
sum += math.abs(v - mean)
238279
}
239-
return sum / T(data.len)
280+
return T(sum / data.len)
240281
}
241282

242283
// tts, Sum of squares, calculates the sum over all squared differences between values and overall mean
@@ -256,7 +297,7 @@ pub fn tss_mean[T](data []T, mean T) T {
256297
}
257298
mut tss := T(0)
258299
for v in data {
259-
tss += (v - mean) * (v - mean)
300+
tss += T((v - mean) * (v - mean))
260301
}
261302
return tss
262303
}
@@ -393,7 +434,7 @@ pub fn covariance_mean[T](data1 []T, data2 []T, mean1 T, mean2 T) T {
393434
for i in 0 .. n {
394435
delta1 := data1[i] - mean1
395436
delta2 := data2[i] - mean2
396-
covariance += (delta1 * delta2 - covariance) / (T(i) + 1.0)
437+
covariance += T((delta1 * delta2 - covariance) / (i + T(1)))
397438
}
398439
return covariance
399440
}
@@ -418,10 +459,10 @@ pub fn lag1_autocorrelation_mean[T](data []T, mean T) T {
418459
for i := 1; i < data.len; i++ {
419460
delta0 := data[i - 1] - mean
420461
delta1 := data[i] - mean
421-
q += (delta0 * delta1 - q) / (T(i) + 1.0)
422-
v += (delta1 * delta1 - v) / (T(i) + 1.0)
462+
q += T((delta0 * delta1 - q) / (i + T(1)))
463+
v += T((delta1 * delta1 - v) / (T(i) + T(1)))
423464
}
424-
return q / v
465+
return T(q / v)
425466
}
426467

427468
// kurtosis calculates the measure of the 'tailedness' of the data by finding mean and standard of deviation
@@ -435,16 +476,19 @@ pub fn kurtosis[T](data []T) T {
435476
// kurtosis_mean_stddev calculates the measure of the 'tailedness' of the data
436477
// using the fourth moment the deviations, normalized by the sd
437478
pub fn kurtosis_mean_stddev[T](data []T, mean T, sd T) T {
479+
if data.len == 0 {
480+
return T(0)
481+
}
438482
mut avg := T(0) // find the fourth moment the deviations, normalized by the sd
439483
/*
440484
we use a recurrence relation to stably update a running value so
441485
* there aren't any large sums that can overflow
442486
*/
443487
for i, v in data {
444488
x := (v - mean) / sd
445-
avg += (x * x * x * x - avg) / (T(i) + 1.0)
489+
avg += T((x * x * x * x - avg) / (i + T(1)))
446490
}
447-
return avg - T(3.0)
491+
return avg - T(3)
448492
}
449493

450494
// skew calculates the mean and standard of deviation to find the skew from the data
@@ -457,31 +501,39 @@ pub fn skew[T](data []T) T {
457501

458502
// skew_mean_stddev calculates the skewness of data
459503
pub fn skew_mean_stddev[T](data []T, mean T, sd T) T {
504+
if data.len == 0 {
505+
return T(0)
506+
}
460507
mut skew := T(0) // find the sum of the cubed deviations, normalized by the sd.
461508
/*
462509
we use a recurrence relation to stably update a running value so
463510
* there aren't any large sums that can overflow
464511
*/
465512
for i, v in data {
466513
x := (v - mean) / sd
467-
skew += (x * x * x - skew) / (T(i) + 1.0)
514+
skew += T((x * x * x - skew) / (i + T(1)))
468515
}
469516
return skew
470517
}
471518

472519
// quantile calculates quantile points
473520
// for more reference
474521
// https://en.wikipedia.org/wiki/Quantile
475-
pub fn quantile[T](sorted_data []T, f T) T {
522+
pub fn quantile[T](sorted_data []T, f T) !T {
476523
if sorted_data.len == 0 {
477524
return T(0)
478525
}
479-
index := f * (T(sorted_data.len) - 1.0)
526+
index := f * (sorted_data.len - 1)
480527
lhs := int(index)
481-
delta := index - T(lhs)
482-
return if lhs == sorted_data.len - 1 {
483-
sorted_data[lhs]
528+
if lhs < 0 || lhs >= sorted_data.len {
529+
return error('index out of range')
530+
} else if lhs == sorted_data.len - 1 {
531+
return sorted_data[lhs]
484532
} else {
485-
(1.0 - delta) * sorted_data[lhs] + delta * sorted_data[(lhs + 1)]
533+
if lhs >= sorted_data.len - 1 {
534+
return error('index out of range')
535+
}
536+
delta := index - T(lhs)
537+
return T((1 - delta) * sorted_data[lhs] + delta * sorted_data[(lhs + 1)])
486538
}
487539
}

0 commit comments

Comments
 (0)