-
Notifications
You must be signed in to change notification settings - Fork 2.1k
/
ttest.go
170 lines (148 loc) · 5.19 KB
/
ttest.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package mathstats
import (
"errors"
"math"
)
// A LocationHypothesis specifies the alternative hypothesis of a
// location test such as a t-test or a Mann-Whitney U-test. The
// default (zero) value is LocationDiffers, which tests against the
// alternative hypothesis that the locations differ.
type LocationHypothesis int
const (
// LocationLess specifies the alternative hypothesis that the
// location of the first sample is less than the second. This
// is a one-tailed test.
LocationLess LocationHypothesis = -1
// LocationDiffers specifies the alternative hypothesis that
// the locations of the two samples are not equal. This is a
// two-tailed test. It is the zero value of LocationHypothesis.
LocationDiffers LocationHypothesis = 0
// LocationGreater specifies the alternative hypothesis that
// the location of the first sample is greater than the
// second. This is a one-tailed test.
LocationGreater LocationHypothesis = 1
)
// A TTestResult is the result of a t-test.
type TTestResult struct {
// N1 and N2 are the sizes of the input samples. For a
// one-sample t-test, N2 is 0. For tests that take a
// TTestSample, these are the sample weights converted to int.
N1, N2 int
// T is the value of the t-statistic for this t-test.
T float64
// DoF is the degrees of freedom for this t-test. It is a
// float64 because Welch's t-test can yield a non-integer value.
DoF float64
// AltHypothesis specifies the alternative hypothesis tested
// by this test against the null hypothesis that there is no
// difference in the means of the samples.
AltHypothesis LocationHypothesis
// P is the p-value for this t-test, computed for the
// alternative hypothesis given by AltHypothesis.
P float64
}
// newTTestResult builds the TTestResult for a t-statistic t with dof
// degrees of freedom. n1 and n2 are recorded as the sample sizes.
//
// The p-value is derived from the CDF of the t-distribution with dof
// degrees of freedom, using the tail(s) selected by alt. If alt is
// not one of the defined LocationHypothesis values, P is left at
// zero.
func newTTestResult(n1, n2 int, t, dof float64, alt LocationHypothesis) *TTestResult {
	res := &TTestResult{N1: n1, N2: n2, T: t, DoF: dof, AltHypothesis: alt}

	tdist := TDist{dof}
	switch alt {
	case LocationLess:
		// Lower tail only.
		res.P = tdist.CDF(t)
	case LocationGreater:
		// Upper tail only.
		res.P = 1 - tdist.CDF(t)
	case LocationDiffers:
		// Both tails: twice the upper tail beyond |t|.
		res.P = 2 * (1 - tdist.CDF(math.Abs(t)))
	}
	return res
}
// A TTestSample is a sample that can be used for a one or two sample
// t-test.
type TTestSample interface {
// Weight is used by the t-tests as the sample size n, both in
// the t-statistic and when computing degrees of freedom.
Weight() float64
// Mean is used by the t-tests as the sample mean.
Mean() float64
// Variance is used by the t-tests as the sample variance.
// NOTE(review): the formulas presumably expect the unbiased
// (n-1) sample variance; confirm against implementations.
Variance() float64
}
// Errors returned by the t-tests in this file.
var (
// ErrSampleSize is returned when a sample is too small for the
// requested test.
ErrSampleSize = errors.New("sample is too small")
// ErrZeroVariance is returned when the test cannot be computed
// because the relevant sample variance is zero.
ErrZeroVariance = errors.New("sample has zero variance")
// ErrMismatchedSamples is returned by PairedTTest when the two
// samples do not have the same length.
ErrMismatchedSamples = errors.New("samples have different lengths")
)
// TwoSampleTTest performs a two-sample (unpaired) Student's t-test on
// samples x1 and x2. This is a test of the null hypothesis that x1
// and x2 are drawn from populations with equal means. It assumes x1
// and x2 are independent samples, that the distributions have equal
// variance, and that the populations are normally distributed.
//
// It returns ErrSampleSize if either sample is empty or if the two
// samples together leave no positive degrees of freedom (that is,
// n1 + n2 <= 2). It returns ErrZeroVariance if both samples have
// zero variance.
func TwoSampleTTest(x1, x2 TTestSample, alt LocationHypothesis) (*TTestResult, error) {
	n1, n2 := x1.Weight(), x2.Weight()
	if n1 == 0 || n2 == 0 {
		return nil, ErrSampleSize
	}
	v1, v2 := x1.Variance(), x2.Variance()
	if v1 == 0 && v2 == 0 {
		return nil, ErrZeroVariance
	}

	dof := n1 + n2 - 2
	if dof <= 0 {
		// The pooled variance below divides by dof, and the
		// t-distribution needs positive degrees of freedom.
		// Without this guard, two single-observation samples would
		// produce a NaN statistic alongside a nil error.
		return nil, ErrSampleSize
	}

	// Pooled variance: the per-sample variances weighted by their
	// own degrees of freedom.
	v12 := ((n1-1)*v1 + (n2-1)*v2) / dof
	t := (x1.Mean() - x2.Mean()) / math.Sqrt(v12*(1/n1+1/n2))
	return newTTestResult(int(n1), int(n2), t, dof, alt), nil
}
// TwoSampleWelchTTest performs a two-sample (unpaired) Welch's t-test
// on samples x1 and x2. It tests the same null hypothesis as
// TwoSampleTTest, but without assuming that the two distributions
// have equal variance.
//
// It returns ErrSampleSize if either sample has a weight of 1 or
// less, and ErrZeroVariance if both samples have zero variance.
func TwoSampleWelchTTest(x1, x2 TTestSample, alt LocationHypothesis) (*TTestResult, error) {
	size1, size2 := x1.Weight(), x2.Weight()
	if size1 <= 1 || size2 <= 1 {
		// TODO: Can we still do this with n == 1?
		return nil, ErrSampleSize
	}
	var1, var2 := x1.Variance(), x2.Variance()
	if var1 == 0 && var2 == 0 {
		return nil, ErrZeroVariance
	}

	// Each sample's contribution to the squared standard error of
	// the difference in means.
	se1, se2 := var1/size1, var2/size2
	total := se1 + se2

	// Welch–Satterthwaite approximation of the degrees of freedom.
	dof := math.Pow(total, 2) /
		(math.Pow(se1, 2)/(size1-1) + math.Pow(se2, 2)/(size2-1))
	t := (x1.Mean() - x2.Mean()) / math.Sqrt(total)
	return newTTestResult(int(size1), int(size2), t, dof, alt), nil
}
// PairedTTest performs a two-sample paired t-test on samples x1 and
// x2, pairing x1[i] with x2[i]. It tests whether the mean of the
// pairwise differences x1[i] - x2[i] differs significantly from μ0
// (pass 0 to test for no difference).
//
// It returns ErrMismatchedSamples if x1 and x2 have different
// lengths, ErrSampleSize if they hold fewer than two pairs, and
// ErrZeroVariance if the differences have zero standard deviation
// (for example, when x1 and x2 are identical). The result is nil
// whenever an error is returned.
func PairedTTest(x1, x2 []float64, μ0 float64, alt LocationHypothesis) (*TTestResult, error) {
	n := len(x1)
	switch {
	case n != len(x2):
		return nil, ErrMismatchedSamples
	case n <= 1:
		// TODO: Can we still do this with n == 1?
		return nil, ErrSampleSize
	}

	// Reduce the paired samples to a single sample of differences.
	deltas := make([]float64, 0, n)
	for i, a := range x1 {
		deltas = append(deltas, a-x2[i])
	}

	sd := StdDev(deltas)
	if sd == 0 {
		// TODO: Can we still do the test?
		return nil, ErrZeroVariance
	}

	t := (Mean(deltas) - μ0) * math.Sqrt(float64(n)) / sd
	return newTTestResult(n, n, t, float64(n-1), alt), nil
}
// OneSampleTTest performs a one-sample t-test on sample x. This tests
// the null hypothesis that the population mean is equal to μ0. This
// assumes the distribution of the population of sample means is
// normal.
//
// It returns ErrSampleSize if the sample is empty or leaves no
// positive degrees of freedom (that is, n <= 1), and ErrZeroVariance
// if the sample has zero variance.
func OneSampleTTest(x TTestSample, μ0 float64, alt LocationHypothesis) (*TTestResult, error) {
	n, v := x.Weight(), x.Variance()
	if n == 0 {
		return nil, ErrSampleSize
	}
	if v == 0 {
		// TODO: Can we still do the test?
		return nil, ErrZeroVariance
	}

	dof := n - 1
	if dof <= 0 {
		// A t-distribution needs positive degrees of freedom.
		// Without this guard, a single observation would yield a
		// meaningless p-value alongside a nil error.
		return nil, ErrSampleSize
	}

	t := (x.Mean() - μ0) * math.Sqrt(n) / math.Sqrt(v)
	return newTTestResult(int(n), 0, t, dof, alt), nil
}