-
Notifications
You must be signed in to change notification settings - Fork 3
/
kendall_w.py
80 lines (63 loc) · 2.23 KB
/
kendall_w.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
# TODO:
# - Perform series of tests to make compute_w robust
# - Check that items are RANKED for each annotator:
# -> Exception or Warning ?
# import pandas as pd
import warnings
def compute_w(data):
    """Compute Kendall's W from a list of rating lists.

    0 indicates no agreement and 1 indicates unanimous agreement.
    W is computed as ``12 * S / (m ** 2 * (n ** 3 - n))`` where ``n`` is the
    number of items, ``m`` the number of annotators and ``S`` the sum of
    squared deviations of the per-item rank sums from their mean.
    No correction for tied ranks is applied.

    Parameters
    ----------
    data : list
        List of lists of integers with shape (n_items, n_annotators):
        ``data[i][j]`` is the rank given to item ``i`` by annotator ``j``.

    Returns
    -------
    W : float
        Kendall's W [0:1]

    Raises
    ------
    AssertionError
        If ``data`` is not a list, if one of its elements is not a list,
        or if one of the ratings is not an integer.
    ValueError
        If ``data`` is empty, if the sublists do not all have the same
        length, or if there are fewer than two annotators or fewer than
        two items.

    Warns
    -----
    Warning
        If there are exactly two annotators.

    Example
    -------
    >>> annotations = [
    ...     [1, 1, 1, 2],  # item 1
    ...     [2, 2, 2, 3],  # item 2
    ...     [3, 3, 3, 1],  # item 3
    ... ]
    >>> # Annotator #4 disagrees with the other annotators
    >>> # Annotators #1, #2, #3 agree
    >>> compute_w(annotations)
    0.4375
    """
    # The type checks are raised explicitly instead of through `assert`
    # statements so they are not stripped when Python runs with -O.
    # AssertionError is kept so callers that already catch it keep working.
    if not isinstance(data, list):
        raise AssertionError(
            "You must pass a python list, {} found".format(type(data))
        )
    if not all(isinstance(row, list) for row in data):
        raise AssertionError("You must pass a list of python lists as input.")
    if not all(isinstance(rank, int) for row in data for rank in row):
        raise AssertionError("You must pass a list of lists of integers.")

    # An empty list has no first sublist to measure; fail with a clear
    # error rather than an IndexError on data[0].
    if not data:
        raise ValueError(
            "Argument 'data' is empty, Kendall's W needs at least two items."
        )

    # Number of annotators
    m = len(data[0])
    if not all(len(row) == m for row in data):
        raise ValueError(
            "Items must all have the same number of annotators. "
            "At least one sublist of argument 'data' has different length "
            "than the first sublist."
        )
    if m <= 1:
        raise ValueError(
            "Kendall's W is irrelevant for only one annotator, "
            "try adding more lists to argument 'data'."
        )
    if m == 2:
        # stacklevel=2 attributes the warning to the caller's line.
        warnings.warn(
            "Kendall's W is adapted to measure agreement between "
            "more than two annotators. The results might not be reliable "
            "in this case.",
            Warning,
            stacklevel=2,
        )

    # Number of items
    n = len(data)
    if n <= 1:
        raise ValueError(
            "Kendall's W is irrelevant for only one item, "
            "try adding more sublists to argument 'data'."
        )

    # Sum of the ranks received by each item
    rank_sums = [sum(row) for row in data]
    # Mean of the rank sums
    mean_rank_sum = sum(rank_sums) / n
    # Sum of squared deviations from the mean
    squared_dev = sum((total - mean_rank_sum) ** 2 for total in rank_sums)

    return (12 * squared_dev) / (m ** 2 * (n ** 3 - n))