forked from IssamLaradji/extreme-learning-machines
-
Notifications
You must be signed in to change notification settings - Fork 0
/
class_weight.py
109 lines (87 loc) · 3.68 KB
/
class_weight.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
# Authors: Andreas Mueller
# Manoj Kumar
# Issam H. Laradji
# License: BSD 3 clause
import numpy as np
def compute_class_weight(class_weight, classes, y):
    """Estimate class weights for unbalanced datasets.

    Parameters
    ----------
    class_weight : dict, 'auto' or None
        If 'auto', class weights will be given inverse proportional
        to the frequency of the class in the data.
        If a dictionary is given, keys are classes and values
        are corresponding class weights; classes that are not a key of
        the dictionary keep a weight of 1.
        If None (or an empty container) is given, the class weights
        will be uniform.

    classes : ndarray
        Array of the classes occurring in the data, as given by
        ``np.unique(y_org)`` with ``y_org`` the original class labels.
        Dictionary keys are located with a binary search, so the array
        must be sorted.

    y : array-like, shape (n_samples,)
        Array of original class labels per sample; only read when
        ``class_weight`` is 'auto'.

    Returns
    -------
    class_weight_vect : ndarray, shape (n_classes,)
        Array with class_weight_vect[i] the weight for i-th class

    Raises
    ------
    ValueError
        If ``class_weight`` is not a dict, 'auto' or None; if 'auto' is
        requested and ``classes`` holds a label that is absent from ``y``;
        or if a dictionary key is not one of ``classes``.
    """
    classes = np.asarray(classes)
    n_classes = classes.shape[0]

    # Objects without a length (e.g. a bare number) must fall through to the
    # ValueError below instead of failing inside len().
    is_empty = hasattr(class_weight, '__len__') and len(class_weight) == 0
    if class_weight is None or is_empty:
        # uniform class weights
        return np.ones(n_classes, dtype=np.float64, order='C')

    if class_weight == 'auto':
        # Imported here so that scikit-learn is only required by the one
        # branch that actually uses it.
        from sklearn.preprocessing import LabelEncoder

        # Find the weight of each class as present in y.
        le = LabelEncoder()
        y_ind = le.fit_transform(y)
        if not np.isin(classes, le.classes_).all():
            raise ValueError("classes should have valid labels that are in y")

        # inversely proportional to the number of samples in the class
        recip_freq = 1. / np.bincount(y_ind)
        return recip_freq[le.transform(classes)] / np.mean(recip_freq)

    # user-defined dictionary
    if not isinstance(class_weight, dict):
        # Wrap in a 1-tuple so that a tuple argument is formatted as a single
        # value instead of being unpacked by the % operator.
        raise ValueError("class_weight must be dict, 'auto', or None,"
                         " got: %r" % (class_weight,))

    weight = np.ones(n_classes, dtype=np.float64, order='C')
    for c, value in class_weight.items():
        i = np.searchsorted(classes, c)
        # searchsorted returns n_classes for a key greater than every class,
        # so the bound has to be checked before indexing.
        if i >= n_classes or classes[i] != c:
            # %s (not %d) so that non-numeric labels can be reported too.
            raise ValueError("Class label %s not present." % (c,))
        weight[i] = value
    return weight
def compute_sample_weight(class_weight, classes, y):
    """Compute sample weights for unbalanced datasets.

    Parameters
    ----------
    class_weight : dict, 'auto' or None
        If 'auto', class weights will be given inverse proportional
        to the frequency of the class in the data.
        If a dictionary is given, keys are classes and values
        are corresponding class weights.
        If None is given, no sample weights are computed.

    classes : ndarray
        Array of the classes occurring in the data, as given by
        ``np.unique(y_org)`` with ``y_org`` the original class labels.

    y : array-like, shape (n_samples,)
        Array of original class labels per sample;

    Returns
    -------
    sample_weight : ndarray, shape (n_samples,) or None
        Array where sample_weight[i] denotes the weight for the i-th sample.
        None is returned when ``class_weight`` is None.

    Raises
    ------
    ValueError
        If ``y`` contains a label that is not in ``classes``. Errors raised
        by ``compute_class_weight`` are passed through unchanged.
    """
    if class_weight is None:
        return None

    # Accept any array-like, as documented, rather than ndarrays only.
    y = np.asarray(y)
    classes = np.asarray(classes)

    weight = compute_class_weight(class_weight, classes, y)

    # Check if every y sample belongs to a class in classes
    if not np.isin(np.unique(y), classes).all():
        raise ValueError("'y' has classes not in 'classes'.")

    # ``weight`` is aligned with ``classes``, so every label has to be mapped
    # to its position in ``classes``. Positions within ``np.unique(y)`` would
    # differ as soon as ``classes`` holds a label that does not occur in
    # ``y``. The sorter keeps the lookup valid if ``classes`` is not sorted.
    order = np.argsort(classes)
    class_pos = order[np.searchsorted(classes, y, sorter=order)]
    return weight[class_pos]