Improve classification metric function (#240)
* update

* Update installation.rst (#244)

* update Scaler transform function (fit_transform => transform)

* Update index.rst

Add mdl library maintenance announcement

* Update README.md

Update information about mdl library maintenance.

* update

* fix test case

Co-authored-by: Stephen Wu <39758581+stewu5@users.noreply.github.com>
TsumiNa and stewu5 committed Oct 3, 2021
1 parent 5b23efe commit ad04f74
Showing 2 changed files with 74 additions and 29 deletions.
12 changes: 7 additions & 5 deletions tests/models/test_extension.py
@@ -315,8 +315,8 @@ def predict(self, x_, y_):
     val.step_forward(trainer=_Trainer(), step_info=step_info)  # noqa
     assert step_info['val_mae'] == regression_metrics(y, x)['mae']
     assert set(step_info.keys()) == {
-        'i_epoch', 'val_mae', 'val_mse', 'val_rmse', 'val_r2', 'val_pearsonr', 'val_spearmanr',
-        'val_p_value', 'val_max_ae', 'train_loss'
+        'i_epoch', 'val_mae', 'val_mse', 'val_rmse', 'val_r2', 'val_pearsonr', 'val_spearmanr', 'val_p_value',
+        'val_max_ae', 'train_loss'
     }


@@ -345,10 +345,12 @@ def predict(self, x_, y_):  # noqa

     step_info = OrderedDict(train_loss=0, i_epoch=1)
     val.step_forward(trainer=_Trainer(), step_info=step_info)  # noqa
-    assert step_info['val_f1'] == classification_metrics(y, x)['f1']
+    print(step_info)
+    assert step_info['val_accuracy'] == classification_metrics(y, x)['accuracy']
     assert set(step_info.keys()) == {
-        'i_epoch', 'val_accuracy', 'val_f1', 'val_precision', 'val_recall', 'val_macro_f1',
-        'val_macro_precision', 'val_macro_recall', 'train_loss'
+        'i_epoch', 'val_accuracy', 'val_f1', 'val_precision', 'val_recall', 'val_macro_f1', 'val_macro_precision',
+        'val_macro_recall', 'val_weighted_f1', 'val_weighted_precision', 'val_weighted_recall', 'val_micro_f1',
+        'val_micro_precision', 'val_micro_recall', 'train_loss'
     }


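The reworked classification test pins down the new contract: the assertions imply that the validation extension copies every score returned by classification_metrics into step_info under a val_ prefix, now including the weighted and micro averages. Below is a minimal sketch of that behaviour outside the trainer machinery; the toy arrays and the prefixing loop are illustrative stand-ins, not part of the test, and the import path follows the module shown in this commit.

from collections import OrderedDict

import numpy as np

from xenonpy.model.utils.metrics import classification_metrics

y_true = np.array([0, 1, 1, 0, 1])
y_pred = np.array([0, 1, 0, 0, 1])

# Mimic what the assertions describe: each metric key is prefixed with "val_".
step_info = OrderedDict(train_loss=0, i_epoch=1)
step_info.update({f'val_{name}': value for name, value in classification_metrics(y_true, y_pred).items()})

# step_info now holds i_epoch, train_loss, val_accuracy, val_f1, val_precision, val_recall,
# plus the val_weighted_*, val_micro_*, and val_macro_* averages checked by the test above.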
91 changes: 67 additions & 24 deletions xenonpy/model/utils/metrics.py
@@ -3,7 +3,7 @@
 # license that can be found in the LICENSE file.

 from collections import OrderedDict
-from typing import Union
+from typing import Union, List, Tuple

 import numpy as np
 import pandas as pd
@@ -14,8 +14,7 @@
 __all__ = ['regression_metrics', 'classification_metrics']


-def regression_metrics(y_true: Union[np.ndarray, pd.Series],
-                       y_pred: Union[np.ndarray, pd.Series]) -> OrderedDict:
+def regression_metrics(y_true: Union[np.ndarray, pd.Series], y_pred: Union[np.ndarray, pd.Series]) -> OrderedDict:
     """
     Calculate most common regression scores.
     See Also: https://scikit-learn.org/stable/modules/model_evaluation.html
@@ -63,26 +62,56 @@ def regression_metrics(y_true: Union[np.ndarray, pd.Series],


 def classification_metrics(
-        y_true: Union[np.ndarray, pd.DataFrame, pd.Series],
-        y_pred: Union[np.ndarray, pd.Series]) -> OrderedDict:
+    y_true: Union[np.ndarray, pd.DataFrame, pd.Series],
+    y_pred: Union[np.ndarray, pd.Series],
+    *,
+    average: Union[None, List[str], Tuple[str]] = ('weighted', 'micro', 'macro'),
+    labels=None,
+) -> dict:
     """
     Calculate most common classification scores.
-    See Also: https://scikit-learn.org/stable/modules/model_evaluation.html
+    See also: https://scikit-learn.org/stable/modules/model_evaluation.html

     Parameters
     ----------
     y_true
         True results.
     y_pred
         Predicted results.
+    average
+        This parameter is required for multiclass/multilabel targets. If None, the scores for each class are returned.
+        Otherwise, this determines the type of averaging performed on the data:
+        binary:
+            Only report results for the class specified by pos_label. This is applicable only if targets (y_{true,pred})
+            are binary.
+        micro:
+            Calculate metrics globally by counting the total true positives, false negatives and false positives.
+        macro:
+            Calculate metrics for each label, and find their unweighted mean. This does not take label imbalance into
+            account.
+        weighted:
+            Calculate metrics for each label, and find their average weighted by support (the number of true instances
+            for each label). This alters ``macro`` to account for label imbalance; it can result in an F-score that is
+            not between precision and recall.
+    labels
+        The set of labels to include when average != ``binary``, and their order if average is None.
+        Labels present in the data can be excluded, for example to calculate a multiclass average ignoring a majority
+        negative class, while labels not present in the data will result in 0 components in a macro average.
+        For multilabel targets, labels are column indices.
+        By default, all labels in y_true and y_pred are used in sorted order.

     Returns
     -------
     OrderedDict
         An :class:`collections.OrderedDict` contains classification scores.
-        These scores will be calculated: ``accuracy``, ``f1``, ``precision``, ``recall``,
-        ``macro_f1``, ``macro_precision``, and ``macro_recall``
+        These scores will always contain ``accuracy``, ``f1``, ``precision`` and ``recall``.
+        For multilabel targets, based on the selection of the ``average`` parameter, the **weighted**, **micro**,
+        and **macro** scores of ``f1``, ``precision``, and ``recall`` will be calculated.
     """
+    if average is not None and len(average) == 0:
+        raise ValueError('need average')
+
     if len(y_true.shape) != 1:
         y_true = np.argmax(y_true, 1)
     if len(y_pred.shape) != 1:
@@ -92,19 +121,33 @@ def classification_metrics(
     y_true = y_true[mask]
     y_pred = y_pred[mask]

-    accuracy = accuracy_score(y_true, y_pred)
-    f1 = f1_score(y_true, y_pred, average='weighted')
-    precision = precision_score(y_true, y_pred, average='weighted')
-    recall = recall_score(y_true, y_pred, average='weighted')
-    macro_f1 = f1_score(y_true, y_pred, average='macro')
-    macro_precision = precision_score(y_true, y_pred, average='macro')
-    macro_recall = recall_score(y_true, y_pred, average='macro')
-    return OrderedDict(
-        accuracy=accuracy,
-        f1=f1,
-        precision=precision,
-        recall=recall,
-        macro_f1=macro_f1,
-        macro_precision=macro_precision,
-        macro_recall=macro_recall,
-    )
+    ret = dict(accuracy=accuracy_score(y_true, y_pred))
+
+    ret.update(
+        f1=f1_score(y_true, y_pred, average=None, labels=labels),
+        precision=precision_score(y_true, y_pred, average=None, labels=labels),
+        recall=recall_score(y_true, y_pred, average=None, labels=labels),
+    )
+
+    # Guard for ``average=None``: only the per-class scores above were requested,
+    # and the membership tests below would fail on None.
+    if average is None:
+        return ret
+
+    if 'weighted' in average:
+        ret.update(
+            weighted_f1=f1_score(y_true, y_pred, average='weighted', labels=labels),
+            weighted_precision=precision_score(y_true, y_pred, average='weighted', labels=labels),
+            weighted_recall=recall_score(y_true, y_pred, average='weighted', labels=labels),
+        )
+
+    if 'micro' in average:
+        ret.update(
+            micro_f1=f1_score(y_true, y_pred, average='micro', labels=labels),
+            micro_precision=precision_score(y_true, y_pred, average='micro', labels=labels),
+            micro_recall=recall_score(y_true, y_pred, average='micro', labels=labels),
+        )
+
+    if 'macro' in average:
+        ret.update(
+            macro_f1=f1_score(y_true, y_pred, average='macro', labels=labels),
+            macro_precision=precision_score(y_true, y_pred, average='macro', labels=labels),
+            macro_recall=recall_score(y_true, y_pred, average='macro', labels=labels),
+        )
+
+    return ret
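
For reference, a short usage sketch of the updated function. The arrays and label values below are made up for illustration, and the import path is taken from the file path shown in this diff.

import numpy as np

from xenonpy.model.utils.metrics import classification_metrics

y_true = np.array([0, 2, 1, 2, 0, 1])
y_pred = np.array([0, 1, 1, 2, 0, 2])

# Default averaging ('weighted', 'micro', 'macro'): per-class f1/precision/recall
# plus the three averaged variants of each.
scores = classification_metrics(y_true, y_pred)
print(sorted(scores))  # accuracy, f1, macro_*, micro_*, precision, recall, weighted_*

# Restrict the averaged scores to macro only, over an explicit label set.
macro_only = classification_metrics(y_true, y_pred, average=['macro'], labels=[0, 1, 2])
print(macro_only['macro_f1'])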
