ranking ndcg (#1461)
## Description

Add ranking NDCG (normalized discounted cumulative gain) to `log_batch_ranking_metrics`. The metric is logged as `norm_dis_cumul_gain`, with a `_k_<k>` suffix when `k` is given; `k` is now an optional parameter.
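
A minimal usage sketch, assuming only the public API exercised by the tests in this change (the data is the worked example from the Wikipedia DCG article used in `test_log_batch_ranking_metrics_ranking_ndcg_wikipedia`). Note that when `target_column` is omitted, the new code synthesizes descending relevances from the prediction order.

```python
import pandas as pd

from whylogs.experimental.api.logger import log_batch_ranking_metrics

# Relevance judgments and predicted scores from the Wikipedia DCG example,
# mirroring the new test below.
ranking_df = pd.DataFrame(
    {
        "targets": [[3, 2, 3, 0, 1, 2, 3, 2]],
        "predictions": [[7, 6, 5, 4, 3, 2, 1, 0]],
    }
)

result = log_batch_ranking_metrics(
    data=ranking_df,
    prediction_column="predictions",
    target_column="targets",
    k=6,
)
summary = result.view().to_pandas()

# NDCG is logged as "norm_dis_cumul_gain_k_<k>" when k is given,
# or as "norm_dis_cumul_gain" when it is omitted.
print(summary.loc["norm_dis_cumul_gain_k_6", "distribution/median"])  # ~0.785
```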


- [x] I have reviewed the [Guidelines for Contributing](CONTRIBUTING.md)
and the [Code of Conduct](CODE_OF_CONDUCT.md).

---------

Co-authored-by: Bernease Herman <bernease@gmail.com>
Co-authored-by: felipe207 <felipe@whylabs.ai>
3 people committed Jan 31, 2024
1 parent df90f72 commit b626744
Showing 2 changed files with 138 additions and 27 deletions.
108 changes: 93 additions & 15 deletions python/tests/experimental/api/test_logger.py
@@ -1,3 +1,5 @@
from math import isclose

from whylogs.core.stubs import pd
from whylogs.experimental.api.logger import log_batch_ranking_metrics

@@ -15,37 +17,42 @@ def test_log_batch_ranking_metrics_single_simple():
}
)
result = log_batch_ranking_metrics(
k=1, data=single_df, prediction_column="raw_predictions", target_column="raw_targets"
data=single_df, prediction_column="raw_predictions", target_column="raw_targets", convert_non_numeric=True
)
pandas_summary = result.view().to_pandas()

k = 1
column_names = [
"mean_average_precision_k_" + str(k),
"accuracy_k_" + str(k),
"mean_average_precision",
"accuracy",
"mean_reciprocal_rank",
"precision_k_" + str(k),
"recall_k_" + str(k),
"precision",
"recall",
"top_rank",
"average_precision_k_" + str(k),
"average_precision",
"norm_dis_cumul_gain",
]
for col in column_names:
assert col in pandas_summary.index
assert pandas_summary.loc["mean_average_precision_k_" + str(k), "counts/n"] == 1
assert pandas_summary.loc["accuracy_k_" + str(k), "counts/n"] == 1
assert pandas_summary.loc["mean_average_precision", "counts/n"] == 1
assert pandas_summary.loc["accuracy", "counts/n"] == 1
assert pandas_summary.loc["mean_reciprocal_rank", "counts/n"] == 1
assert pandas_summary.loc["precision_k_" + str(k), "counts/n"] == 4
assert pandas_summary.loc["recall_k_" + str(k), "counts/n"] == 4
assert pandas_summary.loc["precision", "counts/n"] == 4
assert pandas_summary.loc["recall", "counts/n"] == 4
assert pandas_summary.loc["top_rank", "counts/n"] == 4
assert pandas_summary.loc["average_precision_k_" + str(k), "counts/n"] == 4
assert pandas_summary.loc["average_precision", "counts/n"] == 4
assert pandas_summary.loc["norm_dis_cumul_gain", "counts/n"] == 1
assert pandas_summary.loc["average_precision", "counts/n"] == 4
assert pandas_summary.loc["norm_dis_cumul_gain", "counts/n"] == 1


def test_log_batch_ranking_metrics_binary_simple():
binary_df = pd.DataFrame(
{"raw_predictions": [[True, False, True], [False, False, False], [True, True, False], [False, True, False]]}
)

result = log_batch_ranking_metrics(k=2, data=binary_df, prediction_column="raw_predictions")
result = log_batch_ranking_metrics(
data=binary_df, prediction_column="raw_predictions", k=2, convert_non_numeric=True
)
pandas_summary = result.view().to_pandas()

k = 2
@@ -57,6 +64,7 @@ def test_log_batch_ranking_metrics_binary_simple():
"recall_k_" + str(k),
"top_rank",
"average_precision_k_" + str(k),
"norm_dis_cumul_gain_k_" + str(k),
]
for col in column_names:
assert col in pandas_summary.index
@@ -67,6 +75,7 @@ def test_log_batch_ranking_metrics_binary_simple():
assert pandas_summary.loc["recall_k_" + str(k), "counts/n"] == 4
assert pandas_summary.loc["top_rank", "counts/n"] == 4
assert pandas_summary.loc["average_precision_k_" + str(k), "counts/n"] == 4
assert pandas_summary.loc["norm_dis_cumul_gain_k_" + str(k), "counts/n"] == 1


def test_log_batch_ranking_metrics_multiple_simple():
@@ -81,13 +90,17 @@ def test_log_batch_ranking_metrics_multiple_simple():
],
}
)
k = 4

result = log_batch_ranking_metrics(
k=3, data=multiple_df, prediction_column="raw_predictions", target_column="raw_targets"
data=multiple_df,
prediction_column="raw_predictions",
target_column="raw_targets",
k=k,
convert_non_numeric=True,
)
pandas_summary = result.view().to_pandas()

k = 3
column_names = [
"mean_average_precision_k_" + str(k),
"accuracy_k_" + str(k),
@@ -96,6 +109,7 @@ def test_log_batch_ranking_metrics_multiple_simple():
"recall_k_" + str(k),
"top_rank",
"average_precision_k_" + str(k),
"norm_dis_cumul_gain_k_" + str(k),
]
for col in column_names:
assert col in pandas_summary.index
@@ -106,3 +120,67 @@ def test_log_batch_ranking_metrics_multiple_simple():
assert pandas_summary.loc["recall_k_" + str(k), "counts/n"] == 4
assert pandas_summary.loc["top_rank", "counts/n"] == 4
assert pandas_summary.loc["average_precision_k_" + str(k), "counts/n"] == 4
assert pandas_summary.loc["norm_dis_cumul_gain_k_" + str(k), "counts/n"] == 1

assert isclose(pandas_summary.loc[f"norm_dis_cumul_gain_k_{k}", "distribution/median"], 0.76244, abs_tol=0.00001)


def test_log_batch_ranking_metrics_default_target():
multiple_df = pd.DataFrame({"raw_predictions": [[3, 2, 3, 0, 1, 2, 3, 2]]})

result = log_batch_ranking_metrics(
data=multiple_df, prediction_column="raw_predictions", k=3, convert_non_numeric=True
)
pandas_summary = result.view().to_pandas()

k = 3
column_names = [
"mean_average_precision_k_" + str(k),
"accuracy_k_" + str(k),
"mean_reciprocal_rank",
"precision_k_" + str(k),
"recall_k_" + str(k),
"top_rank",
"average_precision_k_" + str(k),
"norm_dis_cumul_gain_k_" + str(k),
]
for col in column_names:
assert col in pandas_summary.index
assert pandas_summary.loc["mean_average_precision_k_" + str(k), "counts/n"] == 1
assert pandas_summary.loc["accuracy_k_" + str(k), "counts/n"] == 1
assert pandas_summary.loc["mean_reciprocal_rank", "counts/n"] == 1
assert pandas_summary.loc["precision_k_" + str(k), "counts/n"] == 1
assert pandas_summary.loc["recall_k_" + str(k), "counts/n"] == 1
assert pandas_summary.loc["top_rank", "counts/n"] == 1
assert pandas_summary.loc["average_precision_k_" + str(k), "counts/n"] == 1
assert pandas_summary.loc["norm_dis_cumul_gain_k_" + str(k), "counts/n"] == 1


def test_log_batch_ranking_metrics_ranking_ndcg_wikipedia():
# From https://en.wikipedia.org/wiki/Discounted_cumulative_gain#Example
ranking_df = pd.DataFrame({"targets": [[3, 2, 3, 0, 1, 2, 3, 2]], "predictions": [[7, 6, 5, 4, 3, 2, 1, 0]]})

result = log_batch_ranking_metrics(data=ranking_df, prediction_column="predictions", target_column="targets", k=6)
pandas_summary = result.view().to_pandas()

assert isclose(pandas_summary.loc["norm_dis_cumul_gain_k_6", "distribution/median"], 0.785, abs_tol=0.01)


def test_log_batch_ranking_metrics_ranking_ndcg_sklearn():
# From https://scikit-learn.org/stable/modules/generated/sklearn.metrics.ndcg_score.html
ranking_df = pd.DataFrame({"predictions": [[0.1, 0.2, 0.3, 4, 70]], "targets": [[10, 0, 0, 1, 5]]})

result = log_batch_ranking_metrics(data=ranking_df, prediction_column="predictions", target_column="targets")
pandas_summary = result.view().to_pandas()

assert isclose(pandas_summary.loc["norm_dis_cumul_gain", "distribution/median"], 0.69569, abs_tol=0.00001)


def test_log_batch_ranking_metrics_ranking_ndcg_withk_sklearn():
# From https://scikit-learn.org/stable/modules/generated/sklearn.metrics.ndcg_score.html
ranking_df = pd.DataFrame({"predictions": [[0.05, 1.1, 1.0, 0.5, 0.0]], "targets": [[10, 0, 0, 1, 5]]})

result = log_batch_ranking_metrics(data=ranking_df, prediction_column="predictions", target_column="targets", k=4)
pandas_summary = result.view().to_pandas()

assert isclose(pandas_summary.loc["norm_dis_cumul_gain_k_4", "distribution/median"], 0.35202, abs_tol=0.00001)
57 changes: 45 additions & 12 deletions python/whylogs/experimental/api/logger/__init__.py
@@ -1,4 +1,5 @@
import logging
import math
from typing import Optional, Union

from whylogs.api.logger import log
@@ -10,11 +11,12 @@


def log_batch_ranking_metrics(
k: int,
data: pd.core.frame.DataFrame,
prediction_column: str,
target_column: Optional[str] = None,
score_column: Optional[str] = None,
k: Optional[int] = None,
convert_non_numeric=False,
schema: Union[DatasetSchema, None] = None,
log_full_data: bool = False,
) -> ViewResultSet:
@@ -23,8 +25,7 @@ def log_batch_ranking_metrics(
relevant_cols = [prediction_column]
if target_column is None:
target_column = "__targets"
formatted_data[target_column] = True
formatted_data[target_column].apply(lambda x: [x])
formatted_data[target_column] = formatted_data[prediction_column].apply(lambda x: list(range(len(x)))[::-1])
relevant_cols.append(target_column)
if score_column is not None:
relevant_cols.append(score_column)
@@ -35,6 +36,8 @@
# TODO: more error checking
formatted_data[col] = formatted_data[col].apply(lambda x: [x])

_max_k = formatted_data[prediction_column].apply(len).max()

formatted_data["count_at_k"] = formatted_data[relevant_cols].apply(
lambda row: sum([1 if pred_val in row[target_column] else 0 for pred_val in row[prediction_column][:k]]), axis=1
)
@@ -52,13 +55,13 @@ def get_top_rank(row):

formatted_data["top_rank"] = formatted_data[relevant_cols].apply(get_top_rank, axis=1)

output_data = (formatted_data["count_at_k"] / k).to_frame()
output_data.columns = ["precision_k_" + str(k)]
output_data["recall_k_" + str(k)] = formatted_data["count_at_k"] / formatted_data["count_all"]
output_data = (formatted_data["count_at_k"] / (k if k else 1)).to_frame()
output_data.columns = ["precision" + ("_k_" + str(k) if k else "")]
output_data["recall" + ("_k_" + str(k) if k else "")] = formatted_data["count_at_k"] / formatted_data["count_all"]
output_data["top_rank"] = formatted_data["top_rank"]

ki_dict: pd.DataFrame = None
for ki in range(1, k + 1):
for ki in range(1, (k if k else _max_k) + 1):
ki_result = (
formatted_data[relevant_cols].apply(
lambda row: sum(
@@ -74,18 +77,48 @@ def get_top_rank(row):
else:
ki_dict["p@" + str(ki)] = ki_result

output_data["average_precision_k_" + str(k)] = ki_dict.mean(axis=1)
mAP_at_k = output_data["average_precision_k_" + str(k)].mean(axis=0)
output_data["average_precision" + ("_k_" + str(k) if k else "")] = ki_dict.mean(axis=1)

def _convert_non_numeric(row_dict):
return (
[
row_dict[target_column].index(pred_val) if pred_val in row_dict[target_column] else -1
for pred_val in row_dict[prediction_column]
],
list(range(len(row_dict[prediction_column])))[::-1],
)

if convert_non_numeric:
formatted_data[[prediction_column, target_column]] = formatted_data.apply(
_convert_non_numeric, result_type="expand", axis=1
)

def _calculate_row_ndcg(row_dict, k):
predicted_order = np.array(row_dict[prediction_column]).argsort()[::-1]
target_order = np.array(row_dict[target_column]).argsort()[::-1]
dcg_vals = [
(rel / math.log(i + 2, 2)) for i, rel in enumerate(np.array(row_dict[target_column])[predicted_order][:k])
]
idcg_vals = [
(rel / math.log(i + 2, 2)) for i, rel in enumerate(np.array(row_dict[target_column])[target_order][:k])
]
return sum(dcg_vals) / sum(idcg_vals)

formatted_data["norm_dis_cumul_gain_k_" + str(k)] = formatted_data.apply(_calculate_row_ndcg, args=(k,), axis=1)

mAP_at_k = ki_dict.mean()
hit_ratio = formatted_data["count_at_k"].apply(lambda x: bool(x)).sum() / len(formatted_data)
mrr = (1 / output_data["top_rank"]).replace([np.inf], np.nan).mean()
mrr = (1 / formatted_data["top_rank"]).replace([np.inf], np.nan).mean()
ndcg = formatted_data["norm_dis_cumul_gain_k_" + str(k)].mean()

result = log(pandas=output_data, schema=schema)
result = result.merge(
log(
row={
"mean_average_precision_k_" + str(k): mAP_at_k,
"accuracy_k_" + str(k): hit_ratio,
"mean_average_precision" + ("_k_" + str(k) if k else ""): mAP_at_k,
"accuracy" + ("_k_" + str(k) if k else ""): hit_ratio,
"mean_reciprocal_rank": mrr,
"norm_dis_cumul_gain" + ("_k_" + str(k) if k else ""): ndcg,
},
schema=schema,
)

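For reference, a standalone recomputation (not part of the commit) of the NDCG@6 value asserted in the Wikipedia test, doing by hand what `_calculate_row_ndcg` does: rank the target relevances by predicted score, apply the log2 position discount, and normalize by the ideal ordering.

```python
import math

# Wikipedia DCG example: relevance judgments and predicted scores.
targets = [3, 2, 3, 0, 1, 2, 3, 2]
predictions = [7, 6, 5, 4, 3, 2, 1, 0]
k = 6

# Relevances in predicted order (descending prediction score) and ideal order.
predicted = [t for _, t in sorted(zip(predictions, targets), reverse=True)]
ideal = sorted(targets, reverse=True)

# DCG@k discounts the gain at 0-based rank i by log2(i + 2).
dcg = sum(rel / math.log2(i + 2) for i, rel in enumerate(predicted[:k]))
idcg = sum(rel / math.log2(i + 2) for i, rel in enumerate(ideal[:k]))

print(round(dcg / idcg, 3))  # 0.785, matching the test assertion
```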