Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ndcg refactor #1481

Merged
merged 8 commits into from
Mar 14, 2024
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
28 changes: 15 additions & 13 deletions python/tests/experimental/api/test_logger.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,16 +43,16 @@ def test_log_batch_ranking_metrics_single_simple():
assert pandas_summary.loc["norm_dis_cumul_gain", "counts/n"] == 1
assert pandas_summary.loc["average_precision", "counts/n"] == 4
assert pandas_summary.loc["norm_dis_cumul_gain", "counts/n"] == 1
# ndcg = [1, 0, 0.63, 0.5]
jamie256 marked this conversation as resolved.
Show resolved Hide resolved
assert isclose(pandas_summary.loc["norm_dis_cumul_gain", "distribution/mean"], 0.53273, abs_tol=0.00001)


def test_log_batch_ranking_metrics_binary_simple():
binary_df = pd.DataFrame(
{"raw_predictions": [[True, False, True], [False, False, False], [True, True, False], [False, True, False]]}
)

result = log_batch_ranking_metrics(
data=binary_df, prediction_column="raw_predictions", k=2, convert_non_numeric=True
)
result = log_batch_ranking_metrics(data=binary_df, prediction_column="raw_predictions", k=2)
pandas_summary = result.view().to_pandas()

k = 2
Expand All @@ -76,6 +76,8 @@ def test_log_batch_ranking_metrics_binary_simple():
assert pandas_summary.loc["top_rank", "counts/n"] == 4
assert pandas_summary.loc["average_precision_k_" + str(k), "counts/n"] == 4
assert pandas_summary.loc["norm_dis_cumul_gain_k_" + str(k), "counts/n"] == 1
# ndcg@2 = [0.613147, 1.0, 1.0, 0.63093]
assert isclose(pandas_summary.loc["norm_dis_cumul_gain_k_" + str(k), "distribution/mean"], 0.81101, abs_tol=0.00001)


def test_log_batch_ranking_metrics_multiple_simple():
Expand Down Expand Up @@ -121,16 +123,14 @@ def test_log_batch_ranking_metrics_multiple_simple():
assert pandas_summary.loc["top_rank", "counts/n"] == 4
assert pandas_summary.loc["average_precision_k_" + str(k), "counts/n"] == 4
assert pandas_summary.loc["norm_dis_cumul_gain_k_" + str(k), "counts/n"] == 1

assert isclose(pandas_summary.loc[f"norm_dis_cumul_gain_k_{k}", "distribution/median"], 0.76244, abs_tol=0.00001)
# ndcg@4 = [0.9197, 0.0, 1.0, 0.386853]
assert isclose(pandas_summary.loc[f"norm_dis_cumul_gain_k_{k}", "distribution/median"], 0.57664, abs_tol=0.00001)


def test_log_batch_ranking_metrics_default_target():
multiple_df = pd.DataFrame({"raw_predictions": [[3, 2, 3, 0, 1, 2, 3, 2]]})

result = log_batch_ranking_metrics(
data=multiple_df, prediction_column="raw_predictions", k=3, convert_non_numeric=True
)
result = log_batch_ranking_metrics(data=multiple_df, prediction_column="raw_predictions", k=3)
pandas_summary = result.view().to_pandas()

k = 3
Expand All @@ -154,11 +154,13 @@ def test_log_batch_ranking_metrics_default_target():
assert pandas_summary.loc["top_rank", "counts/n"] == 1
assert pandas_summary.loc["average_precision_k_" + str(k), "counts/n"] == 1
assert pandas_summary.loc["norm_dis_cumul_gain_k_" + str(k), "counts/n"] == 1
# ndcg@3 = [0.9013]
assert isclose(pandas_summary.loc[f"norm_dis_cumul_gain_k_{k}", "distribution/median"], 0.90130, abs_tol=0.00001)


def test_log_batch_ranking_metrics_ranking_ndcg_wikipedia():
# From https://en.wikipedia.org/wiki/Discounted_cumulative_gain#Example
ranking_df = pd.DataFrame({"targets": [[3, 2, 3, 0, 1, 2, 3, 2]], "predictions": [[7, 6, 5, 4, 3, 2, 1, 0]]})
ranking_df = pd.DataFrame({"targets": [[3, 2, 3, 0, 1, 2, 3, 2]], "predictions": [[3, 2, 3, 0, 1, 2]]})
jamie256 marked this conversation as resolved.
Show resolved Hide resolved

result = log_batch_ranking_metrics(data=ranking_df, prediction_column="predictions", target_column="targets", k=6)
pandas_summary = result.view().to_pandas()
Expand All @@ -168,19 +170,19 @@ def test_log_batch_ranking_metrics_ranking_ndcg_wikipedia():

def test_log_batch_ranking_metrics_ranking_ndcg_sklearn():
# From https://scikit-learn.org/stable/modules/generated/sklearn.metrics.ndcg_score.html
ranking_df = pd.DataFrame({"predictions": [[0.1, 0.2, 0.3, 4, 70]], "targets": [[10, 0, 0, 1, 5]]})
ranking_df = pd.DataFrame({"scores": [[0.1, 0.2, 0.3, 4, 70]], "true_relevance": [[10, 0, 0, 1, 5]]})

result = log_batch_ranking_metrics(data=ranking_df, prediction_column="predictions", target_column="targets")
result = log_batch_ranking_metrics(data=ranking_df, score_column="scores", target_column="true_relevance")
pandas_summary = result.view().to_pandas()

assert isclose(pandas_summary.loc["norm_dis_cumul_gain", "distribution/median"], 0.69569, abs_tol=0.00001)


def test_log_batch_ranking_metrics_ranking_ndcg_withk_sklearn():
# From https://scikit-learn.org/stable/modules/generated/sklearn.metrics.ndcg_score.html
ranking_df = pd.DataFrame({"predictions": [[0.05, 1.1, 1.0, 0.5, 0.0]], "targets": [[10, 0, 0, 1, 5]]})
ranking_df = pd.DataFrame({"scores": [[0.05, 1.1, 1.0, 0.5, 0.0]], "true_relevance": [[10, 0, 0, 1, 5]]})

result = log_batch_ranking_metrics(data=ranking_df, prediction_column="predictions", target_column="targets", k=4)
result = log_batch_ranking_metrics(data=ranking_df, score_column="scores", target_column="true_relevance", k=4)
pandas_summary = result.view().to_pandas()

assert isclose(pandas_summary.loc["norm_dis_cumul_gain_k_4", "distribution/median"], 0.35202, abs_tol=0.00001)
85 changes: 52 additions & 33 deletions python/whylogs/experimental/api/logger/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,16 @@
diagnostic_logger = logging.getLogger(__name__)


def _convert_to_int_if_bool(data: pd.core.frame.DataFrame, *columns: str) -> pd.core.frame.DataFrame:
for col in columns:
if all(isinstance(x, bool) for x in data[col]):
data[col] = data[col].apply(lambda x: 1 if x else 0)
return data


def log_batch_ranking_metrics(
data: pd.core.frame.DataFrame,
prediction_column: str,
prediction_column: Optional[str] = None,
target_column: Optional[str] = None,
score_column: Optional[str] = None,
k: Optional[int] = None,
Expand All @@ -22,26 +29,40 @@ def log_batch_ranking_metrics(
) -> ViewResultSet:
formatted_data = data.copy(deep=True) # TODO: does this have to be deep?

if prediction_column is None:
if score_column is not None and target_column is not None:
prediction_column = "__predictions"

# sort data[prediction_column] by score_column
def _sort_by_score(row):
    """Return the row's targets reordered by descending score.

    Pairs each score with its target, sorts pairs descending, and keeps
    only the targets. NOTE(review): ties in score fall back to comparing
    the target values themselves — tie handling is not defined here.
    """
    return [x for _, x in sorted(zip(row[score_column], row[target_column]), reverse=True)]

# Ties are not being handled here
formatted_data[prediction_column] = formatted_data.apply(_sort_by_score, axis=1)
else:
raise ValueError("Either prediction_column or score+target columns must be specified")

relevant_cols = [prediction_column]

if target_column is None:
formatted_data = _convert_to_int_if_bool(formatted_data, prediction_column)
target_column = "__targets"
formatted_data[target_column] = formatted_data[prediction_column].apply(lambda x: list(range(len(x)))[::-1])
# formatted_data[target_column] = formatted_data[prediction_column].apply(lambda x: list(range(len(x)))[::-1])
FelipeAdachi marked this conversation as resolved.
Show resolved Hide resolved
# formatted_data[target_column] = [[] for _ in range(len(formatted_data[prediction_column]))]
FelipeAdachi marked this conversation as resolved.
Show resolved Hide resolved
formatted_data[target_column] = formatted_data[prediction_column]

relevant_cols.append(target_column)
if score_column is not None:
relevant_cols.append(score_column)

for col in relevant_cols:
if not formatted_data[col].apply(lambda x: type(x) == list).all():
# wrapping in lists because at least one isn't a list
# TODO: more error checking
formatted_data[col] = formatted_data[col].apply(lambda x: [x])

_max_k = formatted_data[prediction_column].apply(len).max()

formatted_data["count_at_k"] = formatted_data[relevant_cols].apply(
lambda row: sum([1 if pred_val in row[target_column] else 0 for pred_val in row[prediction_column][:k]]), axis=1
)

formatted_data["count_all"] = formatted_data[relevant_cols].apply(
lambda row: sum([1 if pred_val in row[target_column] else 0 for pred_val in row[prediction_column]]), axis=1
)
Expand All @@ -54,12 +75,10 @@ def get_top_rank(row):
return matches[0]

formatted_data["top_rank"] = formatted_data[relevant_cols].apply(get_top_rank, axis=1)

output_data = (formatted_data["count_at_k"] / (k if k else 1)).to_frame()
output_data.columns = ["precision" + ("_k_" + str(k) if k else "")]
output_data["recall" + ("_k_" + str(k) if k else "")] = formatted_data["count_at_k"] / formatted_data["count_all"]
output_data["top_rank"] = formatted_data["top_rank"]

ki_dict: pd.DataFrame = None
for ki in range(1, (k if k else _max_k) + 1):
ki_result = (
Expand All @@ -76,41 +95,43 @@ def get_top_rank(row):
ki_dict.columns = ["p@" + str(ki)]
else:
ki_dict["p@" + str(ki)] = ki_result

output_data["average_precision" + ("_k_" + str(k) if k else "")] = ki_dict.mean(axis=1)

def _convert_non_numeric(row_dict):
return (
[
row_dict[target_column].index(pred_val) if pred_val in row_dict[target_column] else -1
for pred_val in row_dict[prediction_column]
],
list(range(len(row_dict[prediction_column])))[::-1],
)
def _calc_non_numeric_relevance(row_dict):
    """Derive binary relevance lists for one row.

    An item predicted for this row is relevant (1) when it appears among
    the row's targets, otherwise 0. The ideal list starts identical to
    the predicted list and gains an extra 1 for every target that was
    never predicted, so the ideal ranking can surface all relevant items.
    Returns ``(prediction_relevance, ideal_relevance)``.
    """
    predicted = row_dict[prediction_column]
    targets = row_dict[target_column]
    prediction_relevance = [1 if item in targets else 0 for item in predicted]
    ideal_relevance = list(prediction_relevance)
    for target_val in targets:
        if target_val not in predicted:
            ideal_relevance.append(1)
    return (prediction_relevance, ideal_relevance)

if convert_non_numeric:
formatted_data[[prediction_column, target_column]] = formatted_data.apply(
_convert_non_numeric, result_type="expand", axis=1
formatted_data[["predicted_relevance", "ideal_relevance"]] = formatted_data.apply(
_calc_non_numeric_relevance, result_type="expand", axis=1
)
else:
formatted_data["predicted_relevance"] = formatted_data[prediction_column]
formatted_data["ideal_relevance"] = formatted_data[target_column]

def _calculate_row_ndcg(row_dict, k):
predicted_order = np.array(row_dict[prediction_column]).argsort()[::-1]
target_order = np.array(row_dict[target_column]).argsort()[::-1]
dcg_vals = [
(rel / math.log(i + 2, 2)) for i, rel in enumerate(np.array(row_dict[target_column])[predicted_order][:k])
]
idcg_vals = [
(rel / math.log(i + 2, 2)) for i, rel in enumerate(np.array(row_dict[target_column])[target_order][:k])
]
predicted_relevances = row_dict["predicted_relevance"]
ideal_relevances = sorted(row_dict["ideal_relevance"], reverse=True)
dcg_vals = [(rel / math.log(i + 2, 2)) for i, rel in enumerate(predicted_relevances[:k])]
idcg_vals = [(rel / math.log(i + 2, 2)) for i, rel in enumerate(ideal_relevances[:k])]
if sum(idcg_vals) == 0:
return 1 # if there is no relevant data, not much the recommender can do
jamie256 marked this conversation as resolved.
Show resolved Hide resolved
return sum(dcg_vals) / sum(idcg_vals)

formatted_data["norm_dis_cumul_gain_k_" + str(k)] = formatted_data.apply(_calculate_row_ndcg, args=(k,), axis=1)

formatted_data["norm_dis_cumul_gain" + ("_k_" + str(k) if k else "")] = formatted_data.apply(
_calculate_row_ndcg, args=(k,), axis=1
)
mAP_at_k = ki_dict.mean()
hit_ratio = formatted_data["count_at_k"].apply(lambda x: bool(x)).sum() / len(formatted_data)
mrr = (1 / formatted_data["top_rank"]).replace([np.inf], np.nan).mean()
ndcg = formatted_data["norm_dis_cumul_gain_k_" + str(k)].mean()

ndcg = formatted_data["norm_dis_cumul_gain" + ("_k_" + str(k) if k else "")].mean()
result = log(pandas=output_data, schema=schema)
result = result.merge(
log(
Expand All @@ -123,8 +144,6 @@ def _calculate_row_ndcg(row_dict, k):
schema=schema,
)
)

if log_full_data:
result = result.merge(log(pandas=data, schema=schema))

return result