Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Ranking metrics API improvements #1507

Merged
merged 6 commits into from
Apr 25, 2024
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
13 changes: 7 additions & 6 deletions python/tests/experimental/api/test_logger.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,9 @@ def test_log_batch_ranking_metrics_single_simple():
}
)
result = log_batch_ranking_metrics(
data=single_df, prediction_column="raw_predictions", target_column="raw_targets", convert_non_numeric=True
data=single_df,
prediction_column="raw_predictions",
target_column="raw_targets",
)
pandas_summary = result.view().to_pandas()

Expand Down Expand Up @@ -53,10 +55,10 @@ def test_log_batch_ranking_metrics_single_simple():

def test_log_batch_ranking_metrics_binary_simple():
binary_df = pd.DataFrame(
{"raw_predictions": [[True, False, True], [False, False, False], [True, True, False], [False, True, False]]}
{"raw_targets": [[True, False, True], [False, False, False], [True, True, False], [False, True, False]]}
)

result = log_batch_ranking_metrics(data=binary_df, prediction_column="raw_predictions", k=2)
result = log_batch_ranking_metrics(data=binary_df, target_column="raw_targets", k=2)
pandas_summary = result.view().to_pandas()

k = 2
Expand Down Expand Up @@ -109,7 +111,6 @@ def test_log_batch_ranking_metrics_multiple_simple():
prediction_column="raw_predictions",
target_column="raw_targets",
k=k,
convert_non_numeric=True,
)
pandas_summary = result.view().to_pandas()

Expand Down Expand Up @@ -139,9 +140,9 @@ def test_log_batch_ranking_metrics_multiple_simple():


def test_log_batch_ranking_metrics_default_target():
multiple_df = pd.DataFrame({"raw_predictions": [[3, 2, 3, 0, 1, 2, 3, 2]]})
multiple_df = pd.DataFrame({"raw_targets": [[3, 2, 3, 0, 1, 2, 3, 2]]})

result = log_batch_ranking_metrics(data=multiple_df, prediction_column="raw_predictions", k=3)
result = log_batch_ranking_metrics(data=multiple_df, target_column="raw_targets", k=3)
pandas_summary = result.view().to_pandas()

k = 3
Expand Down
45 changes: 38 additions & 7 deletions python/whylogs/experimental/api/logger/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -131,18 +131,39 @@ def _calculate_average_precisions(
return averages


def _all_strings(data: pd.Series) -> bool:
richard-rogers marked this conversation as resolved.
Show resolved Hide resolved
return all([all([isinstance(y, str) for y in x]) for x in data])


def log_batch_ranking_metrics(
data: pd.core.frame.DataFrame,
prediction_column: Optional[str] = None,
target_column: Optional[str] = None,
score_column: Optional[str] = None,
k: Optional[int] = None,
convert_non_numeric=False,
schema: Union[DatasetSchema, None] = None,
log_full_data: bool = False,
) -> ViewResultSet:
"""Log ranking metrics for a batch of data.

You can call the function several ways:
- Pass both prediction_column and target_column.
- The named columns contain lists of strings. In this case, the prediction column contains the
items the model has predicted are relevant, and the target column contains the items that
are actually relevant. In this case, relevance is boolean.

- The prediction column contains lists of integers and the target column contains lists of numbers
or booleans. The value at the i-th position in the predicted list is the predicted rank of the i-th
element of the domain. The value at the i-th position in the target list is the true relevance score of the
i-th element of the domain. The score can be numeric or boolean. Higher scores indicate higher relevance.

- Pass both target_column and score_column. The value at the i-th position in the target list is the true relevance
score of the i-th element of the domain. The value at the i-th position in the score list is the predicted
relevance of the i-th element of the domain.
FelipeAdachi marked this conversation as resolved.
Show resolved Hide resolved

    - Pass only target_column. The target column contains lists of numbers or booleans. The list entries are the true
      relevance of the items predicted by the model in prediction order.

Parameters
----------
data : pd.core.frame.DataFrame
Expand All @@ -157,9 +178,6 @@ def log_batch_ranking_metrics(
k : Optional[int], optional
Consider the top k ranks for metrics calculation.
If `None`, use all outputs, by default None
convert_non_numeric : bool, optional
Indicates whether prediction/target columns are non-numeric.
If True, prediction/target should be strings, by default False
schema : Union[DatasetSchema, None], optional
Defines the schema for tracking metrics in whylogs, by default None
log_full_data : bool, optional
Expand Down Expand Up @@ -226,19 +244,28 @@ def log_batch_ranking_metrics(

binary_single_df = pd.DataFrame(
{
"raw_predictions": [
"raw_targets": [
[True, False, True], # First recommended item: Relevant, Second: Not relevant, Third: Relevant
[False, False, False], # None of the recommended items are relevant
[True, True, False], # First and second recommended items are relevant
]
}
)

result = log_batch_ranking_metrics(data=binary_single_df, prediction_column="raw_predictions", k=3)
result = log_batch_ranking_metrics(data=binary_single_df, target_column="raw_targets", k=3)

"""
formatted_data = data.copy(deep=True) # TODO: does this have to be deep?

if score_column is not None and prediction_column is not None:
raise ValueError("Cannot specify both score_column and prediction_column")

if prediction_column is None and score_column is None and target_column is not None:
richard-rogers marked this conversation as resolved.
Show resolved Hide resolved
# https://github.com/whylabs/whylogs/issues/1505
# The column use logic is complex, so just swapping them here for this case
# rather than unraveling all the use cases.
prediction_column, target_column = target_column, prediction_column
richard-rogers marked this conversation as resolved.
Show resolved Hide resolved

if prediction_column is None:
if score_column is not None and target_column is not None:
prediction_column = "__predictions"
Expand All @@ -248,7 +275,7 @@ def log_batch_ranking_metrics(
lambda row: list(np.argsort(np.argsort(-np.array(row))) + 1)
)
else:
raise ValueError("Either prediction_column or score+target columns must be specified")
raise ValueError("Either target_column or score+target columns must be specified")

relevant_cols = [prediction_column]

Expand Down Expand Up @@ -280,6 +307,10 @@ def log_batch_ranking_metrics(
if k and k < 1:
raise ValueError("k must be a positive integer")

convert_non_numeric = _all_strings(formatted_data[prediction_column]) and _all_strings(
formatted_data[target_column]
)

row_wise_functions = RowWiseMetrics(target_column, prediction_column, convert_non_numeric)
formatted_data["count_at_k"] = formatted_data.apply(row_wise_functions.relevant_counter, args=(k,), axis=1)
formatted_data["count_all"] = formatted_data.apply(row_wise_functions.relevant_counter, args=(_max_k,), axis=1)
Expand Down