In [None]:
!pip install --upgrade "whylogs[whylabs]==1.1.36-dev0"

In [None]:
import pandas as pd
import whylogs as why

In [None]:
# Display the version of the imported whylogs package.
why.__version__

## Example data

Below are three example data formats. Note that both the predictions and the targets columns can take lists or `np.ndarray`s of values.

Consider the predictions for each row to be a list of ranked results in decreasing rank importance order (best match is first). In the case of the `binary` example, we pass predictions with a boolean representing whether or not the prediction was successful, e.g., relevant search results.

Consider each row's targets list to be essentially a set of values that denote successful predictions. A prediction counts as successful when it equals one of the target values (matching is based on equality).

In [None]:
# Single predictions: a ranked list of predictions and one scalar target per row
single_df = pd.DataFrame(
    dict(
        raw_predictions=[
            ["cat", "pig", "elephant"],
            ["horse", "donkey", "robin"],
            ["cow", "pig", "giraffe"],
            ["pig", "dolphin", "elephant"],
        ],
        raw_targets=["cat", "dog", "pig", "elephant"],
    )
)

# Binary predictions: True marks a relevant result, False an irrelevant one;
# this frame has no targets column.
binary_df = pd.DataFrame(
    dict(
        raw_predictions=[
            [True, False, True],
            [False, False, False],
            [True, True, False],
            [False, True, False],
        ],
    )
)

# Multiple predictions: both targets and predictions are lists for each row
multiple_df = pd.DataFrame(
    dict(
        raw_targets=[
            ["cat", "elephant"],
            ["dog", "pig"],
            ["pig", "cow"],
            ["cat", "dolphin"],
        ],
        raw_predictions=[
            ["cat", "pig", "elephant"],
            ["horse", "donkey", "robin"],
            ["cow", "pig", "giraffe"],
            ["pig", "dolphin", "elephant"],
        ],
    )
)

## Log using experimental batch ranking metrics API

Note that this API computes batch metrics, which are not meant to be merged together. Logging multiple profiles -- from distributed machines, Spark, or multiple uploads within the model granularity window -- is therefore discouraged.

In contrast, metrics from non-batch APIs can be merged, but no such API has been released for ranking metrics yet.

In [None]:
# Log ranking metrics for the single-prediction example frame
from whylogs.experimental.api.logger import log_batch_ranking_metrics

# NOTE: If you've already run why.log() on your input data, pass log_full_data=False instead.
results = log_batch_ranking_metrics(
    data=single_df,
    prediction_column="raw_predictions",
    target_column="raw_targets",
    k=2,
    log_full_data=True,
)

In [None]:
# View results: convert the profile view to a pandas frame, then transpose it for display
results_frame = results.view().to_pandas()
results_frame.T

## Send profile summaries to WhyLabs platform
Required information can be found at https://hub.whylabsapp.com under the Settings > Model and Dataset Management page.

API keys are only shown once, so you may need to create a new one and save it somewhere safe.

In [None]:
# Configure WhyLabs info, if needed
import getpass
import os


def _set_env_if_missing(name, prompt, read=input):
    """Prompt for environment variable ``name`` only when it is unset or empty.

    Values that are already present (exported before starting Jupyter, or
    entered on a previous run of this cell) are kept, so the cell can be
    re-run without prompting again.

    Args:
        name: Name of the environment variable to populate.
        prompt: Text shown to the user when a value has to be entered.
        read: Callable that takes the prompt and returns the entered string.
            Pass ``getpass.getpass`` for secrets so the input is not echoed.
    """
    if not os.environ.get(name):
        # Strip stray whitespace/newlines that tend to come along with copy-paste.
        os.environ[name] = read(prompt).strip()


_set_env_if_missing("WHYLABS_DEFAULT_ORG_ID", "Enter your WhyLabs Org ID")
_set_env_if_missing("WHYLABS_DEFAULT_DATASET_ID", "Enter your WhyLabs Dataset ID")
_set_env_if_missing("WHYLABS_API_KEY", "Enter your WhyLabs API key", read=getpass.getpass)

# Only the first 10 characters of the key are printed, never the full key.
print("Using API Key ID: ", os.environ["WHYLABS_API_KEY"][0:10])

In [None]:
# Write the results to WhyLabs platform via the "whylabs" writer
whylabs_upload = results.writer("whylabs")
whylabs_upload.write()

## WhyLabs UI Improvements

### Optional: Move to Outputs tab
By default, the metrics above appear in the Input tab, but you can organize your data columns by displaying the relevant columns in the Outputs tab instead.

There are two options to do so:
1. Using the `WhyLabsWriter` API within whylogs
2. Using the REST API

In [None]:
# Update relevant columns to output columns
from whylogs.api.writer.whylabs import WhyLabsWriter

# NOTE(review): k should match the value passed to log_batch_ranking_metrics,
# since the k-dependent column names below carry it as a suffix -- confirm.
k = 2
column_names = [
    f"mean_average_precision_k_{k}",
    f"accuracy_k_{k}",
    "mean_reciprocal_rank",
    f"precision_k_{k}",
    f"recall_k_{k}",
    "top_rank",
    f"average_precision_k_{k}",
]

whylabs_writer = WhyLabsWriter()
try:
    whylabs_writer.tag_output_columns(column_names)
except Exception as err:
    # Best effort: tagging only affects how columns are organised in the UI, so
    # point the user at the REST alternative instead of failing the notebook.
    # `Exception` (not a bare `except`) lets KeyboardInterrupt/SystemExit
    # propagate, and the error itself is shown so the cause is not hidden.
    print("Issue with changing UI via whylogs. \nUse REST API at "
          "https://api.whylabsapp.com/swagger-ui#/Models/PutEntitySchemaColumn.")
    print(f"Underlying error: {err!r}")

### Show relevant columns in Performance tab

Visit our REST API page: https://api.whylabsapp.com/swagger-ui#/Models/PutEntitySchemaMetric.
Authenticate (top right) using your API key. Scroll back to `PutEntitySchemaMetric` and enter your `org_id` and `dataset_id`.

Run the request three times, once with each of the following request bodies:

```
{
  "label": "Mean Average Precision, k=2",
  "column": "mean_average_precision_k_2",
  "defaultMetric": "median"
}
```

```
{
  "label": "Accuracy / Hit Rate, k=2",
  "column": "accuracy_k_2",
  "defaultMetric": "median"
}
```

```
{
  "label": "Mean Reciprocal Rank (MRR)",
  "column": "mean_reciprocal_rank",
  "defaultMetric": "median"
}
```