In [4]:
pip install -U "whylogs[whylabs]>1.0.9"

Defaulting to user installation because normal site-packages is not writeable
Collecting whylabs-client<0.4.0,>=0.3.0
  Using cached whylabs_client-0.3.0-py3-none-any.whl (183 kB)
Installing collected packages: whylabs-client
Successfully installed whylabs-client-0.3.0
Note: you may need to restart the kernel to use updated packages.


## ✔️ Setting the Environment Variables

In order to send our profile to WhyLabs, let's first set up an account. You can skip this if you already have an account and a model set up.

We will need three pieces of information:

- API token
- Organization ID
- Dataset ID (or model-id)

Go to https://whylabs.ai/free and grab a free account. You can follow along with the examples if you wish, but if you’re interested in only following this demonstration, you can go ahead and skip the quick start instructions.

After that, you’ll be prompted to create an API token. Once you create it, copy and store it locally. The second important piece of information here is your org ID. Take note of it as well. After you get your API Token and Org ID, you can go to https://hub.whylabsapp.com/models to see your projects dashboard. You can create a new project and take note of its ID (if it's a model project, it will look like `model-xxxx`).

We'll now set the credentials as environment variables. The WhyLabs Writer will check for the existence of these variables in order to send the profiles to your dashboard.

In [7]:
import getpass
import os

# Prompt for the two identifiers and export each one as an environment variable.
# The org ID should look like "org-xxxx"; the dataset ID (or model ID) should
# look like "model-xxxx".
for prompt, env_var in (
    ("Enter your WhyLabs Org ID", "WHYLABS_DEFAULT_ORG_ID"),
    ("Enter your WhyLabs Dataset ID", "WHYLABS_DEFAULT_DATASET_ID"),
):
    print(prompt)
    os.environ[env_var] = input()

# The API key is read with getpass so it is not echoed while typing; only its
# first 10 characters are printed back as confirmation.
print("Enter your WhyLabs API key")
api_key = getpass.getpass()
os.environ["WHYLABS_API_KEY"] = api_key
print("Using API Key ID: ", api_key[0:10])

Enter your WhyLabs Org ID
Enter your WhyLabs Dataset ID
Enter your WhyLabs API key
Using API Key ID:  pUtqnO0hhC


## Fetching the Data

For demonstration, let's use data from Lending Club:

In [24]:
import pandas as pd

# Location of the first demo batch in the public WhyLabs S3 bucket.
csv_url = "https://whylabs-public.s3.us-west-2.amazonaws.com/demo_batches/input_batch_1.csv"
df = pd.read_csv(filepath_or_buffer=csv_url)

# Preview the first five rows of the batch.
df.head(5)

Unnamed: 0.1,Unnamed: 0,id,member_id,loan_amnt,funded_amnt,funded_amnt_inv,term,int_rate,installment,grade,...,hardship_payoff_balance_amount,hardship_last_payment_amount,disbursement_method,debt_settlement_flag,debt_settlement_flag_date,settlement_status,settlement_date,settlement_amount,settlement_percentage,settlement_term
0,12325,118159199,,12000.0,12000.0,12000.0,36 months,7.35,372.45,A,...,,,Cash,N,,,,,,
1,12327,117985648,,11200.0,11200.0,11200.0,60 months,19.03,290.72,D,...,,,Cash,N,,,,,,
2,12329,117821678,,11000.0,11000.0,11000.0,36 months,15.05,381.59,C,...,,,Cash,N,,,,,,
3,12330,118105415,,16000.0,16000.0,16000.0,36 months,16.02,562.68,C,...,,,Cash,N,,,,,,
4,12332,118115293,,20000.0,20000.0,20000.0,36 months,16.02,703.34,C,...,,,Cash,N,,,,,,


# Load example data batches and profile them

The example data is prepared from our public S3 bucket. You can use your own data instead, as long as you have multiple batches of it.

In [14]:
# The demo batches are numbered 1 through 7 in the public S3 bucket.
first_batch, last_batch = 1, 7

# Collect one DataFrame per batch, in batch order.
pdfs = []
for i in range(first_batch, last_batch + 1):
    path = f"https://whylabs-public.s3.us-west-2.amazonaws.com/demo_batches/input_batch_{i}.csv"
    print(f"Loading data from {path}")
    df = pd.read_csv(path)
    pdfs.append(df)

Loading data from https://whylabs-public.s3.us-west-2.amazonaws.com/demo_batches/input_batch_1.csv
Loading data from https://whylabs-public.s3.us-west-2.amazonaws.com/demo_batches/input_batch_2.csv
Loading data from https://whylabs-public.s3.us-west-2.amazonaws.com/demo_batches/input_batch_3.csv
Loading data from https://whylabs-public.s3.us-west-2.amazonaws.com/demo_batches/input_batch_4.csv
Loading data from https://whylabs-public.s3.us-west-2.amazonaws.com/demo_batches/input_batch_5.csv
Loading data from https://whylabs-public.s3.us-west-2.amazonaws.com/demo_batches/input_batch_6.csv
Loading data from https://whylabs-public.s3.us-west-2.amazonaws.com/demo_batches/input_batch_7.csv


In [None]:
from datetime import datetime, timedelta, timezone

import whylogs as why
from whylogs.api.writer.whylabs import WhyLabsWriter

# The writer picks up the WhyLabs credentials from the environment variables
# that were set earlier in the notebook.
writer = WhyLabsWriter()

for i, df in enumerate(pdfs):
    # Walk backwards one day per batch: each dataset has to map to a different
    # date to show up as a separate batch in WhyLabs.
    dt = datetime.now(tz=timezone.utc) - timedelta(days=i)

    # Profile this batch and stamp the profile with the batch's date
    profile = why.log(df).profile()
    profile.set_dataset_timestamp(dt)
    print("Log data frame for ", dt)

    # upload this day's profile to Whylabs so we have a number of days
    writer.write(profile)

In [25]:
# Let's use the last profile as a reference profile: `profile` still holds the
# value assigned in the final iteration of the upload loop.
reference_profile = profile


The reference profile can be uploaded using the `whylabs_client` package directly. First, we need to reference the profile as a file on disk, so let's write it out.

In [26]:
import tempfile

# Write the reference profile into a fresh temporary directory so that it can
# be read back from disk for the upload.
tmp_dir = tempfile.mkdtemp()
profile_path = os.path.join(tmp_dir, "reference-profile.bin")
reference_view = reference_profile.view()
reference_view.write(profile_path)
print(
    "Reference profile written to temporary file in preparation to upload to Whylabs "
    f"as a reference profile: {profile_path}"
)

Reference profile written to temporary file in preparation to upload to Whylabs as a reference profile: /tmp/tmpx5uq4e8c/reference-profile.bin


In [None]:
import requests
import whylabs_client
from whylabs_client.api.log_api import LogApi
from whylabs_client.model.log_reference_request import LogReferenceRequest

# Upload the reference profile in two steps: ask the WhyLabs log API for an
# upload URL, then PUT the serialized profile file to that URL.

# Inputs required to make the request to WhyLabs using the whylabs_client
whylabs_api_endpoint = "https://api.whylabsapp.com"
reference_profile_alias = "demo-reference-profile-in-v1"
api_key = os.environ["WHYLABS_API_KEY"]
# Only the first 10 characters of the key are printed, never the whole value.
print(f"Using API key ID: {api_key[:10]} and endpoint {whylabs_api_endpoint}")
config = whylabs_client.Configuration(host=whylabs_api_endpoint, api_key={"ApiKeyAuth": api_key}, discard_unknown_keys=True)
api_log_client = whylabs_client.ApiClient(config)
log_api = LogApi(api_log_client)

org_id = os.environ.get("WHYLABS_DEFAULT_ORG_ID")
dataset_id = os.environ.get("WHYLABS_DEFAULT_DATASET_ID")
# The profile's dataset timestamp is converted to integer milliseconds since the epoch.
dataset_timestamp = int(reference_profile.dataset_timestamp.timestamp() * 1000)
alias = reference_profile_alias
# Without a timeout, requests waits indefinitely on a stalled connection and
# the cell would never finish.
upload_timeout_seconds = 60

try:
    with open(profile_path, "rb") as f:
        request = LogReferenceRequest(dataset_timestamp=dataset_timestamp, alias=alias)
        print(f"Making initial call to log_reference to get upload url for {alias} and in [{org_id}] for [{dataset_id}] using request: {request}")
        # Step 1: request the upload URL; the call is issued asynchronously and
        # then awaited immediately with .get().
        async_result = log_api.log_reference(org_id=org_id, model_id=dataset_id, log_reference_request=request, async_req=True)
        result = async_result.get()
        upload_url = result["upload_url"]
        # Only the first 140 characters of the URL are printed.
        print(f"got async_result from log_reference, upload url is: {upload_url[:140]}")
        print("About to upload reference profile...")
        # Step 2: send the file contents to the returned URL.
        http_response = requests.put(upload_url, data=f.read(), timeout=upload_timeout_seconds)
        if http_response.ok:
            print(f"Done uploading reference profile with alias: {alias} to: {upload_url[:140]} with API token ID: {api_key[:10]}")
        else:
            print(
                f"Failed to upload reference profile with alias: {alias} to: {upload_url[:140]} with API token ID: {api_key[:10]} to "
                + f"{whylabs_api_endpoint}: unexpected HTTP status {http_response}"
            )
except Exception as e:
    # Best-effort demo step: any failure (API error, timeout, missing file) is
    # reported instead of raised.
    print(f"Failed to upload reference profile: {e}.")