<a href="https://colab.research.google.com/gist/natiska/478c2dfeb347f266209e05186fc6e2e5/upload_ref_profile_and_create_monitor.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Writing Segmented Reference Profiles and Updating the Monitor Configuration Accordingly

In [4]:
# Install (or upgrade) whylogs together with its `datasets` extra, quietly.
# Note: you may need to restart the kernel to use updated packages.
%pip install -U -q 'whylogs[datasets]'

## ✔️ Setting the Environment Variables


In [5]:
import getpass
import os

# Set your org ID here - should be something like "org-xxxx".
# The prompt is actually read now; press Enter to keep the default below.
print("Enter your WhyLabs Org ID")
os.environ["WHYLABS_DEFAULT_ORG_ID"] = input().strip() or "org-qEZQUE"

# Set your dataset_id (or model_id) here - should be something like "model-xxxx".
# Press Enter to keep the default below.
print("Enter your WhyLabs Dataset ID")
os.environ["WHYLABS_DEFAULT_DATASET_ID"] = input().strip() or "model-27"

# Set your API key here; getpass keeps the typed value out of the cell output.
print("Enter your WhyLabs API key")
os.environ["WHYLABS_API_KEY"] = getpass.getpass()

# Echo only the first 10 characters (labelled the key ID below) so the full
# key is never displayed in the notebook.
print("Using API Key ID: ", os.environ["WHYLABS_API_KEY"][0:10])

Enter your WhyLabs Org ID
Enter your WhyLabs Dataset ID
Enter your WhyLabs API key
··········
Using API Key ID:  iSnmQQOV6g


## Fetching the Data
For demonstration, let's use data for transactions from a small retail business:

In [6]:
import pandas as pd

# Public sample dataset of retail transactions, read straight from its URL.
csv_url = "https://whylabs-public.s3.us-west-2.amazonaws.com/datasets/tour/current.csv"
df = pd.read_csv(csv_url)

# Preview the first rows to check which columns were loaded.
df.head()

Unnamed: 0,Transaction ID,Customer ID,Quantity,Item Price,Total Tax,Total Amount,Store Type,Product Category,Product Subcategory,Gender,Transaction Type,Age
0,T14259136777,C274477,1,148.9,15.6345,164.5345,TeleShop,Electronics,Audio and video,F,Purchase,37.0
1,T7313351894,C267568,4,48.1,20.202,212.602,Flagship store,Home and kitchen,Furnishing,M,Purchase,25.0
2,T37745642681,C267098,1,10.9,1.1445,12.0445,Flagship store,Footwear,Mens,F,Purchase,42.0
3,T13861409908,C271608,2,135.2,28.392,298.792,MBR,Footwear,Mens,F,Purchase,43.0
4,T58956348529,C272484,4,144.3,60.606,637.806,TeleShop,Clothing,Mens,F,Purchase,39.0



## Defining segments

**TODO:** Specify the column to segment the data on.

In [7]:
# Count the rows per "Store Type" value to see how large each candidate segment is.
df["Store Type"].value_counts()

e-Shop            392
TeleShop          206
Flagship store    175
MBR               172
Name: Store Type, dtype: int64

In [8]:
# Column whose values define the segments.
# NOTE(review): the name is misspelled ("colum"); it is kept because the
# segment_on_column(...) call in the next cell refers to it by this name.
segmentation_colum = "Store Type"

In [9]:
from whylogs.core.segmentation_partition import segment_on_column

# Build the segment definition for the chosen column; it is passed to
# DatasetSchema(segments=...) when the data is profiled.
column_segments = segment_on_column(segmentation_colum)

## 📊 Profiling the Data

Let's profile the data with whylogs:

In [10]:
import whylogs as why
from whylogs.core.schema import DatasetSchema
from datetime import datetime, timezone

# UTC timestamp taken at profiling time (not referenced again in the visible cells).
current_date = datetime.now(timezone.utc)

# Attach the segment definition to the schema, then profile the whole frame;
# the result holds one profile per segment.
segmented_schema = DatasetSchema(segments=column_segments)
results = why.log(df, schema=segmented_schema)

In [12]:
# Size of the segmented result; the recorded run shows 4, matching the four
# "Store Type" values listed earlier.
results.count

4

In [13]:
# Sanity check: pull out the first segment and show its profile as a DataFrame.
available_segments = results.segments()
first_segment = available_segments[0]
segmented_profile = results.profile(first_segment)

segment_heading = "Profile view for segment {}".format(first_segment.key)
print(segment_heading)

segmented_profile.view().to_pandas()

Profile view for segment ('Flagship store',)


Unnamed: 0_level_0,cardinality/est,cardinality/lower_1,cardinality/upper_1,counts/inf,counts/n,counts/nan,counts/null,distribution/max,distribution/mean,distribution/median,...,type,types/boolean,types/fractional,types/integral,types/object,types/string,types/tensor,frequent_items/frequent_strings,ints/max,ints/min
column,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Age,24.000001,24.0,24.0012,0,175,0,0,43.0,30.754286,29.0,...,SummaryType.COLUMN,0,175,0,0,0,0,,,
Customer ID,171.000072,171.0,171.00861,0,175,0,0,,0.0,,...,SummaryType.COLUMN,0,0,0,0,175,0,"[FrequentItem(value='C270833', est=2, upper=2,...",,
Gender,2.0,2.0,2.0001,0,175,0,0,,0.0,,...,SummaryType.COLUMN,0,0,0,0,175,0,"[FrequentItem(value='F', est=91, upper=91, low...",,
Item Price,165.000067,165.0,165.008306,0,175,0,0,150.0,80.862286,81.8,...,SummaryType.COLUMN,0,175,0,0,0,0,,,
Product Category,6.0,6.0,6.0003,0,175,0,0,,0.0,,...,SummaryType.COLUMN,0,0,0,0,175,0,"[FrequentItem(value='Books', est=42, upper=42,...",,
Product Subcategory,18.000001,18.0,18.000899,0,175,0,0,,0.0,,...,SummaryType.COLUMN,0,0,0,0,175,0,"[FrequentItem(value='Mens', est=23, upper=23, ...",,
Quantity,10.0,10.0,10.0005,0,175,0,0,5.0,2.291429,3.0,...,SummaryType.COLUMN,0,0,175,0,0,0,"[FrequentItem(value='2', est=35, upper=35, low...",5.0,-5.0
Store Type,1.0,1.0,1.00005,0,175,0,0,,0.0,,...,SummaryType.COLUMN,0,0,0,0,175,0,"[FrequentItem(value='Flagship store', est=175,...",,
Total Amount,170.000071,170.0,170.008559,0,175,0,0,806.65,193.382577,170.612,...,SummaryType.COLUMN,0,175,0,0,0,0,,,
Total Tax,168.00007,168.0,168.008458,0,175,0,0,77.07,25.44024,20.034,...,SummaryType.COLUMN,0,175,0,0,0,0,,,


## 🖊 Writing the Reference Profile to WhyLabs

Defining the Reference Profile name:

In [14]:
# Name under which the reference profile is uploaded; the same value is later
# used as the `profileId` of the monitor baseline.
reference_profile_name = "training_data_v1"

In [15]:
from whylogs.api.writer.whylabs import WhyLabsWriter

# Configure a WhyLabs writer so the segmented profiles are uploaded as a
# reference profile under the chosen name rather than as a regular batch.
reference_writer = results.writer("whylabs").option(
    reference_profile_name=reference_profile_name
)

# Keep the write call as the last expression so its status list is displayed.
reference_writer.write()

[(True, 'training_data_v1')]

## Updating the monitor baseline with a new Reference Profile ID

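**TODO:** Specify the name of the monitor to update. If the monitor setup already has a configuration, the code below only replaces its baseline with the new reference profile; otherwise it creates a new standard-deviation monitor with that baseline.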

In [25]:
# Identifier of the monitor to update or create; passed to MonitorSetup as `monitor_id`.
monitor_name = "stddev-monitor"

In [17]:
!pip install -qq whylabs-toolkit

[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/47.7 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m47.7/47.7 kB[0m [31m1.2 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m81.0/81.0 kB[0m [31m3.9 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.2/1.2 MB[0m [31m19.8 MB/s[0m eta [36m0:00:00[0m
[?25h

In [18]:
# Mirror the whylogs settings under the shorter variable names.
# Presumably whylabs_toolkit reads ORG_ID / DATASET_ID - confirm against its docs.
for target_var, source_var in (
    ("ORG_ID", "WHYLABS_DEFAULT_ORG_ID"),
    ("DATASET_ID", "WHYLABS_DEFAULT_DATASET_ID"),
):
    os.environ[target_var] = os.environ[source_var]

In [26]:
from whylabs_toolkit.monitor import MonitorSetup, MonitorManager
from whylabs_toolkit.monitor.models import *

In [27]:
# Bind a setup helper to the monitor named above; the next cell inspects
# `monitor_setup.config` to decide between updating and creating.
monitor_setup = MonitorSetup(monitor_id=monitor_name)



In [29]:
# Point the monitor at the new reference profile.
if monitor_setup.config:
    # A configuration is already present: keep it and only swap the baseline.
    # The dataset ID belongs inside the f-string; previously the string ended
    # early and `in {...}` became a set-membership test that printed False.
    print(f"Updating monitor {monitor_name} in {os.environ['DATASET_ID']}")
    monitor_setup.config.baseline = ReferenceProfileId(profileId=reference_profile_name)
else:
    # No configuration yet: set up a standard-deviation analyzer on the column
    # median, compared against the reference profile.
    print(f"Creating monitor {monitor_name} in {os.environ['DATASET_ID']}")
    monitor_setup.config = StddevConfig(
        metric=SimpleColumnMetric.median,
        baseline=ReferenceProfileId(profileId=reference_profile_name),
    )
    # NOTE(review): the recipient below is specific to the notebook author -
    # replace the id/destination with your own notification target.
    monitor_setup.actions = [EmailRecipient(id="natalias-email", destination="natalia+test@whylabs.ai")]

Creating monitor stddev-monitor in model-27


In [30]:
# Display the resulting configuration to verify the metric and baseline before saving.
monitor_setup.config

StddevConfig(schemaVersion=None, params=None, metric=<SimpleColumnMetric.median: 'median'>, maxUpperThreshold=None, minLowerThreshold=None, type=<AlgorithmType.stddev: 'stddev'>, factor=3.0, minBatchSize=1, baseline=ReferenceProfileId(datasetId=None, type=<BaselineType.Reference: 'Reference'>, profileId='training_data_v1'))

In [31]:
# Apply the changes made to the setup object above.
# NOTE(review): presumably this finalises the setup locally and nothing is
# uploaded until MonitorManager.save() runs - confirm against whylabs-toolkit docs.
monitor_setup.apply()


In [33]:
# Hand the prepared setup to a manager and save it.
# save() presumably pushes the monitor configuration to WhyLabs - confirm
# against the whylabs-toolkit docs.
manager = MonitorManager(setup=monitor_setup)
manager.save()