# Install basic requirements

In [None]:
pip install -U whylogs pandas

In [2]:
import whylogs
import pandas as pd

# Load example data

The example data is prepared from GitHub. You can use your own data if you want.

WhyLabs only requires the whylogs API - your raw data never leaves your premises.

In [3]:
# The demo CSV is fetched from a fixed commit of the whylogs-examples repository,
# so the data stays the same even if the repository changes later.
lending_club_csv_url = (
    "https://github.com/whylabs/whylogs-examples/raw/"
    "4c52209fc5aca3a5a462242db2f1520452368670/python/lending_club_demo.csv"
)
full_data = pd.read_csv(lending_club_csv_url)

# Summary statistics of the numeric columns, shown as the cell output.
full_data.describe()

Unnamed: 0,id,member_id,loan_amnt,funded_amnt,funded_amnt_inv,int_rate,installment,annual_inc,dti,delinq_2yrs,...,deferral_term,hardship_amount,hardship_length,hardship_dpd,orig_projected_additional_accrued_interest,hardship_payoff_balance_amount,hardship_last_payment_amount,settlement_amount,settlement_percentage,settlement_term
count,16434.0,0.0,16434.0,16434.0,16434.0,16434.0,16434.0,16434.0,16429.0,16434.0,...,53.0,53.0,53.0,53.0,46.0,53.0,53.0,120.0,120.0,120.0
mean,60324930.0,,14835.055373,14827.79299,14806.153567,13.219376,442.485652,77694.3,18.546474,0.338627,...,3.0,128.221321,3.0,12.679245,364.672174,10205.598679,183.086981,5540.947917,48.885917,10.366667
std,36242410.0,,8864.324654,8861.86405,8866.243509,4.695684,261.00453,76612.81,9.347356,0.909076,...,0.0,103.278475,0.0,9.54901,286.708985,7498.442069,175.239316,3958.539886,9.316292,9.125579
min,88046.0,,1000.0,1000.0,0.0,5.32,30.16,0.0,0.0,0.0,...,3.0,8.0,3.0,0.0,52.89,845.36,0.01,340.86,19.59,0.0
25%,29114010.0,,8000.0,8000.0,8000.0,9.76,253.1875,46000.0,12.14,0.0,...,3.0,54.03,3.0,0.0,164.0625,5018.9,46.76,2887.5,45.0,1.0
50%,64185570.0,,12800.0,12800.0,12800.0,12.74,379.76,65000.0,17.91,0.0,...,3.0,96.54,3.0,15.0,265.455,7677.93,117.69,4491.405,45.0,11.0
75%,91283160.0,,20000.0,20000.0,20000.0,15.99,588.08,92000.0,24.37,0.0,...,3.0,168.67,3.0,20.0,493.1475,12926.81,326.93,7557.495,50.79,18.0
max,120117500.0,,40000.0,40000.0,40000.0,30.99,1618.03,5499500.0,281.33,21.0,...,3.0,507.43,3.0,27.0,1522.29,30158.88,659.41,25000.0,91.2,24.0


# Creating batches of data

whylogs handles profiles in batches. What that means is:
* If two profiles are sent for the same day, we merge them into one profile (they are mergeable!!)
* To view profiles across different batches, you need to break data into smaller batches


**NOTE**: profiles will show up in real time if they are dated within the last 7 days.

Here we are creating 8 batches

In [4]:
# Create a list of dates
# Issue-month labels in chronological order (these match values of the `issue_d` column).
chronological_months = [
    'Jan-2017', 'Feb-2017', 'Mar-2017', 'Apr-2017',
    'May-2017', 'Jun-2017', 'Jul-2017', 'Aug-2017',
]

# Newest month first: the logging loop later maps list position i to "i days ago".
dates = chronological_months[::-1]
print(dates)

['Aug-2017', 'Jul-2017', 'Jun-2017', 'May-2017', 'Apr-2017', 'Mar-2017', 'Feb-2017', 'Jan-2017']


In [5]:
# One data frame per month label, in the same order as `dates`:
# each entry holds the rows of `full_data` whose `issue_d` equals that label.
pdfs = [
    full_data[full_data['issue_d'] == month_label]
    for month_label in dates
]

# Configure whylogs

whylogs, by default, does not send information to WhyLabs.

There are a few small steps to set this up. If you don't have an API key yet, please onboard with WhyLabs first.

In [6]:
from whylogs.app import Session
from whylogs.app.writers import WhyLabsWriter
import os
import datetime

In [7]:
import getpass

# Point the client at the development deployment. Not needed normally.
os.environ["WHYLABS_API_ENDPOINT"] = "https://songbird.development.whylabsdev.com"

# Organisation ID: read interactively and exported as an environment variable.
print("Enter your WhyLabs Org ID")
org_id = input()
os.environ["WHYLABS_DEFAULT_ORG_ID"] = org_id

# API key: read with getpass so the typed value is not echoed into the notebook.
print("Enter your WhyLabs API key")
os.environ["WHYLABS_API_KEY"] = getpass.getpass()

# Only the first 10 characters are echoed back, as a sanity check of which key is in use.
key_id_prefix = os.environ["WHYLABS_API_KEY"][:10]
print("Using API Key ID: ", key_id_prefix)

Enter your WhyLabs Org ID


 org-5953


Enter your WhyLabs API key


 ································································


Using API Key ID:  Em2W1PzHqc


## Creating session

Once the environment variables are set, let's create a whylogs session with a WhyLabs writer.

Note that you can add your local writer or S3 writer if you want here. Check out the API docs for more information.

In [8]:
# create WhyLabs session
# Writer that ships profiles to WhyLabs.
# NOTE(review): the empty output path and empty `formats` list presumably mean
# "write nothing locally" - confirm against the whylogs writer documentation.
writer = WhyLabsWriter("", formats=[])

# Session that routes everything it logs through the WhyLabs writer.
session = Session(
    writers=[writer],
    project="demo-project",
    pipeline="demo-pipeline",
)

## Logging to WhyLabs

Ensure you have a **model ID** (also called **dataset ID**) before you start!

In [None]:
print("Enter your model ID from WhyLabs:")
# Strip stray whitespace from a pasted ID so the `datasetId` tag matches exactly.
model_id = input().strip()

# Read the clock once for the whole run. If "now" were re-read on every iteration
# and the run crossed midnight UTC, two consecutive batches would resolve to the
# same calendar day and be merged into a single profile.
run_started_at = datetime.datetime.now(tz=datetime.timezone.utc)

for i, df in enumerate(pdfs):
    # Walk backwards one day per batch: each data frame has to map to its own
    # date to show up as a different batch in WhyLabs.
    dt = run_started_at - datetime.timedelta(days=i)

    # Create a new logger for this date; the `with` block scopes it to one batch.
    with session.logger(tags={"datasetId": model_id}, dataset_timestamp=dt) as ylog:
        print("Log data frame for ", dt)
        ylog.log_dataframe(df)

## Voila

* Now check the application to see if your **statistics** are in!!
* Also, run the above cell again for the same model ID. Do you see the statistics change in WhyLabs? Especially the counters?