In [None]:
%matplotlib inline

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [None]:
# Optional convenience: send the library's internal Python logs to stdout so
# the logging calls in the cells below show what they are doing.
# Definitely not required -- the rest of the notebook works without it.
# NOTE(review): 'debug' is presumably the log level -- confirm.
from whylabs.logs import display_logging
display_logging('debug')

## Load data

In [None]:
# Read the sample records from the CSV that sits next to this notebook
df = pd.read_csv('lending_club_1000.csv')

# Draw 80% of the rows (without replacement, fixed seed so the split is
# reproducible) for training; every row that was not drawn becomes test data
df_training = df.sample(n=int(0.8 * len(df)), random_state=123, replace=False)
df_test = df.drop(index=df_training.index)

# Preview the first rows of the full frame
df.head()

## Log dataset sketches

In [None]:
from whylabs.logs import get_logger, get_or_create_session

# Create (or reuse) the logging session.
# Local output options are switched on; cloud output is switched off for now,
# although the bucket and folder it would use can still be configured.
s = get_or_create_session(
    # cloud destination (currently unused because output_to_cloud is False)
    bucket='whylabs-isaac',
    cloud_output_folder='test/logging',
    output_to_cloud=False,
    # local output
    output_to_disk=True,
    output_flat_summary=True,
)
logger = get_logger()

In [None]:
# Display the configuration held by the session `s` created above;
# this shows the available config options.
s.config

### Log dataframe

In [None]:
# Log the training split.  The second argument, 'training.data', is
# presumably the dataset name the output is stored under -- confirm.
logger.log_dataframe(df_training, 'training.data')
# Then you could do whatever training or calculations you'd like

### Inspect profiles/statistics

In [None]:
# You can also capture the logger response and interact with the generated
# profiles

# Log the test data
response = logger.log_dataframe(df_test, 'test.data')
# Inspect the dataset profile sketch, which the response exposes under the
# 'profile' key
prof = response['profile']
# flat_summary() returns a mapping indexed by string keys: its 'summary'
# entry is displayed below, and its 'hist' entry is read by the histogram
# cell that follows.
summary = prof.flat_summary()
# NOTE(review): presumably a DataFrame of per-column statistics -- confirm
stats_df = summary['summary']
stats_df

In [None]:
# See one of the inspected histograms: plot the bin edges and counts that the
# flat summary stores for the 'fico_range_high' column.
hist_data = summary['hist']['fico_range_high']
bins = hist_data['bin_edges']
n = hist_data['counts']
# Width of every bar = distance between consecutive bin edges
bin_width = np.diff(bins)

# Use an explicit figure/axes pair and label the plot so it can be read on its
# own.  Each bar starts at its left bin edge (align='edge'), hence bins[:-1].
fig, ax = plt.subplots()
ax.bar(bins[:-1], n, width=bin_width, align='edge')
# The trailing semicolon keeps the notebook from echoing the return value
ax.set(
    title='Histogram of fico_range_high (test.data profile)',
    xlabel='fico_range_high',
    ylabel='count',
);

## Load logged data

In [None]:
import glob

### Load flat table statistics

In [None]:
# Load the flat table statistics from the 'test.data' dataset
fnames = sorted(
    glob.glob('whylogs/test.data/dataset_summary/flat_table/dataset_summary*.csv')
)
if not fnames:
    # Fail with an actionable message instead of an opaque IndexError on
    # fnames[-1] when nothing has been written to disk yet
    raise FileNotFoundError(
        "No flat table summaries found under "
        "'whylogs/test.data/dataset_summary/flat_table/'. "
        "Run the logging cells above first."
    )
# Load the most recent file.  The names are sorted lexicographically, so the
# last one is the newest only if the file names embed a sortable timestamp
# -- TODO confirm.
test_stats = pd.read_csv(fnames[-1])
test_stats

### Load the full dataset profile sketch

In [None]:
from whylabs.logs.core import datasetprofile

# Load a dataset profile from the 'test.data' dataset
fnames = sorted(glob.glob('whylogs/test.data/dataset_profile/protobuf/*.bin'))
if not fnames:
    # Fail with an actionable message instead of an opaque IndexError on
    # fnames[-1] when nothing has been written to disk yet
    raise FileNotFoundError(
        "No protobuf dataset profiles found under "
        "'whylogs/test.data/dataset_profile/protobuf/'. "
        "Run the logging cells above first."
    )
# Take the lexicographically last file name -- presumably the most recent
# profile if the names embed a sortable timestamp (TODO confirm) -- and
# deserialize its raw bytes back into a DatasetProfile.
with open(fnames[-1], 'rb') as fp:
    test_prof = datasetprofile.DatasetProfile.from_protobuf_string(fp.read())

---

In [None]:
# Optional cleanup: not necessary, but you can reset the whylogs session if
# you want.
# NOTE(review): presumably a later get_or_create_session() call then builds a
# fresh session -- confirm.
from whylabs.logs.app.session import reset_session
reset_session()