Skip to content

Commit

Permalink
Merge 5a9ba08 into 8c61613
Browse files Browse the repository at this point in the history
  • Loading branch information
lalmei committed Aug 23, 2021
2 parents 8c61613 + 5a9ba08 commit e457abf
Show file tree
Hide file tree
Showing 4 changed files with 117 additions and 20 deletions.
20 changes: 1 addition & 19 deletions examples/notebooks/constraints.ipynb
Original file line number Diff line number Diff line change
@@ -1,23 +1,5 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"/Volumes/Workspace/whylogs-python/notebooks\n"
]
}
],
"source": [
"! cd .. && python setup.py develop >/dev/null\n",
"! pwd\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
Expand Down Expand Up @@ -759,7 +741,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.5"
"version": "3.8.8"
}
},
"nbformat": 4,
Expand Down
8 changes: 7 additions & 1 deletion tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,8 +50,14 @@ def s3_all_config_metadata_path():


@pytest.fixture(scope="session")
def df_lending_club():
def local_config_path():
    """Return the path ``<_MY_DIR>/../testdata/.whylogs_local.yaml``."""
    return os.path.join(_MY_DIR, os.pardir, "testdata", ".whylogs_local.yaml")


@pytest.fixture(scope="session")
def df_lending_club():
    """Read ``testdata/lending_club_1000.csv`` and return only its first 50 rows."""
    csv_path = os.path.join(_MY_DIR, os.pardir, "testdata", "lending_club_1000.csv")
    full_frame = pd.read_csv(csv_path)
    # Keep the fixture small: just the top 50 rows.
    return full_frame.head(50)

Expand Down
85 changes: 85 additions & 0 deletions tests/unit/core/statistics/test_constraints.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
import json

import pytest

from whylogs.app.config import load_config
from whylogs.app.session import session_from_config
from whylogs.core.statistics.constraints import (
DatasetConstraints,
Op,
SummaryConstraint,
ValueConstraint,
_summary_funcs1,
_value_funcs,
)
from whylogs.proto import Op
from whylogs.util.protobuf import message_to_json


def test_value_summary_serialization():
    """Check the JSON form of serialized value and summary constraints.

    For every operator key in ``_value_funcs`` and ``_summary_funcs1`` a
    constraint is built, converted with ``to_protobuf``, rendered with
    ``message_to_json`` and the resulting fields are compared with the
    constructor arguments.
    """
    # Only the operator keys are needed; the mapped callables are not used here.
    for each_op in _value_funcs:
        value = ValueConstraint(each_op, 3.6)
        msg_value = value.to_protobuf()
        json_value = json.loads(message_to_json(msg_value))

        # No explicit name was given, so the expected name is derived from
        # the operator and the threshold.
        assert json_value["name"] == "value " + Op.Name(each_op) + " 3.6"
        # Wrap the *expected* number in approx so the tolerance is relative to it.
        assert json_value["value"] == pytest.approx(3.6, 0.001)
        assert json_value["op"] == Op.Name(each_op)
        # Identity check: the field must be a real JSON ``false``, not merely falsy.
        assert json_value["verbose"] is False

    for each_op in _summary_funcs1:
        # constraints may have an optional name
        sum_constraint = SummaryConstraint("min", each_op, 300000, name="< 30K")
        msg_sum_const = sum_constraint.to_protobuf()
        json_summary = json.loads(message_to_json(msg_sum_const))

        assert json_summary["name"] == "< 30K"
        assert json_summary["value"] == pytest.approx(300000, 0.1)
        assert json_summary["firstField"] == "min"
        assert json_summary["op"] == Op.Name(each_op)
        assert json_summary["verbose"] is False


def test_value_constraints(df_lending_club, local_config_path):
    """Log a dataframe with value constraints attached and inspect the report.

    Args:
        df_lending_club: fixture providing the first 50 rows of the lending
            club sample data.
        local_config_path: fixture providing the path of the local whylogs
            config used to build the session.
    """
    conforming_loan = ValueConstraint(Op.LT, 548250)
    smallest_loan = ValueConstraint(Op.GT, 2500.0, verbose=True)
    high_fico = ValueConstraint(Op.GT, 4000)

    dc = DatasetConstraints(
        None,
        value_constraints={
            "loan_amnt": [conforming_loan, smallest_loan],
            "fico_range_high": [high_fico],
        },
    )

    config = load_config(local_config_path)
    session = session_from_config(config)
    try:
        # The returned profile is not needed; the constraints object itself
        # is queried for the report below.
        session.log_dataframe(df_lending_club, "test.data", constraints=dc)
    finally:
        # Close the session even when logging raises.
        session.close()
    report = dc.report()

    # One report entry per constrained feature; show the report on failure.
    assert len(report) == 2, report
    # make sure it checked every value (the fixture holds 50 rows)
    for each_feat in report:
        for each_constraint in each_feat[1]:
            assert each_constraint[1] == 50

    # NOTE(review): index 2 is presumably the failure count, i.e. all 50 rows
    # are expected to violate the "> 4000" fico constraint - confirm against
    # DatasetConstraints.report().
    assert report[1][1][0][2] == 50


def test_summary_constraints(df_lending_club, local_config_path):
    """Log a dataframe, apply a summary constraint and inspect the report.

    Args:
        df_lending_club: fixture providing the first 50 rows of the lending
            club sample data.
        local_config_path: fixture providing the path of the local whylogs
            config used to build the session.
    """
    non_negative = SummaryConstraint("min", Op.GE, 0)

    dc = DatasetConstraints(None, summary_constraints={"annual_inc": [non_negative]})
    config = load_config(local_config_path)
    session = session_from_config(config)
    try:
        profile = session.log_dataframe(df_lending_club, "test.data", constraints=dc)
    finally:
        # Close the session even when logging raises.
        session.close()
    report = profile.apply_summary_constraints()

    # Only one feature ("annual_inc") carries a summary constraint.
    assert len(report) == 1, report
    # NOTE(review): index 1 is presumably the number of times the constraint
    # was applied (once per profile summary, not once per row) - confirm.
    for each_feat in report:
        for each_constraint in each_feat[1]:
            assert each_constraint[1] == 1
24 changes: 24 additions & 0 deletions tests/unit/viz/test_profile_viewer.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
import datetime
import os

import numpy as np

from whylogs.app.config import load_config
from whylogs.app.session import session_from_config
from whylogs.viz import profile_viewer


def test_profile_viewer(tmpdir, local_config_path):
    """Render a small logged profile with ``profile_viewer`` and check the output file.

    Args:
        tmpdir: pytest's built-in temporary directory fixture.
        local_config_path: fixture providing the path of the local whylogs
            config used to build the session.
    """
    config = load_config(local_config_path)
    session = session_from_config(config)

    # Build the target path *inside* tmpdir. ``tmpdir + "my_test.html"``
    # appends the text to the directory's basename instead, which puts the
    # file in a sibling location outside the temporary directory.
    output_path = os.path.join(str(tmpdir), "my_test.html")

    with session.logger("mytestytest", dataset_timestamp=datetime.datetime(2021, 6, 2)) as logger:
        for _ in range(5):
            logger.log({"uniform_integers": np.random.randint(0, 50)})
            logger.log({"nulls": None})

        profile = logger.profile

    result = profile_viewer(profiles=[profile], output_path=output_path)
    assert os.path.exists(output_path)
    assert result == output_path

0 comments on commit e457abf

Please sign in to comment.