Skip to content

Commit

Permalink
add method for setting profiles
Browse files Browse the repository at this point in the history
  • Loading branch information
Jirayr-Solvee committed Feb 21, 2022
1 parent 3c09971 commit 044d45a
Show file tree
Hide file tree
Showing 3 changed files with 27 additions and 30 deletions.
4 changes: 2 additions & 2 deletions examples/Profile_Viewer_In_Notebook.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -151,8 +151,8 @@
"metadata": {},
"outputs": [],
"source": [
"# should we allow multiple profiles?\n",
"visualization = NotebookProfileViewer(target_profiles=[target_profile], reference_profiles=[reference_profile])"
"visualization = NotebookProfileViewer()\n",
"visualization.set_profiles(target_profile=target_profile, reference_profile=reference_profile)"
]
},
{
Expand Down
47 changes: 22 additions & 25 deletions src/whylogs/viz/jupyter_notebook_viz.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
import json
import logging
import os
from typing import List

from IPython.core.display import HTML

Expand Down Expand Up @@ -38,16 +37,6 @@ class NotebookProfileViewer:
CONSTRAINTS_REPORT_TEMPLATE_NAME: "750PX",
}

def __init__(self, target_profiles: List[DatasetProfile] = None, reference_profiles: List[DatasetProfile] = None):
self.target_profiles = target_profiles
self.reference_profiles = reference_profiles
if self.target_profiles:
if len(self.target_profiles) > 1:
logger.warning("More than one profile not implemented yet, default to first profile in the list ")
self.target_profile_jsons = [message_to_json(each_prof.to_summary()) for each_prof in self.target_profiles]
if self.reference_profiles:
self.reference_profile_jsons = [message_to_json(each_prof.to_summary()) for each_prof in self.reference_profiles]

def __get_template_path(self, html_file_name):
template_path = os.path.abspath(os.path.join(_MY_DIR, os.pardir, "viewer", html_file_name))
return template_path
Expand All @@ -70,9 +59,9 @@ def __display_feature_chart(self, feature_names, template_name, preferred_cell_h
if type(feature_names) is not list:
feature_names = [feature_names]
template = self.__get_compiled_template(template_name)
if self.reference_profiles:
target_profile_columns = json.loads(self.target_profile_jsons[0]).get("columns")
reference_profile_columns = json.loads(self.reference_profile_jsons[0]).get("columns")
if self._reference_profile:
target_profile_columns = json.loads(self._target_profile_json).get("columns")
reference_profile_columns = json.loads(self._reference_profile_json).get("columns")
target_profile_features, reference_profile_features = {}, {}
for feature_name in feature_names:
target_profile_features[feature_name] = target_profile_columns.get(feature_name)
Expand All @@ -91,11 +80,19 @@ def __display_rendered_template(self, template, template_name, height):
iframe = f"""<div></div><iframe srcdoc="{html.escape(template)}" width=100% height={height} frameBorder=0></iframe>"""
return HTML(iframe)

def set_profiles(self, target_profile: DatasetProfile = None, reference_profile: DatasetProfile = None):
    """
    Attach the profiles used by the report methods of this viewer.

    Both the profile objects and their JSON-serialized summaries are stored
    on the instance. Calling this method again replaces all four attributes.

    Parameters
    ----------
    target_profile: Profile to visualize; leave as None when not available.
    reference_profile: Profile the target is compared against; leave as None
        when not available.
    """
    self._target_profile = target_profile
    self._reference_profile = reference_profile
    # Always (re)assign both JSON caches. Assigning them only when a profile is
    # present left the attribute undefined on a fresh viewer (AttributeError on
    # first read) and kept stale JSON from a previous call when a profile was
    # later dropped.
    self._target_profile_json = message_to_json(target_profile.to_summary()) if target_profile else None
    self._reference_profile_json = message_to_json(reference_profile.to_summary()) if reference_profile else None

def summary_drift_report(self, preferred_cell_height=None):
reference_profile = add_drift_val_to_ref_profile_json(self.target_profiles[0], self.reference_profiles[0], json.loads(self.reference_profile_jsons[0]))
reference_profile = add_drift_val_to_ref_profile_json(self._target_profile, self._reference_profile, json.loads(self._reference_profile_json))
template = self.__get_compiled_template(self.SUMMARY_REPORT_TEMPLATE_NAME)
profiles_summary = {"profile_from_whylogs": self.target_profile_jsons[0]}
if self.reference_profiles:
profiles_summary = {"profile_from_whylogs": self._target_profile_json}
if self._reference_profile:
profiles_summary["reference_profile_from_whylogs"] = json.dumps(reference_profile)
return self.__display_rendered_template(template(profiles_summary), self.SUMMARY_REPORT_TEMPLATE_NAME, preferred_cell_height)

Expand All @@ -110,12 +107,12 @@ def differenced_distribution_chart(self, feature_names, preferred_cell_height=No

def feature_statistics(self, feature_name, profile="reference", preferred_cell_height=None):
template = self.__get_compiled_template(self.FEATURE_STATISTICS_TEMPLATE_NAME)
if self.reference_profiles and profile.lower() == "reference":
selected_profile_json = self.reference_profile_jsons
selected_profile = self.reference_profiles[0].columns
if self._reference_profile and profile.lower() == "reference":
selected_profile_json = self._reference_profile_json
selected_profile = self._reference_profile.columns
else:
selected_profile_json = self.target_profile_jsons
selected_profile = self.target_profiles[0].columns
selected_profile_json = self._target_profile_json
selected_profile = self._target_profile.columns
if selected_profile.get(feature_name).schema_tracker.to_summary().inferred_type.type in numerical_types:
rendered_template = template(
{
Expand All @@ -136,10 +133,10 @@ def constraints_report(self, constraints, preferred_cell_height=None):

def download(self, html, preferred_path=None, html_file_name=None):
if not html_file_name:
if self.reference_profiles:
html_file_name = self.reference_profiles[0].dataset_timestamp
if self._reference_profile:
html_file_name = self._reference_profile.dataset_timestamp
else:
html_file_name = self.target_profiles[0].dataset_timestamp
html_file_name = self._target_profile.dataset_timestamp
if preferred_path:
path = os.path.expanduser(preferred_path)
else:
Expand Down
6 changes: 3 additions & 3 deletions src/whylogs/viz/utils/profile_viz_calculations.py
Original file line number Diff line number Diff line change
Expand Up @@ -155,21 +155,21 @@ def add_drift_val_to_ref_profile_json(target_profile, reference_profile, referen
return reference_profile_json


def add_feature_statistics(feature, profile_jsons, feature_name):
def add_feature_statistics(feature, profile_json, feature_name):
"""
Calculates different values for feature statistics
Parameters
----------
feature:
profile_jsons: Profile summary serialized json
profile_json: Profile summary serialized json
feature_name: Name of feature
Returns
-------
feature: Feature data with appended values for statistics report
"""
profile_features = json.loads(profile_jsons[0])
profile_features = json.loads(profile_json)
feature_with_statistics = {}
feature_with_statistics["properties"] = profile_features.get("properties")
feature_with_statistics[feature_name] = profile_features.get("columns").get(feature_name)
Expand Down

0 comments on commit 044d45a

Please sign in to comment.