diff --git a/examples/Dataset_Profiler_Viewer.ipynb b/examples/Dataset_Profiler_Viewer.ipynb index 48b2c8809b..768ba8de08 100644 --- a/examples/Dataset_Profiler_Viewer.ipynb +++ b/examples/Dataset_Profiler_Viewer.ipynb @@ -18,7 +18,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "id": "1f0ab34a", "metadata": {}, "outputs": [], @@ -29,7 +29,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "id": "fc04cc42", "metadata": {}, "outputs": [], @@ -59,7 +59,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 4, "id": "7ebc0b5f", "metadata": {}, "outputs": [], @@ -86,22 +86,24 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": null, "id": "2e018520", "metadata": {}, "outputs": [], "source": [ "session = get_or_create_session()\n", - "with session.logger(\"mytestytest\",dataset_timestamp=datetime.datetime(2021, 6, 2)) as logger:\n", - " for _ in range(500):\n", - " logger.log({\"uniform_integers\": np.random.randint(0,50)})\n", - " logger.log({\"strings\": fake.name()})\n", - " logger.log({\"mixture_distribution\": np.random.choice(distribution, 1)[0]})\n", - " logger.log({\"nulls\": None})\n", - " logger.log({\"moah_data\":1})\n", - " logger.log({\"moah_data\":1})\n", - " logger.log({\"moah_data\":5})\n", - " profile=logger.profile" + "def profile_generator():\n", + " with session.logger(\"mytestytest\",dataset_timestamp=datetime.datetime(2021, 6, 2)) as logger:\n", + " for _ in range(500):\n", + " logger.log({\"uniform_integers\": np.random.randint(0,50)})\n", + " logger.log({\"strings\": fake.name()})\n", + " logger.log({\"mixture_distribution\": np.random.choice(distribution, 1)[0]})\n", + " logger.log({\"nulls\": None})\n", + " logger.log({\"moah_data\":1})\n", + " logger.log({\"moah_data\":1})\n", + " logger.log({\"moah_data\":5})\n", + " return logger.profile\n", + "profile=profile_generator()" ] }, { @@ -123,29 +125,54 @@ }, { "cell_type": "code", - 
"execution_count": 16, + "execution_count": null, "id": "e293634d", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "'/var/folders/pr/f715zv8x17b1v5vwydgv2gq40000gq/T/tmpabxku502.html'" - ] - }, - "execution_count": 16, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "profile_viewer(profiles=[profile], output_path=None)" ] }, + { + "cell_type": "markdown", + "id": "e9cebe5c", + "metadata": {}, + "source": [ + "### Logging reference profile" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "d36563e9", + "metadata": {}, + "outputs": [], + "source": [ + "reference_profile=profile_generator() " + ] + }, + { + "cell_type": "markdown", + "id": "56dcaef9", + "metadata": {}, + "source": [ + "### Add reference profile to viewer and open" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6a4eb38c", + "metadata": {}, + "outputs": [], + "source": [ + "profile_viewer(profiles=[profile], reference_profiles=[reference_profile], output_path=None,)" + ] + }, { "cell_type": "code", "execution_count": null, - "id": "d09732c5", + "id": "1b1d89e2", "metadata": {}, "outputs": [], "source": [] @@ -153,9 +180,9 @@ ], "metadata": { "kernelspec": { - "display_name": "whylogs-dev", + "display_name": "Python 3", "language": "python", - "name": "whylogs-dev" + "name": "python3" }, "language_info": { "codemirror_mode": { @@ -167,7 +194,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.6" + "version": "3.8.8" } }, "nbformat": 4, diff --git a/examples/Profile_Viewer_In_Notebook.ipynb b/examples/Profile_Viewer_In_Notebook.ipynb new file mode 100644 index 0000000000..ee2876a96a --- /dev/null +++ b/examples/Profile_Viewer_In_Notebook.ipynb @@ -0,0 +1,503 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "6b36ea8e", + "metadata": {}, + "source": [ + "# 📈 Whylogs Profile Visualization\n", + "### gives you various ways to simplify the 
process of detecting dataset drift. Below you can see the instructions for creating a dummy dataset and a list of currently available visual reports." + ] + }, + { + "cell_type": "markdown", + "id": "be56fca7", + "metadata": {}, + "source": [ + "### 🗂️Install dependencies and make imports" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "29e48da5", + "metadata": {}, + "outputs": [], + "source": [ + "!pip install faker\n", + "!pip install pybars3" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5d40e474", + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "import pandas as pd\n", + "import datetime\n", + "from collections import OrderedDict\n", + "from faker import Faker" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2c3ff3a2", + "metadata": {}, + "outputs": [], + "source": [ + "from whylogs import get_or_create_session\n", + "from whylogs.viz import NotebookProfileViewer\n", + "from whylogs.core.statistics.constraints import (\n", + " columnValuesInSetConstraint,\n", + " containsEmailConstraint,\n", + " minBetweenConstraint,\n", + " maxLessThanEqualConstraint,\n", + " parametrizedKSTestPValueGreaterThanConstraint,\n", + " columnsMatchSetConstraint,\n", + " columnPairValuesInSetConstraint,\n", + " sumOfRowValuesOfMultipleColumnsEqualsConstraint,\n", + " columnValuesUniqueWithinRow,\n", + " DatasetConstraints\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "272f6054", + "metadata": {}, + "source": [ + "### ♻️Create dummy data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "41f8a0f9", + "metadata": {}, + "outputs": [], + "source": [ + "locales = OrderedDict([\n", + " ('en-US', 1),\n", + " ('fr-FR', 2),\n", + " ('ja_JP', 2),\n", + "])\n", + "fake = Faker(locales)\n", + "distribution = np.concatenate((np.random.normal(0.1, 0.1, 500), np.random.normal(0.6, 0.2, 500)))" + ] + }, + { + "cell_type": "markdown", + "id": "200841f2", + "metadata": 
{}, + "source": [ + "### 📝 Log it\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ea4ce6a6", + "metadata": {}, + "outputs": [], + "source": [ + "session = get_or_create_session()\n", + "def profile_generator():\n", + " with session.logger(\"mytestytest\", dataset_timestamp=datetime.datetime(2021, 6, 2)) as logger:\n", + " for _ in range(500):\n", + " logger.log({\"uniform_integers\": np.random.randint(0,50)})\n", + " logger.log({\"strings\": fake.name()})\n", + " logger.log({\"mixture_distribution\": np.random.choice(distribution, 1)[0]}) \n", + " logger.log({\"1mixture_distribution\": np.random.choice(distribution, 1)[0]})\n", + " logger.log({\"2mixture_distribution\": np.random.choice(distribution, 1)[0]})\n", + " logger.log({\"3mixture_distribution\": np.random.choice(distribution, 1)[0]})\n", + " logger.log({\"4mixture_distribution\": np.random.choice(distribution, 1)[0]})\n", + " logger.log({\"nulls\": None})\n", + " logger.log({\"moah_data\": 1})\n", + " logger.log({\"moah_data\": 1})\n", + " logger.log({\"moah_data\": 5})\n", + "\n", + " return logger.profile\n", + " \n", + "target_profile = profile_generator()\n", + "\n", + "reference_profile = profile_generator()" + ] + }, + { + "cell_type": "markdown", + "id": "8390f360", + "metadata": {}, + "source": [ + "## ✨ Visualize profiles with Whylogs" + ] + }, + { + "cell_type": "markdown", + "id": "562fbacf", + "metadata": {}, + "source": [ + "### Initialization\n", + "Initialize the profile viewer by passing the profiles for which you want to get visualizations" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7dbff14f", + "metadata": {}, + "outputs": [], + "source": [ + "visualization = NotebookProfileViewer()\n", + "visualization.set_profiles(target_profile=target_profile, reference_profile=reference_profile)" + ] + }, + { + "cell_type": "markdown", + "id": "87ea53b6", + "metadata": {}, + "source": [ + "###### `*target_profile`: Profiled dataset which will be 
referred to as `target`\n", + "###### `*reference_profile`: Profiled dataset which will be referred to as `reference`" + ] + }, + { + "cell_type": "markdown", + "id": "9908a9f9", + "metadata": {}, + "source": [ + "### Summary Drift Report" + ] + }, + { + "cell_type": "markdown", + "id": "650d44c4", + "metadata": {}, + "source": [ + "You can get a summary drift report for the features of the `target` and `reference` profiles" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "911147eb", + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "visualization.summary_drift_report(preferred_cell_height=\"1000px\")" + ] + }, + { + "cell_type": "markdown", + "id": "c4be405d", + "metadata": {}, + "source": [ + "###### `preferred_cell_height`: height in `px` for generated visualization cell " + ] + }, + { + "cell_type": "markdown", + "id": "4dd9e690", + "metadata": {}, + "source": [ + "### Double histogram" + ] + }, + { + "cell_type": "markdown", + "id": "28b512ca", + "metadata": {}, + "source": [ + "You can get a double histogram for numerical features" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "250bb662", + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "visualization.double_histogram(feature_names=\"uniform_integers\")" + ] + }, + { + "cell_type": "markdown", + "id": "af84a332", + "metadata": {}, + "source": [ + "###### `*feature_names`: string or list of strings containing names of the features for which you want to see double histogram\n", + "###### `preferred_cell_height`: height in `px` for generated visualization cell " + ] + }, + { + "cell_type": "markdown", + "id": "193e306d", + "metadata": {}, + "source": [ + "### Distribution chart" + ] + }, + { + "cell_type": "markdown", + "id": "6f4931fb", + "metadata": {}, + "source": [ + "You can get a distribution chart for categorical features" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ccc7c73e", + "metadata": { + "scrolled": 
true + }, + "outputs": [], + "source": [ + "visualization.distribution_chart(feature_names=\"strings\")" + ] + }, + { + "cell_type": "markdown", + "id": "cd97db45", + "metadata": {}, + "source": [ + "###### `*feature_names`: string or list of strings containing names of the features for which you want to see distribution chart\n", + "###### `preferred_cell_height`: height in `px` for generated visualization cell " + ] + }, + { + "cell_type": "markdown", + "id": "408e1c5b", + "metadata": {}, + "source": [ + "### Differenced distribution chart" + ] + }, + { + "cell_type": "markdown", + "id": "4d65b0b4", + "metadata": {}, + "source": [ + "You can get a differenced distribution chart for categorical features" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d26b2fa2", + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "visualization.difference_distribution_chart(feature_names=\"strings\")" + ] + }, + { + "cell_type": "markdown", + "id": "92451fbf", + "metadata": {}, + "source": [ + "###### `*feature_names`: string or list of strings containing names of the features for which you want to see differenced distribution chart\n", + "###### `preferred_cell_height`: height in `px` for generated visualization cell " + ] + }, + { + "cell_type": "markdown", + "id": "78db1538", + "metadata": {}, + "source": [ + "### Feature Statistics" + ] + }, + { + "cell_type": "markdown", + "id": "4dc0e3f6", + "metadata": {}, + "source": [ + "You can get a set of useful statistics for features by passing the profile and feature names" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fb77bcd5", + "metadata": { + "scrolled": false + }, + "outputs": [], + "source": [ + "visualization.feature_statistics(feature_name=\"mixture_distribution\", profile=\"reference\")" + ] + }, + { + "cell_type": "markdown", + "id": "8b483a1d", + "metadata": {}, + "source": [ + "###### `*feature_name`: Any feature name from your profiled dataset\n", + "###### 
`profile`: `\"target\"` or `\"reference\"`\n", + "###### `preferred_cell_height`: height in `px` for generated visualization cell " + ] + }, + { + "cell_type": "markdown", + "id": "a3469ba2", + "metadata": {}, + "source": [ + "### Generate constraints" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "dafab41d", + "metadata": {}, + "outputs": [], + "source": [ + "def get_sample_dataset_constraints():\n", + " cvisc = columnValuesInSetConstraint(value_set={2, 5, 8})\n", + " email_constraint = containsEmailConstraint()\n", + "\n", + " min_gt_constraint = minBetweenConstraint(lower_value=1, upper_value=5)\n", + " max_le_constraint = maxLessThanEqualConstraint(value=100)\n", + "\n", + " distribution = np.random.normal(0, 1, 50)\n", + "\n", + " ks_test_p_value_constraint = parametrizedKSTestPValueGreaterThanConstraint(\n", + " distribution,\n", + " p_value=0.5,\n", + " name=\"has a standard normal distribution\"\n", + " )\n", + "\n", + " set1 = set([\"col1\", \"col2\"])\n", + " columns_match_constraint = columnsMatchSetConstraint(set1)\n", + "\n", + " val_set = {(1, 2), (3, 5)}\n", + " col_set = [\"A\", \"B\"]\n", + " mcv_constraints = [\n", + " columnPairValuesInSetConstraint(column_A=\"A\", column_B=\"B\", value_set=val_set),\n", + " sumOfRowValuesOfMultipleColumnsEqualsConstraint(columns=col_set, value=100),\n", + " columnValuesUniqueWithinRow(column_A=\"A\", verbose=True),\n", + " ]\n", + "\n", + " return DatasetConstraints(\n", + " None,\n", + " value_constraints={\"A\": [cvisc], \"users\": [email_constraint]},\n", + " summary_constraints={\"B\": [max_le_constraint, min_gt_constraint], \"value\": [ks_test_p_value_constraint]},\n", + " table_shape_constraints=[columns_match_constraint],\n", + " multi_column_value_constraints=mcv_constraints,\n", + " )\n", + "\n", + "data = pd.DataFrame({\n", + " \"A\": [1, 2, 2, 5, 7, 6],\n", + " \"B\": [5, 4, 5, 1, 6, 0],\n", + " \"users\": [\"john\", \"jane@example.com\", \"alex\", \"bob\", 
\"anna@example.com\", \"dave\"],\n", + " \"value\": [23.4, 123.2, 423.3, 32.1, 42.2, 344.2],\n", + "})\n", + "\n", + "dc = get_sample_dataset_constraints()\n", + "constraints_profile = session.log_dataframe(data, \"test.data\", constraints=dc)\n", + "constraints_profile.apply_summary_constraints()\n", + "constraints_profile.apply_table_shape_constraints()\n", + "session.close()" + ] + }, + { + "cell_type": "markdown", + "id": "17ac9327", + "metadata": {}, + "source": [ + "### Constraints report" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4a2786f8", + "metadata": {}, + "outputs": [], + "source": [ + "visualization.constraints_report(dc)" + ] + }, + { + "cell_type": "markdown", + "id": "b04f735b", + "metadata": {}, + "source": [ + "### Download preferred cell output" + ] + }, + { + "cell_type": "markdown", + "id": "2015ff5d", + "metadata": {}, + "source": [ + "You can also download any of those visualizations in `HTML` format for further analysis, by passing the visualization name" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "980d7236", + "metadata": {}, + "outputs": [], + "source": [ + "visualization.download(html=visualization.summary_drift_report(), html_file_name='example')" + ] + }, + { + "cell_type": "markdown", + "id": "01fbed2c", + "metadata": {}, + "source": [ + "This is done by calling the `download()` method of `DisplayProfile` and passing the visualizer command, the path to download to (optional) and the name of the file you prefer (optional). Example: `download(visualization.feature(\"title\"), path=\"example/path\", html_file_name=\"example_html_file_name\")`. The command will download the report in HTML format.\n", + "\n", + "If the path is not passed, the file will be downloaded to `html_reports` located in the whylogs directory by default.\n", + "\n", + "If the name of the file is not passed, it will be the name of the dataset followed by the timestamp of the profile by default. 
" + ] + }, + { + "cell_type": "markdown", + "id": "7ffc293c", + "metadata": {}, + "source": [ + "###### `*feature_name`: Any feature name from your profiled dataset\n", + "###### `preferred_path`: save path `default:` `/html_reports` located in whylogs directory\n", + "###### `html_file_name`: name of the file `default:` name of the dataset followed by timestamp of the profile" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.10" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/html_reports/.gitignore b/html_reports/.gitignore new file mode 100644 index 0000000000..2d19fc766d --- /dev/null +++ b/html_reports/.gitignore @@ -0,0 +1 @@ +*.html diff --git a/src/whylogs/viewer/css/whylogs-styles.css b/src/whylogs/viewer/css/whylogs-styles.css index c7c41eea94..f5c06a4119 100644 --- a/src/whylogs/viewer/css/whylogs-styles.css +++ b/src/whylogs/viewer/css/whylogs-styles.css @@ -154,6 +154,7 @@ z-index: 1000; display: block; padding: 20px; + padding-bottom: 0; overflow-x: hidden; overflow-y: auto; /* Scrollable contents if viewport is shorter than content. 
*/ background-color: var(--brandSecondary100); @@ -196,7 +197,7 @@ margin-top: 0; } -/* +/* * Custom styles */ @@ -374,7 +375,6 @@ a .wl-table-row--bottom-shadow { .wl-table-cell, .wl-table-head { - border-right: 1px solid var(--brandSecondary200); border-bottom: 1px solid var(--brandSecondary200); display: table-cell; padding: 12px 18px; @@ -452,9 +452,12 @@ a .wl-table-row--bottom-shadow { white-space: nowrap; overflow: hidden; font-style: italic; + text-align: center; color: var(--brandSecondary400); } .wl-table-cell__bedge { + display: flex; + align-items: center; height: 24px; margin: 1px; padding: 2px 8px; @@ -668,15 +671,6 @@ a .wl-table-row--bottom-shadow { justify-content: center; } -@media screen and (min-width: 1000px) { - .desktop-content { - display: block; - } - .no-responsive { - display: none; - } -} - .no-responsive__content { max-width: 600px; width: 100%; @@ -697,3 +691,558 @@ a .wl-table-row--bottom-shadow { color: var(--brandSecondary900); line-height: 1.5; } + +.wl-compare-profile { + position: sticky; + left: 0; + display: flex; + padding: 18px; + justify-content: flex-end; + flex-direction: row-reverse; +} + +.wl-property-panel__chart--single { + padding-bottom: 17px; +} + +.wl-property-panel__chart-title { + color: var(--brandSecondary900); + font-size: 14px; + font-family: Asap, sans-serif; + font-weight: bold; + margin-bottom: 15px; +} + +.wl-select-reference-profile { + display: flex; + align-items: center; +} + +.wl__unlock-the-power { + padding: 15px; + position: fixed; + bottom: 0; + left: 0; + z-index: 0; + background: #F1F6F6; + margin-bottom: 0 !important; + display: flex; + flex-direction: column; + align-items: center; + -webkit-box-shadow: 0px -6px 13px 0px rgba(0,0,0,0.32); + box-shadow: 0px 0px 15px rgba(0, 0, 0, 0.05); + width: var(--SIDE-PANEL-WIDTH); +} + +.wl__btn-signup { + font-size: 13px; + padding: 13px; + padding-left: 20px; + padding-right: 20px; +} + +.wl__close-icon { + width: 15px; + height: 15px; +} + 
+.wl__close-icon img{ + width: 100%; + height: 100%; + cursor: pointer; +} + +.count-color { + color: black; +} + +.wl__dropdown_arrow-icon_container { + width: 18px; + height: 16px; +} + +.wl__dropdown_arrow-icon_container img{ + width: 100%; + height: 100%; + cursor: pointer; +} + +.wl__dropdown_arrow-icon, .open-sign-up-text{ + position: relative; +} + +.notif-circle-container, .open-sign-up-text-notif-container{ + position: absolute; + top: -5px; + right: -5px; + padding: 5.3px; + border-radius: 50%; + background-color: var(--brandSecondary100); + cursor: pointer; +} + +.open-sign-up-text-notif-container { + padding: 10.3px; +} + +.notif-circle { + position: absolute; + top: 2px; + right: 2px; + padding: 3.3px; + border-radius: 50%; + background-color: #F2994A; +} + +.open-sign-up-text-notif { + display: flex; + position: absolute; + top: 2px; + right: 2px; + padding: 2.5px 6px; + color: #FFF; + border-radius: 50%; + background-color: #5CAEBC; +} + +.space-between { + display: flex; + justify-content: space-between; +} + +.feature-count-title { + font-size: 22px; +} + +.wl_filter-options { + background: rgba(255, 255, 255, 0.5); + border: 1px solid #DBE5E7; + box-sizing: border-box; + border-radius: 4px; + padding: 10px; +} + +.form-check-input:checked { + background-color: #0E7384; + border-color: #0E7384; +} + +.form-check-input[type=checkbox] { + border-radius: 1.25em; +} + +.not-clickable { + pointer-events: none; +} + +.wl_arrow-icon { + width: 15px; + height: 15px; +} + +.arrow-icon-container { + height: 100%; + cursor: pointer; +} + +.wl_list-item-dot { + background: #b0d2d7; + width: 8px; + height: 8px; + border-radius: 50px; +} + +.wl_filter-list-item { + display: flex; + align-items: center; +} + +.wl_filter-list-item>span{ + padding-left: 15px; +} + +.display-flex{ + display: flex; +} + +.table-border-none { + padding: 0; + border: none; +} + +.clickable-test-feature-wrap { + background: #F8FAFB; +} + +.clickable-test-feature-heading { + display: 
flex; + flex-direction: column; +} + +.clickable-test-feature-heading-wrap { + padding: 35px; + padding-bottom: 0; + border-bottom: 2px solid #EBF2F3; +} + +.pages-button-wrap { + display: flex; + align-items: flex-end; +} + +.page-button-wrap { + padding-right: 60px; +} + +.page-button{ + border: none; + background: no-repeat; + color: #6C757D; + font-weight: 600; + font-size: 16px; + letter-spacing: -0.01em; + padding-bottom: 25px; +} + +.activ-pages-button{ + color: #369BAC !important; + border-bottom: 4px solid #369BAC !important; + padding-bottom: 21px !important; +} + +.title p { + font-weight: 600; + font-size: 24px; + color: #313B3D; +} + +.info p { + font-weight: 600; + font-size: 12px; + color: #313B3D; +} + +.info div { + font-weight: 600; + font-size: 18px; + line-height: 20px; + color: #0E7384; +} + +.chart, .info { + display: flex; + flex-direction: column; + align-items: flex-end; + padding-right: 20px; +} + +.chart { + margin-bottom: 1rem; +} + +.info:last-child { + padding: 0; +} + +.clickable-test-feature-body { + display: flex; + flex-direction: column; +} + +.chart-box-wrap { + display: flex; + justify-content: center; +} + +.chart-box { + display: flex; + align-items: center; + justify-content: center; + flex-direction: column; + width: 75%; + height: 310px; + border: 2px solid #EBF2F3; + background: #FFF; + border-radius: 4px; +} + +.chart-box-title { + width: 80%; + justify-content: space-between; +} + +.chart-box-title p{ + font-family: Asap; + font-weight: bold; + font-size: 18px; + line-height: 16px; + color: #4F595B; +} + +.chart-info-wrap { + display: flex; + align-items: flex-end; +} + +.chart-info { + display: flex; + align-items: flex-end; +} + +.property-panel-close-icon { + cursor: pointer; +} + +.reference-json-form { + padding-right: 30px; +} + +.flex-direction-colum { + display: flex; + flex-direction: column; +} + +.feature-file-name { + font-size: 17px; + font-weight: 900; + color: #6C757D; +} + +.dropdown-container { + 
background: rgba(255, 255, 255, 0.5) !important; + padding: 10px !important; + border: none !important; +} + +.dropdown { + align-items: center; +} + +.align-items { + align-items: center; +} + +.dropdown p { + font-family: Asap; + font-size: 15px; + line-height: 150%; + margin: 0; +} +.dropdown img { + height: 10px; + cursor: pointer; +} + +.search-input{ + padding-top: 0 !important; + padding-bottom: 0 !important; +} + +.search-input input{ + border: none; + background: none; + outline: none; + height: 40px; + width: 100%; +} + +.search-input img{ + height: 19px; + pointer-events: none; +} + +.edit-button-wrap { + padding-right: 5px; + cursor: pointer; +} + +.edit-button { + border: 1px solid #369BAC; + box-sizing: border-box; + border-radius: 3px; + width: 33px; + text-align: center; +} + +.edit-button label{ + color: #369BAC; + font-family: Asap; + font-weight: 500; + line-height: 11px; + cursor: pointer; +} + +.remove-reference-profile-button { + display: flex; + align-items: center; + justify-content: center; + border: 1px solid #6C757D; + border-radius: 3px; + cursor: pointer; +} + +.remove-reference-profile-button img { + padding: 3px +} + +input::placeholder { + color: var(--secondaryLight1000); +} + +.wl-selected-profile { + position: sticky; + left: 0; +} + +.circle-color { + display: inline-block; + padding: 5px; + border-radius: 50px; +} + +.colors-for-distingushing-charts { + padding-right: 10px; +} + +.summary-statistic { + padding-right: 20px; +} + +.summary { + font-family: Asap; + font-style: normal; + font-weight: 600; + font-size: 14px; + line-height: 16px; + color: #6C757D; + padding-right: 10px; +} + +.summary-title { + font-family: Asap; + font-weight: normal; + font-size: 15px; + line-height: 14px; + color: #6C757D; +} + +.wl-selected-reference-profile { + font-weight: 900; +} + +.reference-profile-time-stamp{ + padding-left: 20px; +} + +.reference-profile-time-stamp>strong{ + font-size: 1rem; +} + +.bar.positive { + fill: #369BACB2; +} + 
+.bar.negative { + fill: #2683C9E5; +} + +.frequent-item-box { + padding: 20px; + width: 90%; + align-items: flex-start; + display: flex; + justify-content: center; + flex-direction: column; + border: 2px solid #EBF2F3; + background: #FFF; + border-radius: 4px; +} + +.frequent-item-box-wrap { + width: 100%; + display: flex; + justify-content: center; +} + +.frequent-item-box-to-title { + margin-left: 30px; + margin-bottom: 20px; + font-family: Asap; + font-weight: bold; + font-size: 16px; + line-height: 16px; + color: #4F595B; +} + +.frequent-items-body { + display: flex; + flex-direction: column; +} + +.text-align-center { + text-align: center; +} + +.fequent-items-wrap { + width: 100%; +} + +.svg-container { + display: inline-block; + position: relative; + width: 85%; + padding-bottom: 34%; + vertical-align: top; + overflow: hidden; +} +.svg-content-responsive { + display: inline-block; + position: absolute; + left: 0; +} + +.graph-svg-container { + padding-bottom: 12%; +} + +.reference-table-head { + min-width: 305px; +} + +@media screen and (min-width: 1000px) { + .desktop-content { + display: block; + } + .no-responsive { + display: none; + } + .wl__unlock-the-power { + z-index: 999999; + } +} + +@media only screen and (min-width: 1350px) { + .clickable-test-feature-body { + display: flex; + flex-direction: row; + flex-wrap: wrap; + justify-content: center; + row-gap: 20px; + column-gap: 20px; + } + + .chart-box-wrap { + width: 45%; + display: flex; + justify-content: center; + margin-bottom: 0 !important; + } + + .chart-box { + width: 100%; + height: 310px; + border: 2px solid #EBF2F3; + background: #FFF; + border-radius: 4px; + } +} diff --git a/src/whylogs/viewer/index-hbs-cdn-all-in.html b/src/whylogs/viewer/index-hbs-cdn-all-in.html deleted file mode 100644 index 40141b4c37..0000000000 --- a/src/whylogs/viewer/index-hbs-cdn-all-in.html +++ /dev/null @@ -1,1407 +0,0 @@ - - - - - - - - - - Profile Viewer | whylogs - - - - - - - - - - - - - - - - - - - - - - 
- diff --git a/src/whylogs/viewer/index.html b/src/whylogs/viewer/index.html deleted file mode 100644 index 40e12ce9e2..0000000000 --- a/src/whylogs/viewer/index.html +++ /dev/null @@ -1,342 +0,0 @@ - - - - - - - - - - Profile Viewer | whylogs - - - - - - - - -
- - -
-
- -
- -
-
-
-
-
-
Feature
-
Frequent items
-
Inf. feature type
-
Total count
-
Null fraction
-
Est. unique values
-
Data type
-
Data type count
-
Mean
-
Std. dev
-
Min
-
First quantile
-
Median
-
Third quantile
-
Max
-
-
-
    -
    -
    - -
    -

    Please select and load a JSON profile from your local drive.

    -
    - -
    - -
    -
    - - - - - - - - - - - - -
    Item
    -
    -
    -
    -
    -
    - - -
    -
    -

    Hold on! :)

    -

    - It looks like your current screen size or device is not yet supported by the WhyLabs Sandbox. The Sandbox is - best experienced on a desktop computer. Please try maximizing this window or switching to another device. We - are working on adding support for a larger variety of devices. -

    -
    -
    - - - - - - - - diff --git a/src/whylogs/viewer/js/whylogs-script.js b/src/whylogs/viewer/js/whylogs-script.js index 6ff99fccd3..0415398a8e 100644 --- a/src/whylogs/viewer/js/whylogs-script.js +++ b/src/whylogs/viewer/js/whylogs-script.js @@ -14,7 +14,19 @@ }; // HTML Elements + const $selectedReferenceProfile = $(".wl__selected-reference-profile"); const $selectedProfile = $(".wl__selected-profile"); + const $removeReferenceProfileButton = $("#remove-reference-profile-button") + const $featureFileName = $(".wl__feature-file-name") + const $notifCircleContainer = $(".notif-circle-container") + const $boxes = $('input[name=checkbox]:checked'); + const $closeIcon = $("#close-icon"); + const $openSignUpText = $("#open-sign-up-text") + const $signUpText = $(".sign-up-text"); + const $dropdownArrowIcon = $("#dropdown-arrow-icon"); + const $referenceJsonForm = $("#reference-json-form"); + const $referencefileInput = $("#reference-file-input"); + const $compareProfile = $("#compare-profile"); const $featureCount = $(".wl__feature-count"); const $sidebarFeatureNameList = $(".wl__sidebar-feature-name-list"); const $featureCountDiscrete = $(".wl__feature-count--discrete"); @@ -28,22 +40,18 @@ const $tableContent = $("#table-content"); const $tableMessage = $("#table-message"); const $sidebarContent = $("#sidebar-content"); - const $singleProfileWrap = $("#sidebar-content-single-profile"); const $multiProfileWrap = $("#sidebar-content-multi-profile"); - const $profileDropdown = $(".sidebar-content__profile-dropdown"); const $propertyPanelTitle = $(".wl-property-panel__title"); const $propertyPanelProfileName = $(".wl-property-panel__table-th-profile"); const $filterOptions = $(".filter-options"); - const $selectOptionFirstTime = $("#select-option-first-time"); - const $removeButton = $(`#remove-button-1`); - const $removeButton2 = $("#remove-button-2"); // Constants and variables let featureSearchValue = ""; let isActiveInferredType = {}; let propertyPanelData = []; - let 
profiles = []; + let referencePropertyPanelData = []; let jsonData = {}; + let referenceJsonData = {}; let dataForRead = {}; let featureDataForTableForAllProfiles = {}; let numOfProfilesBasedOnType = {}; @@ -51,11 +59,6 @@ selectedProfiles.push("0"); // Util functions - const colors = { - 0: "#0e7384", - 1: "#2683c9", - 2: "#44c0e7", - }; function debounce(func, wait, immediate) { let timeout; @@ -90,7 +93,7 @@ } } - if (!checkJSONValidityForMultiProfile(jsonData)) { + if (jsonData) { for (let i = 0; i < featureListChildren.length; i++) { const name = featureListChildren[i].dataset.featureName.toLowerCase(); const type = featureListChildren[i].dataset.inferredType.toLowerCase(); @@ -101,19 +104,6 @@ } } } - if (checkJSONValidityForMultiProfile(jsonData)) { - for (let i = 0; i < featureListChildren.length; i++) { - const name = featureListChildren[i].dataset.featureName.toLowerCase(); - const type = featureListChildren[i].dataset.inferredType.toLowerCase(); - - const typeOfFirst = type.split(",")[0]; - if (isActiveInferredType[typeOfFirst] && name.startsWith(featureSearchValue)) { - featureListChildren[i].style.display = ""; - } else { - featureListChildren[i].style.display = "none"; - } - } - } $featureCount.html(featureCount); } @@ -152,80 +142,556 @@ ); } - function openPropertyPanel(items, infType, infTypeSecond = null) { - const types = [infType, infTypeSecond]; - $(".wl-property-panel__table-th-profile").addClass("d-none"); - $("#property-panel-0").removeClass("d-none"); - if (!checkJSONValidityForMultiProfile(jsonData)) { - if (items.length > 0 && items !== "undefined") { - let chipString = ""; - const chipElement = (chip) => `${chip}`; - const chipElementTableData = (value) => `${chipElement(value)}`; - const chipElementEstimation = (count) => - `${count}`; - items.forEach((item) => { - chipString += ` - - ${chipElementTableData(item.value)} - ${chipElementEstimation(item.count)} - - `; - }); - $(".wl-property-panel__frequent-items").html(chipString); - if 
(infType === "non-discrete") { - $propertyPanelTitle.html("Histogram data:"); - $propertyPanelProfileName.html("Bin values"); - } else if (infType === "discrete") { - $propertyPanelTitle.html("Frequent items:"); - $propertyPanelProfileName.html("Counts"); - } + const chipElement = (chip) => `${chip}`; + const chipElementTableData = (value) => `${chipElement(value)}`; + + const chartBoxElement = (chartTitle, chart) => ` +
    +
    +
    ${chartTitle}
    +
    ${chart}
    +
    +
    + ` + + const frequentItemBoxElement = (chartTitle, items) => ` +
    +
    + + ${items} + +
    +
    + ` + + const colorsForDistingushingCharts = (color, text) => ` +
    +
    + ${text} +
    + ` + + class GenerateChartParams { + constructor(height, width, data, bottomMargin=20, topMargin=5) { + this.MARGIN = { + TOP: topMargin, + RIGHT: 5, + BOTTOM: bottomMargin, + LEFT: 55, + }; + this.SVG_WIDTH = width; + this.SVG_HEIGHT = height; + this.CHART_WIDTH = this.SVG_WIDTH - this.MARGIN.LEFT - this.MARGIN.RIGHT; + this.CHART_HEIGHT = this.SVG_HEIGHT - this.MARGIN.TOP - this.MARGIN.BOTTOM; + this.svgEl = d3.create("svg").attr("width", this.SVG_WIDTH).attr("height", this.SVG_HEIGHT); + this.maxYValue = d3.max(data, (d) => Math.abs(d.axisY)); + this.xScale = d3 + .scaleBand() + .domain(data.map((d) => d.axisX)) + .range([this.MARGIN.LEFT, this.MARGIN.LEFT + this.CHART_WIDTH]); + this.yScale = d3 + .scaleLinear() + .domain([0, this.maxYValue * 1.02]) + .range([this.CHART_HEIGHT, 0]); + } + } + + const referenceProfilePanelHeight = () => { + const pageHeight = $(document).height() - 48; + if ($(".clickable-test-feature-wrap").height() <= pageHeight) { + $(".clickable-test-feature-wrap").css("height", pageHeight) + } else { + $(".clickable-test-feature-wrap").css("height", "auto") + } + } - $(".wl-property-panel").addClass("wl-property-panel--open"); - $(".wl-table-wrap").addClass("wl-table-wrap--narrow"); + function getGraphHtml(data, height = 75, width = 350, index = 0, referenceProfile = false, propertyPanelGraph = false) { + const sizes = new GenerateChartParams(height, width, data, 5) + let { + MARGIN, + CHART_HEIGHT, + svgEl, + maxYValue, + xScale, + yScale + } = sizes + const color = ["#369BAC", '#2683C9'] + + if (propertyPanelGraph) { + svgEl = d3.create("svg").attr("width", width).attr("height", height); + } + + // Add the y Axis + if (!referenceProfile) { + svgEl + .append("g") + .attr("transform", "translate(" + MARGIN.LEFT + ", " + MARGIN.TOP + ")") + .call(d3.axisLeft(yScale).tickValues([0, maxYValue/2, maxYValue])); + } + + const gChart = svgEl.append("g"); + gChart + .selectAll(".bar") + .data(data) + .enter() + .append("rect") + 
.classed("bar", true) + .attr("width", xScale.bandwidth() - 1) + .attr("height", (d) => CHART_HEIGHT - yScale(d.axisY)) + .attr("x", (d) => xScale(d.axisX)) + .attr("y", (d) => yScale(d.axisY) + MARGIN.TOP) + .attr("fill", color[index]); + + return svgEl._groups[0][0].outerHTML; + } + + function generateDoubleHistogramChart(currentWidth, histogramData, overlappedHistogramData) { + let yFormat, + xFormat; + + const sizes = new GenerateChartParams(230, currentWidth, histogramData) + let { + MARGIN, + SVG_WIDTH, + SVG_HEIGHT, + CHART_WIDTH, + CHART_HEIGHT, + svgEl, + xScale, + yScale + } = sizes + + svgEl = d3.create("svg") + .attr("preserveAspectRatio", "xMinYMin meet") + .attr("viewBox", "0 0 600 400") + .classed("svg-content-responsive", true) + + const xAxis = d3.axisBottom(xScale).ticks(SVG_WIDTH / 80, xFormat).tickSizeOuter(0); + const yAxis = d3.axisLeft(yScale).ticks(CHART_HEIGHT / 40, yFormat); + yFormat = yScale.tickFormat(100, yFormat); + + svgEl.append("g") + .attr("transform", `translate(${MARGIN.LEFT}, ${MARGIN.TOP})`) + .call(yAxis) + .call(g => g.select(".domain").remove()) + .call(g => g.selectAll(".tick line") + .attr("x2", CHART_WIDTH) + .attr("stroke-opacity", 0.1)) + .call(g => g.append("text") + .attr("x", -MARGIN.LEFT) + .attr("y", 10) + .attr("fill", "currentColor") + .attr("text-anchor", "start")); + + svgEl.append("g").append("g") + .attr("transform", `translate(0,${SVG_HEIGHT - MARGIN.BOTTOM})`) + .call(xAxis) + .call(g => g.select(".domain").remove()) + .call(g => g.selectAll(".tick line").remove()) + .call(g => g.append("text") + .attr("x", SVG_WIDTH - MARGIN.RIGHT) + .attr("y", 27) + .attr("fill", "currentColor") + .attr("text-anchor", "end")); + + const gChart = svgEl.append("g"); + gChart + .selectAll(".bar") + .data(histogramData) + .enter() + .append("rect") + .classed("bar", true) + .attr("width", xScale.bandwidth()) + .attr("height", (d) => CHART_HEIGHT - yScale(d.axisY)) + .attr("x", (d) => xScale(d.axisX)) + .attr("y", (d) => 
yScale(d.axisY) + MARGIN.TOP) + .attr("fill", "#369BAC") + .style("opacity","0.6"); + + const gChart1 = svgEl.append("g"); + gChart1 + .selectAll(".bar") + .data(overlappedHistogramData) + .enter() + .append("rect") + .classed("bar", true) + .attr("width", xScale.bandwidth()) + .attr("height", (d) => CHART_HEIGHT - yScale(d.axisY)) + .attr("x", (d) => xScale(d.axisX)) + .attr("y", (d) => yScale(d.axisY) + MARGIN.TOP) + .attr("fill", "#2683C9") + .style("opacity", "0.6"); + + return svgEl._groups[0][0].outerHTML; + } + + function generateBarChart(currentWidth, histogramData, overlappedHistogramData) { + let yFormat, + xFormat; + const data = histogramData.map((profile, index) => { + return { + group: index, + profile: profile.axisY, + reference_profile: overlappedHistogramData[index].axisY } + }).slice(0, 20) + + const sizes = new GenerateChartParams(230, currentWidth, histogramData, undefined, 1) + let { + MARGIN, + SVG_WIDTH, + SVG_HEIGHT, + CHART_WIDTH, + CHART_HEIGHT, + svgEl, + xScale, + yScale + } = sizes + + svgEl = d3.create("svg") + .attr("preserveAspectRatio", "xMinYMin meet") + .attr("viewBox", "0 0 600 400") + .classed("svg-content-responsive", true) + + const subgroups = ['profile', 'reference_profile'] + + xScale.padding([0.3]) + + const xAxis = d3.axisBottom(xScale).ticks(SVG_WIDTH / 80, xFormat).tickSizeOuter(0); + const yAxis = d3.axisLeft(yScale).ticks(SVG_HEIGHT / 40, yFormat); + yFormat = yScale.tickFormat(100, yFormat); + + svgEl.append("g") + .attr("transform", `translate(${MARGIN.LEFT}, ${MARGIN.TOP})`) + .call(yAxis) + .call(g => g.select(".domain").remove()) + .call(g => g.selectAll(".tick line") + .attr("x2", CHART_WIDTH) + .attr("stroke-opacity", 0.1)) + .call(g => g.append("text") + .attr("x", -MARGIN.LEFT) + .attr("y", 10) + .attr("fill", "currentColor") + .attr("text-anchor", "start")); + + svgEl.append("g") + .attr("transform", `translate(0,${SVG_HEIGHT - MARGIN.BOTTOM})`) + .call(xAxis) + .call(g => g.select(".domain").remove()) + 
.call(g => g.selectAll(".tick line").remove()) + .call(g => g.append("text") + .attr("x", SVG_WIDTH - MARGIN.RIGHT) + .attr("y", 27) + .attr("fill", "currentColor") + .attr("text-anchor", "end")); + // Another scale for subgroup position? + const xSubgroup = d3.scaleBand() + .domain(subgroups) + .range([0, xScale.bandwidth()]) + + // color palette = one color per subgroup + const color = d3.scaleOrdinal() + .domain(subgroups) + .range(['#369BAC', '#2683C9']) + + svgEl.append("g") + .selectAll("g") + // Enter in data = loop group per group + .data(data) + .enter() + .append("g") + .attr("transform", function(d) { return "translate(" + xScale(d.group) + ",0)"; }) + .selectAll("rect") + .data(function(d) { return subgroups.map(function(key) { return {key: key, value: d[key]}; }); }) + .enter().append("rect") + .attr("x", function(d) { return xSubgroup(d.key); }) + .attr("y", function(d) { return yScale(d.value); }) + .attr("width", xSubgroup.bandwidth()) + .attr("height", function(d) { return (CHART_HEIGHT - yScale(d.value)); }) + .attr("fill", function(d) { return color(d.key); }) + .style("opacity", "0.6"); + + + return svgEl._groups[0][0].outerHTML; + } + + function generatePositiveNegativeChart(currentWidth, histogramData, overlappedHistogramData) { + const data = histogramData.map((value, index) => { + const difference = value.axisY - overlappedHistogramData[index].axisY + const negativeValues = difference < 0 ? 
difference : 0 + return [+value.axisY, negativeValues] + }).flat().slice(0, 20) + + let yFormat, + xFormat; + + const sizes = new GenerateChartParams(230, currentWidth, histogramData, undefined, 1) + let { + MARGIN, + SVG_WIDTH, + SVG_HEIGHT, + CHART_WIDTH, + CHART_HEIGHT, + svgEl + } = sizes + + svgEl = d3.create("svg") + .attr("preserveAspectRatio", "xMinYMin meet") + .attr("viewBox", "0 0 600 400") + .classed("svg-content-responsive", true) + + const y0 = Math.max(Math.abs(d3.min(data)), Math.abs(d3.max(data))); + + const yScale = d3.scaleLinear() + .domain([-y0 * 1.02, y0 * 1.02]) + .range([CHART_HEIGHT,0]) + + const xScale = d3.scaleBand() + .domain(d3.range(data.length)) // so that chart's height has 102% height of the maximum value + .rangeRound([MARGIN.LEFT, SVG_WIDTH]) + .padding([0.1]); + + const xAxis = d3.axisBottom(xScale).ticks(SVG_WIDTH / 80, xFormat).tickSizeOuter(0); + const yAxis = d3.axisLeft(yScale).ticks(CHART_HEIGHT / 40, yFormat); + yFormat = yScale.tickFormat(100, yFormat); + + svgEl.append("g") + .attr("transform", `translate(${MARGIN.LEFT}, ${MARGIN.TOP})`) + .call(yAxis) + .call(g => g.select(".domain").remove()) + .call(g => g.selectAll(".tick line") + .attr("x2", CHART_WIDTH ) + .attr("stroke-opacity", 0.1)) + .call(g => g.append("text") + .attr("x", - MARGIN.LEFT) + .attr("y", 10) + .attr("fill", "currentColor") + .attr("text-anchor", "start")); + + svgEl + .append("g") + .attr("transform", `translate(0,${SVG_HEIGHT - MARGIN.BOTTOM})`) + .call(xAxis) + .call(g => g.select(".domain").remove()) + .call(g => g.selectAll(".tick>line").remove()) + .call(g => g.append("text") + .attr("x", SVG_WIDTH - MARGIN.RIGHT) + .attr("y", 27) + .attr("fill", "currentColor") + .attr("text-anchor", "end")); + + svgEl.selectAll(".bar") + .data(data) + .enter().append("rect") + .attr("class", function(d) { return d < 0 ? 
"bar negative" : "bar positive"; }) + .attr("y", function(d) { return yScale(Math.max(0, d)); }) + .attr("x", function(d, i) { return xScale(i); }) + .attr("height", function(d) { return Math.abs(yScale(d) - yScale(0)); }) + .attr("width", xScale.bandwidth()); + + return svgEl._groups[0][0].outerHTML; + } + + function numericalDriftChart(getDoubleHistogramChart) { + let colorsForDistingushingChartHTMLElement = ''; + + colorsForDistingushingChartHTMLElement +=` +

    Data Distribution Chart

    +
    + ${colorsForDistingushingCharts("#369BAC", "Current")} + ${colorsForDistingushingCharts("#2683C9", "Reference")} +
    + ` + $(".clickable-test-feature-body").html(` + ${chartBoxElement(colorsForDistingushingChartHTMLElement, getDoubleHistogramChart)} + `); + } + + function categoricalDriftChart(getBarChart, getPositiveNegative) { + $(".clickable-test-feature-body").html(` + ${chartBoxElement('

    Bar Chart

    ', getBarChart)} + ${chartBoxElement('

    Difference Bar Chart

    ', getPositiveNegative)} + `); + } + + function sortWithIndeces(toSort) { + for (let i = 0; i < toSort.length; i++) { + toSort[i] = [toSort[i], i]; + } + toSort.sort((left, right) => { + return right[0] < left[0] ? -1 : 1; + }); + toSort.sortIndices = []; + for (let j = 0; j < toSort.length; j++) { + toSort.sortIndices.push(toSort[j][1]); + toSort[j] = toSort[j][0]; + } + return toSort; + } + + const getProfileCharts = (key, getDoubleHistogramChart, getBarChart, getPositiveNegative) => { + if (jsonData.columns[key].numberSummary.isDiscrete) { + $("#page-button").text("Categorical Data") + categoricalDriftChart(getBarChart, getPositiveNegative) } else { - let chipString = ""; - $propertyPanelTitle.html(""); - const chipElement = (chip) => `${chip}`; - const chipElementTableData = (value) => `${chipElement(value)}`; - const chipElementEstimation = (count) => - `${count}`; - const columns = []; - let itemsLength = items[0].length; - selectedProfiles.forEach((selected, index) => { - if (selected !== null && items[parseInt(selected)].length > 0 && items !== "undefined") { - columns[index] = []; - items[parseInt(selected)].forEach((item, i) => { - columns[index][i] = ""; - - if (index === 0) { - columns[index][i] += `${chipElementTableData(item.value)}`; - } - let value = Number.isInteger(Number(item.count)) ? item.count : roundToThreeDecimals(item.count); - columns[index][i] += `${chipElementEstimation(value)}`; - }); + $("#page-button").text("Numerical Data") + numericalDriftChart(getDoubleHistogramChart) + } + } + + function openReferencePropertyPanel(referenceItems, items, profileItems, key, chart, getDoubleHistogramChart, getBarChart, getPositiveNegative) { + const chartInfoItem = (drift, driftName) => ` +
    +
    ${drift}
    +

    ${driftName}

    +
    + ` + const $clickableTestFeatureWrap = $(".clickable-test-feature-wrap"); + const $pagesButtons = $(".page-button"); + const $pagesButton = $pagesButtons[0]; + let chipString = "", + frequentItemString = "", + referenceFrequentItemString = ""; + + $pagesButtons.removeClass("activ-pages-button") + $($pagesButton).addClass("activ-pages-button") + $tableContent.addClass("d-none") + $clickableTestFeatureWrap.removeClass("d-none") + + // frequentItemString += `${items.forEach((item) => {chipElementTableData(item.value)})}` + const sortedItems = items.map((item) => +Object.values(item)[0]) + // .sort((a,b) => +b.value - (+a.value)) + sortWithIndeces(sortedItems).sortIndices.forEach( + (item) => { + frequentItemString += ` + ${frequentItemBoxElement('',chipElementTableData(items[item].value))} + ` + referenceFrequentItemString += ` + ${frequentItemBoxElement('',chipElementTableData(referenceItems[item].value))} + ` + } + ); + + $("#page-button").on("click", function () { + getProfileCharts( + key, + getDoubleHistogramChart, + getBarChart, + getPositiveNegative + ) + $(".clickable-test-feature-body").removeClass("d-none"); + $(".frequent-items-body").html(``); + referenceProfilePanelHeight() + }) + + getProfileCharts( + key, + getDoubleHistogramChart, + getBarChart, + getPositiveNegative + ) + $(".clickable-test-feature-body").removeClass("d-none"); + $(".frequent-items-body").html(``); + + $("#frequent-item-button").on("click", function () { + $(".frequent-items-body").html(` +
    +
    +
    + items +
    + ${frequentItemString} +
    +
    +
    + reference profile items +
    + ${referenceFrequentItemString} +
    +
    + `); + + $(".clickable-test-feature-body").addClass("d-none"); + + referenceProfilePanelHeight() + }) + + + $("#chart").html(chart); + + chipString += ` + ${chartInfoItem(profileItems.numberSummary.count.toString(), "Total Count")} + ${chartInfoItem(fixNumberTo(profileItems.numberSummary.mean), "Mean")} + ` + $(".chart-info").html(chipString); + referenceProfilePanelHeight() + } + + function openProfilePropertyPanel(items, infType, chart) { + $("#wl-property-panel__chart").html(chart); + let chipString = ""; + const chipElementEstimation = (count) => + `${count}`; + items.forEach((item) => { + chipString += ` + + ${chipElementTableData(item.value)} + ${chipElementEstimation(item.count)} + + `; + }); + $(".wl-property-panel__frequent-items").html(chipString); + if (infType === "non-discrete") { + $propertyPanelTitle.html("Histogram data:"); + $propertyPanelProfileName.html("Bin values"); + } else if (infType === "discrete") { + $propertyPanelTitle.html("Frequent items:"); + $propertyPanelProfileName.html("Counts"); + } + + $(".wl-property-panel").addClass("wl-property-panel--open"); + $(".wl-table-wrap").addClass("wl-table-wrap--narrow"); + } + + function openPropertyPanel(referenceItems, items, infType, feature) { + let getGraph = null, + getPropertyPanelGraph = null, + getDoubleHistogramChart, + getBarChart, + getPositiveNegative, + currentWidth = 600, + propertyPanelGraph = true; + + + if (referencePropertyPanelData[feature[0]][0]) { + getDoubleHistogramChart = generateDoubleHistogramChart(currentWidth, feature[1].chartData[0], feature[1].chartData[1]) + getBarChart = generateBarChart(currentWidth, feature[1].chartData[0], feature[1].chartData[1]) + getPositiveNegative = generatePositiveNegativeChart(currentWidth, feature[1].chartData[0], feature[1].chartData[1]) + items = referencePropertyPanelData[feature[0]][0] + getGraph = getGraphHtml(feature[1].chartData[1], 50, 280, 0, true, propertyPanelGraph) + } + + getPropertyPanelGraph = 
getPropertyPanelGraphHtml(jsonData.columns[feature[0]], feature[0]) + + if (jsonData) { + if (items.length > 0 && items !== "undefined") { + if (referencePropertyPanelData[feature[0]][0]) { + openReferencePropertyPanel( + referenceItems, + items, + referenceJsonData.columns[feature[0]], + feature[0], + getGraph, + getDoubleHistogramChart, + getBarChart, + getPositiveNegative + ) } else { - columns[index] = []; - for (let i = 0; i < itemsLength; i++) { - columns[index][i] = ""; - } + openProfilePropertyPanel(items, infType, getPropertyPanelGraph) } + } else { - $(`#property-panel-${index}`).html(`Profile ${index + 1}`); - $(`#property-panel-${index}`).removeClass("d-none"); - }); - for (let i = 0; i < items[0].length; i++) { - chipString += ` - `; - for (let j = 0; j < selectedProfiles.length; j++) { - chipString += columns[j][i]; - } - chipString += ``; } - - $(".wl-property-panel").addClass("wl-property-panel--open"); - $(".wl-table-wrap").addClass("wl-table-wrap--narrow"); - $(".wl-property-panel__frequent-items").html(chipString); } } @@ -235,81 +701,106 @@ $(".wl-property-panel__frequent-items").html(""); } - function roundToThreeDecimals(str) { - let floatNum = parseFloat(str); - let floatRounded = floatNum.toFixed(3); + function getPropertyPanelGraphHtml (column) { + let chartString = ""; + const freqData = []; + const histData = []; + + const freqChart = (chart) => + `
    Frequent Items Data
    ${chart}
    `; + const histChart = (chart) => + `
    Histogram Data
    ${chart}
    `; + + if (column.numberSummary) { + if (column.frequentItems && column.frequentItems.items) { + column.frequentItems.items.forEach((item, index) => { + freqData.push({ + axisY: item.estimate, + axisX: index, + }); + }); + } - return String(floatRounded); + if (column.numberSummary.histogram && column.numberSummary.histogram.counts) { + column.numberSummary.histogram.counts.slice(0, 30).forEach((count, index) => { + histData.push({ + axisY: count, + axisX: index, + }); + }); + } + if (column.numberSummary.isDiscrete) { + if (freqData.length > 0) chartString += freqChart(getGraphHtml(freqData, 130)); + if (histData.length > 0) chartString += histChart(getGraphHtml(histData, 130)); + } else { + if (histData.length > 0) chartString += histChart(getGraphHtml(histData, 130)); + if (freqData.length > 0) chartString += freqChart(getGraphHtml(freqData, 130)); + } + } + return chartString; + } + + function sidebarContentHeight() { + const $sidebarContentPadding = +$("#sidebar-content-single-profile").css("padding").replace('px','') * 2 + const $sidebarContentHeight = $("#sidebar-content-single-profile").height() + $sidebarContentPadding + const $sidebar = $(".sidebar") + + $sidebar.css("margin-bottom", `${$sidebarContentHeight}px`) + } + + function checkCurrentProfile(item, referenceItem) { + if (referenceJsonData && Object.values(referenceJsonData)) { + return referenceItem + } else { + return item + } } + // Override and populate HTML element values - function updateHtmlElementValues(index) { + function updateHtmlElementValues() { + $notifCircleContainer.addClass("d-none") + $dropdownArrowIcon.css("transform","rotate(180deg)") + $filterOptions.removeClass("d-none"); + + checkCurrentProfile(true, false) ? 
+ $("#dif-from-ref").addClass("d-none"): + $("#dif-from-ref").removeClass("d-none") + + sidebarContentHeight() $sidebarFeatureNameList.html(""); $tableMessage.addClass("d-none"); Object.entries(featureDataForTableForAllProfiles).forEach((feature) => { - function getGraphHtml(data, index) { - const SINGLE_PROFILE_JSON = !profiles.length; - const MARGIN = { - TOP: 5, - RIGHT: 5, - BOTTOM: 5, - LEFT: 55, - }; - const SVG_WIDTH = 350; - const SVG_HEIGHT = SINGLE_PROFILE_JSON ? 75 : 35; - const CHART_WIDTH = SVG_WIDTH - MARGIN.LEFT - MARGIN.RIGHT; - const CHART_HEIGHT = SVG_HEIGHT - MARGIN.TOP - MARGIN.BOTTOM; - - const svgEl = d3.create("svg").attr("width", SVG_WIDTH).attr("height", SVG_HEIGHT); - - const maxYValue = d3.max(data, (d) => Math.abs(d.axisY)); - - const xScale = d3 - .scaleBand() - .domain(data.map((d) => d.axisX)) - .range([MARGIN.LEFT, MARGIN.LEFT + CHART_WIDTH]); - const yScale = d3 - .scaleLinear() - .domain([0, maxYValue * 1.02]) // so that chart's height has 102% height of the maximum value - .range([CHART_HEIGHT, 0]); - - // Add the y Axis - svgEl - .append("g") - .attr("transform", "translate(" + MARGIN.LEFT + ", " + MARGIN.TOP + ")") - .call(d3.axisLeft(yScale).tickValues([0, maxYValue])); - - const gChart = svgEl.append("g"); - gChart - .selectAll(".bar") - .data(data) - .enter() - .append("rect") - .classed("bar", true) - .attr("width", xScale.bandwidth() - 1) - .attr("height", (d) => CHART_HEIGHT - yScale(d.axisY)) - .attr("x", (d) => xScale(d.axisX)) - .attr("y", (d) => yScale(d.axisY) + MARGIN.TOP) - .attr("fill", colors[index]); - - return svgEl._groups[0][0].outerHTML; - } // strings for tableToShow let tempChartDataString = ""; + let referenceTempChartDataString = ""; feature[1].chartData.forEach((chartData, index) => { if (selectedProfiles.includes(String(index))) { - let profileSelected = selectedProfiles.indexOf(String(index)); tempChartDataString += `
    ${ chartData.length > 0 - ? getGraphHtml(chartData, profileSelected) + ? getGraphHtml(feature[1].chartData[0]) : 'No data to show the chart' }
    `; + if (feature[1].chartData[1] && feature[1].chartData[1].length > 0) { + referenceTempChartDataString+= `
    ${ + chartData.length > 0 + ? getGraphHtml(feature[1].chartData[1], ...[,,], 1, true) + : 'No data to show the chart' + }
    `; + } + } + }); + + let diffFromRef = ""; + feature[1].frequentItemsElemString.forEach((frequentItemElemString, index) => { + if (selectedProfiles.includes(String(index))) { + diffFromRef += `
    ${Math.floor(Math.random() * 10)}
    `; } }); let freaquentItemsElmString = ""; feature[1].frequentItemsElemString.forEach((frequentItemElemString, index) => { if (selectedProfiles.includes(String(index))) { - freaquentItemsElmString += `
    ${frequentItemElemString}
    `; + freaquentItemsElmString += `
    ${frequentItemElemString}
    `; } }); let inferredTypeString = ""; @@ -418,9 +909,15 @@

    ${feature[0]}

    -
    ` + - tempChartDataString + - `
    ` + +
    +
    ` + + tempChartDataString + + referenceTempChartDataString + + `
    +
    +
    ` + + checkCurrentProfile(``, diffFromRef) + + `
    ` + freaquentItemsElmString + `
    ` + inferredTypeString + @@ -450,26 +947,17 @@ quantilesMaxString + `
    `, ); - if (!checkJSONValidityForMultiProfile(jsonData)) { - const $tableRowButton = $(``); - $tableRowButton.on( - "click", - openPropertyPanel.bind(this, propertyPanelData[feature[0]][0], feature[1].inferredType[0].toLowerCase()), - ); - $tableRow.find(".wl-table-cell__title-wrap").append($tableRowButton); - } else { + if (jsonData) { const $tableRowButton = $(``); - - let secondType = feature[1].inferredType[selectedProfiles[1]] || null; - if (typeof secondType === "string") secondType = secondType.toLowerCase(); $tableRowButton.on( "click", openPropertyPanel.bind( this, - propertyPanelData[feature[0]], - feature[1].inferredType[selectedProfiles[0]].toLowerCase(), - secondType, + referencePropertyPanelData[feature[0]][0], + propertyPanelData[feature[0]][0], + feature[1].inferredType[0].toLowerCase(), + feature ), ); $tableRow.find(".wl-table-cell__title-wrap").append($tableRowButton); @@ -478,10 +966,22 @@ $tableBody.append($tableRow); $sidebarFeatureNameList.append( - `
  • ${feature[0]}
  • `, + ` +
  • +
    +
    + +
    + ${feature[0]} +
  • + `, ); }); - if (!checkJSONValidityForMultiProfile(jsonData)) { + + if (jsonData) { const countDiscrete = Object.values(numOfProfilesBasedOnType).reduce( (acc, feature) => (acc += feature.discrete.length), 0, @@ -498,7 +998,10 @@ $featureCountDiscrete.html(countDiscrete); $featureCountNonDiscrete.html(countNonDiscrete); $featureCountUnknown.html(countUnknown); - $selectedProfile.html(formatLabelDate(+dataForRead.properties.dataTimestamp)); + $selectedProfile.html(formatLabelDate(+dataForRead.properties[0].dataTimestamp)); + if (checkCurrentProfile(false, true) && dataForRead.properties[1]) { + $selectedReferenceProfile.html(formatLabelDate(+dataForRead.properties[1].dataTimestamp)) + } } } @@ -511,7 +1014,6 @@ }); for (let i = 0; i < tableBodyChildrens.length; i++) { - const name = tableBodyChildrens[i].dataset.featureName.toLowerCase(); const type = tableBodyChildrens[i].dataset.inferredType.toLowerCase(); if (isActiveInferredType[type]) { @@ -520,7 +1022,6 @@ } } for (let i = 0; i < featureListChildren.length; i++) { - const name = featureListChildren[i].dataset.featureName.toLowerCase(); const type = featureListChildren[i].dataset.inferredType.toLowerCase(); if (isActiveInferredType[type]) { @@ -530,96 +1031,45 @@ $featureCount.html(featureCount); } - function renderProfileDropdown(id) { - $profileDropdown.html(""); - - $profileDropdown.append( - ``, - ); - for (let i = 0; i < profiles.length; i++) { - if (selectedProfiles.includes(String(i))) { - const option = ``; - $profileDropdown.append(option); - } else { - const option = ``; - $profileDropdown.append(option); - } + function getDataForRead(dataForRead, jsonData, referneceData) { + if (!dataForRead.properties) { + dataForRead.properties = []; } - } + dataForRead.properties.push(jsonData.properties); - function reRenderProfileDropdown(id) { - $(`#sidebar-content-multi-profile-dropdown-${id}`).html(""); - let selected = ""; - if (selectedProfiles[id] === null || selectedProfiles.length - 1 < id) { - 
selected = "selected"; - } - $(`#sidebar-content-multi-profile-dropdown-${id}`).append( - ``, - ); - for (let i = 0; i < profiles.length; i++) { - if (selectedProfiles.includes(String(i))) { - let selected = ""; - if (selectedProfiles[id] === String(i)) { - selected = "selected"; - } - const option = ``; - $(`#sidebar-content-multi-profile-dropdown-${id}`).append(option); - } else { - const option = ``; - $(`#sidebar-content-multi-profile-dropdown-${id}`).append(option); + Object.entries(jsonData.columns).forEach((feature) => { + let tempFeatureName = feature[0]; + if (!dataForRead.columns) { + dataForRead.columns = []; } - } - } - - function handleProfileChange(event) { - handleClosePropertyPanel(); - $tableContent.removeClass("d-none"); - $("#table-content-wrapper").removeClass("d-none"); - const value = event.target.value; - const id = event.target.dataset; - selectedProfiles[parseInt(id.id)] = value; - let addButton = $(`#add-profile-sidebar-button-${id.id}`); - addButton.attr("disabled", false); - for (let i = 0; i < selectedProfiles.length; i++) { - reRenderProfileDropdown(i); - } - $selectOptionFirstTime.addClass("d-none"); - $tableBody.html(""); - updateHtmlElementValues(); - renderList(); - } - - function mapProfileDataToReadData(jsonData, dataForRead) { - if (checkJSONValidityForMultiProfile(jsonData)) { - Object.entries(jsonData).forEach((profile) => { - if (!dataForRead.properties) { - dataForRead.properties = []; - } - dataForRead.properties.push(profile[1].properties); - - Object.entries(profile[1].columns).forEach((feature) => { - let tempFeatureName = feature[0]; - if (!dataForRead.columns) { - dataForRead.columns = []; + if (!dataForRead.columns[tempFeatureName]) dataForRead.columns[tempFeatureName] = []; + dataForRead.columns[tempFeatureName].push(feature[1]); + if ( + referneceData && + referneceData.columns[tempFeatureName].numberSummary + ) { + dataForRead.properties.push(referenceJsonData.properties) + const { + numberSummary, + frequentItems, 
+ ...referenceDataForRead + } = dataForRead.columns[tempFeatureName][0] + dataForRead.columns[tempFeatureName].push({ + ...referenceDataForRead, + referenceNumberSummary: referneceData.columns[tempFeatureName].numberSummary, + referenceFrequentItems: referneceData.columns[tempFeatureName].frequentItems + }) } - if (!dataForRead.columns[tempFeatureName]) dataForRead.columns[tempFeatureName] = []; - dataForRead.columns[tempFeatureName].push(feature[1]); - }); - }); - } else { - if (!dataForRead.properties) { - dataForRead.properties = []; - } - dataForRead.properties.push(jsonData.properties); + }); + return dataForRead + } - Object.entries(jsonData.columns).forEach((feature) => { - let tempFeatureName = feature[0]; - if (!dataForRead.columns) { - dataForRead.columns = []; - } - if (!dataForRead.columns[tempFeatureName]) dataForRead.columns[tempFeatureName] = []; - dataForRead.columns[tempFeatureName].push(feature[1]); - }); + function mapProfileDataToReadData(jsonData, dataForRead, referneceData) { + try { + dataForRead = getDataForRead(dataForRead, jsonData, referneceData) + } catch (e) { + alert("You selected wrong reference profile. 
Please select again.") + removeReferenceProfile() } makeFeatureDataForAllProfilesToShowOnTable( featureDataForTableForAllProfiles, @@ -628,6 +1078,7 @@ ); } + function makeFeatureDataForAllProfilesToShowOnTable( featureDataForTableForAllProfiles, dataForRead, @@ -652,6 +1103,7 @@ mean: [], stddev: [], chartData: [], + refereneChartData: [], frequentItemsElemString: [], }; numOfProfilesBasedOnType[feature[0]] = { @@ -660,6 +1112,7 @@ unknown: [], }; propertyPanelData[feature[0]] = []; + referencePropertyPanelData[feature[0]] = []; } let iteration = 0; feature[1].forEach((tempFeatureValues) => { @@ -717,6 +1170,14 @@ axisX: index, }); }); + if (tempFeatureValues.referenceFrequentItems) { + tempFeatureValues.referenceFrequentItems.items.forEach((item, index) => { + featureDataForTableForAllProfiles[feature[0]].chartData[1].push({ + axisY: item.estimate, + axisX: index, + }); + }); + } // Frequent item chips / bedge propertyPanelData[feature[0]][iteration] = tempFeatureValues.frequentItems.items.reduce((acc, item) => { @@ -737,7 +1198,7 @@ // Chart if (tempFeatureValues.numberSummary) { // Histogram chips / bedge - propertyPanelData[feature[0]][iteration] = tempFeatureValues.numberSummary.histogram.counts.reduce( + propertyPanelData[feature[0]][0] = tempFeatureValues.numberSummary.histogram.counts.reduce( (acc, value, index) => { acc.push({ value: value, @@ -749,7 +1210,7 @@ ); tempFeatureValues.numberSummary.histogram.counts.slice(0, 30).forEach((count, index) => { - featureDataForTableForAllProfiles[feature[0]].chartData[iteration].push({ + featureDataForTableForAllProfiles[feature[0]].chartData[0].push({ axisY: count, axisX: index, }); @@ -763,7 +1224,37 @@ featureDataForTableForAllProfiles[feature[0]].chartData[iteration] = []; propertyPanelData[feature[0]] = []; } + if (tempFeatureValues.referenceNumberSummary) { + referencePropertyPanelData[feature[0]][0] = tempFeatureValues.referenceNumberSummary.histogram.counts.reduce( + (acc, value, index) => { + acc.push({ + 
value: value, + count: tempFeatureValues.referenceNumberSummary.histogram.bins[index], + }); + return acc; + }, + [], + ); + + tempFeatureValues.referenceNumberSummary.histogram.counts.slice(0, 30).forEach((count, index) => { + featureDataForTableForAllProfiles[feature[0]].chartData[1].push({ + axisY: count, + axisX: index, + }); + }); + } } + if(tempFeatureValues.referenceFrequentItems && + featureDataForTableForAllProfiles[feature[0]].inferredType[0] === "Discrete" + ){ + featureDataForTableForAllProfiles[feature[0]].chartData[1] = [] + tempFeatureValues.referenceFrequentItems.items.forEach((item, index) => { + featureDataForTableForAllProfiles[feature[0]].chartData[1].push({ + axisY: item.estimate, + axisX: index, + }); + }); + } iteration += 1; }); }); @@ -774,14 +1265,6 @@ $tableMessage.find("p").html(message); } - function collectProfilesFromJSON(data) { - return Object.keys(data).map((profile, i) => ({ - label: profile, - value: profile, - index: i, - })); - } - function compareArrays(array, target) { if (array.length !== target.length) return false; @@ -793,7 +1276,6 @@ return false; } } - return true; } @@ -827,15 +1309,39 @@ $tableMessage.removeClass("d-none"); $sidebarContent.addClass("d-none"); $tableContent.addClass("d-none"); + $compareProfile.removeClass("d-none"); + } + + function addFileName() { + const fileName = $fileInput.val().split('\\').pop(); + if (fileName) { + $featureFileName.html(fileName.split('.json')) + } + } + + function removeReferenceProfile() { + referenceJsonData = undefined + dataForRead = {}; + featureDataForTableForAllProfiles = {}; + numOfProfilesBasedOnType = {}; + + $(".reference-table-head").addClass("d-none") + + mapProfileDataToReadData(jsonData, dataForRead, referenceJsonData); + $tableBody.html(""); + updateHtmlElementValues(); + renderList(); + $(".wl-selected-profile").addClass("d-none") + $(".wl-compare-profile").removeClass("d-none") } - function loadFile() { - profiles = []; + function loadFile(inputId, jsonForm) 
{ isActiveInferredType = {}; $featureFilterInput.html(""); $sidebarFeatureNameList.html(""); $tableBody.html(""); $(".form-check-input").prop("checked", true); + addFileName() if (typeof window.FileReader !== "function") { updateTableMessage(MESSAGES.error.fileAPINotSupported); return; @@ -845,7 +1351,7 @@ numOfProfilesBasedOnType = {}; handleClosePropertyPanel(); - const input = document.getElementById("file-input"); + const input = document.getElementById(inputId); if (!input) { updateTableMessage(MESSAGES.error.noInputElementFound); } else if (!input.files) { @@ -855,53 +1361,44 @@ } else { const file = input.files[0]; const fr = new FileReader(); - fr.onload = receivedText; + fr.onload = (e) => receivedText(e, jsonForm, inputId); fr.readAsText(file); } } - function receivedText(e) { + function receivedText(e, jsonForm, inputId) { const lines = e.target.result; - jsonData = JSON.parse(lines); - if (Object.keys(dataForRead).length !== 0) { - dataForRead = {}; - } - mapProfileDataToReadData(jsonData, dataForRead); - if (selectedProfiles.length > 0) { - selectedProfiles = []; - } - - if (checkJSONValidityForMultiProfile(jsonData)) { - for (let i = 0; i < 2; i++) { - let addButton = $(`#add-profile-sidebar-button-${i}`); - addButton.removeClass("d-none"); - addButton.attr("disabled", true); + let data = JSON.parse(lines); + if (inputId === "file-input") { + if (checkJSONValidityForMultiProfile(data)) { + jsonData = Object.values(data)[0] + } else { + jsonData = data; } - - profiles = collectProfilesFromJSON(jsonData); - renderProfileDropdown(); + removeReferenceProfile() + $(".reference-table-head").addClass("d-none") + $(".wl-selected-profile").addClass("d-none") + $compareProfile.removeClass("d-none"); + } else { + if (checkJSONValidityForMultiProfile(data)) { + referenceJsonData = Object.values(data)[0] + } else { + referenceJsonData = data; + } + $(".reference-table-head").removeClass("d-none") + $(".wl-selected-profile").removeClass("d-none") + 
$(".wl-compare-profile").addClass("d-none") + } + mapProfileDataToReadData(jsonData, dataForRead, referenceJsonData); + if (data) { $(".compare-select").addClass("d-none"); - $tableMessage.removeClass("d-none"); - $tableContent.addClass("d-none"); - $("#table-content-wrapper").addClass("d-none"); - updateTableMessage(MESSAGES.error.noProfileSelected); $multiProfileWrap.removeClass("d-none"); - $singleProfileWrap.addClass("d-none"); - $filterOptions.addClass("d-none"); - $sidebarContent.removeClass("d-none"); - renderList(); - $jsonForm.trigger("reset"); - } else if (checkJSONValidityForSingleProfile(jsonData)) { - $(".compare-select").addClass("d-none"); - $multiProfileWrap.addClass("d-none"); - $singleProfileWrap.removeClass("d-none"); - $filterOptions.removeClass("d-none"); selectedProfiles[0] = "0"; $tableBody.html(""); updateHtmlElementValues(); renderList(); showDataVisibility(); - $jsonForm.trigger("reset"); + jsonForm.trigger("reset"); } else { $tableBody.html(""); updateTableMessage(MESSAGES.error.invalidJSONFile); @@ -910,60 +1407,87 @@ } } - for (let i = 0; i < 2; i++) { - let addButton = $(`#add-profile-sidebar-button-${i}`); - - addButton.on("click", function () { - reRenderProfileDropdown(i + 1); - if ($(`#sidebar-content-multi-profile-dropdown-${i} option:selected`).text() !== "Select your profile") { - addButton.addClass("d-none"); - - $(`#add-profile-wrap-${i + 1}`).addClass("button-remove-select"); - $(`#add-profile-wrap-${i + 1}`).removeClass("d-none"); - $(`#remove-button-${i + 1}`).removeClass("d-none"); - } else { + function checkedBoxes() { + const item = Object.values($boxes).find( + function(value) { + return $('#' + $(value)[0].id).is(":checked") } - }); - } + ); - $removeButton.on("click", function () { - handleClosePropertyPanel(); - $(`#add-profile-wrap-1`).addClass("d-none"); - $("#sidebar-content-multi-profile-dropdown-1").val("none"); - $removeButton.addClass("d-none"); - let addButton = $(`#add-profile-sidebar-button-1`); - 
addButton.attr("disabled", true); - $(`#add-profile-wrap-0`).removeClass("button-remove-select"); - $(`#remove-button-0`).addClass("d-none"); - $(`#add-profile-sidebar-button-0`).removeClass("d-none"); - selectedProfiles[1] = null; - for (let i = 0; i < selectedProfiles.length; i++) { - reRenderProfileDropdown(i); + if (item) { + $notifCircleContainer.removeClass("d-none") } - $tableBody.html(""); - updateHtmlElementValues(); - renderList(); + } + + $removeReferenceProfileButton.on("click", removeReferenceProfile) + + $(document).on("click", ".page-button", function(e) { + const $pagesButtons = $(".page-button"), + $pagesButtonIndex = $pagesButtons.index(e.target), + $pagesButton = $(".page-button")[$pagesButtonIndex] + + $pagesButtons.removeClass("activ-pages-button") + $($pagesButton).addClass("activ-pages-button") + }) + + $(window).ready(function() { + sidebarContentHeight() + }) + + $("#property-panel-close-icon").on("click", function (e) { + const $clickableTestFeatureWrap = $(".clickable-test-feature-wrap") + + $tableContent.removeClass("d-none") + $clickableTestFeatureWrap.addClass("d-none") }); - $removeButton2.on("click", function () { - handleClosePropertyPanel(); - $(`#add-profile-wrap-2`).addClass("d-none"); - $("#sidebar-content-multi-profile-dropdown-1").val("none"); - $removeButton2.addClass("d-none"); - $(`#add-profile-wrap-1`).removeClass("button-remove-select"); - $(`#add-profile-sidebar-button-1`).removeClass("d-none"); - selectedProfiles[2] = null; - for (let i = 0; i < selectedProfiles.length; i++) { - reRenderProfileDropdown(i); + $dropdownArrowIcon.on("click", function () { + const filterClass = $filterOptions.attr("class"); + + if (filterClass.indexOf("d-none") > 0) { + $notifCircleContainer.addClass("d-none") + $filterOptions.removeClass("d-none"); + $("#dropdown-container").css("background-color", '#FFF') + $dropdownArrowIcon.css("transform","rotate(180deg)") + } else { + $filterOptions.addClass("d-none"); + checkedBoxes() + 
$("#dropdown-container").css("background-color", 'none') + $dropdownArrowIcon.css("transform","rotate(0)") } - $tableBody.html(""); - updateHtmlElementValues(); - renderList(); }); + + $closeIcon.on("click", function () { + $signUpText.addClass("d-none"); + sidebarContentHeight() + $(".open-sign-up-text-notif-container").removeClass("d-none") + }); + + $openSignUpText.on("click", function () { + $signUpText.removeClass("d-none"); + sidebarContentHeight() + $(".open-sign-up-text-notif-container").addClass("d-none") + }); + + + $(document).on("click", ".js-list-group-item span", function (e) { + const listItem = $("li>span"), + listItemIndex = listItem.index(e.target), + $listItemDot = $(".wl_list-item-dot")[listItemIndex], + $arrowIcon = $(".wl_arrow-icon")[listItemIndex] + + listItem.css("padding-left", "15px") + $(".wl_list-item-dot").removeClass("d-none") + $(".wl_arrow-icon").addClass("d-none") + $($arrowIcon).removeClass("d-none") + $($listItemDot).addClass("d-none") + $(listItem[listItemIndex]).css("padding-left", "8px") + }); + // Bind event listeners - $fileInput.on("change", loadFile); + $fileInput.on("change", () => loadFile("file-input", $jsonForm)); + $referencefileInput.on("change", () => loadFile("reference-file-input", $referenceJsonForm)); - $profileDropdown.on("change", handleProfileChange); $(document).on("click", ".js-list-group-item span", scrollToFeatureName); $featureSearch.on( "input", diff --git a/src/whylogs/viewer/templates/index-hbs-cdn-all-in-for-jupyter-notebook.html b/src/whylogs/viewer/templates/index-hbs-cdn-all-in-for-jupyter-notebook.html new file mode 100644 index 0000000000..d339ac3d6a --- /dev/null +++ b/src/whylogs/viewer/templates/index-hbs-cdn-all-in-for-jupyter-notebook.html @@ -0,0 +1,1352 @@ + + + + + + + + + + Profile Viewer | whylogs + + + + + + + + + + + + + + + + + + + + + + + diff --git a/src/whylogs/viewer/templates/index-hbs-cdn-all-in-jupyter-bar-chart.html 
b/src/whylogs/viewer/templates/index-hbs-cdn-all-in-jupyter-bar-chart.html new file mode 100644 index 0000000000..c212ba499c --- /dev/null +++ b/src/whylogs/viewer/templates/index-hbs-cdn-all-in-jupyter-bar-chart.html @@ -0,0 +1,433 @@ + + + + + + + + + + Profile Viewer | whylogs + + + + + + + + + + + + + + + + + + + + + + diff --git a/src/whylogs/viewer/templates/index-hbs-cdn-all-in-jupyter-constraints-report.html b/src/whylogs/viewer/templates/index-hbs-cdn-all-in-jupyter-constraints-report.html new file mode 100644 index 0000000000..b306f5b7ce --- /dev/null +++ b/src/whylogs/viewer/templates/index-hbs-cdn-all-in-jupyter-constraints-report.html @@ -0,0 +1,741 @@ + + + + + + + + + + Profile Viewer | whylogs + + + + + + + + + + + + + + + + + + + + + + + diff --git a/src/whylogs/viewer/templates/index-hbs-cdn-all-in-jupyter-differenced-chart.html b/src/whylogs/viewer/templates/index-hbs-cdn-all-in-jupyter-differenced-chart.html new file mode 100644 index 0000000000..32192537ff --- /dev/null +++ b/src/whylogs/viewer/templates/index-hbs-cdn-all-in-jupyter-differenced-chart.html @@ -0,0 +1,421 @@ + + + + + + + + + + Profile Viewer | whylogs + + + + + + + + + + + + + + + + + + + + + + diff --git a/src/whylogs/viewer/templates/index-hbs-cdn-all-in-jupyter-distribution-chart.html b/src/whylogs/viewer/templates/index-hbs-cdn-all-in-jupyter-distribution-chart.html new file mode 100644 index 0000000000..bab944d520 --- /dev/null +++ b/src/whylogs/viewer/templates/index-hbs-cdn-all-in-jupyter-distribution-chart.html @@ -0,0 +1,414 @@ + + + + + + + + + + Profile Viewer | whylogs + + + + + + + + + + + + + + + + + + + + + + diff --git a/src/whylogs/viewer/templates/index-hbs-cdn-all-in-jupyter-feature-summary-statistics.html b/src/whylogs/viewer/templates/index-hbs-cdn-all-in-jupyter-feature-summary-statistics.html new file mode 100644 index 0000000000..a30cc99b8a --- /dev/null +++ b/src/whylogs/viewer/templates/index-hbs-cdn-all-in-jupyter-feature-summary-statistics.html @@ 
-0,0 +1,474 @@ + + + + + + + + + + Profile Viewer | whylogs + + + + + + + + + + + + + + + + + + + + + + + diff --git a/src/whylogs/viewer/templates/index-hbs-cdn-all-in.html b/src/whylogs/viewer/templates/index-hbs-cdn-all-in.html new file mode 100644 index 0000000000..9b2f575e29 --- /dev/null +++ b/src/whylogs/viewer/templates/index-hbs-cdn-all-in.html @@ -0,0 +1,2822 @@ + + + + + + + + + + Profile Viewer | whylogs + + + + + + + + + + + + + + + + + + + + + + + diff --git a/src/whylogs/viewer/index-hbs-library-all-in.html b/src/whylogs/viewer/templates/index-hbs-library-all-in.html similarity index 100% rename from src/whylogs/viewer/index-hbs-library-all-in.html rename to src/whylogs/viewer/templates/index-hbs-library-all-in.html diff --git a/src/whylogs/viewer/index-hbs.html b/src/whylogs/viewer/templates/index-hbs.html similarity index 100% rename from src/whylogs/viewer/index-hbs.html rename to src/whylogs/viewer/templates/index-hbs.html diff --git a/src/whylogs/viewer/templates/index.html b/src/whylogs/viewer/templates/index.html new file mode 100644 index 0000000000..4c48006db2 --- /dev/null +++ b/src/whylogs/viewer/templates/index.html @@ -0,0 +1,370 @@ + + + + + + + + + + Profile Viewer | whylogs + + + + + + + + +
    + +
    +
    + +
    +
    +
    +
    +
    +
    +
    +

    Clickable test feature

    +
    +
    + +
    +
    +
    +
    +
    + +
    +
    + +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    Compare profile to reference profile, such as training data
    +
    +
    +
    + + +
    +
    +
    +
    +
    +
    +
    +
    Reference profile
    +
    +
    +
    +
    +
    +
    + + +
    +
    +
    +
    +
    +
    +
    + +
    +
    +
    +
    + +
    +
    +
    +

    58

    +

    Summary

    +
    +
    +

    54

    +

    Summary

    +
    +
    +

    131

    +

    Summary

    +
    +
    +
    +
    +
    +
    +
    +
    Feature
    +
    Reference
    +
    +
    Diff from ref
    +
    Frequent items
    +
    Inf. feature type
    +
    Total count
    +
    Null fraction
    +
    Est. unique values
    +
    Data type
    +
    Data type count
    +
    Mean
    +
    Std. dev
    +
    Min
    +
    First quantile
    +
    Median
    +
    Third quantile
    +
    Max
    +
    +
    +
      +
      +
      +
      +

      Please select and load a JSON profile from your local drive.

      +
      +
      + +
      +
      +
      + + + + + + + + + + +
      Item
      +
      +
      +
      +
      +
      +
      +
      +
      +

      Hold on! :)

      +

      + It looks like your current screen size or device is not yet supported by the WhyLabs Sandbox. The Sandbox is + best experienced on a desktop computer. Please try maximizing this window or switching to another device. We + are working on adding support for a larger variety of devices. +

      +
      +
      + + + + + + + diff --git a/src/whylogs/viz/__init__.py b/src/whylogs/viz/__init__.py index a536267476..0a6959984f 100644 --- a/src/whylogs/viz/__init__.py +++ b/src/whylogs/viz/__init__.py @@ -1,4 +1,5 @@ from .browser_viz import profile_viewer +from .jupyter_notebook_viz import NotebookProfileViewer from .visualizer import BaseProfileVisualizer, ProfileVisualizer -__ALL__ = [ProfileVisualizer, BaseProfileVisualizer, profile_viewer] +__ALL__ = [ProfileVisualizer, BaseProfileVisualizer, profile_viewer, NotebookProfileViewer] diff --git a/src/whylogs/viz/browser_viz.py b/src/whylogs/viz/browser_viz.py index a59244b5cf..5c48efa33f 100644 --- a/src/whylogs/viz/browser_viz.py +++ b/src/whylogs/viz/browser_viz.py @@ -12,7 +12,7 @@ logger = logging.getLogger(__name__) -def profile_viewer(profiles: List[DatasetProfile] = None, output_path=None) -> str: +def profile_viewer(profiles: List[DatasetProfile] = None, reference_profiles: List[DatasetProfile] = None, output_path=None) -> str: """ open a profile viewer loader on your default browser """ @@ -23,7 +23,7 @@ def profile_viewer(profiles: List[DatasetProfile] = None, output_path=None) -> s logger.debug(str(e)) logger.debug("Unable to load pybars; install pybars3 to load profile from directly from the current session ") - index_path = os.path.abspath(os.path.join(_MY_DIR, os.pardir, "viewer", "index.html")) + index_path = os.path.abspath(os.path.join(_MY_DIR, os.pardir, "viewer/templates", "index.html")) webbrowser.open_new_tab(f"file:{index_path}#") return None @@ -32,12 +32,15 @@ def profile_viewer(profiles: List[DatasetProfile] = None, output_path=None) -> s if len(profiles) > 1: logger.warning("More than one profile not implemented yet, default to first profile in the list ") profile_jsons = [message_to_json(each_prof.to_summary()) for each_prof in profiles] + if reference_profiles: + reference_profile_jsons = [message_to_json(each_prof.to_summary()) for each_prof in reference_profiles] + else: - index_path = 
os.path.abspath(os.path.join(_MY_DIR, os.pardir, "viewer", "index.html")) + index_path = os.path.abspath(os.path.join(_MY_DIR, os.pardir, "viewer/templates", "index.html")) webbrowser.open_new_tab(f"file:{index_path}#") return None - index_path = os.path.abspath(os.path.join(_MY_DIR, os.pardir, "viewer", "index-hbs-cdn-all-in.html")) + index_path = os.path.abspath(os.path.join(_MY_DIR, os.pardir, "viewer/templates", "index-hbs-cdn-all-in.html")) with open(index_path, "r") as file_with_template: source = file_with_template.read() @@ -46,7 +49,10 @@ def profile_viewer(profiles: List[DatasetProfile] = None, output_path=None) -> s compiler = Compiler() template = compiler.compile(source) # replace handlebars for json profiles - output_index = template({"profile_from_whylogs": profile_jsons[0]}) + if reference_profiles: + output_index = template({"profile_from_whylogs": profile_jsons[0], "reference_profile": reference_profile_jsons[0]}) + else: + output_index = template({"profile_from_whylogs": profile_jsons[0]}) if not output_path: output_path = tempfile.mkstemp(suffix=".html")[1] diff --git a/src/whylogs/viz/jupyter_notebook_viz.py b/src/whylogs/viz/jupyter_notebook_viz.py new file mode 100644 index 0000000000..e46c167fed --- /dev/null +++ b/src/whylogs/viz/jupyter_notebook_viz.py @@ -0,0 +1,148 @@ +import html +import json +import logging +import os + +from IPython.core.display import HTML + +from whylogs.core import DatasetProfile +from whylogs.proto import InferredType +from whylogs.util.protobuf import message_to_json + +from .utils.profile_viz_calculations import ( + add_drift_val_to_ref_profile_json, + add_feature_statistics, +) + +_MY_DIR = os.path.realpath(os.path.dirname(__file__)) + +logger = logging.getLogger(__name__) + +numerical_types = (InferredType.Type.INTEGRAL, InferredType.Type.FRACTIONAL) + + +class NotebookProfileViewer: + SUMMARY_REPORT_TEMPLATE_NAME = "index-hbs-cdn-all-in-for-jupyter-notebook.html" + DOUBLE_HISTOGRAM_TEMPLATE_NAME = 
"index-hbs-cdn-all-in-jupyter-distribution-chart.html" + DISTRIBUTION_CHART_TEMPLATE_NAME = "index-hbs-cdn-all-in-jupyter-bar-chart.html" + DIFFERENCED_CHART_TEMPLATE_NAME = "index-hbs-cdn-all-in-jupyter-differenced-chart.html" + FEATURE_STATISTICS_TEMPLATE_NAME = "index-hbs-cdn-all-in-jupyter-feature-summary-statistics.html" + CONSTRAINTS_REPORT_TEMPLATE_NAME = "index-hbs-cdn-all-in-jupyter-constraints-report.html" + PAGE_SIZES = { + SUMMARY_REPORT_TEMPLATE_NAME: "1000px", + DOUBLE_HISTOGRAM_TEMPLATE_NAME: "300px", + DISTRIBUTION_CHART_TEMPLATE_NAME: "277px", + DIFFERENCED_CHART_TEMPLATE_NAME: "277px", + FEATURE_STATISTICS_TEMPLATE_NAME: "650px", + CONSTRAINTS_REPORT_TEMPLATE_NAME: "750PX", + } + + def __get_template_path(self, html_file_name): + template_path = os.path.abspath(os.path.join(_MY_DIR, os.pardir, "viewer/templates", html_file_name)) + return template_path + + def __get_compiled_template(self, template_name): + template_path = self.__get_template_path(template_name) + try: + from pybars import Compiler + except ImportError as e: + Compiler = None + logger.debug(str(e)) + logger.debug("Unable to load pybars; install pybars3 to load profile from directly from the current session ") + with open(template_path, "r") as file_with_template: + source = file_with_template.read() + compiler = Compiler() + template = compiler.compile(source) + return template + + def __display_feature_chart(self, feature_names, template_name, preferred_cell_height=None): + if type(feature_names) is not list: + feature_names = [feature_names] + template = self.__get_compiled_template(template_name) + if self._reference_profile: + target_profile_columns = json.loads(self._target_profile_json).get("columns") + reference_profile_columns = json.loads(self._reference_profile_json).get("columns") + target_profile_features, reference_profile_features = {}, {} + for feature_name in feature_names: + target_profile_features[feature_name] = target_profile_columns.get(feature_name) + 
reference_profile_features[feature_name] = reference_profile_columns.get(feature_name) + distribution_chart = template( + {"profile_from_whylogs": json.dumps(target_profile_features), "reference_profile_from_whylogs": json.dumps(reference_profile_features)} + ) + return self.__display_rendered_template(distribution_chart, template_name, preferred_cell_height) + else: + logger.warning("This method has to get both target and reference profiles, with valid feature title") + return None + + def __display_rendered_template(self, template, template_name, height): + if not height: + height = self.PAGE_SIZES[template_name] + iframe = f"""
      """ + return HTML(iframe) + + def set_profiles(self, target_profile: DatasetProfile = None, reference_profile: DatasetProfile = None): + self._target_profile = target_profile + self._reference_profile = reference_profile + if self._target_profile: + self._target_profile_json = message_to_json(self._target_profile.to_summary()) + if self._reference_profile: + self._reference_profile_json = message_to_json(self._reference_profile.to_summary()) + + def summary_drift_report(self, preferred_cell_height=None): + reference_profile = add_drift_val_to_ref_profile_json(self._target_profile, self._reference_profile, json.loads(self._reference_profile_json)) + template = self.__get_compiled_template(self.SUMMARY_REPORT_TEMPLATE_NAME) + profiles_summary = {"profile_from_whylogs": self._target_profile_json} + if self._reference_profile: + profiles_summary["reference_profile_from_whylogs"] = json.dumps(reference_profile) + return self.__display_rendered_template(template(profiles_summary), self.SUMMARY_REPORT_TEMPLATE_NAME, preferred_cell_height) + + def double_histogram(self, feature_names, preferred_cell_height=None): + return self.__display_feature_chart(feature_names, self.DOUBLE_HISTOGRAM_TEMPLATE_NAME, preferred_cell_height) + + def distribution_chart(self, feature_names, preferred_cell_height=None): + return self.__display_feature_chart(feature_names, self.DISTRIBUTION_CHART_TEMPLATE_NAME, preferred_cell_height) + + def difference_distribution_chart(self, feature_names, preferred_cell_height=None): + return self.__display_feature_chart(feature_names, self.DIFFERENCED_CHART_TEMPLATE_NAME, preferred_cell_height) + + def feature_statistics(self, feature_name, profile="reference", preferred_cell_height=None): + template = self.__get_compiled_template(self.FEATURE_STATISTICS_TEMPLATE_NAME) + if self._reference_profile and profile.lower() == "reference": + selected_profile_json = self._reference_profile_json + selected_profile = self._reference_profile.columns + else: + 
selected_profile_json = self._target_profile_json + selected_profile = self._target_profile.columns + if selected_profile.get(feature_name).schema_tracker.to_summary().inferred_type.type in numerical_types: + rendered_template = template( + { + "profile_feature_statistics_from_whylogs": json.dumps( + add_feature_statistics(selected_profile.get(feature_name), selected_profile_json, feature_name) + ) + } + ) + return self.__display_rendered_template(rendered_template, self.FEATURE_STATISTICS_TEMPLATE_NAME, preferred_cell_height) + else: + logger.warning("Quantile and descriptive statistics can be calculated for numerical features only!") + return None + + def constraints_report(self, constraints, preferred_cell_height=None): + template = self.__get_compiled_template(self.CONSTRAINTS_REPORT_TEMPLATE_NAME) + rendered_template = template({"constraints_report": json.dumps(constraints.report())}) + return self.__display_rendered_template(rendered_template, self.CONSTRAINTS_REPORT_TEMPLATE_NAME, preferred_cell_height) + + def download(self, html, preferred_path=None, html_file_name=None): + if not html_file_name: + if self._reference_profile: + html_file_name = self._reference_profile.dataset_timestamp + else: + html_file_name = self._target_profile.dataset_timestamp + if preferred_path: + path = os.path.join(os.path.expanduser(preferred_path), str(html_file_name) + ".html") + else: + path = os.path.join(os.pardir, "html_reports", str(html_file_name) + ".html") + full_path = os.path.abspath(path) + with open(full_path, "w") as saved_html: + saved_html.write(html.data) + saved_html.close() + return None diff --git a/src/whylogs/viz/utils/__init__.py b/src/whylogs/viz/utils/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/src/whylogs/viz/utils/profile_viz_calculations.py b/src/whylogs/viz/utils/profile_viz_calculations.py new file mode 100644 index 0000000000..7d1f1d5a9d --- /dev/null +++ b/src/whylogs/viz/utils/profile_viz_calculations.py @@ -0,0 
+1,180 @@ +import json + +from whylogs.core.summaryconverters import ( + compute_chi_squared_test_p_value, + ks_test_compute_p_value, + single_quantile_from_sketch, +) +from whylogs.proto import InferredType, ReferenceDistributionDiscreteMessage + +categorical_types = (InferredType.Type.INTEGRAL, InferredType.Type.STRING, InferredType.Type.BOOLEAN) + + +def __calculate_variance(profile_jsons, feature_name): + """ + Calculates variance for single feature + + Parameters + ---------- + profile_jsons: Profile summary serialized json + feature_name: Name of feature + + Returns + ------- + variance : Calculated variance for feature + """ + feature = profile_jsons.get("columns").get(feature_name) + variance = feature.get("numberSummary").get("stddev") ** 2 if feature.get("numberSummary") is not None else 0 + return variance + + +def __calculate_coefficient_of_variation(profile_jsons, feature_name): + """ + Calculates coefficient of variation for single feature + + Parameters + ---------- + profile_jsons: Profile summary serialized json + feature_name: Name of feature + + Returns + ------- + coefficient_of_variation : Calculated coefficient of variation for feature + """ + feature = profile_jsons.get("columns").get(feature_name) + coefficient_of_variation = ( + feature.get("numberSummary").get("stddev") / feature.get("numberSummary").get("mean") if feature.get("numberSummary") is not None else 0 + ) + return coefficient_of_variation + + +def __calculate_sum(profile_jsons, feature_name): + """ + Calculates sum for single feature + + Parameters + ---------- + profile_jsons: Profile summary serialized json + feature_name: Name of feature + + Returns + ------- + coefficient_of_variation : Calculated sum for feature + """ + feature = profile_jsons.get("columns").get(feature_name) + feature_number_summary = feature.get("numberSummary") + if feature_number_summary: + sum = feature_number_summary.get("mean") * int(feature.get("counters").get("count")) + else: + sum = 0 + return 
sum + + +def __calculate_quantile_statistics(feature, profile_jsons, feature_name): + """ + Calculates sum for single feature + + Parameters + ---------- + profile_jsons: Profile summary serialized json + feature_name: Name of feature + + Returns + ------- + coefficient_of_variation : Calculated sum for feature + """ + quantile_statistics = {} + feature_number_summary = profile_jsons.get("columns").get(feature_name).get("numberSummary") + if feature.number_tracker and feature.number_tracker.histogram.get_n() > 0: + kll_sketch = feature.number_tracker.histogram + quantile_statistics["fifth_percentile"] = single_quantile_from_sketch(kll_sketch, quantile=0.05).quantile + quantile_statistics["q1"] = single_quantile_from_sketch(kll_sketch, quantile=0.25).quantile + quantile_statistics["median"] = single_quantile_from_sketch(kll_sketch, quantile=0.5).quantile + quantile_statistics["q3"] = single_quantile_from_sketch(kll_sketch, quantile=0.75).quantile + quantile_statistics["ninety_fifth_percentile"] = single_quantile_from_sketch(kll_sketch, quantile=0.95).quantile + quantile_statistics["range"] = feature_number_summary.get("max") - feature_number_summary.get("min") + quantile_statistics["iqr"] = quantile_statistics["q3"] - quantile_statistics["q1"] + return quantile_statistics + + +def add_drift_val_to_ref_profile_json(target_profile, reference_profile, reference_profile_json): + """ + Calculates drift value for reference profile based on profile type and inserts that data into reference profile + + Parameters + ---------- + target_profile: Target profile + reference_profile: Reference profile + reference_profile_json: Reference profile summary serialized json + + Returns + ------- + reference_profile_json : Reference profile summary serialized json with drift value for every feature + """ + observations = 0 + missing_cells = 0 + total_count = 0 + for target_col_name in target_profile.columns.keys(): + target_col = target_profile.columns[target_col_name] + observations 
+= target_col.counters.to_protobuf().count + null_count = target_col.to_summary().counters.null_count.value + missing_cells += null_count if null_count else 0 + total_count += target_col.to_summary().counters.count + + if target_col_name in reference_profile.columns: + ref_col = reference_profile.columns[target_col_name] + target_type = target_col.schema_tracker.to_summary().inferred_type.type + ref_type = ref_col.schema_tracker.to_summary().inferred_type.type + if all([type == InferredType.FRACTIONAL for type in (ref_type, target_type)]): + target_kll_sketch = target_col.number_tracker.histogram + reference_kll_sketch = ref_col.number_tracker.histogram + ks_p_value = ks_test_compute_p_value(target_kll_sketch, reference_kll_sketch) + reference_profile_json["columns"][target_col_name]["drift_from_ref"] = ks_p_value.ks_test + elif all([type in categorical_types for type in (ref_type, target_type)]) and ref_type == target_type: + target_frequent_items_sketch = target_col.frequent_items + reference_frequent_items_sketch = ref_col.frequent_items + if any([msg.to_summary() is None for msg in (target_frequent_items_sketch, reference_frequent_items_sketch)]): + continue + target_total_count = target_col.counters.count + target_message = ReferenceDistributionDiscreteMessage(frequent_items=target_frequent_items_sketch.to_summary(), total_count=target_total_count) + ref_total_count = ref_col.counters.count + + reference_message = ReferenceDistributionDiscreteMessage( + frequent_items=reference_frequent_items_sketch.to_summary(), total_count=ref_total_count + ) + chi_squared_p_value = compute_chi_squared_test_p_value(target_message, reference_message) + if chi_squared_p_value.chi_squared_test: + reference_profile_json["columns"][target_col_name]["drift_from_ref"] = chi_squared_p_value.chi_squared_test + else: + reference_profile_json["columns"][target_col_name]["drift_from_ref"] = None + reference_profile_json["properties"]["observations"] = observations + 
reference_profile_json["properties"]["missing_cells"] = missing_cells + reference_profile_json["properties"]["total_count"] = total_count + reference_profile_json["properties"]["missing_percentage"] = (missing_cells / total_count) * 100 if total_count else 0 + + return reference_profile_json + + +def add_feature_statistics(feature, profile_json, feature_name): + """ + Calculates different values for feature statistics + + Parameters + ---------- + feature: + profile_json: Profile summary serialized json + feature_name: Name of feature + + Returns + ------- + feature: Feature data with appended values for statistics report + """ + profile_features = json.loads(profile_json) + feature_with_statistics = {} + feature_with_statistics["properties"] = profile_features.get("properties") + feature_with_statistics[feature_name] = profile_features.get("columns").get(feature_name) + feature_with_statistics[feature_name]["sum"] = __calculate_sum(profile_features, feature_name) + feature_with_statistics[feature_name]["variance"] = __calculate_variance(profile_features, feature_name) + feature_with_statistics[feature_name]["coefficient_of_variation"] = __calculate_coefficient_of_variation(profile_features, feature_name) + feature_with_statistics[feature_name]["quantile_statistics"] = __calculate_quantile_statistics(feature, profile_features, feature_name) + return feature_with_statistics diff --git a/tests/unit/viz/test_jupyter_notebook_viz.py b/tests/unit/viz/test_jupyter_notebook_viz.py new file mode 100644 index 0000000000..9a4b06b959 --- /dev/null +++ b/tests/unit/viz/test_jupyter_notebook_viz.py @@ -0,0 +1,219 @@ +import datetime +import os + +import numpy as np +import pandas as pd + +from whylogs import get_or_create_session +from whylogs.core.statistics.constraints import ( + DatasetConstraints, + Op, + SummaryConstraint, + ValueConstraint, + columnPairValuesInSetConstraint, + columnsMatchSetConstraint, + columnValuesInSetConstraint, + columnValuesUniqueWithinRow, + 
def _log_numeric_profile():
    """Log five rows of random integers plus nulls and return the resulting profile."""
    session = get_or_create_session()

    with session.logger("mytestytest", dataset_timestamp=datetime.datetime(2021, 6, 2)) as logger:
        for _ in range(5):
            logger.log({"uniform_integers": np.random.randint(0, 50)})
            logger.log({"nulls": None})

        return logger.profile


def _log_credit_card_profile(card_numbers):
    """Log a one-column ``credit_card`` dataframe built from *card_numbers*."""
    session = get_or_create_session()
    credit_cards = pd.DataFrame([{"credit_card": number} for number in card_numbers])
    return session.log_dataframe(credit_cards, "test.data")


def __generate_target_profile():
    """Return a small numeric profile used as the *target* in the tests."""
    return _log_numeric_profile()


def __generate_reference_profile():
    """Return a small numeric profile used as the *reference* in the tests."""
    return _log_numeric_profile()


def __generate_categorical_target_profile():
    """Return a string-valued (credit card) profile used as the *target*."""
    return _log_credit_card_profile(
        [
            "3714-496353-98431",
            "3787 344936 71000",
            "3056 930902 5904",
            "3065 133242 2899",
        ]
    )


def __generate_categorical_reference_profile():
    """Return a string-valued (credit card) profile used as the *reference*."""
    return _log_credit_card_profile(
        [
            "6011 1111 1111 1117",
            "6011-0009-9013-9424",
            "3530 1113 3330 0000",
            "3566-0020-2036-0505",
        ]
    )


def _get_sample_dataset_constraints():
    """Build a DatasetConstraints with one constraint of each supported family."""
    cvisc = columnValuesInSetConstraint(value_set={2, 5, 8})
    ltc = ValueConstraint(Op.LT, 1)

    min_gt_constraint = SummaryConstraint("min", Op.GT, value=100)
    max_le_constraint = SummaryConstraint("max", Op.LE, value=5)

    columns_match_constraint = columnsMatchSetConstraint({"col1", "col2"})

    val_set = {(1, 2), (3, 5)}
    col_set = ["A", "B"]
    mcv_constraints = [
        columnValuesUniqueWithinRow(column_A="A", verbose=True),
        columnPairValuesInSetConstraint(column_A="A", column_B="B", value_set=val_set),
        sumOfRowValuesOfMultipleColumnsEqualsConstraint(columns=col_set, value=100),
    ]

    return DatasetConstraints(
        None,
        value_constraints={"annual_inc": [cvisc, ltc]},
        summary_constraints={"annual_inc": [max_le_constraint, min_gt_constraint]},
        table_shape_constraints=[columns_match_constraint],
        multi_column_value_constraints=mcv_constraints,
    )


def _numeric_viewer():
    """Return a NotebookProfileViewer loaded with the numeric target/reference pair."""
    target_profile = __generate_target_profile()
    reference_profile = __generate_reference_profile()
    viz = NotebookProfileViewer()
    viz.set_profiles(target_profile=target_profile, reference_profile=reference_profile)
    return viz


def _categorical_viewer():
    """Return a NotebookProfileViewer loaded with the categorical target/reference pair."""
    target_profile = __generate_categorical_target_profile()
    reference_profile = __generate_categorical_reference_profile()
    viz = NotebookProfileViewer()
    viz.set_profiles(target_profile=target_profile, reference_profile=reference_profile)
    return viz


# The tests below are smoke tests: each one passes when the viewer call
# completes without raising.


def test_notebook_profile_viewer_set_profiles():
    _numeric_viewer()


def test_summary_drift_report_without_preferred_height():
    _numeric_viewer().summary_drift_report()


def test_summary_drift_report_with_preferred_height():
    # Previously this test never passed a height and so duplicated the
    # "without" variant. NOTE(review): keyword name assumed to match
    # constraints_report's ``preferred_cell_height`` -- confirm.
    _numeric_viewer().summary_drift_report(preferred_cell_height="1000px")


def test_feature_statistics_not_passing_profile_type():
    _numeric_viewer().feature_statistics("uniform_integers")


def test_feature_statistics_passing_profile_type():
    _numeric_viewer().feature_statistics("uniform_integers", "target")


def test_feature_statistics_passing_profile_type_and_prefered_height():
    _numeric_viewer().feature_statistics("uniform_integers", "target", "1000px")


def test_download_passing_all_arguments(tmpdir):
    viz = _numeric_viewer()

    viz.download(viz.summary_drift_report(), tmpdir, html_file_name="foo")
    # Join explicitly instead of ``tmpdir + "/foo.html"``, which only worked
    # because py.path's ``+`` appends text to the basename.
    assert os.path.exists(os.path.join(str(tmpdir), "foo.html"))


def test_constraints_report_without_preferred_height():
    viz = _numeric_viewer()
    dc = _get_sample_dataset_constraints()
    viz.constraints_report(dc)


def test_constraints_report_with_preferred_height():
    viz = _numeric_viewer()
    dc = _get_sample_dataset_constraints()
    viz.constraints_report(dc, preferred_cell_height="1000px")


def test_double_histogram_without_height():
    _numeric_viewer().double_histogram("uniform_integers")


def test_double_histogram_with_height():
    _numeric_viewer().double_histogram("uniform_integers", "1000px")


def test_distribution_chart_without_height():
    _categorical_viewer().distribution_chart("credit_card")


def test_distribution_chart_with_height():
    _categorical_viewer().distribution_chart("credit_card", "1000px")


def test_difference_distribution_chart_without_height():
    _categorical_viewer().difference_distribution_chart("credit_card")


def test_difference_distribution_chart_with_height():
    _categorical_viewer().difference_distribution_chart("credit_card", "1000px")