From a89ecf2699e562aa79c61eebee3110a94f6ea448 Mon Sep 17 00:00:00 2001 From: Perch Date: Thu, 23 Dec 2021 23:25:13 +0400 Subject: [PATCH 001/119] visualizer new design, add reference profile in profile viewer --- examples/Dataset_Profiler_Viewer.ipynb | 89 +- src/whylogs/viewer/index-hbs-cdn-all-in.html | 802 +++++++++++++++---- src/whylogs/viz/browser_viz.py | 22 +- 3 files changed, 714 insertions(+), 199 deletions(-) diff --git a/examples/Dataset_Profiler_Viewer.ipynb b/examples/Dataset_Profiler_Viewer.ipynb index 48b2c8809b..768ba8de08 100644 --- a/examples/Dataset_Profiler_Viewer.ipynb +++ b/examples/Dataset_Profiler_Viewer.ipynb @@ -18,7 +18,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "id": "1f0ab34a", "metadata": {}, "outputs": [], @@ -29,7 +29,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "id": "fc04cc42", "metadata": {}, "outputs": [], @@ -59,7 +59,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 4, "id": "7ebc0b5f", "metadata": {}, "outputs": [], @@ -86,22 +86,24 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": null, "id": "2e018520", "metadata": {}, "outputs": [], "source": [ "session = get_or_create_session()\n", - "with session.logger(\"mytestytest\",dataset_timestamp=datetime.datetime(2021, 6, 2)) as logger:\n", - " for _ in range(500):\n", - " logger.log({\"uniform_integers\": np.random.randint(0,50)})\n", - " logger.log({\"strings\": fake.name()})\n", - " logger.log({\"mixture_distribution\": np.random.choice(distribution, 1)[0]})\n", - " logger.log({\"nulls\": None})\n", - " logger.log({\"moah_data\":1})\n", - " logger.log({\"moah_data\":1})\n", - " logger.log({\"moah_data\":5})\n", - " profile=logger.profile" + "def profile_generator():\n", + " with session.logger(\"mytestytest\",dataset_timestamp=datetime.datetime(2021, 6, 2)) as logger:\n", + " for _ in range(500):\n", + " logger.log({\"uniform_integers\": np.random.randint(0,50)})\n", + " logger.log({\"strings\": fake.name()})\n", + " logger.log({\"mixture_distribution\": np.random.choice(distribution, 1)[0]})\n", + " logger.log({\"nulls\": None})\n", + " logger.log({\"moah_data\":1})\n", + " logger.log({\"moah_data\":1})\n", + " logger.log({\"moah_data\":5})\n", + " return logger.profile\n", + "profile=profile_generator()" ] }, { @@ -123,29 +125,54 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": null, "id": "e293634d", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "'/var/folders/pr/f715zv8x17b1v5vwydgv2gq40000gq/T/tmpabxku502.html'" - ] - }, - "execution_count": 16, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "profile_viewer(profiles=[profile], output_path=None)" ] }, + { + "cell_type": "markdown", + "id": "e9cebe5c", + "metadata": {}, + "source": [ + "### Logging reference profile" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "d36563e9", + "metadata": {}, + "outputs": [], + "source": [ + "reference_profile=profile_generator() " + ] + }, + { + "cell_type": "markdown", + "id": "56dcaef9", + "metadata": {}, + "source": [ + "### Add reference profile to viewer and open" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6a4eb38c", + "metadata": {}, + "outputs": [], + "source": [ + "profile_viewer(profiles=[profile], reference_profiles=[reference_profile], output_path=None,)" + ] + }, { "cell_type": "code", "execution_count": null, - "id": "d09732c5", + "id": "1b1d89e2", "metadata": {}, "outputs": [], "source": [] @@ -153,9 +180,9 @@ ], "metadata": { "kernelspec": { - "display_name": "whylogs-dev", + "display_name": "Python 3", "language": "python", - "name": "whylogs-dev" + "name": "python3" }, "language_info": { "codemirror_mode": { @@ -167,7 +194,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.6" + "version": "3.8.8" } }, "nbformat": 4, diff --git a/src/whylogs/viewer/index-hbs-cdn-all-in.html b/src/whylogs/viewer/index-hbs-cdn-all-in.html index 40141b4c37..e18aca1989 100644 --- a/src/whylogs/viewer/index-hbs-cdn-all-in.html +++ b/src/whylogs/viewer/index-hbs-cdn-all-in.html @@ -327,7 +327,6 @@ .wl-table-cell, .wl-table-head { - border-right: 1px solid var(--brandSecondary200); border-bottom: 1px solid var(--brandSecondary200); display: table-cell; padding: 12px 18px; @@ -438,7 +437,7 @@ } .wl-property-panel__chart--single { - padding-bottom: 30px; + padding-bottom: 17px; } .wl-property-panel__chart-title { @@ -666,6 +665,246 @@ color: var(--brandSecondary900); line-height: 1.5; } + + .wl__unlock-the-power { + padding: 15px; + position: fixed; + bottom: 0; + left: 0; + z-index: 999999; + background: #F1F6F6; + margin-bottom: 0 !important; + display: flex; + flex-direction: column; + align-items: center; + -webkit-box-shadow: 0px -6px 13px 0px rgba(0,0,0,0.32); + box-shadow: 0px 0px 15px rgba(0, 0, 0, 0.05); + width: var(--SIDE-PANEL-WIDTH); + } + + .wl__btn-signup { + font-size: 13px; + } + + .wl__close-icon { + width: 15px; + height: 15px; + } + + .wl__close-icon img{ + width: 100%; + height: 100%; + cursor: pointer; + } + + .count-color { + color: black; + } + + .wl__burger_icon { + width: 18px; + height: 16px; + } + + .wl__burger_icon img{ + width: 100%; + height: 100%; + cursor: pointer; + } + + .space-between { + display: flex; + justify-content: space-between; + } + + .align-items { + display: flex; + align-items: center; + } + + .display-flex{ + display: flex; + } + + .feature-count-title { + font-size: 22px; + } + + .wl_filter-options { + background: rgba(255, 255, 255, 0.5); + border: 1px solid #DBE5E7; + box-sizing: border-box; + border-radius: 4px; + padding: 10px; + } + + .form-check-input:checked { + background-color: #0E7384; + border-color: #0E7384; + } + + .form-check-input[type=checkbox] { + border-radius: 1.25em; + } + + .not-clickable { + pointer-events: none; + } + + .wl_arrow-icon { + width: 15px; + height: 15px; + } + + .arrow-icon-container { + height: 100%; + cursor: pointer; + } + + .wl_list-item-dot { + background: #b0d2d7; + width: 8px; + height: 8px; + border-radius: 50px; + } + + .wl_filter-list-item { + display: flex; + align-items: center; + } + + .wl_filter-list-item>span{ + padding-left: 15px; + } + + .table-border-none { + border: none; + } + + .clickable-test-feature-wrap { + background: #F8FAFB; + } + + .clickable-test-feature-heading { + display: flex; + flex-direction: column; + } + + .clickable-test-feature-heading-wrap { + padding: 35px; + padding-bottom: 0; + border-bottom: 2px solid #EBF2F3; + } + + .pages-button-wrap { + display: flex; + align-items: flex-end; + } + + .page-button-wrap { + padding-right: 60px; + } + + .page-button{ + border: none; + background: no-repeat; + color: #6C757D; + font-weight: 600; + font-size: 16px; + letter-spacing: -0.01em; + padding-bottom: 25px; + } + + .activ-pages-button{ + color: #369BAC !important; + border-bottom: 4px solid #369BAC !important; + padding-bottom: 21px !important; + } + + .title p { + font-weight: 600; + font-size: 24px; + color: #313B3D; + } + + .info p { + font-weight: 600; + font-size: 12px; + color: #313B3D; + } + + .info div { + font-weight: 600; + font-size: 18px; + line-height: 20px; + color: #0E7384; + } + + .chart, .info { + display: flex; + flex-direction: column; + align-items: flex-end; + padding-right: 20px; + } + + .info:last-child { + padding: 0; + } + + .clickable-test-feature-body { + display: flex; + flex-direction: column; + } + + .chart-box-wrap { + display: flex; + justify-content: center; + } + + .chart-box { + width: 75%; + height: 310px; + border: 2px solid #EBF2F3; + background: #FFF; + border-radius: 4px; + } + + .chart-info-wrap { + display: flex; + align-items: flex-end; + } + + .chart-info { + display: flex; + align-items: flex-end; + } + + + @media only screen and (min-width: 1350px) { + .clickable-test-feature-body { + display: flex; + flex-direction: row; + flex-wrap: wrap; + justify-content: center; + row-gap: 20px; + column-gap: 20px; + } + + .chart-box-wrap { + width: 45%; + display: flex; + justify-content: center; + margin-bottom: 0 !important; + } + + .chart-box { + width: 100%; + height: 310px; + border: 2px solid #EBF2F3; + background: #FFF; + border-radius: 4px; + } + } @@ -689,12 +928,12 @@
Profile viewer for whylogs
rel="noreferrer noopener" > @@ -716,98 +955,163 @@
Profile viewer for whylogs
-
+ -
+
- - + + +
+
+
+ +
+
+ +
+
@@ -796,9 +907,14 @@

Hold on! :)

target.innerHTML = html; } + + function initWebsiteScripts() { const $featureSearch = document.getElementById("wl__feature-search"); const $tableBody = document.getElementById("table-body"); + const $discrete = document.getElementById("inferredDiscrete"); + const $nonDiscrete = document.getElementById("inferredNonDiscrete"); + const $unknown = document.getElementById("inferredUnknown"); const activeTypes = { discrete: true, @@ -808,6 +924,8 @@

Hold on! :)

const getReferenceProfile = () => { return {{{reference_profile}}} } + let searchString = ""; + function debounce(func, wait, immediate) { let timeout; @@ -826,6 +944,17 @@

Hold on! :)

}; } + function filterNotification() { + const $notifCircleContainer = $(".notif-circle-container") + const $boxes = $('.wl_filter-options>.form-check>input[name=checkbox]:checked'); + const item = Object.values($boxes).find(function(value) { return $(value)[0] === undefined}); + if (item === undefined) { + $notifCircleContainer.removeClass("d-none") + } else { + $notifCircleContainer.addClass("d-none") + } + } + function handleSearch() { const tableBodyChildren = $tableBody.children; @@ -849,6 +978,36 @@

Hold on! :)

}, 100), ); + $discrete.addEventListener("change", (event) => { + if (event.currentTarget.checked) { + activeTypes["discrete"] = true; + } else { + activeTypes["discrete"] = false; + } + handleSearch(); + filterNotification() + }); + + $nonDiscrete.addEventListener("change", (event) => { + if (event.currentTarget.checked) { + activeTypes["non-discrete"] = true; + } else { + activeTypes["non-discrete"] = false; + } + handleSearch(); + filterNotification() + }); + + $unknown.addEventListener("change", (event) => { + if (event.currentTarget.checked) { + activeTypes["unknown"] = true; + } else { + activeTypes["unknown"] = false; + } + handleSearch(); + filterNotification() + }); + function checkCurrentProfile(item, referenceItem) { const refData = getReferenceProfile() if (refData && Object.values(refData)) { @@ -868,6 +1027,20 @@

Hold on! :)

} } + function openFilter() { + const $filterOptions = $(".dropdown-container"); + const filterClass = $filterOptions.attr("class"); + + if (filterClass.indexOf("d-none") > 0) { + $filterOptions.removeClass("d-none"); + $(".filter-icon").addClass("d-none") + $(".close-filter-icon").removeClass("d-none") + } else { + $filterOptions.addClass("d-none"); + $(".close-filter-icon").addClass("d-none") + $(".filter-icon").removeClass("d-none") + } + } // Invoke functions -- keep in mind invokation order registerHandlebarHelperFunctions(); From 57534b315dba6431a92540068215c35c13878397 Mon Sep 17 00:00:00 2001 From: Perch2005 Date: Mon, 31 Jan 2022 23:41:17 +0300 Subject: [PATCH 053/119] add ability to add multiple features in double histogram. --- ...cdn-all-in-jupyter-distribution-chart.html | 77 +++++++++++++++---- 1 file changed, 63 insertions(+), 14 deletions(-) diff --git a/src/whylogs/viewer/index-hbs-cdn-all-in-jupyter-distribution-chart.html b/src/whylogs/viewer/index-hbs-cdn-all-in-jupyter-distribution-chart.html index 226584f099..8606fe763f 100644 --- a/src/whylogs/viewer/index-hbs-cdn-all-in-jupyter-distribution-chart.html +++ b/src/whylogs/viewer/index-hbs-cdn-all-in-jupyter-distribution-chart.html @@ -24,9 +24,6 @@ > @@ -203,45 +170,37 @@
-
- Profile Summary: {{getProfileName this}}_{{getProfileTimeStamp this}} -
-
-
-
Observations
-
{{{observations}}}
-
-
-
Missing Cells
-
- {{{missingCells}}} -
(8.1%)
-
+
+ Profile Summary: {{getProfileName this}}_{{getProfileTimeStamp this}} +
+
+
+
Observations
+
{{{observations}}}
+
+
+
Missing Cells
+
+ {{{missingCells}}} +
(8.1%)
-
-
Duplicate Rows
-
- {{{duplicateRows}}} -
(0%)
-
+
+
+
Duplicate Rows
+
+ {{{duplicateRows}}} +
(0%)
-
-
Total Size
- {{{totalSize}}} -
-
-
Average record size
- {{{averageRecordSize}}} -
-
-
- -
-
-
32
-
-
-
+
+
+
Total Size
+ {{{totalSize}}} +
+
+
Average record size
+ {{{averageRecordSize}}} +
+
@@ -344,7 +303,6 @@

Hold on! :)

return formatLabelDate(+properties.properties.dataTimestamp) }); - Handlebars.registerHelper("getProfileName", function (properties) { return properties.properties.tags.name }); @@ -363,13 +321,8 @@

Hold on! :)

target.innerHTML = html; } - function initWebsiteScripts() { - // Target HTML elements - } - // Invoke functions -- keep in mind invokation order registerHandlebarHelperFunctions(); initHandlebarsTemplate(); - initWebsiteScripts(); From 36c13acc7ae387d836649bc1843621566216e626 Mon Sep 17 00:00:00 2001 From: Perch2005 Date: Wed, 2 Feb 2022 16:47:59 +0300 Subject: [PATCH 057/119] remove the red color of statistics --- ...dex-hbs-cdn-all-in-jupyter-feature-summary-statistics.html | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/whylogs/viewer/index-hbs-cdn-all-in-jupyter-feature-summary-statistics.html b/src/whylogs/viewer/index-hbs-cdn-all-in-jupyter-feature-summary-statistics.html index 30367ad622..fb8973ccad 100644 --- a/src/whylogs/viewer/index-hbs-cdn-all-in-jupyter-feature-summary-statistics.html +++ b/src/whylogs/viewer/index-hbs-cdn-all-in-jupyter-feature-summary-statistics.html @@ -112,10 +112,6 @@ width: 49% ; } - .statistic-item:nth-child(1) > .statistic-number-title, .statistic-item:nth-child(1) > .statistic-number { - color: #E4593A; - } - .notif-circle-container{ position: absolute; top: -8px; From 7caf36e88011512a8cbe23ecf5f8a0e99b1ac078 Mon Sep 17 00:00:00 2001 From: Perch2005 Date: Wed, 2 Feb 2022 16:49:35 +0300 Subject: [PATCH 058/119] code cleaning --- ...x-hbs-cdn-all-in-for-jupyter-notebook.html | 403 ++++++++++-------- ...cdn-all-in-jupyter-distribution-chart.html | 4 +- ...in-jupyter-feature-summary-statistics.html | 269 ++++++------ 3 files changed, 349 insertions(+), 327 deletions(-) diff --git a/src/whylogs/viewer/index-hbs-cdn-all-in-for-jupyter-notebook.html b/src/whylogs/viewer/index-hbs-cdn-all-in-for-jupyter-notebook.html index 3899ea314d..110b92b5ad 100644 --- a/src/whylogs/viewer/index-hbs-cdn-all-in-for-jupyter-notebook.html +++ b/src/whylogs/viewer/index-hbs-cdn-all-in-for-jupyter-notebook.html @@ -273,68 +273,68 @@ justify-content: space-between; } - .align-items { - display: flex; - align-items: center; - } - - .display-flex{ - display: flex; - } - - .table-border-none { - padding: 0; - border: none; - } - - .flex-direction-colum { - display: flex; - flex-direction: column; - } - - .align-items { - align-items: center; - } - - .search-input{ - padding-top: 0 !important; - padding-bottom: 0 !important; - } - - .search-input input{ - border: none; - background: none; - outline: none; - height: 40px; - width: 100%; - font-size: 14px; - } - - .search-input img{ - height: 19px; - pointer-events: none; - } - - input::placeholder { - color: var(--secondaryLight1000); - } - - .bar.positive { - fill: #369BACB2; - } - - .bar.negative { - fill: #2683C9E5; - } + .align-items { + display: flex; + align-items: center; + } - .text-align-center { - text-align: center; - } + .display-flex{ + display: flex; + } + + .table-border-none { + padding: 0; + border: none; + } + + .flex-direction-colum { + display: flex; + flex-direction: column; + } + + .align-items { + align-items: center; + } + + .search-input{ + padding-top: 0 !important; + padding-bottom: 0 !important; + } - .drift-detection { - justify-content: space-between; - align-items: center; - } + .search-input input{ + border: none; + background: none; + outline: none; + height: 40px; + width: 100%; + font-size: 14px; + } + + .search-input img{ + height: 19px; + pointer-events: none; + } + + input::placeholder { + color: var(--secondaryLight1000); + } + + .bar.positive { + fill: #369BACB2; + } + + .bar.negative { + fill: #2683C9E5; + } + + .text-align-center { + text-align: center; + } + + .drift-detection { + justify-content: space-between; + align-items: center; + } .drift-detection-info-circle { width: 15px; @@ -457,16 +457,15 @@ border-radius: 1.25em; } + .justify-content-center { + justify-content: center; + } + + .wl-table-cell__graph-wrap { + width: 0; + } - .justify-content-center { - justify-content: center; - } - - .wl-table-cell__graph-wrap { - width: 0; - } - - .svg-container { + .svg-container { display: inline-block; position: relative; width: 85%; @@ -474,6 +473,7 @@ vertical-align: top; overflow: hidden; } + .svg-content-responsive { display: inline-block; position: absolute; @@ -484,14 +484,14 @@ min-width: 250px; } - @media screen and (min-width: 500px) { - .desktop-content { - display: block; - } - .no-responsive { - display: none; - } - } + @media screen and (min-width: 500px) { + .desktop-content { + display: block; + } + .no-responsive { + display: none; + } + } @@ -504,113 +504,113 @@
-
-
-
-
-
-

- Drift detected in 65 of 131 features -

-
-
-
-
-
-

{{severeDrift}}

-

Severe drift

-
-
-
-
-
-

{{moderateDrift}}

-

Moderate drift

-
-
-
-
-
-

{{mildDrift}}

-

Mild drift

-
-
-
-
-
-

{{minimalDrift}}

-

Minimal drift

-
-
-
-
-
-
- - -
-
-
- -
-
- -
-
-
-
-
- +
+
+
+
+
+

+ Drift detected in 65 of 131 features +

+
+
+
+
+
+

{{severeDrift}}

+

Severe drift

+
+
+
+
+
+

{{moderateDrift}}

+

Moderate drift

+
+
+
+
+
+

{{mildDrift}}

+

Mild drift

+
+
+
+
+
+

{{minimalDrift}}

+

Minimal drift

+
+
+
+
+
+
+ + +
+
+
+ +
+
+ +
+
+
+
+
+
@@ -894,6 +894,39 @@

Hold on! :)

Handlebars.registerHelper("getGraphHtml",(column,key) => graph(column, key, null)); Handlebars.registerHelper("getReferenceGraphHtml",(column,key) => graph(column, key, profileFromCSVfile)); + + Handlebars.registerHelper("getDiscreteTypeCount", function () { + let count = 0; + + Object.entries(this.columns).forEach((feature) => { + if (feature[1].numberSummary && feature[1].numberSummary.isDiscrete === true) { + count++; + } + }); + return count.toString(); + }); + + Handlebars.registerHelper("getNonDiscreteTypeCount", function () { + let count = 0; + + Object.entries(this.columns).forEach((feature) => { + if (feature[1].numberSummary && feature[1].numberSummary.isDiscrete === false) { + count++; + } + }); + return count; + }); + + Handlebars.registerHelper("getUnknownTypeCount", function () { + let count = 0; + + Object.entries(this.columns).forEach((feature) => { + if (!feature[1].numberSummary) { + count++; + } + }); + return count; + }); } function initHandlebarsTemplate() { @@ -907,8 +940,6 @@

Hold on! :)

target.innerHTML = html; } - - function initWebsiteScripts() { const $featureSearch = document.getElementById("wl__feature-search"); const $tableBody = document.getElementById("table-body"); diff --git a/src/whylogs/viewer/index-hbs-cdn-all-in-jupyter-distribution-chart.html b/src/whylogs/viewer/index-hbs-cdn-all-in-jupyter-distribution-chart.html index 8606fe763f..946e9b2241 100644 --- a/src/whylogs/viewer/index-hbs-cdn-all-in-jupyter-distribution-chart.html +++ b/src/whylogs/viewer/index-hbs-cdn-all-in-jupyter-distribution-chart.html @@ -125,7 +125,6 @@ color: #4F595B; } - @media screen and (min-width: 500px) { .desktop-content { display: block; @@ -134,7 +133,6 @@ display: none; } } - @@ -347,7 +345,7 @@

Hold on! :)

// Config handlebars and pass data to HBS template const source = document.getElementById("entry-template").innerHTML; const template = Handlebars.compile(source); - const html = template(context.columns); + const html = template(context); const target = document.getElementById("generated-html"); target.innerHTML = html; } diff --git a/src/whylogs/viewer/index-hbs-cdn-all-in-jupyter-feature-summary-statistics.html b/src/whylogs/viewer/index-hbs-cdn-all-in-jupyter-feature-summary-statistics.html index fb8973ccad..04cdb665a7 100644 --- a/src/whylogs/viewer/index-hbs-cdn-all-in-jupyter-feature-summary-statistics.html +++ b/src/whylogs/viewer/index-hbs-cdn-all-in-jupyter-feature-summary-statistics.html @@ -160,16 +160,14 @@ padding-right: 30px; } - @media screen and (min-width: 500px) { - .desktop-content { - display: block; - } - .no-responsive { - display: none; - } - } - - + @media screen and (min-width: 500px) { + .desktop-content { + display: block; + } + .no-responsive { + display: none; + } + } @@ -180,143 +178,143 @@
-
- {{featureName this}}: Summary Statistics {{getProfileName this}}_{{getProfileTimeStamp this}} +
+ {{featureName this}}: Summary Statistics {{getProfileName this}}_{{getProfileTimeStamp this}} +
+
+
+
+
Distinct (%)
+
{{distinct}}
+
+
+
Missing
+
{{missing}}
+
+
+
Infinite
+
{{infinite}}
+
+
+
Mean
+
{{mean this}}
+
+
+
Minimum
+
{{minimum this}}
+
+
+
Maximum
+
{{maximum this}}
+
+
+
Zeros
+
{{zeros}}
+
+
+
Negative
+
{{negative}}
+
+
+
Memory size
+
{{memorySize}}
+
-
-
-
-
Distinct (%)
-
{{distinct}}
-
-
-
Missing
-
{{missing}}
-
-
-
Infinite
-
{{infinite}}
-
-
-
Mean
-
{{mean this}}
-
-
-
Minimum
-
{{minimum this}}
-
-
-
Maximum
-
{{maximum this}}
+
+
+
+
+
+
+ Quantile statistics
-
-
Zeros
-
{{zeros}}
-
-
-
Negative
-
{{negative}}
-
-
-
Memory size
-
{{memorySize}}
-
-
-
-
-
-
-
- Quantile statistics +
+
Minimum
+
{{minimum this}}
+
+
+
5-th percentile
+
45.5
+
+
+
Q1
+
223.5
+
+
+
median
+
{{median this}}
+
+
+
Q3
+
668.5
+
+
+
95-th percentile
+
846.5
+
+
+
Maximum
+
{{maximum this}}
+
+
+
Range
+
890
-
-
-
Minimum
-
{{minimum this}}
-
-
-
5-th percentile
-
45.5
-
-
-
Q1
-
223.5
-
-
-
median
-
{{median this}}
-
-
-
Q3
-
668.5
-
-
-
95-th percentile
-
846.5
-
-
-
Maximum
-
{{maximum this}}
-
-
-
Range
-
890
-
-
-
Interquartile range (IQR)
-
445
-
+
+
Interquartile range (IQR)
+
445
-
+
+
+
+
+ Descriptive statistics +
-
- Descriptive statistics +
+
Standard deviation
+
247.353842
+
+
+
Coefficient of variation (CV)
+
0.5770265516
+
+
+
Kurtosis
+
-1.2
+
+
+
Mean
+
446
+
+
+
Median Absolute Deviation (MAD)
+
223
-
-
-
Standard deviation
-
247.353842
-
-
-
Coefficient of variation (CV)
-
0.5770265516
-
-
-
Kurtosis
-
-1.2
-
-
-
Mean
-
446
-
-
-
Median Absolute Deviation (MAD)
-
223
-
-
-
Skewness
-
0
-
-
-
Sum
-
397386
-
-
-
Variance
-
66231
-
-
-
Monotonicity
-
Strictly increasing
-
+
+
Skewness
+
0
+
+
+
Sum
+
397386
+
+
+
Variance
+
66231
+
+
+
Monotonicity
+
Strictly increasing
+
@@ -447,13 +445,8 @@

Hold on! :)

target.innerHTML = html; } - function initWebsiteScripts() { - // Target HTML elements - } - // Invoke functions -- keep in mind invokation order registerHandlebarHelperFunctions(); initHandlebarsTemplate(); - initWebsiteScripts(); \ No newline at end of file From 4eb42298875fe5cf2575f2b8277532eda8bd139e Mon Sep 17 00:00:00 2001 From: ssxcho Date: Wed, 2 Feb 2022 22:36:07 +0400 Subject: [PATCH 059/119] add ability for user to choose html template height, code cleaning --- src/whylogs/viz/jupyter_notebook_viz.py | 136 ++++++++++++++---------- 1 file changed, 82 insertions(+), 54 deletions(-) diff --git a/src/whylogs/viz/jupyter_notebook_viz.py b/src/whylogs/viz/jupyter_notebook_viz.py index 3d8ff288ab..2efb1d2212 100644 --- a/src/whylogs/viz/jupyter_notebook_viz.py +++ b/src/whylogs/viz/jupyter_notebook_viz.py @@ -39,21 +39,21 @@ def __init__(self, profiles: List[DatasetProfile] = None, reference_profiles: Li "Got no profile data, make sure you pass data correctly ") return None - def __display_html(self, output_index, height): + def __display_html(self, template, height): # convert html to iframe and return it wrapped in Ipython...HTML() - iframe = f'''''' + iframe = f'''''' return HTML(iframe) - def __iframe_output_height(self, html_frame_height): + def __get_iframe_output_height(self, html_frame_height): # add all required heights and widths for individual HTMLs to be displayed in notebook - sizes = {'index-hbs-cdn-all-in-for-jupyter-notebook.html': 'height=1000px', - 'index-hbs-cdn-all-in-jupyter-distribution-chart.html': 'height=277px', - 'index-hbs-cdn-all-in-jupyter-full-summary-statistics.html': 'height=250px', - 'index-hbs-cdn-all-in-jupyter-feature-summary-statistics.html': 'height=650px', + sizes = {'index-hbs-cdn-all-in-for-jupyter-notebook.html': '1000px', + 'index-hbs-cdn-all-in-jupyter-distribution-chart.html': '277px', + 'index-hbs-cdn-all-in-jupyter-full-summary-statistics.html': '250px', + 'index-hbs-cdn-all-in-jupyter-feature-summary-statistics.html': '650px', } return str(sizes.get(html_frame_height)) - def __html_template_data(self, index_path): + def __compile_html_template(self, template_path): # bind profile jsons to html template try: from pybars import Compiler @@ -63,7 +63,7 @@ def __html_template_data(self, index_path): logger.debug( "Unable to load pybars; install pybars3 to load profile from directly from the current session " ) - with open(index_path, "r") as file_with_template: + with open(template_path, "r") as file_with_template: source = file_with_template.read() # compile templated files compiler = Compiler() @@ -77,29 +77,44 @@ def __extract_feature_data(self, profile_jsons, feature_name): feature_data[feature_name] = profile_features.get('columns').get(feature_name) return feature_data - def summary(self): - index_path = os.path.abspath(os.path.join( - _MY_DIR, os.pardir, "viewer", - "index-hbs-cdn-all-in-for-jupyter-notebook.html") + def __create_template_path(self, html_file_name): + template_path = os.path.abspath( + os.path.join( + _MY_DIR, os.pardir, "viewer", html_file_name + ) + ) + return template_path + + def summary(self, frame_height=None): + if frame_height: + html_frame_height = frame_height + else: + html_frame_height = self.__get_iframe_output_height( + "index-hbs-cdn-all-in-for-jupyter-notebook.html" + ) + template = self.__compile_html_template( + self.__create_template_path( + "index-hbs-cdn-all-in-for-jupyter-notebook.html" + ) ) - html_frame_height = self.__iframe_output_height( - "index-hbs-cdn-all-in-for-jupyter-notebook.html") - template = self.__html_template_data(index_path) if self.reference_profiles: - output_index = template( + profiles_summary = template( {"profile_from_whylogs": self.profile_jsons[0], "reference_profile": self.reference_profile_jsons[0]} ) + return self.__display_html(profiles_summary, html_frame_height) else: - output_index = template( + target_profile_summary = template( {"profile_from_whylogs": self.profile_jsons[0]} ) - return self.__display_html(output_index, html_frame_height) + return self.__display_html(target_profile_summary, html_frame_height) def download(self, html, path=None, html_file_name=None): # code to write html arg to file and generate name using TimeStamp if path: - output_path = os.path.abspath(os.path.expanduser(path)) + output_path = os.path.abspath( + os.path.expanduser(path) + ) else: output_path = os.path.abspath( os.path.join( @@ -119,16 +134,18 @@ def download(self, html, path=None, html_file_name=None): saved_html.write(html.data) saved_html.close() - def feature(self, names): - index_path = os.path.abspath(os.path.join( - _MY_DIR, os.pardir, "viewer", - "index-hbs-cdn-all-in-jupyter-distribution-chart.html") - ) - - html_frame_height = self.__iframe_output_height( - "index-hbs-cdn-all-in-jupyter-distribution-chart.html" + def feature(self, names, frame_height=None): + if frame_height: + html_frame_height = frame_height + else: + html_frame_height = self.__get_iframe_output_height( + "index-hbs-cdn-all-in-jupyter-distribution-chart.html" + ) + template = self.__compile_html_template( + self.__create_template_path( + "index-hbs-cdn-all-in-jupyter-distribution-chart.html" + ) ) - template = self.__html_template_data(index_path) # replace handlebars for json profiles if self.reference_profiles: profile_feature = json.loads(self.profile_jsons[0]) @@ -138,52 +155,59 @@ def feature(self, names): for name in names: profile_from_whylogs[name] = profile_feature.get('columns').get(name) reference_profile[name] = reference_profile_feature.get('columns').get(name) - output_index = template( + distribution_chart = template( {"profile_from_whylogs": json.dumps(profile_from_whylogs), "reference_profile": json.dumps(reference_profile)} ) - return self.__display_html(output_index, html_frame_height) + return self.__display_html(distribution_chart, html_frame_height) else: logger.warning( "This method has to get both target and reference profiles, with valid feature title" ) return None - def summary_statistics(self, profile): - index_path = os.path.abspath(os.path.join( - _MY_DIR, os.pardir, "viewer", - "index-hbs-cdn-all-in-jupyter-full-summary-statistics.html") + def summary_statistics(self, profile, frame_height=None): + if frame_height: + html_frame_height = frame_height + else: + html_frame_height = self.__get_iframe_output_height( + "index-hbs-cdn-all-in-jupyter-full-summary-statistics.html" + ) + template = self.__compile_html_template( + self.__create_template_path( + "index-hbs-cdn-all-in-jupyter-full-summary-statistics.html" + ) ) - html_frame_height = self.__iframe_output_height( - "index-hbs-cdn-all-in-jupyter-full-summary-statistics.html") - template = self.__html_template_data(index_path) if self.reference_profiles and profile == 'Reference': - output_index = template( + reference_summary_statistics = template( {"reference_profile": self.reference_profile_jsons[0]} ) + return self.__display_html(reference_summary_statistics, html_frame_height) elif profile == 'Target': - output_index = template( + target_profile_statistics = template( {"profile_from_whylogs": self.profile_jsons[0]} ) + return self.__display_html(target_profile_statistics, html_frame_height) else: logger.warning( "Please select from available options, 'Target' or 'Reference'" ) - return self.__display_html(output_index, html_frame_height) - - def feature_summary_statistics(self, feature_name, profile): - index_path = os.path.abspath(os.path.join( - _MY_DIR, os.pardir, "viewer", - "index-hbs-cdn-all-in-jupyter-feature-summary-statistics.html") - ) - html_frame_height = self.__iframe_output_height( - "index-hbs-cdn-all-in-jupyter-feature-summary-statistics.html" + def feature_summary_statistics(self, feature_name, profile, frame_height=None): + if frame_height: + html_frame_height = frame_height + else: + html_frame_height = self.__get_iframe_output_height( + "index-hbs-cdn-all-in-jupyter-feature-summary-statistics.html" + ) + template = self.__compile_html_template( + self.__create_template_path( + "index-hbs-cdn-all-in-jupyter-feature-summary-statistics.html" + ) ) - template = self.__html_template_data(index_path) # replace handlebars for json profiles if self.reference_profiles and profile == 'Reference': - output_index = template( + reference_feature_summary_statistics = template( { "reference_profile": json.dumps( __extract_feature_data( @@ -192,9 +216,11 @@ def feature_summary_statistics(self, feature_name, profile): ) } ) - return self.__display_html(output_index, html_frame_height) + return self.__display_html( + reference_feature_summary_statistics, html_frame_height + ) elif self.profiles and profile == 'Target': - output_index = template( + target_feature_summary_statistics = template( { "profile_from_whylogs": json.dumps( __extract_feature_data( @@ -203,7 +229,9 @@ def feature_summary_statistics(self, feature_name, profile): ) } ) - return self.__display_html(output_index, html_frame_height) + return self.__display_html( + target_feature_summary_statistics, html_frame_height + ) else: logger.warning( "Make sure you have profile logged in and pass a valid feature name" From 1ac37449cecfd995bb1825322932398f346dc995 Mon Sep 17 00:00:00 2001 From: Jirayr-Solvee Date: Sun, 6 Feb 2022 06:58:12 +0400 Subject: [PATCH 060/119] code clearing, better naming --- ...x-hbs-cdn-all-in-for-jupyter-notebook.html | 32 +- ...cdn-all-in-jupyter-distribution-chart.html | 14 +- ...in-jupyter-feature-summary-statistics.html | 22 +- ...ll-in-jupyter-full-summary-statistics.html | 34 +-- src/whylogs/viz/jupyter_notebook_viz.py | 281 +++++++----------- 5 files changed, 162 insertions(+), 221 deletions(-) diff --git a/src/whylogs/viewer/index-hbs-cdn-all-in-for-jupyter-notebook.html b/src/whylogs/viewer/index-hbs-cdn-all-in-for-jupyter-notebook.html index 110b92b5ad..fbd9762e06 100644 --- a/src/whylogs/viewer/index-hbs-cdn-all-in-for-jupyter-notebook.html +++ b/src/whylogs/viewer/index-hbs-cdn-all-in-for-jupyter-notebook.html @@ -281,21 +281,21 @@ .display-flex{ display: flex; } - + .table-border-none { padding: 0; border: none; } - + .flex-direction-colum { display: flex; flex-direction: column; } - + .align-items { align-items: center; } - + .search-input{ padding-top: 0 !important; padding-bottom: 0 !important; @@ -309,28 +309,28 @@ width: 100%; font-size: 14px; } - + .search-input img{ height: 19px; pointer-events: none; } - + input::placeholder { color: var(--secondaryLight1000); } - + .bar.positive { fill: #369BACB2; } - + .bar.negative { fill: #2683C9E5; } - + .text-align-center { text-align: center; } - + .drift-detection { justify-content: space-between; align-items: center; @@ -405,7 +405,7 @@ margin-right: 5px; } - .drift-detection-search-input input::placeholder { + .drift-detection-search-input input::placeholder { font-family: Arial; font-weight: normal; font-size: 13px; @@ -460,7 +460,7 @@ .justify-content-center { justify-content: center; } - + .wl-table-cell__graph-wrap { width: 0; } @@ -551,7 +551,7 @@
- +
@@ -818,7 +818,7 @@

Hold on! :)

const randomNumbers = (range) => Math.floor(Math.random() * range) - const profileFromCSVfile = {{{reference_profile}}} + const profileFromCSVfile = {{{reference_profile_from_whylogs}}} Handlebars.registerHelper("severeDrift", function () { return randomNumbers(50); @@ -953,7 +953,7 @@

Hold on! :)

unknown: true, }; - const getReferenceProfile = () => { return {{{reference_profile}}} } + const getReferenceProfile = () => { return {{{reference_profile_from_whylogs}}} } let searchString = ""; @@ -1058,7 +1058,7 @@

Hold on! :)

} } - function openFilter() { + function openFilter() { const $filterOptions = $(".dropdown-container"); const filterClass = $filterOptions.attr("class"); diff --git a/src/whylogs/viewer/index-hbs-cdn-all-in-jupyter-distribution-chart.html b/src/whylogs/viewer/index-hbs-cdn-all-in-jupyter-distribution-chart.html index 946e9b2241..d82461b52b 100644 --- a/src/whylogs/viewer/index-hbs-cdn-all-in-jupyter-distribution-chart.html +++ b/src/whylogs/viewer/index-hbs-cdn-all-in-jupyter-distribution-chart.html @@ -27,10 +27,10 @@ :root { /** Branded colors */ - --brandSecondary900: #4f595b; + --brandSecondary900: #4f595b; --secondaryLight1000: #313b3d; /** Purpose colors */ - --tealBackground: #eaf2f3; + --tealBackground: #eaf2f3; } /* RESET STYLE */ @@ -41,7 +41,7 @@ padding: 0; box-sizing: border-box; } - + /* Screen on smaller screens */ .no-responsive { display: none; @@ -142,7 +142,7 @@
{{#each this}}
-
+

{{@key}}

@@ -247,7 +247,7 @@

Hold on! :)

histogramData = chartData(data) overlappedHistogramData = chartData(referenceData) } - + let yFormat, xFormat; const sizes = new GenerateChartParams($(window).height()-32, $(window).width(), histogramData) @@ -322,13 +322,13 @@

Hold on! :)

return svgEl._groups[0][0].outerHTML; } - const profileFromCSVfile = {{{reference_profile}}} + const profileFromCSVfile = {{{reference_profile_from_whylogs}}} Handlebars.registerHelper("getDoubleHistogramChart",(column,key) => { const columnKey = key.data.key if (profileFromCSVfile) { return generateDoubleHistogramChart ( - column, + column, profileFromCSVfile[columnKey] ) } diff --git a/src/whylogs/viewer/index-hbs-cdn-all-in-jupyter-feature-summary-statistics.html b/src/whylogs/viewer/index-hbs-cdn-all-in-jupyter-feature-summary-statistics.html index 04cdb665a7..d06b807416 100644 --- a/src/whylogs/viewer/index-hbs-cdn-all-in-jupyter-feature-summary-statistics.html +++ b/src/whylogs/viewer/index-hbs-cdn-all-in-jupyter-feature-summary-statistics.html @@ -43,7 +43,7 @@ @media screen and (min-width: 1000px) { .desktop-content { display: block; - } + } .no-responsive { display: none; } @@ -163,7 +163,7 @@ @media screen and (min-width: 500px) { .desktop-content { display: block; - } + } .no-responsive { display: none; } @@ -349,19 +349,19 @@

Hold on! :)

} const randomNumbers = (range) => Math.floor(Math.random() * range) - + Handlebars.registerHelper("distinct", function () { return randomNumbers(50); }); - + Handlebars.registerHelper("missing", function () { return randomNumbers(50); }); - + Handlebars.registerHelper("infinite", function () { return randomNumbers(50); }); - + Handlebars.registerHelper("mean", function (column) { const feture = Object.values(column)[1] if (feture.numberSummary) { @@ -405,7 +405,7 @@

Hold on! :)

} return "-"; }); - + Handlebars.registerHelper("zeros", function () { return randomNumbers(50); }); @@ -434,9 +434,9 @@

Hold on! :)

function initHandlebarsTemplate() { // Replace this context with JSON from .py file - const targetedProfile = () => { return {{{profile_from_whylogs}}} } - const profileFromCSVfile = () => { return {{{reference_profile}}} } - const context = profileFromCSVfile() ? profileFromCSVfile() : targetedProfile(); + // const targetedProfile = () => { return {{{profile_from_whylogs}}} } + // const profileFromCSVfile = () => { return {{{profile_feature_summary_statistics_from_whylogs}}} } + const context = {{{profile_feature_summary_statistics_from_whylogs}}}; // Config handlebars and pass data to HBS template const source = document.getElementById("entry-template").innerHTML; const template = Handlebars.compile(source); @@ -449,4 +449,4 @@

Hold on! :)

registerHandlebarHelperFunctions(); initHandlebarsTemplate(); - \ No newline at end of file + diff --git a/src/whylogs/viewer/index-hbs-cdn-all-in-jupyter-full-summary-statistics.html b/src/whylogs/viewer/index-hbs-cdn-all-in-jupyter-full-summary-statistics.html index f2d750ecd6..fcc31b8090 100644 --- a/src/whylogs/viewer/index-hbs-cdn-all-in-jupyter-full-summary-statistics.html +++ b/src/whylogs/viewer/index-hbs-cdn-all-in-jupyter-full-summary-statistics.html @@ -43,7 +43,7 @@ @media screen and (min-width: 1000px) { .desktop-content { display: block; - } + } .no-responsive { display: none; } @@ -107,7 +107,7 @@ position: relative; display: inline-block; } - + .tooltip-full-number .tooltiptext { visibility: hidden; background-color: #0E7384; @@ -126,7 +126,7 @@ font-weight: normal; line-height: 100%; } - + .tooltip-full-number:hover .tooltiptext { visibility: visible; opacity: 1; @@ -155,7 +155,7 @@ @media screen and (min-width: 500px) { .desktop-content { display: block; - } + } .no-responsive { display: none; } @@ -181,20 +181,20 @@
Missing Cells
- {{{missingCells}}} + {{{missingCells}}}
(8.1%)
Duplicate Rows
- {{{duplicateRows}}} + {{{duplicateRows}}}
(0%)
Total Size
- {{{totalSize}}} + {{{totalSize}}}
Average record size
@@ -253,18 +253,18 @@

Hold on! :)

let newValue, suffixe = "" if (bytes === 0) return '0 Bytes'; - + const k = 1024; const dm = decimals < 0 ? 0 : decimals; const sizes = ['Bytes', 'KiB', 'MB', 'GB', 'TB', 'PB', 'EB', 'ZB', 'YB']; - + const i = Math.floor(Math.log(bytes) / Math.log(k)); newValue = parseFloat((bytes / Math.pow(k, i)).toFixed(dm)); suffixe = sizes[i] return {bytes, newValue, suffixe}; } - const numberWithSuffixe = (number, newNumber, suffixe) => + const numberWithSuffixe = (number, newNumber, suffixe) => `
${newNumber}
${suffixe}
@@ -273,22 +273,22 @@

Hold on! :)

` const randomNumbers = (range) => Math.floor(Math.random() * range) - + Handlebars.registerHelper("observations", function () { const {value, newValue, suffixe} = abbreviateNumber(randomNumbers(900000000)) return numberWithSuffixe(value, newValue, suffixe); }); - + Handlebars.registerHelper("missingCells", function () { const {value, newValue, suffixe} = abbreviateNumber(randomNumbers(90000)) return numberWithSuffixe(value, newValue, suffixe); }); - + Handlebars.registerHelper("duplicateRows", function () { const {value, newValue, suffixe} = abbreviateNumber(randomNumbers(90000)) return numberWithSuffixe(value, newValue, suffixe); }); - + Handlebars.registerHelper("averageRecordSize", function () { const {bytes, newValue, suffixe} = formatBytes(randomNumbers(900000)) return numberWithSuffixe(bytes, newValue, suffixe); @@ -310,9 +310,9 @@

Hold on! :)

function initHandlebarsTemplate() { // Replace this context with JSON from .py file - const targetedProfile = () => { return {{{profile_from_whylogs}}} } - const profileFromCSVfile = () => { return {{{reference_profile}}} } - const context = profileFromCSVfile() ? profileFromCSVfile() : targetedProfile(); + // const targetedProfile = () => { return } + // const profileFromCSVfile = () => { return {{{reference_profile}}} } + const context = {{{profile_summary_statistics_from_whylogs}}} // Config handlebars and pass data to HBS template const source = document.getElementById("entry-template").innerHTML; const template = Handlebars.compile(source); diff --git a/src/whylogs/viz/jupyter_notebook_viz.py b/src/whylogs/viz/jupyter_notebook_viz.py index 2efb1d2212..56d67a6d83 100644 --- a/src/whylogs/viz/jupyter_notebook_viz.py +++ b/src/whylogs/viz/jupyter_notebook_viz.py @@ -1,7 +1,5 @@ import logging import os -import tempfile -import webbrowser import sys import json import html @@ -18,43 +16,48 @@ class DisplayProfile: - - def __init__(self, profiles: List[DatasetProfile] = None, reference_profiles: List[DatasetProfile] = None): - self.profiles = profiles + SUMMARY_REPORT_TEMPLATE_NAME = 'index-hbs-cdn-all-in-for-jupyter-notebook.html' + SUMMARY_STATISTICS_TEMPLATE_NAME = 'index-hbs-cdn-all-in-jupyter-full-summary-statistics.html' + DOUBLE_HISTOGRAM_TEMPLATE_NAME = 'index-hbs-cdn-all-in-jupyter-distribution-chart.html' + FEATURE_STATISTICS_TEMPLATE_NAME = 'index-hbs-cdn-all-in-jupyter-feature-summary-statistics.html' + PAGE_SIZES = { + SUMMARY_REPORT_TEMPLATE_NAME: '1000px', + DOUBLE_HISTOGRAM_TEMPLATE_NAME: '277px', + SUMMARY_STATISTICS_TEMPLATE_NAME: '250px', + FEATURE_STATISTICS_TEMPLATE_NAME: '650px', + } + + def __init__(self, target_profiles: List[DatasetProfile] = None, reference_profiles: List[DatasetProfile] = None): + self.target_profiles = target_profiles self.reference_profiles = reference_profiles # create json output from profiles - if self.profiles: - if len(self.profiles) > 1: + if self.target_profiles: + if len(self.target_profiles) > 1: logger.warning( "More than one profile not implemented yet, default to first profile in the list " ) - self.profile_jsons = [message_to_json(each_prof.to_summary()) - for each_prof in self.profiles] + self.target_profile_jsons = [ + message_to_json(each_prof.to_summary()) for each_prof in self.target_profiles + ] if self.reference_profiles: - self.reference_profile_jsons = [message_to_json(each_prof.to_summary()) - for each_prof in self.reference_profiles] - + self.reference_profile_jsons = [ + message_to_json(each_prof.to_summary()) for each_prof in self.reference_profiles + ] else: logger.warning( "Got no profile data, make sure you pass data correctly ") return None - def __display_html(self, template, height): - # convert html to iframe and return it wrapped in Ipython...HTML() - iframe = f'''''' - return HTML(iframe) + def __get_template_path(self, html_file_name): + template_path = os.path.abspath( + os.path.join( + _MY_DIR, os.pardir, "viewer", html_file_name + ) + ) + return template_path - def __get_iframe_output_height(self, html_frame_height): - # add all required heights and widths for individual HTMLs to be displayed in notebook - sizes = {'index-hbs-cdn-all-in-for-jupyter-notebook.html': '1000px', - 'index-hbs-cdn-all-in-jupyter-distribution-chart.html': '277px', - 'index-hbs-cdn-all-in-jupyter-full-summary-statistics.html': '250px', - 'index-hbs-cdn-all-in-jupyter-feature-summary-statistics.html': '650px', - } - return str(sizes.get(html_frame_height)) - - def __compile_html_template(self, template_path): - # bind profile jsons to html template + def __get_compiled_template(self, template_name): + template_path = self.__get_template_path(template_name) try: from pybars import Compiler except ImportError as e: @@ -70,170 +73,108 @@ def __compile_html_template(self, template_path): template = compiler.compile(source) return template - def __extract_feature_data(self, profile_jsons, feature_name): + def __pull_feature_data(self, profile_jsons, feature_name): profile_features = json.loads(profile_jsons[0]) feature_data = {} feature_data['properties'] = profile_features.get('properties') feature_data[feature_name] = profile_features.get('columns').get(feature_name) return feature_data - def __create_template_path(self, html_file_name): - template_path = os.path.abspath( - os.path.join( - _MY_DIR, os.pardir, "viewer", html_file_name - ) - ) - return template_path + # rename + def __display_rendered_template(self, template, template_name, height): + if not height: + height = self.PAGE_SIZES[template_name] + # convert html to iframe and return it wrapped in Ipython...HTML() + iframe = f'''''' + return HTML(iframe) def summary(self, frame_height=None): - if frame_height: - html_frame_height = frame_height - else: - html_frame_height = self.__get_iframe_output_height( - "index-hbs-cdn-all-in-for-jupyter-notebook.html" - ) - template = self.__compile_html_template( - self.__create_template_path( - "index-hbs-cdn-all-in-for-jupyter-notebook.html" - ) - ) + template = self.__get_compiled_template(self.SUMMARY_REPORT_TEMPLATE_NAME) + profiles_summary = {"profile_from_whylogs": self.target_profile_jsons[0]} if self.reference_profiles: - profiles_summary = template( - {"profile_from_whylogs": self.profile_jsons[0], - "reference_profile": self.reference_profile_jsons[0]} - ) - return self.__display_html(profiles_summary, html_frame_height) - else: - target_profile_summary = template( - {"profile_from_whylogs": self.profile_jsons[0]} - ) - return self.__display_html(target_profile_summary, html_frame_height) - - def download(self, html, path=None, html_file_name=None): - # code to write html arg to file and generate name using TimeStamp - if path: - output_path = os.path.abspath( - os.path.expanduser(path) - ) - else: - output_path = os.path.abspath( - os.path.join( - os.pardir, "html_reports" - ) - ) - data_timestamp = '' - if html_file_name: - file_name = html_file_name - elif self.reference_profiles: - data_timestamp = self.reference_profiles[0].dataset_timestamp - else: - data_timestamp = self.profiles[0].dataset_timestamp - - full_path = os.path.join(output_path, str(data_timestamp)+".html") - with open(full_path, "w") as saved_html: - saved_html.write(html.data) - saved_html.close() - - def feature(self, names, frame_height=None): - if frame_height: - html_frame_height = frame_height - else: - html_frame_height = self.__get_iframe_output_height( - "index-hbs-cdn-all-in-jupyter-distribution-chart.html" - ) - template = self.__compile_html_template( - self.__create_template_path( - "index-hbs-cdn-all-in-jupyter-distribution-chart.html" - ) + profiles_summary["reference_profile_from_whylogs"] = self.reference_profile_jsons[0] + return self.__display_rendered_template( + template(profiles_summary), + self.SUMMARY_REPORT_TEMPLATE_NAME, + frame_height ) - # replace handlebars for json profiles + + def feature(self, feature_names, frame_height=None): + if type(feature_names) is not list: + feature_names = [feature_names] + template = self.__get_compiled_template(self.DOUBLE_HISTOGRAM_TEMPLATE_NAME) if self.reference_profiles: - profile_feature = json.loads(self.profile_jsons[0]) - reference_profile_feature = json.loads(self.reference_profile_jsons[0]) - profile_from_whylogs = {} - reference_profile = {} - for name in names: - profile_from_whylogs[name] = profile_feature.get('columns').get(name) - reference_profile[name] = reference_profile_feature.get('columns').get(name) - distribution_chart = template( - {"profile_from_whylogs": json.dumps(profile_from_whylogs), - "reference_profile": json.dumps(reference_profile)} + target_profile_columns = json.loads(self.target_profile_jsons[0]).get('columns') + reference_profile_columns = json.loads(self.reference_profile_jsons[0]).get('columns') + target_profile_features, reference_profile_features = {}, {} + for feature_name in feature_names: + target_profile_features[feature_name] = target_profile_columns.get(feature_name) + reference_profile_features[feature_name] = reference_profile_columns.get( + feature_name + ) + distribution_chart = template({ + "profile_from_whylogs": json.dumps(target_profile_features), + "reference_profile_from_whylogs": json.dumps(reference_profile_features) + }) + return self.__display_rendered_template( + distribution_chart, + self.DOUBLE_HISTOGRAM_TEMPLATE_NAME, + frame_height ) - return self.__display_html(distribution_chart, html_frame_height) else: logger.warning( "This method has to get both target and reference profiles, with valid feature title" ) return None - def summary_statistics(self, profile, frame_height=None): - if frame_height: - html_frame_height = frame_height + def summary_statistics(self, profile_name=None, frame_height=None): + template = self.__get_compiled_template(self.SUMMARY_STATISTICS_TEMPLATE_NAME) + if self.reference_profiles: + if not profile_name or profile_name.lower() == 'reference': + profile_statistics = self.reference_profile_jsons[0] else: - html_frame_height = self.__get_iframe_output_height( - "index-hbs-cdn-all-in-jupyter-full-summary-statistics.html" - ) - template = self.__compile_html_template( - self.__create_template_path( - "index-hbs-cdn-all-in-jupyter-full-summary-statistics.html" - ) + profile_statistics = self.target_profile_jsons[0] + rendered_template = template({ + "profile_summary_statistics_from_whylogs": profile_statistics + }) + return self.__display_rendered_template( + rendered_template, + self.SUMMARY_STATISTICS_TEMPLATE_NAME, + frame_height ) - if self.reference_profiles and profile == 'Reference': - reference_summary_statistics = template( - {"reference_profile": self.reference_profile_jsons[0]} - ) - return self.__display_html(reference_summary_statistics, html_frame_height) - elif profile == 'Target': - target_profile_statistics = template( - {"profile_from_whylogs": self.profile_jsons[0]} - ) - return self.__display_html(target_profile_statistics, html_frame_height) - else: - logger.warning( - "Please select from available options, 'Target' or 'Reference'" - ) - def feature_summary_statistics(self, feature_name, profile, frame_height=None): - if frame_height: - html_frame_height = frame_height + def feature_summary_statistics(self, feature_name, profile_name, frame_height=None): + template = self.__get_compiled_template(self.FEATURE_STATISTICS_TEMPLATE_NAME) + if self.reference_profiles and (not profile_name or profile_name.lower() == 'reference'): + selected_profile = self.reference_profile_jsons else: - html_frame_height = self.__get_iframe_output_height( - "index-hbs-cdn-all-in-jupyter-feature-summary-statistics.html" - ) - template = self.__compile_html_template( - self.__create_template_path( - "index-hbs-cdn-all-in-jupyter-feature-summary-statistics.html" - ) + selected_profile = self.target_profile_jsons + + rendered_template = template({ + "profile_feature_summary_statistics_from_whylogs": json.dumps( + self.__pull_feature_data(selected_profile, feature_name) + )} ) - # replace handlebars for json profiles - if self.reference_profiles and profile == 'Reference': - reference_feature_summary_statistics = template( - { - "reference_profile": json.dumps( - __extract_feature_data( - self, self.reference_profile_jsons, feature_name - ) - ) - } - ) - return self.__display_html( - reference_feature_summary_statistics, html_frame_height - ) - elif self.profiles and profile == 'Target': - target_feature_summary_statistics = template( - { - "profile_from_whylogs": json.dumps( - __extract_feature_data( - self, self.profile_jsons, feature_name - ) - ) - } - ) - return self.__display_html( - target_feature_summary_statistics, html_frame_height - ) + return self.__display_rendered_template( + rendered_template, + self.FEATURE_STATISTICS_TEMPLATE_NAME, + frame_height + ) + + def download(self, html, prefered_path=None, html_file_name=None): + if not html_file_name: + if self.reference_profiles: + html_file_name = self.reference_profiles[0].dataset_timestamp + else: + html_file_name = self.target_profiles[0].dataset_timestamp + + if prefered_path: + path = os.path.expanduser(prefered_path) else: - logger.warning( - "Make sure you have profile logged in and pass a valid feature name" - ) - return None + path = os.path.join(os.pardir, "html_reports", str(html_file_name)+".html") + + full_path = os.path.abspath(path) + with open(full_path, "w") as saved_html: + saved_html.write(html.data) + saved_html.close() + return None From 30c9f6b5a2bebadbdf02ba200f25fee3a54ccaef Mon Sep 17 00:00:00 2001 From: Jirayr-Solvee Date: Sun, 6 Feb 2022 07:10:19 +0400 Subject: [PATCH 061/119] remove comment whitespaces --- src/whylogs/viz/jupyter_notebook_viz.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/whylogs/viz/jupyter_notebook_viz.py b/src/whylogs/viz/jupyter_notebook_viz.py index 56d67a6d83..90f0f53ce9 100644 --- a/src/whylogs/viz/jupyter_notebook_viz.py +++ b/src/whylogs/viz/jupyter_notebook_viz.py @@ -80,7 +80,6 @@ def __pull_feature_data(self, profile_jsons, feature_name): feature_data[feature_name] = profile_features.get('columns').get(feature_name) return feature_data - # rename def __display_rendered_template(self, template, template_name, height): if not height: height = self.PAGE_SIZES[template_name] @@ -149,7 +148,6 @@ def feature_summary_statistics(self, feature_name, profile_name, frame_height=No selected_profile = self.reference_profile_jsons else: selected_profile = self.target_profile_jsons - rendered_template = template({ "profile_feature_summary_statistics_from_whylogs": json.dumps( self.__pull_feature_data(selected_profile, feature_name) @@ -167,12 +165,10 @@ def download(self, html, prefered_path=None, html_file_name=None): html_file_name = self.reference_profiles[0].dataset_timestamp else: html_file_name = self.target_profiles[0].dataset_timestamp - if prefered_path: path = os.path.expanduser(prefered_path) else: path = os.path.join(os.pardir, "html_reports", str(html_file_name)+".html") - full_path = os.path.abspath(path) with open(full_path, "w") as saved_html: saved_html.write(html.data) From 1ae6c0838414554763fda2e50e7d72dc9683971a Mon Sep 17 00:00:00 2001 From: Jirayr-Solvee Date: Mon, 7 Feb 2022 09:16:09 +0400 Subject: [PATCH 062/119] add documentation in notebook, naming fixes --- examples/Profile_Viewer_In_Notebook.ipynb | 214 +++++++++++++++++----- src/whylogs/viz/__init__.py | 4 +- src/whylogs/viz/jupyter_notebook_viz.py | 31 ++-- 3 files changed, 185 insertions(+), 64 deletions(-) diff --git a/examples/Profile_Viewer_In_Notebook.ipynb b/examples/Profile_Viewer_In_Notebook.ipynb index 7e69ca7a47..35798b9b21 100644 --- a/examples/Profile_Viewer_In_Notebook.ipynb +++ b/examples/Profile_Viewer_In_Notebook.ipynb @@ -5,7 +5,26 @@ "id": "6b36ea8e", "metadata": {}, "source": [ - "# Profiler Viewer In Notebook" + "# 📈 Whylogs Profile Vizualization\n", + "### gives you various ways to simplify the proccess of detecting dataset drift. Bellow you can see the instructions for creating dummy dataset and list of currently available visual reports." + ] + }, + { + "cell_type": "markdown", + "id": "be56fca7", + "metadata": {}, + "source": [ + "### 🗂️Install dependencies and make imports" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "29e48da5", + "metadata": {}, + "outputs": [], + "source": [ + "!pip install faker" ] }, { @@ -16,7 +35,9 @@ "outputs": [], "source": [ "import numpy as np\n", - "import datetime" + "import datetime\n", + "from collections import OrderedDict\n", + "from faker import Faker" ] }, { @@ -27,8 +48,7 @@ "outputs": [], "source": [ "from whylogs import get_or_create_session\n", - "from whylogs.core import DatasetProfile\n", - "from whylogs.viz import DisplayProfile\n" + "from whylogs.viz import NotebookProfileViewer" ] }, { @@ -36,17 +56,7 @@ "id": "272f6054", "metadata": {}, "source": [ - "## Data for Logging" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "29e48da5", - "metadata": {}, - "outputs": [], - "source": [ - "!pip install faker" + "### ♻️Create dummy data" ] }, { @@ -56,15 +66,12 @@ "metadata": {}, "outputs": [], "source": [ - "from collections import OrderedDict\n", - "from faker import Faker\n", - "\n", "locales = OrderedDict([\n", " ('en-US', 1),\n", " ('fr-FR', 2),\n", " ('ja_JP', 2),\n", "])\n", - "fake= Faker(locales)\n", + "fake = Faker(locales)\n", "distribution = np.concatenate((np.random.normal(0.1, 0.1, 500), np.random.normal(0.6, 0.2, 500)))" ] }, @@ -85,17 +92,19 @@ "source": [ "session = get_or_create_session()\n", "def profile_generator():\n", - " with session.logger(\"mytestytest\",dataset_timestamp=datetime.datetime(2021, 6, 2)) as logger:\n", + " with session.logger(\"mytestytest\", dataset_timestamp=datetime.datetime(2021, 6, 2)) as logger:\n", " for _ in range(500):\n", " logger.log({\"uniform_integers\": np.random.randint(0,50)})\n", " logger.log({\"strings\": fake.name()})\n", " logger.log({\"mixture_distribution\": np.random.choice(distribution, 1)[0]})\n", " logger.log({\"nulls\": None})\n", - " logger.log({\"moah_data\":1})\n", - " logger.log({\"moah_data\":1})\n", - " logger.log({\"moah_data\":5})\n", + " logger.log({\"moah_data\": 1})\n", + " logger.log({\"moah_data\": 1})\n", + " logger.log({\"moah_data\": 5})\n", " return logger.profile\n", - "profile=profile_generator()" + "\n", + "target_profile = profile_generator()\n", + "reference_profile = profile_generator() " ] }, { @@ -103,15 +112,16 @@ "id": "8390f360", "metadata": {}, "source": [ - "## 👀 data visualization" + "## ✨ Vizualize profiles with Whylogs" ] }, { "cell_type": "markdown", - "id": "650d44c4", + "id": "562fbacf", "metadata": {}, "source": [ - "Pass profile data to `DisplayProfile` and show histogram data by calling `summary()` method, it will display a summary of your profiled dataset." + "### Initialization\n", + "initialize Profile viewer by passing profiles for which you want to get the visualizations" ] }, { @@ -121,8 +131,17 @@ "metadata": {}, "outputs": [], "source": [ - "visualization=DisplayProfile([profile])\n", - "visualization.summary()\n" + "# should we allow multiple profiles?\n", + "visualization = NotebookProfileViewer(target_profiles=[target_profile], reference_profiles=[reference_profile])" + ] + }, + { + "cell_type": "markdown", + "id": "87ea53b6", + "metadata": {}, + "source": [ + "###### `*target_profiles`: Profiled dataset which will be reffered as `target`\n", + "###### `*reference_profiles`: Profiled dataset which will be reffered as `reference`" ] }, { @@ -130,7 +149,15 @@ "id": "9908a9f9", "metadata": {}, "source": [ - "### Logging reference profile" + "### Summary Drift Report" + ] + }, + { + "cell_type": "markdown", + "id": "650d44c4", + "metadata": {}, + "source": [ + "You can get summary drift report for `target` and `reference` profiles features" ] }, { @@ -140,15 +167,31 @@ "metadata": {}, "outputs": [], "source": [ - "reference_profile=profile_generator() " + "visualization.summary_drift_report(preferred_cell_height=\"1000px\")" + ] + }, + { + "cell_type": "markdown", + "id": "c4be405d", + "metadata": {}, + "source": [ + "###### `preferred_cell_height`: height in `px` for generated vizualization cell " ] }, { "cell_type": "markdown", - "id": "2768a074", + "id": "4dd9e690", "metadata": {}, "source": [ - "### Add reference profile to viewer and open" + "### Double histogram" + ] + }, + { + "cell_type": "markdown", + "id": "28b512ca", + "metadata": {}, + "source": [ + "You can get double histogram for any of your features" ] }, { @@ -158,34 +201,87 @@ "metadata": {}, "outputs": [], "source": [ - "visualization=DisplayProfile([profile], [reference_profile])\n", - "visualization.summary()" + "visualization.double_histogram(feature_names=\"uniform_integers\")" ] }, { "cell_type": "markdown", - "id": "4dd9e690", + "id": "af84a332", "metadata": {}, "source": [ - "### Display double histogram of a feature" + "###### `*feature_names`: string or list of strings containing name of the features for which you want to see double histogram\n", + "###### `preferred_cell_height`: height in `px` for generated vizualization cell " ] }, { "cell_type": "markdown", - "id": "28b512ca", + "id": "d318761d", + "metadata": {}, + "source": [ + "### Summary Statistics" + ] + }, + { + "cell_type": "markdown", + "id": "32974704", + "metadata": {}, + "source": [ + "You can get set of useful statistics for both `target` and `reference` by passing the name" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d68c5e6a", + "metadata": {}, + "outputs": [], + "source": [ + "visualization.summary_statistics(profile=\"target\")" + ] + }, + { + "cell_type": "markdown", + "id": "2fc9c79e", + "metadata": {}, + "source": [ + "###### `profile_name`: `\"target\"` or `\"reference\"`\n", + "###### `prefered_cell_height`: height in `px` for generated vizualization cell " + ] + }, + { + "cell_type": "markdown", + "id": "78db1538", + "metadata": {}, + "source": [ + "### Feature Summary Statistics" + ] + }, + { + "cell_type": "markdown", + "id": "4dc0e3f6", "metadata": {}, "source": [ - "Both target profile and reference profile must be passed to `DisplayProfile`. By calling `feature()` method with your preferred feature title (`feature(\"title\")`) detailed double histogram will be displayed bellow." + "You can get set of useful statistics for features by passing the profile and feature names" ] }, { "cell_type": "code", "execution_count": null, - "id": "02f38de0", + "id": "fb77bcd5", "metadata": {}, "outputs": [], "source": [ - "visualization.feature(\"uniform_integers\")" + "visualization.feature_summary_statistics(feature_name\"uniform_integers\", profile=\"reference\")" + ] + }, + { + "cell_type": "markdown", + "id": "8b483a1d", + "metadata": {}, + "source": [ + "###### `*feature_name`: Any feature name from your profiled dataset\n", + "###### `profile_name`: `\"target\"` or `\"reference\"`\n", + "###### `prefered_cell_height`: height in `px` for generated vizualization cell " ] }, { @@ -196,6 +292,24 @@ "### Download prefered cell output" ] }, + { + "cell_type": "markdown", + "id": "2015ff5d", + "metadata": {}, + "source": [ + "You can also download any of those visualisation in `HTML` format for further analysys, by passing the vizualization name" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "980d7236", + "metadata": {}, + "outputs": [], + "source": [ + "visualization.download(html=visualization.summary_drift_report(), html_file_name='example')" + ] + }, { "cell_type": "markdown", "id": "01fbed2c", @@ -208,20 +322,28 @@ "If name of the file is not passed it will be name of the dataset followed by timestamp of the profile by default. " ] }, + { + "cell_type": "markdown", + "id": "7ffc293c", + "metadata": {}, + "source": [ + "###### `*feature_name`: Any feature name from your profiled dataset\n", + "###### `preferred_path`: save path `default:` `/html_reports` located in whylogs directory\n", + "###### `html_file_name`: name of the file `default:` name of the dataset followed by timestamp of the profile" + ] + }, { "cell_type": "code", "execution_count": null, - "id": "980d7236", + "id": "8eaac410", "metadata": {}, "outputs": [], - "source": [ - "visualization.download(visualization.summary(), html_file_name='example')" - ] + "source": [] } ], "metadata": { "kernelspec": { - "display_name": "Python 3 (ipykernel)", + "display_name": "Python 3", "language": "python", "name": "python3" }, diff --git a/src/whylogs/viz/__init__.py b/src/whylogs/viz/__init__.py index ccb1633ded..50f3404926 100644 --- a/src/whylogs/viz/__init__.py +++ b/src/whylogs/viz/__init__.py @@ -1,6 +1,6 @@ from .browser_viz import profile_viewer -from .jupyter_notebook_viz import DisplayProfile +from .jupyter_notebook_viz import NotebookProfileViewer from .visualizer import BaseProfileVisualizer, ProfileVisualizer -__ALL__ = [ProfileVisualizer, BaseProfileVisualizer, DisplayProfile, profile_viewer] +__ALL__ = [ProfileVisualizer, BaseProfileVisualizer, NotebookProfileViewer, profile_viewer] diff --git a/src/whylogs/viz/jupyter_notebook_viz.py b/src/whylogs/viz/jupyter_notebook_viz.py index 90f0f53ce9..d49e63624b 100644 --- a/src/whylogs/viz/jupyter_notebook_viz.py +++ b/src/whylogs/viz/jupyter_notebook_viz.py @@ -15,7 +15,7 @@ logger = logging.getLogger(__name__) -class DisplayProfile: +class NotebookProfileViewer: SUMMARY_REPORT_TEMPLATE_NAME = 'index-hbs-cdn-all-in-for-jupyter-notebook.html' SUMMARY_STATISTICS_TEMPLATE_NAME = 'index-hbs-cdn-all-in-jupyter-full-summary-statistics.html' DOUBLE_HISTOGRAM_TEMPLATE_NAME = 'index-hbs-cdn-all-in-jupyter-distribution-chart.html' @@ -87,7 +87,7 @@ def __display_rendered_template(self, template, template_name, height): iframe = f'''''' return HTML(iframe) - def summary(self, frame_height=None): + def summary_drift_report(self, preferred_cell_height=None): template = self.__get_compiled_template(self.SUMMARY_REPORT_TEMPLATE_NAME) profiles_summary = {"profile_from_whylogs": self.target_profile_jsons[0]} if self.reference_profiles: @@ -95,10 +95,10 @@ def summary(self, frame_height=None): return self.__display_rendered_template( template(profiles_summary), self.SUMMARY_REPORT_TEMPLATE_NAME, - frame_height + preferred_cell_height ) - def feature(self, feature_names, frame_height=None): + def double_histogram(self, feature_names, preferred_cell_height=None): if type(feature_names) is not list: feature_names = [feature_names] template = self.__get_compiled_template(self.DOUBLE_HISTOGRAM_TEMPLATE_NAME) @@ -118,7 +118,7 @@ def feature(self, feature_names, frame_height=None): return self.__display_rendered_template( distribution_chart, self.DOUBLE_HISTOGRAM_TEMPLATE_NAME, - frame_height + preferred_cell_height ) else: logger.warning( @@ -126,11 +126,10 @@ def feature(self, feature_names, frame_height=None): ) return None - def summary_statistics(self, profile_name=None, frame_height=None): + def summary_statistics(self, profile='reference', preferred_cell_height=None): template = self.__get_compiled_template(self.SUMMARY_STATISTICS_TEMPLATE_NAME) - if self.reference_profiles: - if not profile_name or profile_name.lower() == 'reference': - profile_statistics = self.reference_profile_jsons[0] + if self.reference_profiles and profile.lower() == 'reference': + profile_statistics = self.reference_profile_jsons[0] else: profile_statistics = self.target_profile_jsons[0] rendered_template = template({ @@ -139,12 +138,12 @@ def summary_statistics(self, profile_name=None, frame_height=None): return self.__display_rendered_template( rendered_template, self.SUMMARY_STATISTICS_TEMPLATE_NAME, - frame_height + preferred_cell_height ) - def feature_summary_statistics(self, feature_name, profile_name, frame_height=None): + def feature_summary_statistics(self, feature_name, profile='reference', preferred_cell_height=None): template = self.__get_compiled_template(self.FEATURE_STATISTICS_TEMPLATE_NAME) - if self.reference_profiles and (not profile_name or profile_name.lower() == 'reference'): + if self.reference_profiles and profile.lower() == 'reference': selected_profile = self.reference_profile_jsons else: selected_profile = self.target_profile_jsons @@ -156,17 +155,17 @@ def feature_summary_statistics(self, feature_name, profile_name, frame_height=No return self.__display_rendered_template( rendered_template, self.FEATURE_STATISTICS_TEMPLATE_NAME, - frame_height + preferred_cell_height ) - def download(self, html, prefered_path=None, html_file_name=None): + def download(self, html, preferred_path=None, html_file_name=None): if not html_file_name: if self.reference_profiles: html_file_name = self.reference_profiles[0].dataset_timestamp else: html_file_name = self.target_profiles[0].dataset_timestamp - if prefered_path: - path = os.path.expanduser(prefered_path) + if preferred_path: + path = os.path.expanduser(preferred_path) else: path = os.path.join(os.pardir, "html_reports", str(html_file_name)+".html") full_path = os.path.abspath(path) From 695a31b191f512daa1e54bc84d46382a4f286793 Mon Sep 17 00:00:00 2001 From: Jirayr-Solvee Date: Mon, 7 Feb 2022 18:45:57 +0400 Subject: [PATCH 063/119] small notebook fixes --- examples/Profile_Viewer_In_Notebook.ipynb | 24 ++++++++--------------- 1 file changed, 8 insertions(+), 16 deletions(-) diff --git a/examples/Profile_Viewer_In_Notebook.ipynb b/examples/Profile_Viewer_In_Notebook.ipynb index 35798b9b21..02bdd50ea9 100644 --- a/examples/Profile_Viewer_In_Notebook.ipynb +++ b/examples/Profile_Viewer_In_Notebook.ipynb @@ -5,7 +5,7 @@ "id": "6b36ea8e", "metadata": {}, "source": [ - "# 📈 Whylogs Profile Vizualization\n", + "# 📈 Whylogs Profile Visualization\n", "### gives you various ways to simplify the proccess of detecting dataset drift. Bellow you can see the instructions for creating dummy dataset and list of currently available visual reports." ] }, @@ -121,7 +121,7 @@ "metadata": {}, "source": [ "### Initialization\n", - "initialize Profile viewer by passing profiles for which you want to get the visualizations" + "Initialize Profile viewer by passing profiles for which you want to get the visualizations" ] }, { @@ -175,7 +175,7 @@ "id": "c4be405d", "metadata": {}, "source": [ - "###### `preferred_cell_height`: height in `px` for generated vizualization cell " + "###### `preferred_cell_height`: height in `px` for generated visualization cell " ] }, { @@ -209,8 +209,8 @@ "id": "af84a332", "metadata": {}, "source": [ - "###### `*feature_names`: string or list of strings containing name of the features for which you want to see double histogram\n", - "###### `preferred_cell_height`: height in `px` for generated vizualization cell " + "###### `*feature_names`: string or list of strings containing names of the features for which you want to see double histogram\n", + "###### `preferred_cell_height`: height in `px` for generated visualization cell " ] }, { @@ -245,7 +245,7 @@ "metadata": {}, "source": [ "###### `profile_name`: `\"target\"` or `\"reference\"`\n", - "###### `prefered_cell_height`: height in `px` for generated vizualization cell " + "###### `prefered_cell_height`: height in `px` for generated visualization cell " ] }, { @@ -281,7 +281,7 @@ "source": [ "###### `*feature_name`: Any feature name from your profiled dataset\n", "###### `profile_name`: `\"target\"` or `\"reference\"`\n", - "###### `prefered_cell_height`: height in `px` for generated vizualization cell " + "###### `prefered_cell_height`: height in `px` for generated visualization cell " ] }, { @@ -297,7 +297,7 @@ "id": "2015ff5d", "metadata": {}, "source": [ - "You can also download any of those visualisation in `HTML` format for further analysys, by passing the vizualization name" + "You can also download any of those visualisation in `HTML` format for further analysys, by passing the visualization name" ] }, { @@ -331,14 +331,6 @@ "###### `preferred_path`: save path `default:` `/html_reports` located in whylogs directory\n", "###### `html_file_name`: name of the file `default:` name of the dataset followed by timestamp of the profile" ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "8eaac410", - "metadata": {}, - "outputs": [], - "source": [] } ], "metadata": { From 57cd18ce7642af80bda2131bee7a86d085d2069e Mon Sep 17 00:00:00 2001 From: Jirayr-Solvee Date: Tue, 8 Feb 2022 18:11:13 +0400 Subject: [PATCH 064/119] add empty folder and gitignore, notebook fixes --- examples/Profile_Viewer_In_Notebook.ipynb | 5 +++-- html_reports/.gitignore | 1 + 2 files changed, 4 insertions(+), 2 deletions(-) create mode 100644 html_reports/.gitignore diff --git a/examples/Profile_Viewer_In_Notebook.ipynb b/examples/Profile_Viewer_In_Notebook.ipynb index 02bdd50ea9..340ad82476 100644 --- a/examples/Profile_Viewer_In_Notebook.ipynb +++ b/examples/Profile_Viewer_In_Notebook.ipynb @@ -24,7 +24,8 @@ "metadata": {}, "outputs": [], "source": [ - "!pip install faker" + "!pip install faker\n", + "!pip install pybars3" ] }, { @@ -271,7 +272,7 @@ "metadata": {}, "outputs": [], "source": [ - "visualization.feature_summary_statistics(feature_name\"uniform_integers\", profile=\"reference\")" + "visualization.feature_summary_statistics(feature_name=\"uniform_integers\", profile=\"reference\")" ] }, { diff --git a/html_reports/.gitignore b/html_reports/.gitignore new file mode 100644 index 0000000000..2d19fc766d --- /dev/null +++ b/html_reports/.gitignore @@ -0,0 +1 @@ +*.html From 39df1efa7f73742f993abd799967e47289b8af39 Mon Sep 17 00:00:00 2001 From: Perch2005 Date: Tue, 8 Feb 2022 21:49:21 +0300 Subject: [PATCH 065/119] make filter checkboxes square --- .../viewer/index-hbs-cdn-all-in-for-jupyter-notebook.html | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/whylogs/viewer/index-hbs-cdn-all-in-for-jupyter-notebook.html b/src/whylogs/viewer/index-hbs-cdn-all-in-for-jupyter-notebook.html index fbd9762e06..c9174eb939 100644 --- a/src/whylogs/viewer/index-hbs-cdn-all-in-for-jupyter-notebook.html +++ b/src/whylogs/viewer/index-hbs-cdn-all-in-for-jupyter-notebook.html @@ -454,7 +454,7 @@ } .form-check-input[type=checkbox] { - border-radius: 1.25em; + border-radius: 2px; } .justify-content-center { From c71575ec0a62bdb9a35cd7dc807b23397f4e38dd Mon Sep 17 00:00:00 2001 From: Perch2005 Date: Tue, 8 Feb 2022 21:56:02 +0300 Subject: [PATCH 066/119] code cleaning --- ...ex-hbs-cdn-all-in-for-jupyter-notebook.html | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/src/whylogs/viewer/index-hbs-cdn-all-in-for-jupyter-notebook.html b/src/whylogs/viewer/index-hbs-cdn-all-in-for-jupyter-notebook.html index c9174eb939..a98c1932ae 100644 --- a/src/whylogs/viewer/index-hbs-cdn-all-in-for-jupyter-notebook.html +++ b/src/whylogs/viewer/index-hbs-cdn-all-in-for-jupyter-notebook.html @@ -680,6 +680,9 @@

Hold on! :)

- + \ No newline at end of file From 17e0cd0f14d4ab853dd6e1b15269217593aa6a75 Mon Sep 17 00:00:00 2001 From: Perch2005 Date: Sun, 13 Feb 2022 21:57:35 +0300 Subject: [PATCH 073/119] fix the position of the distribution chart title --- ...dex-hbs-cdn-all-in-jupyter-distribution-chart.html | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/src/whylogs/viewer/index-hbs-cdn-all-in-jupyter-distribution-chart.html b/src/whylogs/viewer/index-hbs-cdn-all-in-jupyter-distribution-chart.html index 76950bad14..62492518c2 100644 --- a/src/whylogs/viewer/index-hbs-cdn-all-in-jupyter-distribution-chart.html +++ b/src/whylogs/viewer/index-hbs-cdn-all-in-jupyter-distribution-chart.html @@ -111,6 +111,9 @@ display: flex; } + .align-items-flex-end { + align-items: flex-end; + } .chart-box-title { width: 98%; @@ -118,6 +121,8 @@ } .chart-box-title p{ + margin-bottom: 0; + margin-top: 1rem; font-family: Asap; font-weight: bold; font-size: 18px; @@ -142,7 +147,7 @@
{{#each this}}
-
+

{{@key}}

@@ -176,10 +181,6 @@

Hold on! :)

+ + + + + + + + + + + + + + From 8291b3521e2b1ef1d0ad14c33b3b2180aebff417 Mon Sep 17 00:00:00 2001 From: ssxcho Date: Wed, 16 Feb 2022 04:57:56 +0400 Subject: [PATCH 082/119] add constraints report backend, add constraints report in notebook --- examples/Profile_Viewer_In_Notebook.ipynb | 113 +++++++++++++++++++++- src/whylogs/viz/jupyter_notebook_viz.py | 13 +++ 2 files changed, 121 insertions(+), 5 deletions(-) diff --git a/examples/Profile_Viewer_In_Notebook.ipynb b/examples/Profile_Viewer_In_Notebook.ipynb index 340ad82476..8bf6036019 100644 --- a/examples/Profile_Viewer_In_Notebook.ipynb +++ b/examples/Profile_Viewer_In_Notebook.ipynb @@ -36,6 +36,7 @@ "outputs": [], "source": [ "import numpy as np\n", + "import pandas as pd\n", "import datetime\n", "from collections import OrderedDict\n", "from faker import Faker" @@ -49,7 +50,19 @@ "outputs": [], "source": [ "from whylogs import get_or_create_session\n", - "from whylogs.viz import NotebookProfileViewer" + "from whylogs.viz import NotebookProfileViewer\n", + "from whylogs.core.statistics.constraints import (\n", + " columnValuesInSetConstraint,\n", + " containsEmailConstraint,\n", + " minBetweenConstraint,\n", + " maxLessThanEqualConstraint,\n", + " parametrizedKSTestPValueGreaterThanConstraint,\n", + " columnsMatchSetConstraint,\n", + " columnPairValuesInSetConstraint,\n", + " sumOfRowValuesOfMultipleColumnsEqualsConstraint,\n", + " columnValuesUniqueWithinRow,\n", + " DatasetConstraints\n", + ")" ] }, { @@ -102,8 +115,9 @@ " logger.log({\"moah_data\": 1})\n", " logger.log({\"moah_data\": 1})\n", " logger.log({\"moah_data\": 5})\n", - " return logger.profile\n", "\n", + " return logger.profile\n", + " \n", "target_profile = profile_generator()\n", "reference_profile = profile_generator() " ] @@ -272,7 +286,7 @@ "metadata": {}, "outputs": [], "source": [ - "visualization.feature_summary_statistics(feature_name=\"uniform_integers\", profile=\"reference\")" + "visualization.feature_summary_statistics(feature_name=\"mixture_distribution\", profile=\"reference\")" ] }, { @@ -285,6 +299,87 @@ "###### `prefered_cell_height`: height in `px` for generated visualization cell " ] }, + { + "cell_type": "markdown", + "id": "a3469ba2", + "metadata": {}, + "source": [ + "### Generate constraints" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "dafab41d", + "metadata": {}, + "outputs": [], + "source": [ + "def get_sample_dataset_constraints():\n", + " cvisc = columnValuesInSetConstraint(value_set={2, 5, 8})\n", + " email_constraint = containsEmailConstraint()\n", + "\n", + " min_gt_constraint = minBetweenConstraint(lower_value=1, upper_value=5)\n", + " max_le_constraint = maxLessThanEqualConstraint(value=100)\n", + "\n", + " distribution = np.random.normal(0, 1, 50)\n", + "\n", + " ks_test_p_value_constraint = parametrizedKSTestPValueGreaterThanConstraint(\n", + " distribution,\n", + " p_value=0.5,\n", + " name=\"has a standard normal distribution\"\n", + " )\n", + "\n", + " set1 = set([\"col1\", \"col2\"])\n", + " columns_match_constraint = columnsMatchSetConstraint(set1)\n", + "\n", + " val_set = {(1, 2), (3, 5)}\n", + " col_set = [\"A\", \"B\"]\n", + " mcv_constraints = [\n", + " columnPairValuesInSetConstraint(column_A=\"A\", column_B=\"B\", value_set=val_set),\n", + " sumOfRowValuesOfMultipleColumnsEqualsConstraint(columns=col_set, value=100),\n", + " columnValuesUniqueWithinRow(column_A=\"A\", verbose=True),\n", + " ]\n", + "\n", + " return DatasetConstraints(\n", + " None,\n", + " value_constraints={\"A\": [cvisc], \"users\": [email_constraint]},\n", + " summary_constraints={\"B\": [max_le_constraint, min_gt_constraint], \"value\": [ks_test_p_value_constraint]},\n", + " table_shape_constraints=[columns_match_constraint],\n", + " multi_column_value_constraints=mcv_constraints,\n", + " )\n", + "\n", + "data = pd.DataFrame({\n", + " \"A\": [1, 2, 2, 5, 7, 6],\n", + " \"B\": [5, 4, 5, 1, 6, 0],\n", + " \"users\": [\"john\", \"jane@example.com\", \"alex\", \"bob\", \"anna@example.com\", \"dave\"],\n", + " \"value\": [23.4, 123.2, 423.3, 32.1, 42.2, 344.2],\n", + "})\n", + "\n", + "dc = get_sample_dataset_constraints()\n", + "constraints_profile = session.log_dataframe(data, \"test.data\", constraints=dc)\n", + "constraints_profile.apply_summary_constraints()\n", + "constraints_profile.apply_table_shape_constraints()\n", + "session.close()" + ] + }, + { + "cell_type": "markdown", + "id": "17ac9327", + "metadata": {}, + "source": [ + "### Constraints report" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4a2786f8", + "metadata": {}, + "outputs": [], + "source": [ + "visualization.constraints_report(dc)" + ] + }, { "cell_type": "markdown", "id": "b04f735b", @@ -332,11 +427,19 @@ "###### `preferred_path`: save path `default:` `/html_reports` located in whylogs directory\n", "###### `html_file_name`: name of the file `default:` name of the dataset followed by timestamp of the profile" ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6d3bf231", + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": { "kernelspec": { - "display_name": "Python 3", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, @@ -350,7 +453,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.7" + "version": "3.8.10" } }, "nbformat": 4, diff --git a/src/whylogs/viz/jupyter_notebook_viz.py b/src/whylogs/viz/jupyter_notebook_viz.py index e213419866..142ae0b8ad 100644 --- a/src/whylogs/viz/jupyter_notebook_viz.py +++ b/src/whylogs/viz/jupyter_notebook_viz.py @@ -26,11 +26,13 @@ class NotebookProfileViewer: SUMMARY_STATISTICS_TEMPLATE_NAME = 'index-hbs-cdn-all-in-jupyter-full-summary-statistics.html' DOUBLE_HISTOGRAM_TEMPLATE_NAME = 'index-hbs-cdn-all-in-jupyter-distribution-chart.html' FEATURE_STATISTICS_TEMPLATE_NAME = 'index-hbs-cdn-all-in-jupyter-feature-summary-statistics.html' + CONSTRAINTS_REPORT_TEMPLATE_NAME = 'index-hbs-cdn-all-in-jupyter-constraints-report.html' PAGE_SIZES = { SUMMARY_REPORT_TEMPLATE_NAME: '1000px', DOUBLE_HISTOGRAM_TEMPLATE_NAME: '277px', SUMMARY_STATISTICS_TEMPLATE_NAME: '250px', FEATURE_STATISTICS_TEMPLATE_NAME: '650px', + CONSTRAINTS_REPORT_TEMPLATE_NAME: '750PX' } def __init__(self, target_profiles: List[DatasetProfile] = None, reference_profiles: List[DatasetProfile] = None): @@ -203,3 +205,14 @@ def download(self, html, preferred_path=None, html_file_name=None): saved_html.write(html.data) saved_html.close() return None + + def constraints_report(self, constraints, preferred_cell_height=None): + template = self.__get_compiled_template(self.CONSTRAINTS_REPORT_TEMPLATE_NAME) + rendered_template = template({ + "constraints_report": json.dumps(constraints.report())} + ) + return self.__display_rendered_template( + rendered_template, + self.CONSTRAINTS_REPORT_TEMPLATE_NAME, + preferred_cell_height + ) From 7f9671d71079b6591ffbf61f8d8af8cd85eadaf9 Mon Sep 17 00:00:00 2001 From: ssxcho Date: Wed, 16 Feb 2022 05:29:09 +0400 Subject: [PATCH 083/119] constraints report with real values --- ...cdn-all-in-jupyter-constraints-report.html | 179 ++++++++++-------- 1 file changed, 100 insertions(+), 79 deletions(-) diff --git a/src/whylogs/viewer/index-hbs-cdn-all-in-jupyter-constraints-report.html b/src/whylogs/viewer/index-hbs-cdn-all-in-jupyter-constraints-report.html index b46e9c288a..bd111a3d0d 100644 --- a/src/whylogs/viewer/index-hbs-cdn-all-in-jupyter-constraints-report.html +++ b/src/whylogs/viewer/index-hbs-cdn-all-in-jupyter-constraints-report.html @@ -43,7 +43,7 @@ @media screen and (min-width: 1000px) { .desktop-content { display: block; - } + } .no-responsive { display: none; } @@ -148,7 +148,7 @@ margin-right: 5px; } - .drift-detection-search-input input::placeholder { + .drift-detection-search-input input::placeholder { font-family: Arial; font-weight: normal; font-size: 13px; @@ -286,7 +286,7 @@ line-height: 140%; color: #FFFFFF; } - + .turquoise-background-color { background-color: #369BAC; } @@ -295,28 +295,6 @@ background-color: #C6462A; } - .notif-circle-container{ - position: absolute; - top: -8px; - right: -8px; - padding: 5.3px; - border-radius: 50%; - background-color: var(--brandSecondary100); - cursor: pointer; - } - - .notif-circle { - position: absolute; - top: 2px; - right: 2px; - height: 16px; - width: 16px; - border-radius: 50%; - font-size: 10px; - color: #fff; - background-color: #ECB100; - } - .border-solid-gray { border: 1px solid #CED4DA; border-radius: 4px; @@ -346,10 +324,29 @@ padding-right: 30px; } - @media screen and (min-width: 1000px) { + .notif-circle-container{ + position: absolute; + top: 25px; + right: 25px; + padding: 5.3px; + border-radius: 50%; + background-color: white; + cursor: pointer; + } + + .notif-circle { + position: absolute; + top: 2px; + right: 2px; + padding: 3.3px; + border-radius: 50%; + background-color: #F2994A; + } + + @media screen and (min-width: 500px) { .desktop-content { display: block; - } + } .no-responsive { display: none; } @@ -394,18 +391,23 @@
-
-
- +
+
+
+
-
+
-
+
+ + + +
-
- {{{alertLIst}}} + {{{alertLIst this}}}
@@ -498,7 +500,7 @@

Hold on! :)

const findFetureWithNumberSummary = (column) => { const fetureIndex = Object.values(column.columns) - .findIndex((feture) => feture.numberSummary) + .findIndex((feture) => feture.numberSummary) return Object.keys(column.columns)[fetureIndex] } @@ -515,7 +517,7 @@

Hold on! :)

const firstWord = text.split(' ')[0] const newText = text.split(' ').slice(1).join(' ') return ( - `
@@ -540,37 +542,7 @@

Hold on! :)

) } - const datas = [ - {text: "Dataset has 2687 (14.4%) missing values", status: true}, - {text: "Sit mean is less than 0.0065", status: true}, - {text: "Sit has 177 (19.9%) missing values", status: true}, - {text: "Sit mean is greater than Lorem mean", status: true}, - {text: "Sit is uniformly distributed", status: true}, - {text: "Lorem has 177 (19.9%) missing values", status: false}, - {text: "Lorem mean is less than Dolor mean", status: false}, - {text: "Lorem is uniformly distributed", status: false}, - {text: "Lorem has a high cardinality: 1346 distinct values", status: false}, - {text: "Lectus has a high cardinality: 891 distinct values", status: false}, - {text: "Lectus is uniformly distributed", status: true}, - {text: "Lectus has 273 (23.4%) missing values", status: true}, - {text: "Dolor has a high cardinality: 891 distinct values", status: true}, - {text: "Dolor mean is less than 0.0065", status: true}, - {text: "Dolor is uniformly distributed", status: true}, - ] - - const profileFromCSVfile = {{{reference_profile}}} - - Handlebars.registerHelper("getFailedConstraints", function (column) { - return datas.filter(({status}) => status === false).length - }); - - Handlebars.registerHelper("getPassedConstraints", function (column) { - return datas.filter(({status}) => status === true).length - }); - - Handlebars.registerHelper("getAllConstraints", function (column) { - return datas.length - }); + let failedConstraints = 0; Handlebars.registerHelper("getProfileTimeStamp", function (column) { return formatLabelDate(+column.properties.dataTimestamp) @@ -581,13 +553,26 @@

Hold on! :)

}); Handlebars.registerHelper("alertLIst", function (column) { - console.log(datas.map(({text, status}) => alertListElement(text, status))); - let alertListItem = datas.map(({text, status}) => alertListElement(text, status)) - console.log(alertListItem); + let alertListItem = column.map((value) => { + if (value[1][0]) { + return alertListElement(`${value[0]} ${value[1][0][0]}`, value[1][0][value[1][0].length - 1] === 0 || (failedConstraints++, false)) + } else { + return alertListElement(value[0], value[value.length - 1] === 0 || (failedConstraints++, false)) + } + }) + $(document).ready(() => { + $(".wl__feature-count--discrete").append(failedConstraints) + $(".wl__feature-count--non-discrete").append(column.length - failedConstraints) + $(".wl__feature-count--unknown").append(column.length) + }) return alertListItem.join(' ') }); + Handlebars.registerHelper("getPassedConstraints", function (column) { + // return datas.filter(({status}) => status === true).length + }); + Handlebars.registerHelper("columnsWithConstraints", function () { return randomNumbers(200); }); @@ -597,7 +582,7 @@

Hold on! :)

}); } - function openFilter() { + function openFilter() { const $filterOptions = $(".dropdown-container"); const filterClass = $filterOptions.attr("class"); @@ -614,7 +599,7 @@

Hold on! :)

function initHandlebarsTemplate() { // Replace this context with JSON from .py file - const context = {{{profile_from_whylogs}}}; + const context = {{{constraints_report}}}; // Config handlebars and pass data to HBS template const source = document.getElementById("entry-template").innerHTML; const template = Handlebars.compile(source); @@ -622,7 +607,7 @@

Hold on! :)

const target = document.getElementById("generated-html"); target.innerHTML = html; } - + function initWebsiteScripts() { const $featureSearch = document.getElementById("wl__feature-search"); const $alertList = document.getElementById("alert-list"); @@ -635,8 +620,6 @@

Hold on! :)

failed: true }; - const getReferenceProfile = () => { return {{{reference_profile}}} } - let searchString = ""; function debounce(func, wait, immediate) { @@ -657,6 +640,17 @@

Hold on! :)

}; } + function filterNotification() { + const $notifCircleContainer = $(".notif-circle-container") + const $boxes = $('.wl_filter-options>.form-check>input[name=checkbox]:checked'); + const item = Object.values($boxes).find(function(value) { return $(value)[0] === undefined}); + if (item === undefined) { + $notifCircleContainer.removeClass("d-none") + } else { + $notifCircleContainer.addClass("d-none") + } + } + function handleSearch() { const tableBodyChildren = $alertList.children; @@ -696,7 +690,7 @@

Hold on! :)

$($(".form-check-input")[$(".form-check-input").length - 1]).prop( "checked", false ); } handleSearch(); - + }); $nonDiscrete.addEventListener("change", (event) => { @@ -725,6 +719,33 @@

Hold on! :)

}); } + function checkedBoxes() { + const $boxes = $('input[name=checkbox]:checked'); + const $notifCircleContainer = $(".notif-circle-container") + + if ($boxes.length) { + $notifCircleContainer.removeClass("d-none") + } + } + + function openFilter() { + const $filterOptions = $(".dropdown-container"); + const $notifCircleContainer = $(".notif-circle-container") + const filterClass = $filterOptions.attr("class"); + + if (filterClass.indexOf("d-none") > 0) { + $notifCircleContainer.addClass("d-none") + $filterOptions.removeClass("d-none"); + $(".filter-icon").addClass("d-none") + $(".close-filter-icon").removeClass("d-none") + } else { + $filterOptions.addClass("d-none"); + $(".close-filter-icon").addClass("d-none") + $(".filter-icon").removeClass("d-none") + checkedBoxes() + } + } + // Invoke functions -- keep in mind invokation order registerHandlebarHelperFunctions(); initHandlebarsTemplate(); From 3e5880b2a8581b969f4151194587ffbb191aea3b Mon Sep 17 00:00:00 2001 From: ssxcho Date: Wed, 16 Feb 2022 05:31:29 +0400 Subject: [PATCH 084/119] fix vertical scroll --- ...-hbs-cdn-all-in-jupyter-distribution-chart.html | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/src/whylogs/viewer/index-hbs-cdn-all-in-jupyter-distribution-chart.html b/src/whylogs/viewer/index-hbs-cdn-all-in-jupyter-distribution-chart.html index 7f77bb5390..ad13dc50f3 100644 --- a/src/whylogs/viewer/index-hbs-cdn-all-in-jupyter-distribution-chart.html +++ b/src/whylogs/viewer/index-hbs-cdn-all-in-jupyter-distribution-chart.html @@ -40,6 +40,7 @@ margin: 0; padding: 0; box-sizing: border-box; + overflow-y: hidden; } /* Screen on smaller screens */ @@ -119,7 +120,8 @@ width: 98%; justify-content: space-between; margin: 10px; - margin-top: 20px; + margin-top: 15px; + bottom: 0; } .chart-box-title p{ @@ -199,7 +201,7 @@

Hold on! :)

this.CHART_HEIGHT = this.SVG_HEIGHT - this.MARGIN.TOP - this.MARGIN.BOTTOM; this.svgEl = d3.create("svg") .attr("preserveAspectRatio", "xMinYMin meet") - .attr("viewBox", `20 0 ${$(window).width()} ${$(window).height()-50}`) + .attr("viewBox", `20 0 ${$(window).width()} ${$(window).height()-55}`) .classed("svg-content-responsive", true) this.maxYValue = d3.max(data, (d) => Math.abs(d.axisY)); this.xScale = d3 @@ -208,8 +210,8 @@

Hold on! :)

.range([this.MARGIN.LEFT, this.MARGIN.LEFT + this.CHART_WIDTH]); this.yScale = d3 .scaleLinear() - .domain([0, this.maxYValue * 1.02]) - .range([this.CHART_HEIGHT, 0]); + .domain([0, this.maxYValue * 1.4]) + .range([this.SVG_HEIGHT, 0]); } } @@ -252,7 +254,7 @@

Hold on! :)

let yFormat, xFormat; - const sizes = new GenerateChartParams($(window).height()-50, $(window).width(), histogramData) + const sizes = new GenerateChartParams($(window).height()-55, $(window).width(), histogramData) const { MARGIN, SVG_WIDTH, @@ -324,7 +326,7 @@

Hold on! :)

return svgEl._groups[0][0].outerHTML; } - const profileFromCSVfile = {{{reference_profile_from_whylogs}}} + const profileFromCSVfile = {{{reference_profile_from_whylogs}}} Handlebars.registerHelper("getDoubleHistogramChart",(column,key) => { const columnKey = key.data.key From 9f2af1f332bf57e383f56d427241035ab15aa249 Mon Sep 17 00:00:00 2001 From: Jirayr-Solvee Date: Wed, 16 Feb 2022 05:48:10 +0400 Subject: [PATCH 085/119] remove hardcoded values, connect summary to statistics --- examples/Profile_Viewer_In_Notebook.ipynb | 57 +-- ...x-hbs-cdn-all-in-for-jupyter-notebook.html | 222 +++++++++++- ...cdn-all-in-jupyter-constraints-report.html | 40 +-- ...in-jupyter-feature-summary-statistics.html | 38 +- ...ll-in-jupyter-full-summary-statistics.html | 328 ------------------ src/whylogs/viz/jupyter_notebook_viz.py | 17 - .../viz/utils/profile_viz_calculations.py | 15 + 7 files changed, 244 insertions(+), 473 deletions(-) delete mode 100644 src/whylogs/viewer/index-hbs-cdn-all-in-jupyter-full-summary-statistics.html diff --git a/examples/Profile_Viewer_In_Notebook.ipynb b/examples/Profile_Viewer_In_Notebook.ipynb index 8bf6036019..b961a5e873 100644 --- a/examples/Profile_Viewer_In_Notebook.ipynb +++ b/examples/Profile_Viewer_In_Notebook.ipynb @@ -110,7 +110,7 @@ " for _ in range(500):\n", " logger.log({\"uniform_integers\": np.random.randint(0,50)})\n", " logger.log({\"strings\": fake.name()})\n", - " logger.log({\"mixture_distribution\": np.random.choice(distribution, 1)[0]})\n", + " logger.log({\"mixture_distribution\": np.random.choice(distribution, 1)[0]}) \n", " logger.log({\"nulls\": None})\n", " logger.log({\"moah_data\": 1})\n", " logger.log({\"moah_data\": 1})\n", @@ -119,7 +119,8 @@ " return logger.profile\n", " \n", "target_profile = profile_generator()\n", - "reference_profile = profile_generator() " + "\n", + "reference_profile = profile_generator()" ] }, { @@ -213,7 +214,9 @@ "cell_type": "code", "execution_count": null, "id": "250bb662", - "metadata": {}, + "metadata": { + "scrolled": true + }, "outputs": [], "source": [ "visualization.double_histogram(feature_names=\"uniform_integers\")" @@ -263,42 +266,6 @@ "###### `prefered_cell_height`: height in `px` for generated visualization cell " ] }, - { - "cell_type": "markdown", - "id": "78db1538", - "metadata": {}, - "source": [ - "### Feature Summary Statistics" - ] - }, - { - "cell_type": "markdown", - "id": "4dc0e3f6", - "metadata": {}, - "source": [ - "You can get set of useful statistics for features by passing the profile and feature names" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "fb77bcd5", - "metadata": {}, - "outputs": [], - "source": [ - "visualization.feature_summary_statistics(feature_name=\"mixture_distribution\", profile=\"reference\")" - ] - }, - { - "cell_type": "markdown", - "id": "8b483a1d", - "metadata": {}, - "source": [ - "###### `*feature_name`: Any feature name from your profiled dataset\n", - "###### `profile_name`: `\"target\"` or `\"reference\"`\n", - "###### `prefered_cell_height`: height in `px` for generated visualization cell " - ] - }, { "cell_type": "markdown", "id": "a3469ba2", @@ -427,19 +394,11 @@ "###### `preferred_path`: save path `default:` `/html_reports` located in whylogs directory\n", "###### `html_file_name`: name of the file `default:` name of the dataset followed by timestamp of the profile" ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "6d3bf231", - "metadata": {}, - "outputs": [], - "source": [] } ], "metadata": { "kernelspec": { - "display_name": "Python 3 (ipykernel)", + "display_name": "Python 3", "language": "python", "name": "python3" }, @@ -453,7 +412,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.8.10" + "version": "3.9.7" } }, "nbformat": 4, diff --git a/src/whylogs/viewer/index-hbs-cdn-all-in-for-jupyter-notebook.html b/src/whylogs/viewer/index-hbs-cdn-all-in-for-jupyter-notebook.html index f5925cb852..7eccaa127a 100644 --- a/src/whylogs/viewer/index-hbs-cdn-all-in-for-jupyter-notebook.html +++ b/src/whylogs/viewer/index-hbs-cdn-all-in-for-jupyter-notebook.html @@ -91,7 +91,6 @@ padding: 0; box-sizing: border-box; font-family: "Asap", Arial, Helvetica, sans-serif; - color: var(--secondaryLight1000); } /* @@ -503,6 +502,93 @@ background-color: #F2994A; } + + .header-title { + font-size: 26px; + font-weight: 700; + color: #444444; + } + + .statistic-number-title { + font-family: Arial; + font-weight: normal; + font-size: 14px; + line-height: 20px; + color: #6C757D; + } + + .statistic-number { + font-family: Arial; + font-weight: bold; + font-size: 20px; + line-height: 140%; + display: flex; + align-items: center; + color: #0E7384; + } + + .statistic-measurement { + font-size: 15px !important; + margin-left: 3px; + } + + .statistic-measurement-percent { + font-size: 15px !important; + } + + .tooltip-full-number { + position: relative; + display: inline-block; + } + + .tooltip-full-number .tooltiptext { + visibility: hidden; + background-color: #0E7384; + color: #fff; + text-align: center; + border-radius: 6px; + padding: 3px; + position: absolute; + z-index: 1; + top: 0; + left: 100%; + margin-left: 5px; + opacity: 0; + transition: opacity 0.5s; + font-size: 13px; + font-weight: normal; + line-height: 100%; + } + + .tooltip-full-number:hover .tooltiptext { + visibility: visible; + opacity: 1; + } + + .display-flex { + display: flex; + } + + .justify-content-space-between { + justify-content: space-between; + } + + .justify-content-center { + justify-content: center; + } + + .align-items-center { + align-items: center; + } + + .padding-right-30 { + padding-right: 30px; + } + + .text-color { + color: var(--secondaryLight1000); + } + @media screen and (min-width: 500px) { .desktop-content { display: block; @@ -521,6 +607,26 @@ {{{{raw}}}}
+
+
+
+ Profile Summary: {{getProfileName this}}_{{getProfileTimeStamp this}} +
+
+
+
Observations
+
{{{observations this}}}
+
+
+
Missing Cells
+
+ {{{missingCells this}}} +
{{{missingCellsPercentage this}}}
+
+
+
+
+
@@ -529,7 +635,11 @@

- Drift detected in 65 of 131 features + Drift detected in + + of + + features

@@ -678,7 +788,6 @@

Hold on! :)

- \ No newline at end of file + diff --git a/src/whylogs/viewer/index-hbs-cdn-all-in-jupyter-constraints-report.html b/src/whylogs/viewer/index-hbs-cdn-all-in-jupyter-constraints-report.html index bd111a3d0d..069cd2f21f 100644 --- a/src/whylogs/viewer/index-hbs-cdn-all-in-jupyter-constraints-report.html +++ b/src/whylogs/viewer/index-hbs-cdn-all-in-jupyter-constraints-report.html @@ -374,16 +374,7 @@ Constraints Report

-
-
-
Columns with constraints
-
{{columnsWithConstraints this}}
-
-
-
Columns without constraints
-
{{columnsWithoutConstraints this}}
-
-
+
@@ -513,9 +504,8 @@

Hold on! :)

} } - const alertListElement = (text, status) => { - const firstWord = text.split(' ')[0] - const newText = text.split(' ').slice(1).join(' ') + const alertListElement = (name, text, status) => { + return ( `
Hold on! :) >
${ + name && alertListItemStatus( status, - `${firstWord}`, - `${firstWord}` + `${name}`, + `${name}` ) } - ${newText} + ${text}
${ alertListItemStatus( @@ -554,11 +545,10 @@

Hold on! :)

Handlebars.registerHelper("alertLIst", function (column) { let alertListItem = column.map((value) => { - if (value[1][0]) { - return alertListElement(`${value[0]} ${value[1][0][0]}`, value[1][0][value[1][0].length - 1] === 0 || (failedConstraints++, false)) + return alertListElement(value[0], value[1][0][0], value[1][0][value[1][0].length - 1] === 0 || (failedConstraints++, false)) } else { - return alertListElement(value[0], value[value.length - 1] === 0 || (failedConstraints++, false)) + return alertListElement('', value[0], value[value.length - 1] === 0 || (failedConstraints++, false)) } }) $(document).ready(() => { @@ -568,18 +558,6 @@

Hold on! :)

}) return alertListItem.join(' ') }); - - Handlebars.registerHelper("getPassedConstraints", function (column) { - // return datas.filter(({status}) => status === true).length - }); - - Handlebars.registerHelper("columnsWithConstraints", function () { - return randomNumbers(200); - }); - - Handlebars.registerHelper("columnsWithoutConstraints", function () { - return randomNumbers(200); - }); } function openFilter() { diff --git a/src/whylogs/viewer/index-hbs-cdn-all-in-jupyter-feature-summary-statistics.html b/src/whylogs/viewer/index-hbs-cdn-all-in-jupyter-feature-summary-statistics.html index 5b5b607b29..00946c9849 100644 --- a/src/whylogs/viewer/index-hbs-cdn-all-in-jupyter-feature-summary-statistics.html +++ b/src/whylogs/viewer/index-hbs-cdn-all-in-jupyter-feature-summary-statistics.html @@ -191,10 +191,6 @@
Missing
{{missing this}}
-
-
Infinite
-
{{infinite}}
-
Mean
{{mean this}}
@@ -207,18 +203,6 @@
Maximum
{{maximum this}}
-
-
Zeros
-
{{zeros}}
-
-
-
Negative
-
{{negative}}
-
-
-
Memory size
-
{{memorySize}}
-
@@ -324,8 +308,6 @@

Hold on! :)

return parseFloat(number).toFixed(decimals); } - const randomNumbers = (range) => Math.floor(Math.random() * range) - Handlebars.registerHelper("variance", function (column) { const feture = Object.values(column)[1] if (feture.variance) { @@ -359,7 +341,7 @@

Hold on! :)

} return "0"; }); - + Handlebars.registerHelper("ninetyFifthPercentile", function (column) { const feture = Object.values(column)[1] @@ -404,7 +386,7 @@

Hold on! :)

} return "0"; }); - + Handlebars.registerHelper("distinct", function (column) { const feture = Object.values(column)[1] @@ -423,10 +405,6 @@

Hold on! :)

return "0"; }); - Handlebars.registerHelper("infinite", function () { - return randomNumbers(50); - }); - Handlebars.registerHelper("mean", function (column) { const feture = Object.values(column)[1] @@ -463,18 +441,6 @@

Hold on! :)

return "0"; }); - Handlebars.registerHelper("zeros", function () { - return randomNumbers(50); - }); - - Handlebars.registerHelper("negative", function () { - return randomNumbers(50); - }); - - Handlebars.registerHelper("memorySize", function () { - return randomNumbers(50); - }); - Handlebars.registerHelper("getProfileTimeStamp", function (column) { return formatLabelDate(+column.properties.dataTimestamp) }); diff --git a/src/whylogs/viewer/index-hbs-cdn-all-in-jupyter-full-summary-statistics.html b/src/whylogs/viewer/index-hbs-cdn-all-in-jupyter-full-summary-statistics.html deleted file mode 100644 index fcc31b8090..0000000000 --- a/src/whylogs/viewer/index-hbs-cdn-all-in-jupyter-full-summary-statistics.html +++ /dev/null @@ -1,328 +0,0 @@ - - - - - - - - - - Profile Viewer | whylogs - - - - - - - - - - - - - - - - - - - - - - - diff --git a/src/whylogs/viz/jupyter_notebook_viz.py b/src/whylogs/viz/jupyter_notebook_viz.py index 142ae0b8ad..552b4579de 100644 --- a/src/whylogs/viz/jupyter_notebook_viz.py +++ b/src/whylogs/viz/jupyter_notebook_viz.py @@ -23,14 +23,12 @@ class NotebookProfileViewer: SUMMARY_REPORT_TEMPLATE_NAME = 'index-hbs-cdn-all-in-for-jupyter-notebook.html' - SUMMARY_STATISTICS_TEMPLATE_NAME = 'index-hbs-cdn-all-in-jupyter-full-summary-statistics.html' DOUBLE_HISTOGRAM_TEMPLATE_NAME = 'index-hbs-cdn-all-in-jupyter-distribution-chart.html' FEATURE_STATISTICS_TEMPLATE_NAME = 'index-hbs-cdn-all-in-jupyter-feature-summary-statistics.html' CONSTRAINTS_REPORT_TEMPLATE_NAME = 'index-hbs-cdn-all-in-jupyter-constraints-report.html' PAGE_SIZES = { SUMMARY_REPORT_TEMPLATE_NAME: '1000px', DOUBLE_HISTOGRAM_TEMPLATE_NAME: '277px', - SUMMARY_STATISTICS_TEMPLATE_NAME: '250px', FEATURE_STATISTICS_TEMPLATE_NAME: '650px', CONSTRAINTS_REPORT_TEMPLATE_NAME: '750PX' } @@ -152,21 +150,6 @@ def double_histogram(self, feature_names, preferred_cell_height=None): ) return None - def summary_statistics(self, profile='reference', preferred_cell_height=None): - template = self.__get_compiled_template(self.SUMMARY_STATISTICS_TEMPLATE_NAME) - if self.reference_profiles and profile.lower() == 'reference': - profile_statistics = self.reference_profile_jsons[0] - else: - profile_statistics = self.target_profile_jsons[0] - rendered_template = template({ - "profile_summary_statistics_from_whylogs": profile_statistics - }) - return self.__display_rendered_template( - rendered_template, - self.SUMMARY_STATISTICS_TEMPLATE_NAME, - preferred_cell_height - ) - def feature_summary_statistics(self, feature_name, profile='reference', preferred_cell_height=None): template = self.__get_compiled_template(self.FEATURE_STATISTICS_TEMPLATE_NAME) if self.reference_profiles and profile.lower() == 'reference': diff --git a/src/whylogs/viz/utils/profile_viz_calculations.py b/src/whylogs/viz/utils/profile_viz_calculations.py index 1ac90a5912..62ab0e732b 100644 --- a/src/whylogs/viz/utils/profile_viz_calculations.py +++ b/src/whylogs/viz/utils/profile_viz_calculations.py @@ -21,8 +21,16 @@ def add_drift_val_to_ref_profile_json(target_profile, reference_profile, referen reference_profile_json : Reference profile summary serialized json with drift value for every feature """ # QUESTION: Should this function need to change behaviour to add drift into target profile? + observations = 0 + missing_cells = 0 + total_count = 0 for target_col_name in target_profile.columns.keys(): target_col = target_profile.columns[target_col_name] + observations += target_col.counters.to_protobuf().count + null_count = target_col.to_summary().counters.null_count.value + missing_cells += null_count if null_count else 0 + total_count += target_col.to_summary().counters.count + if target_col_name in reference_profile.columns: ref_col = reference_profile.columns[target_col_name] target_type = target_col.schema_tracker.to_summary().inferred_type.type @@ -53,6 +61,13 @@ def add_drift_val_to_ref_profile_json(target_profile, reference_profile, referen reference_profile_json['columns'][target_col_name]['drift_from_ref'] = chi_squared_p_value.chi_squared_test else: reference_profile_json['columns'][target_col_name]['drift_from_ref'] = None + reference_profile_json['properties']['observations'] = observations + reference_profile_json['properties']['missing_cells'] = missing_cells + reference_profile_json['properties']['total_count'] = total_count + reference_profile_json['properties']['missing_percentage'] = ( + missing_cells / total_count + ) * 100 if total_count else 0 + return reference_profile_json From 0cb19b6f22d478b2705afd05098158639d83d69a Mon Sep 17 00:00:00 2001 From: Jirayr-Solvee Date: Wed, 16 Feb 2022 05:51:18 +0400 Subject: [PATCH 086/119] remove summary statistics report --- examples/Profile_Viewer_In_Notebook.ipynb | 35 ----------------------- 1 file changed, 35 deletions(-) diff --git a/examples/Profile_Viewer_In_Notebook.ipynb b/examples/Profile_Viewer_In_Notebook.ipynb index b961a5e873..735d64a304 100644 --- a/examples/Profile_Viewer_In_Notebook.ipynb +++ b/examples/Profile_Viewer_In_Notebook.ipynb @@ -231,41 +231,6 @@ "###### `preferred_cell_height`: height in `px` for generated visualization cell " ] }, - { - "cell_type": "markdown", - "id": "d318761d", - "metadata": {}, - "source": [ - "### Summary Statistics" - ] - }, - { - "cell_type": "markdown", - "id": "32974704", - "metadata": {}, - "source": [ - "You can get set of useful statistics for both `target` and `reference` by passing the name" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "d68c5e6a", - "metadata": {}, - "outputs": [], - "source": [ - "visualization.summary_statistics(profile=\"target\")" - ] - }, - { - "cell_type": "markdown", - "id": "2fc9c79e", - "metadata": {}, - "source": [ - "###### `profile_name`: `\"target\"` or `\"reference\"`\n", - "###### `prefered_cell_height`: height in `px` for generated visualization cell " - ] - }, { "cell_type": "markdown", "id": "a3469ba2", From 611255ee330f159abd9fde329ab5da7e8420332c Mon Sep 17 00:00:00 2001 From: Jirayr-Solvee Date: Wed, 16 Feb 2022 21:14:19 +0400 Subject: [PATCH 087/119] add sum, change colors --- examples/Profile_Viewer_In_Notebook.ipynb | 38 ++++ ...x-hbs-cdn-all-in-for-jupyter-notebook.html | 213 +++++++++--------- ...in-jupyter-feature-summary-statistics.html | 107 ++++----- src/whylogs/viz/jupyter_notebook_viz.py | 146 ++++-------- .../viz/utils/profile_viz_calculations.py | 87 +++---- 5 files changed, 286 insertions(+), 305 deletions(-) diff --git a/examples/Profile_Viewer_In_Notebook.ipynb b/examples/Profile_Viewer_In_Notebook.ipynb index 735d64a304..94884285d2 100644 --- a/examples/Profile_Viewer_In_Notebook.ipynb +++ b/examples/Profile_Viewer_In_Notebook.ipynb @@ -231,6 +231,44 @@ "###### `preferred_cell_height`: height in `px` for generated visualization cell " ] }, + { + "cell_type": "markdown", + "id": "78db1538", + "metadata": {}, + "source": [ + "### Feature Statistics" + ] + }, + { + "cell_type": "markdown", + "id": "4dc0e3f6", + "metadata": {}, + "source": [ + "You can get set of useful statistics for features by passing the profile and feature names" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fb77bcd5", + "metadata": { + "scrolled": false + }, + "outputs": [], + "source": [ + "visualization.feature_statistics(feature_name=\"mixture_distribution\", profile=\"reference\")" + ] + }, + { + "cell_type": "markdown", + "id": "8b483a1d", + "metadata": {}, + "source": [ + "###### `*feature_name`: Any feature name from your profiled dataset\n", + "###### `profile_name`: `\"target\"` or `\"reference\"`\n", + "###### `prefered_cell_height`: height in `px` for generated visualization cell " + ] + }, { "cell_type": "markdown", "id": "a3469ba2", diff --git a/src/whylogs/viewer/index-hbs-cdn-all-in-for-jupyter-notebook.html b/src/whylogs/viewer/index-hbs-cdn-all-in-for-jupyter-notebook.html index 7eccaa127a..a0c3eb6889 100644 --- a/src/whylogs/viewer/index-hbs-cdn-all-in-for-jupyter-notebook.html +++ b/src/whylogs/viewer/index-hbs-cdn-all-in-for-jupyter-notebook.html @@ -788,6 +788,7 @@

Hold on! :)

+ + + + + + + + + + + + + From dfcdf812781d7847f6bf5762d42c6539c22a10a6 Mon Sep 17 00:00:00 2001 From: Perch2005 <78492631+Perch2005@users.noreply.github.com> Date: Sun, 20 Feb 2022 22:48:02 +0400 Subject: [PATCH 100/119] Categorical drift report: Differenced Bar chart jupyter visualisation --- ...-cdn-all-in-jupyter-differenced-chart.html | 375 ++++++++++++++++++ 1 file changed, 375 insertions(+) create mode 100644 src/whylogs/viewer/index-hbs-cdn-all-in-jupyter-differenced-chart.html diff --git a/src/whylogs/viewer/index-hbs-cdn-all-in-jupyter-differenced-chart.html b/src/whylogs/viewer/index-hbs-cdn-all-in-jupyter-differenced-chart.html new file mode 100644 index 0000000000..34c16768e0 --- /dev/null +++ b/src/whylogs/viewer/index-hbs-cdn-all-in-jupyter-differenced-chart.html @@ -0,0 +1,375 @@ + + + + + + + + + + Profile Viewer | whylogs + + + + + + + + + + + + + + + + + + + + + + From 1b7ff9d5e158ff94baf8dc159f23b6407d3183a9 Mon Sep 17 00:00:00 2001 From: Perch2005 <78492631+Perch2005@users.noreply.github.com> Date: Sun, 20 Feb 2022 23:00:45 +0400 Subject: [PATCH 101/119] Add definition (ranges) for the severe, moderate, low drift categories. --- ...x-hbs-cdn-all-in-for-jupyter-notebook.html | 78 +++++++++++++------ 1 file changed, 54 insertions(+), 24 deletions(-) diff --git a/src/whylogs/viewer/index-hbs-cdn-all-in-for-jupyter-notebook.html b/src/whylogs/viewer/index-hbs-cdn-all-in-for-jupyter-notebook.html index be9ce04ac0..d272bbd416 100644 --- a/src/whylogs/viewer/index-hbs-cdn-all-in-for-jupyter-notebook.html +++ b/src/whylogs/viewer/index-hbs-cdn-all-in-for-jupyter-notebook.html @@ -389,6 +389,16 @@ color: #000000; } + .drift-detection-info-drifts-item-range { + font-family: Arial; + font-style: normal; + font-weight: bold; + font-size: 14px; + line-height: 14px; + color: #6C757D; + padding-top: 5px; + } + .drift-detection-search-input { display: flex; align-items: center; @@ -468,7 +478,7 @@ display: inline-block; position: relative; width: 85%; - padding-bottom: 15%; + padding-bottom: 17%; vertical-align: top; overflow: hidden; } @@ -569,6 +579,10 @@ display: flex; } + .flex-direction-column { + flex-direction: column; + } + .justify-content-space-between { justify-content: space-between; } @@ -926,34 +940,41 @@

Hold on! :)

return format(date); } - const driftCountElement = (driftCount, driftColor, driftName) => ` -
-
-
-

${driftCount}

-

${driftName}

+ const driftCountElement = (driftCount, driftColor, driftName, driftRange) => ` +
+
+
+
+

${driftCount}

+

${driftName}

+
+

(${driftRange})

` const drifts = { severe: { count: 0, + range: "0 - 0.05", name: "Severe drift", colorClass: "severe-drift-circle-color" }, moderate: { count: 0, + range: "0.05 - 0.3", name: "Moderate drift", colorClass: "moderate-drift-circle-color" }, mild: { count: 0, + range: "0.3 - 0.6", name: "Mild drift", colorClass: "mild-drift-circle-color" }, minimal: { count: 0, + range: "0.6 - 1", name: "Minimal drift", colorClass: "minimal-drift-circle-color" } @@ -1050,12 +1071,28 @@

Hold on! :)

return properties.properties.tags.name }); - let driftCount = 0; + let driftCount = 0; + const diffFromRefTableElement = (driftFromRefNumber, circleColor) => ` +
+ ${driftFromRefNumber} +
+
+ ` + + const cheqValueTypeNumber = (profile, profileValue) => { + let validValue; + if (profile && profileValue !== undefined && typeof profileValue === "number") { + return true + } else if (profileValue !== undefined && typeof profileValue !== "number") { + return false + } + } Handlebars.registerHelper("getDiffFromRef", function (column, key) { const columnKey = key.data.key const {drift_from_ref} = referenceProfile.columns[columnKey] - if (referenceProfile && referenceProfile.columns[columnKey].drift_from_ref !== undefined && typeof drift_from_ref === "number") { + + if (cheqValueTypeNumber(referenceProfile, drift_from_ref)) { driftCount++ const driftFromRefNumber = drift_from_ref % 1 ? fixNumberTo(drift_from_ref, 2) : drift_from_ref const circleColor = Object.values(drifts)[countOfDrifts(driftFromRefNumber, 3)].colorClass @@ -1065,28 +1102,21 @@

Hold on! :)

$(".all-features").html(Object.keys(referenceProfile.columns).length) }) - return ` -
- ${driftFromRefNumber} -
-
- ` - } else if (referenceProfile.columns[columnKey].drift_from_ref !== undefined && typeof drift_from_ref !== "number") { + return diffFromRefTableElement(driftFromRefNumber, circleColor) + + } else if (cheqValueTypeNumber(referenceProfile, drift_from_ref) !== undefined) { driftCount++ Object.values(drifts)[0].count++ - return `
-

undefined

-
-
`; - } else { - return '

-

'; + return diffFromRefTableElement("undefined", "severe-drift-circle-color") } + + return '

-

'; }); $(document).ready(() => - Object.values(drifts).map(({count, name, colorClass}) =>{ + Object.values(drifts).map(({count, name, colorClass, range}) =>{ $("#drift-detection-info-drifts") - .append(driftCountElement(count, colorClass, name)) + .append(driftCountElement(count, colorClass, name, range)) }) ) From 6a66040b84f4fb2c1af8603e3a1167d8bcbb1944 Mon Sep 17 00:00:00 2001 From: Jirayr-Solvee Date: Mon, 21 Feb 2022 12:46:24 +0400 Subject: [PATCH 102/119] fixes in templates, structural changes --- examples/Profile_Viewer_In_Notebook.ipynb | 6952 +---------------- ...-cdn-all-in-jupyter-differenced-chart.html | 42 +- src/whylogs/viz/jupyter_notebook_viz.py | 79 +- .../viz/utils/profile_viz_calculations.py | 149 +- 4 files changed, 263 insertions(+), 6959 deletions(-) diff --git a/examples/Profile_Viewer_In_Notebook.ipynb b/examples/Profile_Viewer_In_Notebook.ipynb index 337789a6fe..e988a3680f 100644 --- a/examples/Profile_Viewer_In_Notebook.ipynb +++ b/examples/Profile_Viewer_In_Notebook.ipynb @@ -19,22 +19,10 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "id": "29e48da5", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Requirement already satisfied: faker in /home/perch/.cache/pypoetry/virtualenvs/whylogs-yhfFv6et-py3.8/lib/python3.8/site-packages (13.0.0)\n", - "Requirement already satisfied: python-dateutil>=2.4 in /home/perch/.cache/pypoetry/virtualenvs/whylogs-yhfFv6et-py3.8/lib/python3.8/site-packages (from faker) (2.8.2)\n", - "Requirement already satisfied: six>=1.5 in /home/perch/.cache/pypoetry/virtualenvs/whylogs-yhfFv6et-py3.8/lib/python3.8/site-packages (from python-dateutil>=2.4->faker) (1.16.0)\n", - "Requirement already satisfied: pybars3 in /home/perch/.cache/pypoetry/virtualenvs/whylogs-yhfFv6et-py3.8/lib/python3.8/site-packages (0.9.7)\n", - "Requirement already satisfied: PyMeta3>=0.5.1 in /home/perch/.cache/pypoetry/virtualenvs/whylogs-yhfFv6et-py3.8/lib/python3.8/site-packages (from pybars3) (0.5.1)\n" - ] - } - ], + "outputs": [], "source": [ "!pip install faker\n", "!pip install pybars3" @@ -42,7 +30,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "id": "5d40e474", "metadata": {}, "outputs": [], @@ -56,7 +44,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": null, "id": "2c3ff3a2", "metadata": {}, "outputs": [], @@ -87,7 +75,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": null, "id": "41f8a0f9", "metadata": {}, "outputs": [], @@ -111,18 +99,10 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": null, "id": "ea4ce6a6", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "WARN: Missing config\n" - ] - } - ], + "outputs": [], "source": [ "session = get_or_create_session()\n", "def profile_generator():\n", @@ -166,7 +146,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": null, "id": "7dbff14f", "metadata": {}, "outputs": [], @@ -202,5181 +182,12 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": null, "id": "911147eb", "metadata": { "scrolled": true }, - "outputs": [ - { - "data": { - "text/html": [ - "
" - ], - "text/plain": [ - "" - ] - }, - "execution_count": 7, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "visualization.summary_drift_report(preferred_cell_height=\"1000px\")" ] @@ -5402,422 +213,19 @@ "id": "28b512ca", "metadata": {}, "source": [ - "You can get double histogram for any of your features" + "You can get double histogram for numerical features" ] }, { "cell_type": "code", - "execution_count": 10, + "execution_count": null, "id": "250bb662", "metadata": { "scrolled": true }, - "outputs": [ - { - "data": { - "text/html": [ - "
" - ], - "text/plain": [ - "" - ] - }, - "execution_count": 10, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ - "visualization.double_histogram(feature_names=\"strings\")" + "visualization.double_histogram(feature_names=\"uniform_integers\")" ] }, { @@ -5829,6 +237,80 @@ "###### `preferred_cell_height`: height in `px` for generated visualization cell " ] }, + { + "cell_type": "markdown", + "id": "193e306d", + "metadata": {}, + "source": [ + "### Distribution chart" + ] + }, + { + "cell_type": "markdown", + "id": "6f4931fb", + "metadata": {}, + "source": [ + "You can get distirubtion chart for categorical features" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ccc7c73e", + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "visualization.distribution_chart(feature_names=\"strings\")" + ] + }, + { + "cell_type": "markdown", + "id": "cd97db45", + "metadata": {}, + "source": [ + "###### `*feature_names`: string or list of strings containing names of the features for which you want to see double histogram\n", + "###### `preferred_cell_height`: height in `px` for generated visualization cell " + ] + }, + { + "cell_type": "markdown", + "id": "408e1c5b", + "metadata": {}, + "source": [ + "### Differenced distribution chart" + ] + }, + { + "cell_type": "markdown", + "id": "4d65b0b4", + "metadata": {}, + "source": [ + "You can get differenced distirubtion chart for categorical features" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d26b2fa2", + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "visualization.differenced_distribution_chart(feature_names=\"strings\")" + ] + }, + { + "cell_type": "markdown", + "id": "92451fbf", + "metadata": {}, + "source": [ + "###### `*feature_names`: string or list of strings containing names of the features for which you want to see double histogram\n", + "###### `preferred_cell_height`: height in `px` for generated visualization cell " + ] + }, { "cell_type": "markdown", "id": "78db1538", @@ -5847,500 +329,12 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": null, "id": "fb77bcd5", "metadata": { "scrolled": false }, - "outputs": [ - { - "data": { - "text/html": [ - "
" - ], - "text/plain": [ - "" - ] - }, - "execution_count": 9, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "visualization.feature_statistics(feature_name=\"mixture_distribution\", profile=\"reference\")" ] @@ -6365,7 +359,7 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": null, "id": "dafab41d", "metadata": {}, "outputs": [], @@ -6428,766 +422,10 @@ }, { "cell_type": "code", - "execution_count": 30, + "execution_count": null, "id": "4a2786f8", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
" - ], - "text/plain": [ - "" - ] - }, - "execution_count": 30, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "visualization.constraints_report(dc)" ] @@ -7210,7 +448,7 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": null, "id": "980d7236", "metadata": {}, "outputs": [], @@ -7257,7 +495,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.8.12" + "version": "3.9.7" } }, "nbformat": 4, diff --git a/src/whylogs/viewer/index-hbs-cdn-all-in-jupyter-differenced-chart.html b/src/whylogs/viewer/index-hbs-cdn-all-in-jupyter-differenced-chart.html index 34c16768e0..aca9afe5e1 100644 --- a/src/whylogs/viewer/index-hbs-cdn-all-in-jupyter-differenced-chart.html +++ b/src/whylogs/viewer/index-hbs-cdn-all-in-jupyter-differenced-chart.html @@ -140,7 +140,7 @@ .bar.positive { fill: #369BACB2; } - + .bar.negative { fill: #2683C9E5; } @@ -204,7 +204,7 @@

Hold on! :)

const filterAndSortChartData = (overlappedHistogramData, histogramData) => { return overlappedHistogramData - .map((d) => + .map((d) => histogramData .filter((ref) => d.axisX === ref.axisX )[0]) .sort((a, b) => { @@ -234,7 +234,7 @@

Hold on! :)

this.CHART_HEIGHT = this.SVG_HEIGHT - this.MARGIN.TOP - this.MARGIN.BOTTOM; this.svgEl = d3.create("svg") .attr("preserveAspectRatio", "xMinYMin meet") - .attr("viewBox", `0 0 ${$(window).width()} ${$(window).height()-55}`) + .attr("viewBox", `0 -10 ${$(window).width()} ${$(window).height()-55}`) .classed("svg-content-responsive", true) this.maxYValue = d3.max(data, (d) => Math.abs(d.axisY)); this.xScale = d3 @@ -260,14 +260,6 @@

Hold on! :)

return data } - function CheckNumberSummary(column) { - if (column.numberSummary) { - return true - } else { - return false - } - } - function generatePositiveNegativeChart(histogramData, overlappedHistogramData) { const data = filterAndSortChartData(chartData(histogramData), chartData(overlappedHistogramData)).map(({axisY, axisX}, index) => { const findIndex = chartData(histogramData).findIndex((value) => value.axisX === axisX) @@ -277,7 +269,7 @@

Hold on! :)

let yFormat, xFormat; - + const sizes = new GenerateChartParams($(window).height()-55, $(window).width(), chartData(histogramData), chartData(overlappedHistogramData)) let { MARGIN, @@ -288,17 +280,23 @@

Hold on! :)

xScale, svgEl } = sizes - - const y0 = Math.max(Math.abs(d3.min(data)), Math.abs(d3.max(data))); - + + const maxY = Math.abs(d3.max(data)); + const minY = Math.abs(d3.min(data)); + let positiveY, + negativeY; + + positiveY = Math.ceil(maxY) % 1 ? maxY + 2*(maxY/(maxY*10)) : maxY + 2*(maxY/(maxY/10)) + negativeY = Math.ceil(minY) % 1 ? minY + 2*(minY/(minY*10)) : minY + 2*(minY/(minY/10)) + const yScale = d3.scaleLinear() - .domain([-y0, y0 ]) + .domain([-negativeY, positiveY ]) .range([CHART_HEIGHT,0]) - + const xAxis = d3.axisBottom(xScale).ticks(SVG_WIDTH / 80, xFormat).tickSizeOuter(0); - const yAxis = d3.axisLeft(yScale).ticks(CHART_HEIGHT / 20, yFormat); + const yAxis = d3.axisLeft(yScale).ticks(CHART_HEIGHT / 30, yFormat); yFormat = yScale.tickFormat(100, yFormat); - + svgEl.append("g") .attr("transform", `translate(${MARGIN.LEFT}, 0)`) .call(yAxis) @@ -311,7 +309,7 @@

Hold on! :)

.attr("y", 10) .attr("fill", "currentColor") .attr("text-anchor", "start")); - + svgEl .append("g") .attr("transform", `translate(0,${SVG_HEIGHT - MARGIN.BOTTOM})`) @@ -323,7 +321,7 @@

Hold on! :)

.attr("y", 27) .attr("fill", "currentColor") .attr("text-anchor", "end")); - + svgEl.selectAll(".bar") .data(data) .enter() @@ -335,7 +333,7 @@

Hold on! :)

.attr("x", function(d, i) { return xScale(i) }) .attr("height", function(d) { return Math.abs(yScale(d) - yScale(0)); }) .attr("width", xScale.bandwidth()); - + return svgEl._groups[0][0].outerHTML; } diff --git a/src/whylogs/viz/jupyter_notebook_viz.py b/src/whylogs/viz/jupyter_notebook_viz.py index a5e0e9e9cf..02edf72029 100644 --- a/src/whylogs/viz/jupyter_notebook_viz.py +++ b/src/whylogs/viz/jupyter_notebook_viz.py @@ -12,28 +12,28 @@ from .utils.profile_viz_calculations import ( add_drift_val_to_ref_profile_json, - calculate_coefficient_of_variation, - calculate_quantile_statistics_for_single_feature, - calculate_sum, - calculate_variance, + add_feature_statistics, ) _MY_DIR = os.path.realpath(os.path.dirname(__file__)) -TYPES = InferredType.Type logger = logging.getLogger(__name__) -numerical_types = (TYPES.INTEGRAL, TYPES.FRACTIONAL) +numerical_types = (InferredType.Type.INTEGRAL, InferredType.Type.FRACTIONAL) class NotebookProfileViewer: SUMMARY_REPORT_TEMPLATE_NAME = "index-hbs-cdn-all-in-for-jupyter-notebook.html" DOUBLE_HISTOGRAM_TEMPLATE_NAME = "index-hbs-cdn-all-in-jupyter-distribution-chart.html" + DISTRIBUTION_CHART_TEMPLATE_NAME = "index-hbs-cdn-all-in-jupyter-bar-chart.html" + DIFFERENCED_CHART_TEMPLATE_NAME = "index-hbs-cdn-all-in-jupyter-differenced-chart.html" FEATURE_STATISTICS_TEMPLATE_NAME = "index-hbs-cdn-all-in-jupyter-feature-summary-statistics.html" CONSTRAINTS_REPORT_TEMPLATE_NAME = "index-hbs-cdn-all-in-jupyter-constraints-report.html" PAGE_SIZES = { SUMMARY_REPORT_TEMPLATE_NAME: "1000px", DOUBLE_HISTOGRAM_TEMPLATE_NAME: "277px", + DISTRIBUTION_CHART_TEMPLATE_NAME: "277px", + DIFFERENCED_CHART_TEMPLATE_NAME: "277px", FEATURE_STATISTICS_TEMPLATE_NAME: "650px", CONSTRAINTS_REPORT_TEMPLATE_NAME: "750PX", } @@ -41,7 +41,6 @@ class NotebookProfileViewer: def __init__(self, target_profiles: List[DatasetProfile] = None, reference_profiles: List[DatasetProfile] = None): self.target_profiles = target_profiles self.reference_profiles = reference_profiles - # create json output from profiles if self.target_profiles: if len(self.target_profiles) > 1: logger.warning("More than one profile not implemented yet, default to first profile in the list ") @@ -63,26 +62,32 @@ def __get_compiled_template(self, template_name): logger.debug("Unable to load pybars; install pybars3 to load profile from directly from the current session ") with open(template_path, "r") as file_with_template: source = file_with_template.read() - # compile templated files compiler = Compiler() template = compiler.compile(source) return template - def __pull_feature_data(self, profile, profile_jsons, feature_name): - profile_features = json.loads(profile_jsons[0]) - feature_data = {} - feature_data["properties"] = profile_features.get("properties") - feature_data[feature_name] = profile_features.get("columns").get(feature_name) - feature_data[feature_name]["sum"] = calculate_sum(profile_features, feature_name) - feature_data[feature_name]["variance"] = calculate_variance(profile_features, feature_name) - feature_data[feature_name]["coefficient_of_variation"] = calculate_coefficient_of_variation(profile_features, feature_name) - feature_data[feature_name]["quantile_statistics"] = calculate_quantile_statistics_for_single_feature(profile, profile_features, feature_name) - return feature_data + def __display_feature_chart(self, feature_names, template_name, preferred_cell_height=None): + if type(feature_names) is not list: + feature_names = [feature_names] + template = self.__get_compiled_template(template_name) + if self.reference_profiles: + target_profile_columns = json.loads(self.target_profile_jsons[0]).get("columns") + reference_profile_columns = json.loads(self.reference_profile_jsons[0]).get("columns") + target_profile_features, reference_profile_features = {}, {} + for feature_name in feature_names: + target_profile_features[feature_name] = target_profile_columns.get(feature_name) + reference_profile_features[feature_name] = reference_profile_columns.get(feature_name) + distribution_chart = template( + {"profile_from_whylogs": json.dumps(target_profile_features), "reference_profile_from_whylogs": json.dumps(reference_profile_features)} + ) + return self.__display_rendered_template(distribution_chart, template_name, preferred_cell_height) + else: + logger.warning("This method has to get both target and reference profiles, with valid feature title") + return None def __display_rendered_template(self, template, template_name, height): if not height: height = self.PAGE_SIZES[template_name] - # convert html to iframe and return it wrapped in Ipython...HTML() iframe = f"""
""" return HTML(iframe) @@ -95,23 +100,13 @@ def summary_drift_report(self, preferred_cell_height=None): return self.__display_rendered_template(template(profiles_summary), self.SUMMARY_REPORT_TEMPLATE_NAME, preferred_cell_height) def double_histogram(self, feature_names, preferred_cell_height=None): - if type(feature_names) is not list: - feature_names = [feature_names] - template = self.__get_compiled_template(self.DOUBLE_HISTOGRAM_TEMPLATE_NAME) - if self.reference_profiles: - target_profile_columns = json.loads(self.target_profile_jsons[0]).get("columns") - reference_profile_columns = json.loads(self.reference_profile_jsons[0]).get("columns") - target_profile_features, reference_profile_features = {}, {} - for feature_name in feature_names: - target_profile_features[feature_name] = target_profile_columns.get(feature_name) - reference_profile_features[feature_name] = reference_profile_columns.get(feature_name) - distribution_chart = template( - {"profile_from_whylogs": json.dumps(target_profile_features), "reference_profile_from_whylogs": json.dumps(reference_profile_features)} - ) - return self.__display_rendered_template(distribution_chart, self.DOUBLE_HISTOGRAM_TEMPLATE_NAME, preferred_cell_height) - else: - logger.warning("This method has to get both target and reference profiles, with valid feature title") - return None + return self.__display_feature_chart(feature_names, self.DOUBLE_HISTOGRAM_TEMPLATE_NAME, preferred_cell_height) + + def distribution_chart(self, feature_names, preferred_cell_height=None): + return self.__display_feature_chart(feature_names, self.DISTRIBUTION_CHART_TEMPLATE_NAME, preferred_cell_height) + + def differenced_distribution_chart(self, feature_names, preferred_cell_height=None): + return self.__display_feature_chart(feature_names, self.DIFFERENCED_CHART_TEMPLATE_NAME, preferred_cell_height) def feature_statistics(self, feature_name, profile="reference", preferred_cell_height=None): template = self.__get_compiled_template(self.FEATURE_STATISTICS_TEMPLATE_NAME) @@ -125,7 +120,7 @@ def feature_statistics(self, feature_name, profile="reference", preferred_cell_h rendered_template = template( { "profile_feature_statistics_from_whylogs": json.dumps( - self.__pull_feature_data(selected_profile.get(feature_name), selected_profile_json, feature_name) + add_feature_statistics(selected_profile.get(feature_name), selected_profile_json, feature_name) ) } ) @@ -134,6 +129,11 @@ def feature_statistics(self, feature_name, profile="reference", preferred_cell_h logger.warning("Quantile and descriptive statistics can be calculated for numerical features only!") return None + def constraints_report(self, constraints, preferred_cell_height=None): + template = self.__get_compiled_template(self.CONSTRAINTS_REPORT_TEMPLATE_NAME) + rendered_template = template({"constraints_report": json.dumps(constraints.report())}) + return self.__display_rendered_template(rendered_template, self.CONSTRAINTS_REPORT_TEMPLATE_NAME, preferred_cell_height) + def download(self, html, preferred_path=None, html_file_name=None): if not html_file_name: if self.reference_profiles: @@ -149,8 +149,3 @@ def download(self, html, preferred_path=None, html_file_name=None): saved_html.write(html.data) saved_html.close() return None - - def constraints_report(self, constraints, preferred_cell_height=None): - template = self.__get_compiled_template(self.CONSTRAINTS_REPORT_TEMPLATE_NAME) - rendered_template = template({"constraints_report": json.dumps(constraints.report())}) - return self.__display_rendered_template(rendered_template, self.CONSTRAINTS_REPORT_TEMPLATE_NAME, preferred_cell_height) diff --git a/src/whylogs/viz/utils/profile_viz_calculations.py b/src/whylogs/viz/utils/profile_viz_calculations.py index 38091edc3b..b842072efd 100644 --- a/src/whylogs/viz/utils/profile_viz_calculations.py +++ b/src/whylogs/viz/utils/profile_viz_calculations.py @@ -1,3 +1,5 @@ +import json + from whylogs.core.summaryconverters import ( compute_chi_squared_test_p_value, ks_test_compute_p_value, @@ -5,8 +7,94 @@ ) from whylogs.proto import InferredType, ReferenceDistributionDiscreteMessage -TYPES = InferredType.Type -categorical_types = (TYPES.INTEGRAL, TYPES.STRING, TYPES.BOOLEAN) +categorical_types = (InferredType.Type.INTEGRAL, InferredType.Type.STRING, InferredType.Type.BOOLEAN) + + +def __calculate_variance(profile_jsons, feature_name): + """ + Calculates variance for single feature + + Parameters + ---------- + profile_jsons: Profile summary serialized json + feature_name: Name of feature + + Returns + ------- + variance : Calculated variance for feature + """ + feature = profile_jsons.get("columns").get(feature_name) + variance = feature.get("numberSummary").get("stddev") ** 2 if feature.get("numberSummary") is not None else 0 + return variance + + +def __calculate_coefficient_of_variation(profile_jsons, feature_name): + """ + Calculates coefficient of variation for single feature + + Parameters + ---------- + profile_jsons: Profile summary serialized json + feature_name: Name of feature + + Returns + ------- + coefficient_of_variation : Calculated coefficient of variation for feature + """ + feature = profile_jsons.get("columns").get(feature_name) + coefficient_of_variation = ( + feature.get("numberSummary").get("stddev") / feature.get("numberSummary").get("mean") if feature.get("numberSummary") is not None else 0 + ) + return coefficient_of_variation + + +def __calculate_sum(profile_jsons, feature_name): + """ + Calculates sum for single feature + + Parameters + ---------- + profile_jsons: Profile summary serialized json + feature_name: Name of feature + + Returns + ------- + coefficient_of_variation : Calculated sum for feature + """ + feature = profile_jsons.get("columns").get(feature_name) + feature_number_summary = feature.get("numberSummary") + if feature_number_summary: + sum = feature_number_summary.get("mean") * int(feature.get("counters").get("count")) + else: + sum = 0 + return sum + + +def __calculate_quantile_statistics(feature, profile_jsons, feature_name): + """ + Calculates sum for single feature + + Parameters + ---------- + profile_jsons: Profile summary serialized json + feature_name: Name of feature + + Returns + ------- + coefficient_of_variation : Calculated sum for feature + """ + quantile_statistics = {} + feature_number_summary = profile_jsons.get("columns").get(feature_name).get("numberSummary") + if feature.number_tracker and feature.number_tracker.histogram.get_n() > 0: + kll_sketch = feature.number_tracker.histogram + quantile_statistics["fifth_percentile"] = single_quantile_from_sketch(kll_sketch, quantile=0.05).quantile + quantile_statistics["q1"] = single_quantile_from_sketch(kll_sketch, quantile=0.25).quantile + quantile_statistics["median"] = single_quantile_from_sketch(kll_sketch, quantile=0.5).quantile + quantile_statistics["q3"] = single_quantile_from_sketch(kll_sketch, quantile=0.75).quantile + quantile_statistics["ninety_fifth_percentile"] = single_quantile_from_sketch(kll_sketch, quantile=0.95).quantile + quantile_statistics["range"] = feature_number_summary.get("max") - feature_number_summary.get("min") + quantile_statistics["iqr"] = quantile_statistics["q3"] - quantile_statistics["q1"] + return quantile_statistics def add_drift_val_to_ref_profile_json(target_profile, reference_profile, reference_profile_json): @@ -23,7 +111,6 @@ def add_drift_val_to_ref_profile_json(target_profile, reference_profile, referen ------- reference_profile_json : Reference profile summary serialized json with drift value for every feature """ - # QUESTION: Should this function need to change behaviour to add drift into target profile? observations = 0 missing_cells = 0 total_count = 0 @@ -68,40 +155,26 @@ def add_drift_val_to_ref_profile_json(target_profile, reference_profile, referen return reference_profile_json -def calculate_variance(profile_jsons, feature_name): - feature = profile_jsons.get("columns").get(feature_name) - variance = feature.get("numberSummary").get("stddev") ** 2 if feature.get("numberSummary") is not None else 0 - return variance - - -def calculate_coefficient_of_variation(profile_jsons, feature_name): - feature = profile_jsons.get("columns").get(feature_name) - coefficient_of_variation = ( - feature.get("numberSummary").get("stddev") / feature.get("numberSummary").get("mean") if feature.get("numberSummary") is not None else 0 - ) - return coefficient_of_variation - - -def calculate_sum(profile_jsons, feature_name): - feature = profile_jsons.get("columns").get(feature_name) - feature_number_summary = feature.get("numberSummary") - if feature_number_summary: - sum = feature_number_summary.get("mean") * int(feature.get("counters").get("count")) - else: - sum = 0 - return sum +def add_feature_statistics(feature, profile_jsons, feature_name): + """ + Calculates different values for feature statistics + Parameters + ---------- + feature: + profile_jsons: Profile summary serialized json + feature_name: Name of feature -def calculate_quantile_statistics_for_single_feature(feature, profile_jsons, feature_name): - quantile_statistics = {} - feature_number_summary = profile_jsons.get("columns").get(feature_name).get("numberSummary") - if feature.number_tracker and feature.number_tracker.histogram.get_n() > 0: - kll_sketch = feature.number_tracker.histogram - quantile_statistics["fifth_percentile"] = single_quantile_from_sketch(kll_sketch, quantile=0.05).quantile - quantile_statistics["q1"] = single_quantile_from_sketch(kll_sketch, quantile=0.25).quantile - quantile_statistics["median"] = single_quantile_from_sketch(kll_sketch, quantile=0.5).quantile - quantile_statistics["q3"] = single_quantile_from_sketch(kll_sketch, quantile=0.75).quantile - quantile_statistics["ninety_fifth_percentile"] = single_quantile_from_sketch(kll_sketch, quantile=0.95).quantile - quantile_statistics["range"] = feature_number_summary.get("max") - feature_number_summary.get("min") - quantile_statistics["iqr"] = quantile_statistics["q3"] - quantile_statistics["q1"] - return quantile_statistics + Returns + ------- + feature: Feature data with appended values for statistics report + """ + profile_features = json.loads(profile_jsons[0]) + feature_with_statistics = {} + feature_with_statistics["properties"] = profile_features.get("properties") + feature_with_statistics[feature_name] = profile_features.get("columns").get(feature_name) + feature_with_statistics[feature_name]["sum"] = __calculate_sum(profile_features, feature_name) + feature_with_statistics[feature_name]["variance"] = __calculate_variance(profile_features, feature_name) + feature_with_statistics[feature_name]["coefficient_of_variation"] = __calculate_coefficient_of_variation(profile_features, feature_name) + feature_with_statistics[feature_name]["quantile_statistics"] = __calculate_quantile_statistics(feature, profile_features, feature_name) + return feature_with_statistics From d35ec6f305e372bea3f581d7f27d2799ed1cc726 Mon Sep 17 00:00:00 2001 From: Perch2005 <78492631+Perch2005@users.noreply.github.com> Date: Mon, 21 Feb 2022 15:58:40 +0400 Subject: [PATCH 103/119] fix bins height --- src/whylogs/viewer/index-hbs-cdn-all-in-jupyter-bar-chart.html | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/whylogs/viewer/index-hbs-cdn-all-in-jupyter-bar-chart.html b/src/whylogs/viewer/index-hbs-cdn-all-in-jupyter-bar-chart.html index 8268f0c144..1e192b5ef2 100644 --- a/src/whylogs/viewer/index-hbs-cdn-all-in-jupyter-bar-chart.html +++ b/src/whylogs/viewer/index-hbs-cdn-all-in-jupyter-bar-chart.html @@ -299,7 +299,7 @@

Hold on! :)

yFormat = yScale.tickFormat(100, yFormat); svgEl.append("g") - .attr("transform", `translate(${MARGIN.LEFT}, ${MARGIN.TOP})`) + .attr("transform", `translate(${MARGIN.LEFT}, 0)`) .call(yAxis) .call(g => g.select(".domain").remove()) .call(g => g.selectAll(".tick line") From 3c09971c8ab636a0ead1dd3148d87e373f5b44cf Mon Sep 17 00:00:00 2001 From: Jirayr-Solvee Date: Mon, 21 Feb 2022 16:24:35 +0400 Subject: [PATCH 104/119] transform X axis 90 deg for double histogram --- ...cdn-all-in-jupyter-distribution-chart.html | 89 +++++++++---------- src/whylogs/viz/jupyter_notebook_viz.py | 2 +- 2 files changed, 45 insertions(+), 46 deletions(-) diff --git a/src/whylogs/viewer/index-hbs-cdn-all-in-jupyter-distribution-chart.html b/src/whylogs/viewer/index-hbs-cdn-all-in-jupyter-distribution-chart.html index dfce9c40f9..0ada18e170 100644 --- a/src/whylogs/viewer/index-hbs-cdn-all-in-jupyter-distribution-chart.html +++ b/src/whylogs/viewer/index-hbs-cdn-all-in-jupyter-distribution-chart.html @@ -188,7 +188,7 @@

Hold on! :)

//helper fun class GenerateChartParams { - constructor(height, width, data, bottomMargin=30, topMargin=5) { + constructor(height, width, targetData, referenceData, bottomMargin=30, topMargin=5) { this.MARGIN = { TOP: topMargin, RIGHT: 5, @@ -201,12 +201,16 @@

Hold on! :)

this.CHART_HEIGHT = this.SVG_HEIGHT - this.MARGIN.TOP - this.MARGIN.BOTTOM; this.svgEl = d3.create("svg") .attr("preserveAspectRatio", "xMinYMin meet") - .attr("viewBox", `20 0 ${$(window).width()} ${$(window).height()-55}`) + .attr("viewBox", `20 0 ${$(window).width()} ${$(window).height()-30}`) .classed("svg-content-responsive", true) - this.maxYValue = d3.max(data, (d) => Math.abs(d.axisY)); + this.maxYValue = d3.max(targetData, (d) => Math.abs(d.axisY)); + const mergedData = targetData.concat(referenceData).map(a => { + return a.axisX + }) + const mergedAndSortedData = mergedData.sort(function(a, b) { return a - b; }); this.xScale = d3 .scaleBand() - .domain(data.map((d) => d.axisX)) + .domain(mergedAndSortedData.map((d) => d)) .range([this.MARGIN.LEFT, this.MARGIN.LEFT + this.CHART_WIDTH]); this.yScale = d3 .scaleLinear() @@ -217,56 +221,37 @@

Hold on! :)

function chartData(column, startIndex) { const data = []; - if (column.numberSummary.isDiscrete) { - column.frequentItems.items.forEach((item, index) => { - data.push({ - axisY: item.estimate, - axisX: index, - }); - }); - } else { - for (let i = 0; i referenceProfileMin ? - chartsStartPoint.push(Math.ceil(targetedProfileMin-referenceProfileMin), 0): - chartsStartPoint.push(0, Math.ceil(referenceProfileMin-targetedProfileMin)) - histogramData = chartData(data, chartsStartPoint[0]) - overlappedHistogramData = chartData(referenceData, chartsStartPoint[1]) - } + const targetedProfileMin = targetData.numberSummary.histogram.min < 0 ? + targetData.numberSummary.histogram.min.toFixed(1) * 10 * -1 : + targetData.numberSummary.histogram.min.toFixed(1) * 10 + + const referenceProfileMin = referenceData.numberSummary.histogram.min < 0 ? + referenceData.numberSummary.histogram.min.toFixed(1) * 10 * -1 : + referenceData.numberSummary.histogram.min.toFixed(1) * 10 + + const chartsStartPoint = [] + targetedProfileMin > referenceProfileMin ? + chartsStartPoint.push(Math.ceil(targetedProfileMin-referenceProfileMin), 0): + chartsStartPoint.push(0, Math.ceil(referenceProfileMin-targetedProfileMin)) + histogramData = chartData(targetData, chartsStartPoint[0], 'original_profile') + overlappedHistogramData = chartData(referenceData, chartsStartPoint[1], 'ref_profile') let yFormat, xFormat; - const sizes = new GenerateChartParams($(window).height()-55, $(window).width(), histogramData) + const sizes = new GenerateChartParams($(window).height()-55, $(window).width(), histogramData, overlappedHistogramData) const { MARGIN, SVG_WIDTH, @@ -296,6 +281,21 @@

Hold on! :)

.attr("fill", "currentColor") .attr("text-anchor", "start")); + svgEl.append("g") + .attr("transform", `translate(0,${SVG_HEIGHT - MARGIN.BOTTOM})`) + .call(xAxis) + .selectAll("text") + .attr("dy", "-.6em") + .attr("dx", "-.8em") + .attr("transform", "rotate(-90)") + .style("text-anchor", "end") + .call(g => g.select(".domain").remove()) + .call(g => g.selectAll(".tick line").remove()) + .call(g => g.append("text") + .attr("fill", "currentColor") + .attr("text-anchor", "end")) + .style("font-size", "10") + const gChart = svgEl.append("g"); gChart .selectAll(".bar") @@ -323,7 +323,6 @@

Hold on! :)

.attr("y", (d) => yScale(d.axisY) + MARGIN.TOP) .attr("fill", "#369BAC") .style("opacity", "0.6"); - return svgEl._groups[0][0].outerHTML; } diff --git a/src/whylogs/viz/jupyter_notebook_viz.py b/src/whylogs/viz/jupyter_notebook_viz.py index 02edf72029..d2a55b3142 100644 --- a/src/whylogs/viz/jupyter_notebook_viz.py +++ b/src/whylogs/viz/jupyter_notebook_viz.py @@ -31,7 +31,7 @@ class NotebookProfileViewer: CONSTRAINTS_REPORT_TEMPLATE_NAME = "index-hbs-cdn-all-in-jupyter-constraints-report.html" PAGE_SIZES = { SUMMARY_REPORT_TEMPLATE_NAME: "1000px", - DOUBLE_HISTOGRAM_TEMPLATE_NAME: "277px", + DOUBLE_HISTOGRAM_TEMPLATE_NAME: "300px", DISTRIBUTION_CHART_TEMPLATE_NAME: "277px", DIFFERENCED_CHART_TEMPLATE_NAME: "277px", FEATURE_STATISTICS_TEMPLATE_NAME: "650px", From 044d45aa9e961ac24e99e48ee994c96a43a77742 Mon Sep 17 00:00:00 2001 From: Jirayr-Solvee Date: Tue, 22 Feb 2022 02:22:40 +0400 Subject: [PATCH 105/119] add method for setting profiles --- examples/Profile_Viewer_In_Notebook.ipynb | 4 +- src/whylogs/viz/jupyter_notebook_viz.py | 47 +++++++++---------- .../viz/utils/profile_viz_calculations.py | 6 +-- 3 files changed, 27 insertions(+), 30 deletions(-) diff --git a/examples/Profile_Viewer_In_Notebook.ipynb b/examples/Profile_Viewer_In_Notebook.ipynb index e988a3680f..f581536ccc 100644 --- a/examples/Profile_Viewer_In_Notebook.ipynb +++ b/examples/Profile_Viewer_In_Notebook.ipynb @@ -151,8 +151,8 @@ "metadata": {}, "outputs": [], "source": [ - "# should we allow multiple profiles?\n", - "visualization = NotebookProfileViewer(target_profiles=[target_profile], reference_profiles=[reference_profile])" + "visualization = NotebookProfileViewer()\n", + "visualization.set_profiles(target_profile=target_profile, reference_profile=reference_profile)" ] }, { diff --git a/src/whylogs/viz/jupyter_notebook_viz.py b/src/whylogs/viz/jupyter_notebook_viz.py index d2a55b3142..a69ac54e3b 100644 --- a/src/whylogs/viz/jupyter_notebook_viz.py +++ b/src/whylogs/viz/jupyter_notebook_viz.py @@ -2,7 +2,6 @@ import json import logging import os -from typing import List from IPython.core.display import HTML @@ -38,16 +37,6 @@ class NotebookProfileViewer: CONSTRAINTS_REPORT_TEMPLATE_NAME: "750PX", } - def __init__(self, target_profiles: List[DatasetProfile] = None, reference_profiles: List[DatasetProfile] = None): - self.target_profiles = target_profiles - self.reference_profiles = reference_profiles - if self.target_profiles: - if len(self.target_profiles) > 1: - logger.warning("More than one profile not implemented yet, default to first profile in the list ") - self.target_profile_jsons = [message_to_json(each_prof.to_summary()) for each_prof in self.target_profiles] - if self.reference_profiles: - self.reference_profile_jsons = [message_to_json(each_prof.to_summary()) for each_prof in self.reference_profiles] - def __get_template_path(self, html_file_name): template_path = os.path.abspath(os.path.join(_MY_DIR, os.pardir, "viewer", html_file_name)) return template_path @@ -70,9 +59,9 @@ def __display_feature_chart(self, feature_names, template_name, preferred_cell_h if type(feature_names) is not list: feature_names = [feature_names] template = self.__get_compiled_template(template_name) - if self.reference_profiles: - target_profile_columns = json.loads(self.target_profile_jsons[0]).get("columns") - reference_profile_columns = json.loads(self.reference_profile_jsons[0]).get("columns") + if self._reference_profile: + target_profile_columns = json.loads(self._target_profile_json).get("columns") + reference_profile_columns = json.loads(self._reference_profile_json).get("columns") target_profile_features, reference_profile_features = {}, {} for feature_name in feature_names: target_profile_features[feature_name] = target_profile_columns.get(feature_name) @@ -91,11 +80,19 @@ def __display_rendered_template(self, template, template_name, height): iframe = f"""
""" return HTML(iframe) + def set_profiles(self, target_profile: DatasetProfile = None, reference_profile: DatasetProfile = None): + self._target_profile = target_profile + self._reference_profile = reference_profile + if self._target_profile: + self._target_profile_json = message_to_json(self._target_profile.to_summary()) + if self._reference_profile: + self._reference_profile_json = message_to_json(self._reference_profile.to_summary()) + def summary_drift_report(self, preferred_cell_height=None): - reference_profile = add_drift_val_to_ref_profile_json(self.target_profiles[0], self.reference_profiles[0], json.loads(self.reference_profile_jsons[0])) + reference_profile = add_drift_val_to_ref_profile_json(self._target_profile, self._reference_profile, json.loads(self._reference_profile_json)) template = self.__get_compiled_template(self.SUMMARY_REPORT_TEMPLATE_NAME) - profiles_summary = {"profile_from_whylogs": self.target_profile_jsons[0]} - if self.reference_profiles: + profiles_summary = {"profile_from_whylogs": self._target_profile_json} + if self._reference_profile: profiles_summary["reference_profile_from_whylogs"] = json.dumps(reference_profile) return self.__display_rendered_template(template(profiles_summary), self.SUMMARY_REPORT_TEMPLATE_NAME, preferred_cell_height) @@ -110,12 +107,12 @@ def differenced_distribution_chart(self, feature_names, preferred_cell_height=No def feature_statistics(self, feature_name, profile="reference", preferred_cell_height=None): template = self.__get_compiled_template(self.FEATURE_STATISTICS_TEMPLATE_NAME) - if self.reference_profiles and profile.lower() == "reference": - selected_profile_json = self.reference_profile_jsons - selected_profile = self.reference_profiles[0].columns + if self._reference_profile and profile.lower() == "reference": + selected_profile_json = self._reference_profile_json + selected_profile = self._reference_profile.columns else: - selected_profile_json = self.target_profile_jsons - selected_profile = self.target_profiles[0].columns + selected_profile_json = self._target_profile_json + selected_profile = self._target_profile.columns if selected_profile.get(feature_name).schema_tracker.to_summary().inferred_type.type in numerical_types: rendered_template = template( { @@ -136,10 +133,10 @@ def constraints_report(self, constraints, preferred_cell_height=None): def download(self, html, preferred_path=None, html_file_name=None): if not html_file_name: - if self.reference_profiles: - html_file_name = self.reference_profiles[0].dataset_timestamp + if self._reference_profile: + html_file_name = self._reference_profile.dataset_timestamp else: - html_file_name = self.target_profiles[0].dataset_timestamp + html_file_name = self._target_profile.dataset_timestamp if preferred_path: path = os.path.expanduser(preferred_path) else: diff --git a/src/whylogs/viz/utils/profile_viz_calculations.py b/src/whylogs/viz/utils/profile_viz_calculations.py index b842072efd..7d1f1d5a9d 100644 --- a/src/whylogs/viz/utils/profile_viz_calculations.py +++ b/src/whylogs/viz/utils/profile_viz_calculations.py @@ -155,21 +155,21 @@ def add_drift_val_to_ref_profile_json(target_profile, reference_profile, referen return reference_profile_json -def add_feature_statistics(feature, profile_jsons, feature_name): +def add_feature_statistics(feature, profile_json, feature_name): """ Calculates different values for feature statistics Parameters ---------- feature: - profile_jsons: Profile summary serialized json + profile_json: Profile summary serialized json feature_name: Name of feature Returns ------- feature: Feature data with appended values for statistics report """ - profile_features = json.loads(profile_jsons[0]) + profile_features = json.loads(profile_json) feature_with_statistics = {} feature_with_statistics["properties"] = profile_features.get("properties") feature_with_statistics[feature_name] = profile_features.get("columns").get(feature_name) From cedb4979181b40bba0d92252001ef5449d6ad7bd Mon Sep 17 00:00:00 2001 From: ssxcho Date: Tue, 22 Feb 2022 05:38:13 +0400 Subject: [PATCH 106/119] add tests for jupyter notebook visualization --- tests/unit/viz/test_jupyter_notebook_viz.py | 208 ++++++++++++++++++++ 1 file changed, 208 insertions(+) create mode 100644 tests/unit/viz/test_jupyter_notebook_viz.py diff --git a/tests/unit/viz/test_jupyter_notebook_viz.py b/tests/unit/viz/test_jupyter_notebook_viz.py new file mode 100644 index 0000000000..c96ecb2f9b --- /dev/null +++ b/tests/unit/viz/test_jupyter_notebook_viz.py @@ -0,0 +1,208 @@ +import datetime +import os + +import numpy as np + +from faker import Faker +from collections import OrderedDict + +from whylogs import get_or_create_session +from whylogs.viz import NotebookProfileViewer +from whylogs.core.statistics.constraints import ( + columnsMatchSetConstraint, + columnValuesInSetConstraint, + ValueConstraint, + columnValuesUniqueWithinRow, + columnPairValuesInSetConstraint, + sumOfRowValuesOfMultipleColumnsEqualsConstraint, + DatasetConstraints, + SummaryConstraint, + SummaryConstraints, + Op, +) + + +def __generate_target_profile(): + + session = get_or_create_session() + locales = OrderedDict([ + ('en-US', 1), + ('fr-FR', 2), + ('ja_JP', 2), + ]) + fake = Faker(locales) + with session.logger("mytestytest", dataset_timestamp=datetime.datetime(2021, 6, 2)) as logger: + for _ in range(5): + logger.log({"strings": fake.name()}) + logger.log({"uniform_integers": np.random.randint(0, 50)}) + logger.log({"nulls": None}) + + return logger.profile + + +def __generate_reference_profile(): + + session = get_or_create_session() + locales = OrderedDict([ + ('en-US', 1), + ('fr-FR', 2), + ('ja_JP', 2), + ]) + fake = Faker(locales) + with session.logger("mytestytest", dataset_timestamp=datetime.datetime(2021, 6, 2)) as logger: + for _ in range(5): + logger.log({"strings": fake.name()}) + logger.log({"uniform_integers": np.random.randint(0, 50)}) + logger.log({"nulls": None}) + + return logger.profile + + +def _get_sample_dataset_constraints(): + cvisc = columnValuesInSetConstraint(value_set={2, 5, 8}) + ltc = ValueConstraint(Op.LT, 1) + + min_gt_constraint = SummaryConstraint("min", Op.GT, value=100) + max_le_constraint = SummaryConstraint("max", Op.LE, value=5) + + set1 = set(["col1", "col2"]) + columns_match_constraint = columnsMatchSetConstraint(set1) + + val_set = {(1, 2), (3, 5)} + col_set = ["A", "B"] + mcv_constraints = [ + columnValuesUniqueWithinRow(column_A="A", verbose=True), + columnPairValuesInSetConstraint(column_A="A", column_B="B", value_set=val_set), + sumOfRowValuesOfMultipleColumnsEqualsConstraint(columns=col_set, value=100), + ] + + return DatasetConstraints( + None, + value_constraints={"annual_inc": [cvisc, ltc]}, + summary_constraints={"annual_inc": [max_le_constraint, min_gt_constraint]}, + table_shape_constraints=[columns_match_constraint], + multi_column_value_constraints=mcv_constraints, + ) + + +def test_notebook_profile_viewer_set_profiles(): + target_profile = __generate_target_profile() + reference_profile = __generate_reference_profile() + viz = NotebookProfileViewer() + viz.set_profiles(target_profile=target_profile, reference_profile=reference_profile) + + +def test_summary_drift_report_without_preferred_height(): + target_profile = __generate_target_profile() + reference_profile = __generate_reference_profile() + viz = NotebookProfileViewer() + viz.set_profiles(target_profile=target_profile, reference_profile=reference_profile) + viz.summary_drift_report() + + +def test_summary_drift_report_with_preferred_height(): + target_profile = __generate_target_profile() + reference_profile = __generate_reference_profile() + viz = NotebookProfileViewer() + viz.set_profiles(target_profile=target_profile, reference_profile=reference_profile) + viz.summary_drift_report() + + +def test_feature_statistics_not_passing_profile_type(): + target_profile = __generate_target_profile() + reference_profile = __generate_reference_profile() + viz = NotebookProfileViewer() + viz.set_profiles(target_profile=target_profile, reference_profile=reference_profile) + viz.feature_statistics('uniform_integers') + + +def test_feature_statistics_passing_profile_type(): + target_profile = __generate_target_profile() + reference_profile = __generate_reference_profile() + viz = NotebookProfileViewer() + viz.set_profiles(target_profile=target_profile, reference_profile=reference_profile) + viz.feature_statistics('uniform_integers', 'target') + + +def test_feature_statistics_passing_profile_type_and_prefered_height(): + target_profile = __generate_target_profile() + reference_profile = __generate_reference_profile() + viz = NotebookProfileViewer() + viz.set_profiles(target_profile=target_profile, reference_profile=reference_profile) + viz.feature_statistics('uniform_integers', 'target', '1000px') + + +def test_download_passing_all_arguments(tmpdir): + target_profile = __generate_target_profile() + reference_profile = __generate_reference_profile() + viz = NotebookProfileViewer() + viz.set_profiles(target_profile=target_profile, reference_profile=reference_profile) + + download = viz.download(viz.summary_drift_report(), tmpdir, html_file_name='foo') + assert os.path.exists(tmpdir + "/foo.html") + + +def test_constraints_report_without_preferred_height(): + target_profile = __generate_target_profile() + reference_profile = __generate_reference_profile() + viz = NotebookProfileViewer() + viz.set_profiles(target_profile=target_profile, reference_profile=reference_profile) + dc = _get_sample_dataset_constraints() + viz.constraints_report(dc) + + +def test_constraints_report_with_preferred_height(): + target_profile = __generate_target_profile() + reference_profile = __generate_reference_profile() + viz = NotebookProfileViewer() + viz.set_profiles(target_profile=target_profile, reference_profile=reference_profile) + dc = _get_sample_dataset_constraints() + viz.constraints_report(dc, preferred_cell_height='1000px') + + +def test_double_histogram_without_height(): + target_profile = __generate_target_profile() + reference_profile = __generate_reference_profile() + viz = NotebookProfileViewer() + viz.set_profiles(target_profile=target_profile, reference_profile=reference_profile) + viz.double_histogram('uniform_integers') + + +def test_double_histogram_with_height(): + target_profile = __generate_target_profile() + reference_profile = __generate_reference_profile() + viz = NotebookProfileViewer() + viz.set_profiles(target_profile=target_profile, reference_profile=reference_profile) + viz.double_histogram('uniform_integers', '1000px') + + +def test_distribution_chart_without_height(): + target_profile = __generate_target_profile() + reference_profile = __generate_reference_profile() + viz = NotebookProfileViewer() + viz.set_profiles(target_profile=target_profile, reference_profile=reference_profile) + viz.distribution_chart('strings') + + +def test_distribution_chart_with_height(): + target_profile = __generate_target_profile() + reference_profile = __generate_reference_profile() + viz = NotebookProfileViewer() + viz.set_profiles(target_profile=target_profile, reference_profile=reference_profile) + viz.distribution_chart('strings', '1000px') + + +def test_difference_distribution_chart_without_height(): + target_profile = __generate_target_profile() + reference_profile = __generate_reference_profile() + viz = NotebookProfileViewer() + viz.set_profiles(target_profile=target_profile, reference_profile=reference_profile) + viz.difference_distribution_chart('strings') + + +def test_difference_distribution_chart_with_height(): + target_profile = __generate_target_profile() + reference_profile = __generate_reference_profile() + viz = NotebookProfileViewer() + viz.set_profiles(target_profile=target_profile, reference_profile=reference_profile) + viz.difference_distribution_chart('strings', '1000px') From 2935d0f9a2db46bf7ca681dc9152eaedee9d1da5 Mon Sep 17 00:00:00 2001 From: ssxcho Date: Tue, 22 Feb 2022 05:41:53 +0400 Subject: [PATCH 107/119] changes paths for html templates, fix typing error --- src/whylogs/viz/browser_viz.py | 21 ++++++++++++------ src/whylogs/viz/jupyter_notebook_viz.py | 29 ++++++++++++++++--------- 2 files changed, 33 insertions(+), 17 deletions(-) diff --git a/src/whylogs/viz/browser_viz.py b/src/whylogs/viz/browser_viz.py index fea9b80393..3822f76453 100644 --- a/src/whylogs/viz/browser_viz.py +++ b/src/whylogs/viz/browser_viz.py @@ -21,26 +21,32 @@ def profile_viewer(profiles: List[DatasetProfile] = None, reference_profiles: Li except ImportError as e: Compiler = None logger.debug(str(e)) - logger.debug("Unable to load pybars; install pybars3 to load profile from directly from the current session ") + logger.debug( + "Unable to load pybars; install pybars3 to load profile from directly from the current session ") - index_path = os.path.abspath(os.path.join(_MY_DIR, os.pardir, "viewer", "index.html")) + index_path = os.path.abspath(os.path.join( + _MY_DIR, os.pardir, "viewer/templates", "index.html")) webbrowser.open_new_tab(f"file:{index_path}#") return None # create json output from profiles if profiles: if len(profiles) > 1: - logger.warning("More than one profile not implemented yet, default to first profile in the list ") + logger.warning( + "More than one profile not implemented yet, default to first profile in the list ") profile_jsons = [message_to_json(each_prof.to_summary()) for each_prof in profiles] if reference_profiles: - reference_profile_jsons = [message_to_json(each_prof.to_summary()) for each_prof in reference_profiles] + reference_profile_jsons = [message_to_json( + each_prof.to_summary()) for each_prof in reference_profiles] else: - index_path = os.path.abspath(os.path.join(_MY_DIR, os.pardir, "viewer", "index.html")) + index_path = os.path.abspath(os.path.join( + _MY_DIR, os.pardir, "viewer/templates", "index.html")) webbrowser.open_new_tab(f"file:{index_path}#") return None - index_path = os.path.abspath(os.path.join(_MY_DIR, os.pardir, "viewer", "index-hbs-cdn-all-in.html")) + index_path = os.path.abspath(os.path.join( + _MY_DIR, os.pardir, "viewer/templates", "index-hbs-cdn-all-in.html")) with open(index_path, "r") as file_with_template: source = file_with_template.read() @@ -50,7 +56,8 @@ def profile_viewer(profiles: List[DatasetProfile] = None, reference_profiles: Li template = compiler.compile(source) # replace handlebars for json profiles if reference_profiles: - output_index = template({"profile_from_whylogs": profile_jsons[0], "reference_profile": reference_profile_jsons[0]}) + output_index = template( + {"profile_from_whylogs": profile_jsons[0], "reference_profile": reference_profile_jsons[0]}) else: output_index = template({"profile_from_whylogs": profile_jsons[0]}) diff --git a/src/whylogs/viz/jupyter_notebook_viz.py b/src/whylogs/viz/jupyter_notebook_viz.py index a69ac54e3b..bae1aa330c 100644 --- a/src/whylogs/viz/jupyter_notebook_viz.py +++ b/src/whylogs/viz/jupyter_notebook_viz.py @@ -38,7 +38,9 @@ class NotebookProfileViewer: } def __get_template_path(self, html_file_name): - template_path = os.path.abspath(os.path.join(_MY_DIR, os.pardir, "viewer", html_file_name)) + template_path = os.path.abspath(os.path.join( + _MY_DIR, os.pardir, "viewer/templates", html_file_name)) + print(_MY_DIR) return template_path def __get_compiled_template(self, template_name): @@ -48,7 +50,8 @@ def __get_compiled_template(self, template_name): except ImportError as e: Compiler = None logger.debug(str(e)) - logger.debug("Unable to load pybars; install pybars3 to load profile from directly from the current session ") + logger.debug( + "Unable to load pybars; install pybars3 to load profile from directly from the current session ") with open(template_path, "r") as file_with_template: source = file_with_template.read() compiler = Compiler() @@ -65,13 +68,16 @@ def __display_feature_chart(self, feature_names, template_name, preferred_cell_h target_profile_features, reference_profile_features = {}, {} for feature_name in feature_names: target_profile_features[feature_name] = target_profile_columns.get(feature_name) - reference_profile_features[feature_name] = reference_profile_columns.get(feature_name) + reference_profile_features[feature_name] = reference_profile_columns.get( + feature_name) distribution_chart = template( - {"profile_from_whylogs": json.dumps(target_profile_features), "reference_profile_from_whylogs": json.dumps(reference_profile_features)} + {"profile_from_whylogs": json.dumps( + target_profile_features), "reference_profile_from_whylogs": json.dumps(reference_profile_features)} ) return self.__display_rendered_template(distribution_chart, template_name, preferred_cell_height) else: - logger.warning("This method has to get both target and reference profiles, with valid feature title") + logger.warning( + "This method has to get both target and reference profiles, with valid feature title") return None def __display_rendered_template(self, template, template_name, height): @@ -89,7 +95,8 @@ def set_profiles(self, target_profile: DatasetProfile = None, reference_profile: self._reference_profile_json = message_to_json(self._reference_profile.to_summary()) def summary_drift_report(self, preferred_cell_height=None): - reference_profile = add_drift_val_to_ref_profile_json(self._target_profile, self._reference_profile, json.loads(self._reference_profile_json)) + reference_profile = add_drift_val_to_ref_profile_json( + self._target_profile, self._reference_profile, json.loads(self._reference_profile_json)) template = self.__get_compiled_template(self.SUMMARY_REPORT_TEMPLATE_NAME) profiles_summary = {"profile_from_whylogs": self._target_profile_json} if self._reference_profile: @@ -102,7 +109,7 @@ def double_histogram(self, feature_names, preferred_cell_height=None): def distribution_chart(self, feature_names, preferred_cell_height=None): return self.__display_feature_chart(feature_names, self.DISTRIBUTION_CHART_TEMPLATE_NAME, preferred_cell_height) - def differenced_distribution_chart(self, feature_names, preferred_cell_height=None): + def difference_distribution_chart(self, feature_names, preferred_cell_height=None): return self.__display_feature_chart(feature_names, self.DIFFERENCED_CHART_TEMPLATE_NAME, preferred_cell_height) def feature_statistics(self, feature_name, profile="reference", preferred_cell_height=None): @@ -117,13 +124,15 @@ def feature_statistics(self, feature_name, profile="reference", preferred_cell_h rendered_template = template( { "profile_feature_statistics_from_whylogs": json.dumps( - add_feature_statistics(selected_profile.get(feature_name), selected_profile_json, feature_name) + add_feature_statistics(selected_profile.get( + feature_name), selected_profile_json, feature_name) ) } ) return self.__display_rendered_template(rendered_template, self.FEATURE_STATISTICS_TEMPLATE_NAME, preferred_cell_height) else: - logger.warning("Quantile and descriptive statistics can be calculated for numerical features only!") + logger.warning( + "Quantile and descriptive statistics can be calculated for numerical features only!") return None def constraints_report(self, constraints, preferred_cell_height=None): @@ -138,7 +147,7 @@ def download(self, html, preferred_path=None, html_file_name=None): else: html_file_name = self._target_profile.dataset_timestamp if preferred_path: - path = os.path.expanduser(preferred_path) + path = os.path.join(os.path.expanduser(preferred_path), str(html_file_name) + ".html") else: path = os.path.join(os.pardir, "html_reports", str(html_file_name) + ".html") full_path = os.path.abspath(path) From d8707262ae42f7ec0cfada6c5779ba823388cff8 Mon Sep 17 00:00:00 2001 From: ssxcho Date: Tue, 22 Feb 2022 05:46:43 +0400 Subject: [PATCH 108/119] fix typing error, remove print from code --- examples/Profile_Viewer_In_Notebook.ipynb | 6 +++--- src/whylogs/viz/jupyter_notebook_viz.py | 1 - 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/examples/Profile_Viewer_In_Notebook.ipynb b/examples/Profile_Viewer_In_Notebook.ipynb index f581536ccc..ee2876a96a 100644 --- a/examples/Profile_Viewer_In_Notebook.ipynb +++ b/examples/Profile_Viewer_In_Notebook.ipynb @@ -299,7 +299,7 @@ }, "outputs": [], "source": [ - "visualization.differenced_distribution_chart(feature_names=\"strings\")" + "visualization.difference_distribution_chart(feature_names=\"strings\")" ] }, { @@ -481,7 +481,7 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, @@ -495,7 +495,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.7" + "version": "3.8.10" } }, "nbformat": 4, diff --git a/src/whylogs/viz/jupyter_notebook_viz.py b/src/whylogs/viz/jupyter_notebook_viz.py index bae1aa330c..e95124c45d 100644 --- a/src/whylogs/viz/jupyter_notebook_viz.py +++ b/src/whylogs/viz/jupyter_notebook_viz.py @@ -40,7 +40,6 @@ class NotebookProfileViewer: def __get_template_path(self, html_file_name): template_path = os.path.abspath(os.path.join( _MY_DIR, os.pardir, "viewer/templates", html_file_name)) - print(_MY_DIR) return template_path def __get_compiled_template(self, template_name): From ca59a19c3901f80a51447d0d49546e6f036db660 Mon Sep 17 00:00:00 2001 From: ssxcho Date: Tue, 22 Feb 2022 05:51:31 +0400 Subject: [PATCH 109/119] move html templates to new directory --- .../index-hbs-cdn-all-in-for-jupyter-notebook.html | 0 .../{ => templates}/index-hbs-cdn-all-in-jupyter-bar-chart.html | 0 .../index-hbs-cdn-all-in-jupyter-constraints-report.html | 0 .../index-hbs-cdn-all-in-jupyter-differenced-chart.html | 0 .../index-hbs-cdn-all-in-jupyter-distribution-chart.html | 0 .../index-hbs-cdn-all-in-jupyter-feature-summary-statistics.html | 0 src/whylogs/viewer/{ => templates}/index-hbs-cdn-all-in.html | 0 src/whylogs/viewer/{ => templates}/index-hbs-library-all-in.html | 0 src/whylogs/viewer/{ => templates}/index-hbs.html | 0 src/whylogs/viewer/{ => templates}/index.html | 0 10 files changed, 0 insertions(+), 0 deletions(-) rename src/whylogs/viewer/{ => templates}/index-hbs-cdn-all-in-for-jupyter-notebook.html (100%) rename src/whylogs/viewer/{ => templates}/index-hbs-cdn-all-in-jupyter-bar-chart.html (100%) rename src/whylogs/viewer/{ => templates}/index-hbs-cdn-all-in-jupyter-constraints-report.html (100%) rename src/whylogs/viewer/{ => templates}/index-hbs-cdn-all-in-jupyter-differenced-chart.html (100%) rename src/whylogs/viewer/{ => templates}/index-hbs-cdn-all-in-jupyter-distribution-chart.html (100%) rename src/whylogs/viewer/{ => templates}/index-hbs-cdn-all-in-jupyter-feature-summary-statistics.html (100%) rename src/whylogs/viewer/{ => templates}/index-hbs-cdn-all-in.html (100%) rename src/whylogs/viewer/{ => templates}/index-hbs-library-all-in.html (100%) rename src/whylogs/viewer/{ => templates}/index-hbs.html (100%) rename src/whylogs/viewer/{ => templates}/index.html (100%) diff --git a/src/whylogs/viewer/index-hbs-cdn-all-in-for-jupyter-notebook.html b/src/whylogs/viewer/templates/index-hbs-cdn-all-in-for-jupyter-notebook.html similarity index 100% rename from src/whylogs/viewer/index-hbs-cdn-all-in-for-jupyter-notebook.html rename to src/whylogs/viewer/templates/index-hbs-cdn-all-in-for-jupyter-notebook.html diff --git a/src/whylogs/viewer/index-hbs-cdn-all-in-jupyter-bar-chart.html b/src/whylogs/viewer/templates/index-hbs-cdn-all-in-jupyter-bar-chart.html similarity index 100% rename from src/whylogs/viewer/index-hbs-cdn-all-in-jupyter-bar-chart.html rename to src/whylogs/viewer/templates/index-hbs-cdn-all-in-jupyter-bar-chart.html diff --git a/src/whylogs/viewer/index-hbs-cdn-all-in-jupyter-constraints-report.html b/src/whylogs/viewer/templates/index-hbs-cdn-all-in-jupyter-constraints-report.html similarity index 100% rename from src/whylogs/viewer/index-hbs-cdn-all-in-jupyter-constraints-report.html rename to src/whylogs/viewer/templates/index-hbs-cdn-all-in-jupyter-constraints-report.html diff --git a/src/whylogs/viewer/index-hbs-cdn-all-in-jupyter-differenced-chart.html b/src/whylogs/viewer/templates/index-hbs-cdn-all-in-jupyter-differenced-chart.html similarity index 100% rename from src/whylogs/viewer/index-hbs-cdn-all-in-jupyter-differenced-chart.html rename to src/whylogs/viewer/templates/index-hbs-cdn-all-in-jupyter-differenced-chart.html diff --git a/src/whylogs/viewer/index-hbs-cdn-all-in-jupyter-distribution-chart.html b/src/whylogs/viewer/templates/index-hbs-cdn-all-in-jupyter-distribution-chart.html similarity index 100% rename from src/whylogs/viewer/index-hbs-cdn-all-in-jupyter-distribution-chart.html rename to src/whylogs/viewer/templates/index-hbs-cdn-all-in-jupyter-distribution-chart.html diff --git a/src/whylogs/viewer/index-hbs-cdn-all-in-jupyter-feature-summary-statistics.html b/src/whylogs/viewer/templates/index-hbs-cdn-all-in-jupyter-feature-summary-statistics.html similarity index 100% rename from src/whylogs/viewer/index-hbs-cdn-all-in-jupyter-feature-summary-statistics.html rename to src/whylogs/viewer/templates/index-hbs-cdn-all-in-jupyter-feature-summary-statistics.html diff --git a/src/whylogs/viewer/index-hbs-cdn-all-in.html b/src/whylogs/viewer/templates/index-hbs-cdn-all-in.html similarity index 100% rename from src/whylogs/viewer/index-hbs-cdn-all-in.html rename to src/whylogs/viewer/templates/index-hbs-cdn-all-in.html diff --git a/src/whylogs/viewer/index-hbs-library-all-in.html b/src/whylogs/viewer/templates/index-hbs-library-all-in.html similarity index 100% rename from src/whylogs/viewer/index-hbs-library-all-in.html rename to src/whylogs/viewer/templates/index-hbs-library-all-in.html diff --git a/src/whylogs/viewer/index-hbs.html b/src/whylogs/viewer/templates/index-hbs.html similarity index 100% rename from src/whylogs/viewer/index-hbs.html rename to src/whylogs/viewer/templates/index-hbs.html diff --git a/src/whylogs/viewer/index.html b/src/whylogs/viewer/templates/index.html similarity index 100% rename from src/whylogs/viewer/index.html rename to src/whylogs/viewer/templates/index.html From 0de161a17d2a2169d9c68e856646ecb634651716 Mon Sep 17 00:00:00 2001 From: Jirayr-Solvee Date: Tue, 22 Feb 2022 16:25:11 +0400 Subject: [PATCH 110/119] show axis value instead of letter key --- ...ndex-hbs-cdn-all-in-jupyter-bar-chart.html | 77 ++++++++++++------- ...-cdn-all-in-jupyter-differenced-chart.html | 53 +++++++++---- 2 files changed, 89 insertions(+), 41 deletions(-) diff --git a/src/whylogs/viewer/templates/index-hbs-cdn-all-in-jupyter-bar-chart.html b/src/whylogs/viewer/templates/index-hbs-cdn-all-in-jupyter-bar-chart.html index 1e192b5ef2..be31ab718f 100644 --- a/src/whylogs/viewer/templates/index-hbs-cdn-all-in-jupyter-bar-chart.html +++ b/src/whylogs/viewer/templates/index-hbs-cdn-all-in-jupyter-bar-chart.html @@ -140,7 +140,7 @@ .bar.positive { fill: #369BACB2; } - + .bar.negative { fill: #2683C9E5; } @@ -199,11 +199,15 @@

Hold on! :)

- - + + + From 773fba5fc5974822714b4da1dfe06f157ccb28e3 Mon Sep 17 00:00:00 2001 From: Perch2005 <78492631+Perch2005@users.noreply.github.com> Date: Sat, 26 Feb 2022 04:11:49 +0400 Subject: [PATCH 114/119] change double histogram generating logic --- ...cdn-all-in-jupyter-distribution-chart.html | 179 +++++++++++------- 1 file changed, 106 insertions(+), 73 deletions(-) diff --git a/src/whylogs/viewer/templates/index-hbs-cdn-all-in-jupyter-distribution-chart.html b/src/whylogs/viewer/templates/index-hbs-cdn-all-in-jupyter-distribution-chart.html index 0ada18e170..233e074967 100644 --- a/src/whylogs/viewer/templates/index-hbs-cdn-all-in-jupyter-distribution-chart.html +++ b/src/whylogs/viewer/templates/index-hbs-cdn-all-in-jupyter-distribution-chart.html @@ -117,7 +117,7 @@ } .chart-box-title { - width: 98%; + width: 88%; justify-content: space-between; margin: 10px; margin-top: 15px; @@ -132,6 +132,17 @@ line-height: 16px; color: #4F595B; } + + .bar { + font: 10px sans-serif; + } + + .bar path, + .bar line { + fill: none; + stroke: #000; + shape-rendering: crispEdges; + } @media screen and (min-width: 500px) { .desktop-content { @@ -154,11 +165,11 @@

{{@key}}

-
+
Current
-
+
Reference
@@ -201,57 +212,64 @@

Hold on! :)

this.CHART_HEIGHT = this.SVG_HEIGHT - this.MARGIN.TOP - this.MARGIN.BOTTOM; this.svgEl = d3.create("svg") .attr("preserveAspectRatio", "xMinYMin meet") - .attr("viewBox", `20 0 ${$(window).width()} ${$(window).height()-30}`) + .attr("viewBox", `0 0 ${$(window).width()+100} ${$(window).height()-30}`) .classed("svg-content-responsive", true) this.maxYValue = d3.max(targetData, (d) => Math.abs(d.axisY)); - const mergedData = targetData.concat(referenceData).map(a => { - return a.axisX + this.minYValue = d3.min(targetData, (d) => Math.abs(d.axisY)); + const mergedReferenceData = referenceData.map(({axisX, axisY}) => { + return {axisX, axisY} + }) + const mergedTargetedData = targetData.map(({axisX, axisY}) => { + return {axisX, axisY} }) - const mergedAndSortedData = mergedData.sort(function(a, b) { return a - b; }); + + this.charts2 = mergedReferenceData.concat(mergedTargetedData) + this.charts2 = this.charts2.sort(function(a, b) { return a - b; }); + this.xScale = d3 - .scaleBand() - .domain(mergedAndSortedData.map((d) => d)) - .range([this.MARGIN.LEFT, this.MARGIN.LEFT + this.CHART_WIDTH]); - this.yScale = d3 - .scaleLinear() - .domain([0, this.maxYValue * 1.2]) - .range([this.CHART_HEIGHT, 0]); + .scaleLinear() + .domain([d3.min(this.charts2, function(d) { return parseFloat(d.axisX); }), d3.max(this.charts2, function(d) { return parseFloat(d.axisX) + 2; })]) // I was too lazy to do basic math of max(bin_start + width for each distribution) + .range([0, this.CHART_WIDTH ]); + this.svgEl.append("g") + .attr("transform", "translate("+ this.MARGIN.LEFT +"," + this.SVG_HEIGHT + ")") + .call(d3.axisBottom(this.xScale)); + this.yScale = d3.scaleLinear() + .range([this.CHART_HEIGHT , 0]) + this.yScale.domain([d3.min(this.charts2, function(d) { return parseFloat(d.axisY); }), d3.max(this.charts2, function(d) { return parseFloat(d.axisY); })]); } } - function chartData(column, startIndex) { + function chartData(column) { const data = []; - for (let i = 0; i + $(".desktop-content").html(` +

+ Something went wrong. Please try again. +

+ `) + ) + } + return data } function generateDoubleHistogramChart(targetData, referenceData) { let histogramData = [], overlappedHistogramData = []; + let yFormat; - const targetedProfileMin = targetData.numberSummary.histogram.min < 0 ? - targetData.numberSummary.histogram.min.toFixed(1) * 10 * -1 : - targetData.numberSummary.histogram.min.toFixed(1) * 10 - - const referenceProfileMin = referenceData.numberSummary.histogram.min < 0 ? - referenceData.numberSummary.histogram.min.toFixed(1) * 10 * -1 : - referenceData.numberSummary.histogram.min.toFixed(1) * 10 - - const chartsStartPoint = [] - targetedProfileMin > referenceProfileMin ? - chartsStartPoint.push(Math.ceil(targetedProfileMin-referenceProfileMin), 0): - chartsStartPoint.push(0, Math.ceil(referenceProfileMin-targetedProfileMin)) - histogramData = chartData(targetData, chartsStartPoint[0], 'original_profile') - overlappedHistogramData = chartData(referenceData, chartsStartPoint[1], 'ref_profile') + histogramData = chartData(targetData) + overlappedHistogramData = chartData(referenceData) - let yFormat, - xFormat; - const sizes = new GenerateChartParams($(window).height()-55, $(window).width(), histogramData, overlappedHistogramData) + const sizes = new GenerateChartParams($(window).height()-80, $(window).width(), histogramData, overlappedHistogramData) const { MARGIN, SVG_WIDTH, @@ -260,16 +278,17 @@

Hold on! :)

CHART_HEIGHT, svgEl, maxYValue, + minYValue, xScale, yScale } = sizes - const xAxis = d3.axisBottom(xScale).ticks(SVG_WIDTH, xFormat).tickSizeOuter(0); + const rectColors = ["#44C0E7", "#F5843C"] const yAxis = d3.axisLeft(yScale).ticks(SVG_HEIGHT / 40, yFormat); yFormat = yScale.tickFormat(100, yFormat); svgEl.append("g") - .attr("transform", `translate(${MARGIN.LEFT}, ${MARGIN.TOP})`) + .attr("transform", `translate(${MARGIN.LEFT}, ${MARGIN.BOTTOM})`) .call(yAxis) .call(g => g.select(".domain").remove()) .call(g => g.selectAll(".tick line") @@ -279,50 +298,64 @@

Hold on! :)

.attr("x", -MARGIN.LEFT) .attr("y", 10) .attr("fill", "currentColor") - .attr("text-anchor", "start")); - - svgEl.append("g") - .attr("transform", `translate(0,${SVG_HEIGHT - MARGIN.BOTTOM})`) - .call(xAxis) - .selectAll("text") - .attr("dy", "-.6em") - .attr("dx", "-.8em") - .attr("transform", "rotate(-90)") - .style("text-anchor", "end") - .call(g => g.select(".domain").remove()) - .call(g => g.selectAll(".tick line").remove()) - .call(g => g.append("text") - .attr("fill", "currentColor") - .attr("text-anchor", "end")) - .style("font-size", "10") + .attr("text-anchor", "start")) + + svgEl.append("text") + .attr("transform", + "translate(" + (CHART_WIDTH/2) + " ," + + (CHART_HEIGHT + MARGIN.TOP + 75) + ")") + .style("text-anchor", "middle") + .text("Values") + .style("font-size", "15") + .style("opacity", "0.6") + + svgEl.append("text") + .attr("transform", "rotate(-90)") + .attr("y", 0) + .attr("x", 0 - (SVG_HEIGHT / 2)) + .attr("dy", "1em") + .style("text-anchor", "middle") + .text("Counts") + .style("font-size", "15") + .style("opacity", "0.6") + + const width_b1 = histogramData[1].axisX - histogramData[0].axisX; + const width_b2 = overlappedHistogramData[1].axisX - overlappedHistogramData[0].axisX; const gChart = svgEl.append("g"); gChart + .attr("transform", "translate("+ MARGIN.LEFT +",0)") .selectAll(".bar") .data(histogramData) .enter() .append("rect") + .style("stroke", "#021826") .classed("bar", true) - .attr("width", xScale.bandwidth()) - .attr("height", (d) => ((CHART_HEIGHT - yScale(d.axisY)) < 0 || d.axisY === 0 ? 0 : (CHART_HEIGHT - yScale(d.axisY)))) - .attr("x", (d) => xScale(d.axisX)) - .attr("y", (d) => yScale(d.axisY) + MARGIN.TOP) - .attr("fill", "#2683C9") - .style("opacity","0.6"); + .attr("width", function(d) { return xScale(width_b1) - 1; }) + .attr("height", (d) => CHART_HEIGHT - yScale(d.axisY)) + .attr("x", 1) + .attr("transform", function(d) { return "translate(" + xScale(d.axisX) + "," + 0 + ")"; }) + .attr("y", (d) => yScale(d.axisY) + MARGIN.TOP + MARGIN.BOTTOM) + .attr("fill", rectColors[0]) + .style("opacity","0.8") const gChart1 = svgEl.append("g"); gChart1 - .selectAll(".bar") - .data(overlappedHistogramData) - .enter() - .append("rect") - .classed("bar", true) - .attr("width", xScale.bandwidth()) - .attr("height", (d) => ((CHART_HEIGHT - yScale(d.axisY)) < 0 || d.axisY === 0 ? 0 : (CHART_HEIGHT - yScale(d.axisY)))) - .attr("x", (d) => xScale(d.axisX)) - .attr("y", (d) => yScale(d.axisY) + MARGIN.TOP) - .attr("fill", "#369BAC") - .style("opacity", "0.6"); + .attr("transform", "translate("+ MARGIN.LEFT +",0)") + .selectAll(".bar") + .data(overlappedHistogramData) + .enter() + .append("rect") + .style("stroke", "#021826") + .classed("bar", true) + .attr("width", function(d) { return xScale(width_b1) - 1; }) + .attr("height", (d) => CHART_HEIGHT - yScale(d.axisY)) + .attr("x", 1) + .attr("transform", function(d) { return "translate(" + xScale(d.axisX) + "," + 0 + ")"; }) + .attr("y", (d) => yScale(d.axisY) + MARGIN.TOP + MARGIN.BOTTOM) + .attr("fill", rectColors[1]) + .style("opacity","0.8") + return svgEl._groups[0][0].outerHTML; } From 5300be6daab5a43ec68db5c55b52a5d870ea9d57 Mon Sep 17 00:00:00 2001 From: Perch2005 <78492631+Perch2005@users.noreply.github.com> Date: Sat, 26 Feb 2022 04:14:01 +0400 Subject: [PATCH 115/119] add error handling --- ...cdn-all-in-jupyter-distribution-chart.html | 29 +++++++++++++++---- 1 file changed, 24 insertions(+), 5 deletions(-) diff --git a/src/whylogs/viewer/templates/index-hbs-cdn-all-in-jupyter-distribution-chart.html b/src/whylogs/viewer/templates/index-hbs-cdn-all-in-jupyter-distribution-chart.html index 233e074967..bab944d520 100644 --- a/src/whylogs/viewer/templates/index-hbs-cdn-all-in-jupyter-distribution-chart.html +++ b/src/whylogs/viewer/templates/index-hbs-cdn-all-in-jupyter-distribution-chart.html @@ -144,6 +144,15 @@ shape-rendering: crispEdges; } + .error-message { + display: flex; + justify-content: center; + align-items: center; + color: rgb(255, 114, 71); + font-size: 30px; + font-weight: 900; + } + @media screen and (min-width: 500px) { .desktop-content { display: block; @@ -362,11 +371,21 @@

Hold on! :)

const profileFromCSVfile = {{{reference_profile_from_whylogs}}} Handlebars.registerHelper("getDoubleHistogramChart",(column,key) => { - const columnKey = key.data.key - if (profileFromCSVfile) { - return generateDoubleHistogramChart ( - column, - profileFromCSVfile[columnKey] + const columnKey = key.data.key + try { + if (profileFromCSVfile) { + return generateDoubleHistogramChart ( + column, + profileFromCSVfile[columnKey] + ) + } + } catch (err) { + $(document).ready(() => + $(".desktop-content").html(` +

+ Something went wrong. Please try again. +

+ `) ) } }); From b936bf85307fd113fdf1445e1f13b14bf7bedb6f Mon Sep 17 00:00:00 2001 From: Perch2005 <78492631+Perch2005@users.noreply.github.com> Date: Sat, 26 Feb 2022 04:17:53 +0400 Subject: [PATCH 116/119] change differenced bar chart logic --- ...-cdn-all-in-jupyter-differenced-chart.html | 59 +++++++++++++------ 1 file changed, 42 insertions(+), 17 deletions(-) diff --git a/src/whylogs/viewer/templates/index-hbs-cdn-all-in-jupyter-differenced-chart.html b/src/whylogs/viewer/templates/index-hbs-cdn-all-in-jupyter-differenced-chart.html index abf9959e81..32192537ff 100644 --- a/src/whylogs/viewer/templates/index-hbs-cdn-all-in-jupyter-differenced-chart.html +++ b/src/whylogs/viewer/templates/index-hbs-cdn-all-in-jupyter-differenced-chart.html @@ -204,7 +204,10 @@

Hold on! :)

const findAndDeleteUndefined = (axisData) => { const undefinedAxisIndex = axisData.findIndex((axis) => axis === undefined) - return [...axisData.slice(0, undefinedAxisIndex), ...axisData.slice(undefinedAxisIndex + 1)] + if (undefinedAxisIndex > 0) { + return [...axisData.slice(0, undefinedAxisIndex), ...axisData.slice(undefinedAxisIndex + 1)] + } + return axisData } const filterAndSortChartData = (overlappedHistogramData, histogramData) => { @@ -246,7 +249,7 @@

Hold on! :)

this.maxYValue = d3.max(data, (d) => Math.abs(d.axisY)); this.xScale = d3 .scaleBand() - .domain(filterAndSortChartData(data, referenceData).map((sortedCounts) => sortedCounts.axisX)) + .domain(filterAndSortChartData(data, referenceData).map((sortedCounts) => sortedCounts && sortedCounts.axisX)) .rangeRound([this.MARGIN.LEFT, this.SVG_WIDTH]) .padding([0.1]); this.yScale = d3 @@ -258,21 +261,42 @@

Hold on! :)

function chartData(column) { const data = []; + if (column.stringSummary?.frequent) { Object.entries(column.stringSummary.frequent.items).forEach(([key, {value, estimate}], index) => { data.push({ axisY: estimate, axisX: value, }); }); + } else if (column.stringSummary?.charPosTracker){ + Object.entries(column.stringSummary.charPosTracker.charPosMap).forEach(([key, {count}], index) => { + data.push({ + axisY: count, + axisX: key, + }); + }); + } else { + $(document).ready(() => + $(".desktop-content").html(` +

+ Something went wrong. Please try again. +

+ `) + ) + } + return data } function generatePositiveNegativeChart(histogramData, overlappedHistogramData) { - const data = filterAndSortChartData(chartData(histogramData), chartData(overlappedHistogramData)).map((axis, index) => { - const findIndex = chartData(histogramData).findIndex((value) => value.axisX === axis.axisX) - const difference = axis.axisY - chartData(histogramData)[findIndex].axisY - return [difference] - }).flat() + const data = filterAndSortChartData(chartData(histogramData), chartData(overlappedHistogramData)).map((axis, index) => { + if (axis) { + const findIndex = chartData(histogramData).findIndex((value) => value.axisX === axis.axisX) + const difference = axis.axisY - chartData(histogramData)[findIndex].axisY + return [difference] + } + return 0; + }).flat() let yFormat, xFormat; @@ -288,13 +312,14 @@

Hold on! :)

svgEl } = sizes + const rectColors = ["bar positive", "bar negative"] const maxY = Math.abs(d3.max(data)); const minY = Math.abs(d3.min(data)); let positiveY = Math.ceil(maxY) % 1 ? maxY + 2*(maxY/(maxY*10)) : maxY + 2*(maxY/(maxY/10)), negativeY = Math.ceil(minY) % 1 ? minY + 2*(minY/(minY*10)) : minY + 2*(minY/(minY/10)); const yScale = d3.scaleLinear() - .domain([-negativeY, positiveY ]) + .domain([-negativeY, positiveY || 0]) .range([CHART_HEIGHT,0]) const xAxis = d3.axisBottom(xScale).ticks(SVG_WIDTH / 80, xFormat).tickSizeOuter(0); @@ -314,15 +339,15 @@

Hold on! :)

.attr("fill", "currentColor") .attr("text-anchor", "start")); - svgEl.append("text") + svgEl.append("text") .attr("transform", - "translate(" + (CHART_WIDTH/2) + " ," + + "translate(" + (CHART_WIDTH/2) + " ," + (CHART_HEIGHT + MARGIN.TOP + 30) + ")") .style("text-anchor", "middle") .text("Values") .style("font-size", "15") .style("opacity", "0.6") - + svgEl.append("text") .attr("transform", "rotate(-90)") .attr("y", 0) @@ -330,7 +355,7 @@

Hold on! :)

.attr("dy", "1em") .style("text-anchor", "middle") .text("Counts") - .style("font-size", "15") + .style("font-size", "15") .style("opacity", "0.6") svgEl @@ -348,14 +373,14 @@

Hold on! :)

svgEl.selectAll(".bar") .data(data) .enter() - .append("g") - .attr("transform", function(d, i) { return "translate(" + xScale(filterAndSortChartData(chartData(histogramData), chartData(overlappedHistogramData))[i].axisX) + ",0)"; }) .append("rect") - .attr("class", function(d) { return d < 0 ? "bar negative" : "bar positive"; }) + .attr("class", function(d) { return d < 0 ? rectColors[0] : rectColors[1]; }) .attr("y", function(d) { return yScale(Math.max(0, d)); }) - .attr("x", function(d, i) { return xScale(i) }) + .attr("x", function(d, i) { return xScale(filterAndSortChartData(chartData(histogramData), chartData(overlappedHistogramData))[i]?.axisX) }) .attr("height", function(d) { return Math.abs(yScale(d) - yScale(0)); }) - .attr("width", xScale.bandwidth()); + .attr("width", xScale.bandwidth()) + .style("opacity", "0.8") + return svgEl._groups[0][0].outerHTML; } From c9e5f6929cecdcf336dd2677662936aad6cc1b9b Mon Sep 17 00:00:00 2001 From: Perch2005 <78492631+Perch2005@users.noreply.github.com> Date: Sat, 26 Feb 2022 04:22:25 +0400 Subject: [PATCH 117/119] change bar chart generating logic --- ...ndex-hbs-cdn-all-in-jupyter-bar-chart.html | 74 ++++++++++++------- 1 file changed, 47 insertions(+), 27 deletions(-) diff --git a/src/whylogs/viewer/templates/index-hbs-cdn-all-in-jupyter-bar-chart.html b/src/whylogs/viewer/templates/index-hbs-cdn-all-in-jupyter-bar-chart.html index be31ab718f..c212ba499c 100644 --- a/src/whylogs/viewer/templates/index-hbs-cdn-all-in-jupyter-bar-chart.html +++ b/src/whylogs/viewer/templates/index-hbs-cdn-all-in-jupyter-bar-chart.html @@ -145,7 +145,6 @@ fill: #2683C9E5; } - @media screen and (min-width: 500px) { .desktop-content { display: block; @@ -200,6 +199,7 @@

Hold on! :)