Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .github/workflows/deploy-docs-prod.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,9 @@ jobs:
- name: Set up uv
uses: astral-sh/setup-uv@v5

- name: Set up uv
uses: astral-sh/setup-uv@v5

- name: Generate Python library docs
run: |
cd site/_source/validmind-library
Expand Down
3 changes: 3 additions & 0 deletions .github/workflows/deploy-docs-staging.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,9 @@ jobs:
- name: Set up uv
uses: astral-sh/setup-uv@v5

- name: Set up uv
uses: astral-sh/setup-uv@v5

- name: Generate Python library docs
run: |
cd site/_source/validmind-library
Expand Down
3 changes: 3 additions & 0 deletions .github/workflows/publish-llm-markdown.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,9 @@ jobs:
- name: Set up uv
uses: astral-sh/setup-uv@v5

- name: Set up uv
uses: astral-sh/setup-uv@v5

- name: Generate Python library docs
run: |
cd _source/validmind-library
Expand Down
18 changes: 3 additions & 15 deletions site/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -9,12 +9,11 @@ SRC_ROOT := _source
SRC_DIR := $(SRC_ROOT)/validmind-library
DEST_DIR_NB := notebooks
DEST_DIR_PYTHON := validmind
DEST_DIR_TESTS := tests
SRC_ROOT := _source
SRC_DIR := $(SRC_ROOT)/validmind-library

# Define .PHONY target for help section
.PHONY: help add-copyright clean clone copy-installation copy-release-notes delete-demo-branch deploy-demo-branch deploy-prod deploy-staging docker-build docker-serve docker-site docker-site-lite docs-site execute generate-sitemap get-api-json get-source kind-serve kind-stop kind-restart kind-logs notebooks python-docs release-notes render-llm template-schema-docs test-descriptions verify-copyright yearly-releases
.PHONY: help add-copyright clean clone copy-installation copy-release-notes delete-demo-branch deploy-demo-branch deploy-prod deploy-staging docker-build docker-serve docker-site docker-site-lite docs-site execute generate-sitemap get-api-json get-source kind-serve kind-stop kind-restart kind-logs notebooks python-docs release-notes render-llm template-schema-docs verify-copyright yearly-releases

# Help section
help:
Expand Down Expand Up @@ -42,7 +41,7 @@ help:
@echo " generate-sitemap Generate a sitemap for the static HTML site"
@echo " execute Execute a Jupyter Notebook or notebook directory"
@echo " get-api-json Download Swagger JSON specs from ValidMind APIs into reference/"
@echo " get-source Get all source files (clean, clone, copy-installation, copy-release-notes, notebooks, python-docs, test-descriptions, get-api-json)"
@echo " get-source Get all source files (clean, clone, copy-installation, copy-release-notes, notebooks, python-docs, get-api-json)"
@echo " kind-serve Set up Kind cluster for ValidMind docs"
@echo " kind-stop Stop Kind cluster for ValidMind docs"
@echo " kind-restart Restart Kind cluster for ValidMind docs"
Expand All @@ -53,7 +52,6 @@ help:
@echo " Examples: TAG=cmvm/25.07 or TAG=validmind-library/v2.8.22"
@echo " template-schema-docs Generate template schema documentation from backend"
@echo " render-llm Render site to GFM markdown for LLM ingestion (mirrors CI)"
@echo " test-descriptions Copy the ValidMind tests docs into tests/"
@echo " verify-copyright Verify that all .qmd and .yml/.yaml files have copyright headers"
@echo " yearly-releases Collate releases by year into a listing landing and update releases sidebar"

Expand Down Expand Up @@ -284,7 +282,7 @@ get-api-json:
@curl -s -o reference/rapidoc-min.js "https://unpkg.com/rapidoc/dist/rapidoc-min.js" || echo "Failed to fetch RapiDoc library"

# Get all source files
get-source: clean clone copy-installation copy-release-notes notebooks python-docs test-descriptions template-schema-docs
get-source: clean clone copy-installation copy-release-notes notebooks python-docs template-schema-docs

# Requires that you've run `make docker-build`
kind-serve:
Expand Down Expand Up @@ -430,16 +428,6 @@ template-schema-docs:
@python -m pip install -q json-schema-for-humans
@BACKEND_ROOT=$(SRC_ROOT)/backend python ../scripts/generate_template_schema_docs.py

test-descriptions:
@echo "\nUpdating test descriptions source ..."
@cd _source/validmind-library && make install && poetry run python scripts/extract_descriptions.py validmind/tests
@cd ../../
@rm -rf $(DEST_DIR_TESTS)
@mkdir -p $(DEST_DIR_TESTS)
@cp -r $(SRC_DIR)/build/_test_descriptions/validmind/tests/. $(DEST_DIR_TESTS)
@echo "Copying _metadata.yml into tests/ ..."
@cp developer/_metadata.yml $(DEST_DIR_TESTS)/_metadata.yml

verify-copyright:
@echo "\nVerifying copyright headers in .qmd and .yml/.yaml files ..."
@cd .. && python site/scripts/verify_copyright_qmd.py
Expand Down
2 changes: 1 addition & 1 deletion site/about/contributing/using-the-documentation.qmd
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ Resources for developers integrating {{< var vm.product >}} into their workflows

- [{{< var validmind.developer >}}](/developer/validmind-library.qmd) — Python library overview and installation
- [Code samples](/developer/samples-jupyter-notebooks.qmd) — Jupyter notebooks for common use cases
- [Test descriptions](/developer/test-descriptions.qmd) — Reference for available validation tests
- [{{< var vm.product >}} test sandbox](/developer/how-to/test-sandbox.qmd) — Reference for available validation tests
- [{{< var validmind.api >}}](/validmind/validmind.qmd) — Python API documentation
- [Public REST API](/reference/validmind-rest-api-vm.qmd) — REST API for platform integrations

Expand Down
2 changes: 1 addition & 1 deletion site/about/use-cases/eu-ai-act.qmd
Original file line number Diff line number Diff line change
Expand Up @@ -237,7 +237,7 @@ Integrate all components into a complete compliance workflow addressing Articles

[^6]: [Work with document templates](/guide/templates/working-with-document-templates.qmd)

[^7]: [Test descriptions](/developer/test-descriptions.qmd)
[^7]: [{{< var vm.product >}} test sandbox](/developer/how-to/test-sandbox.qmd)

[^8]: [Work with content blocks](/guide/documentation/work-with-content-blocks.qmd)

Expand Down
6 changes: 2 additions & 4 deletions site/developer/_sidebar.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,6 @@ website:
contents:
- notebooks/how_to/tests/explore_tests/explore_tests.ipynb
- notebooks/how_to/tests/explore_tests/explore_test_suites.ipynb
- developer/how-to/test-sandbox.qmd
- section: "Run tests"
contents:
- notebooks/how_to/tests/run_tests/1-run_dataset-based_tests.ipynb
Expand Down Expand Up @@ -110,9 +109,8 @@ website:
contents: "notebooks/use_cases/validation/**/*.ipynb"
- text: "---"
- text: "Reference"
- text: "Test descriptions"
file: developer/test-descriptions.qmd
contents: tests/**
- text: "{{< var vm.product >}} test sandbox"
file: developer/how-to/test-sandbox.qmd
- text: "{{< var validmind.api >}}"
file: validmind/validmind.qmd
# USING THE VARIABLE IN THE LINK TEXT MESSES UP THE MOBILE VIEW & BREADCRUMB
Expand Down
35 changes: 28 additions & 7 deletions site/developer/how-to/test-sandbox.qmd
Original file line number Diff line number Diff line change
Expand Up @@ -2,19 +2,40 @@
# Copyright © 2023-2026 ValidMind Inc. All rights reserved.
# Refer to the LICENSE file in the root of this repository for details.
# SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
title: "Test sandbox <sup>[beta]{.smallcaps}</sup>"
date: last-modified
pagetitle: "{{< var vm.product >}} test sandbox"
title-block-style: none
bread-crumbs: false
page-layout: full
aliases:
- /guide/test-sandbox.html
- /developer/model-testing/test-sandbox.html
- /developer/test-descriptions.html
- /guide/test-descriptions.html
- /developer/model-testing/test-descriptions.html
---

<!--- TO DO
- Ordering of notebooks if we want them to appear in a specific sequence
--->
Explore our interactive sandbox to see which tests are available in the {{< var validmind.developer >}} and how you can use them in your own code.
```{=html}
<style>
/* Close the gap between the pink banner and the test-sandbox iframe */
#title-block-header { display: none !important; }
#quarto-document-content { padding-top: 0 !important; margin-top: 0 !important; }
#quarto-document-content > .column-screen:first-child { margin-top: 0 !important; }
#quarto-document-content > .column-screen iframe { display: block; }

/* Hide the secondary-nav toggle bar only on desktop;
keep it on mobile/tablet so the sidebar remains reachable. */
@media (min-width: 992px) {
.quarto-secondary-nav { display: none !important; }
}

/* Quarto sidebar scroll-visibility strip under the navbar (this page only) */
#quarto-sidebarnav-toggle {
display: none !important;
}
</style>
```

::: {.column-screen-right}
::: {.column-screen}

```{=html}
<iframe
Expand Down
13 changes: 8 additions & 5 deletions site/developer/how-to/testing-overview.qmd
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,9 @@ listing:
sort: false
fields: [title, description]
contents:
- test-sandbox.qmd
- ../test-descriptions.qmd
- title: "{{< var vm.product >}} test sandbox"
description: "Tests that are available as part of the {{< var validmind.developer >}}, grouped by type of validation or monitoring test."
path: test-sandbox.qmd
- id: test-basics
type: grid
grid-columns: 2
Expand Down Expand Up @@ -100,9 +101,7 @@ listing:

- Thresholds, such as `min_percent_threshold`, `disparity_tolerance`
- Metrics to compute, such as `metrics=["fnr", "fpr", "tpr"]`
- Any setting that isn't a dataset or record

Use `vm.tests.describe_test("test_id")` or check the [test descriptions](/developer/test-descriptions.qmd) page to see what parameters a test accepts.
- Any setting that isn't a dataset or model[^1]

**Outputs**
: Tests return results like tables and plots. Tables can be lists of dictionaries or pandas DataFrames; plots can be matplotlib or plotly figures.
Expand Down Expand Up @@ -232,3 +231,7 @@ Learn more about using the other features of the {{< var validmind.developer >}}
:::{#testing-next}
:::

<!-- FOOTNOTES -->

[^1]: Use `vm.tests.describe_test("test_id")` for programmatic parameter details, or browse the [{{< var vm.product >}} test sandbox](test-sandbox.qmd).

2 changes: 1 addition & 1 deletion site/developer/supported-records-and-frameworks.qmd
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ listing:
fields: [title, description]
contents:
- /how-to/testing-overview.qmd
- test-descriptions.qmd
- how-to/test-sandbox.qmd
- /how-to/feature-overview.qmd
- samples-jupyter-notebooks.qmd
---
Expand Down
67 changes: 0 additions & 67 deletions site/developer/test-descriptions.qmd

This file was deleted.

14 changes: 7 additions & 7 deletions site/faq/_faq-explainability.qmd
Original file line number Diff line number Diff line change
Expand Up @@ -6,14 +6,14 @@ SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial -->
<span id="explanability"></span>
Yes, {{< var vm.product >}} includes explainability-related testing and documentation as part of our offerings. Our approach incorporates a comprehensive suite of tests designed to evaluate model interpretability and identify potential risks, ensuring transparency and reliability in model outcomes.

Below is an overview of our key explainability-related tests:
Below is an overview of our key explainability-related tests (browse names and descriptions in the [{{< var vm.product >}} test sandbox](/developer/how-to/test-sandbox.qmd)):

- **Features AUC**^[[FeaturesAUC](/tests/model_validation/FeaturesAUC.md)] — Assesses the discriminatory power of individual features in binary classification models, providing insights into how well each feature differentiates between classes. This test supports explainability by isolating the contribution of each feature to the classification task.
- **Feature Importance**^[[FeatureImportance](/tests/model_validation/sklearn/FeatureImportance.md)] — Generates feature importance scores to identify and compare impactful features across different models and datasets. By highlighting the relative significance of features, this test clarifies how inputs influence model predictions.
- **Overfit Diagnosis**^[[OverfitDiagnosis](/tests/model_validation/sklearn/OverfitDiagnosis.md)] — Detects potential overfitting by comparing performance between training and testing sets for specific feature segments, highlighting areas of significant deviation. This test aids explainability by revealing where model behavior is inconsistent, offering insights into its generalization capability.
- **Permutation Feature Importance**^[[PermutationFeatureImportance](/tests/model_validation/sklearn/PermutationFeatureImportance.md)] — Measures feature significance by analyzing the impact of randomly rearranging feature values on model performance. This test quantifies the dependency of model performance on each feature, making it clear which inputs drive the predictions.
- **SHAP Global Importance**^[[SHAPGlobalImportance](/tests/model_validation/sklearn/SHAPGlobalImportance.md)] — Uses SHAP (SHapley Additive exPlanations) values to assign global importance to features, offering a clear explanation of model outcomes and supporting risk identification. SHAP values provide a mathematically sound attribution of model predictions to specific features, enhancing interpretability.
- **Weakspots Diagnosis**^[[WeakspotsDiagnosis](/tests/model_validation/sklearn/WeakspotsDiagnosis.md)] — Identifies and visualizes regions of suboptimal model performance across the feature space, highlighting areas that may require further attention. This test explains where and why the model struggles by connecting poor performance to specific feature regions.
- **Features AUC** — Assesses the discriminatory power of individual features in binary classification models, providing insights into how well each feature differentiates between classes. This test supports explainability by isolating the contribution of each feature to the classification task.
- **Feature Importance** — Generates feature importance scores to identify and compare impactful features across different models and datasets. By highlighting the relative significance of features, this test clarifies how inputs influence model predictions.
- **Overfit Diagnosis** — Detects potential overfitting by comparing performance between training and testing sets for specific feature segments, highlighting areas of significant deviation. This test aids explainability by revealing where model behavior is inconsistent, offering insights into its generalization capability.
- **Permutation Feature Importance** — Measures feature significance by analyzing the impact of randomly rearranging feature values on model performance. This test quantifies the dependency of model performance on each feature, making it clear which inputs drive the predictions.
- **SHAP Global Importance** — Uses SHAP (SHapley Additive exPlanations) values to assign global importance to features, offering a clear explanation of model outcomes and supporting risk identification. SHAP values provide a mathematically sound attribution of model predictions to specific features, enhancing interpretability.
- **Weakspots Diagnosis** — Identifies and visualizes regions of suboptimal model performance across the feature space, highlighting areas that may require further attention. This test explains where and why the model struggles by connecting poor performance to specific feature regions.

::: {.callout}
## When logged for documentation, each test automatically generates a comprehensive report as soon as it is executed.
Expand Down
4 changes: 3 additions & 1 deletion site/faq/faq-testing.qmd
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,9 @@ listing:
fields: [title, description]
contents:
- ../developer/how-to/testing-overview.qmd
- ../developer/test-descriptions.qmd
- title: "{{< var vm.product >}} test sandbox"
description: "Tests that are available as part of the {{< var validmind.developer >}}, grouped by type of validation or monitoring test."
path: ../developer/how-to/test-sandbox.qmd
- ../guide/monitoring/ongoing-monitoring.qmd
categories: ["testing", "model documentation", "customization", "custom data", "explainability", "ongoing monitoring", "validmind library"]
---
Expand Down
7 changes: 5 additions & 2 deletions site/guide/monitoring/ongoing-monitoring.qmd
Original file line number Diff line number Diff line change
Expand Up @@ -27,9 +27,12 @@ listing:
- ../../notebooks/use_cases/ongoing_monitoring/application_scorecard_ongoing_monitoring.ipynb
# - ../../notebooks/use_cases/credit_risk/application_scorecard_full_suite.ipynb
- id: ongoing-monitoring-tests
contents: "../../tests/ongoing_monitoring/*.md"
contents:
- title: "{{< var vm.product >}} test sandbox"
description: "Tests that are available as part of the {{< var validmind.developer >}}, grouped by type of validation or monitoring test."
path: ../../developer/how-to/test-sandbox.qmd
type: grid
grid-columns: 3
grid-columns: 2
max-description-length: 250
page-size: 150
fields: [title, description]
Expand Down
Loading