validmind · github-actions · May 23, 2026 · May 23, 2026 · May 23, 2026
diff --git a/.github/workflows/deploy-docs-prod.yaml b/.github/workflows/deploy-docs-prod.yaml
@@ -78,6 +78,9 @@ jobs:
       - name: Set up uv
         uses: astral-sh/setup-uv@v5
 
+      - name: Set up uv
+        uses: astral-sh/setup-uv@v5
+
       - name: Generate Python library docs
         run: |
           cd site/_source/validmind-library

diff --git a/.github/workflows/deploy-docs-staging.yaml b/.github/workflows/deploy-docs-staging.yaml
@@ -78,6 +78,9 @@ jobs:
       - name: Set up uv
         uses: astral-sh/setup-uv@v5
 
+      - name: Set up uv
+        uses: astral-sh/setup-uv@v5
+
       - name: Generate Python library docs
         run: |
           cd site/_source/validmind-library

diff --git a/.github/workflows/publish-llm-markdown.yaml b/.github/workflows/publish-llm-markdown.yaml
@@ -42,6 +42,9 @@ jobs:
       - name: Set up uv
         uses: astral-sh/setup-uv@v5
 
+      - name: Set up uv
+        uses: astral-sh/setup-uv@v5
+
       - name: Generate Python library docs
         run: |
           cd _source/validmind-library

diff --git a/site/Makefile b/site/Makefile
@@ -9,12 +9,11 @@ SRC_ROOT := _source
 SRC_DIR := $(SRC_ROOT)/validmind-library
 DEST_DIR_NB := notebooks
 DEST_DIR_PYTHON := validmind
-DEST_DIR_TESTS := tests
 SRC_ROOT := _source
 SRC_DIR := $(SRC_ROOT)/validmind-library
 
 # Define .PHONY target for help section
-.PHONY: help add-copyright clean clone copy-installation copy-release-notes delete-demo-branch deploy-demo-branch deploy-prod deploy-staging docker-build docker-serve docker-site docker-site-lite docs-site execute generate-sitemap get-api-json get-source kind-serve kind-stop kind-restart kind-logs notebooks python-docs release-notes render-llm template-schema-docs test-descriptions verify-copyright yearly-releases
+.PHONY: help add-copyright clean clone copy-installation copy-release-notes delete-demo-branch deploy-demo-branch deploy-prod deploy-staging docker-build docker-serve docker-site docker-site-lite docs-site execute generate-sitemap get-api-json get-source kind-serve kind-stop kind-restart kind-logs notebooks python-docs release-notes render-llm template-schema-docs verify-copyright yearly-releases
 
 # Help section
 help:
@@ -42,7 +41,7 @@ help:
 	@echo "  generate-sitemap         Generate a sitemap for the static HTML site"
 	@echo "  execute                  Execute a Jupyter Notebook or notebook directory"
 	@echo "  get-api-json             Download Swagger JSON specs from ValidMind APIs into reference/"
-	@echo "  get-source               Get all source files (clean, clone, copy-installation, copy-release-notes, notebooks, python-docs, test-descriptions, get-api-json)"
+	@echo "  get-source               Get all source files (clean, clone, copy-installation, copy-release-notes, notebooks, python-docs, template-schema-docs)"
 	@echo "  kind-serve               Set up Kind cluster for ValidMind docs"
 	@echo "  kind-stop                Stop Kind cluster for ValidMind docs"
 	@echo "  kind-restart             Restart Kind cluster for ValidMind docs"
@@ -53,7 +52,6 @@ help:
 	@echo "                           Examples: TAG=cmvm/25.07 or TAG=validmind-library/v2.8.22"
 	@echo "  template-schema-docs     Generate template schema documentation from backend"
 	@echo "  render-llm               Render site to GFM markdown for LLM ingestion (mirrors CI)"
-	@echo "  test-descriptions        Copy the ValidMind tests docs into tests/"
 	@echo "  verify-copyright         Verify that all .qmd and .yml/.yaml files have copyright headers"
 	@echo "  yearly-releases          Collate releases by year into a listing landing and update releases sidebar"
 
@@ -284,7 +282,7 @@ get-api-json:
 	@curl -s -o reference/rapidoc-min.js "https://unpkg.com/rapidoc/dist/rapidoc-min.js" || echo "Failed to fetch RapiDoc library"
 
 # Get all source files
-get-source: clean clone copy-installation copy-release-notes notebooks python-docs test-descriptions template-schema-docs
+get-source: clean clone copy-installation copy-release-notes notebooks python-docs template-schema-docs
 
 # Requires that you've run `make docker-build`
 kind-serve:
@@ -430,16 +428,6 @@ template-schema-docs:
 	@python -m pip install -q json-schema-for-humans
 	@BACKEND_ROOT=$(SRC_ROOT)/backend python ../scripts/generate_template_schema_docs.py
 
-test-descriptions:
-	@echo "\nUpdating test descriptions source ..."
-	@cd _source/validmind-library && make install && poetry run python scripts/extract_descriptions.py validmind/tests
-	@cd ../../
-	@rm -rf $(DEST_DIR_TESTS)
-	@mkdir -p $(DEST_DIR_TESTS)
-	@cp -r $(SRC_DIR)/build/_test_descriptions/validmind/tests/. $(DEST_DIR_TESTS)
-	@echo "Copying _metadata.yml into tests/ ..."
-	@cp developer/_metadata.yml $(DEST_DIR_TESTS)/_metadata.yml
-
 verify-copyright:
 	@echo "\nVerifying copyright headers in .qmd and .yml/.yaml files ..."
 	@cd .. && python site/scripts/verify_copyright_qmd.py

diff --git a/site/about/contributing/using-the-documentation.qmd b/site/about/contributing/using-the-documentation.qmd
@@ -65,7 +65,7 @@ Resources for developers integrating {{< var vm.product >}} into their workflows
 
 - [{{< var validmind.developer >}}](/developer/validmind-library.qmd) — Python library overview and installation
 - [Code samples](/developer/samples-jupyter-notebooks.qmd) — Jupyter notebooks for common use cases
-- [Test descriptions](/developer/test-descriptions.qmd) — Reference for available validation tests
+- [{{< var vm.product >}} test sandbox](/developer/how-to/test-sandbox.qmd) — Reference for available validation tests
 - [{{< var validmind.api >}}](/validmind/validmind.qmd) — Python API documentation
 - [Public REST API](/reference/validmind-rest-api-vm.qmd) — REST API for platform integrations
 

diff --git a/site/about/use-cases/eu-ai-act.qmd b/site/about/use-cases/eu-ai-act.qmd
@@ -237,7 +237,7 @@ Integrate all components into a complete compliance workflow addressing Articles
 
 [^6]: [Work with document templates](/guide/templates/working-with-document-templates.qmd)
 
-[^7]: [Test descriptions](/developer/test-descriptions.qmd)
+[^7]: [{{< var vm.product >}} test sandbox](/developer/how-to/test-sandbox.qmd)
 
 [^8]: [Work with content blocks](/guide/documentation/work-with-content-blocks.qmd)
 

diff --git a/site/developer/_sidebar.yaml b/site/developer/_sidebar.yaml
@@ -58,7 +58,6 @@ website:
               contents:
                 - notebooks/how_to/tests/explore_tests/explore_tests.ipynb
                 - notebooks/how_to/tests/explore_tests/explore_test_suites.ipynb
-                - developer/how-to/test-sandbox.qmd
             - section: "Run tests"
               contents:
                 - notebooks/how_to/tests/run_tests/1-run_dataset-based_tests.ipynb
@@ -110,9 +109,8 @@ website:
               contents: "notebooks/use_cases/validation/**/*.ipynb"
         - text: "---"
         - text: "Reference"
-        - text: "Test descriptions"
-          file: developer/test-descriptions.qmd
-          contents: tests/**
+        - text: "{{< var vm.product >}} test sandbox"
+          file: developer/how-to/test-sandbox.qmd
         - text: "{{< var validmind.api >}}"
           file: validmind/validmind.qmd
         # USING THE VARIABLE IN THE LINK TEXT MESSES UP THE MOBILE VIEW & BREADCRUMB

diff --git a/site/developer/how-to/test-sandbox.qmd b/site/developer/how-to/test-sandbox.qmd
@@ -2,19 +2,40 @@
 # Copyright © 2023-2026 ValidMind Inc. All rights reserved.
 # Refer to the LICENSE file in the root of this repository for details.
 # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
-title: "Test sandbox <sup>[beta]{.smallcaps}</sup>"
-date: last-modified
+pagetitle: "{{< var vm.product >}} test sandbox"
+title-block-style: none
+bread-crumbs: false
+page-layout: full
 aliases:
   - /guide/test-sandbox.html
   - /developer/model-testing/test-sandbox.html
+  - /developer/test-descriptions.html
+  - /guide/test-descriptions.html
+  - /developer/model-testing/test-descriptions.html
 ---
 
-<!--- TO DO
-- Ordering of notebooks if we want them to appear in a specific sequence
---->
-Explore our interactive sandbox to see which tests are available in the {{< var validmind.developer >}} and how you can use them in your own code.
+```{=html}
+<style>
+  /* Close the gap between the pink banner and the test-sandbox iframe */
+  #title-block-header { display: none !important; }
+  #quarto-document-content { padding-top: 0 !important; margin-top: 0 !important; }
+  #quarto-document-content > .column-screen:first-child { margin-top: 0 !important; }
+  #quarto-document-content > .column-screen iframe { display: block; }
+
+  /* Hide the secondary-nav toggle bar only on desktop;
+     keep it on mobile/tablet so the sidebar remains reachable. */
+  @media (min-width: 992px) {
+    .quarto-secondary-nav { display: none !important; }
+  }
+
+  /* Hide Quarto's sidebar toggle strip beneath the navbar (this page only) */
+  #quarto-sidebarnav-toggle {
+    display: none !important;
+  }
+</style>
+```
 
-::: {.column-screen-right}
+::: {.column-screen}
 
 ```{=html}
 <iframe

diff --git a/site/developer/how-to/testing-overview.qmd b/site/developer/how-to/testing-overview.qmd
@@ -24,8 +24,9 @@ listing:
     sort: false
     fields: [title, description]
     contents:
-    - test-sandbox.qmd
-    - ../test-descriptions.qmd
+    - title: "{{< var vm.product >}} test sandbox"
+      description: "Tests that are available as part of the {{< var validmind.developer >}}, grouped by type of validation or monitoring test."
+      path: test-sandbox.qmd
   - id: test-basics
     type: grid
     grid-columns: 2
@@ -100,9 +101,7 @@ listing:
 
     - Thresholds, such as `min_percent_threshold`, `disparity_tolerance`
     - Metrics to compute, such as `metrics=["fnr", "fpr", "tpr"]`
-    - Any setting that isn't a dataset or record
-
-Use `vm.tests.describe_test("test_id")` or check the [test descriptions](/developer/test-descriptions.qmd) page to see what parameters a test accepts.
+    - Any setting that isn't a dataset or model[^1]
 
 **Outputs**
 : Tests return results like tables and plots. Tables can be lists of dictionaries or pandas DataFrames; plots can be matplotlib or plotly figures.
@@ -232,3 +231,7 @@ Learn more about using the other features of the {{< var validmind.developer >}}
 :::{#testing-next}
 :::
 
+<!-- FOOTNOTES -->
+
+[^1]: Use `vm.tests.describe_test("test_id")` to see which parameters a test accepts, or browse the [{{< var vm.product >}} test sandbox](test-sandbox.qmd).
+
diff --git a/site/developer/supported-records-and-frameworks.qmd b/site/developer/supported-records-and-frameworks.qmd
@@ -19,7 +19,7 @@ listing:
     fields: [title, description]
     contents:
     - /how-to/testing-overview.qmd
-    - test-descriptions.qmd
+    - how-to/test-sandbox.qmd
     - /how-to/feature-overview.qmd
     - samples-jupyter-notebooks.qmd
 ---

diff --git a/site/developer/test-descriptions.qmd b/site/developer/test-descriptions.qmd
diff --git a/site/faq/_faq-explainability.qmd b/site/faq/_faq-explainability.qmd
@@ -6,14 +6,14 @@ SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial -->
 <span id="explanability"></span>
 Yes, {{< var vm.product >}} includes explainability-related testing and documentation as part of our offerings. Our approach incorporates a comprehensive suite of tests designed to evaluate model interpretability and identify potential risks, ensuring transparency and reliability in model outcomes. 
 
-Below is an overview of our key explainability-related tests:
+Below is an overview of our key explainability-related tests (browse names and descriptions in the [{{< var vm.product >}} test sandbox](/developer/how-to/test-sandbox.qmd)):
 
-- **Features AUC**^[[FeaturesAUC](/tests/model_validation/FeaturesAUC.md)] — Assesses the discriminatory power of individual features in binary classification models, providing insights into how well each feature differentiates between classes. This test supports explainability by isolating the contribution of each feature to the classification task.
-- **Feature Importance**^[[FeatureImportance](/tests/model_validation/sklearn/FeatureImportance.md)] — Generates feature importance scores to identify and compare impactful features across different models and datasets. By highlighting the relative significance of features, this test clarifies how inputs influence model predictions.
-- **Overfit Diagnosis**^[[OverfitDiagnosis](/tests/model_validation/sklearn/OverfitDiagnosis.md)] — Detects potential overfitting by comparing performance between training and testing sets for specific feature segments, highlighting areas of significant deviation. This test aids explainability by revealing where model behavior is inconsistent, offering insights into its generalization capability.
-- **Permutation Feature Importance**^[[PermutationFeatureImportance](/tests/model_validation/sklearn/PermutationFeatureImportance.md)] — Measures feature significance by analyzing the impact of randomly rearranging feature values on model performance. This test quantifies the dependency of model performance on each feature, making it clear which inputs drive the predictions.
-- **SHAP Global Importance**^[[SHAPGlobalImportance](/tests/model_validation/sklearn/SHAPGlobalImportance.md)] — Uses SHAP (SHapley Additive exPlanations) values to assign global importance to features, offering a clear explanation of model outcomes and supporting risk identification. SHAP values provide a mathematically sound attribution of model predictions to specific features, enhancing interpretability.
-- **Weakspots Diagnosis**^[[WeakspotsDiagnosis](/tests/model_validation/sklearn/WeakspotsDiagnosis.md)] — Identifies and visualizes regions of suboptimal model performance across the feature space, highlighting areas that may require further attention. This test explains where and why the model struggles by connecting poor performance to specific feature regions.
+- **Features AUC** — Assesses the discriminatory power of individual features in binary classification models, providing insights into how well each feature differentiates between classes. This test supports explainability by isolating the contribution of each feature to the classification task.
+- **Feature Importance** — Generates feature importance scores to identify and compare impactful features across different models and datasets. By highlighting the relative significance of features, this test clarifies how inputs influence model predictions.
+- **Overfit Diagnosis** — Detects potential overfitting by comparing performance between training and testing sets for specific feature segments, highlighting areas of significant deviation. This test aids explainability by revealing where model behavior is inconsistent, offering insights into its generalization capability.
+- **Permutation Feature Importance** — Measures feature significance by analyzing the impact of randomly rearranging feature values on model performance. This test quantifies the dependency of model performance on each feature, making it clear which inputs drive the predictions.
+- **SHAP Global Importance** — Uses SHAP (SHapley Additive exPlanations) values to assign global importance to features, offering a clear explanation of model outcomes and supporting risk identification. SHAP values provide a mathematically sound attribution of model predictions to specific features, enhancing interpretability.
+- **Weakspots Diagnosis** — Identifies and visualizes regions of suboptimal model performance across the feature space, highlighting areas that may require further attention. This test explains where and why the model struggles by connecting poor performance to specific feature regions.
 
 ::: {.callout}
 ## When logged for documentation, each test automatically generates a comprehensive report as soon as it is executed. 

diff --git a/site/faq/faq-testing.qmd b/site/faq/faq-testing.qmd
@@ -15,7 +15,9 @@ listing:
     fields: [title, description]
     contents:
     - ../developer/how-to/testing-overview.qmd
-    - ../developer/test-descriptions.qmd
+    - title: "{{< var vm.product >}} test sandbox"
+      description: "Tests that are available as part of the {{< var validmind.developer >}}, grouped by type of validation or monitoring test."
+      path: ../developer/how-to/test-sandbox.qmd
     - ../guide/monitoring/ongoing-monitoring.qmd
 categories: ["testing", "model documentation", "customization", "custom data", "explainability", "ongoing monitoring", "validmind library"]
 ---

diff --git a/site/guide/monitoring/ongoing-monitoring.qmd b/site/guide/monitoring/ongoing-monitoring.qmd
@@ -27,9 +27,12 @@ listing:
       - ../../notebooks/use_cases/ongoing_monitoring/application_scorecard_ongoing_monitoring.ipynb
       # - ../../notebooks/use_cases/credit_risk/application_scorecard_full_suite.ipynb
   - id: ongoing-monitoring-tests
-    contents: "../../tests/ongoing_monitoring/*.md"
+    contents:
+    - title: "{{< var vm.product >}} test sandbox"
+      description: "Tests that are available as part of the {{< var validmind.developer >}}, grouped by type of validation or monitoring test."
+      path: ../../developer/how-to/test-sandbox.qmd
     type: grid
-    grid-columns: 3
+    grid-columns: 2
     max-description-length: 250
     page-size: 150
     fields: [title, description]