diff --git a/.github/actions/demo-notebook/action.yml b/.github/actions/demo-notebook/action.yml index 1018155ed8..d2833736c4 100644 --- a/.github/actions/demo-notebook/action.yml +++ b/.github/actions/demo-notebook/action.yml @@ -37,15 +37,15 @@ runs: exit 1 fi - - name: Execute ONLY the ValidMind for model development series with heap development + - name: Execute ONLY the ValidMind for development series with heap development shell: bash if: ${{ steps.find_dev_env.outcome == 'success' }} run: | cd site cp ../${{ inputs.dev_env }} ../.env source ../.env - quarto render --profile exe-demo notebooks/EXECUTED/model_development &> render_errors.log || { - echo "Execute for ValidMind for model development series failed"; + quarto render --profile exe-demo notebooks/EXECUTED/development &> render_errors.log || { + echo "Execute for ValidMind for development series failed"; cat render_errors.log; exit 1; } @@ -59,15 +59,15 @@ runs: exit 1 fi - - name: Execute ONLY the ValidMind for model validation series with heap development + - name: Execute ONLY the ValidMind for validation series with heap development shell: bash if: ${{ steps.find_valid_env.outcome == 'success' }} run: | cd site cp ../${{ inputs.valid_env }} ../.env source ../.env - quarto render --profile exe-demo notebooks/EXECUTED/model_validation &> render_errors.log || { - echo "Execute for ValidMind for model validation series failed"; + quarto render --profile exe-demo notebooks/EXECUTED/validation &> render_errors.log || { + echo "Execute for ValidMind for validation series failed"; cat render_errors.log; exit 1; } \ No newline at end of file diff --git a/.github/actions/prod-notebook/action.yml b/.github/actions/prod-notebook/action.yml index e18a2e802c..c75c309d94 100644 --- a/.github/actions/prod-notebook/action.yml +++ b/.github/actions/prod-notebook/action.yml @@ -37,15 +37,15 @@ runs: exit 1 fi - - name: Execute ONLY the ValidMind for model development series with heap production + - name: Execute 
ONLY the ValidMind for development series with heap production shell: bash if: ${{ steps.find_dev_env.outcome == 'success' }} run: | cd site cp ../${{ inputs.dev_env }} ../.env source ../.env - quarto render --profile exe-prod notebooks/EXECUTED/model_development &> render_errors.log || { - echo "Execute for ValidMind for model development series failed"; + quarto render --profile exe-prod notebooks/EXECUTED/development &> render_errors.log || { + echo "Execute for ValidMind for development series failed"; cat render_errors.log; exit 1; } @@ -59,15 +59,15 @@ runs: exit 1 fi - - name: Execute ONLY the ValidMind for model validation series with heap production + - name: Execute ONLY the ValidMind for validation series with heap production shell: bash if: ${{ steps.find_valid_env.outcome == 'success' }} run: | cd site cp ../${{ inputs.valid_env }} ../.env source ../.env - quarto render --profile exe-prod notebooks/EXECUTED/model_validation &> render_errors.log || { - echo "Execute for ValidMind for model validation series failed"; + quarto render --profile exe-prod notebooks/EXECUTED/validation &> render_errors.log || { + echo "Execute for ValidMind for validation series failed"; cat render_errors.log; exit 1; } diff --git a/.github/actions/staging-notebook/action.yml b/.github/actions/staging-notebook/action.yml index 0d8e967ddd..698dd86dcd 100644 --- a/.github/actions/staging-notebook/action.yml +++ b/.github/actions/staging-notebook/action.yml @@ -37,15 +37,15 @@ runs: exit 1 fi - - name: Execute ONLY the ValidMind for model development series with heap staging + - name: Execute ONLY the ValidMind for development series with heap staging shell: bash if: ${{ steps.find_dev_env.outcome == 'success' }} run: | cd site cp ../${{ inputs.dev_env }} ../.env source ../.env - quarto render --profile exe-staging notebooks/EXECUTED/model_development &> render_errors.log || { - echo "Execute for ValidMind for model development series failed"; + quarto render --profile 
exe-staging notebooks/EXECUTED/development &> render_errors.log || { + echo "Execute for ValidMind for development series failed"; cat render_errors.log; exit 1; } @@ -59,15 +59,15 @@ runs: exit 1 fi - - name: Execute ONLY the ValidMind for model validation series with heap staging + - name: Execute ONLY the ValidMind for validation series with heap staging shell: bash if: ${{ steps.find_valid_env.outcome == 'success' }} run: | cd site cp ../${{ inputs.valid_env }} ../.env source ../.env - quarto render --profile exe-staging notebooks/EXECUTED/model_validation &> render_errors.log || { - echo "Execute for ValidMind for model validation series failed"; + quarto render --profile exe-staging notebooks/EXECUTED/validation &> render_errors.log || { + echo "Execute for ValidMind for validation series failed"; cat render_errors.log; exit 1; } diff --git a/.github/workflows/deploy-docs-prod.yaml b/.github/workflows/deploy-docs-prod.yaml index 24697c2f30..cce53a8fd1 100644 --- a/.github/workflows/deploy-docs-prod.yaml +++ b/.github/workflows/deploy-docs-prod.yaml @@ -75,10 +75,11 @@ jobs: with: version: ${{ vars.QUARTO_VERSION }} - - name: Install Poetry - run: | - curl -sSL https://install.python-poetry.org | python3 - - echo "$HOME/.local/bin" >> $GITHUB_PATH + - name: Set up uv + uses: astral-sh/setup-uv@v5 + + - name: Set up uv + uses: astral-sh/setup-uv@v5 - name: Generate Python library docs run: | @@ -91,8 +92,7 @@ jobs: - name: Generate template schema docs run: | - pip install json-schema-for-humans - BACKEND_ROOT=site/_source/backend python scripts/generate_template_schema_docs.py + BACKEND_ROOT=site/_source/backend uv run --with json-schema-for-humans python scripts/generate_template_schema_docs.py - name: Populate installation run: cp -r site/_source/installation/site/installation site/installation diff --git a/.github/workflows/deploy-docs-staging.yaml b/.github/workflows/deploy-docs-staging.yaml index 4c8f8fb69b..158faaff7a 100644 --- 
a/.github/workflows/deploy-docs-staging.yaml +++ b/.github/workflows/deploy-docs-staging.yaml @@ -75,10 +75,11 @@ jobs: with: version: ${{ vars.QUARTO_VERSION }} - - name: Install Poetry - run: | - curl -sSL https://install.python-poetry.org | python3 - - echo "$HOME/.local/bin" >> $GITHUB_PATH + - name: Set up uv + uses: astral-sh/setup-uv@v5 + + - name: Set up uv + uses: astral-sh/setup-uv@v5 - name: Generate Python library docs run: | @@ -91,8 +92,7 @@ jobs: - name: Generate template schema docs run: | - pip install json-schema-for-humans - BACKEND_ROOT=site/_source/backend python scripts/generate_template_schema_docs.py + BACKEND_ROOT=site/_source/backend uv run --with json-schema-for-humans python scripts/generate_template_schema_docs.py - name: Populate installation run: cp -r site/_source/installation/site/installation site/installation diff --git a/.github/workflows/execute-pr-preview.yaml b/.github/workflows/execute-pr-preview.yaml index 7fcd1d9675..a1b93b3fa8 100644 --- a/.github/workflows/execute-pr-preview.yaml +++ b/.github/workflows/execute-pr-preview.yaml @@ -76,7 +76,7 @@ jobs: echo VM_API_MODEL=${{ secrets.PLATFORM_VALID_MODEL }} >> valid.env # Only execute the demo notebooks for training if .env files are created - - name: Execute demo ValidMind for model development and validation series + - name: Execute demo ValidMind for development and validation series if: ${{ vars.ENABLE_DEMO_NOTEBOOK == 'true' && steps.create_dev_env.outcome == 'success' && steps.create_valid_env.outcome == 'success' }} uses: ./.github/actions/demo-notebook id: execute_demo_notebook @@ -99,8 +99,8 @@ jobs: with: script: | const base = `https://docs-staging.validmind.ai/pr_previews/${{ github.head_ref }}`; - const devUrl = `${base}/notebooks/EXECUTED/model_development/1-set_up_validmind.html`; - const valUrl = `${base}/notebooks/EXECUTED/model_validation/1-set_up_validmind_for_validation.html`; + const devUrl = 
`${base}/notebooks/EXECUTED/development/1-set_up_validmind.html`; + const valUrl = `${base}/notebooks/EXECUTED/validation/1-set_up_validmind_for_validation.html`; // Delete old preview comments const { data: comments } = await github.rest.issues.listComments({ @@ -120,8 +120,8 @@ jobs: let comment = `## Execute training notebooks for PRs\n\n`; comment += `✓ INFO: Live previews are available —\n\n`; - comment += `- [Open model development series](${devUrl})\n`; - comment += `- [Open model validation series](${valUrl})\n`; + comment += `- [Open development series](${devUrl})\n`; + comment += `- [Open validation series](${valUrl})\n`; await github.rest.issues.createComment({ owner: context.repo.owner, diff --git a/.github/workflows/execute-prod.yaml b/.github/workflows/execute-prod.yaml index 5fa62313da..93d3f9b049 100644 --- a/.github/workflows/execute-prod.yaml +++ b/.github/workflows/execute-prod.yaml @@ -59,7 +59,7 @@ jobs: echo VM_API_MODEL=${{ secrets.PLATFORM_VALID_MODEL }} >> valid.env # Only execute the production notebooks for training if .env files are created - - name: Execute production ValidMind for model development and validation series + - name: Execute production ValidMind for development and validation series if: ${{ steps.create_dev_env.outcome == 'success' && steps.create_valid_env.outcome == 'success' }} uses: ./.github/actions/prod-notebook id: execute-prod-notebook diff --git a/.github/workflows/execute-staging.yaml b/.github/workflows/execute-staging.yaml index b0b88b1d81..d692e5d149 100644 --- a/.github/workflows/execute-staging.yaml +++ b/.github/workflows/execute-staging.yaml @@ -59,7 +59,7 @@ jobs: echo VM_API_MODEL=${{ secrets.PLATFORM_VALID_MODEL }} >> valid.env # Only execute the staging notebooks for training if .env files are created - - name: Execute staging ValidMind for model development and validation series + - name: Execute staging ValidMind for development and validation series if: ${{ steps.create_dev_env.outcome == 'success' 
&& steps.create_valid_env.outcome == 'success' }} uses: ./.github/actions/staging-notebook id: execute-staging-notebook diff --git a/.github/workflows/lighthouse-check.yaml b/.github/workflows/lighthouse-check.yaml index d96685de1c..de78a2ce5d 100644 --- a/.github/workflows/lighthouse-check.yaml +++ b/.github/workflows/lighthouse-check.yaml @@ -1,598 +1,421 @@ name: Lighthouse check on: - pull_request: - types: [opened, synchronize, ready_for_review] + workflow_dispatch: + inputs: + mode: + description: "Audit mode" + required: true + default: "changed" + type: choice + options: + - "changed" + - "depth" + depth: + description: "Sitemap depth when mode is depth (0–2)" + required: true + default: "0" + type: choice + options: + - "0" + - "1" + - "2" + pr_number: + description: "Pull request number to audit" + required: true + type: string permissions: + contents: read issues: write pull-requests: write - -env: - # To change the default depth level: - # 0 — Top-level navigation only (e.g. /index.html, /guide/guides.html, /developer/validmind-library.html, etc.) - # 1 — All first-level subdirectories (e.g. /guide/*.html) - # 2 — All second-level subdirectories (e.g. 
/guide/attestation/*.html) - # Note: While the crawler technically supports deeper levels, expect the workflow to take >2-12 hours to complete - DEFAULT_DEPTH: '0' + actions: read jobs: lighthouse: runs-on: ubuntu-latest - if: github.event.pull_request.draft == false + if: github.event_name == 'workflow_dispatch' steps: - - name: Wait for validation workflow to complete - uses: actions/github-script@v6 - with: - script: | - const maxWaitTime = 45 * 60 * 1000; // 45 minutes in milliseconds - const pollInterval = 60 * 1000; // 60 seconds in milliseconds - const startTime = Date.now(); - - console.log(`Waiting for "Validate docs site" workflow to complete for PR #${context.issue.number}`); - console.log(`Head SHA: ${context.payload.pull_request.head.sha}`); - - while (Date.now() - startTime < maxWaitTime) { - try { - // Get workflow runs for the validate-docs-site workflow - const { data: runs } = await github.rest.actions.listWorkflowRunsForRepo({ - owner: context.repo.owner, - repo: context.repo.repo, - workflow_id: 'validate-docs-site.yaml', - head_sha: context.payload.pull_request.head.sha, - per_page: 5 + - name: Resolve PR context + id: pr + uses: actions/github-script@v6 + with: + script: | + const owner = context.repo.owner; + const repo = context.repo.repo; + + async function getPr(prNumber) { + const { data: pr } = await github.rest.pulls.get({ + owner, + repo, + pull_number: prNumber, }); - - console.log(`Found ${runs.workflow_runs.length} workflow runs for this commit`); - - if (runs.workflow_runs.length > 0) { - // Get the most recent run - const latestRun = runs.workflow_runs[0]; - console.log(`Latest run: ${latestRun.id}, status: ${latestRun.status}, conclusion: ${latestRun.conclusion}`); - - if (latestRun.status === 'completed') { - if (latestRun.conclusion === 'success') { - console.log('✅ Validation workflow completed successfully'); - break; - } else { - throw new Error(`❌ Validation workflow failed with conclusion: ${latestRun.conclusion}`); - } - 
} else if (latestRun.status === 'in_progress' || latestRun.status === 'queued') { - console.log(`⏳ Validation workflow is ${latestRun.status}, continuing to wait...`); - } else { - console.log(`⚠️ Unexpected status: ${latestRun.status}`); - } - } else { - console.log('⏳ No workflow runs found yet, validation may not have started...'); + if (pr.draft) { + core.setFailed('Skipping Lighthouse for draft PR'); + return null; } - - console.log(`Elapsed time: ${Math.round((Date.now() - startTime) / 1000 / 60)} minutes`); - await new Promise(resolve => setTimeout(resolve, pollInterval)); - - } catch (error) { - console.error('Error checking workflow status:', error); - throw error; + const labels = (pr.labels || []).map(l => l.name); + const fullAudit = labels.includes('lighthouse:full'); + return { + number: pr.number, + head_ref: pr.head.ref, + head_sha: pr.head.sha, + base_ref: pr.base.ref, + full_audit: fullAudit, + }; } + + const prNumber = parseInt('${{ inputs.pr_number }}', 10); + const info = await getPr(prNumber); + if (!info) return; + + let mode = '${{ inputs.mode }}'; + let depth = '${{ inputs.depth }}'; + if (info.full_audit && mode === 'changed') { + mode = 'depth'; + depth = '2'; + } + + core.setOutput('number', String(info.number)); + core.setOutput('head_ref', info.head_ref); + core.setOutput('head_sha', info.head_sha); + core.setOutput('base_ref', info.base_ref); + core.setOutput('mode', mode); + core.setOutput('depth', depth); + core.setOutput('full_audit', String(info.full_audit)); + + - name: Check out repository + uses: actions/checkout@v4 + with: + ref: ${{ steps.pr.outputs.head_sha }} + fetch-depth: 0 + + - name: Set environment + run: | + echo "PREVIEW_URL=https://docs-staging.validmind.ai/pr_previews/${{ steps.pr.outputs.head_ref }}" >> $GITHUB_ENV + echo "COMMIT_SHA=${{ steps.pr.outputs.head_sha }}" >> $GITHUB_ENV + echo "COMMIT_SHA_SHORT=$(echo ${{ steps.pr.outputs.head_sha }} | cut -c1-7)" >> $GITHUB_ENV + echo "LIGHTHOUSE_MODE=${{ 
steps.pr.outputs.mode }}" >> $GITHUB_ENV + echo "LIGHTHOUSE_DEPTH=${{ steps.pr.outputs.depth }}" >> $GITHUB_ENV + echo "PR_NUMBER=${{ steps.pr.outputs.number }}" >> $GITHUB_ENV + + - name: Check for PR preview URL + id: check_preview + run: | + check_url() { + local url=$1 + local status + status=$(curl -s -o /dev/null -w "%{http_code}" -I -A "Mozilla/5.0" "$url") + echo "Checking $url — status: $status" + [ "$status" -eq 200 ] } - - // Check if we timed out - if (Date.now() - startTime >= maxWaitTime) { - throw new Error('⏰ Timed out waiting for validation workflow to complete'); - } - - - name: Check out repository - uses: actions/checkout@v4 - - - name: Get commit SHA - id: get_sha - run: | - echo "COMMIT_SHA=$(git rev-parse HEAD)" >> $GITHUB_ENV - echo "COMMIT_SHA_SHORT=$(git rev-parse --short HEAD)" >> $GITHUB_ENV - - - name: Set PR preview URL - id: set_url - run: | - echo "PREVIEW_URL=https://docs-staging.validmind.ai/pr_previews/${{ github.head_ref }}" >> $GITHUB_ENV - echo "DEPTH=${{ env.DEFAULT_DEPTH }}" >> $GITHUB_ENV - - - name: Check for PR preview URL and sitemap - id: check_preview - run: | - # Function to check if URL returns HTTP 200 - check_url() { - local url=$1 - local status - status=$(curl -s -o /dev/null -w "%{http_code}" -I -A "Mozilla/5.0" "$url") - echo "Checking $url — status: $status" - [ "$status" -eq 200 ] - } - - echo "Waiting for preview site to become available ..." - for i in {1..60}; do - if check_url "$PREVIEW_URL/index.html"; then - echo "Info: Preview site is now available" - break + + echo "Waiting for preview site to become available ..." + for i in $(seq 1 30); do + if check_url "$PREVIEW_URL/index.html"; then + echo "Info: Preview site is now available" + break + fi + if [ "$i" -eq 30 ]; then + echo "Error: Preview URL did not become available after 30 minutes" + exit 1 + fi + echo "Attempt $i/30: waiting 1 minute..." + sleep 60 + done + + if ! 
check_url "$PREVIEW_URL/sitemap.xml"; then + echo "Error: Sitemap missing at $PREVIEW_URL/sitemap.xml" + exit 1 fi - - if [ $i -eq 60 ]; then - echo "Error: Preview URL did not become available after 60 minutes at $PREVIEW_URL/index.html" + + echo "preview_exists=true" >> $GITHUB_OUTPUT + + - name: Install Python dependencies + if: steps.check_preview.outputs.preview_exists == 'true' + run: | + python -m pip install --upgrade pip + pip install requests + + - name: Generate URLs to check + if: steps.check_preview.outputs.preview_exists == 'true' + id: generate_urls + env: + INSTALLATION_USER: ${{ secrets.INSTALLATION_USER }} + INSTALLATION_PW: ${{ secrets.INSTALLATION_PW }} + run: | + cd site/scripts + python lighthouse_urls.py \ + --mode "$LIGHTHOUSE_MODE" \ + --base-ref "${{ steps.pr.outputs.base_ref }}" \ + --depth "$LIGHTHOUSE_DEPTH" \ + --preview-url "$PREVIEW_URL" \ + --output ../../lhci-urls.txt \ + --metadata ../../lighthouse-metadata.json \ + --skip-file ../../lighthouse-skip.txt + + if [ -f ../../lighthouse-skip.txt ]; then + echo "skip=true" >> $GITHUB_OUTPUT + echo "No site pages to audit in this PR." + exit 0 + fi + + if [ ! -s ../../lhci-urls.txt ]; then + echo "Error: No URLs were generated." exit 1 fi - - echo "Attempt $i/60: Preview site not ready yet, waiting 1 minute..." - sleep 60 - done - - if ! check_url "$PREVIEW_URL/sitemap.xml"; then - echo "Error: Sitemap does not exist at $PREVIEW_URL/sitemap.xml" - exit 1 - fi - - echo "Debug: Checking installation page with URL-based auth..." 
- auth_url="https://${{ secrets.INSTALLATION_USER }}:${{ secrets.INSTALLATION_PW }}@docs-staging.validmind.ai/pr_previews/${{ github.head_ref }}/installation/index.html" - status=$(curl -s -o /dev/null -w "%{http_code}" -I -A "Mozilla/5.0" --anyauth "$auth_url") - echo "Checking $auth_url — status: $status" - if [ "$status" -ne 200 ]; then - echo "Error: Installation page is not accessible with authentication at $auth_url" - exit 1 - fi - - echo "Info: Successfully accessed password-protected installation page" - - echo "preview_exists=true" >> $GITHUB_OUTPUT - - - name: Install Lighthouse CI - if: steps.check_preview.outputs.preview_exists == 'true' - run: npm install -g @lhci/cli - - - name: Install required Python packages - if: steps.check_preview.outputs.preview_exists == 'true' - run: | - python -m pip install --upgrade pip - pip install requests beautifulsoup4 - - - name: Generate URLs to check - if: steps.check_preview.outputs.preview_exists == 'true' - id: generate_urls - run: | - BASE_URL="$PREVIEW_URL" - - # Create a Python script to crawl the site - cat > crawl.py << 'EOF' - import requests - from bs4 import BeautifulSoup - import sys - from urllib.parse import urljoin, urlparse - import json - import xml.etree.ElementTree as ET - import base64 - import os - - # Define root pages to check - ROOT_PAGES = [ - "index.html", - "get-started/get-started.html", - "guide/guides.html", - "developer/validmind-library.html", - "support/support.html", - "releases/all-releases.html", - "training/training.html" - ] - - def get_auth_headers(): - # Only use auth for installation pages - if 'installation/' in url: - # Create auth headers from environment variables - auth_string = base64.b64encode(f"{os.environ['INSTALLATION_USER']}:{os.environ['INSTALLATION_PW']}".encode()).decode() - return {"Authorization": f"Basic {auth_string}"} - return {} - - def get_url_depth(url): - # Parse the URL to get just the path - path = urlparse(url).path - # Remove .html extension for 
depth calculation - path = path.replace('.html', '') - # Remove any leading/trailing slashes - path = path.strip('/') - - # Split into segments and count non-empty ones - segments = [x for x in path.split('/') if x] - - # For PR preview URLs, we need to skip the first 5 segments: - # /pr_previews/username/branch/name/ - if 'pr_previews' in path: - # Skip the first 5 segments (pr_previews/username/branch/name/) - segments = segments[5:] - - # Debug the depth calculation - # print(f"URL depth calculation - Path: {path}, Segments: {segments}, Depth: {len(segments)}", file=sys.stderr) - - return len(segments) - - def get_urls_from_sitemap(sitemap_url, max_depth): - try: - print(f"Fetching sitemap from {sitemap_url}", file=sys.stderr) - # Don't use auth for sitemap - response = requests.get(sitemap_url) - print(f"Sitemap response status: {response.status_code}", file=sys.stderr) - if response.status_code == 200: - print(f"Sitemap content: {response.text[:500]}...", file=sys.stderr) - root = ET.fromstring(response.content) - # Get all URLs from sitemap - all_urls = set() - - for url in root.findall('.//{http://www.sitemaps.org/schemas/sitemap/0.9}url'): - loc = url.find('{http://www.sitemaps.org/schemas/sitemap/0.9}loc') - if loc is not None: - full_url = loc.text - parsed_url = urlparse(full_url) - - # Extract the path part after the base URL - path = parsed_url.path - # Remove leading slash if present - path = path.lstrip('/') - - # Only include .html files - if path.endswith('.html'): - # Check depth - if get_url_depth(path) <= max_depth: - # Remove any segments that match the PR preview path - segments = path.split('/') - # Keep only the segments after the PR preview path - pr_preview_index = -1 - for i, segment in enumerate(segments): - if segment == 'pr_previews': - pr_preview_index = i - break - if pr_preview_index >= 0: - segments = segments[pr_preview_index + 4:] # Skip pr_previews/username/branch/name - path = '/'.join(segments) - all_urls.add(path) - 
print(f"Found URL in sitemap: {path}", file=sys.stderr) - - print(f"Found {len(all_urls)} URLs in sitemap:", file=sys.stderr) - for url in sorted(all_urls): - print(f" {url}", file=sys.stderr) - return sorted(list(all_urls)) - else: - print(f"Failed to fetch sitemap: {response.status_code}", file=sys.stderr) - except Exception as e: - print(f"Error processing sitemap {sitemap_url}: {str(e)}", file=sys.stderr) - return [] - - def get_links(url, max_depth, visited=None): - if visited is None: - visited = set() - - current_depth = get_url_depth(url) - print(f"Checking URL {url} at depth {current_depth}", file=sys.stderr) - - if current_depth > max_depth or url in visited: - print(f"Skipping {url} - depth {current_depth} > {max_depth} or already visited", file=sys.stderr) - return set() - - visited.add(url) - links = set() - - try: - print(f"Fetching {url}", file=sys.stderr) - headers = get_auth_headers() - response = requests.get(url, headers=headers) - print(f"Response status: {response.status_code}", file=sys.stderr) - if response.status_code == 200: - soup = BeautifulSoup(response.text, 'html.parser') - print(f"Found {len(soup.find_all('a', href=True))} links on page", file=sys.stderr) - - for a in soup.find_all('a', href=True): - href = a['href'] - print(f"Processing link: {href}", file=sys.stderr) - - # Skip external links and anchors - if href.startswith('#') or href.startswith('http'): - print(f"Skipping external/anchor link: {href}", file=sys.stderr) - continue - - # Convert relative URLs to absolute - full_url = urljoin(url, href) - print(f"Converted to full URL: {full_url}", file=sys.stderr) - - # Only include URLs from the same base domain - if urlparse(full_url).netloc == urlparse(url).netloc: - # Extract just the path part - path = urlparse(full_url).path - # Remove leading slash if present - path = path.lstrip('/') - - # Only include .html files - if path.endswith('.html'): - print(f"Found HTML link: {path}", file=sys.stderr) - links.add(path) - # Only 
recursively get links if we haven't hit max depth - if get_url_depth(path) < max_depth: - print(f"Recursively checking {path} at depth {get_url_depth(path)}", file=sys.stderr) - links.update(get_links(full_url, max_depth, visited)) - else: - print(f"Skipping recursive check for {path} - at max depth", file=sys.stderr) - else: - print(f"Skipping external domain link: {href}", file=sys.stderr) - except Exception as e: - print(f"Error processing {url}: {str(e)}", file=sys.stderr) - - return links - - # Get command line arguments - base_url = sys.argv[1] - max_depth = int(sys.argv[2]) - - print(f"Base URL: {base_url}", file=sys.stderr) - print(f"Max depth: {max_depth}", file=sys.stderr) - - # Get all URLs - all_urls = set() - - if max_depth == 0: - # For depth 0, only check ROOT_PAGES - print("Depth is 0, only checking ROOT_PAGES", file=sys.stderr) - for root in ROOT_PAGES: - all_urls.add(root) - print(f"Added root page: {root}", file=sys.stderr) - else: - # For depth > 0, use sitemap - print(f"Depth is {max_depth}, using sitemap", file=sys.stderr) - sitemap_url = f"{base_url}/sitemap.xml" - sitemap_urls = get_urls_from_sitemap(sitemap_url, max_depth) - print(f"Found {len(sitemap_urls)} URLs in sitemap", file=sys.stderr) - all_urls.update(sitemap_urls) - - # Print URLs to stdout, ensuring proper URL construction - print(f"Total URLs found: {len(all_urls)}", file=sys.stderr) - for url in sorted(all_urls): - # Remove any leading slashes from the URL to avoid double slashes - url = url.lstrip('/') - # Construct the full URL by joining base_url and url with a single slash - full_url = f"{base_url.rstrip('/')}/{url}" - print(full_url) - print(f"Added URL: {full_url}", file=sys.stderr) - EOF - - # Run the crawler - python crawl.py "$BASE_URL" "$DEPTH" > lhci-urls.txt - - echo "Lighthouse will check the following URLs:" - cat lhci-urls.txt - echo -e "\nTotal number of URLs: $(wc -l < lhci-urls.txt)" - - # Verify we have URLs - if [ ! 
-s lhci-urls.txt ]; then - echo "Error: No URLs were generated. Check the debug output above." - exit 1 - fi - - - name: Create Lighthouse config - if: steps.check_preview.outputs.preview_exists == 'true' - run: | - cat > .lighthouserc.js << 'EOF' - const fs = require('fs'); - const urls = fs.readFileSync('lhci-urls.txt', 'utf-8').split('\n').filter(Boolean); - - // Add auth to installation URLs using the same format as the URL check step - const urlsWithAuth = urls.map(url => { - if (url.includes('/installation/')) { - return `https://${process.env.INSTALLATION_USER}:${process.env.INSTALLATION_PW}@${new URL(url).host}${new URL(url).pathname}`; - } - return url; - }); - - module.exports = { - ci: { - collect: { - url: urlsWithAuth, - numberOfRuns: 1, - settings: { - formFactor: 'desktop', - screenEmulation: { - mobile: false, - width: 1350, - height: 940, - deviceScaleFactor: 1, - disabled: false, + + echo "skip=false" >> $GITHUB_OUTPUT + echo "Lighthouse will check:" + cat ../../lhci-urls.txt + + # Probe first URL from list (beyond index.html) when in changed mode + if [ "$LIGHTHOUSE_MODE" = "changed" ]; then + FIRST=$(head -n1 ../../lhci-urls.txt) + status=$(curl -s -o /dev/null -w "%{http_code}" -I -A "Mozilla/5.0" "$FIRST") + echo "Probe $FIRST — status: $status" + if [ "$status" -ne 200 ]; then + echo "Error: Changed page not reachable on preview" + exit 1 + fi + fi + + - name: Verify installation page auth + if: | + steps.check_preview.outputs.preview_exists == 'true' && + steps.generate_urls.outputs.skip != 'true' + run: | + if ! 
grep -q '/installation/' lhci-urls.txt 2>/dev/null; then + echo "No installation pages in URL list — skipping auth check" + exit 0 + fi + auth_url="https://${{ secrets.INSTALLATION_USER }}:${{ secrets.INSTALLATION_PW }}@docs-staging.validmind.ai/pr_previews/${{ steps.pr.outputs.head_ref }}/installation/index.html" + status=$(curl -s -o /dev/null -w "%{http_code}" -I -A "Mozilla/5.0" --anyauth "$auth_url") + echo "Checking installation page — status: $status" + if [ "$status" -ne 200 ]; then + echo "Error: Installation page not accessible with authentication" + exit 1 + fi + + - name: Post skip comment + if: steps.generate_urls.outputs.skip == 'true' + uses: actions/github-script@v6 + with: + script: | + const prNumber = parseInt(process.env.PR_NUMBER, 10); + const body = `## Lighthouse check results\n\n✓ INFO: No site pages to audit in this PR.\n\nCommit SHA: [${process.env.COMMIT_SHA_SHORT}](${context.serverUrl}/${context.repo.owner}/${context.repo.repo}/commit/${process.env.COMMIT_SHA})`; + await github.rest.issues.createComment({ + owner: context.repo.owner, + repo: context.repo.repo, + issue_number: prNumber, + body, + }); + + - name: Install Lighthouse CI + if: steps.generate_urls.outputs.skip != 'true' && steps.check_preview.outputs.preview_exists == 'true' + run: npm install -g @lhci/cli + + - name: Create Lighthouse config + if: steps.generate_urls.outputs.skip != 'true' && steps.check_preview.outputs.preview_exists == 'true' + run: | + cat > .lighthouserc.js << 'EOF' + const fs = require('fs'); + const urls = fs.readFileSync('lhci-urls.txt', 'utf-8').split('\n').filter(Boolean); + + const urlsWithAuth = urls.map(url => { + if (url.includes('/installation/')) { + return `https://${process.env.INSTALLATION_USER}:${process.env.INSTALLATION_PW}@${new URL(url).host}${new URL(url).pathname}`; + } + return url; + }); + + module.exports = { + ci: { + collect: { + url: urlsWithAuth, + numberOfRuns: 3, + settings: { + formFactor: 'desktop', + screenEmulation: { + 
mobile: false, + width: 1350, + height: 940, + deviceScaleFactor: 1, + disabled: false, + }, + throttling: { + rttMs: 40, + throughputKbps: 10240, + cpuSlowdownMultiplier: 1, + requestLatencyMs: 0, + downloadThroughputKbps: 0, + uploadThroughputKbps: 0, + }, }, - throttling: { - rttMs: 40, - throughputKbps: 10240, - cpuSlowdownMultiplier: 1, - requestLatencyMs: 0, - downloadThroughputKbps: 0, - uploadThroughputKbps: 0, + }, + assert: { + assertions: { + 'categories:accessibility': ['error', { minScore: 0.9 }], }, }, - }, - assert: { - assertions: { - 'categories:accessibility': ['error', { minScore: 0.9 }], + upload: { + target: 'temporary-public-storage', }, }, - upload: { - target: 'temporary-public-storage', - }, - }, - }; - EOF - - - name: Run Lighthouse audit - if: steps.check_preview.outputs.preview_exists == 'true' - uses: treosh/lighthouse-ci-action@v11 - id: lighthouse - continue-on-error: true - env: - INSTALLATION_USER: ${{ secrets.INSTALLATION_USER }} - INSTALLATION_PW: ${{ secrets.INSTALLATION_PW }} - with: - configPath: .lighthouserc.js - uploadArtifacts: true - temporaryPublicStorage: true - - - name: Check Lighthouse audit result - if: steps.check_preview.outputs.preview_exists == 'true' - run: | - # Check if the manifest exists and is valid JSON - if [ -z "${{ steps.lighthouse.outputs.manifest }}" ]; then - echo "Error: Lighthouse audit failed - no manifest output" - exit 1 - fi - - # Try to parse the manifest as JSON - if ! echo '${{ steps.lighthouse.outputs.manifest }}' | jq . > /dev/null 2>&1; then - echo "Error: Lighthouse audit failed - invalid manifest format" - exit 1 - fi - - # Check if any URLs were successfully audited - if ! 
echo '${{ steps.lighthouse.outputs.manifest }}' | jq 'length > 0' > /dev/null 2>&1; then - echo "Error: Lighthouse audit failed - no URLs were successfully audited" - exit 1 - fi - - - name: Post Lighthouse results comment - if: steps.check_preview.outputs.preview_exists == 'true' - uses: actions/github-script@v6 - with: - script: | - const runId = context.runId; - const baseUrl = process.env.PREVIEW_URL; - const commitSha = process.env.COMMIT_SHA; - const commitShaShort = process.env.COMMIT_SHA_SHORT; - - // Get artifacts for this run - const { data: artifacts } = await github.rest.actions.listWorkflowRunArtifacts({ - owner: context.repo.owner, - repo: context.repo.repo, - run_id: runId, - }); + }; + EOF + + - name: Run Lighthouse audit + if: steps.generate_urls.outputs.skip != 'true' && steps.check_preview.outputs.preview_exists == 'true' + uses: treosh/lighthouse-ci-action@v11 + id: lighthouse + env: + INSTALLATION_USER: ${{ secrets.INSTALLATION_USER }} + INSTALLATION_PW: ${{ secrets.INSTALLATION_PW }} + with: + configPath: .lighthouserc.js + uploadArtifacts: true + temporaryPublicStorage: true + + - name: Check Lighthouse audit result + if: steps.generate_urls.outputs.skip != 'true' && steps.check_preview.outputs.preview_exists == 'true' + run: | + if [ -z "${{ steps.lighthouse.outputs.manifest }}" ]; then + echo "Error: Lighthouse audit failed - no manifest output" + exit 1 + fi - // Lighthouse artifact - const lighthouseArtifact = artifacts.artifacts.find(a => a.name === 'lighthouse-report'); - const lighthouseArtifactUrl = lighthouseArtifact - ? `https://github.com/${context.repo.owner}/${context.repo.repo}/actions/runs/${runId}/artifacts/${lighthouseArtifact.id}` - : null; - - // Lighthouse - const manifest = '${{ steps.lighthouse.outputs.manifest }}'; - let manifestJson; - try { - manifestJson = JSON.parse(manifest); - if (!Array.isArray(manifestJson) || manifestJson.length === 0) { - throw new Error('Invalid manifest format or empty results'); + if ! 
echo '${{ steps.lighthouse.outputs.manifest }}' | jq . > /dev/null 2>&1; then + echo "Error: Lighthouse audit failed - invalid manifest format" + exit 1 + fi + + if ! echo '${{ steps.lighthouse.outputs.manifest }}' | jq 'length > 0' > /dev/null 2>&1; then + echo "Error: Lighthouse audit failed - no URLs were successfully audited" + exit 1 + fi + + # Fail if any page scored below 0.9 on accessibility + below=$(echo '${{ steps.lighthouse.outputs.manifest }}' | jq '[.[] | select(.summary.accessibility < 0.9)] | length') + if [ "$below" -gt 0 ]; then + echo "Error: $below page(s) scored below 0.9 on accessibility" + echo '${{ steps.lighthouse.outputs.manifest }}' | jq -r '.[] | select(.summary.accessibility < 0.9) | "\(.url): \(.summary.accessibility)"' + exit 1 + fi + + - name: Post Lighthouse results comment + if: steps.generate_urls.outputs.skip != 'true' && steps.check_preview.outputs.preview_exists == 'true' + uses: actions/github-script@v6 + env: + LIGHTHOUSE_MODE: ${{ env.LIGHTHOUSE_MODE }} + LIGHTHOUSE_DEPTH: ${{ env.LIGHTHOUSE_DEPTH }} + with: + script: | + const fs = require('fs'); + const prNumber = parseInt(process.env.PR_NUMBER, 10); + const runId = context.runId; + const baseUrl = process.env.PREVIEW_URL; + const commitSha = process.env.COMMIT_SHA; + const commitShaShort = process.env.COMMIT_SHA_SHORT; + const mode = process.env.LIGHTHOUSE_MODE; + const depth = process.env.LIGHTHOUSE_DEPTH; + + let metadata = {}; + try { + metadata = JSON.parse(fs.readFileSync('lighthouse-metadata.json', 'utf8')); + } catch (e) { + console.log('No metadata file:', e.message); } - } catch (error) { - console.error('Error parsing Lighthouse manifest:', error); - await github.rest.issues.createComment({ + + const manifest = '${{ steps.lighthouse.outputs.manifest }}'; + let manifestJson; + try { + manifestJson = JSON.parse(manifest); + if (!Array.isArray(manifestJson) || manifestJson.length === 0) { + throw new Error('Invalid manifest'); + } + } catch (error) { + await 
github.rest.issues.createComment({ + owner: context.repo.owner, + repo: context.repo.repo, + issue_number: prNumber, + body: `## Lighthouse check results\n\n⚠️ WARN: Failed to parse Lighthouse results. [Workflow run](https://github.com/${context.repo.owner}/${context.repo.repo}/actions/runs/${runId})`, + }); + return; + } + + const { data: comments } = await github.rest.issues.listComments({ owner: context.repo.owner, repo: context.repo.repo, - issue_number: context.issue.number, - body: `## Lighthouse check results\n\n⚠️ WARN: Failed to parse Lighthouse results. Please check the [workflow run](https://github.com/${context.repo.owner}/${context.repo.repo}/actions/runs/${runId}) for details.` + issue_number: prNumber, }); - return; - } - - // Delete old Lighthouse comments - const { data: comments } = await github.rest.issues.listComments({ - owner: context.repo.owner, - repo: context.repo.repo, - issue_number: context.issue.number, - }); - - // Delete any previous comments from this workflow - for (const comment of comments) { - if (comment.user.login === 'github-actions[bot]' && - comment.body.includes('## Lighthouse check results')) { - try { - console.log(`Deleting Lighthouse comment ${comment.id}`); + for (const comment of comments) { + if (comment.user.login === 'github-actions[bot]' && + comment.body.includes('## Lighthouse check results')) { await github.rest.issues.deleteComment({ owner: context.repo.owner, repo: context.repo.repo, comment_id: comment.id, }); - console.log(`Successfully deleted Lighthouse comment ${comment.id}`); - } catch (error) { - console.error(`Failed to delete Lighthouse comment ${comment.id}:`, error); } } - } - - // Calculate average accessibility score - const scores = manifestJson.map(run => run.summary.accessibility); - const avgScore = scores.reduce((a, b) => a + b, 0) / scores.length; - const lighthouseScore = avgScore.toFixed(2); - - const lighthouseReportUrl = 
`https://github.com/${context.repo.owner}/${context.repo.repo}/actions/runs/${runId}`; - let lighthouseComment = ''; - if (parseFloat(lighthouseScore) >= 0.9) { - lighthouseComment = `✓ INFO: Average accessibility score is **${lighthouseScore}** (required: >0.9) — [View the workflow run](${lighthouseReportUrl})`; - } else { - lighthouseComment = `⚠️ WARN: Average accessibility score is **${lighthouseScore}** (required: >0.9) — [Check the workflow run](${lighthouseReportUrl})`; - } - const stripAuth = url => { - try { - const u = new URL(url); - u.username = ''; - u.password = ''; - return u.toString(); - } catch { - return url; - } - }; + const scores = manifestJson.map(run => run.summary.accessibility); + const avgScore = (scores.reduce((a, b) => a + b, 0) / scores.length).toFixed(2); + const lighthouseReportUrl = `https://github.com/${context.repo.owner}/${context.repo.repo}/actions/runs/${runId}`; + const lighthouseComment = parseFloat(avgScore) >= 0.9 + ? `✓ INFO: Average accessibility score is **${avgScore}** (required: ≥0.9) — [View the workflow run](${lighthouseReportUrl})` + : `⚠️ WARN: Average accessibility score is **${avgScore}** (required: ≥0.9) — [Check the workflow run](${lighthouseReportUrl})`; - // Helper to get the public report URL from htmlPath - const getReportUrl = (run) => { - if (run.report && Array.isArray(run.report)) { - // Find the public .report.html URL - const htmlReport = run.report.find(r => r.endsWith('.report.html') && r.startsWith('http')); - if (htmlReport) return htmlReport; - // Fallback: first report if available - if (run.report.length > 0) return run.report[0]; - } - // Fallback: just show the workflow run if nothing else - return lighthouseReportUrl; - }; + const stripAuth = url => { + try { + const u = new URL(url); + u.username = ''; + u.password = ''; + return u.toString(); + } catch { + return url; + } + }; - // Parse the links output from the Lighthouse step - const links = (() => { - try { - return JSON.parse(`${{ 
steps.lighthouse.outputs.links }}`); - } catch { - return {}; + const links = (() => { + try { + return JSON.parse(`${{ steps.lighthouse.outputs.links }}`); + } catch { + return {}; + } + })(); + + const scoresTable = manifestJson + .map(run => { + const formatScore = score => score === null ? 'N/A' : score.toFixed(2); + const displayPath = stripAuth(run.url).replace(baseUrl, '') || run.url; + const reportUrl = links[run.url] || lighthouseReportUrl; + return `| [${displayPath}](${reportUrl}) | ${formatScore(run.summary.accessibility)} | ${formatScore(run.summary.performance)} | ${formatScore(run.summary['best-practices'])} | ${formatScore(run.summary.seo)} |`; + }) + .join('\n'); + + const modeLine = mode === 'changed' + ? `Audit mode: **changed pages** (${metadata.paths?.length || manifestJson.length} URL(s))` + : `Audit mode: **depth ${depth}** (sitemap)`; + + let comment = `## Lighthouse check results\n\n`; + comment += `${lighthouseComment}\n\n`; + comment += `${modeLine}\n\n`; + comment += `
<details>\n<summary>Show Lighthouse scores</summary>\n\n`; + comment += `Commit SHA: [${commitShaShort}](${context.serverUrl}/${context.repo.owner}/${context.repo.repo}/commit/${commitSha})\n\n`; + if (metadata.global_fallback) { + comment += `_Global site files changed — audited root navigation pages._\n\n`; } - })(); - - const scoresTable = manifestJson - .map(run => { - const formatScore = (score) => score === null ? 'N/A' : score.toFixed(2); - const displayPath = stripAuth(run.url).replace(baseUrl, ''); - // Use the public report URL from the links output, fallback to workflow run if missing - const reportUrl = links[run.url] || lighthouseReportUrl; - return `| [${displayPath}](${reportUrl}) | ${formatScore(run.summary.accessibility)} | ${formatScore(run.summary.performance)} | ${formatScore(run.summary['best-practices'])} | ${formatScore(run.summary.seo)} |`; - }) - .join('\n'); - - let comment = `## Lighthouse check results\n\n`; - comment += `${lighthouseComment}\n\n`; - comment += `
<details>\n<summary>Show Lighthouse scores</summary>\n\n`; - comment += `Folder depth level checked: **${process.env.DEPTH}**\n\n`; - comment += `Commit SHA: [${commitShaShort}](${context.serverUrl}/${context.repo.owner}/${context.repo.repo}/commit/${commitSha})\n\n`; - comment += `Modify the workflow to check a different depth:\n`; - comment += `- 0: Top-level navigation only — /index.html, /guide/guides.html, ...\n`; - comment += `- 1: All first-level subdirectories — /guide/\*.html, /developer/\*.html, ...\n`; - comment += `- 2: All second-level subdirectories — /guide/attestation/\*.html, ...\n\n`; - comment += `| Page | Accessibility | Performance | Best Practices | SEO |\n`; - comment += `|------|---------------|-------------|----------------|-----|\n`; - comment += `${scoresTable}\n\n`; - comment += `
</details>\n\n`; - - await github.rest.issues.createComment({ - owner: context.repo.owner, - repo: context.repo.repo, - issue_number: context.issue.number, - body: comment - }); + comment += `For a thorough audit, run the **Lighthouse check** workflow manually (Actions → Lighthouse check → Run workflow) with depth 0–2, or add the \`lighthouse:full\` label for depth 2 on the next validate run.\n\n`; + comment += `| Page | Accessibility | Performance | Best Practices | SEO |\n`; + comment += `|------|---------------|-------------|----------------|-----|\n`; + comment += `${scoresTable}\n\n`; + comment += `
</details>\n\n`; + + await github.rest.issues.createComment({ + owner: context.repo.owner, + repo: context.repo.repo, + issue_number: prNumber, + body: comment, + }); diff --git a/.github/workflows/publish-llm-markdown.yaml b/.github/workflows/publish-llm-markdown.yaml index e5500fe568..3bb4b307bc 100644 --- a/.github/workflows/publish-llm-markdown.yaml +++ b/.github/workflows/publish-llm-markdown.yaml @@ -39,10 +39,11 @@ jobs: with: version: ${{ vars.QUARTO_VERSION }} - - name: Install Poetry - run: | - curl -sSL https://install.python-poetry.org | python3 - - echo "$HOME/.local/bin" >> $GITHUB_PATH + - name: Set up uv + uses: astral-sh/setup-uv@v5 + + - name: Set up uv + uses: astral-sh/setup-uv@v5 - name: Generate Python library docs run: | diff --git a/.github/workflows/validate-docs-site.yaml b/.github/workflows/validate-docs-site.yaml index 309a7b302f..f9a7fc80f6 100644 --- a/.github/workflows/validate-docs-site.yaml +++ b/.github/workflows/validate-docs-site.yaml @@ -5,6 +5,7 @@ on: types: [opened, synchronize, ready_for_review] permissions: + actions: write issues: write pull-requests: write @@ -65,6 +66,9 @@ jobs: src/backend/templates/documentation/model_documentation sparse-checkout-cone-mode: true + - name: Set up uv + uses: astral-sh/setup-uv@v5 + - name: Verify copyright headers run: | cd site @@ -75,11 +79,6 @@ jobs: with: version: pre-release - - name: Install Poetry - run: | - curl -sSL https://install.python-poetry.org | python3 - - echo "$HOME/.local/bin" >> $GITHUB_PATH - - name: Generate Python library docs run: | cd site/_source/validmind-library @@ -91,8 +90,7 @@ jobs: - name: Generate template schema docs run: | - pip install json-schema-for-humans - BACKEND_ROOT=site/_source/backend python scripts/generate_template_schema_docs.py + BACKEND_ROOT=site/_source/backend uv run --with json-schema-for-humans python scripts/generate_template_schema_docs.py - name: Populate installation run: cp -r site/_source/installation/site/installation 
site/installation 
@@ -164,15 +162,48 @@ jobs: body: comment }); + - name: Trigger Lighthouse check + uses: actions/github-script@v6 + with: + script: | + await github.rest.actions.createWorkflowDispatch({ + owner: context.repo.owner, + repo: context.repo.repo, + workflow_id: 'lighthouse-check.yaml', + ref: context.payload.pull_request.head.ref, + inputs: { + mode: 'changed', + depth: '0', + pr_number: String(context.issue.number), + }, + }); + console.log(`Dispatched Lighthouse check for PR #${context.issue.number}`); + - name: Install pandoc run: | sudo apt-get update sudo apt-get install -y pandoc + - name: Verify chatbot product map is up to date + run: | + python3 site/scripts/generate_chatbot_product_map.py + git diff --exit-code site/llm/chatbot-product-map.md site/llm/chatbot-product-map-frontend-snapshot.json + + - name: Test chatbot product map generator + run: python3 -m unittest discover -s site/scripts -p 'test_generate_chatbot_product_map.py' -v + - name: Validate LLM markdown render run: bash llm/render.sh && bash llm/clean.sh working-directory: site + - name: Verify LLM corpus includes product map and docs IA hub + run: | + test -f site/llm/_llm-output/chatbot-product-map.md + test -f site/llm/_llm-output/AGENTS.md + test -f site/llm/_llm-output/about/using-the-documentation.md + test ! -f site/llm/_llm-output/about/contributing/validmind-community.md + test ! 
-d site/llm/_llm-output/about/contributing/style-guide + # Release headroom and shrink before final lightweight steps & post-job - name: Release reserve & shrink if: always() diff --git a/AGENTS.md b/AGENTS.md index b1c662e0e7..52f3c75e8c 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -6,7 +6,7 @@ This repository contains the source files for [ValidMind's documentation site](h For an overview of the documentation structure and how to navigate it, see: -**[Using the documentation](https://docs.validmind.ai/about/contributing/using-the-documentation.html)** +**[Using the documentation](https://docs.validmind.ai/about/using-the-documentation.html)** This page explains: - How the documentation is organized @@ -32,6 +32,16 @@ If you are an AI agent embedded in ValidMind, your capabilities are documented h This page describes what the assistant can and cannot do, including context-aware features and current limitations. +## Product UI mapping + +The in-app assistant (Valerie) also ingests **`chatbot-product-map.md`** in the LLM corpus. That file maps **platform routes** (for example `/settings/workflows`, `/model-inventory`, `/dashboard`) to documentation URLs and section hints. + +Route and help-link data from the product UI is vendored as **`site/llm/chatbot-product-map-frontend-snapshot.json`**. Regenerate it with `make -C site refresh-chatbot-product-map` when frontend routes or `helpLink` values change (requires a local `validmind/frontend` checkout). + +Use the map when the user’s question is tied to where they are in the product — especially **Settings**, where the UI groups features differently than the documentation sidebars (Configuration, Workflows, Inventory, and so on). + +For documentation organized by topic, continue to use **Using the documentation** (above) and the section table in this file. + ## File format Documentation is written in Quarto Markdown (`.qmd`). 
Key conventions: diff --git a/README.md b/README.md index 8de8105fa5..d35a5c978f 100644 --- a/README.md +++ b/README.md @@ -174,6 +174,19 @@ The script reads from: Output: Content is injected directly into `site/guide/templates/customize-document-templates.qmd` between marker comments. +#### Chatbot product map and LLM corpus + +The in-app assistant (Valerie) uses generated files under `site/llm/`, including `chatbot-product-map.md` (platform routes mapped to docs URLs and section headings). CI regenerates that map and fails if it is out of date with your changes. + +If you edit `.qmd` files that affect linked docs or headings (for example FAQ or guide pages referenced from the product UI), regenerate and commit the map before opening or updating a pull request: + +```bash +cd site +make generate-chatbot-product-map +``` + +If product routes or in-app help links changed, use `make refresh-chatbot-product-map` instead (requires a local `validmind/frontend` checkout). See [`site/llm/README.md`](site/llm/README.md) for the full LLM render pipeline, snapshot maintenance, and when to refresh each artifact. 
+ #### Stylesheet organization (IN PROGRESS) The site uses a modular stylesheet architecture to maintain organized and maintainable styles: @@ -212,7 +225,7 @@ When constructing links, refer to the `.qmd` file as Quarto will properly render | Correct | Incorrect | |---|---| -| `[Quickstart — Model Development](get-started/developer/quickstart-developer.qmd)` | `[Quickstart — Model Development](get-started/developer/quickstart-developer.html)` | +| `[Quickstart — Development](get-started/developer/quickstart-developer.qmd)` | `[Quickstart — Development](get-started/developer/quickstart-developer.html)` | When constructing filepaths, including while using [Quarto's Includes](https://quarto.org/docs/authoring/includes.html) (single-sourcing feature), you'll also want start with the root directory whenever possible as this minimizes usage of unclear relative paths: @@ -379,24 +392,22 @@ Similarly, http://localhost:4444/ in your browsers should show an all green logo ## Configuring Lighthouse checks -Lighthouse is an open-source tool that audits web pages for accessibility, performance, best practices, and SEO. We automatically run Lighthouse against PR preview sites to enable a better, accessible documentation for everyone. +Lighthouse is an open-source tool that audits web pages for accessibility, performance, best practices, and SEO. We automatically run Lighthouse against PR preview sites when **Validate docs site** finishes deploying a preview (it dispatches the Lighthouse workflow on the PR branch). -By default, Lighthouse checks only the top-level pages in our site navigation, such as `/index.html`, `/guide/guides.html`, `/developer/validmind-library.html`, and so forth. You can configure this behavior in the workflow: +**Default (every PR):** Lighthouse audits only HTML pages that correspond to files changed under `site/` in the pull request. 
If you change shared layout files (`_quarto.yml`, `theme.scss`, `_variables.yml`, `_extensions/`, and similar), it falls back to the root navigation pages (`index.html`, `guide/guides.html`, and so on). -```sh -env: - # To change the default depth level: - # 0 — Top-level navigation only (e.g. /index.html, /guide/guides.html, /developer/validmind-library.html, etc.) - # 1 — All first-level subdirectories (e.g. /guide/*.html) - # 2 — All second-level subdirectories (e.g. /guide/attestation/*.html) - # Note: While the crawler technically supports deeper levels, expect the workflow to take >2-12 hours to complete - DEFAULT_DEPTH: '0' -``` +**Thorough audit:** + +- Add the `lighthouse:full` label to a PR to run a depth-2 sitemap audit on the next successful validate run. +- Or run the **Lighthouse check** workflow manually from Actions → **Run workflow**, set the PR number, and choose depth `0` (root pages), `1` (first-level sections), or `2` (second-level). Depths above zero can take hours; use them on feature branches only. + +The PR comment lists audited URLs, the commit SHA, and accessibility scores (required: ≥ 0.9 per page). **Tips:** -- On the first run, the workflow waits for a preview site to become available. For subsequent runs, it checks the currently available site, which may be behind HEAD. The PR comment shows which commit SHA was checked — rerun the check if needed. -- Use folder depths greater than zero only on working branches when you need a thorough site audit. Deeper checks take 2-12 hours to complete and significantly slow down the CI/CD pipeline. Do not merge depth changes to `main`. +- Lighthouse starts only after validate succeeds, so it no longer polls for up to 45 minutes. +- If a PR changes only CI or repo metadata (no `site/` pages), Lighthouse skips with an informational comment. +- Re-run validate (or push a commit) if the preview comment SHA does not match the commit you expect audited. 
## Monitoring diff --git a/site/Makefile b/site/Makefile index d3372f286c..22f6027d50 100644 --- a/site/Makefile +++ b/site/Makefile @@ -9,12 +9,11 @@ SRC_ROOT := _source SRC_DIR := $(SRC_ROOT)/validmind-library DEST_DIR_NB := notebooks DEST_DIR_PYTHON := validmind -DEST_DIR_TESTS := tests SRC_ROOT := _source SRC_DIR := $(SRC_ROOT)/validmind-library # Define .PHONY target for help section -.PHONY: help add-copyright clean clone copy-installation copy-release-notes delete-demo-branch deploy-demo-branch deploy-prod deploy-staging docker-build docker-serve docker-site docker-site-lite docs-site execute generate-sitemap get-api-json get-source kind-serve kind-stop kind-restart kind-logs notebooks python-docs release-notes render-llm template-schema-docs test-descriptions verify-copyright yearly-releases +.PHONY: help add-copyright clean clone copy-installation copy-release-notes delete-demo-branch deploy-demo-branch deploy-prod deploy-staging docker-build docker-serve docker-site docker-site-lite docs-site execute generate-chatbot-product-map refresh-chatbot-product-map generate-sitemap get-api-json get-source kind-serve kind-stop kind-restart kind-logs notebooks python-docs release-notes render-llm template-schema-docs verify-copyright yearly-releases # Help section help: @@ -39,10 +38,12 @@ help: @echo " docker-site Get source, render site with Docker profile, execute notebooks" @echo " docker-site-lite Get source and render site with Docker profile (skips notebook execution)" @echo " docs-site Get all source files and render the production docs site with Quarto" + @echo " generate-chatbot-product-map Generate product-to-docs map (from committed frontend snapshot)" + @echo " refresh-chatbot-product-map Re-extract frontend snapshot + map (requires ../frontend)" @echo " generate-sitemap Generate a sitemap for the static HTML site" @echo " execute Execute a Jupyter Notebook or notebook directory" @echo " get-api-json Download Swagger JSON specs from ValidMind APIs into 
reference/" - @echo " get-source Get all source files (clean, clone, copy-installation, copy-release-notes, notebooks, python-docs, test-descriptions, get-api-json)" + @echo " get-source Get all source files (clean, clone, copy-installation, copy-release-notes, notebooks, python-docs, get-api-json)" @echo " kind-serve Set up Kind cluster for ValidMind docs" @echo " kind-stop Stop Kind cluster for ValidMind docs" @echo " kind-restart Restart Kind cluster for ValidMind docs" @@ -53,7 +54,6 @@ help: @echo " Examples: TAG=cmvm/25.07 or TAG=validmind-library/v2.8.22" @echo " template-schema-docs Generate template schema documentation from backend" @echo " render-llm Render site to GFM markdown for LLM ingestion (mirrors CI)" - @echo " test-descriptions Copy the ValidMind tests docs into tests/" @echo " verify-copyright Verify that all .qmd and .yml/.yaml files have copyright headers" @echo " yearly-releases Collate releases by year into a listing landing and update releases sidebar" @@ -177,12 +177,12 @@ execute: elif [ "$$PROFILE_CHOICE" = "3" ]; then PROFILE="exe-prod"; \ else echo "Invalid choice"; exit 1; fi; \ echo "Select FILE_PATH:"; \ - echo "1) notebooks/EXECUTED/model_development"; \ - echo "2) notebooks/EXECUTED/model_validation"; \ + echo "1) notebooks/EXECUTED/development"; \ + echo "2) notebooks/EXECUTED/validation"; \ echo "3) Enter custom filepath"; \ read -p "Enter choice [1-3]: " FILE_CHOICE; \ - if [ "$$FILE_CHOICE" = "1" ]; then FILE_PATH="notebooks/EXECUTED/model_development"; \ - elif [ "$$FILE_CHOICE" = "2" ]; then FILE_PATH="notebooks/EXECUTED/model_validation"; \ + if [ "$$FILE_CHOICE" = "1" ]; then FILE_PATH="notebooks/EXECUTED/development"; \ + elif [ "$$FILE_CHOICE" = "2" ]; then FILE_PATH="notebooks/EXECUTED/validation"; \ elif [ "$$FILE_CHOICE" = "3" ]; then read -p "Enter custom FILE_PATH: " CUSTOM_FILE_PATH; FILE_PATH="$$CUSTOM_FILE_PATH"; \ else echo "Invalid choice"; exit 1; fi; \ read -p "Enter VM_API_HOST (default: 
https://api.prod.validmind.ai/api/v1/tracking): " INPUT_VM_API_HOST; \ @@ -284,7 +284,7 @@ get-api-json: @curl -s -o reference/rapidoc-min.js "https://unpkg.com/rapidoc/dist/rapidoc-min.js" || echo "Failed to fetch RapiDoc library" # Get all source files -get-source: clean clone copy-installation copy-release-notes notebooks python-docs test-descriptions template-schema-docs +get-source: clean clone copy-installation copy-release-notes notebooks python-docs template-schema-docs # Requires that you've run `make docker-build` kind-serve: @@ -324,12 +324,12 @@ notebooks: @rm -f notebooks.zip @rm -rf $(DEST_DIR_NB)/ && mkdir -p $(DEST_DIR_NB) @cp -r $(SRC_DIR)/notebooks/. $(DEST_DIR_NB)/ - @echo "Duplicating all files from notebooks/tutorials/model_development/ for execution" - @mkdir -p notebooks/EXECUTED/model_development/ - @cp -r notebooks/tutorials/model_development/* notebooks/EXECUTED/model_development/ - @echo "Duplicating all files from notebooks/tutorials/model_validation/ for execution" - @mkdir -p notebooks/EXECUTED/model_validation/ - @cp -r notebooks/tutorials/model_validation/* notebooks/EXECUTED/model_validation/ + @echo "Duplicating all files from notebooks/tutorials/development/ for execution" + @mkdir -p notebooks/EXECUTED/development/ + @cp -r notebooks/tutorials/development/* notebooks/EXECUTED/development/ + @echo "Duplicating all files from notebooks/tutorials/validation/ for execution" + @mkdir -p notebooks/EXECUTED/validation/ + @cp -r notebooks/tutorials/validation/* notebooks/EXECUTED/validation/ @echo "Copying LICENSE into notebooks ..." @cp -r $(SRC_DIR)/LICENSE $(DEST_DIR_NB)/ @rm -rf $(DEST_DIR_NB)/code_sharing @@ -430,16 +430,6 @@ template-schema-docs: @python -m pip install -q json-schema-for-humans @BACKEND_ROOT=$(SRC_ROOT)/backend python ../scripts/generate_template_schema_docs.py -test-descriptions: - @echo "\nUpdating test descriptions source ..." 
- @cd _source/validmind-library && make install && poetry run python scripts/extract_descriptions.py validmind/tests - @cd ../../ - @rm -rf $(DEST_DIR_TESTS) - @mkdir -p $(DEST_DIR_TESTS) - @cp -r $(SRC_DIR)/build/_test_descriptions/validmind/tests/. $(DEST_DIR_TESTS) - @echo "Copying _metadata.yml into tests/ ..." - @cp developer/_metadata.yml $(DEST_DIR_TESTS)/_metadata.yml - verify-copyright: @echo "\nVerifying copyright headers in .qmd and .yml/.yaml files ..." @cd .. && python site/scripts/verify_copyright_qmd.py @@ -451,6 +441,14 @@ yearly-releases: git status | grep -v 'release-scripts/' quarto preview +# Generate product-to-documentation map for chatbot RAG (uses committed frontend snapshot) +generate-chatbot-product-map: + @python3 scripts/generate_chatbot_product_map.py + +# Refresh vendored frontend snapshot from a local validmind/frontend checkout +refresh-chatbot-product-map: + @python3 scripts/generate_chatbot_product_map.py --from-frontend + # Render site to GFM markdown for LLM ingestion render-llm: @echo "\nRendering site to GFM markdown for LLM ingestion ..." 
diff --git a/site/_quarto.yml b/site/_quarto.yml index 28da8c4f4e..977562cd82 100644 --- a/site/_quarto.yml +++ b/site/_quarto.yml @@ -95,7 +95,7 @@ website: - text: "Model risk management" file: about/use-cases/model-risk-management.qmd contents: - - about/use-cases/sr-11-7.qmd + - about/use-cases/sr-26-2.qmd - about/use-cases/ss1-23.qmd - about/use-cases/e-23.qmd - text: "---" @@ -103,7 +103,7 @@ website: - text: "Library and platform" file: about/library-and-platform.qmd contents: - - about/overview-model-documentation.qmd + - about/overview-documentation.qmd - about/overview-llm-features.qmd - text: "Deployment options" file: about/deployment/deployment-options.qmd @@ -125,7 +125,7 @@ website: - text: "Software license agreement" file: about/fine-print/license-agreement.qmd - text: "---" - - about/contributing/using-the-documentation.qmd + - about/using-the-documentation.qmd - title: "Support" contents: diff --git a/site/about/contributing/style-guide/conventions.qmd b/site/about/contributing/style-guide/conventions.qmd index 351ed06336..17fc6e2c78 100644 --- a/site/about/contributing/style-guide/conventions.qmd +++ b/site/about/contributing/style-guide/conventions.qmd @@ -125,7 +125,7 @@ Column 2, 50% wide Sometimes, it's helpful to highlight a call to action with a button that takes you to a topic or to a notebook on JupyterHub. 
-Change any Markdown link into a our theme-styled button by appending `{.button}`: +Change any Markdown link into one of our theme-styled buttons by appending `{.button}`: :::: {.flex .flex-wrap .justify-around} @@ -167,8 +167,8 @@ Using a markdown button also enables you to link to to the `.qmd` path instead o ```markdown - The record is registered in the inventory.[^1] - - You've already customized your model lifecycle statuses for use in workflows.[^2] - - Workflows have already been set up for use with your models.[^3] + - You've already customized your record stages for use in workflows.[^2] + - Workflows have already been set up for use with your records.[^3] - You are assigned a role that has access to complete actions set up by workflows.[^5] @@ -344,7 +344,7 @@ Use backticks to enclose keyboard commands, parameters, field values, and file n | Correct | Incorrect | |------|-----| -| Learn how to store model identifier credentials in a `.env` file instead of using inline credentials. | Learn how to store model identifier credentials in a ".env" file instead of using inline credentials. | +| Learn how to store record identifier credentials in a `.env` file instead of using inline credentials. | Learn how to store record identifier credentials in a ".env" file instead of using inline credentials. | | For example, the `classifier_full_suite` test suite runs tests from the `tabular_dataset` and `classifier` test suites to fully document the data and model sections for binary classification model use cases. | For example, the "classifier_full_suite" test suite runs tests from the "tabular_dataset" and "classifier" test suites to fully document the data and model sections for binary classification model use cases. | | Under When these conditions are met, you are able to set both `AND` and `OR` conditions. 
| Under When these conditions are met, you are able to set both "AND" and "OR" conditions.| : **Backtick** examples {.hover} @@ -359,7 +359,7 @@ Use backticks to enclose keyboard commands, parameters, field values, and file n : **Mathematical formula** examples {.hover} ## Proper nouns -In the context of model risk management, proper nouns include specific models, laws, or regulations, such as "Basel IV" or "SR 11-7." These refer to specific frameworks or guidelines and you spell them with initial capital letters or exactly as indicated by official sources. +In the context of model risk management, proper nouns include specific models, laws, or regulations, such as "Basel IV" or "SR 26-2." These refer to specific frameworks or guidelines and you spell them with initial capital letters or exactly as indicated by official sources. - Terms that are not proper nouns include general concepts such as "model validation," "stress testing," "risk assessment," and "backtesting." - These are common terms in the field and are not capitalized unless starting a sentence. @@ -380,7 +380,7 @@ Within our documentation (`https://docs.validmind.ai/`), you are able to referen | Product Name | Variable Key | Description | |---:|---|---| -| {{< var validmind.product >}} | `{{{< var validmind.product >}}}` | Comphrensive suite of tools with a {{< var vm.developer >}} for documenting and testing models, alongside a {{< var vm.platform >}} hosting cloud-based tools, APIs, databases, and validation engines. | +| {{< var validmind.product >}} | `{{{< var validmind.product >}}}` | Comprehensive suite of tools with a {{< var vm.developer >}} for documenting and testing records (such as models), alongside a {{< var vm.platform >}} hosting cloud-based tools, APIs, databases, and validation engines. | | {{< var validmind.developer >}} | `{{{< var validmind.developer >}}}` | Open-source library that connects to the {{< var validmind.platform >}}. 
| | {{< var validmind.platform >}} | `{{{< var validmind.platform >}}}` | Hosted multi-tenant architecture that includes a cloud-based web interface. | | {{< var validmind.api >}} | `{{{< var validmind.api >}}}` | Used to make calls to the {{< var validmind.developer >}}.[^21] | @@ -438,7 +438,7 @@ From **{{< fa gear >}} Settings** in the {{< var validmind.platform >}},
yo - Set up your organization - Onboard new users - Manage roles, groups and
permissions -- Configure the model inventory +- Configure the inventory - Manage templates and workflows - And much more! diff --git a/site/about/contributing/style-guide/voice-and-tone.qmd b/site/about/contributing/style-guide/voice-and-tone.qmd index 60b20dcbd4..f43495e50b 100644 --- a/site/about/contributing/style-guide/voice-and-tone.qmd +++ b/site/about/contributing/style-guide/voice-and-tone.qmd @@ -47,7 +47,7 @@ Behind every page, there’s a person. In every word, lies an opportunity to win | Correct | Incorrect | |------|-----| | **User acknowledgement:** Documenting artifacts can be difficult and tedious for even the most seasoned of validators. | **User dismissal:** For experienced validators, documenting artifacts is a breeze. | -| **Success toast:** Nice work — you’ve successfully registered your first model! | **Inappropriate humor:** We lost your model documentation, oops! Here, have a pony! (e.g. error message for serious issue) | +| **Success toast:** Nice work — you’ve successfully registered your first record! | **Inappropriate humor:** We lost your documentation, oops! Here, have a pony! (e.g. error message for serious issue) | : **Empathy & humor** examples {.hover} ### Be positive @@ -82,7 +82,7 @@ Address the reader directly by using the second person. | Correct | Incorrect | |------|-----| -| After completing this quickstart, you will be able to view your test results as part of your model documentation right in the {{< var validmind.platform >}}. | After completing this quickstart, the model developer will be able to view the test results as part of the model documentation right in the {{< var validmind.platform >}}. | +| After completing this quickstart, you will be able to view your test results as part of your documentation right in the {{< var validmind.platform >}}. | After completing this quickstart, the developer will be able to view the test results as part of the documentation right in the {{< var validmind.platform >}}. 
| : **2nd person** examples {.hover} ### Avoid stiff formality @@ -92,7 +92,7 @@ Address the reader directly by using the second person. | Correct | Incorrect | |------|-----| -| Once you’ve registered the model, you can then grab the unique code snippet that will have been generated for you to use in the next step. | First, you must register the model as this will generate a unique code snippet that needs to be copied. Then, you need to retrieve the code snippet so that you can make use of it in the following step. | +| Once you’ve registered the record, you can then grab the unique code snippet that will have been generated for you to use in the next step. | First, you must register the record as this will generate a unique code snippet that needs to be copied. Then, you need to retrieve the code snippet so that you can make use of it in the following step. | : **Informal language** examples {.hover} ### Focus on teamwork diff --git a/site/about/contributing/validmind-community.qmd b/site/about/contributing/validmind-community.qmd index bc0996e66b..8dc36167ef 100644 --- a/site/about/contributing/validmind-community.qmd +++ b/site/about/contributing/validmind-community.qmd @@ -11,7 +11,7 @@ aliases: - /about/join-community.html --- -Work with financial models, in model risk management (MRM), or are simply enthusiastic about artificial intelligence (AI) and machine learning and how these tools are actively shaping our futures within the finance industry and beyond? Congratulations — you're already part of the {{< var vm.product >}} community! Come learn and play with us. +Work with financial models, in model risk management (MRM), in AI governance, or are simply enthusiastic about artificial intelligence (AI) and machine learning and how these tools are actively shaping our futures within the finance industry and beyond? Congratulations — you're already part of the {{< var vm.product >}} community! Come learn and play with us. 
::: {.callout} diff --git a/site/about/deployment/deployment-options.qmd b/site/about/deployment/deployment-options.qmd index 9dab1db881..acb49defa8 100644 --- a/site/about/deployment/deployment-options.qmd +++ b/site/about/deployment/deployment-options.qmd @@ -26,23 +26,23 @@ Choose the {{< var vm.product >}} deployment option that best suits your organiz ![{{< var vm.product >}} architecture overview](validmind-architecture-overview.png){fig-alt="An image showing the ValidMind architecture"} -In your own environment, model developers can continue to run models using your existing tools for data science and model development, such as Python, Jupyter Notebooks, and R, accessing data from sources such as Google Cloud Storage, Amazon S3, and Snowflake. +In your own environment, developers can continue to run records (such as models) using your existing tools for data science and development, such as Python, Jupyter Notebooks, and R, accessing data from sources such as Google Cloud Storage, Amazon S3, and Snowflake. -These models are then integrated with the {{< var validmind.developer >}}, which communicates with the {{< var validmind.platform >}} via our {{< var validmind.api >}}. +These records are then integrated with the {{< var validmind.developer >}}, which communicates with the {{< var validmind.platform >}} via our {{< var validmind.api >}}. The {{< var validmind.platform >}} provides: -- **Model inventory** — Centralized tracking and organization of models, accessible by developers, validators, and executives. +- **Inventory** — Centralized tracking and organization of records, accessible by developers, validators, and executives. - **Documentation & validation engine** — Automated testing and documentation, with validation processes, ensuring compliance with regulations and internal policies. - **Template management** — Allows for easy creation, customization, and reuse of document templates. 
-- **{{< var vm.product >}} dashboard** — A user-friendly interface providing insights, status updates, and governance reporting for model risk. +- **{{< var vm.product >}} dashboard** — A user-friendly interface providing insights, status updates, and governance reporting for risk. ## Security & data privacy -We ensure data security through strong data isolation, encryption, and role-based access controls.[^1] Personal identifiable information and customer data are not stored in model documentation. For more information, see our data privacy policy.[^2] +We ensure data security through strong data isolation, encryption, and role-based access controls.[^1] Personally identifiable information and customer data are not stored in documentation. For more information, see our data privacy policy.[^2] ## Secure access diff --git a/site/about/deployment/system-access-requirements.qmd b/site/about/deployment/system-access-requirements.qmd index 9e0275968d..37586d99bf 100644 --- a/site/about/deployment/system-access-requirements.qmd +++ b/site/about/deployment/system-access-requirements.qmd @@ -10,7 +10,7 @@ Allow list the following domains in your organization’s firewall to ensure you ## ValidMind Library Python API access -To use our documentation automation tools and test suites for model developers and validators: +To use our documentation automation tools and test suites for developers and validators: ```html *.validmind.ai diff --git a/site/about/fine-print/data-privacy-policy.qmd b/site/about/fine-print/data-privacy-policy.qmd index 734d2c7f6a..2bb274fdc2 100644 --- a/site/about/fine-print/data-privacy-policy.qmd +++ b/site/about/fine-print/data-privacy-policy.qmd @@ -38,16 +38,16 @@ Understanding our policies shouldn’t feel like deciphering code, so we’ve ma The key points of our data privacy policy include: -- **No personal identifiable information in documentation** — When the {{< var validmind.developer >}} generates documentation, it ensures that no personally
identifiable information (PII) is included. This practice is a critical part of our commitment to protecting your privacy and maintaining the confidentiality of your data. +- **No personally identifiable information in documentation** — When the {{< var validmind.developer >}} generates documentation, it ensures that no personally identifiable information (PII) is included. This practice is a critical part of our commitment to protecting your privacy and maintaining the confidentiality of your data. -- **No storage of customer data** — {{< var vm.product >}} does not retain any customer datasets or models. This policy is in place in order to protect your data privacy and security. By not storing this information, {{< var vm.product >}} minimizes the risk of unauthorized access or data breaches. +- **No storage of customer data** — {{< var vm.product >}} does not retain any customer datasets or records (models). This policy is in place in order to protect your data privacy and security. By not storing this information, {{< var vm.product >}} minimizes the risk of unauthorized access or data breaches. We believe it is important for users of {{< var vm.product >}}'s products to understand these practices as they reflect our dedication to data security and privacy. ::: {.callout-important} ## {{< var vm.product >}} does NOT: -- Include any personal identifiable information (PII) when generating documentation reports. -- Store any customer datasets or models. +- Include any personally identifiable information (PII) when generating documentation reports. +- Store any customer datasets or records (models). ::: ## Do you comply with the SOC 2 security standard? 
@@ -64,13 +64,13 @@ The {{< var validmind.vpv >}} option provides all our features and services but Access is available through AWS PrivateLink, Azure Private Link, or GCP Private Service Connect, all of which provide private connectivity between {{< var vm.product >}} and your on-premises network without exposing your traffic to the public internet. -## What model assets are imported into documentation? +## What record (model) assets are imported into documentation? When you generate documentation or run tests, {{< var vm.product >}} imports the following assets into the documentation via our {{< var validmind.api >}} endpoint integration: ![Artifacts imported into the documentation via our {{< var vm.api >}}](overview-api-integration.jpg){width=80% fig-alt="A representation of assets imported into the documentation via our Python API"} -- Metadata about datasets and models, used to look up programmatic documentation content, such as the stored definition for _common logistic regression limitations_ when a logistic regression model has been passed to the {{< var vm.product >}} test suite to be run. +- Metadata about datasets and records, used to look up programmatic documentation content, such as the stored definition for _common logistic regression limitations_ when a logistic regression model has been passed to the {{< var vm.product >}} test suite to be run. - Quality and performance metrics collected from datasets and models. - Output from tests and test suites that have been run. - Images, plots, visuals that were generated as part of extracting metrics and running tests. diff --git a/site/about/glossary/_ai-governance.qmd b/site/about/glossary/_ai-governance.qmd new file mode 100644 index 0000000000..4b9324e7c6 --- /dev/null +++ b/site/about/glossary/_ai-governance.qmd @@ -0,0 +1,42 @@ + + +AI ethics +: A set of principles and practices guiding the responsible design, development, and deployment of AI systems. 
Common tenets include fairness, transparency, accountability, privacy, and human well-being. + +AI lifecycle +: The end-to-end stages an AI system progresses through, including problem framing, data collection, model development, validation, deployment, monitoring, and retirement. Each stage carries distinct governance requirements. + +AI risk +: The potential for adverse outcomes — financial, reputational, ethical, regulatory, or societal — arising from the design, deployment, or use of AI systems. AI risk extends beyond traditional model risk to include concerns such as bias, opacity, misuse, and unintended consequences. + +algorithmic accountability +: The principle that organizations must take responsibility for the outcomes of the AI systems they deploy, including documenting decisions, monitoring performance, and providing mechanisms to identify and remediate harm. + +bias, algorithmic bias +: Systematic errors or unfair outcomes in AI system results that disproportionately affect specific groups. Sources include unrepresentative training data, flawed assumptions in system design, or feedback loops introduced during deployment. Detecting and mitigating bias is a core AI governance activity. + +EU AI Act +: A regulatory framework introduced by the European Union that classifies AI systems by risk tier^[**European Union:** [Regulation (EU) 2024/1689: Artificial Intelligence Act](https://eur-lex.europa.eu/eli/reg/2024/1689/oj)] — prohibited, high-risk, limited-risk, and minimal-risk — and imposes proportionate obligations such as risk management, data governance, transparency, human oversight, and conformity assessment. + +explainability +: The degree to which the internal mechanics or outputs of an AI system can be understood by humans. Explainability is a core requirement for high-risk AI systems and supports accountability, debugging, and regulatory review. 
+ +fairness +: The principle that AI systems should produce equitable outcomes across individuals and groups. Fairness assessments are a routine part of bias evaluation and impact assessment within AI governance programs. + +ISO/IEC 42001 +: An international management system standard for artificial intelligence published jointly by the International Organization for Standardization (ISO) and the International Electrotechnical Commission (IEC). Provides requirements for establishing, implementing, maintaining, and continually improving an AI management system within an organization. + +model card, system card +: A standardized document that summarizes an AI system's intended use, training data, performance characteristics, limitations, and ethical considerations. Model and system cards support transparency and informed deployment decisions.^[**Refer also to:** [documentation](#documentation)] + +NIST AI Risk Management Framework (AI RMF) +: A voluntary framework published by the U.S. National Institute of Standards and Technology to help organizations manage risks associated with AI. Organized around four core functions: govern, map, measure, and manage. + +responsible AI +: An umbrella approach to designing, building, and deploying AI systems in ways that are ethical, transparent, accountable, fair, and aligned with human values and societal expectations. + +transparency +: The disclosure of meaningful information about an AI system's design, data, capabilities, limitations, and decision-making processes to relevant stakeholders. Transparency supports trust, accountability, and informed oversight. diff --git a/site/about/glossary/_ai.qmd b/site/about/glossary/_ai.qmd index 5ef7bba2ff..b2b90b0106 100644 --- a/site/about/glossary/_ai.qmd +++ b/site/about/glossary/_ai.qmd @@ -5,40 +5,48 @@ SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial --> Refer to [IBM's series on artificial intelligence](https://www.ibm.com/think/artificial-intelligence) for more in-depth resources.
AI governance -: The organizational framework for directing and overseeing how AI is designed, deployed, and used. It sets policy, accountability, and decision rights, covering ethics, compliance, risk appetite, lifecycle controls, and ongoing oversight across people, process, and technology. +: The organizational framework for directing and overseeing how AI is designed, deployed, and used. It sets policy, accountability, and decision rights, covering ethics, compliance, risk appetite, lifecycle controls, and ongoing oversight across people, process, and technology.^[**Refer also to:** [AI governance](#ai-governance)] AI system -: A combination of software, algorithms, and data designed to perform tasks that typically require human intelligence. In AI governance, an AI system is the primary unit of management, distinct from individual models. +: A combination of software, algorithms, and data designed to perform tasks that typically require human intelligence. In AI governance, an AI system is the primary unit of management, distinct from individual records (such as models).[^ai-system] AI use case : A specific application or deployment of AI technology to solve a business problem or achieve an objective. Use cases are often the unit of oversight in AI governance frameworks. artificial intelligence (AI) -: Artificial intelligence is a broad term used to classify machines that mimic human intelligence and human cognitive functions like problem-solving and learning. +: Artificial intelligence is a broad term used to classify machines that mimic human intelligence and human cognitive functions like problem-solving and learning. deep-learning -: A subset of machine learning that uses multi-layered neural networks (deep neural networks) to simulate the complex decision-making power of the human brain. +: A subset of machine learning that uses multi-layered neural networks (deep neural networks) to simulate the complex decision-making power of the human brain. 
generative AI (GenAI) : Generative AI refers to deep-learning models that can generate high-quality text, images, and other content based on the data they were trained on. human oversight -: Controls and processes ensuring human involvement in AI-driven decisions. Required by regulations like the EU AI Act for high-risk AI systems to enable human intervention and override capabilities. +: Controls and processes ensuring human involvement in AI-driven decisions. Required by regulations like the EU AI Act for high-risk AI systems to enable human intervention and override capabilities.^[**Refer also to:** [EU AI Act](./glossary.qmd#eu-ai-act)] impact assessment : An evaluation of the potential risks, harms, and consequences associated with deploying an AI system. Impact assessments are a core artifact in AI governance programs. large language model (LLM) -: Advanced types of artificial intelligence models designed to understand, generate, and interact with human language at a sophisticated level, such as ChatGPT.^[[ChatGPT](https://chat.openai.com)] +: An advanced type of artificial intelligence model designed to understand, generate, and interact with human language at a sophisticated level, such as ChatGPT.^[[ChatGPT](https://chat.openai.com)] -machine learning -: Machine learning is a subset of artificial intelligence that allows for optimization. It helps make predictions that minimize the errors that arise from merely guessing. +machine learning (ML) +: Machine learning is a subset of artificial intelligence that allows for optimization. It helps make predictions that minimize the errors that arise from merely guessing. risk tier -: A classification level assigned to an AI system based on its potential impact and risk. The EU AI Act defines tiers including prohibited, high-risk, limited-risk, and minimal-risk categories. +: A classification level assigned to an AI system based on its potential impact and risk. 
The EU AI Act defines tiers including prohibited, high-risk, limited-risk, and minimal-risk categories.^[**Refer also to:** [EU AI Act](./glossary.qmd#eu-ai-act)] -traditional statistical models -: Mathematical frameworks used to analyze and make inferences from data. These models are foundational in statistics and serve to explain relationships, predict outcomes, and guide decision-making across various fields, such as economics, biology, engineering, and social sciences. +traditional statistical model +: A mathematical framework used to analyze and make inferences from data. Traditional statistical models are foundational in statistics and serve to explain relationships, predict outcomes, and guide decision-making across various fields, such as economics, biology, engineering, and social sciences. use case owner : The individual accountable for an AI use case within an organization. Responsible for decisions about AI deployment, compliance, and ongoing oversight. + + + + +[^ai-system]: **Refer to:** + + - [record](#records) + - [model](#models) diff --git a/site/about/glossary/_attestation.qmd b/site/about/glossary/_attestation.qmd index 2ca29bdacf..75271ae84e 100644 --- a/site/about/glossary/_attestation.qmd +++ b/site/about/glossary/_attestation.qmd @@ -2,29 +2,29 @@ Refer to the LICENSE file in the root of this repository for details. SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial --> -attestation -: A formal process where attestation participants certify key model information at a specific time. Attestation is part of your audit trail and confirms that governance, documentation, and control requirements are met. +attestation +: A formal process where attestation participants certify key record (model) information at a specific time. Attestation is part of your audit trail and confirms that governance, documentation, and control requirements are met. 
attestation instance -: The invocation of the attestation process on the {{< var validmind.platform >}}. Created when the attestation is triggered by the schedule you set up, it includes a snapshot with record activity and artifacts, questionnaire responses and review status, forming a full record of the review and approval process. +: The invocation of the attestation process on the {{< var validmind.platform >}}. Created when the attestation is triggered by the schedule you set up, it includes a snapshot with record activity and artifacts, questionnaire responses and review status, forming a full record of the review and approval process. attestation participant -: A user who participates in the attestation workflow as a submitter, reviewer, or approver. Submitters are assigned from model stakeholders; reviewers and approvers are assigned from organizational roles. +: A user who participates in the attestation workflow as a submitter, reviewer, or approver. Submitters are assigned from inventory record stakeholders; reviewers and approvers are assigned from organizational roles. -attestation period -: The time window during which attestation is active, with fixed start and end dates. Each period creates an unchanging model snapshot. Periods are usually scheduled quarterly or annually and can align with regulatory or internal cycles. +attestation period +: The time window during which attestation is active, with fixed start and end dates. Each period creates an unchanging record (model) snapshot. Periods are usually scheduled quarterly or annually and can align with regulatory or internal cycles. -attestation questionnaire -: A structured form that submitters use to confirm model status, documentation and compliance. It supports formatted inputs like checkboxes and text fields, serving as both a compliance check and formal review record. 
+attestation questionnaire +: A structured form that submitters use to confirm record (model) status, documentation and compliance. It supports formatted inputs like checkboxes and text fields, serving as both a compliance check and formal review record. -execution schedule +execution schedule : The mechanism, manual or automated, that starts the attestation process based on set periods. It creates attestation instances, triggers snapshots and begins the workflow for attestation participants. group -: An organizational unit that associates models with specific teams or functions. When reviewers or approvers are assigned by role, they can only act on models within groups they belong to — resulting in one attestation submission per model owner per group. +: An organizational unit that associates records (models) with specific teams or functions. When reviewers or approvers are assigned by role, they can only act on records within groups they belong to — resulting in one attestation submission per owner per group. inventory scope -: The filter conditions that define which models are included in an attestation. Scope can be set using rules based on model fields, stages, or custom attributes. +: The filter conditions that define which records (models) are included in an attestation. Scope can be set using rules based on fields, stages, or custom attributes. -snapshot -: A fixed capture of model data at a specific time. It includes optional custom fields and related artifacts and stays unchanged throughout the attestation, ensuring historical accuracy. \ No newline at end of file +snapshot +: A fixed capture of record (model) data at a specific time. It includes optional custom fields and related artifacts and stays unchanged throughout the attestation, ensuring historical accuracy. 
\ No newline at end of file diff --git a/site/about/glossary/_developer-tools.qmd b/site/about/glossary/_developer-tools.qmd index 0381ba6ff1..9dd5cb07bd 100644 --- a/site/about/glossary/_developer-tools.qmd +++ b/site/about/glossary/_developer-tools.qmd @@ -14,9 +14,9 @@ Decorators are a simpler way for users to run their own code as a {{< var vm.pro {{< include key_concepts/_parameters.qmd >}} pip -: A package manager for Python, used to install and manage software packages written in the Python programming language. +: A package manager for Python, used to install and manage software packages written in the Python programming language. -{{< var vm.product >}} uses the `pip` command to install the Python client library that is part of the {{< var validmind.developer >}} so that model developers can make use of its features. +{{< var vm.product >}} uses the `pip` command to install the Python client library that is part of the {{< var validmind.developer >}} so that developers can make use of its features. JupyterHub : A multi-user server provides a platform for users to interactively work with data science and scientific computing tools in a collaborative environment. @@ -33,4 +33,4 @@ Jupyter Notebook GitHub : A cloud-based platform that provides hosting for software development and version control using Git. GitHub^[[GitHub](https://github.com/)] offers collaboration tools such as bug tracking, feature requests, task management, and continuous integration pipelines. -{{< var vm.product >}} uses GitHub to share [pen-source software^[**GitHub:** [validmind](https://github.com/validmind/)] with you. +{{< var vm.product >}} uses GitHub to share open-source software^[**GitHub:** [validmind](https://github.com/validmind/)] with you. 
diff --git a/site/about/glossary/_documentation.qmd b/site/about/glossary/_documentation.qmd new file mode 100644 index 0000000000..fc4a038cd3 --- /dev/null +++ b/site/about/glossary/_documentation.qmd @@ -0,0 +1,23 @@ + + + + +{{< include documentation/_doc-intro.qmd >}} + +{{< include documentation/_conceptual-soundness.qmd >}} + +{{< include documentation/_data-preparation.qmd >}} + +{{< include documentation/_model-development.qmd >}} + +{{< include documentation/_monitoring-governance.qmd >}} + diff --git a/site/about/glossary/_model-documentation.qmd b/site/about/glossary/_model-documentation.qmd deleted file mode 100644 index b0c983236b..0000000000 --- a/site/about/glossary/_model-documentation.qmd +++ /dev/null @@ -1,23 +0,0 @@ - - - - -{{< include model_documentation/_doc-intro.qmd >}} - -{{< include model_documentation/_conceptual-soundness.qmd >}} - -{{< include model_documentation/_data-preparation.qmd >}} - -{{< include model_documentation/_model-development.qmd >}} - -{{< include model_documentation/_monitoring-governance.qmd >}} - diff --git a/site/about/glossary/_models.qmd b/site/about/glossary/_models.qmd index 0bc6ee8e3f..8c9ce74f80 100644 --- a/site/about/glossary/_models.qmd +++ b/site/about/glossary/_models.qmd @@ -4,14 +4,13 @@ SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial --> #### Models -model -: SR 11-7^[[SR 11-7: Guidance on Model Risk Management](https://www.federalreserve.gov/supervisionreg/srletters/sr1107.htm)] defines a model as a "quantitative method, system, or approach that applies statistical, economic, financial, or mathematical theories, techniques, and assumptions to process input data into quantitative estimates." +{{< include /about/glossary/key_concepts/_docs.qmd >}} + +{{< include /about/glossary/key_concepts/_models.qmd >}} model development : An iterative process in which many models are derived, tested, and built upon until a model fitting the desired criteria is achieved. 
-{{< include key_concepts/_docs.qmd >}} - :::: {.content-visible when-format="html" when-meta="includes.glossary"} -model inventory^[**Refer also to:** [{{< var vm.product >}} model inventory](./glossary.qmd#platform-model-inventory)] +model inventory^[**Refer also to:** [inventory](./glossary.qmd#inventory)] : A systematic and organized record of all quantitative and qualitative models used within an organization. This inventory facilitates oversight, tracking, and assessment by listing each model's purpose, characteristics, owners, validation status, and associated risks. :::: diff --git a/site/about/glossary/_mrm.qmd b/site/about/glossary/_mrm.qmd index 4872d9f341..31835071a4 100644 --- a/site/about/glossary/_mrm.qmd +++ b/site/about/glossary/_mrm.qmd @@ -13,27 +13,27 @@ SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial --> 3rd line of defense : Typically an internal audit function responsible for providing an independent and comprehensive review of the risk management processes and controls that the first two lines have implemented. -model developer +model developer, developer : Responsible for the design, implementation, and maintenance of models to ensure they are fit-for-purpose, accurate, and aligned with business requirements. As subject matter experts, they collaborate with model validators and other business units, ensuring the models are conceptually sound and robust. -model governance +model governance, governance : A framework of policies, procedures, and standards established to oversee the lifecycle of models within an organization. Ensures that models are developed, validated, implemented, and retired in a controlled and consistent manner, promoting accountability, transparency, and adherence to regulatory requirements. -model implementation +model implementation, implementation : A collaborative effort among model developers and model owners. 
Model implementation includes a formalized implementation plan and associated procedures, a review of results, and a record of model change procedures. -model owner +model owner, owner : Responsible for coordinating model development, model implementation, ongoing model monitoring and maintaining the model’s administration, such as model documentation and model risk reporting. -model user +model user, user : Those who rely on the model’s outputs to inform business decisions. -model validation +model validation, validation : A systematic process to evaluate and verify that a model is performing as intended, accurately represents the phenomena it is designed to capture, and is appropriate for its specified purpose. This assessment encompasses a review of the model's conceptual soundness, data integrity, calibration, and performance outcomes, as well as testing against out-of-sample datasets. Within model risk management, model validation ensures that potential risks associated with model errors, misuse, or misunderstanding are identified and mitigated. -model validator +model validator, validator : Responsible for conducting independent assessments of models to ensure their accuracy, reliability, and appropriateness for intended purposes. The role involves evaluating a model's conceptual soundness, data integrity, calibration methods, and overall performance, typically using out-of-sample datasets. Model validators identify potential risks and weaknesses, ensuring that models within an organization meet established standards and regulatory requirements, and provide recommendations to model developers for improvements or modifications. 
diff --git a/site/about/glossary/_validmind-features.qmd b/site/about/glossary/_validmind-features.qmd index 2be12d5923..4d3cc46369 100644 --- a/site/about/glossary/_validmind-features.qmd +++ b/site/about/glossary/_validmind-features.qmd @@ -8,16 +8,26 @@ client library, Python client library : Enables the interaction of your development environment with the {{< var validmind.platform >}} as part of the {{< var validmind.developer >}}. content block -: Content blocks provide you with sections that are part of a template, and are used in model documentation, validation reports, ongoing monitoring reports, and custom document types.^[[Work with content blocks](/guide/documentation/work-with-content-blocks.qmd)] +: A modular document template component. Content blocks are used to populate text and test results in documentation, validation reports, ongoing monitoring reports, and custom document types.^[[Work with content blocks](/guide/documentation/work-with-content-blocks.qmd)] documentation automation -: A core benefit of {{< var vm.product >}} that allows for the automatic creation of model documentation using predefined templates and test suites. +: A core benefit of {{< var vm.product >}} that allows for the automatic creation of documentation using predefined templates and test suites.[^test-suite] -model inventory -: A feature of the {{< var validmind.platform >}} where you can track, manage, and oversee the lifecycle of models. Covers the full model lifecycle, including customizable approval workflows for different user roles, status and activity tracking, and periodic revalidation. +inventory +: A feature of the {{< var validmind.platform >}} where you can track, manage, and oversee the lifecycle of your records (such as models). Covers the full record lifecycle, including customizable approval workflows for different user roles, status and activity tracking, and periodic revalidation. 
+ +{{< include key_concepts/_records.qmd >}} {{< include key_concepts/_template.qmd >}} {{< include key_concepts/_test.qmd >}} -{{< include key_concepts/_test-suite.qmd >}} \ No newline at end of file +{{< include key_concepts/_test-suite.qmd >}} + + + + +[^test-suite]: **Refer to:** + + - [document template](/about/glossary/glossary.qmd#document-template) + - [test suite](/about/glossary/glossary.qmd#test-suite) \ No newline at end of file diff --git a/site/about/glossary/_validmind.qmd b/site/about/glossary/_validmind.qmd index fcba5f96cc..b9d29c8ff5 100644 --- a/site/about/glossary/_validmind.qmd +++ b/site/about/glossary/_validmind.qmd @@ -4,10 +4,10 @@ SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial --> #### {{< var validmind.product >}} -These two features are intertwined and work in tandem to help streamline your model lifecycle. +These two features are intertwined and work in tandem to help streamline your risk management lifecycles. {{< var validmind.developer >}} ({{< var vm.developer >}}) -: An open-source^[**{{< var vm.product >}} GitHub:** [`validmind-library`](https://github.com/validmind/validmind-library/)] suite of documentation tools and test suites designed to document models, test models for weaknesses, and identify overfit areas. Enables automating the generation of model documentation by uploading documentation and test results to the {{< var validmind.platform >}}. +: An open-source^[**{{< var vm.product >}} GitHub:** [`validmind-library`](https://github.com/validmind/validmind-library/)] suite of documentation tools and test suites designed to document records (such as models), test records for weaknesses, and identify overfit areas. Enables automating the generation of documentation by uploading documentation and test results to the {{< var validmind.platform >}}. 
{{< var validmind.platform >}} ({{< var vm.platform >}}) -: A hosted multi-tenant architecture^[[Log into {{< var vm.product >}}](/guide/access/log-in-to-validmind.qmd)] that includes the {{< var vm.product >}} cloud-based web interface, APIs, databases, documentation and validation engine, and various internal services. +: A hosted multi-tenant architecture^[[Log into {{< var vm.product >}}](/guide/access/log-in-to-validmind.qmd)] that includes the {{< var vm.product >}} cloud-based web interface, APIs, databases, documentation and validation engine, and various internal services. diff --git a/site/about/glossary/documentation/_conceptual-soundness.qmd b/site/about/glossary/documentation/_conceptual-soundness.qmd new file mode 100644 index 0000000000..1dae64104f --- /dev/null +++ b/site/about/glossary/documentation/_conceptual-soundness.qmd @@ -0,0 +1,6 @@ + + +conceptual soundness +: Establishes the foundation of a selected record (such as a model), covering the overview, intended use and business use case, regulatory requirements, limitations, and the rationale behind selection. It emphasizes purpose, scope, and constraints, which are crucial for stakeholders to understand applicability and limitations. diff --git a/site/about/glossary/model_documentation/_data-preparation.qmd b/site/about/glossary/documentation/_data-preparation.qmd similarity index 88% rename from site/about/glossary/model_documentation/_data-preparation.qmd rename to site/about/glossary/documentation/_data-preparation.qmd index 9a5b539e85..84646b6c75 100644 --- a/site/about/glossary/model_documentation/_data-preparation.qmd +++ b/site/about/glossary/documentation/_data-preparation.qmd @@ -3,4 +3,4 @@ Refer to the LICENSE file in the root of this repository for details. 
SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial --> data preparation -: Details the data description, including dataset summary, data quality tests, descriptive statistics, correlations and interactions, and feature selection and engineering. It provides transparency into the data used for training, ensuring that the record such as a model is built on a solid and relevant dataset. \ No newline at end of file +: Details the data description, including dataset summary, data quality tests, descriptive statistics, correlations and interactions, and feature selection and engineering. It provides transparency into the data used for training, ensuring that the record (such as a model) is built on a solid and relevant dataset. \ No newline at end of file diff --git a/site/about/glossary/model_documentation/_doc-intro.qmd b/site/about/glossary/documentation/_doc-intro.qmd similarity index 100% rename from site/about/glossary/model_documentation/_doc-intro.qmd rename to site/about/glossary/documentation/_doc-intro.qmd diff --git a/site/about/glossary/model_documentation/_model-development.qmd b/site/about/glossary/documentation/_model-development.qmd similarity index 81% rename from site/about/glossary/model_documentation/_model-development.qmd rename to site/about/glossary/documentation/_model-development.qmd index 5acf1242eb..1c7eafc727 100644 --- a/site/about/glossary/model_documentation/_model-development.qmd +++ b/site/about/glossary/documentation/_model-development.qmd @@ -3,4 +3,4 @@ Refer to the LICENSE file in the root of this repository for details. SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial --> model development, development -: Discusses the training, evaluation, explainability, interpretability, and diagnosis, including weak spots, overfit regions, and robustness. This section is vital for understanding how the record such as a model was developed, how it performs, and its areas of strength and weakness. 
+: Discusses the training, evaluation, explainability, interpretability, and diagnosis, including weak spots, overfit regions, and robustness. This section is vital for understanding how the record (such as a model) was developed, how it performs, and its areas of strength and weakness. diff --git a/site/about/glossary/documentation/_monitoring-governance.qmd b/site/about/glossary/documentation/_monitoring-governance.qmd new file mode 100644 index 0000000000..05ccf5390e --- /dev/null +++ b/site/about/glossary/documentation/_monitoring-governance.qmd @@ -0,0 +1,6 @@ + + +monitoring and governance +: Focuses on the ongoing monitoring plan, implementation, and governance plan of the record (such as a model). It outlines strategies for maintaining the record's performance over time and ensuring that the record remains compliant with regulatory requirements and ethical standards. \ No newline at end of file diff --git a/site/about/glossary/glossary.qmd b/site/about/glossary/glossary.qmd index cbf0c8ab03..43167cf368 100644 --- a/site/about/glossary/glossary.qmd +++ b/site/about/glossary/glossary.qmd @@ -17,32 +17,39 @@ includes: This glossary of terms provides short definitions for technical terms you find commonly used in our product documentation grouped by terms related to: - [{{< var vm.product >}}](#validmind) -- [Artificial intelligence](#artificial-intelligence) -- [Models and model risk management](#models-and-model-risk-management) -- [Model documentation](#model-documentation) +- [Artificial intelligence (AI) governance](#artificial-intelligence-ai-governance) +- [Model risk management](#model-risk-management) +- [Documentation](#documentation) - [Validation reports](#validation-reports) - [Ongoing monitoring](#ongoing-monitoring) - [Attestations](#attestations) - [Integrations](#integrations) - [Developer tools](#developer-tools) +

## {{< var vm.product >}} {{< include _validmind.qmd >}} {{< include _validmind-features.qmd >}} -## Artificial intelligence +## Artificial intelligence (AI) governance + +#### AI {{< include _ai.qmd >}} -## Models and model risk management +#### AI governance + +{{< include _ai-governance.qmd >}} + +## Model risk management {{< include _models.qmd >}} {{< include _mrm.qmd >}} -## Model documentation +## Documentation -{{< include _model-documentation.qmd >}} +{{< include _documentation.qmd >}} ## Validation reports diff --git a/site/about/glossary/key_concepts/_docs.qmd b/site/about/glossary/key_concepts/_docs.qmd index 0ff2fa8d76..886d9f2f20 100644 --- a/site/about/glossary/key_concepts/_docs.qmd +++ b/site/about/glossary/key_concepts/_docs.qmd @@ -2,7 +2,12 @@ Refer to the LICENSE file in the root of this repository for details. SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial --> -model documentation -: A structured and detailed record pertaining to a model, encompassing key components such as its underlying assumptions, methodologies, data sources, inputs, performance metrics, evaluations, limitations, and intended uses. - -Within the realm of model risk management, this documentation serves to ensure transparency, adherence to regulatory requirements, and a clear understanding of potential risks associated with the model's application. \ No newline at end of file + + +documentation, model documentation +: A structured and detailed account of a record (such as a model), encompassing key components such as its underlying assumptions, methodologies, data sources, inputs, performance metrics, evaluations, limitations, and intended uses. + +:::: {.content-visible when-format="html" when-meta="includes.glossary"} +Within the realm of risk management, this documentation serves to ensure transparency, adherence to regulatory requirements, and a clear understanding of potential risks associated with the record's application. 
+ +:::: \ No newline at end of file diff --git a/site/about/glossary/key_concepts/_inputs.qmd b/site/about/glossary/key_concepts/_inputs.qmd index 0c45358b5e..bc148ade75 100644 --- a/site/about/glossary/key_concepts/_inputs.qmd +++ b/site/about/glossary/key_concepts/_inputs.qmd @@ -5,7 +5,19 @@ SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial --> inputs : Objects to be evaluated and documented in the {{< var validmind.developer >}}. They can be any of the following: - - **model**: A single model that has been initialized in {{< var vm.product >}}. Refer to the [`vm.init_model()` function](/validmind/validmind.qmd#init_model){target="_blank"} for more information. - - **dataset**: Single dataset that has been initialized in {{< var vm.product >}}. Refer to the [`vm.init_dataset()` function](/validmind/validmind.qmd#init_dataset){target="_blank"} for more information. - - **models**: A list of {{< var vm.product >}} models - usually this is used when you want to compare multiple models in your custom tests. - - **datasets**: A list of {{< var vm.product >}} datasets - usually this is used when you want to compare multiple datasets in your custom tests. (Learn more: [Run tests with multiple datasets](/notebooks/how_to/tests/run_tests/configure_tests/run_tests_that_require_multiple_datasets.ipynb)) \ No newline at end of file +:::: {.content-visible when-format="html" when-meta="includes.glossary"} + - **model**: A single record (such as a model) that has been initialized in {{< var vm.product >}}. 
Despite the naming convention, model objects can be any type of record you want to test, document, validate, or monitor with {{< var vm.product >}}.^[**Refer to:** [`init_model()`](/validmind/validmind.qmd#init_model){target="_blank"}] + - **dataset**: A single dataset that has been initialized in {{< var vm.product >}}.^[**Refer to:** [`init_dataset()`](/validmind/validmind.qmd#init_dataset){target="_blank"}] + - **models**: A list of {{< var vm.product >}} records — usually this is used when you want to compare multiple records in your custom tests. + - **datasets**: A list of {{< var vm.product >}} datasets — usually this is used when you want to compare multiple datasets in your custom tests.^[**Learn more:** [Run tests with multiple datasets](/notebooks/how_to/tests/run_tests/configure_tests/run_tests_that_require_multiple_datasets.ipynb)] + +:::: + +:::: {.content-visible when-format="html" unless-meta="includes.glossary"} + - **model**: A single record (such as a model) that has been initialized in {{< var vm.product >}}. Despite the naming convention, model objects can be any type of record you want to test, document, validate, or monitor with {{< var vm.product >}}. Refer to the [`vm.init_model()` function](/validmind/validmind.qmd#init_model){target="_blank"} for more information. + - **dataset**: A single dataset that has been initialized in {{< var vm.product >}}. Refer to the [`vm.init_dataset()` function](/validmind/validmind.qmd#init_dataset){target="_blank"} for more information. + - **models**: A list of {{< var vm.product >}} records — usually this is used when you want to compare multiple records in your custom tests. + - **datasets**: A list of {{< var vm.product >}} datasets — usually this is used when you want to compare multiple datasets in your custom tests. 
(Learn more: [Run tests with multiple datasets](/notebooks/how_to/tests/run_tests/configure_tests/run_tests_that_require_multiple_datasets.ipynb)) + +:::: + diff --git a/site/about/glossary/key_concepts/_key-concepts.qmd b/site/about/glossary/key_concepts/_key-concepts.qmd index 4e2fe9c64d..73a4f68bab 100644 --- a/site/about/glossary/key_concepts/_key-concepts.qmd +++ b/site/about/glossary/key_concepts/_key-concepts.qmd @@ -6,20 +6,28 @@ SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial --> 1. Create a new file under the `about/glossary/key_concepts` folder with the following structure `_concept.qmd` (the `_` is mandatory for Quarto to retrieve the file as a single-source embed: https://quarto.org/docs/authoring/includes.html) 2. Include it below with the structure `{{< include /about/glossary/key_concepts/_concept.qmd >}}` -3. In the `about/glossary` folder, locate the correct section file it belongs to (e.g. `_ai.qmd`) and embed it there as well in ABC order with the structure `{{< include key_concepts/_concept.qmd >}}` +3. In the `about/glossary` folder, locate the correct section file it belongs to (e.g. `_ai.qmd`) and embed it there as well in ABC order with the structure `{{< include key_concepts/_concept.qmd >}}` These instructions update the key concept anywhere the key concepts are referenced, as well as within the glossary. 
--> +{{< include /about/glossary/key_concepts/_records.qmd >}} + +{{< include /about/glossary/key_concepts/_models.qmd >}} + {{< include /about/glossary/key_concepts/_docs.qmd >}} {{< include /about/glossary/key_concepts/_report.qmd >}} +{{< include /about/glossary/monitoring/_ongoing-monitoring.qmd >}} + {{< include /about/glossary/key_concepts/_template.qmd >}} {{< include /about/glossary/key_concepts/_test.qmd >}} +{{< include /about/glossary/key_concepts/_test-suite.qmd >}} + {{< include /about/glossary/key_concepts/_metrics.qmd >}} {{< include /about/glossary/key_concepts/_inputs.qmd >}} @@ -28,4 +36,3 @@ These instructions update the key concept on anywhere the key concepts are refer {{< include /about/glossary/key_concepts/_outputs.qmd >}} -{{< include /about/glossary/key_concepts/_test-suite.qmd >}} \ No newline at end of file diff --git a/site/about/glossary/key_concepts/_metrics.qmd b/site/about/glossary/key_concepts/_metrics.qmd index 53dd1223f7..d95bb3588d 100644 --- a/site/about/glossary/key_concepts/_metrics.qmd +++ b/site/about/glossary/key_concepts/_metrics.qmd @@ -2,7 +2,16 @@ Refer to the LICENSE file in the root of this repository for details. SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial --> +:::: {.content-visible when-format="html" when-meta="includes.glossary"} metrics, custom metrics -: Metrics are a subset of tests that do not have thresholds. Custom metrics are functions that you define to evaluate your model or dataset. These functions can be registered via the {{< var validmind.developer >}} to be used with the {{< var validmind.platform >}}. +: Metrics are a subset of tests that do not have thresholds. Custom metrics are functions that you define to evaluate your record (such as a model) or dataset. These functions can be registered via the {{< var validmind.developer >}} to be used with the {{< var validmind.platform >}}. 
-In the context of {{< var vm.product >}}'s Jupyter Notebooks, metrics and tests can be thought of as interchangeable concepts. \ No newline at end of file +In the context of {{< var vm.product >}}'s Jupyter Notebooks, metrics and tests can be thought of as interchangeable concepts.^[**Refer also to:** [test](/about/glossary/glossary.qmd#tests)] +:::: + +:::: {.content-visible when-format="html" unless-meta="includes.glossary"} +metrics, custom metrics +: Metrics are a subset of tests that do not have thresholds. Custom metrics are functions that you define to evaluate your record (such as a model) or dataset. These functions can be registered via the {{< var validmind.developer >}} to be used with the {{< var validmind.platform >}}. + +In the context of {{< var vm.product >}}'s Jupyter Notebooks, metrics and tests can be thought of as interchangeable concepts. +:::: \ No newline at end of file diff --git a/site/about/glossary/key_concepts/_models.qmd b/site/about/glossary/key_concepts/_models.qmd new file mode 100644 index 0000000000..15bdf932df --- /dev/null +++ b/site/about/glossary/key_concepts/_models.qmd @@ -0,0 +1,17 @@ + + +:::: {.content-visible when-format="html" when-meta="includes.glossary"} +model +: SR 26-2^[[SR 26-2: Interagency Guidance on Model Risk Management for Banking Organizations](https://www.federalreserve.gov/supervisionreg/srletters/SR2602.htm)] (which supersedes SR 11-7) defines a model as a "complex quantitative method, system, or approach that applies statistical, economic, or financial theories to process input data into quantitative estimates." Simple arithmetic, deterministic rule-based processes, or software without statistical, economic, or financial theories underpinning their design or use are generally outside SR 26-2’s definition of a model. 
+ +Within {{< var vm.product >}}, a model is a type of record tracked in the inventory.^[**Refer also to:** [record](/about/glossary/glossary.qmd#records)] +:::: + +:::: {.content-visible when-format="html" unless-meta="includes.glossary"} +model +: SR 26-2 (which supersedes SR 11-7) defines a model as a "complex quantitative method, system, or approach that applies statistical, economic, or financial theories to process input data into quantitative estimates." Simple arithmetic, deterministic rule-based processes, or software without statistical, economic, or financial theories underpinning their design or use are generally outside SR 26-2’s definition of a model. + +Within {{< var vm.product >}}, a model is a type of record tracked in the inventory. +:::: \ No newline at end of file diff --git a/site/about/glossary/key_concepts/_records.qmd b/site/about/glossary/key_concepts/_records.qmd new file mode 100644 index 0000000000..174f514997 --- /dev/null +++ b/site/about/glossary/key_concepts/_records.qmd @@ -0,0 +1,16 @@ + + +:::: {.content-visible when-format="html" when-meta="includes.glossary"} +record +: A tool tracked in the {{< var validmind.platform >}} inventory,^[**Refer to:** [model](/about/glossary/glossary.qmd#models)] such as a model. Records include traditional statistical models, legacy systems, artificial intelligence/machine learning models, large language models (LLMs), agentic AI systems, and other documentable items that benefit from oversight, testing, and lifecycle management. + +:::: + +:::: {.content-visible when-format="html" unless-meta="includes.glossary"} +record +: A tool tracked in the {{< var validmind.platform >}} inventory, such as a model. Records include traditional statistical models, legacy systems, artificial intelligence/machine learning models, large language models (LLMs), agentic AI systems, and other documentable items that benefit from oversight, testing, and lifecycle management. 
+:::: + + diff --git a/site/about/glossary/key_concepts/_report.qmd b/site/about/glossary/key_concepts/_report.qmd index 42157729e9..c5bf7b6960 100644 --- a/site/about/glossary/key_concepts/_report.qmd +++ b/site/about/glossary/key_concepts/_report.qmd @@ -5,4 +5,7 @@ SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial --> validation report : A formal document produced after a model validation process, outlining the artifacts, assessments, and recommendations related to a specific model's performance, appropriateness, and limitations. Provides a comprehensive review of the model's conceptual framework, data sources and integrity, calibration methods, and performance outcomes. -Within model risk management, the validation report is crucial for ensuring transparency, demonstrating regulatory compliance, and offering actionable insights for model refinement or adjustments. \ No newline at end of file +:::: {.content-visible when-format="html" when-meta="includes.glossary"} +Within model risk management, the validation report is crucial for ensuring transparency, demonstrating regulatory compliance, and offering actionable insights for model refinement or adjustments. + +:::: \ No newline at end of file diff --git a/site/about/glossary/key_concepts/_template.qmd b/site/about/glossary/key_concepts/_template.qmd index 6a47859219..7c035e586b 100644 --- a/site/about/glossary/key_concepts/_template.qmd +++ b/site/about/glossary/key_concepts/_template.qmd @@ -10,16 +10,18 @@ SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial --> :::: {.content-visible when-format="html" when-meta="includes.glossary"} document template -: Lays out the structure of model documents, segmented into various sections and sub-sections, and function as test suites to help automate your development, validation, monitoring, and other risk management processes. 
Document templates are available for default {{< var vm.product >}} document types[^default-documents] as well as custom document types. +: Lays out the structure of documents, segmented into various sections and sub-sections, and functions as a test suite to help automate your development, validation, monitoring, and other risk management processes. Document templates are available for default {{< var vm.product >}} document types[^default-documents] as well as custom document types. -documentation template^[**Refer also to:** [Model documentation](/about/glossary/glossary.qmd#model-documentation)] -: A default {{< var vm.product >}} document type that serves as a standardized framework for developing and documenting models, including sections designated for model details, data descriptions, test results, and performance metrics. By outlining required documentation and recommended analyses, document templates ensure consistency and completeness across model documentation and help guide developers through a systematic development process while promoting comparability and traceability of development outcomes. +documentation template^[**Refer also to:** [documentation](/about/glossary/glossary.qmd#documentation)] +: A default {{< var vm.product >}} document template that serves as a standardized framework for developing and documenting records (such as models), including sections designated for record details, data descriptions, test results, and performance metrics. By outlining required documentation and recommended analyses, documentation templates ensure consistency and completeness across documentation and help guide developers through a systematic development process while promoting comparability and traceability of development outcomes. 
-validation report template^[**Refer also to:** [Validation reports](/about/glossary/glossary.qmd#validation-reports)] -: A default {{< var vm.product >}} document type that serves as a standardized framework for conducting and documenting model validation, including sections designated for attaching test results, evidence, or artifacts (findings). By outlining required documentation, recommended analyses, and expected validation tests, validation report templates ensure consistency and completeness across validation reports and help guide validators through a systematic review process while promoting comparability and traceability of validation outcomes. +{{< var vm.product >}} documentation templates function as test suites by defining the structure of your documentation, specifying the tests that should be run, and how the results should be displayed. -monitoring template, monitoring report template^[**Refer also to:** [Ongoing monitoring](/about/glossary/glossary.qmd#ongoing-monitoring)] -: A default {{< var vm.product >}} document type that serves as a standardized framework for ongoing model monitoring, including sections designated for test results, performance metrics, and drift analyses. By outlining required monitoring checks and expected routine tests, monitoring templates ensure consistency and completeness across monitoring reports and help guide model owners through a systematic monitoring process while promoting early detection of model performance degradation. +validation report template^[**Refer also to:** [validation reports](/about/glossary/glossary.qmd#validation-reports)] +: A default {{< var vm.product >}} document template that serves as a standardized framework for conducting and documenting validation, including sections designated for attaching test results, evidence, or artifacts (findings). 
By outlining required documentation, recommended analyses, and expected validation tests, validation report templates ensure consistency and completeness across validation reports and help guide validators through a systematic review process while promoting comparability and traceability of validation outcomes. + +monitoring template, monitoring report template^[**Refer also to:** [ongoing monitoring](/about/glossary/glossary.qmd#ongoing-monitoring)] +: A default {{< var vm.product >}} document template that serves as a standardized framework for ongoing monitoring, including sections designated for test results, performance metrics, and drift analyses. By outlining required monitoring checks and expected routine tests, monitoring templates ensure consistency and completeness across monitoring reports and help guide owners through a systematic monitoring process while promoting early detection of performance degradation. :::: @@ -31,16 +33,16 @@ document template :::: {.content-visible when-format="html" unless-meta="includes.glossary"} document template -: Lays out the structure of model documents, segmented into various sections and sub-sections, and function as test suites to help automate your development, validation, monitoring, and other risk management processes. Document templates are available for default {{< var vm.product >}} document types as well as custom document types. +: Lays out the structure of documents, segmented into various sections and sub-sections, and functions as a test suite to help automate your development, validation, monitoring, and other risk management processes. Document templates are available for default {{< var vm.product >}} document types as well as custom document types. 
-documentation template -: A default {{< var vm.product >}} document type that serves as a standardized framework for developing and documenting models, including sections designated for model details, data descriptions, test results, and performance metrics. By outlining required documentation and recommended analyses, document templates ensure consistency and completeness across model documentation and help guide developers through a systematic development process while promoting comparability and traceability of development outcomes. +documentation template +: A default {{< var vm.product >}} document type that serves as a standardized framework for developing and documenting records (such as models), including sections designated for record details, data descriptions, test results, and performance metrics. By outlining required documentation and recommended analyses, documentation templates ensure consistency and completeness across documentation and help guide developers through a systematic development process while promoting comparability and traceability of development outcomes. -validation report template -: A default {{< var vm.product >}} document type that serves as a standardized framework for conducting and documenting model validation, including sections designated for attaching test results, evidence, or artifacts (findings). By outlining required documentation, recommended analyses, and expected validation tests, validation report templates ensure consistency and completeness across validation reports and help guide validators through a systematic review process while promoting comparability and traceability of validation outcomes. +validation report template +: A default {{< var vm.product >}} document type that serves as a standardized framework for conducting and documenting validation, including sections designated for attaching test results, evidence, or artifacts (findings). 
By outlining required documentation, recommended analyses, and expected validation tests, validation report templates ensure consistency and completeness across validation reports and help guide validators through a systematic review process while promoting comparability and traceability of validation outcomes. -monitoring template, monitoring report template -: A default {{< var vm.product >}} document type that serves as a standardized framework for ongoing model monitoring, including sections designated for test results, performance metrics, and drift analyses. By outlining required monitoring checks and expected routine tests, monitoring templates ensure consistency and completeness across monitoring reports and help guide model owners through a systematic monitoring process while promoting early detection of model performance degradation. +monitoring template, monitoring report template +: A default {{< var vm.product >}} document type that serves as a standardized framework for ongoing monitoring, including sections designated for test results, performance metrics, and drift analyses. By outlining required monitoring checks and expected routine tests, monitoring templates ensure consistency and completeness across monitoring reports and help guide owners through a systematic monitoring process while promoting early detection of performance degradation. :::: diff --git a/site/about/glossary/key_concepts/_test-suite.qmd b/site/about/glossary/key_concepts/_test-suite.qmd index b1d2289365..59024193bc 100644 --- a/site/about/glossary/key_concepts/_test-suite.qmd +++ b/site/about/glossary/key_concepts/_test-suite.qmd @@ -2,7 +2,13 @@ Refer to the LICENSE file in the root of this repository for details. SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial --> -test suite -: A collection of tests which are run together to generate model documentation end-to-end for specific use cases. 
+:::: {.content-visible when-format="html" when-meta="includes.glossary"} + +test suite +: A collection of tests designed to run together to automate and generate documentation end-to-end for specific use cases.^[**Learn more:** [test_suites](/validmind/validmind/test_suites.qmd)] +:::: -For example, the [`classifier_full_suite`](/validmind/validmind/test_suites/classifier.qmd#classifierfullsuite){target="_blank"} test suite runs tests from the [`tabular_dataset`](/validmind/validmind/test_suites/tabular_datasets.qmd){target="_blank"} and [`classifier`](/validmind/validmind/test_suites/classifier.qmd){target="_blank"} test suites to fully document the data and model sections for binary classification model use cases. \ No newline at end of file +:::: {.content-visible when-format="html" unless-meta="includes.glossary"} +test suite +: A collection of tests designed to run together to automate and generate documentation end-to-end for specific use cases. (Learn more: [test_suites](/validmind/validmind/test_suites.qmd)) +:::: \ No newline at end of file diff --git a/site/about/glossary/key_concepts/_test.qmd b/site/about/glossary/key_concepts/_test.qmd index 6708c16a77..c4ead6e4b3 100644 --- a/site/about/glossary/key_concepts/_test.qmd +++ b/site/about/glossary/key_concepts/_test.qmd @@ -2,7 +2,7 @@ Refer to the LICENSE file in the root of this repository for details. SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial --> -test -: A function contained in the {{< var vm.developer >}}, designed to run a specific quantitative test on the dataset or model. Test results are sent to the {{< var validmind.platform >}} to generate the model documentation according to the template that is associated with the documentation. +test +: A function contained in the {{< var vm.developer >}}, designed to run a specific quantitative test on the dataset or record (such as a model). 
Test results are logged to the {{< var validmind.platform >}}, where they are attached to documents. -Tests are the building blocks of {{< var vm.product >}}, used to evaluate and document models and datasets, and can be run individually or as part of a suite defined by your model documentation template. \ No newline at end of file +Tests are the building blocks of {{< var vm.product >}}, used to evaluate and document records and datasets, and can be run individually or as part of a suite defined by your templates. \ No newline at end of file diff --git a/site/about/glossary/model_documentation/_conceptual-soundness.qmd b/site/about/glossary/model_documentation/_conceptual-soundness.qmd deleted file mode 100644 index b173ee9f63..0000000000 --- a/site/about/glossary/model_documentation/_conceptual-soundness.qmd +++ /dev/null @@ -1,6 +0,0 @@ - - -conceptual soundness -: Establishes the foundation of a selected record such as a model, covering the overview, intended use and business use case, regulatory requirements, limitations, and the rationale behind selection. It emphasizes purpose, scope, and constraints, which are crucial for stakeholders to understand applicability and limitations. diff --git a/site/about/glossary/model_documentation/_monitoring-governance.qmd b/site/about/glossary/model_documentation/_monitoring-governance.qmd deleted file mode 100644 index 9d6426d097..0000000000 --- a/site/about/glossary/model_documentation/_monitoring-governance.qmd +++ /dev/null @@ -1,6 +0,0 @@ - - -monitoring and governance -: Focuses on the record such as a model’s ongoing monitoring plan, implementation, and governance plan. It outlines strategies for maintaining the performance over time and ensuring that it remains compliant with regulatory requirements and ethical standards. 
\ No newline at end of file diff --git a/site/about/glossary/monitoring/_backtesting.qmd b/site/about/glossary/monitoring/_backtesting.qmd index 77a7e5cfe5..e642f0cef0 100644 --- a/site/about/glossary/monitoring/_backtesting.qmd +++ b/site/about/glossary/monitoring/_backtesting.qmd @@ -2,5 +2,5 @@ Refer to the LICENSE file in the root of this repository for details. SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial --> -backtesting -: Comparing a model's predictions against actual outcomes to verify its predictive power and reliability. +backtesting +: Comparing a record's predictions against actual outcomes to verify its predictive power and reliability. diff --git a/site/about/glossary/monitoring/_compliance-and-regulatory-adherence.qmd b/site/about/glossary/monitoring/_compliance-and-regulatory-adherence.qmd index 2f1b50ff63..a0b9e89f71 100644 --- a/site/about/glossary/monitoring/_compliance-and-regulatory-adherence.qmd +++ b/site/about/glossary/monitoring/_compliance-and-regulatory-adherence.qmd @@ -3,4 +3,4 @@ Refer to the LICENSE file in the root of this repository for details. SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial --> compliance and regulatory adherence -: Ensuring that the model continues to meet evolving regulatory requirements and standards. \ No newline at end of file +: Ensuring that the record (model) continues to meet evolving regulatory requirements and standards. \ No newline at end of file diff --git a/site/about/glossary/monitoring/_model-drift.qmd b/site/about/glossary/monitoring/_model-drift.qmd index 3023bc8ffc..84272e38da 100644 --- a/site/about/glossary/monitoring/_model-drift.qmd +++ b/site/about/glossary/monitoring/_model-drift.qmd @@ -2,5 +2,5 @@ Refer to the LICENSE file in the root of this repository for details. 
SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial --> -model drift -: Changes in data patterns, input distributions, or model behavior that may indicate a degradation in model performance over time. +model drift, drift +: Changes in data patterns, input distributions, or the behavior of a record (such as a model) that may indicate a degradation in performance over time. diff --git a/site/about/glossary/monitoring/_model-performance.qmd b/site/about/glossary/monitoring/_model-performance.qmd index 90d343f912..bf3dd4c6cb 100644 --- a/site/about/glossary/monitoring/_model-performance.qmd +++ b/site/about/glossary/monitoring/_model-performance.qmd @@ -2,5 +2,5 @@ Refer to the LICENSE file in the root of this repository for details.
\ No newline at end of file +ongoing monitoring, ongoing monitoring report, ongoing monitoring plan, monitoring plan +: A comprehensive and structured periodic report assessing the record's performance and compliance over time, ensuring it remains valid under changing conditions. Monitoring includes key elements such as data sources, inputs, performance metrics, and periodic evaluations, ensuring transparency and visibility of the record's performance in the production environment. \ No newline at end of file diff --git a/site/about/glossary/monitoring/_recalibrating-models.qmd b/site/about/glossary/monitoring/_recalibrating-models.qmd index 252a8ac653..9c1f2285d9 100644 --- a/site/about/glossary/monitoring/_recalibrating-models.qmd +++ b/site/about/glossary/monitoring/_recalibrating-models.qmd @@ -2,5 +2,5 @@ Refer to the LICENSE file in the root of this repository for details. SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial --> -recalibrating models -: The process of adjusting a model to account for detected drift or changes in the underlying data or environment. +recalibrating models, recalibrating +: The process of adjusting a record (such as a model) to account for detected drift or changes in the underlying data or environment. diff --git a/site/about/glossary/monitoring/_reporting-and-governance.qmd b/site/about/glossary/monitoring/_reporting-and-governance.qmd index e0908f6ce2..d0152b7056 100644 --- a/site/about/glossary/monitoring/_reporting-and-governance.qmd +++ b/site/about/glossary/monitoring/_reporting-and-governance.qmd @@ -2,5 +2,5 @@ Refer to the LICENSE file in the root of this repository for details. SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial --> -reporting and governance +reporting and governance : The documentation of monitoring artifacts and communication to stakeholders to support decision-making and maintain transparency. 
diff --git a/site/about/glossary/validation_reports/_artifacts.qmd b/site/about/glossary/validation_reports/_artifacts.qmd index fc3c7cfe8b..5f27365aef 100644 --- a/site/about/glossary/validation_reports/_artifacts.qmd +++ b/site/about/glossary/validation_reports/_artifacts.qmd @@ -3,4 +3,4 @@ Refer to the LICENSE file in the root of this repository for details. SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial --> artifacts (previously findings) -: Observations or issues identified during validation, including any deviations from expected performance or standards. Artifacts are organized by type — default types include Validation Issue, Policy Exception, and Limitation. Custom artifact types such as Change Management Record can be created to track other categories relevant to your organization. \ No newline at end of file +: Observations or issues identified during validation, including any deviations from expected performance or standards. Artifacts are organized by type — default types provided by {{< var vm.product >}} include Validation Issue, Policy Exception, and Limitation. Custom artifact types can be created to track other categories relevant to your organization. \ No newline at end of file diff --git a/site/about/glossary/validation_reports/_report-intro.qmd b/site/about/glossary/validation_reports/_report-intro.qmd index ed19217e3a..3525a01e55 100644 --- a/site/about/glossary/validation_reports/_report-intro.qmd +++ b/site/about/glossary/validation_reports/_report-intro.qmd @@ -2,4 +2,4 @@ Refer to the LICENSE file in the root of this repository for details. SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial --> -A validation report is a comprehensive review that evaluates a record's accuracy, performance, and suitability for its intended purpose. It encompasses the process of risk assessment, identifying areas of potential error or risk within the record's components, such as data inputs and algorithms. 
The report follows established validation guidelines to ensure consistency and adherence to internal and regulatory standards. \ No newline at end of file +A validation report is a comprehensive and structured review evaluating a record's accuracy, performance, and suitability for its intended purpose. A report follows established validation guidelines to ensure consistency and adherence to internal and regulatory standards — encompassing the process of risk assessment, identifying areas of potential error or risk within the record's components, supporting transparency, regulatory compliance, and informed decision-making by documenting the validator’s independent review and conclusions. \ No newline at end of file diff --git a/site/about/glossary/validation_reports/_risk-areas.qmd b/site/about/glossary/validation_reports/_risk-areas.qmd index 34169742e5..323a642ca1 100644 --- a/site/about/glossary/validation_reports/_risk-areas.qmd +++ b/site/about/glossary/validation_reports/_risk-areas.qmd @@ -3,4 +3,4 @@ Refer to the LICENSE file in the root of this repository for details. SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial --> model risk areas, risk areas -: Specific components or aspects of a record such as a model where risk might be present, such as data inputs, algorithms, or implementation. \ No newline at end of file +: Specific components or aspects of a record (such as a model) where risk might be present, such as data inputs, algorithms, or implementation. \ No newline at end of file diff --git a/site/about/glossary/validation_reports/_risk-assessment.qmd b/site/about/glossary/validation_reports/_risk-assessment.qmd index b862511921..39ec545370 100644 --- a/site/about/glossary/validation_reports/_risk-assessment.qmd +++ b/site/about/glossary/validation_reports/_risk-assessment.qmd @@ -3,4 +3,4 @@ Refer to the LICENSE file in the root of this repository for details. 
SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial --> model risk assessment, risk assessment -: The process of identifying and evaluating risks associated with the use and potential errors in a record such as a model. \ No newline at end of file +: The process of identifying and evaluating risks associated with the use and potential errors in a record (such as a model). \ No newline at end of file diff --git a/site/about/glossary/validation_reports/_validation-guidelines.qmd b/site/about/glossary/validation_reports/_validation-guidelines.qmd index 77930799af..2ac7b8baf0 100644 --- a/site/about/glossary/validation_reports/_validation-guidelines.qmd +++ b/site/about/glossary/validation_reports/_validation-guidelines.qmd @@ -3,4 +3,4 @@ Refer to the LICENSE file in the root of this repository for details. SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial --> validation guidelines -: Established standards or procedures for conducting thorough and consistent validations, usually aligned with principles within specific tools such as models or AI risk frameworks. \ No newline at end of file +: Established standards or procedures for conducting thorough and consistent validations, usually aligned with principles within specific records (such as models) or AI risk frameworks. \ No newline at end of file diff --git a/site/about/library-and-platform.qmd b/site/about/library-and-platform.qmd index 7f776107c8..ed0bcf2cf9 100644 --- a/site/about/library-and-platform.qmd +++ b/site/about/library-and-platform.qmd @@ -12,7 +12,7 @@ listing: sort: false fields: [title, description] contents: - - overview-model-documentation.qmd + - overview-documentation.qmd - overview-llm-features.qmd - deployment-options.qmd - system-access-requirements.qmd @@ -24,21 +24,21 @@ listing: #### 1. 
{{< var validmind.developer >}} -The *{{< var validmind.developer >}}* is a Python library of tools and methods designed to automate generating model documentation and running validation tests. The {{< var vm.developer >}} is designed to be platform agnostic and integrates with your existing development environment. +The *{{< var validmind.developer >}}* is a Python library of tools and methods designed to automate generating documentation and running validation tests. The {{< var vm.developer >}} is designed to be platform agnostic and integrates with your existing development environment. For Python developers, a single installation command provides access to all the functions: - + ```python %pip install validmind ``` #### 2. {{< var validmind.platform >}} -The *{{< var validmind.platform >}}* is an easy-to-use web-based interface that enables you to track the model lifecycle: +The *{{< var validmind.platform >}}* is an easy-to-use web-based interface that enables you to track your risk management lifecycles: - Customize workflows to adhere to and oversee your governance processes. - Review and edit the documentation and test metrics generated by the {{< var vm.developer >}}. -- Collaborate with and capture feedback from model developers and model validators. +- Collaborate with and capture feedback from developers and validators. - Generate validation reports and approvals. ::: diff --git a/site/about/overview-model-documentation.qmd b/site/about/overview-documentation.qmd similarity index 63% rename from site/about/overview-model-documentation.qmd rename to site/about/overview-documentation.qmd index 59e462e4aa..7954b36a9e 100644 --- a/site/about/overview-model-documentation.qmd +++ b/site/about/overview-documentation.qmd @@ -2,10 +2,11 @@ # Copyright © 2023-2026 ValidMind Inc. All rights reserved. # Refer to the LICENSE file in the root of this repository for details. 
# SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial -title: "Automated model testing & documentation" +title: "Automated testing & documentation" date: last-modified aliases: - /guide/overview-model-documentation.html + - /about/overview-model-documentation.html listing: id: quickstart type: grid @@ -17,40 +18,40 @@ listing: - path: ../developer/validmind-library.qmd # INVISIBLE SPACE REQUIRED TO ENSURE THAT THE DESCRIPTION DOESN'T HAVE EXTRA PADDING DUE TO THE VARIABLE title: "{{< var validmind.developer >}}​" - description: "The {{< var validmind.developer >}} streamlines model development and validation by automating testing." + description: "The {{< var validmind.developer >}} streamlines development and validation by automating testing." fields: [title, description] --- -The {{< var validmind.developer >}} streamlines the process of documenting various types of models. {{< var vm.product >}} automates the documentation process, ensuring that your model documentation and testing aligns with regulatory and compliance standards. +The {{< var validmind.developer >}} streamlines the process of documenting various types of records, such as models. {{< var vm.product >}} automates the documentation process, ensuring that your documentation and testing align with regulatory and compliance standards. ::: {.attn} ## {{< fa code >}} The {{< var validmind.developer >}} -The {{< var validmind.developer >}} is a Python library and documentation engine designed to streamline the process of documenting various types of models, including traditional statistical models, legacy systems, artificial intelligence/machine learning models, and large language models (LLMs).
+The {{< var validmind.developer >}} is a Python library and documentation engine designed to streamline the process of documenting various types of records, including traditional statistical models, legacy systems, artificial intelligence/machine learning models, large language models (LLMs), agentic AI systems, and more. -It offers model developers a systematic approach to documenting and testing risk models with repeatability and consistency, ensuring alignment with regulatory and compliance standards. +It offers developers a systematic approach to documenting and testing with repeatability and consistency, ensuring alignment with regulatory and compliance standards. ![The two main components of {{< var vm.product >}}: the {{< var validmind.developer >}} that integrates with your existing developer environment, and the {{< var validmind.platform >}}](/about/deployment/validmind-architecture-overview.png){fig-alt="An image showing the two main components of ValidMind: the ValidMind Library that integrates with your existing developer environment, and the ValidMind Platform"} -The {{< var validmind.developer >}} consists of a client-side library, a {{< var vm.api >}} integration for models and testing, and validation tests that streamline the model development process. Implemented as a series of independent libraries in Python and R, our {{< var vm.developer >}} ensures compatibility and flexibility with diverse sets of developer environments and requirements. +The {{< var validmind.developer >}} consists of a client-side library, a {{< var vm.api >}} integration for records (models) and testing, and validation tests that streamline the development process. Implemented as a series of independent libraries in Python and R, our {{< var vm.developer >}} ensures compatibility and flexibility with diverse sets of developer environments and requirements. 
With the {{< var validmind.developer >}}, you can: -- **Automate documentation** — Add comprehensive documentation as metadata while you build models to be shared with model validators, streamlining and speeding up the process. +- **Automate documentation** — Add comprehensive documentation as metadata while you build records to be shared with validators, streamlining and speeding up the process. - **Run test suites** — Identify potential risks for a diverse range of statistical and AI/LLM/ML models by assessing data quality, model outcomes, robustness, and explainability. -- **Integrate with your development environment** — Seamlessly incorporate the {{< var validmind.developer >}} into your existing model development environment, connecting to your existing model code and data sets. -- **Upload documentation data** — Send qualitative and quantitative test data to the {{< var validmind.platform >}}[^1] to generate the model documentation for review and approval, fostering effective collaboration with model reviewers and validators. +- **Integrate with your development environment** — Seamlessly incorporate the {{< var validmind.developer >}} into your existing development environment, connecting to your existing code and data sets. +- **Upload documentation data** — Send qualitative and quantitative test data to the {{< var validmind.platform >}}[^1] to generate the documentation for review and approval, fostering effective collaboration with reviewers and validators. ::: ## Simple installation -Install the {{< var vm.developer >}} with: +Install the {{< var vm.developer >}} with: ```python %pip install validmind @@ -64,8 +65,8 @@ Install the {{< var vm.developer >}} with: What the {{< var validmind.developer >}} offers: -- Generates documentation artifacts utilizing the context of the model and dataset, the model's metadata, and the chosen documentation template. -- Can be easily imported into your local model development environment. 
The supported platforms include Python and R. +- Generates documentation artifacts utilizing the context of the record (such as a model) and dataset, the record's metadata, and the chosen documentation template. +- Can be easily imported into your local development environment. The supported platforms include Python and R. - Dual-licensed — The {{< var vm.developer >}} is available as open-source under AGPL v3 license and also with a commercial software license. ::: @@ -79,18 +80,18 @@ vm.init(model="MODEL_IDENTIFIER") ``` ```python -vm_dataset = vm. log_dataset( +vm_dataset = vm.log_dataset( df, "training", targets=targets, ) -vm. run_dataset_tests(df, vm_dataset=vm_dataset) +vm.run_dataset_tests(df, vm_dataset=vm_dataset) ``` ```python -vm. Log_model (model) -vm. log_training_metrics (model, x_train, y_train) -vm. run_model_tests (model, x_test, y_test) +vm.log_model (model) +vm.log_training_metrics (model, x_train, y_train) +vm.run_model_tests (model, x_test, y_test) ``` ::: @@ -100,12 +101,12 @@ vm. run_model_tests (model, x_test, y_test) How the {{< var validmind.developer >}} works: -- The tests and functions are executed automatically, following pre-configured templates tailored for specific model use cases. This ensures that minimum documentation requirements are consistently fulfilled. +- The tests and functions are executed automatically, following pre-configured templates tailored for specific use cases. This ensures that minimum documentation requirements are consistently fulfilled. - The {{< var vm.developer >}} integrates with ETL/data processing pipelines using connector interfaces. This enables the extraction of relationships between raw data sources and their corresponding post-processed datasets, such as those preloaded session instances received from platforms like Spark and Snowflake. 
## Extensible by design -{{< var vm.product >}} supports various model types, including:[^2] +{{< var vm.product >}} supports various record (model) types, including:[^2] - Traditional machine learning models (ML) such as tree-based models and neural network models. - Natural language processing models (NLP) for text analysis and understanding. @@ -114,25 +115,25 @@ How the {{< var validmind.developer >}} works: {{< var vm.product >}} is designed to be highly extensible to cater to our customers' specific requirements. You can expand its functionality in the following ways: -- You can easily add support for new models and data types by defining new classes within the {{< var validmind.developer >}}. We provide templates to guide you through this process.[^3] -- To include custom tests in the library, you can define new functions. We offer templates to help you create these custom tests.[^4] -- You have the flexibility to integrate third-party test libraries seamlessly. These libraries can be hosted either locally within your infrastructure or remotely, for example, on GitHub. Leverage additional testing capabilities and resources as needed.[^5] +- You can easily add support for new records and data types by defining new classes within the {{< var validmind.developer >}}. We provide templates to guide you through this process.[^3] +- To include custom tests in the library, you can define new functions. We offer templates to help you create these custom tests.[^4] +- You have the flexibility to integrate third-party test libraries seamlessly. These libraries can be hosted either locally within your infrastructure or remotely, for example, on GitHub. 
Leverage additional testing capabilities and resources as needed.[^5] ## {{< var validmind.api >}} integration {{< var vm.product >}} imports the following artifacts into the documentation via our {{< var validmind.api >}} integration: -- Metadata about datasets and models, used to lookup programmatic documentation content, such as the stored definition for _common logistic regression limitations_ when a logistic regression model has been passed to the {{< var vm.product >}} test plan to be run. -- Quality and performance metrics collected from datasets and models. -- Output from test and test suites that have been run. +- Metadata about datasets and records (models), used to look up programmatic documentation content, such as the stored definition for _common logistic regression limitations_ when a logistic regression model has been passed to the {{< var vm.product >}} test plan to be run. +- Quality and performance metrics collected from datasets and records. +- Output from tests and test suites that have been run. - Images, plots, visuals that were generated as part of extracting metrics and running tests. ![Artifacts imported into the documentation via our {{< var vm.api >}}](fine-print/overview-api-integration.jpg){width=90% fig-alt="A representation of artifacts imported into the documentation via our Python API"} ::: {.callout-important} -## {{< var vm.product >}} does NOT: -- Send any personal identifiable information (PII) when generating documentation reports. -- Store any customer datasets or models. +## {{< var vm.product >}} does NOT: +- Send any personally identifiable information (PII) when generating documentation reports. +- Store any customer datasets or records. ::: ## Ready to try out {{< var vm.product >}}? 
@@ -144,9 +145,9 @@ How the {{< var validmind.developer >}} works: -[^1]: [Model risk management](overview-model-risk-management.qmd) +[^1]: [Model risk management](use-cases/model-risk-management.qmd) -[^2]: [Supported models and frameworks](/developer/supported-models-and-frameworks.qmd) +[^2]: [Supported records and frameworks](/developer/supported-records-and-frameworks.qmd) [^3]: [Customize document templates](/guide/templates/customize-document-templates.qmd) diff --git a/site/about/overview-llm-features.qmd b/site/about/overview-llm-features.qmd index 2dc6c1d19d..202c7e1217 100644 --- a/site/about/overview-llm-features.qmd +++ b/site/about/overview-llm-features.qmd @@ -6,7 +6,7 @@ title: "Large language model features" date: last-modified --- -{{< var vm.product >}} offers several specialized features that use large language models (LLMs) to streamline model risk management and ensure regulatory compliance. Here's how we approach these features and what you need to know. +{{< var vm.product >}} offers several specialized features that use large language models (LLMs) to streamline risk management and ensure regulatory compliance. Here's how we approach these features and what you need to know. ::: {.attn} ## {{< fa list-check >}} Our philosophy @@ -30,7 +30,7 @@ Our testing methodologies and philosophy around testing are readily available, a ## Our features -{{< var vm.product >}} enhances model documentation, testing, and compliance workflows, providing your team with tools for effective model governance. +{{< var vm.product >}} enhances documentation, testing, and compliance workflows, providing your team with tools for effective risk governance. ::: {.column-margin .pl3 .pt6} @@ -54,13 +54,13 @@ Why it matters ::: {.w-50-ns .pl2 .pr2} ### Qualitative checks -Leverages metadata from the model inventory, test outcomes, and additional data provided to create qualitative sections within model documentation. 
+Leverages metadata from the inventory, test outcomes, and additional data provided to create qualitative sections within documentation.

::: {.feature} Why it matters -: Qualitative checks ensure that essential contextual information is accurately documented and aligned with the model's purpose and scope. +: Qualitative checks ensure that essential contextual information is accurately documented and aligned with the record's purpose and scope. ::: ::: @@ -72,7 +72,7 @@ Why it matters ::: {.w-50-ns .pr2} ### Risk assessment -Using data from test results, generates a tailored risk assessment for each section of model documentation. This feature aids in identifying potential risks based on the model’s performance and results. +Using data from test results, generates a tailored risk assessment for each section of documentation. This feature aids in identifying potential risks based on the record's performance and results. ::: {.feature} Why it matters @@ -85,7 +85,7 @@ Why it matters ::: {.w-50-ns .pl2 .pr2} ### {{< var validmind.checker >}} -Reviews documents such as model documentation or validation reports to ensure documents aligns with relevant regulatory requirements. +Reviews documents, such as documentation or validation reports, to ensure they align with relevant regulatory requirements.

@@ -97,7 +97,7 @@ Why it matters +Assesses each part of the documentation for adherence to internal guidelines and policies. This tool supports consistent documentation standards across the organization, promoting uniformity in compliance practices. --> ::: :::: @@ -120,7 +120,7 @@ These documents detail our [AI usage policy](https://validmind.com/about/legal/a ::: {.w-50-ns .pr3} ### Try it yourself -Discover how {{< var vm.product >}}’s LLM-powered platform, purpose-built for model risk management teams, enables streamlined and confident testing, documentation, validation, and governance of generative AI models and processes. +Discover how {{< var vm.product >}}’s LLM-powered platform, purpose-built for risk management teams, enables streamlined and confident testing, documentation, validation, and governance of generative AI systems and processes. [Request a Demo](https://validmind.com/request-demo/){.button .button-green} diff --git a/site/about/overview-model-risk-management.qmd b/site/about/overview-model-risk-management.qmd deleted file mode 100644 index 81a6d137ed..0000000000 --- a/site/about/overview-model-risk-management.qmd +++ /dev/null @@ -1,226 +0,0 @@ ---- -# Copyright © 2023-2026 ValidMind Inc. All rights reserved. -# Refer to the LICENSE file in the root of this repository for details. -# SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial -title: "Model risk management" -date: last-modified -aliases: - - /guide/overview-model-risk-management.html -listing: - id: quickstart - type: grid - grid-columns: 2 - # image-height: 100% - contents: - - path: ../get-started/get-started.qmd - title: "Get started" - description: "Our quickstarts are the quickest and easiest way to try out our product features." - fields: [title, description, reading-time] ---- - -The {{< var validmind.platform >}} offers an integrated solution to manage validation reports, track artifacts, and report on model risk compliance across your model portfolio. 
{{< var vm.product >}} enables your organization to monitor and manage models effectively, focusing on mitigating risks, maintaining governance, and ensuring compliance throughout the entire enterprise. - -::: {.attn} - -## {{< fa laptop-code >}} The {{< var validmind.platform >}} - -The {{< var validmind.platform >}} provides a comprehensive suite of tools, guidelines, and best practices. You use {{< var vm.product >}} to review and evaluate models and model documentation to ensure they comply with organizational and regulatory requirements. - - - -![The two main components of {{< var vm.product >}}. The {{< var validmind.developer >}} that integrates with your existing developer environment, and the {{< var validmind.platform >}}.](overview-platform.png){width=80% fig-alt="An image showing the two main components of ValidMind. The ValidMind Library that integrates with your existing developer environment, and the ValidMind Platform."} - -The {{< var vm.platform >}} employs a multi-tenant architecture, hosting the cloud-based user interface, APIs, databases, and internal services. The design ensures efficient resource utilization and offers a highly scalable solution for organizations of varying sizes. - -With the {{< var vm.platform >}}, you can: - -- **Track your model inventory**[^1] — Manage the model lifecycle, track the workflow status for models, plan for upcoming validation dates, and more. -- **Work on validation initiatives**[^2] — Collaborate with developers and validators to review documentation, add artifacts, keep track of review statuses, and generate validation reports. -- **Configure workflows**[^3] — Set up the {{< var validmind.platform >}} to follow your existing model risk management processes, manage statuses for different parts of the workflow, and get an end-to-end view of workflows and who is involved. 
-- **Use, create, or edit tests, test suites, and templates**[^4] — Create and/or configure required validation tests, test suites, and document templates for specific model use cases, tailoring it to your own specific needs. -- **Integrate with your stack**[^5] — Import and export model documentation and validation reports. - -::: - -## Regulatory requirements - -{{< var vm.product >}} is designed to cater to the regulatory compliance and model risk management (MRM) requirements of financial institutions, facilitating enhanced compliance with government regulations, policies concerning MRM, and emerging legislations addressing AI model risk, including risks associated with the use of large language models (LLMs). - -Examples of regulations or policies include: - -:::: {.flex .flex-wrap .justify-around} - -::: {.w-75-ns} - -### SR 11-7: Guidance on Model Risk Management - -The Supervisory Guidance on model risk management issued by the Board of Governors of the Federal Reserve System and the Office of the Comptroller of the Currency in the United States in 2011. It provides comprehensive guidance to financial institutions on developing and maintaining a robust model risk management framework, covering aspects like model development, implementation, use, and validation. - -::: - -::: {.w-20-ns .content-center} - -![Board of Governors Federal Reserve logo](federal-reserve.svg){width=70% fig-alt="Board of Governors Federal Reserve logo"} - -::: - -:::: - -SR 11-7 is widely recognized and has become a benchmark in the industry for model risk management practices. - -SR 11-7 outlines these core requirements: - -Model Risk Management -: - Identify and mitigate risks associated with incorrect or inappropriate model usage, outputs, or implementation errors. -- Encourage "effective challenge" to identify model limitations and propose necessary changes. 
-- Consider materiality in model risk management based on the extent of model usage and its impact on the organization's financial condition. - -Model Development, Implementation, and Use -: - Develop with a clear statement of purpose, sound design, theory, and logic. -- Assess rigorously data quality and relevance, robust methodologies, and appropriate documentation. -- Test to ensure accuracy, robustness, stability, and to evaluate limitations and assumptions. - -Model Validation -: - Be an integral part for managing model risk, ensuring models perform as intended. -- Identify and address potential errors or misuses. - -Governance, Policies, and Controls -: - Establish a sound governance framework to oversee model risk management. -- Implement policies and controls for appropriate use and validation of models. - -The regulation also mandates ongoing monitoring and periodic reviews to ensure models remain valid and effective. - -:::: {.flex .flex-wrap .justify-around} - -::: {.w-20-ns .content-center} - -![Bank of England logo](bank-of-england.svg){fig-alt="Bank of England logo"} - -::: - -::: {.w-75-ns} - -### SS1/23 – Model Risk Management Principles for Banks - -A policy issued by the Prudential Regulation Authority (PRA) in the UK. It encapsulates the final model risk management principles following feedback on the earlier consultation paper CP6/22. The statement provides guidelines for banks in the UK on managing model risk effectively, with particular emphasis on strategic planning and technical capabilities. - -::: - -:::: - -It outlines principles and amendments, like clarifications on model complexity factors, senior management function responsibilities, and inclusion of dynamic adjustments in model change management, aiming to standardize MRM practices across UK banks and foster the safe adoption of emerging technologies, such as machine learning, artificial intelligence, and large language models (LLMs). 
- -SS1/23 outlines these core principles: - -Model Identification and Model Risk Classification -: - Ensure a structured approach to accurately identify and categorize models within the model risk management (MRM) framework. -- Facilitate the proper management and oversight of models, aiding in the alignment of model risk management efforts with organizational risks and objectives. - -Governance -: - Establish a structured oversight mechanism for effective model risk management, delineating clear responsibilities and authorities. -- Ensure accountability, transparency, and effective communication within the organization regarding model risks and controls. - -Model Development, Implementation, and Use -: - Emphasize the correct development, deployment, and utilization of models as per the guidelines laid down in the MRM framework. -- Ensure models are developed and utilized in a manner consistent with their intended purposes and within acceptable risk boundaries. - -Independent Model Validation -: - Stress the importance of independent validation to ascertain model performance, accuracy, and identify potential issues. -- Provide an objective assessment of models to ensure they are functioning as intended and to identify any potential areas of improvement or correction. - -Model Risk Mitigants -: - Underline the necessity for measures to mitigate risks associated with model use, including the identification and implementation of controls. -- Help in reducing the potential adverse impact of model risks on the organization’s financial condition, reputation, and regulatory compliance. - -The regulation encourages a proportionate application of these principles based on the size and complexity of the institution. 
- -### Around the globe - -Other, similar guidelines and policies that {{< var vm.product >}} is designed to help you with include: - -:::: {.flex .flex-wrap .justify-around} - -::: {.w-50-ns} - -#### Guideline-E23: Enterprise-Wide Model Risk Management for Deposit-Taking Institutions - -Issued by the Office of the Superintendent of Financial Institutions (OSFI) in Canada, it outlines minimum prudent practices for model development, review, approval, use, and modification​​. - -::: - -::: {.w-40-ns} - -#### Principles for Model Risk Management - -Issued by the Financial Services Agency (FSA) in Japan in June 2021, this document was finalized after a consultation period and outlines principles for managing model risk​​​. - -::: - -:::: - -::: {.callout title="Read more:"} -- [SR 11-7](https://www.federalreserve.gov/supervisionreg/srletters/sr1107.htm) -- [SS1/23](https://www.bankofengland.co.uk/prudential-regulation/publication/2023/may/model-risk-management-principles-for-banks-ss) -- [Guideline-E23](https://www.osfi-bsif.gc.ca/Eng/fi-if/rg-ro/gdn-ort/gl-ld/Pages/e23.aspx) -- [Principles for Model Risk Management](https://www.fsa.go.jp/common/law/ginkou/pdf_03.pdf) - -::: - -### Meeting regulatory requirements with {{< var vm.product >}} - - As a robust tool for implementing Model Risk Management (MRM) best practices, including the _three lines of defense_, {{< var vm.product >}} significantly aids organizations in adhering to the regulatory guidelines set forth by SR 11-7 and SS1/23. - -:::: {.flex .flex-wrap .justify-around} - -::: {.w-40-ns} - -First line of defense — model developers -: {{< var vm.product >}} offers a suite of tools for model developers, facilitating thorough documentation and rigorous testing of models, aligning with the regulatory expectations of both SR 11-7 and SS1/23, particularly for models under regulatory purview. 
- -Second line of defense — model validators -: {{< var vm.product >}} empowers model validators with the ability to independently validate models ensuring adherence to the organization's MRM principles throughout the model lifecycle, a core requirement of these regulations. - -Third line of defense — auditors -: Enabling internal and external audits provides an independent and objective assurance to the organization by assessing the robustness of controls within the model risk management framework. It evaluates how well the first and second lines of defense are functioning, ensuring adherence to regulatory and organizational standards, thereby promoting a robust model risk management environment. - -Model inventory -: The {{< var vm.product >}} **{{< fa cubes >}} Inventory** encapsulates a centralized repository for all models, aiding in streamlined tracking, management, and monitoring, simplifying compliance with the inventory mandates specified in SR 11-7 and SS1/23. - -::: - -::: {.w-50-ns} -Lifecycle management and custom workflows -: {{< var vm.product >}}’s capabilities extend to effective model lifecycle management through configurable workflows. This structured approach to managing model risks across various lifecycle stages significantly aids in meeting the rigorous management and oversight expectations set by SR 11-7 and SS1/23. - -Model documentation automation -: By automating model documentation through configurable templates and test plans, {{< var vm.product >}} ensures consistent and accurate documentation capture, directly aligning with the documentation standards stipulated in these regulatory guidelines. - -Model validation and approval -: With automated validation features and comprehensive risk assessment tools, {{< var vm.product >}} aligns with the effective validation criteria and thorough risk evaluation mandates of SR 11-7 and SS1/23. 
-Communication and tracking -: The built-in communication and tracking functionality of {{< var vm.product >}} facilitates seamless collaboration and understanding among stakeholders regarding model usage, limitations, and risks, fostering a collaborative environment as encouraged by these regulations. - -By integrating these features, {{< var vm.product >}} provides a comprehensive suite of tools that not only simplifies the path to compliance with SR 11-7 and SS1/23 but also embeds a culture of rigorous and transparent model risk management within the organization. - -::: - -:::: - -## Ready to try out {{< var vm.product >}}? - -:::{#quickstart} -::: - - - - -[^1]: [Working with the inventory](/guide/inventory/working-with-the-inventory.qmd) - -[^2]: [Preparing validation reports](/guide/validation/preparing-validation-reports.qmd) - -[^3]: [Setting up workflows](/guide/workflows/setting-up-workflows.qmd) - -[^4]: [Working with document templates](/guide/templates/working-with-document-templates.qmd) - -[^5]: [Export documents](/guide/reporting/export-documents.qmd) \ No newline at end of file diff --git a/site/about/overview.qmd b/site/about/overview.qmd index 8b1ef72853..af7f860745 100644 --- a/site/about/overview.qmd +++ b/site/about/overview.qmd @@ -32,7 +32,7 @@ aliases: - /about.html --- -{{< var vm.product >}} is the system of record for AI governance. You use {{< var vm.product >}} to model the full lifecycle of AI systems, models, use cases, and tools, along with their dependencies, and automates the governance and documentation you build on top. +{{< var vm.product >}} is the system of record for AI governance. You use {{< var vm.product >}} to model the full lifecycle of AI systems, records (such as models), use cases, and tools, along with their dependencies, and to automate the governance and documentation you build on top.
Flexible by design, the {{< var vm.platform >}} lets you define your own inventory hierarchy, dependencies, and governance rules, powered by {{< var vm.product >}}’s documentation automation, workflows, and analytics. @@ -40,7 +40,7 @@ Flexible by design, the {{< var vm.platform >}} lets you define your own invento ::: {.column-margin} ::: {.image-container} - + ![](/assets/img/admin-diagram.png) ![](/assets/img/developer-diagram.png) ![](/assets/img/validator-diagram.png) @@ -56,7 +56,8 @@ Flexible by design, the {{< var vm.platform >}} lets you define your own invento ## {{< fa hand-point-right >}} Ready to try out {{< var vm.product >}}? +:::{#validmind-next-steps} ::: -:::{#validmind-next-steps} ::: + diff --git a/site/about/use-cases/eu-ai-act.qmd b/site/about/use-cases/eu-ai-act.qmd index a33f793e66..02639a6180 100644 --- a/site/about/use-cases/eu-ai-act.qmd +++ b/site/about/use-cases/eu-ai-act.qmd @@ -28,7 +28,7 @@ The EU AI Act categorizes AI systems by risk level: ### Harmonization with other standards -The EU AI Act requirements overlap with existing model risk management frameworks. Organizations already following SR 11-7, SS1/23, or E-23 can leverage existing documentation and controls but must also comply with the EU AI Act requirements. +The EU AI Act requirements overlap with existing model risk management frameworks. Organizations already following SR 26-2, SS1/23, or E-23 can leverage existing documentation and controls but must also comply with the EU AI Act requirements. ## 1. 
Risk management system implementation (Article 9) @@ -230,14 +230,14 @@ Integrate all components into a complete compliance workflow addressing Articles [^3]: [Setting up workflows](/guide/workflows/setting-up-workflows.qmd) -[^4]: [Install and initialize the {{< var validmind.developer >}}](/developer/model-documentation/install-and-initialize-validmind-library.qmd) +[^4]: [Install and initialize the {{< var validmind.developer >}}](/developer/quickstart/install-and-initialize-validmind-library.qmd) [^5]: [Run tests and test suites](/developer/how-to/testing-overview.qmd) [^6]: [Work with document templates](/guide/templates/working-with-document-templates.qmd) -[^7]: [Test descriptions](/developer/test-descriptions.qmd) +[^7]: [{{< var vm.product >}} test sandbox](/developer/how-to/test-sandbox.qmd) [^8]: [Work with content blocks](/guide/documentation/work-with-content-blocks.qmd) diff --git a/site/about/use-cases/model-risk-management.qmd b/site/about/use-cases/model-risk-management.qmd index cae973dca0..b5d0926c25 100644 --- a/site/about/use-cases/model-risk-management.qmd +++ b/site/about/use-cases/model-risk-management.qmd @@ -4,6 +4,9 @@ # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial title: "Model risk management" date: last-modified +aliases: + - /about/overview-model-risk-management.html + - /guide/overview-model-risk-management.html listing: id: quickstart type: grid @@ -126,19 +129,19 @@ Lifecycle {{< var vm.product >}} is designed to help organizations comply with MRM regulatory requirements: -### SR 11-7 (United States) +### SR 26-2 (United States) :::: {.columns} ::: {.column width="70%" .pr3} -The Supervisory Guidance on model risk management was issued by the Board of Governors of the Federal Reserve System and the Office of the Comptroller of the Currency. It provides comprehensive guidance on designing and maintaining a robust MRM framework. 
+*Interagency Guidance on Model Risk Management for Banking Organizations* was issued jointly by the Board of Governors of the Federal Reserve System, the Federal Deposit Insurance Corporation, and the Office of the Comptroller of the Currency. It supersedes SR 11-7 and sets expectations for a risk-based MRM program — including a narrowed model definition, materiality-based tiering, strong ongoing monitoring and outcomes analysis, and a comprehensive enterprise inventory. ::: ::: {.column width="30%" .pl3} -[SR 11-7 implementation guide](sr-11-7.qmd){.button} +[SR 26-2 implementation guide](sr-26-2.qmd){.button} ::: diff --git a/site/about/use-cases/sr-11-7.qmd b/site/about/use-cases/sr-11-7.qmd deleted file mode 100644 index ad4b573fda..0000000000 --- a/site/about/use-cases/sr-11-7.qmd +++ /dev/null @@ -1,226 +0,0 @@ ---- -# Copyright © 2023-2026 ValidMind Inc. All rights reserved. -# Refer to the LICENSE file in the root of this repository for details. -# SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial -title: "SR 11-7" -date: last-modified ---- - -Implement SR 11-7 compliance using {{< var vm.product >}}. - -{{< include _use-case-summary.qmd >}} - -## Overview - -SR 11-7 (Supervisory Guidance on Model Risk Management)[^1] provides comprehensive guidance to financial institutions on developing and maintaining a robust model risk management framework. By emphasizing the three lines of defense model and the concept of "effective challenge" — SR 11-7 ensures that models are rigorously questioned and tested. - -This guide is organized around the expectations for the three main areas outlined in SR 11-7: - -1. Model development, implementation, and use - -2. Model validation - -3. Governance, policies, and controls - -## 1. Model inventory setup - -#### Purpose - -Establish a comprehensive model inventory aligned to SR 11-7 requirements. - -### Steps - -1. Establish model definition criteria: - - - Define what constitutes a model in your organization. 
- - Document inclusion and exclusion criteria. - - Classify models by type and purpose. - -2. Configure inventory fields:[^2] - - - Set up materiality and tiering fields. - - Add fields for model purpose and business use. - - Configure risk rating classifications. - -3. Populate the inventory:[^3] - - - Register existing models. - - Document model ownership and stakeholders. - - Track model lifecycle status. - -## 2. Model development documentation - -#### Purpose - -Document models according to SR 11-7 development standards. - -### Steps - -1. Configure document templates:[^4] - - - Select or create templates aligned to SR 11-7. - - Include sections for purpose, design, theory, and logic. - -2. Document purpose, design, theory, and logic:[^5] - - - Clearly state the model's intended use. - - Document the theoretical basis. - - Explain design choices and methodology. - -3. Document data quality and relevance: - - - Assess and document data sources. - - Evaluate data quality and limitations. - - Document data transformations. - -4. Track assumptions and limitations: - - - Document all model assumptions. - - Identify known limitations. - - Establish conditions under which the model should not be used. - -## 3. Testing and outcomes analysis - -#### Purpose - -Test models for accuracy, robustness, and stability per SR 11-7 requirements. - -### Steps - -1. Run automated testing using the {{< var validmind.developer >}}:[^6] - - - Execute accuracy tests. - - Run robustness and stability tests. - - Document test results. - -2. Perform sensitivity analysis and benchmarking: - - - Test model sensitivity to input changes. - - Compare against benchmarks or challenger models. - - Document performance boundaries. - -3. Evaluate limitations and assumptions: - - - Test assumption validity. - - Identify conditions where performance degrades. - - Document testing coverage. - -## 4. 
Independent validation (effective challenge) - -#### Purpose - -Implement independent validation aligned to SR 11-7's "effective challenge" concept. - -### Steps - -1. Configure validation workflows: - - - Set up validation initiation triggers. - - Define validation scope and requirements. - - Establish independence requirements. - -2. Prepare validation reports:[^7] - - - Use validation report templates. - - Document validation activities. - - Summarize findings and conclusions. - -3. Track findings and remediation:[^8] - - - Document validation findings. - - Assign remediation owners. - - Track remediation progress. - -## 5. Ongoing monitoring and periodic review - -#### Purpose - -Implement ongoing monitoring per SR 11-7 requirements. - -### Steps - -1. Set up monitoring workflows:[^9] - - - Configure monitoring frequency. - - Define monitoring metrics. - - Establish escalation procedures. - -2. Configure performance tracking and alerts: - - - Set up performance dashboards. - - Configure threshold-based alerts. - - Document alert response procedures. - -3. Schedule periodic reviews:[^10] - - - Establish review frequency by model tier. - - Configure review reminders. - - Document review requirements. - -## 6. Governance, policies, and controls - -#### Purpose - -Establish governance framework aligned to SR 11-7. - -### Steps - -1. Configure role-based access:[^11] - - - Set up roles for three lines of defense. - - Configure permissions by role. - - Ensure appropriate segregation of duties. - -2. Configure approval workflows:[^12] - - - Set up model approval processes. - - Define approval authorities. - - Configure escalation paths. - -3. Enable audit trail and compliance reporting:[^12] - - - Configure activity logging. - - Set up compliance dashboards. - - Generate audit reports. 
- -## Implementation checklist - -- [x] Model definition criteria established -- [x] Model inventory configured and populated -- [x] Document templates aligned to SR 11-7 -- [x] Testing framework implemented -- [x] Validation workflows configured -- [x] Ongoing monitoring established -- [x] Three lines of defense roles configured -- [x] Approval workflows in place -- [x] Audit trail enabled - - - - -[^1]: - - **Board of Governors of the Federal Reserve System:**
- [SR 11-7: Guidance on Model Risk Management](https://www.federalreserve.gov/supervisionreg/srletters/sr1107.htm) - -[^2]: [Manage inventory fields](/guide/inventory/manage-inventory-fields.qmd) - -[^3]: [Working with the inventory](/guide/inventory/working-with-the-inventory.qmd) - -[^4]: [Working with document templates](/guide/templates/working-with-document-templates.qmd) - -[^5]: [Working with documentation](/guide/documentation/working-with-documentation.qmd) - -[^6]: [Run tests and test suites](/developer/how-to/testing-overview.qmd) - -[^7]: [Preparing validation reports](/guide/validation/preparing-validation-reports.qmd) - -[^8]: [Working with artifacts](/guide/validation/working-with-artifacts.qmd) - -[^9]: [Ongoing monitoring](/guide/monitoring/ongoing-monitoring.qmd) - -[^10]: [Setting up workflows](/guide/workflows/setting-up-workflows.qmd) - -[^11]: [Manage permissions](/guide/configuration/manage-permissions.qmd) - -[^12]: [Working with analytics](/guide/reporting/working-with-analytics.qmd) diff --git a/site/about/use-cases/sr-26-2.qmd b/site/about/use-cases/sr-26-2.qmd new file mode 100644 index 0000000000..a37fd2e594 --- /dev/null +++ b/site/about/use-cases/sr-26-2.qmd @@ -0,0 +1,210 @@ +--- +# Copyright © 2023-2026 ValidMind Inc. All rights reserved. +# Refer to the LICENSE file in the root of this repository for details. +# SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial +title: "SR 26-2" +date: last-modified +aliases: + - /about/use-cases/sr-11-7.html +--- + +Implement SR 26-2 compliance using {{< var vm.product >}}. + +{{< include _use-case-summary.qmd >}} + +## Overview + +SR 26-2 (*Interagency Guidance on Model Risk Management for Banking Organizations*)[^1] was issued jointly by the Board of Governors of the Federal Reserve System, the Federal Deposit Insurance Corporation, and the Office of the Comptroller of the Currency. It supersedes SR 11-7 and reframes U.S. banking agencies’ expectations for model risk management (MRM). 
+ +SR 26-2 keeps core themes from prior guidance — sound development, independent validation, governance, and effective challenge — while emphasizing a risk-based posture: a narrower definition of what counts as a model, an explicit materiality framework, stronger ongoing monitoring and outcomes analysis, and an enterprise model inventory that supports concentration and dependency visibility. + +::: {.callout title="Generative and agentic AI"} +SR 26-2 explicitly excludes generative AI and agentic AI from its scope because these technologies are novel and rapidly evolving. Underlying MRM principles — including materiality, ongoing monitoring, and effective challenge — still apply when you govern those systems alongside traditional models. +::: + +This guide organizes platform actions around four priorities that align with SR 26-2: + +1. **Distinguish models from non-models** — Focus MRM on *complex quantitative methods* that apply statistical, economic, or financial theory, rather than on simple arithmetic, purely deterministic rules, or software without substantive theoretical underpinning. + +2. **Assess and tier models by materiality** — Use model exposure and model purpose so high-materiality models receive commensurate rigor while lower-materiality models can use more automated governance where appropriate. + +3. **Emphasize ongoing monitoring and outcomes analysis** — Treat continuous performance tracking, outcomes testing, and escalation as first-class obligations, not only point-in-time validation events. + +4. **Maintain a comprehensive model inventory** — Keep enterprise visibility into models, dependencies, concentrations, and aggregate risk. + +## 1. Distinguish models from non-models + +#### Purpose + +Apply a clear, documented definition of *model* versus *non-model* so MRM effort targets SR 26-2 scope. + +### Steps + +1. Document inclusion and exclusion criteria: + + - Define which methods count as models under SR 26-2’s narrowed definition. 
+ - Record exclusions (for example, simple spreadsheets, deterministic rules without quantitative theory) and the rationale. + +2. Align inventory registration rules:[^2] + + - Configure inventory fields that capture model type, theoretical basis, and intended analytical use. + - Ensure non-models are not inadvertently treated as in-scope models. + +3. Review the portfolio periodically: + + - Re-evaluate borderline tools when business use or complexity changes. + - Update documentation when classification decisions change. + +## 2. Assess and tier models by materiality + +#### Purpose + +Implement SR 26-2’s materiality lens — combining exposure and purpose — so controls scale with risk. + +### Steps + +1. Configure materiality and tiering fields:[^2] + + - Map organizational tiers to exposure and purpose dimensions. + - Align tiering to validation depth, monitoring frequency, and approval paths. + +2. Document purpose and business use:[^3] + + - Capture how each model affects decisions, capital, liquidity, or customers. + - Link materiality to reporting and committee oversight where required. + +3. Apply tier-aware workflows:[^10] + + - Automate reminders and approvals based on tier. + - Route high-materiality models to stricter documentation and validation templates. + +## 3. Ongoing monitoring, testing, and outcomes analysis + +#### Purpose + +Meet SR 26-2’s heightened expectations for continuous insight into model performance and outcomes, complementing periodic validation. + +### Steps + +1. Run automated testing in the {{< var validmind.developer >}}:[^6] + + - Execute accuracy, robustness, and stability tests on a defined cadence. + - Store results as evidence for monitoring and review. + +2. Configure monitoring and alerts:[^9] + + - Define metrics, thresholds, and escalation paths by materiality tier. + - Integrate outcomes analysis (for example, performance drift, decision quality) into review cycles. + +3. 
Schedule periodic reviews with monitoring context:[^10] + + - Combine monitoring dashboards, incidents, and validation history in each review. + - Document conclusions and required actions. + +## 4. Model development documentation + +#### Purpose + +Document models so development choices, data, theory, and limitations are transparent and reviewable. + +### Steps + +1. Configure document templates:[^4] + + - Align sections to SR 26-2 expectations for purpose, design, theory, and logic. + - Tier template depth by materiality. + +2. Document purpose, design, theory, and logic:[^5] + + - State intended use and decisions supported. + - Explain methodology, key assumptions, and known limitations. + +3. Document data quality and relevance: + + - Record data sources, transformations, and quality assessments. + - Highlight data gaps that affect reliability. + +## 5. Independent validation (effective challenge) + +#### Purpose + +Preserve effective challenge for in-scope models — independent validation that identifies limitations and supports sound use. + +### Steps + +1. Configure validation workflows: + + - Define independence rules and scope by materiality. + - Trigger validation when models change tier or materiality drivers shift. + +2. Prepare validation reports:[^7] + + - Use templates that capture findings, limitations, and conditions of use. + - Tie conclusions to monitoring metrics where applicable. + +3. Track findings and remediation:[^8] + + - Assign owners, due dates, and retest evidence. + - Close the loop into monitoring and inventory metadata. + +## 6. Governance, policies, and controls + +#### Purpose + +Operate a governance framework — policies, roles, approvals, and auditability — that matches SR 26-2’s risk-based MRM program. + +### Steps + +1. Configure role-based access:[^11] + + - Map the three lines of defense to platform roles. + - Enforce segregation of duties for high-materiality models. + +2. 
Configure approval workflows:[^10] + + - Encode model approvals, exceptions, and retirements. + - Maintain an audit trail for supervisory and internal review. + +3. Enable audit trail and compliance reporting:[^12] + + - Use activity logs and analytics for evidence of control operation. + - Export reports for committees and audits. + +## Implementation checklist + +- [x] Model versus non-model criteria documented and reflected in the inventory +- [x] Materiality and tiering configured with purpose and exposure fields +- [x] Ongoing monitoring, alerts, and outcomes analysis in place +- [x] Development documentation templates aligned to SR 26-2 +- [x] Validation workflows and effective challenge operating by tier +- [x] Governance roles, approvals, and audit evidence configured + + + + +[^1]: + + **Board of Governors of the Federal Reserve System, FDIC, and OCC:**
+ [SR 26-2: Interagency Guidance on Model Risk Management for Banking Organizations](https://www.federalreserve.gov/supervisionreg/srletters/SR2602.htm) (April 17, 2026; supersedes SR 11-7) + +[^2]: [Manage inventory fields](/guide/inventory/manage-inventory-fields.qmd) + +[^3]: [Working with the inventory](/guide/inventory/working-with-the-inventory.qmd) + +[^4]: [Working with document templates](/guide/templates/working-with-document-templates.qmd) + +[^5]: [Working with documentation](/guide/documentation/working-with-documentation.qmd) + +[^6]: [Run tests and test suites](/developer/how-to/testing-overview.qmd) + +[^7]: [Preparing validation reports](/guide/validation/preparing-validation-reports.qmd) + +[^8]: [Working with artifacts](/guide/validation/working-with-artifacts.qmd) + +[^9]: [Ongoing monitoring](/guide/monitoring/ongoing-monitoring.qmd) + +[^10]: [Setting up workflows](/guide/workflows/setting-up-workflows.qmd) + +[^11]: [Manage permissions](/guide/configuration/manage-permissions.qmd) + +[^12]: [Working with analytics](/guide/reporting/working-with-analytics.qmd) diff --git a/site/about/contributing/using-the-documentation.qmd b/site/about/using-the-documentation.qmd similarity index 82% rename from site/about/contributing/using-the-documentation.qmd rename to site/about/using-the-documentation.qmd index 4757f97f7b..39f59870e6 100644 --- a/site/about/contributing/using-the-documentation.qmd +++ b/site/about/using-the-documentation.qmd @@ -4,9 +4,11 @@ # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial title: "Using the documentation" date: last-modified +aliases: + - /about/contributing/using-the-documentation.html --- -This documentation site helps you learn {{< var vm.product >}}, implement it in your organization, govern your AI/ML models, and operate the platform day to day. 
+This documentation site helps you learn {{< var vm.product >}}, implement it in your organization, govern your AI/ML records (models), and operate the platform day to day. ## How to use this site @@ -25,7 +27,7 @@ Introduces the platform, its use cases, and deployment options. - [About {{< var vm.product >}}](/about/overview.qmd) — Platform overview and capabilities - [AI governance](/about/use-cases/ai-governance.qmd) — EU AI Act compliance and risk classification -- [Model risk management](/about/use-cases/model-risk-management.qmd) — SR 11-7, SS1/23, and E-23 compliance +- [Model risk management](/about/use-cases/model-risk-management.qmd) — SR 26-2, SS1/23, and E-23 compliance - [Library and platform](/about/library-and-platform.qmd) — How the {{< var validmind.developer >}} and {{< var validmind.platform >}} work together - [Deployment options](/about/deployment/deployment-options.qmd) — Multi-tenant cloud vs. Virtual Private {{< var vm.product >}} @@ -35,7 +37,7 @@ Introduces the platform, its use cases, and deployment options. Role-based quickstarts to help you begin using {{< var vm.product >}} quickly. -- [Developer quickstart](/get-started/developer/quickstart-developer.qmd) — Set up your environment and document your first model +- [Developer quickstart](/get-started/developer/quickstart-developer.qmd) — Set up your environment and document your first record (model) - [Validator quickstart](/get-started/validator/quickstart-validator.qmd) — Review documentation and prepare validation reports - [Administrator quickstart](/get-started/administrator/quickstart-administrator.qmd) — Configure users, roles, and organization settings @@ -49,14 +51,14 @@ Step-by-step instructions for platform tasks, organized by feature area. 
|---------|--------|---------------| | [Access](/guide/guides.qmd#access) | Signing up for and logging into {{< var vm.product >}} | Register, sign in via SSO, recover access | | [Configuration](/guide/guides.qmd#configuration) | Setting up your organization and users | Add users, create groups, assign roles and permissions | -| [Integrations](/guide/integrations/managing-integrations.qmd) | Connecting {{< var vm.product >}} to external systems | Manage secrets, configure connections, link external models | -| [Workflows](/guide/guides.qmd#workflows) | Automating model lifecycle processes | Configure workflow steps, manage transitions, set up approvals | -| [Inventory](/guide/guides.qmd#inventory) | Managing your model and record inventory | Register records, edit fields, configure interdependencies | +| [Integrations](/guide/integrations/managing-integrations.qmd) | Connecting {{< var vm.product >}} to external systems | Manage secrets, configure connections, link external records (models) | +| [Workflows](/guide/guides.qmd#workflows) | Automating lifecycle processes | Configure workflow steps, manage transitions, set up approvals | +| [Inventory](/guide/guides.qmd#inventory) | Managing your records (models) and record inventory | Register records, edit fields, configure interdependencies | | [Documents & templates](/guide/templates/working-with-documents.qmd) | Creating and customizing documentation | Manage document types, customize templates, use the text block library | -| [Model documentation](/guide/guides.qmd#model-documentation) | Authoring and collaborating on model docs | Edit content blocks, add test results, manage versions, submit for approval | -| [Model validation](/guide/guides.qmd#model-validation) | Reviewing and validating models | Review documentation, assess compliance, manage findings and artifacts | +| [Documentation](/guide/guides.qmd#documentation) | Authoring and collaborating on documents | Edit content blocks, add test results, manage 
versions, submit for approval | +| [Validation](/guide/guides.qmd#validation) | Reviewing and validating records (models) | Review documentation, assess compliance, manage findings and artifacts | | [Reporting](/guide/guides.qmd#reporting) | Analyzing and exporting data | View reports, create custom analytics, export inventory and documents | -| [Monitoring](/guide/guides.qmd#monitoring) | Tracking model performance over time | Enable monitoring, review results, set thresholds and alerts | +| [Monitoring](/guide/guides.qmd#monitoring) | Tracking record (model) performance over time | Enable monitoring, review results, set thresholds and alerts | | [Attestation](/guide/guides.qmd#attestation) | Managing formal attestations | Create, submit, review, and approve attestations | ### [{{< var validmind.developer >}}](/developer/validmind-library.qmd) @@ -65,7 +67,7 @@ Resources for developers integrating {{< var vm.product >}} into their workflows - [{{< var validmind.developer >}}](/developer/validmind-library.qmd) — Python library overview and installation - [Code samples](/developer/samples-jupyter-notebooks.qmd) — Jupyter notebooks for common use cases -- [Test descriptions](/developer/test-descriptions.qmd) — Reference for available validation tests +- [{{< var vm.product >}} test sandbox](/developer/how-to/test-sandbox.qmd) — Reference for available validation tests - [{{< var validmind.api >}}](/validmind/validmind.qmd) — Python API documentation - [Public REST API](/reference/validmind-rest-api-vm.qmd) — REST API for platform integrations diff --git a/site/about/contributing/validmind-chatbot.png b/site/about/validmind-chatbot.png similarity index 100% rename from site/about/contributing/validmind-chatbot.png rename to site/about/validmind-chatbot.png diff --git a/site/developer/_sidebar.yaml b/site/developer/_sidebar.yaml index a9d08ebe6c..399bd86069 100644 --- a/site/developer/_sidebar.yaml +++ b/site/developer/_sidebar.yaml @@ -10,44 +10,45 @@ website: # USING 
THE VARIABLE IN THE LINK TEXT MESSES UP THE MOBILE VIEW - text: "ValidMind Library" file: developer/validmind-library.qmd - - developer/supported-models-and-frameworks.qmd + - developer/supported-records-and-frameworks.qmd - text: "---" - text: "Quickstart" - - notebooks/quickstart/quickstart_model_documentation.ipynb - - notebooks/quickstart/quickstart_model_validation.ipynb + - notebooks/quickstart/quickstart_documentation.ipynb + - notebooks/quickstart/quickstart_validation.ipynb # USING THE VARIABLE IN THE LINK TEXT MESSES UP THE MOBILE VIEW & BREADCRUMB - section: "Install and initialize ValidMind" contents: - text: "Install and initialize the library" - file: developer/model-documentation/install-and-initialize-validmind-library.qmd + file: developer/quickstart/install-and-initialize-validmind-library.qmd - text: "Install and initialize the library for R" - file: developer/model-documentation/install-and-initialize-validmind-for-r.qmd + file: developer/quickstart/install-and-initialize-validmind-for-r.qmd - text: "Use an HTTP proxy with the library" - file: developer/model-documentation/use-http-proxy-with-validmind-library.qmd - - developer/model-documentation/store-credentials-in-env-file.qmd + file: developer/quickstart/use-http-proxy-with-validmind-library.qmd + - text: "Store credentials in `.env` files" + file: developer/quickstart/store-credentials-in-env-file.qmd - text: "---" - text: "End-to-End Tutorials" # USING THE VARIABLE IN THE LINK TEXT MESSES UP THE MOBILE VIEW & BREADCRUMB - - section: "Model development" + - section: "Development" contents: - text: "1 — Set up ValidMind Library" - file: notebooks/tutorials/model_development/1-set_up_validmind.ipynb - - text: "2 — Start model development process" - file: notebooks/tutorials/model_development/2-start_development_process.ipynb + file: notebooks/tutorials/development/1-set_up_validmind.ipynb + - text: "2 — Start the development process" + file: 
notebooks/tutorials/development/2-start_development_process.ipynb - text: "3 — Integrate custom tests" - file: notebooks/tutorials/model_development/3-integrate_custom_tests.ipynb + file: notebooks/tutorials/development/3-integrate_custom_tests.ipynb - text: "4 — Finalize testing & documentation" - file: notebooks/tutorials/model_development/4-finalize_testing_documentation.ipynb - - section: "Model validation" + file: notebooks/tutorials/development/4-finalize_testing_documentation.ipynb + - section: "Validation" contents: - text: "1 — Set up ValidMind Library for validation" - file: notebooks/tutorials/model_validation/1-set_up_validmind_for_validation.ipynb - - text: "2 —\u00A0Start model validation process" - file: notebooks/tutorials/model_validation/2-start_validation_process.ipynb - - text: "3 — Developing a challenger model" - file: notebooks/tutorials/model_validation/3-developing_challenger_model.ipynb + file: notebooks/tutorials/validation/1-set_up_validmind_for_validation.ipynb + - text: "2 — Start the validation process" + file: notebooks/tutorials/validation/2-start_validation_process.ipynb + - text: "3 — Developing a challenger" + file: notebooks/tutorials/validation/3-developing_potential_challenger.ipynb - text: "4 — Finalize validation & reporting" - file: notebooks/tutorials/model_validation/4-finalize_validation_reporting.ipynb + file: notebooks/tutorials/validation/4-finalize_validation_reporting.ipynb - text: "---" - text: "How-To" - text: "Run tests & test suites" @@ -57,11 +58,10 @@ website: contents: - notebooks/how_to/tests/explore_tests/explore_tests.ipynb - notebooks/how_to/tests/explore_tests/explore_test_suites.ipynb - - developer/how-to/test-sandbox.qmd - section: "Run tests" contents: - - notebooks/how_to/tests/run_tests/1_run_dataset_based_tests.ipynb - - notebooks/how_to/tests/run_tests/2_run_comparison_tests.ipynb + - notebooks/how_to/tests/run_tests/1-run_dataset-based_tests.ipynb + - 
notebooks/how_to/tests/run_tests/2-run_comparison_tests.ipynb - section: "Configuring tests" contents: "notebooks/how_to/tests/run_tests/configure_tests/*.ipynb" - section: "Using tests in documentation" @@ -97,8 +97,6 @@ website: contents: "notebooks/use_cases/code_explainer/**/*.ipynb" - section: "Credit risk" contents: "notebooks/use_cases/credit_risk/**/*.ipynb" - - section: "Model validation" - contents: "notebooks/use_cases/model_validation/**/*.ipynb" - section: "NLP and LLM" contents: "notebooks/use_cases/nlp_and_llm/**/*.ipynb" - section: "Ongoing monitoring" @@ -107,11 +105,12 @@ website: contents: "notebooks/use_cases/regression/**/*.ipynb" - section: "Time series" contents: "notebooks/use_cases/time_series/**/*.ipynb" + - section: "Validation" + contents: "notebooks/use_cases/validation/**/*.ipynb" - text: "---" - text: "Reference" - - text: "Test descriptions" - file: developer/test-descriptions.qmd - contents: tests/** + - text: "{{< var vm.product >}} test sandbox" + file: developer/how-to/test-sandbox.qmd - text: "{{< var validmind.api >}}" file: validmind/validmind.qmd # USING THE VARIABLE IN THE LINK TEXT MESSES UP THE MOBILE VIEW & BREADCRUMB diff --git a/site/developer/how-to/test-sandbox.qmd b/site/developer/how-to/test-sandbox.qmd index c9ea3a23c3..5eedac7292 100644 --- a/site/developer/how-to/test-sandbox.qmd +++ b/site/developer/how-to/test-sandbox.qmd @@ -2,19 +2,40 @@ # Copyright © 2023-2026 ValidMind Inc. All rights reserved. # Refer to the LICENSE file in the root of this repository for details. 
# SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial -title: "Test sandbox [beta]{.smallcaps}" -date: last-modified +pagetitle: "{{< var vm.product >}} test sandbox" +title-block-style: none +bread-crumbs: false +page-layout: full aliases: - /guide/test-sandbox.html - /developer/model-testing/test-sandbox.html + - /developer/test-descriptions.html + - /guide/test-descriptions.html + - /developer/model-testing/test-descriptions.html --- - -Explore our interactive sandbox to see what tests are available in the {{< var validmind.developer >}} and how you can use them in your own code. +```{=html} + +``` -::: {.column-screen-right} +::: {.column-screen} ```{=html}