diff --git a/notebooks/tutorials/validation/2-start_validation_process.ipynb b/notebooks/tutorials/validation/2-start_validation_process.ipynb index 8d7321060..e729c2ee3 100644 --- a/notebooks/tutorials/validation/2-start_validation_process.ipynb +++ b/notebooks/tutorials/validation/2-start_validation_process.ipynb @@ -106,7 +106,9 @@ }, { "cell_type": "code", + "execution_count": null, "metadata": {}, + "outputs": [], "source": [ "# Make sure the ValidMind Library is installed\n", "\n", @@ -128,9 +130,7 @@ " # model=\"...\",\n", " document=\"validation-report\",\n", ")" - ], - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "markdown", @@ -152,7 +152,9 @@ }, { "cell_type": "code", + "execution_count": null, "metadata": {}, + "outputs": [], "source": [ "from validmind.datasets.classification import customer_churn as demo_dataset\n", "\n", @@ -162,9 +164,7 @@ "\n", "raw_df = demo_dataset.load_data()\n", "raw_df.head()" - ], - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "markdown", @@ -195,23 +195,23 @@ }, { "cell_type": "code", + "execution_count": null, "metadata": {}, + "outputs": [], "source": [ "# Get the list of available task types\n", "sorted(vm.tests.list_tasks())" - ], - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "code", + "execution_count": null, "metadata": {}, + "outputs": [], "source": [ "# Get the list of available tags\n", "sorted(vm.tests.list_tags())" - ], - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "markdown", @@ -224,12 +224,12 @@ }, { "cell_type": "code", + "execution_count": null, "metadata": {}, + "outputs": [], "source": [ "vm.tests.list_tests(task=\"classification\", tags=[\"tabular_data\", \"data_quality\"])" - ], - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "markdown", @@ -259,7 +259,9 @@ }, { "cell_type": "code", + "execution_count": null, "metadata": {}, + "outputs": [], "source": [ "# vm_raw_dataset is now a VMDataset object that you can pass 
to any ValidMind test\n", "vm_raw_dataset = vm.init_dataset(\n", @@ -267,9 +269,7 @@ " input_id=\"raw_dataset\",\n", " target_column=\"Exited\",\n", ")" - ], - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "markdown", @@ -302,12 +302,12 @@ }, { "cell_type": "code", + "execution_count": null, "metadata": {}, + "outputs": [], "source": [ "vm.tests.describe_test(\"validmind.data_validation.DescriptiveStatistics\")" - ], - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "markdown", @@ -318,16 +318,16 @@ }, { "cell_type": "code", + "execution_count": null, "metadata": {}, + "outputs": [], "source": [ "result2 = vm.tests.run_test(\n", " test_id=\"validmind.data_validation.ClassImbalance\",\n", " inputs={\"dataset\": vm_raw_dataset},\n", " params={\"min_percent_threshold\": 30},\n", ")" - ], - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "markdown", @@ -340,7 +340,9 @@ }, { "cell_type": "code", + "execution_count": null, "metadata": {}, + "outputs": [], "source": [ "import pandas as pd\n", "\n", @@ -352,9 +354,7 @@ "\n", "balanced_raw_df = pd.concat([exited_df, not_exited_df])\n", "balanced_raw_df = balanced_raw_df.sample(frac=1, random_state=42)" - ], - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "markdown", @@ -367,7 +367,9 @@ }, { "cell_type": "code", + "execution_count": null, "metadata": {}, + "outputs": [], "source": [ "# Register new data and now 'balanced_raw_dataset' is the new dataset object of interest\n", "vm_balanced_raw_dataset = vm.init_dataset(\n", @@ -375,13 +377,13 @@ " input_id=\"balanced_raw_dataset\",\n", " target_column=\"Exited\",\n", ")" - ], - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "code", + "execution_count": null, "metadata": {}, + "outputs": [], "source": [ "# Pass the initialized `balanced_raw_dataset` as input into the test run\n", "result = vm.tests.run_test(\n", @@ -389,9 +391,7 @@ " inputs={\"dataset\": vm_balanced_raw_dataset},\n", " 
params={\"min_percent_threshold\": 30},\n", ")" - ], - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "markdown", @@ -410,16 +410,16 @@ }, { "cell_type": "code", + "execution_count": null, "metadata": {}, + "outputs": [], "source": [ "corr_result = vm.tests.run_test(\n", " test_id=\"validmind.data_validation.HighPearsonCorrelation\",\n", " params={\"max_threshold\": 0.3},\n", " inputs={\"dataset\": vm_balanced_raw_dataset},\n", ")" - ], - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "markdown", @@ -432,16 +432,16 @@ }, { "cell_type": "code", + "execution_count": null, "metadata": {}, + "outputs": [], "source": [ "print(type(corr_result))\n", "print(\"Result ID: \", corr_result.result_id)\n", "print(\"Params: \", corr_result.params)\n", "print(\"Passed: \", corr_result.passed)\n", "print(\"Tables: \", corr_result.tables)" - ], - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "markdown", @@ -454,25 +454,25 @@ }, { "cell_type": "code", + "execution_count": null, "metadata": {}, + "outputs": [], "source": [ "# Extract table from `corr_result.tables`\n", "features_df = corr_result.tables[0].data\n", "features_df" - ], - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "code", + "execution_count": null, "metadata": {}, + "outputs": [], "source": [ "# Extract list of features that failed the test\n", "high_correlation_features = features_df[features_df[\"Pass/Fail\"] == \"Fail\"][\"Columns\"].tolist()\n", "high_correlation_features" - ], - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "markdown", @@ -483,13 +483,13 @@ }, { "cell_type": "code", + "execution_count": null, "metadata": {}, + "outputs": [], "source": [ "high_correlation_features = [feature.split(\",\")[0].strip(\"()\") for feature in high_correlation_features]\n", "high_correlation_features" - ], - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "markdown", @@ -502,7 +502,9 @@ }, { "cell_type": "code", + 
"execution_count": null, "metadata": {}, + "outputs": [], "source": [ "# Remove the highly correlated features from the dataset\n", "balanced_raw_no_age_df = balanced_raw_df.drop(columns=high_correlation_features)\n", @@ -513,9 +515,7 @@ " input_id=\"raw_dataset_preprocessed\",\n", " target_column=\"Exited\",\n", ")" - ], - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "markdown", @@ -526,16 +526,16 @@ }, { "cell_type": "code", + "execution_count": null, "metadata": {}, + "outputs": [], "source": [ "corr_result = vm.tests.run_test(\n", " test_id=\"validmind.data_validation.HighPearsonCorrelation\",\n", " params={\"max_threshold\": 0.3},\n", " inputs={\"dataset\": vm_raw_dataset_preprocessed},\n", ")" - ], - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "markdown", @@ -546,15 +546,15 @@ }, { "cell_type": "code", + "execution_count": null, "metadata": {}, + "outputs": [], "source": [ "corr_result = vm.tests.run_test(\n", " test_id=\"validmind.data_validation.PearsonCorrelationMatrix\",\n", " inputs={\"dataset\": vm_raw_dataset_preprocessed},\n", ")" - ], - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "markdown", @@ -589,7 +589,9 @@ }, { "cell_type": "code", + "execution_count": null, "metadata": {}, + "outputs": [], "source": [ "# Individual test config with inputs specified\n", "test_config = {\n", @@ -602,9 +604,7 @@ " \"params\": {\"max_threshold\": 0.3}\n", " },\n", "}" - ], - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "markdown", @@ -615,7 +615,9 @@ }, { "cell_type": "code", + "execution_count": null, "metadata": {}, + "outputs": [], "source": [ "for t in test_config:\n", " print(t)\n", @@ -635,9 +637,7 @@ " vm.tests.run_test(t, inputs=test_config[t]['inputs']).log()\n", " except Exception as e:\n", " print(f\"Error running test {t}: {str(e)}\")" - ], - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "markdown", @@ -666,7 +666,9 @@ }, { "cell_type": "code", + 
"execution_count": null, "metadata": {}, + "outputs": [], "source": [ "result = vm.tests.run_test(\n", " test_id=\"validmind.data_validation.HighPearsonCorrelation:balanced_raw_dataset\",\n", @@ -674,9 +676,7 @@ " inputs={\"dataset\": vm_balanced_raw_dataset},\n", ")\n", "result.log()" - ], - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "markdown", @@ -694,28 +694,23 @@ "\n", "2. In the left sidebar that appears for your model, click **Validation** under Documents.\n", "\n", - "3. Locate the Data Preparation section and click on **2.2.1. Data Quality** to expand that section.\n", + "3. Click on **2.2.1. Data Quality** to expand that section.\n", "\n", - "4. Under the Class Imbalance Assessment section, locate Validator Evidence then click **Link Evidence to Report**:\n", + "4. Under the Class Imbalance Assessment guideline, click **Evidence** to expand the evidence panel.\n", "\n", - " \"Screenshot\n", - "

\n", + "5. Click **Link Evidence**, then select **Validator Evidence**.\n", "\n", - "5. Select the Class Imbalance test results we logged: **ValidMind Data Validation Class Imbalance** \n", + "6. Select the Class Imbalance test results we logged: **ValidMind Data Validation Class Imbalance**\n", "\n", " \"Screenshot\n", "

\n", "\n", - "6. Click **Update Linked Evidence** to add the test results to the validation report.\n", + "7. Click **Update Linked Evidence** to add the test results to the validation report.\n", "\n", - " Confirm that the results for the Class Imbalance test you inserted has been correctly inserted into section **2.2.1. Data Quality** of the report:\n", + "8. Confirm that the results for the Class Imbalance test have been correctly inserted into section **2.2.1. Data Quality** of the report.\n", "\n", - " \"Screenshot\n", - "

\n", - "\n", - "7. Note that these test results are flagged as **Requires Attention** — as they include comparative results from our initial raw dataset.\n", - "\n", - " Click **See evidence details** to review the LLM-generated description that summarizes the test results, that confirm that our final preprocessed dataset actually passes our test:\n", + " - Note that these test results are flagged as **Requires Attention** — as they include comparative results from our initial raw dataset.\n", + " - Click **See evidence details** to review the LLM-generated description that summarizes the test results and confirms that our final preprocessed dataset actually passes our test:\n", "\n", " \"Screenshot\n", "

\n", @@ -749,12 +744,12 @@ }, { "cell_type": "code", + "execution_count": null, "metadata": {}, + "outputs": [], "source": [ "balanced_raw_no_age_df.head()" - ], - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "markdown", @@ -768,15 +763,15 @@ }, { "cell_type": "code", + "execution_count": null, "metadata": {}, + "outputs": [], "source": [ "balanced_raw_no_age_df = pd.get_dummies(\n", " balanced_raw_no_age_df, columns=[\"Geography\", \"Gender\"], drop_first=True\n", ")\n", "balanced_raw_no_age_df.head()" - ], - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "markdown", @@ -790,7 +785,9 @@ }, { "cell_type": "code", + "execution_count": null, "metadata": {}, + "outputs": [], "source": [ "from sklearn.model_selection import train_test_split\n", "\n", @@ -800,9 +797,7 @@ "y_train = train_df[\"Exited\"]\n", "X_test = test_df.drop(\"Exited\", axis=1)\n", "y_test = test_df[\"Exited\"]" - ], - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "markdown", @@ -817,7 +812,9 @@ }, { "cell_type": "code", + "execution_count": null, "metadata": {}, + "outputs": [], "source": [ "vm_train_ds = vm.init_dataset(\n", " input_id=\"train_dataset_final\",\n", @@ -830,9 +827,7 @@ " dataset=test_df,\n", " target_column=\"Exited\",\n", ")" - ], - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "markdown", diff --git a/notebooks/tutorials/validation/3-developing_potential_challenger.ipynb b/notebooks/tutorials/validation/3-developing_potential_challenger.ipynb index ca9cb71f6..4fac005c5 100644 --- a/notebooks/tutorials/validation/3-developing_potential_challenger.ipynb +++ b/notebooks/tutorials/validation/3-developing_potential_challenger.ipynb @@ -111,7 +111,9 @@ }, { "cell_type": "code", + "execution_count": null, "metadata": {}, + "outputs": [], "source": [ "# Make sure the ValidMind Library is installed\n", "\n", @@ -133,9 +135,7 @@ " # model=\"...\",\n", " document=\"validation-report\",\n", ")" - ], - "execution_count": null, 
- "outputs": [] + ] }, { "cell_type": "markdown", @@ -150,7 +150,9 @@ }, { "cell_type": "code", + "execution_count": null, "metadata": {}, + "outputs": [], "source": [ "# Load the sample dataset\n", "from validmind.datasets.classification import customer_churn as demo_dataset\n", @@ -160,9 +162,7 @@ ")\n", "\n", "raw_df = demo_dataset.load_data()" - ], - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "markdown", @@ -177,7 +177,9 @@ }, { "cell_type": "code", + "execution_count": null, "metadata": {}, + "outputs": [], "source": [ "import pandas as pd\n", "\n", @@ -189,9 +191,7 @@ "\n", "balanced_raw_df = pd.concat([exited_df, not_exited_df])\n", "balanced_raw_df = balanced_raw_df.sample(frac=1, random_state=42)" - ], - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "markdown", @@ -204,7 +204,9 @@ }, { "cell_type": "code", + "execution_count": null, "metadata": {}, + "outputs": [], "source": [ "# Register new data and now 'balanced_raw_dataset' is the new dataset object of interest\n", "vm_balanced_raw_dataset = vm.init_dataset(\n", @@ -212,9 +214,7 @@ " input_id=\"balanced_raw_dataset\",\n", " target_column=\"Exited\",\n", ")" - ], - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "markdown", @@ -225,7 +225,9 @@ }, { "cell_type": "code", + "execution_count": null, "metadata": {}, + "outputs": [], "source": [ "# Run HighPearsonCorrelation test with our balanced dataset as input and return a result object\n", "corr_result = vm.tests.run_test(\n", @@ -233,42 +235,40 @@ " params={\"max_threshold\": 0.3},\n", " inputs={\"dataset\": vm_balanced_raw_dataset},\n", ")" - ], - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "code", + "execution_count": null, "metadata": {}, + "outputs": [], "source": [ "# From result object, extract table from `corr_result.tables`\n", "features_df = corr_result.tables[0].data\n", "features_df" - ], - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "code", + 
"execution_count": null, "metadata": {}, + "outputs": [], "source": [ "# Extract list of features that failed the test\n", "high_correlation_features = features_df[features_df[\"Pass/Fail\"] == \"Fail\"][\"Columns\"].tolist()\n", "high_correlation_features" - ], - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "code", + "execution_count": null, "metadata": {}, + "outputs": [], "source": [ "# Extract feature names from the list of strings\n", "high_correlation_features = [feature.split(\",\")[0].strip(\"()\") for feature in high_correlation_features]\n", "high_correlation_features" - ], - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "markdown", @@ -279,7 +279,9 @@ }, { "cell_type": "code", + "execution_count": null, "metadata": {}, + "outputs": [], "source": [ "# Remove the highly correlated features from the dataset\n", "balanced_raw_no_age_df = balanced_raw_df.drop(columns=high_correlation_features)\n", @@ -290,13 +292,13 @@ " input_id=\"raw_dataset_preprocessed\",\n", " target_column=\"Exited\",\n", ")" - ], - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "code", + "execution_count": null, "metadata": {}, + "outputs": [], "source": [ "# Re-run the test with the reduced feature set\n", "corr_result = vm.tests.run_test(\n", @@ -304,9 +306,7 @@ " params={\"max_threshold\": 0.3},\n", " inputs={\"dataset\": vm_raw_dataset_preprocessed},\n", ")" - ], - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "markdown", @@ -321,20 +321,22 @@ }, { "cell_type": "code", + "execution_count": null, "metadata": {}, + "outputs": [], "source": [ "# Encode categorical features in the dataset\n", "balanced_raw_no_age_df = pd.get_dummies(\n", " balanced_raw_no_age_df, columns=[\"Geography\", \"Gender\"], drop_first=True\n", ")\n", "balanced_raw_no_age_df.head()" - ], - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "code", + "execution_count": null, "metadata": {}, + "outputs": [], "source": [ "from 
sklearn.model_selection import train_test_split\n", "\n", @@ -345,13 +347,13 @@ "y_train = train_df[\"Exited\"]\n", "X_test = test_df.drop(\"Exited\", axis=1)\n", "y_test = test_df[\"Exited\"]" - ], - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "code", + "execution_count": null, "metadata": {}, + "outputs": [], "source": [ "# Initialize the split datasets\n", "vm_train_ds = vm.init_dataset(\n", @@ -365,9 +367,7 @@ " dataset=test_df,\n", " target_column=\"Exited\",\n", ")" - ], - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "markdown", @@ -382,16 +382,16 @@ }, { "cell_type": "code", + "execution_count": null, "metadata": {}, + "outputs": [], "source": [ "# Import the champion model\n", "import pickle as pkl\n", "\n", "with open(\"lr_model_champion.pkl\", \"rb\") as f:\n", " log_reg = pkl.load(f)" - ], - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "markdown", @@ -421,7 +421,9 @@ }, { "cell_type": "code", + "execution_count": null, "metadata": {}, + "outputs": [], "source": [ "# Import the Random Forest Classification model\n", "from sklearn.ensemble import RandomForestClassifier\n", @@ -434,9 +436,7 @@ "\n", "# Train the model\n", "rf_model.fit(X_train, y_train)" - ], - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "markdown", @@ -456,7 +456,9 @@ }, { "cell_type": "code", + "execution_count": null, "metadata": {}, + "outputs": [], "source": [ "# Initialize the champion logistic regression model\n", "vm_log_model = vm.init_model(\n", @@ -469,9 +471,7 @@ " rf_model,\n", " input_id=\"rf_model\",\n", ")" - ], - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "markdown", @@ -491,7 +491,9 @@ }, { "cell_type": "code", + "execution_count": null, "metadata": {}, + "outputs": [], "source": [ "# Champion — Logistic regression model\n", "vm_train_ds.assign_predictions(model=vm_log_model)\n", @@ -500,9 +502,7 @@ "# Challenger — Random forest classification model\n", 
"vm_train_ds.assign_predictions(model=vm_rf_model)\n", "vm_test_ds.assign_predictions(model=vm_rf_model)" - ], - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "markdown", @@ -530,13 +530,13 @@ }, { "cell_type": "code", + "execution_count": null, "metadata": {}, + "outputs": [], "source": [ "\n", "vm.tests.list_tests(tags=[\"model_performance\"], task=\"classification\")" - ], - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "markdown", @@ -555,7 +555,9 @@ }, { "cell_type": "code", + "execution_count": null, "metadata": {}, + "outputs": [], "source": [ "mpt = [\n", " \"validmind.model_validation.sklearn.ClassifierPerformance:logreg_champion\",\n", @@ -564,9 +566,7 @@ " \"validmind.model_validation.sklearn.MinimumF1Score:logreg_champion\",\n", " \"validmind.model_validation.sklearn.ROCCurve:logreg_champion\"\n", "]" - ], - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "markdown", @@ -584,7 +584,9 @@ }, { "cell_type": "code", + "execution_count": null, "metadata": {}, + "outputs": [], "source": [ "for test in mpt:\n", " vm.tests.run_test(\n", @@ -593,9 +595,7 @@ " \"dataset\": vm_test_ds, \"model\" : vm_log_model,\n", " },\n", " ).log()" - ], - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "markdown", @@ -620,29 +620,28 @@ "\n", "2. In the left sidebar that appears for your model, click **Validation** under Documents.\n", "\n", - "3. Locate the Data Preparation section and click on **2.2.2. Model Performance** to expand that section.\n", - "\n", - "4. Under the Model Performance Metrics section, locate Artifacts then click **Link Artifact to Report**:\n", - "\n", - " \"Screenshot\n", - "

\n", + "3. Click on **2.2.2. Model Performance** to expand that section.\n", "\n", - "5. Select **Validation Issue** as the type of artifact.\n", + "4. Under the Model Performance Metrics guideline, click to expand the **Artifacts** panel.\n", "\n", - "6. Click **+ Add Validation Issue** to add a validation issue type artifact.\n", + "5. Click **Link Artifact** and select **Validation Issue** as the type of artifact.\n", "\n", - "76. Enter in the details for your validation issue, for example:\n", + "6. Click **+ Add Validation Issue** and enter the details for your validation issue, for example:\n", "\n", " - **TITLE** — Champion Logistic Regression Model Fails Minimum Accuracy Threshold\n", " - **RISK AREA** — Model Performance\n", " - **DOCUMENTATION SECTION** — 3.2. Model Evaluation\n", " - **DESCRIPTION** — The logistic regression champion model was subjected to a Minimum Accuracy test to determine whether its predictive accuracy meets the predefined performance threshold of 0.7. The model achieved an accuracy score of 0.6136, which falls below the required minimum. As a result, the test produced a Fail outcome.\n", "\n", - "8. Click **Save**.\n", + "7. Click **Add Validation Issue** to submit the validation issue.\n", + "\n", + "8. Select the validation issue you just added to link to your validation report.\n", + "\n", + "9. Click **Update Linked Artifacts** to insert your validation issue.\n", "\n", - "9. Select the validation issue you just added to link to your validation report and click **Update Linked Artifacts** to insert your validation issue.\n", + "10. Confirm that the validation issue has been correctly inserted into section **2.2.2. Model Performance** of the report.\n", "\n", - "10. Click on the validation issue to expand the issue, where you can adjust details such as severity, owner, due date, status, etc. as well as include proposed remediation plans or supporting documentation as attachments." + "11. 
Click on the validation issue to expand the issue, where you can adjust details such as severity, owner, due date, status, etc. as well as include proposed remediation plans or supporting documentation as attachments." ] }, { @@ -660,7 +659,9 @@ }, { "cell_type": "code", + "execution_count": null, "metadata": {}, + "outputs": [], "source": [ "mpt_chall = [\n", " \"validmind.model_validation.sklearn.ClassifierPerformance:champion_vs_challenger\",\n", @@ -669,9 +670,7 @@ " \"validmind.model_validation.sklearn.MinimumF1Score:champion_vs_challenger\",\n", " \"validmind.model_validation.sklearn.ROCCurve:champion_vs_challenger\"\n", "]" - ], - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "markdown", @@ -682,7 +681,9 @@ }, { "cell_type": "code", + "execution_count": null, "metadata": {}, + "outputs": [], "source": [ "for test in mpt_chall:\n", " vm.tests.run_test(\n", @@ -691,9 +692,7 @@ " \"dataset\": [vm_test_ds], \"model\" : [vm_log_model,vm_rf_model]\n", " }\n", " ).log()" - ], - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "markdown", @@ -719,12 +718,12 @@ }, { "cell_type": "code", + "execution_count": null, "metadata": {}, + "outputs": [], "source": [ "vm.tests.list_tests(tags=[\"model_diagnosis\"], task=\"classification\")" - ], - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "markdown", @@ -740,7 +739,9 @@ }, { "cell_type": "code", + "execution_count": null, "metadata": {}, + "outputs": [], "source": [ "vm.tests.run_test(\n", " test_id=\"validmind.model_validation.sklearn.OverfitDiagnosis:champion_vs_challenger\",\n", @@ -749,9 +750,7 @@ " \"model\" : [vm_log_model,vm_rf_model]\n", " }\n", ").log()" - ], - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "markdown", @@ -764,7 +763,9 @@ }, { "cell_type": "code", + "execution_count": null, "metadata": {}, + "outputs": [], "source": [ "vm.tests.run_test(\n", " 
test_id=\"validmind.model_validation.sklearn.RobustnessDiagnosis:Champion_vs_LogRegression\",\n", @@ -773,9 +774,7 @@ " \"model\" : [vm_log_model,vm_rf_model]\n", " },\n", ").log()" - ], - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "markdown", @@ -792,14 +791,14 @@ }, { "cell_type": "code", + "execution_count": null, "metadata": {}, + "outputs": [], "source": [ "# Store the feature importance tests\n", "FI = vm.tests.list_tests(tags=[\"feature_importance\"], task=\"classification\",pretty=False)\n", "FI" - ], - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "markdown", @@ -810,7 +809,9 @@ }, { "cell_type": "code", + "execution_count": null, "metadata": {}, + "outputs": [], "source": [ "# Run and log our feature importance tests for both models for the testing dataset\n", "for test in FI:\n", @@ -820,9 +821,7 @@ " \"dataset\": [vm_test_ds], \"model\" : [vm_log_model,vm_rf_model]\n", " },\n", " ).log()" - ], - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "markdown", diff --git a/notebooks/tutorials/validation/4-finalize_validation_reporting.ipynb b/notebooks/tutorials/validation/4-finalize_validation_reporting.ipynb index 768c569b2..32d46c6e2 100644 --- a/notebooks/tutorials/validation/4-finalize_validation_reporting.ipynb +++ b/notebooks/tutorials/validation/4-finalize_validation_reporting.ipynb @@ -121,7 +121,9 @@ }, { "cell_type": "code", + "execution_count": null, "metadata": {}, + "outputs": [], "source": [ "# Make sure the ValidMind Library is installed\n", "\n", @@ -143,9 +145,7 @@ " # model=\"...\",\n", " document=\"validation-report\",\n", ")" - ], - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "markdown", @@ -160,7 +160,9 @@ }, { "cell_type": "code", + "execution_count": null, "metadata": {}, + "outputs": [], "source": [ "# Load the sample dataset\n", "from validmind.datasets.classification import customer_churn as demo_dataset\n", @@ -170,13 +172,13 @@ ")\n", "\n", "raw_df = 
demo_dataset.load_data()" - ], - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "code", + "execution_count": null, "metadata": {}, + "outputs": [], "source": [ "# Initialize the raw dataset for use in ValidMind tests\n", "vm_raw_dataset = vm.init_dataset(\n", @@ -184,13 +186,13 @@ " input_id=\"raw_dataset\",\n", " target_column=\"Exited\",\n", ")" - ], - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "code", + "execution_count": null, "metadata": {}, + "outputs": [], "source": [ "import pandas as pd\n", "\n", @@ -202,9 +204,7 @@ "\n", "balanced_raw_df = pd.concat([exited_df, not_exited_df])\n", "balanced_raw_df = balanced_raw_df.sample(frac=1, random_state=42)" - ], - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "markdown", @@ -215,7 +215,9 @@ }, { "cell_type": "code", + "execution_count": null, "metadata": {}, + "outputs": [], "source": [ "# Register new data and now 'balanced_raw_dataset' is the new dataset object of interest\n", "vm_balanced_raw_dataset = vm.init_dataset(\n", @@ -223,13 +225,13 @@ " input_id=\"balanced_raw_dataset\",\n", " target_column=\"Exited\",\n", ")" - ], - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "code", + "execution_count": null, "metadata": {}, + "outputs": [], "source": [ "# Run HighPearsonCorrelation test with our balanced dataset as input and return a result object\n", "corr_result = vm.tests.run_test(\n", @@ -237,46 +239,46 @@ " params={\"max_threshold\": 0.3},\n", " inputs={\"dataset\": vm_balanced_raw_dataset},\n", ")" - ], - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "code", + "execution_count": null, "metadata": {}, + "outputs": [], "source": [ "# From result object, extract table from `corr_result.tables`\n", "features_df = corr_result.tables[0].data\n", "features_df" - ], - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "code", + "execution_count": null, "metadata": {}, + "outputs": [], "source": [ "# Extract list of 
features that failed the test\n", "high_correlation_features = features_df[features_df[\"Pass/Fail\"] == \"Fail\"][\"Columns\"].tolist()\n", "high_correlation_features" - ], - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "code", + "execution_count": null, "metadata": {}, + "outputs": [], "source": [ "# Extract feature names from the list of strings\n", "high_correlation_features = [feature.split(\",\")[0].strip(\"()\") for feature in high_correlation_features]\n", "high_correlation_features" - ], - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "code", + "execution_count": null, "metadata": {}, + "outputs": [], "source": [ "# Remove the highly correlated features from the dataset\n", "balanced_raw_no_age_df = balanced_raw_df.drop(columns=high_correlation_features)\n", @@ -287,13 +289,13 @@ " input_id=\"raw_dataset_preprocessed\",\n", " target_column=\"Exited\",\n", ")" - ], - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "code", + "execution_count": null, "metadata": {}, + "outputs": [], "source": [ "# Re-run the test with the reduced feature set\n", "corr_result = vm.tests.run_test(\n", @@ -301,9 +303,7 @@ " params={\"max_threshold\": 0.3},\n", " inputs={\"dataset\": vm_raw_dataset_preprocessed},\n", ")" - ], - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "markdown", @@ -318,20 +318,22 @@ }, { "cell_type": "code", + "execution_count": null, "metadata": {}, + "outputs": [], "source": [ "# Encode categorical features in the dataset\n", "balanced_raw_no_age_df = pd.get_dummies(\n", " balanced_raw_no_age_df, columns=[\"Geography\", \"Gender\"], drop_first=True\n", ")\n", "balanced_raw_no_age_df.head()" - ], - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "code", + "execution_count": null, "metadata": {}, + "outputs": [], "source": [ "from sklearn.model_selection import train_test_split\n", "\n", @@ -342,13 +344,13 @@ "y_train = train_df[\"Exited\"]\n", "X_test = 
test_df.drop(\"Exited\", axis=1)\n", "y_test = test_df[\"Exited\"]" - ], - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "code", + "execution_count": null, "metadata": {}, + "outputs": [], "source": [ "# Initialize the split datasets\n", "vm_train_ds = vm.init_dataset(\n", @@ -362,9 +364,7 @@ " dataset=test_df,\n", " target_column=\"Exited\",\n", ")" - ], - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "markdown", @@ -379,16 +379,16 @@ }, { "cell_type": "code", + "execution_count": null, "metadata": {}, + "outputs": [], "source": [ "# Import the champion model\n", "import pickle as pkl\n", "\n", "with open(\"lr_model_champion.pkl\", \"rb\") as f:\n", " log_reg = pkl.load(f)" - ], - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "markdown", @@ -403,7 +403,9 @@ }, { "cell_type": "code", + "execution_count": null, "metadata": {}, + "outputs": [], "source": [ "# Import the Random Forest Classification model\n", "from sklearn.ensemble import RandomForestClassifier\n", @@ -416,9 +418,7 @@ "\n", "# Train the model\n", "rf_model.fit(X_train, y_train)" - ], - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "markdown", @@ -433,7 +433,9 @@ }, { "cell_type": "code", + "execution_count": null, "metadata": {}, + "outputs": [], "source": [ "# Initialize the champion logistic regression model\n", "vm_log_model = vm.init_model(\n", @@ -446,13 +448,13 @@ " rf_model,\n", " input_id=\"rf_model\",\n", ")" - ], - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "code", + "execution_count": null, "metadata": {}, + "outputs": [], "source": [ "# Assign predictions to Champion — Logistic regression model\n", "vm_train_ds.assign_predictions(model=vm_log_model)\n", @@ -461,9 +463,7 @@ "# Assign predictions to Challenger — Random forest classification model\n", "vm_train_ds.assign_predictions(model=vm_rf_model)\n", "vm_test_ds.assign_predictions(model=vm_rf_model)" - ], - "execution_count": null, - "outputs": [] + ] 
}, { "cell_type": "markdown", @@ -509,7 +509,9 @@ }, { "cell_type": "code", + "execution_count": null, "metadata": {}, + "outputs": [], "source": [ "import matplotlib.pyplot as plt\n", "from sklearn import metrics\n", @@ -523,9 +525,7 @@ " confusion_matrix=confusion_matrix, display_labels=[False, True]\n", ")\n", "cm_display.plot()" - ], - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "markdown", @@ -544,7 +544,9 @@ }, { "cell_type": "code", + "execution_count": null, "metadata": {}, + "outputs": [], "source": [ "@vm.test(\"my_custom_tests.ConfusionMatrix\")\n", "def confusion_matrix(dataset, model):\n", @@ -572,9 +574,7 @@ " plt.close() # close the plot to avoid displaying it\n", "\n", " return cm_display.figure_ # return the figure object itself" - ], - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "markdown", @@ -585,7 +585,9 @@ }, { "cell_type": "code", + "execution_count": null, "metadata": {}, + "outputs": [], "source": [ "# Champion train and test\n", "vm.tests.run_test(\n", @@ -595,13 +597,13 @@ " \"model\" : [vm_log_model]\n", " }\n", ").log()" - ], - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "code", + "execution_count": null, "metadata": {}, + "outputs": [], "source": [ "# Challenger train and test\n", "vm.tests.run_test(\n", @@ -611,9 +613,7 @@ " \"model\" : [vm_rf_model]\n", " }\n", ").log()" - ], - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "markdown", @@ -637,7 +637,9 @@ }, { "cell_type": "code", + "execution_count": null, "metadata": {}, + "outputs": [], "source": [ "@vm.test(\"my_custom_tests.ConfusionMatrix\")\n", "def confusion_matrix(dataset, model, normalize=False):\n", @@ -668,9 +670,7 @@ " plt.close() # close the plot to avoid displaying it\n", "\n", " return cm_display.figure_ # return the figure object itself" - ], - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "markdown", @@ -690,7 +690,9 @@ }, { "cell_type": "code", + "execution_count": null, 
"metadata": {}, + "outputs": [], "source": [ "# Champion with test dataset and normalize=True\n", "vm.tests.run_test(\n", @@ -701,13 +703,13 @@ " },\n", " params={\"normalize\": True}\n", ").log()" - ], - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "code", + "execution_count": null, "metadata": {}, + "outputs": [], "source": [ "# Challenger with test dataset and normalize=True\n", "vm.tests.run_test(\n", @@ -718,9 +720,7 @@ " },\n", " params={\"normalize\": True}\n", ").log()" - ], - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "markdown", @@ -756,7 +756,9 @@ }, { "cell_type": "code", + "execution_count": null, "metadata": {}, + "outputs": [], "source": [ "tests_folder = \"my_tests\"\n", "\n", @@ -770,9 +772,7 @@ " # remove files and pycache\n", " if f.endswith(\".py\") or f == \"__pycache__\":\n", " os.system(f\"rm -rf {tests_folder}/{f}\")" - ], - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "markdown", @@ -809,16 +809,16 @@ }, { "cell_type": "code", + "execution_count": null, "metadata": {}, + "outputs": [], "source": [ "confusion_matrix.save(\n", " # Save it to the custom tests folder we created\n", " tests_folder,\n", " imports=[\"import matplotlib.pyplot as plt\", \"from sklearn import metrics\"],\n", ")" - ], - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "markdown", @@ -873,7 +873,9 @@ }, { "cell_type": "code", + "execution_count": null, "metadata": {}, + "outputs": [], "source": [ "from validmind.tests import LocalTestProvider\n", "\n", @@ -886,9 +888,7 @@ ")\n", "# `my_test_provider.load_test()` will be called for any test ID that starts with `my_test_provider`\n", "# e.g. 
`my_test_provider.ConfusionMatrix` will look for a function named `ConfusionMatrix` in `my_tests/ConfusionMatrix.py` file" - ], - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "markdown", @@ -906,7 +906,9 @@ }, { "cell_type": "code", + "execution_count": null, "metadata": {}, + "outputs": [], "source": [ "# Champion with test dataset and test provider custom test\n", "vm.tests.run_test(\n", @@ -916,13 +918,13 @@ " \"model\" : [vm_log_model]\n", " }\n", ").log()" - ], - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "code", + "execution_count": null, "metadata": {}, + "outputs": [], "source": [ "# Challenger with test dataset and test provider custom test\n", "vm.tests.run_test(\n", @@ -932,9 +934,7 @@ " \"model\" : [vm_rf_model]\n", " }\n", ").log()" - ], - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "markdown", @@ -951,7 +951,9 @@ }, { "cell_type": "code", + "execution_count": null, "metadata": {}, + "outputs": [], "source": [ "test_config = {\n", " # Run with the raw dataset\n", @@ -1061,9 +1063,7 @@ " 'params': {'min_threshold': 0.5}\n", " }\n", "}" - ], - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "markdown", @@ -1074,7 +1074,9 @@ }, { "cell_type": "code", + "execution_count": null, "metadata": {}, + "outputs": [], "source": [ "for t in test_config:\n", " print(t)\n", @@ -1094,9 +1096,7 @@ " vm.tests.run_test(t, inputs=test_config[t]['inputs']).log()\n", " except Exception as e:\n", " print(f\"Error running test {t}: {str(e)}\")" - ], - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "markdown", @@ -1141,7 +1141,7 @@ "\n", "Now that you've logged all your test results and verified the work done by the development team, head to the ValidMind Platform to wrap up your validation report. Continue to work on your validation report by:\n", "\n", - "- **Inserting additional test results:** Click **Link Evidence to Report** under any section of 2. 
Validation in your validation report. (Learn more: [Link evidence to reports](https://docs.validmind.ai/guide/validation/assess-compliance.html#link-evidence-to-reports))\n", + "- **Inserting additional test results:** Click **Link Evidence** under any Evidence panel of 2. Validation in your validation report. (Learn more: [Link evidence to reports](https://docs.validmind.ai/guide/validation/assess-compliance.html#link-evidence-to-reports))\n", "\n", "- **Making qualitative edits to your test descriptions:** Expand any linked evidence under Validator Evidence and click **See evidence details** to review and edit the ValidMind-generated test descriptions for quality and accuracy. (Learn more: [Preparing validation reports](https://docs.validmind.ai/guide/validation/preparing-validation-reports.html#validation-overview))\n", "\n", @@ -1149,7 +1149,7 @@ "\n", "- **Adding risk assessment notes:** Click under **Risk Assessment Notes** in any validation report section to access the text editor and content editing toolbar, including an option to generate a draft with AI. Once generated, edit your ValidMind-generated test descriptions to adhere to your organization's requirements. (Learn more: [Work with content blocks](https://docs.validmind.ai/guide/documentation/work-with-content-blocks.html#content-editing-toolbar))\n", "\n", - "- **Assessing compliance:** Under the Guideline for any validation report section, click **ASSESSMENT** and select the compliance status from the drop-down menu. (Learn more: [Provide compliance assessments](https://docs.validmind.ai/guide/validation/assess-compliance.html#provide-compliance-assessments))\n", + "- **Assessing compliance:** Under the Guideline for any validation report section, click **ASSESSMENT** and select the compliance status from the drop-down menu. 
(Learn more: [Assign compliance assessments](https://docs.validmind.ai/guide/validation/assess-compliance.html#assign-compliance-assessments))\n", "\n", "- **Collaborate with other stakeholders:** Use the ValidMind Platform's real-time collaborative features to work seamlessly together with the rest of your organization, including developers. Propose suggested changes in the documentation, work with versioned history, and use comments to discuss specific portions of the documentation. (Learn more: [Collaborate with others](https://docs.validmind.ai/guide/documentation/collaborate-with-others.html))\n", "\n", diff --git a/notebooks/tutorials/validation/inserted-class-imbalance-results.png b/notebooks/tutorials/validation/inserted-class-imbalance-results.png deleted file mode 100644 index 2efea1a09..000000000 Binary files a/notebooks/tutorials/validation/inserted-class-imbalance-results.png and /dev/null differ diff --git a/notebooks/tutorials/validation/link-artifact.png b/notebooks/tutorials/validation/link-artifact.png deleted file mode 100644 index 23edc1c9e..000000000 Binary files a/notebooks/tutorials/validation/link-artifact.png and /dev/null differ diff --git a/notebooks/tutorials/validation/link-validator-evidence.png b/notebooks/tutorials/validation/link-validator-evidence.png deleted file mode 100644 index 660a19cfa..000000000 Binary files a/notebooks/tutorials/validation/link-validator-evidence.png and /dev/null differ