# Deposits Forecast Multiple Models using PyMC and Random Forest

1. Data preparation
   - Load data
   - Create VM datasets
   - Run data validation tests
2. Model development
   - Fit seasonality and random forest models
   - Create VM datasets and models 
   - Assign predictions
   - Run model validation tests

# Pre-requisites

Let's go ahead and install the `validmind` library if it's not already installed.

In [None]:
# Install the validmind package into the notebook's environment; -q asks pip for quiet output.
%pip install -q validmind

## Initialize the client library

ValidMind generates a unique _code snippet_ for each registered model to connect with your developer environment. You initialize the client library with this code snippet, which ensures that your documentation and tests are uploaded to the correct model when you run the notebook.

Get your code snippet:

1. In a browser, log into the [Platform UI](https://app.prod.validmind.ai).

2. In the left sidebar, navigate to **Model Inventory** and click **+ Register new model**.

3. Enter the model details, making sure to select **Time Series Forecasting** as the template and **Credit Risk - Underwriting - Loan** as the use case, and click **Continue**. ([Need more help?](https://docs.validmind.ai/guide/register-models-in-model-inventory.html))

4. Go to **Getting Started** and click **Copy snippet to clipboard**.

Next, replace this placeholder with your own code snippet:

In [None]:
import validmind as vm

# Placeholder connection settings: replace the "..." values with the ones from
# the code snippet copied out of the platform before running this cell.
vm.init(
    api_host="https://api.prod.validmind.ai/api/v1/tracking",
    api_key="...",
    api_secret="...",
    project="...",
)

## External test provider

In [None]:
from validmind.tests import LocalTestProvider

# Relative folder handed to the provider; presumably where the custom test
# modules live -- confirm it exists next to this notebook.
tests_folder = "tests"
my_test_provider = LocalTestProvider(tests_folder)

# Later cells refer to tests from this provider through the
# "deposits_test_provider." prefix registered here.
vm.tests.register_test_provider(
    test_provider=my_test_provider,
    namespace="deposits_test_provider",
)

# 1. Data preparation

## Load data

In [None]:
from validmind.datasets.regression import fred_multiple_deposits as demo

# demo.load_data() is unpacked into nine objects: five deposit series followed
# by four interest-rate series (presumably pandas DataFrames -- they are
# copied and indexed by column name below).
(
    deposits_1,
    deposits_2,
    deposits_3,
    deposits_4,
    deposits_5,
    fed_funds,
    tb3ms,
    gs10,
    gs30,
) = demo.load_data()

target_column = "DPSACBW027NBOG"

# Column name -> frame it is read from. Insertion order is the order in which
# the columns are appended to every raw dataframe.
_rate_frames = {
    "FEDFUNDS": fed_funds,
    "TB3MS": tb3ms,
    "GS10": gs10,
    "GS30": gs30,
}


def _with_rate_columns(deposits):
    """Return a copy of *deposits* with the four rate columns appended.

    The input is never mutated. Each rate column is assigned straight from its
    source frame, so rows are matched however the underlying library aligns
    them on assignment (index alignment, if these are pandas objects).
    """
    raw_df = deposits.copy()
    for column, frame in _rate_frames.items():
        raw_df[column] = frame[column]
    return raw_df


# One raw dataframe per model; every one shares the same rate features.
raw_1_df = _with_rate_columns(deposits_1)
raw_2_df = _with_rate_columns(deposits_2)
raw_3_df = _with_rate_columns(deposits_3)
raw_4_df = _with_rate_columns(deposits_4)
raw_5_df = _with_rate_columns(deposits_5)

## Create VM datasets

In [None]:
# Wrap each raw dataframe in a VM dataset. The input_id strings mirror the
# variable names, and all five share the same target column.
vm_raw_1_ds = vm.init_dataset(dataset=raw_1_df, input_id="raw_1_ds", target_column=target_column)
vm_raw_2_ds = vm.init_dataset(dataset=raw_2_df, input_id="raw_2_ds", target_column=target_column)
vm_raw_3_ds = vm.init_dataset(dataset=raw_3_df, input_id="raw_3_ds", target_column=target_column)
vm_raw_4_ds = vm.init_dataset(dataset=raw_4_df, input_id="raw_4_ds", target_column=target_column)
vm_raw_5_ds = vm.init_dataset(dataset=raw_5_df, input_id="raw_5_ds", target_column=target_column)

## Run data validation tests

In [None]:
# Raw VM datasets in model order (1..5); the data validation cells iterate over this list.
datasets = [
    vm_raw_1_ds,
    vm_raw_2_ds,
    vm_raw_3_ds,
    vm_raw_4_ds,
    vm_raw_5_ds,
]

### Time Series Datasets Description

In [None]:
# Run the custom provider's dataset description test over all raw datasets at
# once and log the result. The test id is a plain string: it has no
# placeholders, so no f-prefix is needed.
test = vm.tests.run_test(
    "deposits_test_provider.TimeSeriesDatasetsDescription",
    inputs={"datasets": datasets},
)
test.log()

### Time Series Target Variable Description

In [None]:
# Run the custom provider's target variable description test over all raw
# datasets at once and log the result. The test id is a plain string: it has
# no placeholders, so no f-prefix is needed.
test = vm.tests.run_test(
    "deposits_test_provider.TimeSeriesTargetVariableDescription",
    inputs={"datasets": datasets},
)
test.log()

### Time Series Line Plots

In [None]:
# Set to False to skip this cell's tests without deleting the code.
run = True
if run:
    # One run per raw dataset; the numbered suffix after ":" makes each test id distinct.
    for i, dataset in enumerate(datasets, 1):
        test = vm.tests.run_test(
            f"validmind.data_validation.TimeSeriesLinePlot:raw_dataset_{i}",
            inputs=dict(dataset=dataset),
        )
        test.log()

### Time Series Frequency

In [None]:
# Set to False to skip this cell's tests without deleting the code.
run = True
if run:
    # One run per raw dataset; the numbered suffix after ":" makes each test id distinct.
    for i, dataset in enumerate(datasets, 1):
        test = vm.tests.run_test(
            f"validmind.data_validation.TimeSeriesFrequency:raw_dataset_{i}",
            inputs=dict(dataset=dataset),
        )
        test.log()

### Seasonal Decompose

In [None]:
# Set to False to skip this cell's tests without deleting the code.
run = True
if run:
    # One run per raw dataset; the numbered suffix after ":" makes each test id distinct.
    for i, dataset in enumerate(datasets, 1):
        test = vm.tests.run_test(
            f"validmind.data_validation.SeasonalDecompose:raw_dataset_{i}",
            inputs=dict(dataset=dataset),
        )
        test.log()

# 2. Model development

## Fit seasonality and random forest models

In [None]:
# Dataset 1: the seasonality fit returns four outputs (prior/posterior
# likelihood and seasonality); the posterior seasonality then feeds the
# random forest step, which returns the model and its train/test frames.
(
    prior_likelihood_1,
    prior_seasonality_1,
    posterior_likelihood_1,
    posterior_seasonality_1,
) = demo.fit_pymc_seasonality_model(raw_1_df, target_column, n_order=10)
model_1, train_1_df, test_1_df = demo.process_and_train_random_forest(
    raw_1_df, posterior_seasonality_1, target_column
)

In [None]:
# Dataset 2: the seasonality fit returns four outputs (prior/posterior
# likelihood and seasonality); the posterior seasonality then feeds the
# random forest step, which returns the model and its train/test frames.
(
    prior_likelihood_2,
    prior_seasonality_2,
    posterior_likelihood_2,
    posterior_seasonality_2,
) = demo.fit_pymc_seasonality_model(raw_2_df, target_column, n_order=10)
model_2, train_2_df, test_2_df = demo.process_and_train_random_forest(
    raw_2_df, posterior_seasonality_2, target_column
)

In [None]:
# Dataset 3: the seasonality fit returns four outputs (prior/posterior
# likelihood and seasonality); the posterior seasonality then feeds the
# random forest step, which returns the model and its train/test frames.
(
    prior_likelihood_3,
    prior_seasonality_3,
    posterior_likelihood_3,
    posterior_seasonality_3,
) = demo.fit_pymc_seasonality_model(raw_3_df, target_column, n_order=10)
model_3, train_3_df, test_3_df = demo.process_and_train_random_forest(
    raw_3_df, posterior_seasonality_3, target_column
)

In [None]:
# Dataset 4: the seasonality fit returns four outputs (prior/posterior
# likelihood and seasonality); the posterior seasonality then feeds the
# random forest step, which returns the model and its train/test frames.
(
    prior_likelihood_4,
    prior_seasonality_4,
    posterior_likelihood_4,
    posterior_seasonality_4,
) = demo.fit_pymc_seasonality_model(raw_4_df, target_column, n_order=10)
model_4, train_4_df, test_4_df = demo.process_and_train_random_forest(
    raw_4_df, posterior_seasonality_4, target_column
)

In [None]:
# Dataset 5: the seasonality fit returns four outputs (prior/posterior
# likelihood and seasonality); the posterior seasonality then feeds the
# random forest step, which returns the model and its train/test frames.
(
    prior_likelihood_5,
    prior_seasonality_5,
    posterior_likelihood_5,
    posterior_seasonality_5,
) = demo.fit_pymc_seasonality_model(raw_5_df, target_column, n_order=10)
model_5, train_5_df, test_5_df = demo.process_and_train_random_forest(
    raw_5_df, posterior_seasonality_5, target_column
)

## Create VM datasets and models

In [None]:
# For each of the five fits, register the train split, the test split and the
# fitted model with VM. Every input_id mirrors its variable name.

# --- model 1 ---
vm_train_1_ds = vm.init_dataset(
    dataset=train_1_df, input_id="train_1_ds", target_column=target_column
)
vm_test_1_ds = vm.init_dataset(
    dataset=test_1_df, input_id="test_1_ds", target_column=target_column
)
vm_model_1 = vm.init_model(model_1, input_id="random_forest_model_1")

# --- model 2 ---
vm_train_2_ds = vm.init_dataset(
    dataset=train_2_df, input_id="train_2_ds", target_column=target_column
)
vm_test_2_ds = vm.init_dataset(
    dataset=test_2_df, input_id="test_2_ds", target_column=target_column
)
vm_model_2 = vm.init_model(model_2, input_id="random_forest_model_2")

# --- model 3 ---
vm_train_3_ds = vm.init_dataset(
    dataset=train_3_df, input_id="train_3_ds", target_column=target_column
)
vm_test_3_ds = vm.init_dataset(
    dataset=test_3_df, input_id="test_3_ds", target_column=target_column
)
vm_model_3 = vm.init_model(model_3, input_id="random_forest_model_3")

# --- model 4 ---
vm_train_4_ds = vm.init_dataset(
    dataset=train_4_df, input_id="train_4_ds", target_column=target_column
)
vm_test_4_ds = vm.init_dataset(
    dataset=test_4_df, input_id="test_4_ds", target_column=target_column
)
vm_model_4 = vm.init_model(model_4, input_id="random_forest_model_4")

# --- model 5 ---
vm_train_5_ds = vm.init_dataset(
    dataset=train_5_df, input_id="train_5_ds", target_column=target_column
)
vm_test_5_ds = vm.init_dataset(
    dataset=test_5_df, input_id="test_5_ds", target_column=target_column
)
vm_model_5 = vm.init_model(model_5, input_id="random_forest_model_5")

## Assign predictions

In [None]:
# Attach each model's predictions to its own train and test datasets.
# Pairs are listed in call order: train then test, model 1 through 5.
for vm_dataset, vm_model in [
    (vm_train_1_ds, vm_model_1),
    (vm_test_1_ds, vm_model_1),
    (vm_train_2_ds, vm_model_2),
    (vm_test_2_ds, vm_model_2),
    (vm_train_3_ds, vm_model_3),
    (vm_test_3_ds, vm_model_3),
    (vm_train_4_ds, vm_model_4),
    (vm_test_4_ds, vm_model_4),
    (vm_train_5_ds, vm_model_5),
    (vm_test_5_ds, vm_model_5),
]:
    vm_dataset.assign_predictions(model=vm_model)

## Run model validation tests

In [None]:
# Parallel lists, all in model order (1..5), so the cells below can zip them
# together position by position.
models = [
    vm_model_1,
    vm_model_2,
    vm_model_3,
    vm_model_4,
    vm_model_5,
]
raw_datasets = [
    vm_raw_1_ds,
    vm_raw_2_ds,
    vm_raw_3_ds,
    vm_raw_4_ds,
    vm_raw_5_ds,
]
train_datasets = [
    vm_train_1_ds,
    vm_train_2_ds,
    vm_train_3_ds,
    vm_train_4_ds,
    vm_train_5_ds,
]
test_datasets = [
    vm_test_1_ds,
    vm_test_2_ds,
    vm_test_3_ds,
    vm_test_4_ds,
    vm_test_5_ds,
]

# Outputs of the seasonality fits, grouped by kind.
prior_likelihoods = [
    prior_likelihood_1,
    prior_likelihood_2,
    prior_likelihood_3,
    prior_likelihood_4,
    prior_likelihood_5,
]
prior_seasonalities = [
    prior_seasonality_1,
    prior_seasonality_2,
    prior_seasonality_3,
    prior_seasonality_4,
    prior_seasonality_5,
]
posterior_likelihoods = [
    posterior_likelihood_1,
    posterior_likelihood_2,
    posterior_likelihood_3,
    posterior_likelihood_4,
    posterior_likelihood_5,
]
posterior_seasonalities = [
    posterior_seasonality_1,
    posterior_seasonality_2,
    posterior_seasonality_3,
    posterior_seasonality_4,
    posterior_seasonality_5,
]

### PyMC Seasonality Prior Plots

In [None]:
# Plot the prior likelihood output of each seasonality fit against its raw dataset.
for i, (dataset, prior_likelihood) in enumerate(zip(raw_datasets, prior_likelihoods), start=1):
    # Bind the test result first and log it second, so `result` holds the
    # result object rather than whatever .log() returns.
    result = vm.tests.run_test(
        f"deposits_test_provider.PyMCPlot:Prior_Likelihood_{i}",
        inputs={"dataset": dataset},
        params={"pymc_output": prior_likelihood, "title": "Prior Predictive Seasonality"},
    )
    result.log()

In [None]:
# Plot the prior seasonality output of each fit against its raw dataset.
for i, (dataset, prior_seasonality) in enumerate(zip(raw_datasets, prior_seasonalities), start=1):
    # Bind the test result first and log it second, so `result` holds the
    # result object rather than whatever .log() returns.
    result = vm.tests.run_test(
        f"deposits_test_provider.PyMCSeasonalityPlot:Prior_Seasonality_{i}",
        inputs={"dataset": dataset},
        params={"seasonality": prior_seasonality, "title": "Prior Seasonality Lines"},
    )
    result.log()

### PyMC Seasonality Posterior Plots

In [None]:
# Plot the posterior likelihood output of each seasonality fit against its raw dataset.
for i, (dataset, posterior_likelihood) in enumerate(zip(raw_datasets, posterior_likelihoods), start=1):
    # Bind the test result first and log it second, so `result` holds the
    # result object rather than whatever .log() returns.
    result = vm.tests.run_test(
        f"deposits_test_provider.PyMCPlot:Posterior_Likelihood_{i}",
        inputs={"dataset": dataset},
        params={"pymc_output": posterior_likelihood, "title": "Posterior Predictive Seasonality"},
    )
    result.log()

In [None]:
# Plot the posterior seasonality output of each fit against its raw dataset.
for i, (dataset, posterior_seasonality) in enumerate(zip(raw_datasets, posterior_seasonalities), start=1):
    # Bind the test result first and log it second, so `result` holds the
    # result object rather than whatever .log() returns.
    result = vm.tests.run_test(
        f"deposits_test_provider.PyMCSeasonalityPlot:Posterior_Seasonality_{i}",
        inputs={"dataset": dataset},
        params={"seasonality": posterior_seasonality, "title": "Posterior Seasonality Lines"},
    )
    result.log()

### Random Forest Model Metadata

In [None]:
# Single comparison run over every model, using the custom provider's test.
test = vm.tests.run_test(
    "deposits_test_provider.ModelMetadataComparison",
    inputs=dict(models=models),
)
test.log()

In [None]:
# Built-in metadata test, run once per model; ids are suffixed model_1..model_5.
for i, model in enumerate(models, 1):
    test = vm.tests.run_test(
        f"validmind.model_validation.ModelMetadata:model_{i}",
        inputs=dict(model=model),
    )
    test.log()

### Dataset Split

In [None]:
# Pair each train dataset with the test dataset at the same position and run
# the split test on the pair.
for i, (train_dataset, test_dataset) in enumerate(zip(train_datasets, test_datasets), 1):
    test = vm.tests.run_test(
        f"validmind.data_validation.DatasetSplit:dataset_{i}",
        inputs=dict(datasets=[train_dataset, test_dataset]),
    )
    test.log()

### Regression Errors

In [None]:
# Regression error comparison over the training datasets and all models.
test = vm.tests.run_test(
    "deposits_test_provider.RegressionErrorsComparison:train_datasets",
    inputs=dict(datasets=train_datasets, models=models),
)
test.log()

In [None]:
# Regression error comparison over the test datasets and all models.
test = vm.tests.run_test(
    "deposits_test_provider.RegressionErrorsComparison:test_datasets",
    inputs=dict(datasets=test_datasets, models=models),
)
test.log()

In [None]:
# Built-in regression errors test, run once per model on its own train/test pair.
for i, (train_dataset, test_dataset, model) in enumerate(
    zip(train_datasets, test_datasets, models), 1
):
    test = vm.tests.run_test(
        f"validmind.model_validation.sklearn.RegressionErrors:model_{i}",
        inputs=dict(datasets=[train_dataset, test_dataset], model=model),
    )
    test.log()

### Regression R2 Square

In [None]:
# R2 comparison over the training datasets and all models.
test = vm.tests.run_test(
    "deposits_test_provider.RegressionR2SquareComparison:train_datasets",
    inputs=dict(datasets=train_datasets, models=models),
)
test.log()

In [None]:
# R2 comparison over the test datasets and all models.
test = vm.tests.run_test(
    "deposits_test_provider.RegressionR2SquareComparison:test_datasets",
    inputs=dict(datasets=test_datasets, models=models),
)
test.log()

In [None]:
# Built-in R2 test, run once per model on its own train/test pair.
for i, (train_dataset, test_dataset, model) in enumerate(
    zip(train_datasets, test_datasets, models), 1
):
    test = vm.tests.run_test(
        f"validmind.model_validation.sklearn.RegressionR2Square:model_{i}",
        inputs=dict(datasets=[train_dataset, test_dataset], model=model),
    )
    test.log()

### Regression Residuals Plot

In [None]:
# Residuals plot for each model, evaluated on its training dataset only.
for i, (train_dataset, model) in enumerate(zip(train_datasets, models), 1):
    test = vm.tests.run_test(
        f"validmind.model_validation.RegressionResidualsPlot:model_{i}",
        inputs=dict(dataset=train_dataset, model=model),
    )
    test.log()

### Permutation Feature Importance

In [None]:
# Feature importance comparison over the training datasets and all models.
test = vm.tests.run_test(
    "deposits_test_provider.FeatureImportanceComparison:train_datasets",
    inputs=dict(datasets=train_datasets, models=models),
)
test.log()

In [None]:
# Feature importance comparison over the test datasets and all models.
test = vm.tests.run_test(
    "deposits_test_provider.FeatureImportanceComparison:test_datasets",
    inputs=dict(datasets=test_datasets, models=models),
)
test.log()

In [None]:
# Built-in permutation feature importance, run per model on its training dataset only.
for i, (train_dataset, model) in enumerate(zip(train_datasets, models), 1):
    test = vm.tests.run_test(
        f"validmind.model_validation.sklearn.PermutationFeatureImportance:train_dataset_{i}",
        inputs=dict(dataset=train_dataset, model=model),
    )
    test.log()