In [None]:
import pandas as pd

from vulkan_public.beam.local.runner import PolicyRunner
from vulkan_public.core.policy import Policy
from vulkan_public.schemas import DataSourceSpec
from vulkan_public.spec.dependency import INPUT_NODE

## Preparations

We'll create a Parquet file with our input data, and a second file to act as a "Data Source".

Data Sources bring external data into your workflow.
This can be done by querying an external provider, such as a credit bureau, or by using some test data, as we do here.

In [None]:
# Load the sample CSV and cast "month" and "tax_id" to strings in one step
# ("tax_id" is declared as `str` in the policy's input schema).
df = pd.read_csv("../data/simple_bkt_lg.csv").astype({"month": str, "tax_id": str})

# The full dataset is written out so it can be used as the batch input file.
df.to_parquet("input.parquet")

# Only the lookup key and the score go into the file acting as the "Data Source".
lookup_df = df.loc[:, ["tax_id", "score"]]
lookup_df.to_parquet("file_data_source.parquet")

## Define the Policy

This is all the code used to define the policy. \
In fact, in `docs/examples/policies/local/test_policy/policy.py` 
we use the exact same code to create our "packaged" version, 
which we'll use later for remote execution.

There are a few key parts here:
- `DataInputNode`: These nodes are used to bring data into your decision flows. Here, we'll use a local file, but this can later be replaced with an API or database without having to change the flow.
- `branch_condition` and `BranchNode`: "Branches" are how we make decisions in our policies. At a branch, you can have any number of possible outputs. In our case here, we write a function that returns "approved" if the score is strictly greater than a cutoff, and "denied" otherwise.
- `TerminateNode`: Terminate nodes are how we represent the final step in a policy, or the final decision. The `return_status` value is the final decision made. Here, we either approve or deny someone. We'll later see how this can be used to pass information to other systems.

In [None]:
from enum import Enum

from vulkan_public.spec.dependency import INPUT_NODE, Dependency
from vulkan_public.spec.nodes import BranchNode, DataInputNode, TerminateNode
from vulkan_public.spec.policy import PolicyDefinition

# Data input node fed by the policy input (INPUT_NODE). The `source` string is
# the same identifier used as the data source's "name" in the local schema.
sample_file_input = DataInputNode(
    source="file-input:my-api-name:v0.0.1",
    name="My Model Data Source",
    description="Query My Model using REST API",
    dependencies=dict(inputs=Dependency(INPUT_NODE)),
)


# Branching node
def branch_condition(context, scores, **kwargs):
    """Pick the branch output for one record.

    Args:
        context: Execution context; this function uses ``context.log.info``
            and ``context.env.get``.
        scores: Mapping with a ``"score"`` entry to compare against the cutoff.
        **kwargs: Accepted and ignored.

    Returns:
        ``"approved"`` when the score is strictly greater than the
        ``SCORE_CUTOFF`` value read from ``context.env``, else ``"denied"``.
    """
    context.log.info(scores)
    score = scores["score"]
    cutoff = context.env.get("SCORE_CUTOFF")
    return "approved" if score > cutoff else "denied"


# Decision point of the policy: wraps `branch_condition` and depends on the
# data input node under the key "scores" (presumably delivered as the
# function's `scores` argument -- confirm against the BranchNode docs).
branch = BranchNode(
    name="branch",
    description="BranchNode data",
    func=branch_condition,
    outputs=["approved", "denied"],
    dependencies=dict(scores=Dependency(sample_file_input.name)),
)

# Final step reached through the "approved" output of the "branch" node.
approved = TerminateNode(
    name="approved",
    return_status="APPROVED",
    description="TerminateNode data branch",
    dependencies=dict(condition=Dependency("branch", "approved")),
)

# Final step reached through the "denied" output of the "branch" node.
denied = TerminateNode(
    name="denied",
    return_status="DENIED",
    description="TerminateNode data branch",
    dependencies=dict(condition=Dependency("branch", "denied")),
)

# Assemble the policy: all four nodes, the single input field (`tax_id`) and
# the one configuration variable read by `branch_condition`.
demo_policy = PolicyDefinition(
    input_schema={"tax_id": str},
    config_variables=["SCORE_CUTOFF"],
    nodes=[sample_file_input, branch, approved, denied],
    components=[],
)

In [None]:
# Build a Policy object from the declarative definition; this is the object
# handed to the local PolicyRunner further down.
policy = Policy.from_definition(demo_policy)

## Run the Policy Locally 

The entire policy can be visualized and then run locally.

To do that, we'll just have to do two things:
1. Create a schema, telling Vulkan where to get data for the data sources.
2. Set a value for our score "cutoff": a score must be strictly greater than this value to be Approved.

In [None]:
# Local description of the data source. "name" matches the `source` identifier
# of the DataInputNode, "path" points at the Parquet lookup file written
# earlier, and caching is turned off.
test_file_schema = {
    "name": "file-input:my-api-name:v0.0.1",
    "keys": ["tax_id"],
    "source": {"path": "file_data_source.parquet"},
    "caching": {"enabled": False},
}

# Validate the raw dict into a DataSourceSpec; the runner takes a list of them.
data_sources = [DataSourceSpec.model_validate(test_file_schema)]

In [None]:
# Local runner for the policy.
# NOTE(review): `staging_path` presumably is where the runner writes its
# intermediate/output files -- confirm against the PolicyRunner docs.
runner = PolicyRunner(policy, staging_path="./output/")

### Visualizing the flow of information

We can visualize our policy locally at any time. \
This can show us how the clients are being treated and where we're making each decision.

In [None]:
# Last expression of the cell, so the notebook displays whatever graph() returns.
runner.graph()

### Running

Now we're ready to run our policy.

Let's start with a single example:

In [None]:
# Peek at the first rows of the lookup data to pick a `tax_id` for the single run.
lookup_df.head()

In [None]:
# Value for the SCORE_CUTOFF variable read by `branch_condition`; scores must
# be strictly greater than it to be approved. Reused by the batch run below.
config_variables = {"SCORE_CUTOFF": 500}

# Run the policy for a single input record.
result = runner.run(
    config_variables=config_variables,
    data_sources=data_sources,
    input_data={"tax_id": "1"},
)

print("Here are our results:\n")
# Bare last expression so the notebook renders it.
result.data

### Running for a bunch of data

We can run for 1 example, or for a bunch, just as easily. \
To run for a batch of data, we just need to pass the input data as a file. \
Let's pass in the input file we created at the beginning.

In [None]:
%%time
# Run the policy over every row of the Parquet input file written in the
# preparation step, reusing the same data sources and cutoff as the single run.
# `%%time` (which must stay on the first line of the cell) reports how long it takes.
batch_results = runner.run_batch(
    input_data_path="input.parquet",
    data_sources=data_sources,
    config_variables=config_variables,
)

In [None]:
# Display the batch output.
# NOTE(review): if `.data` is a large DataFrame, consider showing `.head()` instead.
batch_results.data