In [None]:
# Presumably the ipython-autotime extension (per-cell timing) — TODO confirm it is installed.
%load_ext autotime
# Reload imported modules before each execution so local code edits are picked up live.
%load_ext autoreload
%autoreload 2

In [None]:
import numpy as np
import pandas as pd

from vulkan_public.beam.local.runner import PolicyRunner
from vulkan_public.core.policy import Policy
from vulkan_public.schemas import DataSourceSpec
from vulkan_public.spec.dependency import INPUT_NODE

In [None]:
import logging

# Raise the root logger threshold to ERROR to keep the notebook output quiet.
logger = logging.getLogger()
logger.setLevel(logging.ERROR)

## Preparations

We'll create a Parquet file with our input data.

In [None]:
# Read the identifier-like columns as text from the start. Letting pandas infer a
# numeric dtype and casting to str afterwards silently drops leading zeros
# (e.g. "00123" -> "123") and turns missing values into the literal string "nan".
df = pd.read_csv(
    "../data/simple_bkt_lg.csv",
    dtype={"month": str, "tax_id": str},
)

# Persist the typed frame as Parquet; it is the input of the batch run at the
# end of the notebook.
df.to_parquet("input.parquet")

## Define the sub-Policy

In [None]:
from enum import Enum

from vulkan_public.spec.dependency import INPUT_NODE, Dependency
from vulkan_public.spec.nodes import BranchNode, TerminateNode
from vulkan_public.spec.policy import PolicyDefinition


class Status(Enum):
    """Statuses the terminate nodes in this notebook return."""

    APPROVED = "APPROVED"
    DENIED = "DENIED"


# Terminal nodes of the sub-policy. Each one depends on the matching output
# ("approved" / "denied") of the "branch_1" node and carries the status to return.
approved = TerminateNode(
    name="approved",
    description="TerminateNode data branch",
    dependencies={"condition": Dependency("branch_1", "approved")},
    return_status=Status.APPROVED,
)

denied = TerminateNode(
    name="denied",
    description="TerminateNode data branch",
    dependencies={"condition": Dependency("branch_1", "denied")},
    return_status=Status.DENIED,
)


# Branching function of the sub-policy
def branch_condition_1(context, scores, **kwargs):
    """Choose the sub-policy branch by comparing the score with the cutoff.

    Args:
        context: Execution context; ``context.log`` is used for logging and
            ``context.env`` is read for the ``SCORE_CUTOFF`` variable.
        scores: Mapping holding a ``"score"`` entry.
        **kwargs: Accepted and ignored.

    Returns:
        ``"approved"`` when the score is strictly greater than ``SCORE_CUTOFF``
        (500 when the variable is not set), ``"denied"`` otherwise.
    """
    context.log.info(f"BranchNode data: {scores}")
    score = scores["score"]
    cutoff = context.env.get("SCORE_CUTOFF", 500)
    return "approved" if score > cutoff else "denied"


# Register the branching function as a node. The policy input (INPUT_NODE) is
# passed to it as `scores`; "approved" and "denied" are the declared choices
# the terminate nodes depend on.
branch_1 = BranchNode(
    name="branch_1",
    description="BranchNode data",
    func=branch_condition_1,
    choices=["approved", "denied"],
    dependencies={"scores": Dependency(INPUT_NODE)},
)

# Assemble the sub-policy: one branch node feeding two terminate nodes.
# SCORE_CUTOFF is declared as a config variable because the branch reads it
# from `context.env`.
subpolicy = PolicyDefinition(
    nodes=[branch_1, approved, denied],
    input_schema={"tax_id": str, "score": int},
    config_variables=["SCORE_CUTOFF"],
)
subpolicy.show()

In [None]:
# Run the sub-policy on its own for a single record. The score (651) sits just
# above the configured cutoff (650).
config_variables = {"SCORE_CUTOFF": 650}
runner = PolicyRunner(subpolicy, staging_path="./output/")

result = runner.run(
    config_variables=config_variables,
    input_data={"tax_id": "1", "score": 651},
)

print("Here are our results:\n")
result.data

## Define the Main Policy

In [None]:
from vulkan_public.spec.policy import PolicyDefinitionNode

# Wrap the sub-policy definition in a node so the main policy can depend on it.
# The main policy's input (INPUT_NODE) is wired to it under the key `input_data`.
subpolicy_node = PolicyDefinitionNode(
    name="subpolicy",
    policy_definition=subpolicy,
    dependencies={"input_data": Dependency(INPUT_NODE)},
)


def main_branch(context, subpolicy_decision, **kwargs):
    """Choose the main-policy branch from the sub-policy's decision.

    Args:
        context: Execution context; only ``context.log`` is used.
        subpolicy_decision: Output of the sub-policy node. Its ``"status"``
            entry is compared with ``Status.APPROVED.value``.
        **kwargs: Accepted and ignored.

    Returns:
        ``"approved"`` when the sub-policy status is APPROVED, ``"denied"``
        for any other status.
    """
    context.log.info(
        f"Main Branch Data: {subpolicy_decision} ({type(subpolicy_decision)})"
    )
    # An APPROVED sub-policy must select the "approved" choice (the one the
    # `approved_main` terminate node depends on). Returning "denied" here
    # would invert the sub-policy's decision.
    if subpolicy_decision["status"] == Status.APPROVED.value:
        return "approved"
    return "denied"


# Branch node of the main policy: it receives the output of the sub-policy
# node as `subpolicy_decision`.
branch_main = BranchNode(
    name="branch_main",
    func=main_branch,
    choices=["approved", "denied"],
    dependencies={"subpolicy_decision": Dependency(subpolicy_node.name)},
)

# Terminal nodes of the main policy, one per output of "branch_main".
approved_main = TerminateNode(
    name="approved_main",
    dependencies={"condition": Dependency("branch_main", "approved")},
    return_status=Status.APPROVED,
)

denied_main = TerminateNode(
    name="denied_main",
    dependencies={"condition": Dependency("branch_main", "denied")},
    return_status=Status.DENIED,
)


# Main policy: the embedded sub-policy, a branch on its decision, and two
# terminate nodes. It declares the same input schema and config variable as
# the sub-policy.
policy_def = PolicyDefinition(
    nodes=[subpolicy_node, branch_main, approved_main, denied_main],
    input_schema={"tax_id": str, "score": int},
    config_variables=["SCORE_CUTOFF"],
)
policy_def.show()

In [None]:
# Round-trip the sub-policy node through its dict representation and run the
# policy definition recovered from it.
config_variables = {"SCORE_CUTOFF": 650}

runner = PolicyRunner(
    PolicyDefinitionNode.from_dict(subpolicy_node.to_dict()).policy_definition,
    staging_path="./output/",
)

result = runner.run(
    config_variables=config_variables,
    input_data={"tax_id": "1", "score": 651},
)

## Run the Policy and SubPolicy Locally

In [None]:
# Run the main policy (which embeds the sub-policy) for a single record.
# `runner` is reused by the batch run further down.
config_variables = {"SCORE_CUTOFF": 650}
runner = PolicyRunner(policy_def, staging_path="./output/")

result = runner.run(
    config_variables=config_variables,
    input_data={"tax_id": "1", "score": 651},
)

print("Here are our results:\n")
result.data

In [None]:
# Build a Policy object from the main policy definition and display it.
p = Policy.from_definition(policy_def)
p.show()

### Running on a batch of data

We can run the policy on a single example or on a whole batch just as easily.
To run on a batch, we only need to pass the path to a file containing the input data.
Let's use the Parquet file we created at the beginning.

In [None]:
%%time
# Batch run over the Parquet file written in the "Preparations" section.
# NOTE(review): `data_sources` and `config_variables` are deliberately left out
# of this call (they were commented out). Without `config_variables`, the branch
# presumably falls back to its default SCORE_CUTOFF of 500 rather than the 650
# used in the single-record runs — confirm this is intended.
batch_results = runner.run_batch(input_data_path="input.parquet")

batch_results.data