In [2]:
# setup pf client and execution path

from promptflow import PFClient
import json
import os

# Local promptflow client used by the runs in the first half of this notebook.
pf = PFClient()

# Every location is resolved relative to the parent of the current working directory.
root = os.path.join(os.getcwd(), "../")

# Flow under test, plus the JSONL dataset that lives inside the flow folder.
flow = os.path.join(root, "maths-to-code")
data = os.path.join(flow, "math_data.jsonl")

# Evaluation flow, located one more level up under "evaluation".
eval_flow = os.path.join(root, "../evaluation/eval-accuracy-maths-to-code")

In [None]:
# Kick off a batch run of the maths-to-code flow against the dataset;
# each record's "question" field feeds the flow's "math_question" input.
base_run = pf.run(
    flow=flow,
    data=data,
    column_mapping={"math_question": "${data.question}"},
    display_name="maths_to_code_batch_run",
    stream=True,
)

In [4]:
# Show the per-line inputs and outputs of the batch run
pf.get_details(base_run)

Unnamed: 0,inputs.math_question,inputs.line_number,outputs.answer,outputs.code
0,What is the sum of 5 and 3?,0,8.0,print(5 + 3)
1,Subtract 7 from 10.,1,3.0,print(10 - 7)
2,Multiply 6 by 4.,2,24.0,print(6 * 4)
3,Divide 20 by 5.,3,4.0,print(20 / 5)
4,What is the square of 7?,4,49.0,print(7**2)
5,What is the square root of 81?,5,9.0,import math\nprint(math.sqrt(81))
6,If a rectangle has a length of 10 and width of...,6,50.0,length = 10\nwidth = 5\narea = length * width\...
7,"A circle has a radius of 7, what is the area? ...",7,153.86,area = 3.14 * (7**2)\nprint(area)
8,Solve for x in the equation 2x + 3 = 9.,8,3.0,print((9-3)/2)
9,What is the value of x if 5x = 25?,9,5.0,print(25/5)


In [None]:
# Evaluate the batch run: the dataset's "answer" field is the groundtruth and
# the batch run's "answer" output is the prediction.
eval_run = pf.run(
    flow=eval_flow,
    data=data,
    run=base_run,
    column_mapping={
        "groundtruth": "${data.answer}",
        "prediction": "${run.outputs.answer}",
    },
    display_name="maths_to_code_eval_run",
    stream=True,
)

# Last expression of the cell, so the evaluation details are displayed.
pf.get_details(eval_run)

In [6]:
# Get the aggregated metrics (accuracy, error_rate) of the evaluation flow run
pf.get_metrics(eval_run)

{'accuracy': 0.95, 'error_rate': 0.0}

In [None]:
# Visualize the batch run together with its evaluation run as HTML
pf.visualize([base_run, eval_run])

## Run on Azure
If you want to run and evaluate your flow on Azure, you can use the following example to set up your Azure ML workspace.

In [None]:
from azure.identity import DefaultAzureCredential, InteractiveBrowserCredential

# Initialise the Azure credential used by the cells below.
try:
    credential = DefaultAzureCredential()
    # Check that the credential can actually obtain a token; constructing it
    # alone does not prove that authentication works.
    credential.get_token("https://management.azure.com/.default")
except Exception:
    # Fall back to InteractiveBrowserCredential in case DefaultAzureCredential does not work.
    # The broad catch is deliberate: any failure above triggers the interactive fallback.
    credential = InteractiveBrowserCredential()

In [None]:
from promptflow.azure import PFClient

# Build the Azure promptflow client. NOTE: this rebinds `pf`, so every later
# cell talks to Azure instead of the local client created at the top.
try:
    # First attempt: create the client without an explicit config path.
    pf = PFClient.from_config(credential=credential)
except Exception:
    # NOTE: Update the following workspace information if it has not been configured correctly yet
    client_config = {
        "subscription_id": "<SUBSCRIPTION_ID>",
        "resource_group": "<RESOURCE_GROUP>",
        "workspace_name": "<AML_WORKSPACE_NAME>",
    }

    if client_config["subscription_id"].startswith("<"):
        # The placeholders were never filled in: tell the user what to edit,
        # then surface the original failure.
        print(
            "please update your <SUBSCRIPTION_ID> <RESOURCE_GROUP> <AML_WORKSPACE_NAME> in notebook cell"
        )
        # Bare `raise` re-raises the active exception unchanged.
        raise
    else:  # write and reload from config file
        import json
        import os

        config_path = "../.azureml/config.json"
        os.makedirs(os.path.dirname(config_path), exist_ok=True)
        with open(config_path, "w") as fo:
            # Serialise straight into the file instead of building a string first.
            json.dump(client_config, fo)
        pf = PFClient.from_config(credential=credential, path=config_path)

print(pf)

# NOTE: replace with your own runtime name in your Azure Machine Learning workspace
runtime = "<runtime_name>"

# NOTE: replace <my_azure_open_ai_connection> and <gpt-35-turbo> with your own
# connection and deployment name in your Azure Machine Learning workspace.
# The outer key ("code_gen") selects which part of the flow the override applies to.
connection_mapping = {
    "code_gen": {
        "connection": "<my_azure_open_ai_connection>",
        "deployment_name": "<gpt-35-turbo>",
    }
}

In [None]:
# Batch run of maths-to-code on Azure, using the runtime and the connection
# override configured above.
base_run = pf.run(
    flow=flow,
    data=data,
    column_mapping={"math_question": "${data.question}"},
    connections=connection_mapping,
    runtime=runtime,
    stream=True,
)

In [20]:
# Show the per-line inputs and outputs of the Azure batch run
pf.get_details(base_run)

Unnamed: 0,inputs.math_question,outputs.code,outputs.answer
0,Subtract 7 from 10.,print(10 - 7),3
1,What is the sum of 5 and 3?,print(5+3),8
2,What is the square of 7?,print(7**2),49
3,A car travels 200 miles in 4 hours. What is th...,print(200 / 4),50.0
4,Multiply 6 by 4.,print(6 * 4),24
5,What is the value of x if 5x = 25?,print(25/5),5.0
6,If a car travels at a speed of 40 mph for 2 ho...,print(40 * 2),80
7,A car travels at a speed of 60 mph. How long w...,print(180 / 60),3.0
8,A triangle has base =10 cm and height =5 cm. W...,print((10*5)/2),25.0
9,Divide 20 by 5.,print(20 / 5),4.0


In [None]:
# Evaluation run against the Azure batch run: the dataset's "answer" field is
# the groundtruth and the batch run's "answer" output is the prediction.
eval_run = pf.run(
    flow=eval_flow,
    data=data,
    run=base_run,
    column_mapping={
        "groundtruth": "${data.answer}",
        "prediction": "${run.outputs.answer}",
    },
    runtime=runtime,
    stream=True,
)


In [21]:
# Show the per-line groundtruth, prediction and score of the evaluation run
pf.get_details(eval_run)

Unnamed: 0,inputs.groundtruth,inputs.prediction,outputs.score
0,4.0,4.0,1
1,3.0,3,1
2,24.0,24,1
3,49.0,49,1
4,5.0,5.0,1
5,50.0,50,1
6,8.0,8,1
7,3.0,3.0,1
8,43.96,43.96,1
9,3.0,3.0,1


In [22]:
# Fetch the aggregated metrics of the evaluation run and pretty-print them as JSON
metrics = pf.get_metrics(eval_run)
print(json.dumps(metrics, indent=4))

{
    "accuracy": 0.9,
    "error_rate": 0.1
}
