### Setup AWS Credentials

In [8]:
import os
# Fill in your AWS Access Key, Secret Key and (for temporary credentials)
# Session Token. Never commit real credentials together with the notebook.
aws_access_key_id = ""
aws_secret_access_key = ""
aws_session_token = ""

# Export the values only once the key pair has been filled in. Writing the
# blank placeholders would overwrite credentials that are already configured
# in the environment with empty strings.
if aws_access_key_id and aws_secret_access_key:
    os.environ["AWS_ACCESS_KEY_ID"] = aws_access_key_id
    os.environ["AWS_SECRET_ACCESS_KEY"] = aws_secret_access_key
    # Always written alongside the key pair (even when blank) so a stale
    # session token cannot be combined with the new keys.
    os.environ["AWS_SESSION_TOKEN"] = aws_session_token

In [9]:
# Model ID of the Nova model under test; passed as `model_id` when computing evaluation scores.
NOVA_MODEL_ID = "us.amazon.nova-premier-v1:0"

### Dataset Adapter

Initialize the Dataset Adapter that takes the input_columns and output_columns. We use the CSVDatasetAdapter to read a `.csv` file and adapt it to the standardized format. We also use the adapter to create train and test sets for our use case.

In [10]:
from amzn_nova_prompt_optimizer.core.input_adapters.dataset_adapter import CSVDatasetAdapter

# Names of the CSV columns handed to the adapter as inputs and as expected outputs.
input_columns = {"input"}
output_columns = {"answer"}

dataset_adapter = CSVDatasetAdapter(input_columns, output_columns)

# Adapt: read the CSV file into the adapter's standardized format.
dataset_adapter.adapt("../data/FacilitySupportAnalyzer.csv")

# Create the train and test sets (0.5 is presumably the split fraction — TODO confirm against `split`).
train_set, test_set = dataset_adapter.split(0.5)

### Prompt Adapter

Initialize the Prompt Adapter for the Original Prompt. For this example, we use the FacilitySupportAnalyzer System and User Prompt in the `.txt` format. 

In [None]:
from amzn_nova_prompt_optimizer.core.input_adapters.prompt_adapter import TextPromptAdapter

# The prompt templates are given the same variable names as the dataset's input columns.
prompt_variables = input_columns

prompt_adapter = TextPromptAdapter()

# Register the original system and user prompts from their `.txt` files.
prompt_adapter.set_system_prompt(file_path="original_prompt/system_prompt.txt", variables=prompt_variables)
prompt_adapter.set_user_prompt(file_path="original_prompt/user_prompt.txt", variables=prompt_variables)

# Adapt
prompt_adapter.adapt()

### Metric Adapter

Initialize the Metric Adapter, which is used to evaluate the prompt and is required by certain optimizers. For this example, we build a custom metric for the FacilitySupportAnalyzer dataset. A metric adapter must implement the `apply` function (for single-row evaluation) or the `batch_apply` function (for evaluating the whole dataset together).

In [12]:
from amzn_nova_prompt_optimizer.core.input_adapters.metric_adapter import MetricAdapter
from typing import List, Any, Dict
import re
import json

class FacilitySupportAnalyzerMetric(MetricAdapter):
    """Custom metric for the FacilitySupportAnalyzer dataset.

    A prediction and its expected answer are compared as JSON objects on three
    fields: ``categories`` (fraction of matching flags), ``sentiment`` and
    ``urgency`` (exact match). ``total`` is the mean of those three scores.
    """

    def parse_json(self, input_string: str):
        """
        Parse ``input_string`` as JSON, tolerating Markdown code fences.

        If direct parsing fails, the content of the first code block formatted as:
            ```json
            ... JSON content ...
            ```
        is tried, followed by the first block delimited by plain triple backticks.

        Returns:
            The decoded JSON value (not necessarily a dict).

        Raises:
            json.JSONDecodeError: the error from the direct parse, when no
                fenced candidate could be decoded either.
        """
        try:
            return json.loads(input_string)
        except json.JSONDecodeError as err:
            # Keep the first error so it can be re-raised if the fallbacks fail too.
            error = err

        patterns = [
            re.compile(r"```json\s*(.*?)\s*```", re.DOTALL | re.IGNORECASE),
            re.compile(r"```(.*?)```", re.DOTALL)
        ]

        for pattern in patterns:
            match = pattern.search(input_string)
            if match:
                json_candidate = match.group(1).strip()
                try:
                    return json.loads(json_candidate)
                except json.JSONDecodeError:
                    continue

        raise error

    def _load_payload(self, payload: Any, strict_json: bool) -> Any:
        """Return ``payload`` unchanged if it is a dict, otherwise decode it as JSON."""
        if isinstance(payload, dict):
            return payload
        if strict_json:
            return json.loads(payload)
        return self.parse_json(payload)

    def _calculate_metrics(self, y_pred: Any, y_true: Any, strict_json: bool = False) -> Dict:
        """
        Score one prediction against its expected answer.

        Args:
            y_pred: Predicted payload, either a dict or a JSON string.
            y_true: Expected payload, either a dict or a JSON string.
            strict_json: When True, strings must be pure JSON; when False
                (default), JSON wrapped in a Markdown code fence is accepted.

        Returns:
            Dict with ``is_valid_json``, ``correct_categories``,
            ``correct_sentiment``, ``correct_urgency`` and ``total``. When
            either payload cannot be decoded into a JSON object, every score
            keeps its zero/False default.
        """
        result = {
            "is_valid_json": False,
            "correct_categories": 0.0,
            "correct_sentiment": False,
            "correct_urgency": False,
            "total": 0,
        }

        try:
            y_true = self._load_payload(y_true, strict_json)
            y_pred = self._load_payload(y_pred, strict_json)
        except (ValueError, TypeError):
            # ValueError covers json.JSONDecodeError; TypeError covers payloads
            # that are neither text nor dict (e.g. None or a NaN cell).
            return result  # Return result with is_valid_json = False

        # Valid JSON that is not an object (string, list, number, null) has no
        # fields to compare, so it is scored like invalid JSON instead of
        # failing on `.get`.
        if not (isinstance(y_true, dict) and isinstance(y_pred, dict)):
            return result  # Return result with is_valid_json = False

        result["is_valid_json"] = True

        categories_true = y_true.get("categories", {})
        categories_pred = y_pred.get("categories", {})

        # The score stays 0.0 when either side is not a dict or no categories are expected.
        if isinstance(categories_true, dict) and isinstance(categories_pred, dict) and categories_true:
            correct = sum(
                categories_true.get(k, False) == categories_pred.get(k, False)
                for k in categories_true
            )
            result["correct_categories"] = correct / len(categories_true)

        result["correct_sentiment"] = y_pred.get("sentiment", "") == y_true.get("sentiment", "")
        result["correct_urgency"] = y_pred.get("urgency", "") == y_true.get("urgency", "")

        # Compute overall metric score
        result["total"] = sum(
            float(result[k]) for k in ["correct_categories", "correct_sentiment", "correct_urgency"]
        ) / 3.0

        return result

    def apply(self, y_pred: Any, y_true: Any):
        """Return the full metrics dict for a single prediction/answer pair."""
        return self._calculate_metrics(y_pred, y_true)

    def batch_apply(self, y_preds: List[Any], y_trues: List[Any]):
        """
        Average every numeric metric over all prediction/answer pairs.

        Returns:
            Dict mapping each metric name to its mean over the batch, or an
            empty dict when there are no pairs to evaluate.
        """
        evals = [self.apply(y_pred, y_true) for y_pred, y_true in zip(y_preds, y_trues)]
        if not evals:
            # Nothing to average; avoids indexing evals[0] and dividing by zero.
            return {}
        float_keys = [k for k, v in evals[0].items() if isinstance(v, (int, float, bool))]
        return {k: sum(e[k] for e in evals) / len(evals) for k in float_keys}

metric_adapter = FacilitySupportAnalyzerMetric()

### Inference Adapter
Initialize the Inference Adapter to select the inference backend. Currently, only `BedrockInferenceAdapter` is supported.

In [14]:
from amzn_nova_prompt_optimizer.core.inference.adapter import BedrockInferenceAdapter

# Inference backend shared by the evaluators and the optimizer in later cells.
inference_adapter = BedrockInferenceAdapter(region_name="us-east-1")

### Evaluator

The Evaluator can use the metric_adapter, prompt_adapter, and dataset_adapter to evaluate the prompt given the `model_id` to produce an evaluation score. The Evaluator internally uses the `InferenceRunner` to first generate inference results and then evaluate the output.

#### Base Model Evaluation

In [15]:
from amzn_nova_prompt_optimizer.core.evaluation import Evaluator

# Baseline evaluator: scores the original prompt on the test set with the dict-valued metric.
evaluator = Evaluator(prompt_adapter, test_set, metric_adapter, inference_adapter)

In [None]:
# Baseline score of the original prompt for the chosen model, kept for comparison with the optimized prompt.
original_prompt_score = evaluator.aggregate_score(model_id=NOVA_MODEL_ID)

print(f"Original Prompt Evaluation Score = {original_prompt_score}")

### Optimization Adapter

We can now define the optimization function. It takes as input the Prompt Adapter and, optionally, a Dataset Adapter, an Inference Adapter, and a Metric Adapter. The optimization function optimizes the prompt and returns a Prompt Adapter.

In [9]:
class FacilitySupportAnalyzerNovaMetric(FacilitySupportAnalyzerMetric):
    """Scalar-valued variant of FacilitySupportAnalyzerMetric.

    ``apply`` returns only the ``total`` score instead of the full metrics dict.
    """

    def apply(self, y_pred: Any, y_true: Any):
        # Must return a single value and not a JSON payload.
        return self._calculate_metrics(y_pred, y_true)["total"]

    def batch_apply(self, y_preds: List[Any], y_trues: List[Any]):
        """Return the mean ``total`` score over all pairs (0.0 for an empty batch)."""
        # Previously a no-op that returned None; averaging the scalar scores
        # keeps the return type consistent with `apply`.
        scores = [self.apply(y_pred, y_true) for y_pred, y_true in zip(y_preds, y_trues)]
        return sum(scores) / len(scores) if scores else 0.0
nova_metric_adapter = FacilitySupportAnalyzerNovaMetric()

#### NovaPromptOptimizer

NovaPromptOptimizer = Nova Meta Prompter + MIPROv2 with Nova Model Tips

In [None]:
from amzn_nova_prompt_optimizer.core.optimizers import NovaPromptOptimizer

# The optimizer works on the train split and uses the scalar-valued metric.
nova_prompt_optimizer = NovaPromptOptimizer(
    prompt_adapter=prompt_adapter,
    inference_adapter=inference_adapter,
    dataset_adapter=train_set,
    metric_adapter=nova_metric_adapter,
)

# Run the optimization in "pro" mode; the result is the optimized prompt adapter.
optimized_prompt_adapter = nova_prompt_optimizer.optimize(mode="pro")

In [None]:
# Display the optimized prompt adapter returned by the optimizer.
optimized_prompt_adapter.show()

### Optimized System Prompt

In [None]:
# Print the system prompt held by the optimized adapter.
print(optimized_prompt_adapter.system_prompt)

### Optimized User Prompt

In [None]:
# Print the user prompt held by the optimized adapter.
print(optimized_prompt_adapter.user_prompt)

### Few Shot Examples

In [None]:
# Report how many few-shot examples are attached to the optimized prompt.
print(f"Number of Few-Shot Examples = {len(optimized_prompt_adapter.few_shot_examples)}")

In [None]:
# Print only the first example. Guard against an empty collection so the cell
# does not fail with IndexError when no few-shot examples were produced.
few_shot_examples = optimized_prompt_adapter.few_shot_examples
if len(few_shot_examples) > 0:
    print(few_shot_examples[0])
else:
    print("No few-shot examples available.")

### Evaluator

Now we evaluate the prompt optimized by the Nova Prompt Optimizer.

In [16]:
from amzn_nova_prompt_optimizer.core.evaluation import Evaluator

# Rebinds `evaluator` to score the optimized prompt, with the same test set, metric and
# inference adapter as the baseline so the two scores are comparable.
evaluator = Evaluator(optimized_prompt_adapter, test_set, metric_adapter, inference_adapter)

In [None]:
# Score of the optimized prompt for the same model as the baseline evaluation.
nova_prompt_optimizer_evaluation_score = evaluator.aggregate_score(model_id=NOVA_MODEL_ID)
print(f"Nova Prompt Optimizer = {nova_prompt_optimizer_evaluation_score}")

In [18]:
# Save the optimized prompt adapter, passing `optimized_prompt/` as the target path.
optimized_prompt_adapter.save("optimized_prompt/")