In [2]:
NOVA_MODEL_ID = "us.amazon.nova-pro-v1:0"

In [3]:
from amzn_nova_prompt_optimizer.core.inference.adapter import BedrockInferenceAdapter

inference_adapter = BedrockInferenceAdapter(region_name="us-east-1")

In [4]:
'''
# Call the model with the passed parametrs
inference_output = inference_adapter.call_model(model_id: str, system_prompt: str, messages: List[Dict[str, str]], inf_config: Dict[str, Any])
'''
response = inference_adapter.call_model(
    model_id=NOVA_MODEL_ID,
    system_prompt="You are a helpful assistant that can answer questions and help with tasks.",
    messages=[{"user": "What is the capital of the moon?"}],
    # CHANGE HERE: Add "top_k" and keep it under 128
    inf_config={
        "max_tokens": 100, 
        "temperature": 1, 
        "top_p": 0.9, 
        "top_k": 50
    } 
)

print(response)

The Moon does not have a capital in the traditional sense, as it is not a country or a political entity with a government or cities. The Moon is a natural satellite of Earth and is not inhabited by humans or any known form of life. 

However, there are several notable landing sites and areas of interest on the Moon, such as:

1. **Apollo Landing Sites** - Locations where the Apollo missions landed, such as Tranquility Base, where Apollo 11 landed in 1


In [5]:
from weaviate.agents.query import QueryAgent
import weaviate

dataset = [
    {"question": "Can you provide a detailed comparison between HyDE and LameR, highlighting their respective strengths, weaknesses, and ideal use cases in modern information retrieval?"},
    {"question": "Please thoroughly explain the concept of Top-Down Partitioning Reranking, including its core algorithmic steps, advantages over other reranking strategies, and scenarios in which it is particularly effective."},
    {"question": "What are the most advanced and effective techniques currently employed in Agentic Search optimization using reinforcement learning, and how do they address existing challenges in this field?"},
    {"question": "To what extent has the Chain-of-Thought approach contributed to measurable improvements in Cross Encoder architectures, particularly in terms of accuracy, reasoning capability, and generalization across benchmarks? Please cite relevant studies or experiments."},
    {"question": "Could you survey the leading methods for LLM query expansion and discuss how the state-of-the-art approaches differ in methodology, effectiveness, and limits, especially for complex or open-ended queries?"},
]

def get_qa() -> weaviate.agents.query.query_agent.QueryAgent:
    weaviate_client = weaviate.connect_to_weaviate_cloud(
        cluster_url=os.getenv("WEAVIATE_URL"),
        auth_credentials=weaviate.auth.AuthApiKey(api_key=os.getenv("WEAVIATE_API_KEY"))
    )
    return QueryAgent(
        client=weaviate_client,
        collections=["IRPapersText_Default"]
    )

def weaviate_search_mode_tool(
    qa: weaviate.agents.query.query_agent.QueryAgent, 
    question: str
) -> str:
    search_response = qa.search(
        question,
        limit=1
    )
    contexts = []
    for o in search_response.search_results.objects:
        contexts.append(o.properties["content"])

    return "\n".join(contexts)

qa = get_qa()

for row in dataset:
    row["context"] = weaviate_search_mode_tool(qa, row["question"])
    row["answer"] = ""


KeyboardInterrupt: 

In [None]:
import pandas as pd

df = pd.DataFrame(dataset)
df.to_csv("dataset.csv", index=False)


In [None]:
from amzn_nova_prompt_optimizer.core.input_adapters.dataset_adapter import CSVDatasetAdapter

input_columns = {"question", "context"}
output_columns = {"answer"}

dataset_adapter = CSVDatasetAdapter(input_columns, output_columns)

# Adapt
dataset_adapter.adapt("dataset.csv")

train_set, test_set = dataset_adapter.split(0.5)

In [None]:
from amzn_nova_prompt_optimizer.core.input_adapters.prompt_adapter import TextPromptAdapter

prompt_adapter = TextPromptAdapter()

prompt_adapter.set_user_prompt(
    content="""
    Assess the context and answer the question.
    Answer the question as succinctly as you are able to.
    Question: {question}
    Context: {context}
    """,
    variables={"question", "context"}
)

prompt_adapter.adapt()

2025/11/25 16:53:34 INFO amzn_nova_prompt_optimizer.core.input_adapters.prompt_adapter: System Prompt not set, initializing as empty string...


<amzn_nova_prompt_optimizer.core.input_adapters.prompt_adapter.TextPromptAdapter at 0x12fc8b390>

In [None]:
from amzn_nova_prompt_optimizer.core.input_adapters.metric_adapter import MetricAdapter
from typing import List, Any, Dict
import re

class WordCountMetric(MetricAdapter):
    def _calculate_metrics(self, y_pred: Any, y_true: Any) -> Dict:
        """
        Calculates the number of words in the predicted answer (y_pred).
        Ignores y_true as it is not needed for a raw length count.
        """
        words = y_pred.split(" ")
        word_count = len(words)

        print(y_pred)
        
        # Return the metrics
        # 'total' is usually the primary metric the optimizer looks at.
        return {
            "word_count": word_count,
            "score": 1 / float(word_count) 
        }

    def apply(self, y_pred: Any, y_true: Any):
        return self._calculate_metrics(y_pred, y_true)

    def batch_apply(self, y_preds: List[Any], y_trues: List[Any]):
        # Calculate metrics for individual items
        evals = [self.apply(y_pred, y_true) for y_pred, y_true in zip(y_preds, y_trues)]
        
        # Calculate the average for all numeric metrics
        float_keys = [k for k, v in evals[0].items() if isinstance(v, (int, float, bool))]
        return {k: sum(e[k] for e in evals) / len(evals) for k in float_keys}

metric_adapter = WordCountMetric()

In [None]:
from amzn_nova_prompt_optimizer.core.inference.adapter import BedrockInferenceAdapter

inference_adapter = BedrockInferenceAdapter(region_name="us-east-1")

In [None]:
from amzn_nova_prompt_optimizer.core.evaluation import Evaluator

evaluator = Evaluator(prompt_adapter, test_set, metric_adapter, inference_adapter)

In [None]:
original_prompt_score = evaluator.aggregate_score(model_id=NOVA_MODEL_ID)

print(f"Original Prompt Evaluation Score = {original_prompt_score}")

2025/11/25 16:53:41 INFO amzn_nova_prompt_optimizer.core.evaluation: Cache miss - Running new inference on Dataset
Running inference: 100%|██████████| 3/3 [00:03<00:00,  1.18s/it]
2025/11/25 16:53:44 INFO amzn_nova_prompt_optimizer.core.evaluation: Running Batch Evaluation on Dataset, using `batch_apply` metric
2025/11/25 16:53:44 INFO amzn_nova_prompt_optimizer.core.evaluation: Using cached inference results
2025/11/25 16:53:44 INFO amzn_nova_prompt_optimizer.core.evaluation: Running Evaluation on Dataset, using `apply` metric


Advanced techniques in Agentic Search optimization using reinforcement learning include:

1. **Comprehensive Training Data Synthesis**: Creating diverse and complex training datasets to improve generalization.
2. **RFT Cold Start**: Techniques to initialize reinforcement learning training effectively.
3. **Improved Efficiency in RL Algorithms**: Optimizing algorithms for better training stability and sample efficiency.
4. **Uncertainty Reduction Strategies**: Methods to handle high initial uncertainty in complex tasks.

These techniques address challenges like training stability, sample efficiency, and generalization in adaptive contexts, enabling agents to perform sophisticated, non-linear reasoning tasks.
The Chain-of-Thought (CoT) approach has shown measurable improvements in Cross Encoder architectures, particularly in terms of accuracy and reasoning capability. According to the provided context, incorporating CoT into prompting and aggregation methods generally improves search per

In [None]:
class WordCountNovaMetric(WordCountMetric):
    def apply(self, y_pred: Any, y_true: Any):
        # Calculate the metrics dictionary using the parent class logic
        metrics = self._calculate_metrics(y_pred, y_true)
        
        # Return ONLY the scalar value (word count) as required by this specific adapter interface
        return metrics["score"]
        
    def batch_apply(self, y_preds: List[Any], y_trues: List[Any]):
        # Keeping this as pass as per your snippet
        # (The framework likely iterates over 'apply' individually or ignores this method for this adapter type)
        pass

nova_metric_adapter = WordCountNovaMetric()

In [None]:
from amzn_nova_prompt_optimizer.core.optimizers import NovaPromptOptimizer

nova_prompt_optimizer = NovaPromptOptimizer(prompt_adapter=prompt_adapter, inference_adapter=inference_adapter, dataset_adapter=train_set, metric_adapter=nova_metric_adapter)

optimized_prompt_adapter = nova_prompt_optimizer.optimize(mode="lite")

2025/11/25 16:57:45 INFO amzn_nova_prompt_optimizer.core.optimizers.nova_meta_prompter.nova_mp_optimizer: Optimizing prompt using Nova Meta Prompter with Model: us.amazon.nova-premier-v1:0
2025/11/25 16:57:47 INFO amzn_nova_prompt_optimizer.core.optimizers.miprov2.miprov2_optimizer: Using us.amazon.nova-lite-v1:0 for Evaluation
2025/11/25 16:57:47 INFO amzn_nova_prompt_optimizer.core.optimizers.miprov2.miprov2_optimizer: Using us.amazon.nova-premier-v1:0 for Prompting
2025/11/25 16:57:47 INFO amzn_nova_prompt_optimizer.core.optimizers.miprov2.custom_adapters.custom_chat_adapter: Initializing CustomChatAdapter with enable_json_fallback=False
2025/11/25 16:57:47 INFO amzn_nova_prompt_optimizer.core.optimizers.miprov2.miprov2_optimizer: Using Nova tips for MIPROv2 optimization
2025/11/25 16:57:47 INFO dspy.teleprompt.mipro_optimizer_v2: 
==> STEP 1: BOOTSTRAP FEWSHOT EXAMPLES <==
2025/11/25 16:57:47 INFO dspy.teleprompt.mipro_optimizer_v2: These will be used as few-shot example candidates

Bootstrapping set 1/20
Bootstrapping set 2/20
Bootstrapping set 3/20


  0%|          | 0/1 [00:00<?, ?it/s]

**Top-Down Partitioning Reranking**  
- **Core Steps:**  
  1. Initial search over top-w documents to find a pivot at cutoff k.
  2. Compare each window to the pivot to depth D.
  
- **Advantages:**  
  - Early stopping reduces computation.
  - No stride leads to efficiency.

- **Effective Scenarios:**  
  - High precision of top-k re-ranking documents.
  - Densely annotated queries.


100%|██████████| 1/1 [00:01<00:00,  1.63s/it]


Bootstrapped 1 full traces after 0 examples for up to 1 rounds, amounting to 1 attempts.
Bootstrapping set 4/20


100%|██████████| 1/1 [00:00<00:00, 2078.45it/s]


**Top-Down Partitioning Reranking**  
- **Core Steps:**  
  1. Initial search over top-w documents to find a pivot at cutoff k.
  2. Compare each window to the pivot to depth D.
  
- **Advantages:**  
  - Early stopping reduces computation.
  - No stride leads to efficiency.

- **Effective Scenarios:**  
  - High precision of top-k re-ranking documents.
  - Densely annotated queries.
Bootstrapped 1 full traces after 0 examples for up to 1 rounds, amounting to 1 attempts.
Bootstrapping set 5/20


100%|██████████| 1/1 [00:00<00:00, 2688.66it/s]


**Top-Down Partitioning Reranking**  
- **Core Steps:**  
  1. Initial search over top-w documents to find a pivot at cutoff k.
  2. Compare each window to the pivot to depth D.
  
- **Advantages:**  
  - Early stopping reduces computation.
  - No stride leads to efficiency.

- **Effective Scenarios:**  
  - High precision of top-k re-ranking documents.
  - Densely annotated queries.
Bootstrapped 1 full traces after 0 examples for up to 1 rounds, amounting to 1 attempts.
Bootstrapping set 6/20


100%|██████████| 1/1 [00:00<00:00,  1.62it/s]


**Top-Down Partitioning Reranking**  
- **Core Steps:**  
  1. Initial search over top-w documents to find a pivot at cutoff k.
  2. Compare each window to the pivot to depth D.
  
- **Advantages:**  
  - Early stopping reduces computation.
  - No stride leads to efficiency.

- **Effective Scenarios:**  
  - High precision of top-k re-ranking documents.
  - Densely annotated queries.
Bootstrapped 1 full traces after 0 examples for up to 1 rounds, amounting to 1 attempts.
Bootstrapping set 7/20


100%|██████████| 1/1 [00:00<00:00,  3.95it/s]


**Top-Down Partitioning Reranking**  
- **Core Steps:**  
  1. Initial search over top-w documents to find a pivot at cutoff k.
  2. Compare each window to the pivot to depth D.
  
- **Advantages:**  
  - Early stopping reduces computation.
  - No stride leads to efficiency.

- **Effective Scenarios:**  
  - High precision of top-k re-ranking documents.
  - Densely annotated queries.
Bootstrapped 1 full traces after 0 examples for up to 1 rounds, amounting to 1 attempts.
Bootstrapping set 8/20


100%|██████████| 1/1 [00:00<00:00,  1.78it/s]


**Top-Down Partitioning Reranking**  
- **Core Steps:**  
  1. Initial search over top-w documents to find a pivot at cutoff k.
  2. Compare each window to the pivot to depth D.
  
- **Advantages:**  
  - Early stopping reduces computation.
  - No stride leads to efficiency.

- **Effective Scenarios:**  
  - High precision of top-k re-ranking documents.
  - Densely annotated queries.
Bootstrapped 1 full traces after 0 examples for up to 1 rounds, amounting to 1 attempts.
Bootstrapping set 9/20


100%|██████████| 1/1 [00:00<00:00, 2777.68it/s]


**Top-Down Partitioning Reranking**  
- **Core Steps:**  
  1. Initial search over top-w documents to find a pivot at cutoff k.
  2. Compare each window to the pivot to depth D.
  
- **Advantages:**  
  - Early stopping reduces computation.
  - No stride leads to efficiency.

- **Effective Scenarios:**  
  - High precision of top-k re-ranking documents.
  - Densely annotated queries.
Bootstrapped 1 full traces after 0 examples for up to 1 rounds, amounting to 1 attempts.
Bootstrapping set 10/20


100%|██████████| 1/1 [00:00<00:00, 2341.88it/s]


**Top-Down Partitioning Reranking**  
- **Core Steps:**  
  1. Initial search over top-w documents to find a pivot at cutoff k.
  2. Compare each window to the pivot to depth D.
  
- **Advantages:**  
  - Early stopping reduces computation.
  - No stride leads to efficiency.

- **Effective Scenarios:**  
  - High precision of top-k re-ranking documents.
  - Densely annotated queries.
Bootstrapped 1 full traces after 0 examples for up to 1 rounds, amounting to 1 attempts.
Bootstrapping set 11/20


100%|██████████| 1/1 [00:00<00:00,  1.70it/s]


**Top-Down Partitioning Reranking**  
- **Core Steps:**  
  1. Initial search over top-w documents to find a pivot at cutoff k.
  2. Compare each window to the pivot to depth D.
  
- **Advantages:**  
  - Early stopping reduces computation.
  - No stride leads to efficiency.

- **Effective Scenarios:**  
  - High precision of top-k re-ranking documents.
  - Densely annotated queries.
Bootstrapped 1 full traces after 0 examples for up to 1 rounds, amounting to 1 attempts.
Bootstrapping set 12/20


100%|██████████| 1/1 [00:00<00:00,  2.83it/s]


**Top-Down Partitioning Reranking**  
- **Core Steps:**  
  1. Initial search over top-w documents to find a pivot at cutoff k.
  2. Compare each window to the pivot to depth D.
  
- **Advantages:**  
  - Early stopping reduces computation.
  - No stride leads to efficiency.

- **Effective Scenarios:**  
  - High precision of top-k re-ranking documents.
  - Densely annotated queries.
Bootstrapped 1 full traces after 0 examples for up to 1 rounds, amounting to 1 attempts.
Bootstrapping set 13/20


100%|██████████| 1/1 [00:00<00:00, 2169.84it/s]


**Top-Down Partitioning Reranking**  
- **Core Steps:**  
  1. Initial search over top-w documents to find a pivot at cutoff k.
  2. Compare each window to the pivot to depth D.
  
- **Advantages:**  
  - Early stopping reduces computation.
  - No stride leads to efficiency.

- **Effective Scenarios:**  
  - High precision of top-k re-ranking documents.
  - Densely annotated queries.
Bootstrapped 1 full traces after 0 examples for up to 1 rounds, amounting to 1 attempts.
Bootstrapping set 14/20


100%|██████████| 1/1 [00:00<00:00, 2330.17it/s]


**Top-Down Partitioning Reranking**  
- **Core Steps:**  
  1. Initial search over top-w documents to find a pivot at cutoff k.
  2. Compare each window to the pivot to depth D.
  
- **Advantages:**  
  - Early stopping reduces computation.
  - No stride leads to efficiency.

- **Effective Scenarios:**  
  - High precision of top-k re-ranking documents.
  - Densely annotated queries.
Bootstrapped 1 full traces after 0 examples for up to 1 rounds, amounting to 1 attempts.
Bootstrapping set 15/20


100%|██████████| 1/1 [00:00<00:00, 1952.66it/s]


**Top-Down Partitioning Reranking**  
- **Core Steps:**  
  1. Initial search over top-w documents to find a pivot at cutoff k.
  2. Compare each window to the pivot to depth D.
  
- **Advantages:**  
  - Early stopping reduces computation.
  - No stride leads to efficiency.

- **Effective Scenarios:**  
  - High precision of top-k re-ranking documents.
  - Densely annotated queries.
Bootstrapped 1 full traces after 0 examples for up to 1 rounds, amounting to 1 attempts.
Bootstrapping set 16/20


100%|██████████| 1/1 [00:00<00:00,  6.29it/s]


**Top-Down Partitioning Reranking**  
- **Core Steps:**  
  1. Initial search over top-w documents to find a pivot at cutoff k.
  2. Compare each window to the pivot to depth D.
  
- **Advantages:**  
  - Early stopping reduces computation.
  - No stride leads to efficiency.

- **Effective Scenarios:**  
  - High precision of top-k re-ranking documents.
  - Densely annotated queries.
Bootstrapped 1 full traces after 0 examples for up to 1 rounds, amounting to 1 attempts.
Bootstrapping set 17/20


100%|██████████| 1/1 [00:00<00:00,  1.87it/s]


**Top-Down Partitioning Reranking**  
- **Core Steps:**  
  1. Initial search over top-w documents to find a pivot at cutoff k.
  2. Compare each window to the pivot to depth D.
  
- **Advantages:**  
  - Early stopping reduces computation.
  - No stride leads to efficiency.

- **Effective Scenarios:**  
  - High precision of top-k re-ranking documents.
  - Densely annotated queries.
Bootstrapped 1 full traces after 0 examples for up to 1 rounds, amounting to 1 attempts.
Bootstrapping set 18/20


100%|██████████| 1/1 [00:00<00:00, 1721.80it/s]


**Top-Down Partitioning Reranking**  
- **Core Steps:**  
  1. Initial search over top-w documents to find a pivot at cutoff k.
  2. Compare each window to the pivot to depth D.
  
- **Advantages:**  
  - Early stopping reduces computation.
  - No stride leads to efficiency.

- **Effective Scenarios:**  
  - High precision of top-k re-ranking documents.
  - Densely annotated queries.
Bootstrapped 1 full traces after 0 examples for up to 1 rounds, amounting to 1 attempts.
Bootstrapping set 19/20


100%|██████████| 1/1 [00:00<00:00,  1.34it/s]


**Top-Down Partitioning Reranking**  
- **Core Steps:**  
  1. Initial search over top-w documents to find a pivot at cutoff k.
  2. Compare each window to the pivot to depth D.
  
- **Advantages:**  
  - Early stopping reduces computation.
  - No stride leads to efficiency.

- **Effective Scenarios:**  
  - High precision of top-k re-ranking documents.
  - Densely annotated queries.
Bootstrapped 1 full traces after 0 examples for up to 1 rounds, amounting to 1 attempts.
Bootstrapping set 20/20


100%|██████████| 1/1 [00:00<00:00, 3013.15it/s]
2025/11/25 16:57:52 INFO amzn_nova_prompt_optimizer.core.optimizers.miprov2.miprov2_optimizer: Entering patched_propose_instructions, patching GroundedProposer with NovaGroundedProposer
2025/11/25 16:57:52 INFO amzn_nova_prompt_optimizer.core.optimizers.miprov2.miprov2_optimizer: Patched GroundedProposer, current GroundedProposer class=<class 'amzn_nova_prompt_optimizer.core.optimizers.nova_prompt_optimizer.nova_grounded_proposer.NovaGroundedProposer'>
2025/11/25 16:57:52 INFO dspy.teleprompt.mipro_optimizer_v2: 
==> STEP 2: PROPOSE INSTRUCTION CANDIDATES <==
2025/11/25 16:57:52 INFO dspy.teleprompt.mipro_optimizer_v2: We will use the few-shot examples from the previous step, a generated dataset summary, a summary of the program code, and a randomly selected prompting tip to propose instructions.
2025/11/25 16:57:52 INFO amzn_nova_prompt_optimizer.core.optimizers.nova_prompt_optimizer.nova_grounded_proposer: Initializing NovaGroundedPropo

**Top-Down Partitioning Reranking**  
- **Core Steps:**  
  1. Initial search over top-w documents to find a pivot at cutoff k.
  2. Compare each window to the pivot to depth D.
  
- **Advantages:**  
  - Early stopping reduces computation.
  - No stride leads to efficiency.

- **Effective Scenarios:**  
  - High precision of top-k re-ranking documents.
  - Densely annotated queries.
Bootstrapped 1 full traces after 0 examples for up to 1 rounds, amounting to 1 attempts.
[Nova] Selected tip: description
[Nova] Selected tip: simple
[Nova] Selected tip: rules_based
[Nova] Selected tip: format_control
[Nova] Selected tip: creative
[Nova] Selected tip: description
[Nova] Selected tip: format_control
[Nova] Selected tip: high_stakes
[Nova] Selected tip: format_control
[Nova] Selected tip: none
[Nova] Selected tip: none
[Nova] Selected tip: none
[Nova] Selected tip: structured_prompt


In [None]:
optimized_prompt_adapter.show()

2025/11/25 16:56:58 INFO amzn_nova_prompt_optimizer.core.input_adapters.prompt_adapter: 
Standardized Prompt:


{
  "user_prompt": {
    "variables": [
      "context",
      "question"
    ],
    "template": "Assess the context and answer the question succinctly.\nQuestion: [{{question}}]\nContext: [{{context}}]",
    "metadata": {
      "format": "text"
    }
  },
  "system_prompt": {
    "variables": [],
    "template": "Task: Assess the context and answer the question succinctly.\n\nContext:\n\n- The context provides background information necessary to understand and answer the question accurately.\n\nInstructions:\n\n- Answer the question based on the provided context.\n- Ensure the response is as succinct as possible while maintaining accuracy.\n\nAny other section from Original Prompt:\n\n- There are no other sections in the Original Prompt.\n\nResponse Format:\n\n- The response MUST be concise.\n- The response MUST directly address the question.",
    "metadata": {
      "format": "text"
    }
  }
}


In [None]:
print(optimized_prompt_adapter.system_prompt)

Task: Assess the context and answer the question succinctly.

Context:

- The context provides background information necessary to understand and answer the question accurately.

Instructions:

- Answer the question based on the provided context.
- Ensure the response is as succinct as possible while maintaining accuracy.

Any other section from Original Prompt:

- There are no other sections in the Original Prompt.

Response Format:

- The response MUST be concise.
- The response MUST directly address the question.


In [None]:
print(optimized_prompt_adapter.user_prompt)

Assess the context and answer the question succinctly.
Question: [{{question}}]
Context: [{{context}}]


In [None]:
from amzn_nova_prompt_optimizer.core.evaluation import Evaluator

evaluator = Evaluator(optimized_prompt_adapter, test_set, metric_adapter, inference_adapter)

nova_prompt_optimizer_evaluation_score = evaluator.aggregate_score(model_id=NOVA_MODEL_ID)

print(f"Nova Prompt Optimizer = {nova_prompt_optimizer_evaluation_score}")

2025/11/25 16:56:58 INFO amzn_nova_prompt_optimizer.core.evaluation: Cache miss - Running new inference on Dataset
Running inference: 100%|██████████| 3/3 [00:01<00:00,  1.58it/s]
2025/11/25 16:57:00 INFO amzn_nova_prompt_optimizer.core.evaluation: Running Batch Evaluation on Dataset, using `batch_apply` metric
2025/11/25 16:57:00 INFO amzn_nova_prompt_optimizer.core.evaluation: Using cached inference results
2025/11/25 16:57:00 INFO amzn_nova_prompt_optimizer.core.evaluation: Running Evaluation on Dataset, using `apply` metric


Advanced RL techniques like RFT cold start and efficient RL algorithms enhance agentic search optimization by addressing training stability and sample efficiency, enabling superior performance in complex, high-uncertainty tasks.
Incorporating Chain-of-Thought generally improves search performance across prompting and aggregation methods, enhancing understanding and performance as shown in Figure 2.
LLMs use generative abilities, especially CoT prompts, for query expansion, outperforming traditional PRF methods.
Advanced RL techniques like RFT cold start and efficient RL algorithms enhance agentic search optimization by addressing training stability and sample efficiency, enabling superior performance in complex, high-uncertainty tasks.
Incorporating Chain-of-Thought generally improves search performance across prompting and aggregation methods, enhancing understanding and performance as shown in Figure 2.
LLMs use generative abilities, especially CoT prompts, for query expansion, outpe

In [None]:
optimized_prompt_adapter.save("optimized_prompt/")