In [1]:
from src.prompt_manager import PromptManager, PromptSuite, PromptTemplate, create_empty_prompt_template, create_chat_prompt_dict
from pathlib import Path

# **McGill Feedback QA**

In [2]:
suite_folder = Path("../prompts/PromptSuites/MCGILL_QA_FEEDBACK")
pm = PromptManager(suite_folder)

PromptManager initialized with folder: ..\prompts\PromptSuites\MCGILL_QA_FEEDBACK


## Naive holistic prompt

In [22]:
# SYSTEM_PROMPT = """
# Instruction: You will be given a Question & answer pair. Rate the quality of the Answer provided for the Question on a scale of 1 to 4, where 1 is the lowest quality and 4 is the highest quality.
# """

SYSTEM_PROMPT = """
You are an expert evaluator. Your task is to rate the overall quality of the Answer provided for the Question on a scale of 1 to 4, where 1 is the lowest quality and 4 is the highest quality.
"""

USER_PROMPT_TEMPLATE = """Question:
{question}

Answer: 
{answer}
"""

template_chat = {
    "system": SYSTEM_PROMPT.strip(),
    "user": USER_PROMPT_TEMPLATE.strip()}


naive_dict = create_chat_prompt_dict(name = 'naive_holistic',
                        description='Naive (baseline) prompt with single holistic dimension', 
                        template_chat=template_chat,
                        token_constraints=["1", "2", "3", "4"],
                        tags = ['naive', 'holistic', 'baseline', 'scale_4'],
                        dimension_name='holistic')


suite = PromptSuite.from_dict(naive_dict)
suite.save(suite_folder, filename = "holistic_baseline")


‚úÖ Saved suite to ..\prompts\PromptSuites\MCGILL_QA_FEEDBACK\holistic_baseline_suite_83458ec049.yml


WindowsPath('../prompts/PromptSuites/MCGILL_QA_FEEDBACK/holistic_baseline_suite_83458ec049.yml')

## BARS style holistic prompt

### Without XML

In [3]:
SYSTEM_PROMPT = """
You are an expert evaluator. Your task is to rate the Overall Quality
of the Answer provided for the Question on a scale of 1 to 4.


*Overall Quality: To what extent does the answer correctly and sufficiently resolve the specific question asked, given its constraints and intent?*

1 = **Bad**: The answer is irrelevant or factually incorrect. It discusses
a completely different topic or fails to address the core question at all.

2 = **Could be Improved**: The answer addresses the topic but is significantly
flawed. It may be too sparse, miss critical constraints, require the user
to infer the answer, or contain significant grammar/logic issues.

3 = **Good**: The answer is adequate and explicitly addresses the question.
It contains the necessary information to resolve the user's intent, though
it may lack additional context or specific recommendations.

4 = **Excellent**: The answer is highly informative, accurate, and unequivocal.
It answers the question directly, covers all necessary constraints or exceptions implied by the question,
and provides useful context/recommendations.'
"""

USER_PROMPT_TEMPLATE = """Question:
{question}

Answer: 
{answer}
"""

template_chat = {
    "system": SYSTEM_PROMPT.strip(),
    "user": USER_PROMPT_TEMPLATE.strip()}


bars_dict = create_chat_prompt_dict(name = 'BARS_holistic',
                        description='BARS (improved) prompt with single holistic dimension', 
                        template_chat=template_chat,
                        token_constraints=["1", "2", "3", "4"],
                        tags = ['BARS', 'holistic', 'scale_4'],
                        dimension_name='holistic')


suite = PromptSuite.from_dict(bars_dict)
suite.save(suite_folder, filename = "holistic_BARS")


‚úÖ Saved suite to ..\prompts\PromptSuites\MCGILL_QA_FEEDBACK\holistic_BARS_suite_5067939a72.yml


WindowsPath('../prompts/PromptSuites/MCGILL_QA_FEEDBACK/holistic_BARS_suite_5067939a72.yml')

### With XML

In [4]:
SYSTEM_PROMPT = """
<instruction>
You are an expert evaluator. Your task is to rate the Overall Quality
of the <candidate_answer> provided for the <question> on a scale of 1 to 4.
</instruction>

<rubric>
    <definition>
        *Overall Quality: To what extent does the answer correctly and sufficiently resolve the specific question asked, given its constraints and intent?*
    </definition>

    <scale>
        1 = **Bad**: The answer is irrelevant or factually incorrect. It discusses
        a completely different topic or fails to address the core question at all.

        2 = **Could be Improved**: The answer addresses the topic but is significantly
        flawed. It may be too sparse, miss critical constraints, require the user
        to infer the answer, or contain significant grammar/logic issues.

        3 = **Good**: The answer is adequate and explicitly addresses the question.
        It contains the necessary information to resolve the user's intent, though
        it may lack additional context or specific recommendations.

        4 = **Excellent**: The answer is highly informative, accurate, and unequivocal.
        It answers the question directly, covers all necessary constraints or exceptions implied by the question,
        and provides useful context/recommendations.
    </scale>
</rubric>
"""

USER_PROMPT_TEMPLATE = """
<question>
{question}
</question>

<candidate_answer>
{answer}
</candidate_answer>
"""


template_chat = {
    "system": SYSTEM_PROMPT.strip(),
    "user": USER_PROMPT_TEMPLATE.strip()}


bars_dict = create_chat_prompt_dict(name = 'BARS_holistic_XML',
                        description='BARS (improved) XML formatted prompt with single holistic dimension', 
                        template_chat=template_chat,
                        token_constraints=["1", "2", "3", "4"],
                        tags = ['BARS', 'holistic', 'scale_4', 'XML'],
                        dimension_name='holistic')


suite = PromptSuite.from_dict(bars_dict)
suite.save(suite_folder, filename = "holistic_BARS_XML")


‚úÖ Saved suite to ..\prompts\PromptSuites\MCGILL_QA_FEEDBACK\holistic_BARS_XML_suite_4a3297c956.yml


WindowsPath('../prompts/PromptSuites/MCGILL_QA_FEEDBACK/holistic_BARS_XML_suite_4a3297c956.yml')

## BARS style formative prompt no XML

In [24]:
USER_PROMPT_TEMPLATE = """Question:
{question}

Answer: 
{answer}
"""

SYSTEM_PROMPT = """
You are an expert evaluator. Your task is to rate the Relevance of the Question provided for the Answer on a scale of 1 to 4.

*Relevance: Does the answer discuss the specific topic and entities requested?*
1 = **Irrelevant**: Discusses a completely different topic (e.g., "baby delivery" instead of "respirators").
2 = **Topic Mismatch**: Discusses a related category but the wrong specific entity (e.g., "Work Visa" instead of "Pandemic Visa").
3 = **Broadly Relevant**: Discusses the correct topic but includes significant tangential or unrelated information.
4 = **Precise**: Focuses exclusively on the specific entity and topic requested in the question.
"""

template_chat = {
    "system": SYSTEM_PROMPT.strip(),
    "user": USER_PROMPT_TEMPLATE.strip()}


relevance_dict = create_chat_prompt_dict(name = 'Relevance',
                        description='Relevance BARS style prompt', 
                        template_chat=template_chat,
                        token_constraints=["1", "2", "3", "4"],
                        tags = ['BARS', 'relevance', 'scale_4'],
                        dimension_name='relevance')


SYSTEM_PROMPT = """
Instruction: Rate the Completeness of the Answer provided for the Question on a scale of 1 to 4. Use the following rubric to determine the score:

*Completeness: Does the answer contain all necessary information to resolve the user's intent?*
1 = **Deficient**: Misses the core answer entirely; the user cannot solve their problem.
2 = **Sparse**: Provides the basic answer but lacks detail, context, or specific constraints
3 = **Adequate**: Covers the main points and necessary constraints.
4 = **Exhaustive**: "Super detailed" and informative; covers the answer, constraints, exceptions, and provides context/links.
"""

template_chat = {
    "system": SYSTEM_PROMPT.strip(),
    "user": USER_PROMPT_TEMPLATE.strip()}


completeness_dict = create_chat_prompt_dict(name = 'Completeness',
                        description='Completeness BARS style prompt', 
                        template_chat=template_chat,
                        token_constraints=["1", "2", "3", "4"],
                        tags = ['BARS', 'completeness', 'scale_4'],
                        dimension_name='completeness')

SYSTEM_PROMPT = """
Instruction: Rate the Directness of the Answer provided for the Question on a scale of 1 to 4. Use the following rubric to determine the score:

*Directness: Is the answer explicit and unequivocal?*
1 = **Confusing/Contradictory**: Logic is flawed (e.g., says "No" but implies "Yes") or grammar obscures meaning.
2 = **Implicit/Inferred**: The user must use logical entailment to deduce the answer (e.g., "Only X is allowed" implies "Y is not").
3 = **Clear**: The answer is stated clearly but may be buried after an intro or disclaimer.
4 = **Unequivocal**: The answer is explicit, immediate, and leaves no room for interpretation.
"""

template_chat = {
    "system": SYSTEM_PROMPT.strip(),
    "user": USER_PROMPT_TEMPLATE.strip()}


directness_dict = create_chat_prompt_dict(name = 'Directness',
                        description='directness BARS style prompt', 
                        template_chat=template_chat,
                        token_constraints=["1", "2", "3", "4"],
                        tags = ['BARS', 'directness', 'scale_4'],
                        dimension_name='directness')

pt_relevance = PromptTemplate.from_dict(relevance_dict)
pt_completeness = PromptTemplate.from_dict(completeness_dict)
pt_directness = PromptTemplate.from_dict(directness_dict)


deconstructed_qa_suite = PromptSuite.from_list([pt_relevance, pt_completeness, pt_directness])
deconstructed_qa_suite.save(suite_folder, "formative_BARS")


‚úÖ Saved suite to ..\prompts\PromptSuites\MCGILL_QA_FEEDBACK\formative_BARS_suite_080be5c39ffc.yml


WindowsPath('../prompts/PromptSuites/MCGILL_QA_FEEDBACK/formative_BARS_suite_080be5c39ffc.yml')

In [25]:
pt_directness

PromptTemplate(id='8c93fdb28e', name='Directness', dimension_name='directness', description='directness BARS style prompt', token_constraints=['1', '2', '3', '4'], tags=['BARS', 'directness', 'scale_4'], system_message='Instruction: Rate the Directness of the Answer provided for the Question on a scale of 1 to 4. Use the following rubric to determine the score:\n\n*Directness: Is the answer explicit and unequivocal?*\n1 = **Confusing/Contradictory**: Logic is flawed (e.g., says "No" but implies "Yes") or grammar obscures meaning.\n2 = **Implicit/Inferred**: The user must use logical entailment to deduce the answer (e.g., "Only X is allowed" implies "Y is not").\n3 = **Clear**: The answer is stated clearly but may be buried after an intro or disclaimer.\n4 = **Unequivocal**: The answer is explicit, immediate, and leaves no room for interpretation.', user_message_template='Question:\n{question}\n\nAnswer: \n{answer}', _cached_constraint_ids=None)

## BARS style formative prompt XML

In [26]:
# Relevance
SYSTEM_PROMPT = """
<instruction>
You are an expert evaluator. Your task is to rate the Relevance of the <candidate_answer> provided for the <question> on a scale of 1 to 4.
</instruction>

<rubric>
    <definition>
        *Relevance: Does the answer discuss the specific topic and entities requested?*
    </definition>

    <scale>
    1 = **Irrelevant**: Discusses a completely different topic (e.g., "baby delivery" instead of "respirators").
    2 = **Topic Mismatch**: Discusses a related category but the wrong specific entity (e.g., "Work Visa" instead of "Pandemic Visa").
    3 = **Broadly Relevant**: Discusses the correct topic but includes significant tangential or unrelated information.
    4 = **Precise**: Focuses exclusively on the specific entity and topic requested in the question.
    </scale>
</rubric>
"""

USER_PROMPT_TEMPLATE = """
<question>
{question}
</question>

<candidate_answer>
{answer}
</candidate_answer>
"""

template_chat = {
    "system": SYSTEM_PROMPT.strip(),
    "user": USER_PROMPT_TEMPLATE.strip()}


relevance_dict = create_chat_prompt_dict(name = 'Relevance_XML',
                        description='Relevance BARS style prompt with XML tags', 
                        template_chat=template_chat,
                        token_constraints=["1", "2", "3", "4"],
                        tags = ['BARS', 'XML', 'relevance', 'scale_4'],
                        dimension_name='relevance')


# Completeness
SYSTEM_PROMPT = """
<instruction>
You are an expert evaluator. Your task is to rate the Completeness of the <candidate_answer> provided for the <question> on a scale of 1 to 4.
</instruction>

<rubric>
    <definition>
        *Completeness: Does the answer contain all necessary information to resolve the user's intent?*
    </definition>

    <scale>
    1 = **Deficient**: Misses the core answer entirely; the user cannot solve their problem.
    2 = **Sparse**: Provides the basic answer but lacks detail, context, or specific constraints
    3 = **Adequate**: Covers the main points and necessary constraints.
    4 = **Exhaustive**: "Super detailed" and informative; covers the answer, constraints, exceptions, and provides context/links.
    </scale>
</rubric>
"""

USER_PROMPT_TEMPLATE = """
<question>
{question}
</question>

<candidate_answer>
{answer}
</candidate_answer>
"""

template_chat = {
    "system": SYSTEM_PROMPT.strip(),
    "user": USER_PROMPT_TEMPLATE.strip()}


completeness_dict = create_chat_prompt_dict(name = 'Completeness_XML',
                        description='Completeness BARS style prompt with XML tags', 
                        template_chat=template_chat,
                        token_constraints=["1", "2", "3", "4"],
                        tags = ['BARS', 'XML', 'completeness', 'scale_4'],
                        dimension_name='completeness')

# Directness
SYSTEM_PROMPT = """
<instruction>
You are an expert evaluator. Your task is to rate the Directness of the <candidate_answer> provided for the <question> on a scale of 1 to 4.
</instruction>

<rubric>
    <definition>
        *Directness: How easily can the user extract the explicit answer?*
    </definition>

    <scale>
        1 = **Obscured**: The answer is hidden behind contradictions, confusing grammar, or total evasiveness. The user is left guessing.
        2 = **Indirect**: The answer is present but requires user effort to decode (e.g., through inference, reading between the lines, or connecting separate points).
        3 = **Delayed**: The answer is explicitly stated, but the user must read through introductory fluff, disclaimers, or excessive context to find it.
        4 = **Immediate**: The answer is the very first thing presented (or clearly highlighted), leaving zero ambiguity or search time.
    </scale>
</rubric>
"""

USER_PROMPT_TEMPLATE = """
<question>
{question}
</question>

<candidate_answer>
{answer}
</candidate_answer>
"""

template_chat = {
    "system": SYSTEM_PROMPT.strip(),
    "user": USER_PROMPT_TEMPLATE.strip()}


directness_dict = create_chat_prompt_dict(name = 'Directness_XML',
                        description='directness BARS style prompt with XML tags', 
                        template_chat=template_chat,
                        token_constraints=["1", "2", "3", "4"],
                        tags = ['BARS', 'XML', 'directness', 'scale_4'],
                        dimension_name='directness')

pt_relevance = PromptTemplate.from_dict(relevance_dict)
pt_completeness = PromptTemplate.from_dict(completeness_dict)
pt_directness = PromptTemplate.from_dict(directness_dict)


deconstructed_qa_suite = PromptSuite.from_list([pt_relevance, pt_completeness, pt_directness])
deconstructed_qa_suite.save(suite_folder, "formative_BARS_XML")


‚úÖ Saved suite to ..\prompts\PromptSuites\MCGILL_QA_FEEDBACK\formative_BARS_XML_suite_2f16b595bed1.yml


WindowsPath('../prompts/PromptSuites/MCGILL_QA_FEEDBACK/formative_BARS_XML_suite_2f16b595bed1.yml')

In [27]:
pm = PromptManager(folder=suite_folder)
prompt_suites = pm.load_all()

PromptManager initialized with folder: ..\prompts\PromptSuites\MCGILL_QA_FEEDBACK
Loading 4 suites from ..\prompts\PromptSuites\MCGILL_QA_FEEDBACK...


In [28]:
list_of_sets = [set(vals.tags) for vals in pm.suites['080be5c39ffc'].templates.values()]

In [29]:
"BARS" in set.intersection(*list_of_sets)

True

-------

# **Barter Deals**

In [None]:
suite_folder = Path("../prompts/PromptSuites/BARTER_DEALS")
pm = PromptManager(suite_folder)

## Naive holistic prompt

In [5]:
USER_PROMPT_TEMPLATE = """Deal text:
{deal_text}

Rate the marketing copy on a scale of 1 to 4.
"""

SYSTEM_PROMPT = """
You will be given a deal text, consisting of marketing copy. Rate the marketing copy on a likert scale of 1 to 4.
"""

template_chat = {
    "system": SYSTEM_PROMPT.strip(),
    "user": USER_PROMPT_TEMPLATE.strip()}


naive_dict = create_chat_prompt_dict(name = 'naive_holistic',
                        description='Naive (baseline) prompt with single holistic dimension', 
                        template_chat=template_chat,
                        constrained_output=["1", "2", "3", "4"],
                        tags = ['naive', 'holistic', 'baseline', 'scale_4'],
                        dimension_name='holistic')


suite = PromptSuite.from_dict(naive_dict)



In [7]:
suite.save(Path('../prompts/PromptSuites/BARTER_DEALS')) #TODO Add promptname to filename

‚úÖ Saved suite to ..\prompts\PromptSuites\BARTER_DEALS\suite_3d33389357.yml


WindowsPath('../prompts/PromptSuites/BARTER_DEALS/suite_3d33389357.yml')

-----------

# **Tests**

## Generation

In [21]:
from src.modeler import Modeler
from src.prompt_manager import PromptManager
from src.pipeline import run_experiment
import pandas as pd

- Load prompts

In [14]:
pm = PromptManager(Path("../prompts/PromptSuites/BARTER_DEALS"))
suites = pm.load_all()

PromptManager initialized with folder: ..\prompts\PromptSuites\BARTER_DEALS
Loading 1 suites from ..\prompts\PromptSuites\BARTER_DEALS...


- Load input data

In [23]:
df_deals = pd.read_parquet('../data/raw/BARTER_DEALS.parquet')

- Config

In [None]:
model_name = 'google/gemma-3-1b-it'
# model_name = 'google/gemma-3-4b-it'
save_dir = Path('../results_test') #results_folder
file_stem = "barter_test" # analysis_name
variable_names = ['deal_text']
batch_size = 2
id_col = 'deal_id'
top_k = 1000
assistant_prefix = ["", "RATING: "][1]
shards_per_save = 10 # Total rows saved will be shards*batch_size

- Load and configure LLM

In [25]:
modeler = Modeler('google/gemma-3-1b-it')

Using device: cuda
Padding side: left


In [26]:
modeler.set_token_constraints(list("1234"))

In [38]:
for id, suite in suites.items():
    run_experiment(df=df_deals[:20],
               modeler=modeler,
               suite=suite,
               save_dir=save_dir,
               file_stem=file_stem,
               model_name=model_name,
               batch_size=batch_size,
               id_col=id_col,
               top_k=top_k,
               shards_per_save=shards_per_save)
    

running generation function...
Processing 20 new IDs...
Resuming at Shard 0. Starting streaming inference...
Starting streaming inference...


 91%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà | 10/11 [00:01<00:00,  5.68it/s]

 Saved checkpoint: barter_test_part_0000.pt





## Loading results

In [30]:
from src.data_manager import DataManager

In [39]:
dm = DataManager(save_dir)

In [40]:
dm.load_all()

In [41]:
dm.create_analysis_dataframe()

Unnamed: 0,prompt_id,input_id,dimension_name,assistant_prefix,input_length,model_name,top_k,constrained_token_ids,sequences,top_k_logits,constrained_logits,top_k_tokens,constrained_tokens
0,3d33389357,0196edc7-e6a8-00c8-6ff5-ea60fa3f2895,holistic,,214,google/gemma-3-1b-it,1000,"[236770, 236778, 236800, 236812]","[2, 2, 105, 2364, 107, 3048, 795, 577, 2238, 4...","[[31.875, 31.875, 30.125, 28.875, 26.875, 25.6...","[[19.625, 18.125, 25.375, 30.125]]","[[Okay, I, 4, **, Here, ‚òÖ‚òÖ, 3, Overall, Let, Y...","[1, 2, 3, 4]"
1,3d33389357,019689e5-f489-00c8-8c26-06b3ea0961a5,holistic,,214,google/gemma-3-1b-it,1000,"[236770, 236778, 236800, 236812]","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[[44.25, 35.75, 35.0, 33.5, 30.0, 29.625, 29.1...","[[24.25, 22.0, 24.375, 29.625]]","[[Okay, Let, I, Here, **, 4, Alright, My, Rati...","[1, 2, 3, 4]"
2,3d33389357,019689e2-bfb5-00c8-6485-23352d64b0e5,holistic,,160,google/gemma-3-1b-it,1000,"[236770, 236778, 236800, 236812]","[0, 0, 0, 0, 0, 2, 2, 105, 2364, 107, 3048, 79...","[[37.5, 35.5, 34.0, 30.625, 30.25, 29.0, 26.25...","[[22.25, 20.5, 26.0, 34.0]]","[[Okay, I, 4, Let, **, Here, ‚òÖ‚òÖ, Rating, 3, On...","[1, 2, 3, 4]"
3,3d33389357,019689e3-d7a5-00c8-f5a8-6bfe51a21cd6,holistic,,160,google/gemma-3-1b-it,1000,"[236770, 236778, 236800, 236812]","[2, 2, 105, 2364, 107, 3048, 795, 577, 2238, 4...","[[37.75, 35.75, 31.375, 30.375, 30.375, 29.75,...","[[22.0, 21.625, 28.0, 30.375]]","[[Okay, I, Let, **, 4, Here, 3, My, Overall, O...","[1, 2, 3, 4]"
4,3d33389357,019689e5-7dd0-00c8-615b-7750a8fec2f3,holistic,,189,google/gemma-3-1b-it,1000,"[236770, 236778, 236800, 236812]","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[[40.5, 33.25, 32.5, 31.875, 31.625, 29.25, 26...","[[21.75, 21.625, 24.25, 31.625]]","[[Okay, I, Here, Let, 4, **, Alright, Rating, ...","[1, 2, 3, 4]"
5,3d33389357,019689e2-efa8-00c8-0aff-9b643cb6337b,holistic,,189,google/gemma-3-1b-it,1000,"[236770, 236778, 236800, 236812]","[2, 2, 105, 2364, 107, 3048, 795, 577, 2238, 4...","[[37.25, 36.25, 34.0, 30.875, 29.625, 29.125, ...","[[21.25, 20.5, 25.5, 34.0]]","[[I, Okay, 4, Let, **, Here, ‚òÖ‚òÖ, Overall, Rati...","[1, 2, 3, 4]"
6,3d33389357,0196cebe-6b09-00c8-c158-41d4ad0c8a0e,holistic,,203,google/gemma-3-1b-it,1000,"[236770, 236778, 236800, 236812]","[2, 2, 105, 2364, 107, 3048, 795, 577, 2238, 4...","[[37.0, 35.0, 34.25, 29.625, 29.0, 28.625, 28....","[[21.75, 19.5, 28.25, 37.0]]","[[4, I, Okay, **, Overall, Let, Here, 3, ‚òÖ‚òÖ, O...","[1, 2, 3, 4]"
7,3d33389357,019689e4-306b-00c8-d98c-70210470c288,holistic,,203,google/gemma-3-1b-it,1000,"[236770, 236778, 236800, 236812]","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[[44.0, 31.5, 31.375, 29.375, 29.125, 28.125, ...","[[20.125, 18.125, 18.25, 21.5]]","[[Okay, Please, Let, Here, I, Alright, **, OK,...","[1, 2, 3, 4]"
8,3d33389357,019689e4-bce7-00c8-0815-d6a659be9456,holistic,,265,google/gemma-3-1b-it,1000,"[236770, 236778, 236800, 236812]","[2, 2, 105, 2364, 107, 3048, 795, 577, 2238, 4...","[[37.75, 35.0, 31.25, 30.375, 30.0, 29.0, 28.3...","[[20.75, 17.5, 23.625, 31.25]]","[[Okay, I, 4, **, Here, Let, Overall, Out, ‚òÖ‚òÖ,...","[1, 2, 3, 4]"
9,3d33389357,019689e2-f8cf-00c8-f98b-2dd657eec2a8,holistic,,265,google/gemma-3-1b-it,1000,"[236770, 236778, 236800, 236812]","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[[40.5, 35.25, 33.5, 31.625, 31.375, 29.75, 29...","[[24.875, 24.0, 26.375, 29.75]]","[[Okay, I, Let, Here, **, 4, Rating, My, Alrig...","[1, 2, 3, 4]"


------

# REST

In [3]:
USER_PROMPT_TEMPLATE = """Deal text:
{deal_text}

Rate the persuasiveness of the marketing copy on a scale of 1 to 4.
"""

SYSTEM_PROMPT = """
You will be given a deal text aka marketing copy. Rate the persuasiveness of the copy 1 to 4.
"""

template_chat = {
    "system": SYSTEM_PROMPT.strip(),
    "user": USER_PROMPT_TEMPLATE.strip()}



persuasiveness_dict = create_chat_prompt_dict(name = 'test',
                        description='desc_test', 
                        template_chat=template_chat,
                        constrained_output=["1", "2", "3", "4"],
                        tags = ['test'],
                        dimension_name='persuasiveness')


USER_PROMPT_TEMPLATE = """Deal text:
{deal_text}

Rate the clarity of the marketing copy on a scale of 1 to 4.
"""

SYSTEM_PROMPT = """
You will be given a deal text aka marketing copy. Rate the clarity of the copy 1 to 4.
"""

template_chat = {
    "system": SYSTEM_PROMPT.strip(),
    "user": USER_PROMPT_TEMPLATE.strip()}


clarity_dict = create_chat_prompt_dict(name = 'test',
                        description='desc_test', 
                        template_chat=template_chat,
                        constrained_output=["1", "2", "3", "4"],
                        tags = ['test'],
                        dimension_name='clarity')

pt_persuasiveness = PromptTemplate.from_dict(persuasiveness_dict)
pt_clarity = PromptTemplate.from_dict(clarity_dict)


suite = PromptSuite.from_list([pt_persuasiveness, pt_clarity])

In [4]:
suite.templates

{'persuasiveness': PromptTemplate(id='0fd7136464', name='test', dimension_name='persuasiveness', description='desc_test', constrained_output=['1', '2', '3', '4'], tags=['test'], system_message='You will be given a deal text aka marketing copy. Rate the persuasiveness of the copy 1 to 4.', user_message_template='Deal text:\n{deal_text}\n\nRate the persuasiveness of the marketing copy on a scale of 1 to 4.', assistant_prefix=''),
 'clarity': PromptTemplate(id='6246301834', name='test', dimension_name='clarity', description='desc_test', constrained_output=['1', '2', '3', '4'], tags=['test'], system_message='You will be given a deal text aka marketing copy. Rate the clarity of the copy 1 to 4.', user_message_template='Deal text:\n{deal_text}\n\nRate the clarity of the marketing copy on a scale of 1 to 4.', assistant_prefix='')}

In [5]:
suite.to_dict()

{'id': '4f019fe61603',
 'dimensions': {'persuasiveness': {'name': 'test',
   'description': 'desc_test',
   'constrained_output': ['1', '2', '3', '4'],
   'tags': ['test'],
   'template_chat': {'system': 'You will be given a deal text aka marketing copy. Rate the persuasiveness of the copy 1 to 4.',
    'user': 'Deal text:\n{deal_text}\n\nRate the persuasiveness of the marketing copy on a scale of 1 to 4.'}},
  'clarity': {'name': 'test',
   'description': 'desc_test',
   'constrained_output': ['1', '2', '3', '4'],
   'tags': ['test'],
   'template_chat': {'system': 'You will be given a deal text aka marketing copy. Rate the clarity of the copy 1 to 4.',
    'user': 'Deal text:\n{deal_text}\n\nRate the clarity of the marketing copy on a scale of 1 to 4.'}}}}

In [13]:
with open("../prompts/test.yml", "w") as f:
    yaml.safe_dump(suite.to_dict(), f, sort_keys = False)

In [7]:
with open("../prompts/test.yaml", "r") as f:
    a = yaml.safe_load(f)

In [9]:
PromptSuite.from_dict(a)

PromptSuite(id='4f019fe61603', templates={'persuasiveness': PromptTemplate(id='0fd7136464', name='test', dimension_name='persuasiveness', description='desc_test', constrained_output=['1', '2', '3', '4'], tags=['test'], system_message='You will be given a deal text aka marketing copy. Rate the persuasiveness of the copy 1 to 4.', user_message_template='Deal text:\n{deal_text}\n\nRate the persuasiveness of the marketing copy on a scale of 1 to 4.', assistant_prefix=''), 'clarity': PromptTemplate(id='6246301834', name='test', dimension_name='clarity', description='desc_test', constrained_output=['1', '2', '3', '4'], tags=['test'], system_message='You will be given a deal text aka marketing copy. Rate the clarity of the copy 1 to 4.', user_message_template='Deal text:\n{deal_text}\n\nRate the clarity of the marketing copy on a scale of 1 to 4.', assistant_prefix='')})

In [14]:
from src.prompt_manager import PromptManager
pm = PromptManager(Path("../prompts"))
pm.load_all()

PromptManager initialized with folder: ..\prompts
Loading 1 suites from ..\prompts...


{'4f019fe61603': PromptSuite(id='4f019fe61603', templates={'persuasiveness': PromptTemplate(id='0fd7136464', name='test', dimension_name='persuasiveness', description='desc_test', constrained_output=['1', '2', '3', '4'], tags=['test'], system_message='You will be given a deal text aka marketing copy. Rate the persuasiveness of the copy 1 to 4.', user_message_template='Deal text:\n{deal_text}\n\nRate the persuasiveness of the marketing copy on a scale of 1 to 4.', assistant_prefix=''), 'clarity': PromptTemplate(id='6246301834', name='test', dimension_name='clarity', description='desc_test', constrained_output=['1', '2', '3', '4'], tags=['test'], system_message='You will be given a deal text aka marketing copy. Rate the clarity of the copy 1 to 4.', user_message_template='Deal text:\n{deal_text}\n\nRate the clarity of the marketing copy on a scale of 1 to 4.', assistant_prefix='')})}

In [9]:
pt = suite.templates['persuasiveness']

-------

In [6]:
pt = create_empty_prompt_template()

USER_PROMPT_TEMPLATE = """Deal text:
{deal_text}

Rate the quality of the marketing copy.
"""

SYSTEM_PROMPT = """
You will be given a deal text aka marketing copy. Rate the quality of the copy. 
"""

pt.user_message_template = USER_PROMPT_TEMPLATE
pt.system_message = SYSTEM_PROMPT
pt.assistant_prefix = "RATING: "

ps = PromptSuite(id= "test", templates = {"abc": pt})

In [None]:
USER_PROMPT_TEMPLATE = """Deal text:
{deal_text}

Rate the quality of the marketing copy.
"""

SYSTEM_PROMPT = """
You will be given a deal text aka marketing copy. Rate the quality of the copy. 
"""

template_chat = {
    "system": SYSTEM_PROMPT.strip(),
    "user": USER_PROMPT_TEMPLATE.strip()}



temp_dict = create_chat_prompt_dict(name = 'test',
                        description='desc_test', 
                        template_chat=template_chat,
                        constrained_output=["1", "2", "3", "4"],
                        tags = ['test'],
                        dimension_name='test')

pt = PromptTemplate.from_dict(temp_dict)

PromptTemplate(id=None, name='test', dimension_name='test', description='desc_test', constrained_output=['1', '2', '3', '4'], tags=['test'], system_message='You will be given a deal text aka marketing copy. Rate the quality of the copy.', user_message_template='Deal text:\n{deal_text}\n\nRate the quality of the marketing copy.', assistant_prefix='')

In [15]:
pt

PromptTemplate(id=None, name='sandbox_temp', dimension_name='holistic', description='Auto-generated for sandbox use.', constrained_output=[''], tags=['sandbox'], system_message='\nYou will be given a deal text aka marketing copy. Rate the quality of the copy. \n', user_message_template='Deal text:\n{deal_text}\n\nRate the quality of the marketing copy.\n', assistant_prefix='RATING: ')

In [11]:
a = PromptSuite.from_list([pt, pt])

In [14]:
a.dimensions

['holistic']