# This notebook runs PromptU ("the Prompt Tuner") without using Streamlit

Imports

In [None]:
from pathlib import Path
from utils.docs2data import read_docs_to_dataframe
import json
import dspy
from dspy.evaluate import Evaluate
import pandas as pd

from prompt_optimization.signatures import init_signatures
from prompt_optimization.scan_report import ScanReport
from prompt_optimization.main import optimize
from prompt_optimization.output_postprocessing import aggregate_per_category, flatten_dict, dict_to_df
# from prompt_optimization.signatures import SummaryReasoning
from utils.validation import substring_metric, lm_metric, substring_metric

Set configs for prompt optimization

In [None]:
# Settings for the prompt optimization. They are passed positionally to
# optimize() and init_signatures() in the cells below.

# Kind of document being processed -- phrase it in the singular.
type_of_documents = "business report"

# Category labels handed to the optimizer and to the post-processing step.
categories = [
    'FX_HEDGING',
    'COMMODITIES_HEDGING',
    'INTEREST_RATE_HEDGING',
    'CREDIT',
    'INSURANCE',
    'FACTORING',
    'PENSIONS',
    'ESG',
    'CASH_MANAGEMENT',
    'DEPOSITS',
    'ASSET_MANAGEMENT',
    'OTHER',
]

# Collective name of the categories -- phrase it in the plural.
class_of_categories = "banking products"

# Goal of the extraction -- phrase it in the present progressive.
objective = "extracting company specific information that indicate sales opportunities for products relating to capital market or asset management"

# Metric selector; the other value noted by the author is 'llm'.
metric = 'simple'

# Optimizer selector; the other value noted by the author is 'bootstrap-few-shot'.
optimizer = 'combined'

# Only consumed by init_signatures() in this notebook.
number_of_items_in_output = 5

Call PromptU model creation and optimization

In [None]:
# Run model creation and optimization with the settings from the config cell.
# The two values returned by optimize() are kept as model_path and model_number.
model_path, model_number = optimize(
    type_of_documents,
    categories,
    class_of_categories,
    objective,
    metric,
    optimizer,
)

In [None]:
# Extend the module search path with the project checkout.
# `os` and `sys` are imported here because no other cell of the notebook
# imports them; without these imports this cell fails with a NameError.
import os
import sys

# Alternative checkout location kept from the original cell:
# sys.path.append(os.path.dirname('/home/cdsw/CB2MUHX/project-weeks-promptu/models'))

# os.path.dirname() drops the trailing "models" component, so the directory
# that ends up on sys.path is the parent of the models folder.
project_root = os.path.dirname('/home/cdsw/CB2LOI5/project-weeks-promptu/models')

# Guard against piling up duplicate entries when the cell is re-executed.
if project_root not in sys.path:
    sys.path.append(project_root)

In [None]:
# Initialize the LLM; for use when the optimizer is not used.
from module.azure_openai import AzureOpenAI

# Load the API keys from the .ini file.
from configparser import ConfigParser

config_object = ConfigParser()
# ConfigParser.read() silently skips files it cannot open and returns the list
# of files it actually parsed. Checking that list turns a missing config.ini
# into a clear error instead of an opaque KeyError on the section lookup below.
if not config_object.read("config.ini"):
    raise FileNotFoundError(
        "config.ini could not be read from the current working directory"
    )
tud_api_key = config_object["TUD_API_KEY"]['tud_api_key']
# Read alongside the TUD key; not used further in this cell.
dev_api_key = config_object["DEV_API_KEY"]['dev_api_key']

lm_gpt = AzureOpenAI(
    tud_dev="TUD",
    api_version='2024-06-01',  # other value noted by the author: '2023-07-01-preview'
    model_name="gpt-4o",
    api_key=tud_api_key,
    model_type="chat",
)

# Register the model as the LM in the dspy settings.
dspy.settings.configure(lm=lm_gpt)

In [None]:
# Read the annotated text reports into a DataFrame.
folder_path = Path('data/reports/train_annotated/')
df = read_docs_to_dataframe(folder_path)
# Disabled filter kept from the original cell:
# df = df[df['quote'].apply(len) > 0]

# Strip double quotation marks from the context column.
df['context'] = df['context'].str.replace(r'["]', '', regex=True)

# Write a spreadsheet copy of the data next to the notebook.
df.to_excel('train_data.xlsx', index=False)

# Serialize the two relevant columns to JSON and parse them back into a list
# of records, then wrap each record in a dspy.Example with 'context' as input.
records_json = df[["context", "answer"]].to_json(orient="records")
training_examples = json.loads(records_json)
train_dspy = [
    dspy.Example(record).with_inputs('context')
    for record in training_examples
]

# init_signatures() returns five objects; all of them are handed to ScanReport.
hint, ChunkerSignature, PredictRelevance, PredictCategory, Translator = init_signatures(
    type_of_documents,
    number_of_items_in_output,
    objective,
    class_of_categories,
    categories,
)
loaded_program = ScanReport(
    hint,
    ChunkerSignature,
    PredictRelevance,
    PredictCategory,
    Translator,
)

# loaded_program = ScanReport()
# NOTE: the path below is hard-coded; it is not the model_path returned by
# optimize() in the earlier cell.
loaded_program.load(path='models/0124534897.json')


Display the answer and run the evaluation

In [None]:
# Run the loaded program on a single training example (index 8) and show
# its answer as the cell output.
sample_example = train_dspy[8]
fullana = loaded_program(sample_example.context)
fullana.answer

In [None]:
# Build a reusable evaluator over the first 13 training examples,
# single-threaded, with a progress display and a 4-row result table.
evaluator = Evaluate(
    devset=train_dspy[:13],
    num_threads=1,
    display_progress=True,
    display_table=4,
)

# Score the loaded program with the substring metric and request all scores.
evaluator(
    loaded_program,
    metric=substring_metric,
    return_all_scores=True,
)

Postprocessing

In [None]:
# Pass the answer of the single-example run and the configured categories
# to aggregate_per_category().
result_dict = aggregate_per_category(
    fullana.answer,
    categories,
)

In [None]:
# Show the value returned by aggregate_per_category() as the cell output.
result_dict

Archived Code

In [None]:
# class SummaryReasoning(dspy.Signature):
#     __doc__ = f"""Given a resoning text, summarize and make a meningful conclusion. Do not include the context in the output. Remove introductions and comments."""
#     context = dspy.InputField()
#     output = dspy.OutputField(desc="German")
    
# # function to aggregate results per category
# def aggregate_per_category(result_list: list, categories) -> dict:
#     """This function aggregates the output results per category. For the relevance score the maximum is taken.
#     input:
#     df : pd.DataFrame
#         Dictionary in json format
#     categories
#         a list of the categories specified by the user
#     output:
#     df_agg:
#         pd.DataFrame with the aggregated results per category
#     """

#     result_dict = flatten_dict(result_list) # flatten
#     df = dict_to_df(result_dict) # convert to df
#     categories = df.categories.unique
#     print(categories)

#     # initialize df_agg
#     df_agg=pd.DataFrame(columns = df.columns)

#     # Loop through categories
#     for item in categories:
#         print(item)
#         matched = False
#         quote = ""
#         relevance_score = 0
#         reasoning_categories = ""
#         reasoning_relevance = ""
#         n=0
#         for i in range(len(df)):
#             print(df.loc[i].categories)
#             # add results if there are some matching
#             if item in df.loc[i].categories:
#                 print(item)
#                 n += 1
#                 matched = True
#                 relevance_score = max(relevance_score, int(df.loc[i].relevance_score))
#                 quote = f"{quote}# Quote {n}: {str(df.loc[i].quote)}"
#                 reasoning_categories = df.loc[i].reasoning_categories
#                 reasoning_relevance = df.loc[i].reasoning_relevance

#         if matched:
#             new_row = {'quote': quote, 'relevance_score': relevance_score, 'categories': item, 'reasoning_categories': reasoning_categories, 'reasoning_relevance': reasoning_relevance}
#             print('New row:', new_row)
#             df_agg = pd.concat([df_agg, pd.DataFrame([new_row])], ignore_index=True)
    
#     print('DF AGG', df_agg)

#     # sort values descending by relevance_score
#     df_agg = df_agg.sort_values(by=['relevance_score'], ascending=False)

#     print('DF AGG', df_agg)

#     summary_reasoning_categories = []
#     summary_reasoning_relevance = []

#     summarize_reasoning = dspy.Predict(SummaryReasoning)
#     for i in range(len(df_agg)):
#         summary_reasoning_categories.append(extract_output(summarize_reasoning(context = df_agg.loc[i]['reasoning_categories']).output))
#         summary_reasoning_relevance.append(extract_output(summarize_reasoning(context = df_agg.loc[i]['reasoning_relevance']).output))
    
#     df_agg['reasoning_categories'] = summary_reasoning_categories
#     df_agg['reasoning_relevance'] = summary_reasoning_relevance

#     print('DF AGG', df_agg)

#     print('df t', df_agg.transpose())
    
#     dict_agg = df_agg.transpose().to_dict() # convert to dict
#     print(dict_agg)
    
#     return dict_agg

In [None]:
# summary_categories_res = {}

# summarize_reasoning = dspy.Preduct(SummaryReasoning)
# for i in len(result_dict):
#         summary_categories_res[i] = summarize_reasoning(result_dict[i].reasoning_categories)