### Building Prompts Example

In [8]:
import re
import json
from typing import List, Dict

import text_processing
import prompt_templates


# Paths to the annotated example file and to the target text to annotate.
# NOTE(review): the output recorded for this cell shows process_input() raising
# "Invalid path" for example_file_path, while the prompts-data cell reads from
# 'data/annotations' -- confirm whether these paths need a 'data/' prefix.
example_file_path = 'annotations/Actual_Budget/Accounts_&_Transactions.txt'
target_file_path = 'input/lh-ehr/Direct_Messaging_README.txt'

# Process the example file once; the result is reused below instead of
# calling process_input() a second time on the same path.
example_data = text_processing.process_input(example_file_path)

# Load the privacy ontology
ontology_path = "privacy_ontology.json"
privacy_ontology = prompt_templates.load_privacy_ontology(ontology_path)

# Use the first processed entry as the demonstration example
example_file = example_data[0]

# Load the annotated version of the target file, then its raw text.
target_file_annotations = text_processing.process_input(target_file_path)
# Context manager closes the handle deterministically; the explicit encoding
# matches the one used when reading the prompts JSON and avoids depending on
# the platform default.
with open(target_file_path, encoding='utf-8') as target_file:
    new_text_to_annotate = target_file.read()

# Generate the prompt
prompt_example = prompt_templates.create_annotation_prompt(example_file, new_text_to_annotate, privacy_ontology)

# Print the prompt to review it
print(prompt_example)
print(f"\nToken Count: {prompt_templates.count_tokens(prompt_example)} tokens")


Unexpected exception formatting exception. Falling back to standard exception


Traceback (most recent call last):
  File "c:\Users\Baldw\AppData\Local\Programs\Python\Python311\Lib\site-packages\IPython\core\interactiveshell.py", line 3508, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "C:\Users\Baldw\AppData\Local\Temp\ipykernel_34696\784702796.py", line 14, in <module>
    example_data = text_processing.process_input(example_file_path)
                   ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\Baldw\Desktop\UMO\PERC\privacy_stories_1.1\text_processing.py", line 55, in process_input
ValueError: Invalid path. Please provide a valid file or directory path.

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "c:\Users\Baldw\AppData\Local\Programs\Python\Python311\Lib\site-packages\IPython\core\interactiveshell.py", line 2105, in showtraceback
    stb = self.InteractiveTB.structured_traceback(
          ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\

### Create and load prompt template dict for all files

In [None]:

import os
import json
import text_processing

prompts_data_json_file = 'data/annotations/prompts_data.json'

# Reuse the prompts JSON when it is already on disk; otherwise ask
# text_processing to create it from the annotations directory and ontology.
if os.path.exists(prompts_data_json_file):
    print(f"Using existing prompts data JSON: {prompts_data_json_file}")
    with open(prompts_data_json_file, 'r', encoding='utf-8') as f:
        prompt_templates_dict = json.load(f)
else:
    prompt_templates_dict = text_processing.create_prompts_data_json(
        annotations_dir='data/annotations',
        ontology_path='privacy_ontology.json',
        output_json=prompts_data_json_file,
    )


Prompts data JSON saved to data/annotations/prompts_data.json


### Annotating with LLMs

In [2]:

from getpass import getpass
import model_routing

# Set API keys securely (prompted at run time, never stored in the notebook).
os.environ['OPENAI_API_KEY'] = getpass('Enter your OPENAI API key: ')
os.environ['GROQ_API_KEY'] = getpass('Enter your Groq API key: ')

# One annotation run per (output CSV, model list) pair, listed in execution
# order. To skip a model, comment out its entry rather than copying the cell.
annotation_runs = [
    ('LLMAnnotation_4o.csv', ['openai:chatgpt-4o-latest']),
    ('LLMAnnotation_qwen-2.5-32b-R1.csv', ['groq:deepseek-r1-distill-qwen-32b']),
    ('LLMAnnotation_qwen-2.5-32b.csv', ['groq:qwen-2.5-32b']),
    ('LLMAnnotation_llama-3.3-70b.csv', ['groq:llama-3.3-70b-versatile']),
]

# The loop variables keep the names `output_csv` and `models`, so after the
# loop they hold the last run's values, as they did with the unrolled version.
for output_csv, models in annotation_runs:
    model_routing.run_multi_file_annotations(prompt_templates_dict, output_csv, models=models, num_runs=1)






  from .autonotebook import tqdm as notebook_tqdm


Model: openai:chatgpt-4o-latest - Completed a run for prompt starting with: Your mission is to annotate so...
Processed file: Actual_Budget\Accounts_&_Transactions.txt
Model: openai:chatgpt-4o-latest - Completed a run for prompt starting with: Your mission is to annotate so...
Processed file: Actual_Budget\Backup_&_Restore.txt
Model: openai:chatgpt-4o-latest - Completed a run for prompt starting with: Your mission is to annotate so...
Processed file: Akaunting\CreatingAnewAccount.txt
Model: openai:chatgpt-4o-latest - Completed a run for prompt starting with: Your mission is to annotate so...
Processed file: Attendize\features.txt
Model: openai:chatgpt-4o-latest - Completed a run for prompt starting with: Your mission is to annotate so...
Processed file: Mattermost_Architecture\Navigation.txt
Model: openai:chatgpt-4o-latest - Completed a run for prompt starting with: Your mission is to annotate so...
Processed file: Monica\README.txt
Model: openai:chatgpt-4o-latest - Completed a run for