### Building Prompts Example

In [1]:
import re
import json
import text_processing
import secrets 

from typing import List, Dict
import openai  # Assuming OpenAI API, but adaptable to other models

# Input locations used throughout the notebook (adjust to your checkout):
# the example file under annotations/, the target file under input/,
# and the privacy taxonomy JSON.
example_file_path = 'annotations/Actual_Budget/Accounts_&_Transactions.txt'
target_file_path = 'input/lh-ehr/Direct_Messaging_README.txt'
taxonomy_file_path = 'privacy_ontology_simple.json'

# Run the example file through the project's text_processing helper.
example_data = text_processing.process_input(example_file_path)

In [2]:

import prompt_templates

# Load the privacy ontology that constrains which categories may be used.
ontology_path = "privacy_ontology_simple.json"
privacy_ontology = prompt_templates.load_privacy_ontology(ontology_path)

# Take the first item returned for the example file as the demonstration example.
example_file = text_processing.process_input(example_file_path)[0]

# Process the target file through the same helper (kept for later comparison),
# and separately read its raw text, which is what goes into the prompt.
target_file_annotations = text_processing.process_input(target_file_path)
# Use a context manager so the file handle is closed deterministically, and
# decode as UTF-8 explicitly: relying on the platform default (cp1252 on
# Windows) is what produces "'charmap' codec can't decode" errors on these files.
with open(target_file_path, encoding="utf-8") as target_file:
    new_text_to_annotate = target_file.read()

# Generate the prompt from the example, the raw target text and the ontology.
prompt_example = prompt_templates.create_annotation_prompt(example_file, new_text_to_annotate, privacy_ontology)

# Print the prompt and its token count for manual review.
print(prompt_example)
print(f"\nToken Count: {prompt_templates.count_tokens(prompt_example)} tokens")


You are a privacy expert annotator tasked with annotating text files with metadata about privacy behaviors and stories. For the given text, annotate the following:

1. Actions: Actions performed or expected in the text.
2. Data Types: Types of data referenced in the text. Data types may include specific subcategories.
3. Purposes: Intentions or purposes related to the actions and data types.
4. Stories: Concise stories that describe how actions, data types, and purposes interact in context.

After providing your annotations, explain your rationale for these annotations. Place <R> tag between your annotations and your rationale.

Use only the categories listed below when annotating:

Actions:
Collect, Use, Share

Data Types:
Contact Data:
  Phone Number:
  Email address:
  User ID:
  Job Title:
  Company:
  Address:
  Name:
  Date of Birth:
  Image:
  Government ID:
  Biographical Data:
    CV:
    Education:
    Employment:
Health Data:
  Physical activity:
Social Media:
Location:
  Ap

### Load the prompt template dictionary for all files

In [2]:

import text_processing

# Create the prompt templates dictionary via the project helper; the result is
# passed to model_routing.run_multi_file_annotations in the annotation cell.
# NOTE(review): a reference snippet in the annotation cell suggests each entry
# holds 'input_file_path', 'annotation_file_path', 'prompt_template',
# 'target_annotations' and 'token_count' — confirm against text_processing.
# NOTE(review): the helper appears to report per-file read errors in its output
# rather than raising — verify that skipped files are acceptable downstream.
prompt_templates_dict = text_processing.create_prompt_templates_dict()



Error processing input\Actual_Budget\Backup_&_Restore.txt: 'charmap' codec can't decode byte 0x9d in position 232: character maps to <undefined>


### Annotating with LLMs

In [3]:
import os
import csv
import json
from getpass import getpass
import model_routing
from typing import Dict, List

# Reference: shape of each prompt_templates_dict entry (snippet presumably copied
# from where the dictionary is built — confirm against text_processing):
#
#     prompt_templates_dict[relative_key] = {
#         'input_file_path': input_file_path,
#         'annotation_file_path': most_similar_annotation,
#         'prompt_template': prompt_template,
#         'target_annotations': text_processing.process_file(annotation_file_path),
#         'token_count': prompt_templates.count_tokens(prompt_template)
#     }

# os.environ['OPENAI_API_KEY'] = getpass('Enter your OPENAI API key: ')
# Only prompt when the key is not already present, so re-running the cell (or a
# non-interactive "Run All" with the variable exported) does not block on input.
if not os.environ.get('GROQ_API_KEY'):
    os.environ['GROQ_API_KEY'] = getpass('Enter your Groq API key: ')

# Destination CSV for the annotation results.
output_csv = 'Annotated_Outputs/LLMAnnotation_grokLLama3.csv'
# Make sure the output folder exists in case the routing helper does not create it.
os.makedirs(os.path.dirname(output_csv), exist_ok=True)

# Models to run, as "<provider>:<model>" identifiers; uncomment to add more.
models = [
    # 'openai:gpt-4o-2024-11-20'
    'groq:llama-3.3-70b-versatile'
    ]
model_routing.run_multi_file_annotations(prompt_templates_dict, output_csv, models=models)

  from .autonotebook import tqdm as notebook_tqdm


Model: groq:llama-3.3-70b-versatile - Completed a run
Model: groq:llama-3.3-70b-versatile - Completed a run
Model: groq:llama-3.3-70b-versatile - Completed a run
Model: groq:llama-3.3-70b-versatile - Completed a run
Model: groq:llama-3.3-70b-versatile - Completed a run
Model: groq:llama-3.3-70b-versatile - Completed a run
Model: groq:llama-3.3-70b-versatile - Completed a run
Model: groq:llama-3.3-70b-versatile - Completed a run
Model: groq:llama-3.3-70b-versatile - Completed a run
Model: groq:llama-3.3-70b-versatile - Completed a run
Model: groq:llama-3.3-70b-versatile - Completed a run
Model: groq:llama-3.3-70b-versatile - Completed a run
Model: groq:llama-3.3-70b-versatile - Completed a run
Model: groq:llama-3.3-70b-versatile - Completed a run
Model: groq:llama-3.3-70b-versatile - Completed a run
Model: groq:llama-3.3-70b-versatile - Completed a run
Model: groq:llama-3.3-70b-versatile - Completed a run
Model: groq:llama-3.3-70b-versatile - Completed a run
Model: groq:llama-3.3-70b-ve