In [18]:
import email
from email.parser import Parser
from llm import run_prompt
import os
import json

with open('e1.txt', 'r') as f:
    email_text = f.read()

def clean_email(email_text):
    # Parse the email content
    msg = email.message_from_string(email_text)
    parsed = f"Subject: {msg['Subject']}\nFrom: {msg['From']}\nTo: {msg['To']}\nDate: {msg['Date']}\n{msg.get_payload()}"
    return parsed

def load_sample_dir(directory_path):
    """Reads all files in a specified directory and returns their contents in a sorted 2-tuple list.

    Args:
    directory_path (str): The path to the directory containing the files to read.

    Returns:
    list of tuples: A list of 2-tuples where each tuple contains a file name and its content, sorted alphabetically by file name.
    """
    files_content = []
    # Loop through each file in the directory
    for file_name in os.listdir(directory_path):
        file_path = os.path.join(directory_path, file_name)
        # Check if it is a file and not a directory
        if os.path.isfile(file_path):
            with open(file_path, 'r', encoding='utf-8') as file:
                files_content.append((file_name, file.read()))

    # Sort the list by file name
    files_content.sort(key=lambda x: x[0])

    return files_content



def prompt_label(email_text):
    prompt = f"""
1. Urgent (Immediate Action Required)
    - Explicit mentions of immediate action, crisis, emergency, or significant negative consequences if not addressed promptly.
    - Phrases implying critical impact on operations, safety, or essential functions. Imminent deadlines within hours or very short terms are mentioned.
    - Indicates that a response or action is required as soon as possible, ideally within the same working day.

2. High (Blockers, ASAP/Same-day Response)
    - Discussions of obstacles or issues impacting ongoing projects or operations that require a timely decision or input.
    - Mentions of approaching deadlines, time-sensitive decisions, or fast-approaching meetings where input or action is crucial.
    - Suggests a need for a response or action by the end of the day or within 24 hours.

3. Medium (Timely Response Required)
    - Routine business inquiries, updates on ongoing projects, requests for information not indicated as urgent.
    - Standard office communications, coordination emails, or follow-ups on previous discussions.
    - No explicit or implied rush, suggesting a response is expected within a reasonable but not immediate timeframe (2-3 days).

4. Low (Non-Urgent/Casual)
    - General information, newsletters, non-urgent updates, casual check-ins, or social invitations related to work.
    - Language indicating that the content is informative or optional rather than necessary for immediate business processes.
    - No immediate response required or expected, can be attended to as per convenience.

5. None (Promotional)
    - Promotional material, advertisements, unsolicited sales pitches, or clearly non-personalized mass emails.
    - Lack of personalization, generic marketing language, offers, or invitations not directly related to immediate work needs.
    - Generally does not require a response or attention unless of specific personal or professional interest.

Classify the priority of the following emails using the above spec.
Format your response as the following:

{{
    "priority": [1 - 5],
    "justification": [1 or 2 sentences of justification]

}}
Do not output any additional text.

```
{email_text}
```
"""
    response = run_prompt(prompt)
    try: 
        label = json.loads(response)
    except:
        label = {'priority': -1, 'justification': 'error'}

    return label


def label_all(sample_dir, output_file):
    for id, content in load_sample_dir(sample_dir):
        label = prompt_label(content)

        with open(output_file, 'a') as f:
            f.write(f"{id}, {label['priority']}, \"{label['justification']}\"\n")
        
        print(f"Email {id} labeled as {label['priority']}")
        # print(f"Email {id}")
        # print(prompt_label(content))


In [20]:
label_all('sample_maildir', 'labels.csv')

Prompting with: *****
        
        1. Urgent (Immediate Action Required)
            - Explicit mentions of immediate action, crisis, emergency, or significant negative consequences if not addressed promptly.
            - Phrases implying critical impact on operations, safety, or essential functions. Imminent deadlines within hours or very short terms are mentioned.
            - Indicates that a response or action is required as soon as possible, ideally within the same working day.
        
        2. High (Blockers, ASAP/Same-day Response)
            - Discussions of obstacles or issues impacting ongoing projects or operations that require a timely decision or input.
            - Mentions of approaching deadlines, time-sensitive decisions, or fast-approaching meetings where input or action is crucial.
            - Suggests a need for a response or action by the end of the day or within 24 hours.
        
        3. Medium (Timely Response Required)
            - Routine busi

InvalidRequestError: This model's maximum context length is 8193 tokens. However, your messages resulted in 9707 tokens. Please reduce the length of the messages.

In [11]:
e1 = clean_email(email_text)
prompt_label(e1)

Prompting with: *****
        
        1. Urgent (Immediate Action Required)
            - Explicit mentions of immediate action, crisis, emergency, or significant negative consequences if not addressed promptly.
            - Phrases implying critical impact on operations, safety, or essential functions. Imminent deadlines within hours or very short terms are mentioned.
            - Indicates that a response or action is required as soon as possible, ideally within the same working day.
        
        2. High (Blockers, ASAP/Same-day Response)
            - Discussions of obstacles or issues impacting ongoing projects or operations that require a timely decision or input.
            - Mentions of approaching deadlines, time-sensitive decisions, or fast-approaching meetings where input or action is crucial.
            - Suggests a need for a response or action by the end of the day or within 24 hours.
        
        3. Medium (Timely Response Required)
            - Routine busi

{'priority': 4,
 'justification': 'This email is a casual message with no immediate response required or expected.'}

('bass-e_discussion_threads_455',
 "Subject: \nFrom: eric.bass@enron.com\nTo: timothy.blanchard@enron.com, matthew.lenhart@enron.com\nDate: Thu, 12 Oct 2000 04:25:00 -0700 (PDT)\ncheck it out\n---------------------- Forwarded by Eric Bass/HOU/ECT on 10/12/2000 11:24 AM \n---------------------------\n\n\nBrian Hoskins@ENRON COMMUNICATIONS\n10/12/2000 11:04 AM\nTo: Eric Bass/HOU/ECT@ECT, Luis Mena/NA/Enron@Enron\ncc:  \nSubject: \n\nI'm going to be out of town for this, but I thought you might want to \nparticipate.\n\nhttp://downoutup.com/TOURNAMENT.htm\n\n\n\nBrian T. Hoskins\nEnron Broadband Services\n713-853-0380 (office)\n713-412-3667 (mobile)\n713-646-5745 (fax)\nBrian_Hoskins@enron.net\n\n\n")