In [1]:
import os
import re
from openai import OpenAI
import ast
import json
import pandas as pd
import random
import numpy as np
import time


# Read the API key from the environment instead of hard-coding it in the
# notebook (a pasted key leaks as soon as the notebook is shared or committed).
# Set it before starting Jupyter, e.g. `export OPENAI_API_KEY=...`.
OPEN_API_KEY = os.environ.get("OPENAI_API_KEY")
if not OPEN_API_KEY:
    raise RuntimeError(
        "OPENAI_API_KEY is not set; export it before running this notebook."
    )

# Shared client used by send_openai_messages / send_openai_prompt below.
client = OpenAI(
    api_key=OPEN_API_KEY
)

def send_openai_messages(messages, model="gpt-4o"):
    """Send a full chat history to the chat-completions endpoint.

    Args:
        messages: List of chat message dicts passed through unchanged as the
            ``messages`` argument of the request.
        model: Name of the model to query.

    Returns:
        The ``content`` of the message in the first returned choice.
    """
    completion = client.chat.completions.create(model=model, messages=messages)
    first_choice = completion.choices[0]
    return first_choice.message.content

def send_openai_prompt(prompt, model="gpt-4o"):
    """Send a single user prompt to the chat-completions endpoint.

    Args:
        prompt: Text sent as the content of one ``user`` message.
        model: Name of the model to query.

    Returns:
        The ``content`` of the message in the first returned choice.
    """
    user_turn = {"role": "user", "content": prompt}
    completion = client.chat.completions.create(model=model, messages=[user_turn])
    return completion.choices[0].message.content

# Basic interest

In [2]:
# Load the basic-interest table. Every column after the first is treated as
# one basic interest; later cells read row 0 of a column as its definition and
# row 1 as its questionnaire items.
df_basic_interest = pd.read_excel("./data/CABIN File.xlsx")
interests = list(df_basic_interest.columns[1:])

In [3]:
def get_prompt_7(occupation, description, tasks, basic_interest, definition, questionnaire_items):
    """Build the prompt asking for a 7-point alignment rating.

    The prompt asks how well one basic interest describes the work of one
    occupation and instructs the model to answer with only the number.

    Args:
        occupation: Occupation title interpolated into the prompt.
        description: Description of the occupation.
        tasks: The occupation's most important tasks.
        basic_interest: Name of the basic interest being rated.
        definition: Definition of the basic interest.
        questionnaire_items: Questionnaire items used to assess the interest.

    Returns:
        The complete prompt text as a single string.
    """
    # NOTE(review): the scale lines below contain stray quote/comma characters
    # (e.g. `scale.",` and mismatched straight/curly quotes) that look like
    # leftovers from a list literal. They are kept verbatim because changing
    # the prompt text would change what the model is asked - confirm before
    # cleaning them up.
    return f"""Your job is to rate how well the basic interest “{basic_interest}” aligns with work performed by people in the occupation “{occupation}”. 

Use the following information to inform this rating.

This is a description of the occupation “{occupation}”: “{description}”. The most important tasks for the occupation “{occupation}” are: “{tasks}”.

This is the definition of the basic interest “{basic_interest}”: “{definition}”. Here are some questionnaire items used to assess the basic interest “{basic_interest}”: “{questionnaire_items}”.

Using the description and tasks associated with “{occupation}”, and the definition and questionnaire items associated with “{basic_interest}”, ask yourself: “How descriptive is the basic interest “{basic_interest}” of the work performed by the occupation “{occupation}”?”.

Rate how well the basic interest “{basic_interest}” aligns with the occupation “{occupation}” using the following 7-point scale.",
    "1 - Not at all descriptive;”
    "2 - Slightly descriptive;",
    "3 - Somewhat descriptive;",
    "4 - Moderately descriptive;",
    "5 - Quite descriptive;",
    "6 - Highly descriptive;",
    "7 - Extremely descriptive.”

Return your response with only the number.
"""

In [6]:
_ROUND_BANNER = "------------------------------------- Round {} -------------------------------------"


def _rate_occupations(df_occupation, model):
    """Query the model once for every (occupation, basic interest) pair.

    Args:
        df_occupation: DataFrame with "Title", "Description" and "Task" columns.
        model: Model name forwarded to ``send_openai_prompt``.

    Returns:
        A DataFrame with an "Occupation" column followed by one column per
        entry in ``interests``; each cell holds the raw model response.
    """
    # Definition (row 0) and questionnaire items (row 1) do not depend on the
    # occupation, so look them up once instead of once per occupation.
    interest_specs = [
        (name, df_basic_interest.loc[0, name], df_basic_interest.loc[1, name])
        for name in interests
    ]

    records = []
    for occupation, description, tasks in zip(
        df_occupation["Title"], df_occupation["Description"], df_occupation["Task"]
    ):
        print(occupation, end="  : ")
        # One record per input row: rows that share a title keep their own
        # ratings instead of overwriting each other via a title-based mask.
        record = {"Occupation": occupation}
        for basic_interest, definition, questionnaire_items in interest_specs:
            prompt = get_prompt_7(
                occupation, description, tasks, basic_interest, definition, questionnaire_items
            )
            record[basic_interest] = send_openai_prompt(prompt, model=model)
            print(".", end="")
        print()
        records.append(record)

    return pd.DataFrame(records, columns=["Occupation"] + interests)


def main(input_file_name, output_file_name, model):
    """Run two independent rating rounds and save each one to Excel.

    Reads ``./data/{input_file_name}``, asks ``model`` to rate every basic
    interest for every occupation, and writes the results of each round to
    ``./results/{output_file_name}_{model}_round_{k}.xlsx`` (k = 1, 2).

    Args:
        input_file_name: Excel file name inside ``./data``.
        output_file_name: Prefix used for the result file names.
        model: Model name forwarded to ``send_openai_prompt``.
    """
    df_occupation = pd.read_excel(f"./data/{input_file_name}")

    # Create the output directory up front so a missing folder is not only
    # discovered after a whole round of API calls has already been made.
    os.makedirs("./results", exist_ok=True)

    for round_number, round_label in ((1, "one"), (2, "two")):
        print(_ROUND_BANNER.format(round_label))
        df_ratings = _rate_occupations(df_occupation, model)
        df_ratings.to_excel(f"./results/{output_file_name}_{model}_round_{round_number}.xlsx")

# 16 Occupations 6 Tasks

In [None]:
# Output prefix was "6_stasks"; corrected to match the "10_tasks" / "all_tasks"
# naming used by the other runs. Result files are now named 6_tasks_*.xlsx.
main("16 Occupations 6 tasks.xlsx", "6_tasks", "gpt-4o")

# 16 Occupations 10 tasks

In [None]:
# Rate all basic interests using the 10-task version of the occupation file.
main(
    input_file_name="16 Occupations 10 tasks.xlsx",
    output_file_name="10_tasks",
    model="gpt-4o",
)

# 16 Occupations all core tasks

In [None]:
# Rate all basic interests using the all-core-tasks version of the occupation file.
main(
    input_file_name="16 Occupations all core tasks.xlsx",
    output_file_name="all_tasks",
    model="gpt-4o",
)