In [1]:
from dotenv import load_dotenv

# Load settings from a .env file before anything else runs.
# NOTE(review): presumably this supplies the API key that OpenAI() picks up
# from the environment later in the notebook — confirm.
load_dotenv()

True

In [2]:
# Model name passed as `model=` to the chat completion request in ask_question.
OPENAI_MODEL="gpt-4o-2024-08-06"

In [3]:
import os
import json

# Directory containing the JSON files to load
directory = 'output'

# One parsed document per JSON file found in the directory
json_list = []

# os.listdir returns names in arbitrary order; sorting makes the order of
# json_list (and of anything serialized from it) reproducible across runs.
for filename in sorted(os.listdir(directory)):
    if filename.endswith('.json'):  # Skip anything that is not a JSON file
        filepath = os.path.join(directory, filename)
        # Decode explicitly as UTF-8 instead of the platform default encoding.
        with open(filepath, 'r', encoding='utf-8') as json_file:
            data = json.load(json_file)  # Parsed content of this file
            json_list.append(data)

# Now json_list contains all the JSON files as dicts


In [4]:
# Keep only the three fields that are serialized into the prompt. Documents
# without a "company_role_assignments" key get None for that field.
database = [
    {
        "company_name": document["company_name"],
        "data_points": document["data_points"],
        "company_role_assignments": document.get("company_role_assignments"),
    }
    for document in json_list
]

In [5]:
from pydantic import BaseModel

# Response model used by ask_question for the "number" schema: a single float.
# NOTE(review): documented with comments rather than a docstring because
# pydantic is understood to copy class docstrings into the generated JSON
# schema, which would change what is sent as response_format — confirm.
class NumberResponse(BaseModel):
    answer: float

# Response model used by ask_question for the "name" schema: a single string
# field holding the person's name.
class NameResponse(BaseModel):
    person_name: str

# Response model used by ask_question for the "boolean" schema: a single bool.
class BooleanResponse(BaseModel):
    answer: bool

def ask_question(question, schema):
    system_prompt = ("You are an assistant with the task of answering QUESTIONS based on a KNOWLEDGE DATABASE.")                     
    prompt = ("QUESTION\n\n"
              f"{question}\n\n"
              "KNOWLEDGE DATABASE\n\n"
              f"{json.dumps(database)}")
    
    
    from openai import OpenAI
    client = OpenAI()
    
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": prompt},
      ]
    
    match schema:
        case "number": 
            response_format = NumberResponse
            value_extractor = lambda x: x.answer
        case "name": 
            response_format = NameResponse
            value_extractor = lambda x: x.person_name
        case "boolean": 
            response_format = BooleanResponse
            value_extractor = lambda x: x.answer
        case _: raise f"unknown schema {schema}"
    
    response = client.beta.chat.completions.parse(
      model=OPENAI_MODEL,
      messages=messages,
      response_format=response_format
    )
    
    return value_extractor(response.choices[0].message.parsed)

In [6]:
results = []

# Load all questions up front so the file is not held open while the model
# calls run; decode explicitly as UTF-8 instead of the platform default.
with open('samples/questions.json', 'r', encoding='utf-8') as json_file:
    items = json.load(json_file)

# Ask each question in file order, echoing the question and its answer, and
# collect one record per question.
for item in items:
    question = item["question"]
    print(question)
    answer = ask_question(question, item["schema"])
    print(answer)
    results.append({
        "question": question,
        "schema": item["schema"],
        "answer": answer
    })

# Persist every collected answer in one JSON document.
with open('results.json', 'w', encoding='utf-8') as json_file:
    json.dump(results, json_file, indent=4)