# In [None]:
import json
import time
from collections import deque

import requests

# --- Crawl configuration ---
# HTTP endpoint that every LLM query is POSTed to.
LLM_API_URL = "http://localhost:11434/api/generate"

# Professions the crawl starts from.
seed_professions = [
    "Medical Doctor",
    "Software Engineer",
    "Civil Engineer",
    "Business Analyst",
    "Artist",
    "Scientist",
    "Teacher",
    "Lawyer",
    "Entrepreneur",
    "Mechanical Engineer",
]

# Upper bound on the query counter checked by the main loop.
max_queries = 1500
# How many attempts a single profession gets before it is given up on.
retry_limit = 3

# --- Mutable crawl state ---
# Professions already handled, kept in a set to avoid duplicates.
visited_professions = set()
# Every accepted LLM answer, in the order it was received.
all_responses = []

def query_llm(profession, retries=retry_limit, timeout=120):
    """Ask the LLM for professions related to *profession*.

    The request is POSTed to ``LLM_API_URL`` and retried up to *retries*
    times when the HTTP call fails, the payload is not valid JSON, or the
    decoded JSON does not have the expected structure.

    Args:
        profession: Job title to look up; interpolated into the prompt.
        retries: Maximum number of attempts before giving up.
        timeout: Seconds to wait for the server on each attempt. Without a
            timeout ``requests`` waits forever, so one stalled request would
            freeze the whole crawl.

    Returns:
        The decoded dict (containing a ``"related_professions"`` list) on
        success, or ``None`` if every attempt failed.
    """
    prompt = f'Give me a list of related professions to "{profession}". Try to form a list of 5-10. Respond in JSON like: {{"profession": "{profession}", "related_professions": ["profession_1", "profession_2"]}} Only respond with the JSON and nothing else. Be specific with the job title: instead of "producer", use "music producer".'
    data = {
        "model": "llama3.2:3b",
        "prompt": prompt,
        "stream": False
    }

    for attempt in range(retries):
        try:
            response = requests.post(LLM_API_URL, json=data, timeout=timeout)
            response.raise_for_status()
            response_json = response.json()
            # The model's text answer is itself expected to be a JSON document.
            llm_response = json.loads(response_json["response"])
        except (requests.RequestException, ValueError, KeyError, TypeError) as e:
            # ValueError covers json.JSONDecodeError and the decode errors
            # raised by older requests/simplejson combinations.
            print(f"Attempt {attempt + 1} failed for profession: {profession} with error: {e}")
            continue

        # Accept only the structure the prompt asked for; anything else
        # (e.g. a bare string instead of a list) is worth another attempt.
        if isinstance(llm_response, dict) and isinstance(
            llm_response.get("related_professions"), list
        ):
            return llm_response

        print(f"Attempt {attempt + 1} returned an unexpected JSON structure for profession: {profession}")

    # Return None if all retries fail
    print(f"Failed to get a valid response for {profession} after {retries} attempts.")
    return None

# Sanitize related professions to ensure all are usable strings
def sanitize_related_professions(related_professions):
    """Return the usable profession names from an LLM-provided value.

    Args:
        related_professions: Whatever the model put under
            ``"related_professions"``; expected to be a list but not trusted.

    Returns:
        A list of non-empty, whitespace-stripped strings. Strings and real
        numbers are kept (numbers are converted with ``str``); booleans,
        ``None``, nested containers and other types are dropped. If the
        input is not a collection of items (e.g. a bare string, a dict,
        ``None`` or a number) an empty list is returned.
    """
    # A bare string would otherwise be iterated character by character,
    # turning every letter into a "profession"; a dict would yield its keys.
    if isinstance(related_professions, (str, bytes, dict)):
        return []
    try:
        candidates = iter(related_professions)
    except TypeError:
        return []

    cleaned = []
    for prof in candidates:
        # bool is a subclass of int, so it has to be excluded explicitly.
        if isinstance(prof, bool) or not isinstance(prof, (str, int, float)):
            continue
        name = str(prof).strip()
        if name:
            cleaned.append(name)
    return cleaned

# Breadth-first crawl: start from the seeds and expand through every answer.
# A deque gives O(1) removal from the front of the queue, and
# `queued_professions` remembers every name ever enqueued so the same
# profession is never queued (and therefore never queried) more than once.
professions_to_query = deque()
queued_professions = set()
for seed_profession in seed_professions:
    if seed_profession not in queued_professions:
        queued_professions.add(seed_profession)
        professions_to_query.append(seed_profession)

query_count = 0  # number of professions that produced a valid response

# Main loop to query professions
while professions_to_query and query_count < max_queries:
    profession = professions_to_query.popleft()

    # Skip if the profession has already been visited (the visited set is
    # module-level state and may have been filled before this loop ran).
    if profession in visited_professions:
        continue

    try:
        print(f"Querying related professions for: {profession}")
        response = query_llm(profession)

        # No valid response: nothing to record, move on to the next entry.
        if not response:
            continue

        all_responses.append(response)
        visited_professions.add(profession)
        query_count += 1

        # Add related professions to the queue if they were never queued before
        related_professions = sanitize_related_professions(response.get("related_professions", []))
        for related_profession in related_professions:
            if related_profession not in queued_professions:
                queued_professions.add(related_profession)
                professions_to_query.append(related_profession)

    except Exception as e:
        # Best-effort crawl: report the problem and keep going with the queue.
        print(f"An error occurred while processing {profession}: {e}")

# Save responses to a JSON file
with open("related_professions.json", "w", encoding="utf-8") as file:
    json.dump(all_responses, file, indent=4)

print("Completed querying. JSON responses saved to 'related_professions.json'.")
