In [None]:
import pandas as pd
from openai import OpenAI
import os
from dotenv import find_dotenv, load_dotenv

In [None]:
# Load variables from the nearest .env file into the process environment.
# load_dotenv() performs the .env lookup itself, so the former stand-alone
# find_dotenv() call (whose return value was discarded) has been dropped.
load_dotenv()

# os.getenv returns None when the variable is not set.
oai_key = os.getenv("OPENAI_API_KEY")
openai_client = OpenAI(api_key=oai_key)

In [None]:
# Run configuration used by the cells below.
# Tab-separated file that is read at the start and overwritten at the end.
tsv_path = "cti-taa-responses.tsv"
# Model name passed to the chat-completions request.
model = "gpt-4.1"
# Column whose text is sent to the model and later replaced by the results.
column_name = "gpt4o-mini"

In [None]:
# Load the tab-separated file into a DataFrame.
df = pd.read_csv(tsv_path, sep='\t')  # First row is header
if column_name not in df.columns:
    # Name the column that was actually looked up; the message previously
    # interpolated `model`, which pointed the reader at the wrong setting.
    raise ValueError(f"Column '{column_name}' not found in file.")

# Missing values are dropped here, so `texts` can be shorter than the
# DataFrame when the column contains empty cells.
texts = df[column_name].dropna().tolist()

# Filled by the extraction loop, one entry per element of `texts`.
threat_actors = []
# System prompt sent with every request; each text is sent as the user message.
prompt = (
    "Extract the threat actor from the following summary. Only return the APT identifier if present, else return the actor name. "
    "Return only the actor name(s), or 'X' if not applicable or the attribution is unclear.\n\n"
)

print(texts)


In [None]:

# Start from an empty list every time this cell runs. Appending to a list
# created in a different cell meant that re-running this cell doubled the
# results and made the later length check fail.
threat_actors = []

for text in texts:
    # One request per text: the instruction goes in the system message and the
    # text itself in the user message; temperature=0 is requested.
    response = openai_client.chat.completions.create(
        model=model,
        messages=[{"role": "system", "content": prompt}, {"role": "user", "content": text}],
        temperature=0,
    )
    # `content` may be None (for example when the model returns no text);
    # treat that as an empty answer instead of crashing part-way through the run.
    content = response.choices[0].message.content
    actor = (content or "").strip()
    threat_actors.append(actor)


In [None]:
# Show the collected list of extracted actor strings as this cell's output.
threat_actors

In [None]:
# The texts sent to the model were the non-null entries of the column, so the
# results correspond to those rows only. Comparing against the full column
# length (as before) always failed as soon as the column had a missing value.
has_text = df[column_name].notna()
expected_count = int(has_text.sum())

if expected_count != len(threat_actors):
    raise ValueError(
        f"Expected {expected_count} threat actors (one per non-empty "
        f"'{column_name}' row) but got {len(threat_actors)}."
    )

# Place each result on the row its text came from; rows that had no text stay
# empty. Note that this replaces the original text in the column.
df[column_name] = pd.Series(
    threat_actors, index=df.index[has_text], dtype=object
).reindex(df.index)

In [None]:
# Write the DataFrame back to the same tab-separated file it was read from,
# without adding the row index as an extra column.
df.to_csv(
    path_or_buf=tsv_path,
    index=False,
    sep='\t',
)