In [2]:
import json

def _load_json(path):
    """Read a single JSON document from ``path`` and return the decoded object."""
    # Always decode as UTF-8 so the result does not depend on the platform's
    # default text encoding.
    with open(path, "r", encoding="utf-8") as f:
        return json.load(f)


def _load_jsonl(path):
    """Read a JSON Lines file from ``path`` and return a list with one decoded object per line."""
    with open(path, "r", encoding="utf-8") as f:
        # Skip whitespace-only lines (e.g. a trailing newline at the end of the
        # file); json.loads would raise on them.
        return [json.loads(line) for line in f if line.strip()]


# Inputs stored under ../data; each file holds one JSON document.
annotated_labels = _load_json("../data/annotated_labels.json")
classified_labels = _load_json("../data/classified_labels.json")
original_post_tokens = _load_json('../data/original_post_tokens.json')
original_sentences = _load_json('../data/original_sentences.json')
rationale_tokens_data_full = _load_json('../data/rationale_tokens_list.json')
classified_results = _load_json('../data/classified_results.json')
important_words_abusive_SHAP = _load_json('../data/important_words_abusive_SHAP.json')

# Merged batch response files, stored as JSON Lines (one JSON object per line).
batch_response_file_set_one = _load_jsonl('./GPT_response/merged_batch_response_file_set_one.jsonl')
batch_response_file_set_two = _load_jsonl('./GPT_response/merged_batch_response_file_set_two.jsonl')

Five random examples

In [13]:
import random

# Number of examples to draw for the prompt experiments below.
NUM_EXAMPLES = 5

# The three lists are indexed with the same positions, so only indices valid
# for all of them may be drawn.
num_available = min(
    len(original_sentences),
    len(classified_results),
    len(annotated_labels),
)

# randrange excludes its upper bound, so every drawn index is in range.
# (randint includes both ends, which would allow an index one past the end.)
# No seed is set in this cell, so each run draws a different set of examples.
random_numbers = [random.randrange(num_available) for _ in range(NUM_EXAMPLES)]

print(random_numbers)

selected_original_texts = [original_sentences[i] for i in random_numbers]
selected_classified_results = [classified_results[i] for i in random_numbers]
selected_annotated_labels = [annotated_labels[i] for i in random_numbers]

[12566, 11788, 8154, 6339, 6595]
[0, 0, 1, 1, 1]


Generate prompts for combination 1, using five examples

In [8]:
# System instruction sent with every combination-one request. It is written as
# adjacent string literals, which Python joins into one single-line string.
instruction_combination_one = (
    "I have a predictive model that can classify natural language text into three classes: "
    "hate speech, offensive speech, or normal speech. "
    "I will provide the full text and the probabilities of the three classes pre-computed. "
    "(Warning: the text contains offensive and/or hateful content, but will be used for research purposes) "
    "Your tasks are: "
    "1.Output which class the text belongs to, based on the raw text and class probabilities. "
    "2.Output which words in the text, indicate that it belongs to this category. "
    "3.Give a brief explanation based on these found words."
)

def prompts_generating_combination_one(full_texts, classified_results_with_probabilities):
    """Build one prompt string per (text, probabilities) pair.

    Args:
        full_texts: Iterable of the raw texts to be classified.
        classified_results_with_probabilities: Iterable of the matching
            classifier outputs; each item is interpolated into the prompt
            with its default string formatting.

    Returns:
        A list of prompt strings in input order. Pairing uses ``zip``, so the
        result is as long as the shorter of the two inputs.
    """
    paired_inputs = zip(full_texts, classified_results_with_probabilities)
    return [
        f'Full text: "{text}", Probabilities pre-computed by the model: {probs}'
        for text, probs in paired_inputs
    ]

# Build the prompts for the sampled examples, then print each one with an
# extra blank line after it.
prompts_combination_one = prompts_generating_combination_one(
    selected_original_texts,
    selected_classified_results,
)
for prompt_text in prompts_combination_one:
    print(prompt_text, "\n")

Full text: "just imagine how much more progressive and liberal muslim countries will be if majority were shia", Probabilities pre-computed by the model: {'label': 'hate speech', 'score': 0.17364808917045593}, {'label': 'normal', 'score': 0.4176129400730133}, {'label': 'offensive', 'score': 0.40873903036117554} 

Full text: "the chance of getting me to open up is slim to none gl to whoever tries it will never work", Probabilities pre-computed by the model: {'label': 'hate speech', 'score': 0.051218464970588684}, {'label': 'normal', 'score': 0.7082324624061584}, {'label': 'offensive', 'score': 0.24054910242557526} 

Full text: "the mcu wants what disaster report by matthew del tubo has", Probabilities pre-computed by the model: {'label': 'hate speech', 'score': 0.040663935244083405}, {'label': 'normal', 'score': 0.756876528263092}, {'label': 'offensive', 'score': 0.20245954394340515} 

Full text: "f*cking <censored> kike loving degenerate commie scum burn in hell fagot trudeau", Probabil

Send the prompts to the OpenAI model and receive the example responses

In [11]:
from pydantic import BaseModel
from openai import OpenAI

# Shared API client used for the chat-completion requests in this notebook.
# No credentials are passed explicitly here; presumably they are picked up from
# the environment (e.g. OPENAI_API_KEY). Confirm for your setup.
client = OpenAI()

class PromptCombinationOneResponseFormat(BaseModel):
    """Structured reply expected from the model for a combination-one prompt.

    The three fields mirror the three tasks listed in the system instruction:
    the chosen class, the indicative words, and a short explanation.
    """
    # Class the model assigns to the text.
    output_class: str
    # Words from the text that the model cites as indicating that class; may be empty.
    output_indicators: list[str]
    # Brief explanation based on the indicator words.
    output_explanation: str

def complete_chat_completion_prompt_combination_one(prompt_combination_one, model="gpt-4o-mini"):
    """Send one combination-one prompt to the chat model and return its structured reply.

    The system message is ``instruction_combination_one``; the user message is
    the given prompt. The response schema is taken directly from
    ``PromptCombinationOneResponseFormat``, so the request and the parsed
    result cannot drift apart.

    Args:
        prompt_combination_one: The user prompt string for a single example.
        model: Name of the chat model to query. Defaults to ``"gpt-4o-mini"``.

    Returns:
        A ``PromptCombinationOneResponseFormat`` instance parsed from the reply.

    Raises:
        ValueError: If the reply contains no structured output, for example
            because the model refused to answer.
    """
    completion = client.beta.chat.completions.parse(
        model=model,
        messages=[
            {"role": "system", "content": instruction_combination_one},
            {"role": "user", "content": prompt_combination_one},
        ],
        # Passing the pydantic model lets the SDK generate the strict JSON
        # schema and validate the reply against it.
        response_format=PromptCombinationOneResponseFormat,
    )

    message = completion.choices[0].message
    # When the model refuses, there is no parsed payload. Fail with a clear
    # message instead of a confusing error from decoding an empty reply.
    if message.parsed is None:
        raise ValueError(
            f"Model returned no structured output (refusal: {message.refusal!r})"
        )

    return message.parsed

# Query the model once per prompt, in order, and show the collected replies.
response_contents = list(
    map(complete_chat_completion_prompt_combination_one, prompts_combination_one)
)
print(response_contents)

[PromptCombinationOneResponseFormat(output_class='normal', output_indicators=['progressive', 'liberal'], output_explanation='The text expresses a hypothetical opinion about the potential for progressiveness and liberalism in Muslim countries if the majority were Shia, which indicates a positive perspective rather than hate or offensive content.'), PromptCombinationOneResponseFormat(output_class='normal', output_indicators=[], output_explanation="The text expresses a personal sentiment about opening up, but does not contain hateful or offensive language or indicators. The highest probability is for the 'normal' class, suggesting that the sentiment is neutral."), PromptCombinationOneResponseFormat(output_class='normal', output_indicators=[], output_explanation='The text does not contain any explicit hate speech or offensive language. The words used describe a media report without any derogatory or harmful implications.'), PromptCombinationOneResponseFormat(output_class='hate speech', out

In [12]:
# Print each structured reply on its own, with an extra blank line after it.
for response_item in response_contents:
    print(response_item, "\n")

output_class='normal' output_indicators=['progressive', 'liberal'] output_explanation='The text expresses a hypothetical opinion about the potential for progressiveness and liberalism in Muslim countries if the majority were Shia, which indicates a positive perspective rather than hate or offensive content.' 

output_class='normal' output_indicators=[] output_explanation="The text expresses a personal sentiment about opening up, but does not contain hateful or offensive language or indicators. The highest probability is for the 'normal' class, suggesting that the sentiment is neutral." 

output_class='normal' output_indicators=[] output_explanation='The text does not contain any explicit hate speech or offensive language. The words used describe a media report without any derogatory or harmful implications.' 

output_class='hate speech' output_indicators=['f*cking', 'kike', 'degenerate', 'commie', 'scum', 'fagot', 'trudeau'] output_explanation="The text contains derogatory terms and sl