In [None]:
# Credentials: the OpenAI key is read from the project-level .env file rather than hardcoded.
import dotenv
import openai
config = dotenv.dotenv_values("../.env")
# Raises KeyError if OPENAI_API_KEY is absent from the loaded values.
openai.api_key = config['OPENAI_API_KEY']

import random
import pandas as pd
# Show full cell contents (arguments and prompts are long strings).
pd.set_option('display.max_colwidth', None)
from tqdm import tqdm
# Enables DataFrame.progress_apply, used below for prompt building and API calls.
tqdm.pandas()

import json
import textwrap
import pprint

In [None]:
# Load the source corpus; later cells read its 'issue', 'argument', 'Reasons' and 'word_count' columns.
df = pd.read_csv("../data/appropriateness-corpus/inappropriate_with_reasons_conservative_long.csv")

In [None]:
# Non-null count per column: a quick check for missing values in the loaded corpus.
df.count()

In [None]:
# Plain Python list of every argument text, used only for the spot check in the next cell.
arguments = df['argument'].tolist()

In [None]:
# Peek at one randomly chosen argument, wrapped to 100 columns for readability.
# It is stored under its own name so that re-running this cell out of order can
# never overwrite the `sample` DataFrame that a later cell builds and sends to the API.
sample_argument = random.choice(arguments)
textwrap.wrap(sample_argument, width=100)

In [None]:
# Keep only the columns needed downstream: topic, argument text, human-annotated reasons, and length.
sub_df = df[['issue', 'argument', 'Reasons', 'word_count']]

In [None]:
# Non-null count per selected column.
sub_df.count()

In [None]:
def _string_property(description):
    """Return the JSON-schema entry for a plain string field with the given description."""
    return {"type": "string", "description": description}


# JSON schema for the arguments of the `set_argument` function call that the model
# is forced to make; it defines the structure of every parsed response.
response_schema = {
    "type": "object",
    "properties": {
        "original_argument": _string_property("The original argument from the user"),
        "topic": _string_property("The topic of the argument"),
        "transformed_argument": _string_property("The transformed argument"),
        "reasons": {
            "type": "array",
            "description": "The reasons for the transformation",
            "items": {"type": "string"},
        },
        "transformations": {
            "type": "array",
            "description": "The tuples of original text spans, their transformations, and the action taken to transform them.",
            # Each transformation is an object describing one rewritten span.
            "items": {
                "type": "object",
                "properties": {
                    "original_text": _string_property("The original text span"),
                    "transformed_text": _string_property("The transformed text span"),
                    "action": _string_property("The action taken to transform the text span"),
                },
            },
        },
    },
    "required": [
        "original_argument",
        "topic",
        "transformed_argument",
        "reasons",
        "transformations",
    ],
}

In [None]:
def create_informal_prompt(row):
    """Build the debate-moderation prompt for one corpus row.

    Reads the row's 'issue', 'argument' and 'word_count' entries, renders them
    into the instruction text, stores the result under 'informal_prompt' on the
    same row object, and returns that row (suitable for DataFrame.apply with axis=1).

    Raises whatever int() raises if 'word_count' cannot be converted to an integer.
    """
    debate_topic = row['issue']
    user_argument = row['argument']
    target_length = int(row['word_count'])
    # The leading indentation inside the literal is part of the prompt text sent to the model.
    row['informal_prompt'] = f"""
    You are participating in an online debate on the topic of "{debate_topic}". You are an expert debater, and your task is to check if a given user's argument is appropriate to be presented in a civil debate. 

    If yes, do nothing and return it. If not, then convert it into an appropriate argument and provide reasons for your conversion. The reasons should be a list of short descriptive phrases. Also return the list of transformations corresponding to each span in the user's argument in the form ('original','transformed', 'action') where 'action' describes in natural language the transformation that you applied.

    Next, check if the actions match the reasons. If they do not, adjust your actions to match the reasons and update the list of transformations accordingly.

    Please ensure that the semantics of the transformed argument must be very similar to the original one, albeit with small changes to make it appropriate. The transformed argument should be grammatically correct and should not contain any spelling mistakes or facts that cannot be verified against the original argument. The transformed argument should be relevant to the topic of the debate and should be approximately {target_length} words long. 

    User's argument: "
    {user_argument}
    "
    """
    return row

In [None]:
# Add an 'informal_prompt' column by rendering the prompt for every row (with a progress bar).
sub_df = sub_df.progress_apply(create_informal_prompt, axis=1)

In [None]:
# Confirm that every row now has a prompt.
sub_df.count()

In [None]:
def transform_arguments_informal_debater(row):
    """Send one row's prompt to the chat model and attach the parsed reply.

    The row's 'informal_prompt' is wrapped as a system message (stored under
    'system_message'), and the model is forced to answer through the
    `set_argument` function call whose parameters follow `response_schema`.
    The JSON arguments of that call are parsed and stored under 'llm_response'.

    Best-effort: any exception during the request or parsing is printed and the
    row gets an empty dict as its 'llm_response' instead of failing the run.

    Returns the same row with 'system_message' and 'llm_response' set.
    """
    row['system_message'] = {"role": "system", "content": row['informal_prompt']}
    # Default result; only replaced when the request and the JSON parsing both succeed.
    response_dict = {}
    try:
        completion = openai.ChatCompletion.create(
            model="gpt-4",
            messages=[row['system_message']],
            functions=[{"name": "set_argument", "parameters": response_schema}],
            function_call={"name": "set_argument"},
            temperature=0,
        )
        raw_arguments = completion.choices[0].message.function_call.arguments
        response_dict = json.loads(raw_arguments)
    except Exception as e:
        print(e)
    row['llm_response'] = response_dict
    return row

In [None]:
# Draw 100 rows for the experiment; the fixed random_state keeps the draw repeatable.
sample = sub_df.sample(100, random_state=42)

In [None]:
# Persist the sampled rows (including their prompts) before any API calls are made.
sample.to_csv("../data/inappropriate_arguments_sample_100.csv", index=False)

In [None]:
# One API request per sampled row; rows whose request or parsing failed carry {} in 'llm_response'.
sample_neutralized = sample.progress_apply(transform_arguments_informal_debater, axis=1)

In [None]:
# Non-null count per column after the model responses were attached.
sample_neutralized.count()

In [None]:
# Save the sampled rows together with their system messages and model responses.
# NOTE(review): the file name says "50" but the sample drawn above has 100 rows — confirm the intended name.
sample_neutralized.to_csv("../data/neutralized_sample_50.csv", index=False)

In [None]:
# Pretty-print the first row's parsed model response for a manual sanity check.
pprint.pprint(sample_neutralized['llm_response'].tolist()[0])

In [None]:
# Parallel lists in row order: the model's parsed responses and the human-annotated reasons.
responses = sample_neutralized['llm_response'].tolist()
human_reasons = sample_neutralized['Reasons'].tolist()

In [None]:
# Remove empty responses ({} is what transform_arguments_informal_debater stores on failure).
# NOTE(review): dropping entries shortens the list, so positions in `responses` no longer
# correspond to rows of sample_neutralized (or to entries of human_reasons).
responses = [x for x in responses if x != {}]

In [None]:
# Keep only non-empty human reason entries (an empty entry is the literal string '[]').
# NOTE(review): dropping entries shortens the list, so positions in `valid_reasons` no longer
# correspond to rows of sample_neutralized.
valid_reasons = [x for x in human_reasons if x!= '[]']


In [None]:
# Write one JSON line per sampled argument that has both a model response and
# human-annotated reasons.
#
# Responses and reasons are paired row by row straight from `sample_neutralized`
# and filtered together. Filtering the two lists separately and then zipping them
# would shift every later pair as soon as a single response is empty (failed API
# call) or a single reasons entry is '[]', attaching model output to the wrong
# human reasons.
paired_rows = zip(sample_neutralized['llm_response'], sample_neutralized['Reasons'])
with open('../data/neutralized_arguments_sample_50.jsonl', 'w', encoding='utf-8') as f:
    for response, reason in paired_rows:
        # Skip the whole row when either side is missing so the pairing stays aligned.
        if response == {} or reason == '[]':
            continue
        record = {
            'topic': response['topic'],
            'argument': response['original_argument'],
            'neutralized': response['transformed_argument'],
            'model_reasons': response['reasons'],
            'model_transformations': response['transformations'],
            'human_reasons': reason,
        }
        json.dump(record, f)
        f.write('\n')