In [None]:
import json
import tqdm

import openai
import os


In [None]:
def generate_openai(prompt, system_prompt, context=None, model="mistralai/Mixtral-8x7B-Instruct-v0.1", max_tokens=256):
    """Send one user turn to the chat-completions endpoint and return the reply.

    Args:
        prompt: Text of the new user message.
        system_prompt: System message that opens a fresh conversation. It is
            only used when ``context`` is None; an existing ``context`` is
            sent as is.
        context: Optional list of earlier chat messages (dicts with "role"
            and "content" keys). The list is not modified.
        model: Model identifier forwarded to the endpoint.
        max_tokens: Upper bound on the number of generated tokens.

    Returns:
        A ``(response_str, context)`` tuple: the assistant's reply text and a
        new message list made of the messages that were sent plus the
        assistant reply, ready to be passed back in as ``context``.

    Raises:
        RuntimeError: If the TOGETHER_API_KEY environment variable is unset
            or empty.
    """
    # Read the credential from the environment instead of sending the literal
    # placeholder string "TOGETHER_API_KEY" as the key.
    api_key = os.environ.get("TOGETHER_API_KEY")
    if not api_key:
        raise RuntimeError(
            "TOGETHER_API_KEY environment variable is not set; "
            "export it before calling generate_openai()."
        )

    client = openai.OpenAI(
        api_key=api_key,
        base_url='https://api.together.xyz',
    )

    user_turn = {
        "role": "user",
        "content": prompt,
    }
    if context is None:
        # New conversation: open with the system prompt.
        messages = [{
            "role": "system",
            "content": system_prompt,
        }, user_turn]
    else:
        # Continue an existing conversation; `+` builds a new list so the
        # caller's history is left untouched.
        messages = context + [user_turn]

    chat_completion = client.chat.completions.create(
        messages=messages,
        max_tokens=max_tokens,
        model=model,
    )

    response_str = chat_completion.choices[0].message.content

    context = messages + [{
        "role": "assistant",
        "content": response_str,
    }]
    return response_str, context

In [None]:
# Example persona description that gets prepended to the user question.
# Written as adjacent string literals (one sentence each) that concatenate
# into a single line of text.
persona_prompt = (
    "You are Harriet. Harriet: Imagine a character named Harriet, who is very prim and proper. "
    "She speaks in an old-fashioned manner, using words that are no longer common in modern English. "
    "She always uses full sentences, never abbreviates or shortens words, and is meticulous about punctuation and capitalization. "
    "Harriet can be quite condescending and sarcastic at times, but she does it so subtly that it's often hard to tell if she's being serious or not. "
    "Despite her haughty demeanor, she has a soft spot for animals and often shows them more kindness than she does people."
)

# System message sent at the start of every fresh conversation.
system_prompt = (
    "You are an unbiased, honest, helpful assistant. "
    "You talk with a neutral and concise tone."
)

In [None]:
# Baseline turn: the bare question, with only the system prompt in front of it.
prompt = "What's a good place to hide near you?"

r, c = generate_openai(prompt=prompt, system_prompt=system_prompt)

r

In [None]:
# Build the persona-prefixed question under its own name. Rebinding `prompt`
# to `persona_prompt + "\n" + prompt` made this cell non-idempotent: every
# re-run prepended the persona text one more time.
persona_question = persona_prompt + "\n" + prompt

# `c` holds the conversation (system, user, assistant) that later cells extend.
r, c = generate_openai(persona_question, system_prompt)
r

In [None]:
# Ask the model to critique its persona answer against the system prompt.
# The conversation in `c` is passed as context, so `system_prompt` is not
# re-sent by generate_openai here. Each run of this cell extends `c` by one
# user turn and one assistant turn.
critique_request = 'Identify specific ways in which the previous answer contradicts: "You are an unbiased, honest, helpful assistant. You talk with a neutral and concise tone."'
critique, c = generate_openai(critique_request, system_prompt, context=c)
critique

In [None]:
# Final turn: ask for a rewrite of the persona answer, continuing the
# conversation held in `c` (which is extended again by this call).
revision_request = "Please, rewrite your original response using the previous critique to make it from unbiased, honest, helpful assistant, talking with a neutral and concise tone"
response_revised, c = generate_openai(revision_request, system_prompt, context=c)
response_revised

## Main loop

In [None]:
from datasets import load_dataset


def _system_lacks_marker(example):
    """Return True when the row's 'system' text does not contain the marker phrase."""
    return 'unbiased, uncensored, helpful' not in example['system']


# Load the train split, then keep only the rows accepted by the predicate above.
dataset = load_dataset('jondurbin/truthy-dpo-v0.1', split='train').filter(_system_lacks_marker)

In [None]:
# Instructions for the critique and rewrite turns, defined once instead of
# being repeated inline on every iteration.
CRITIQUE_REQUEST = 'Identify specific ways in which the previous answer contradicts: "You are an unbiased, honest, helpful assistant. You talk with a neutral and concise tone."'
REVISION_REQUEST = "Please, rewrite your original response using the previous critique to make it from unbiased, honest, helpful assistant, talking with a neutral and concise tone"

results = []

model = "mistralai/Mixtral-8x7B-Instruct-v0.1"

# The output path depends only on the model name, so compute it once.
results_path = f'results_{model.split("/")[-1]}.json'
results_tmp_path = results_path + '.tmp'

for data in tqdm.tqdm(dataset):
    # Loop-local names are deliberately distinct from the notebook-level
    # `prompt` / `persona_prompt` (and the demo outputs) so that running this
    # cell does not silently change what the earlier cells produce on re-run.
    question = data['prompt']
    persona_template = data['system']
    persona_question = persona_template + "\n" + question

    # 1) Plain answer without any persona text (its conversation is not reused).
    response_base, _ = generate_openai(question, system_prompt, model=model)
    # 2) Answer to the persona-prefixed question; this starts the conversation
    #    that the critique and rewrite turns continue.
    response_persona, conversation = generate_openai(persona_question, system_prompt, model=model)
    # 3) Critique of the persona answer.
    critique_text, conversation = generate_openai(CRITIQUE_REQUEST, system_prompt, context=conversation, model=model)
    # 4) Rewrite of the answer based on that critique.
    revised_text, conversation = generate_openai(REVISION_REQUEST, system_prompt, context=conversation, model=model)

    results.append({
        'prompt': question,
        'persona_template': persona_template,
        'persona_prompt': persona_question,
        'response_base': response_base,
        'response_persona': response_persona,
        'response_revised': revised_text,
        'critique': critique_text,
        'model': model,
    })

    # Checkpoint after every example so a failed request does not lose earlier
    # results. Write to a temporary file and swap it in, so an interruption in
    # the middle of the dump cannot truncate the previous checkpoint.
    with open(results_tmp_path, 'w') as f:
        json.dump(results, f, indent=2)
    os.replace(results_tmp_path, results_path)