In [None]:
!pip install pandas openpyxl

In [None]:
!pip install requests

In [None]:
import os
import re
import time

import pandas as pd
import requests

In [None]:
# Location of the workbook (assumes it has already been uploaded to Colab storage)
file_path = '/content/data.xlsx'

# Read the spreadsheet into a DataFrame
df = pd.read_excel(file_path)

# Materialise every row as a tuple; the list keeps the sheet's row order
data_list = list(map(tuple, df.values))

In [None]:
# Build (prompt, correct_answer) pairs, one per row of data_list
formatted_list_with_answers = []

# Only the first three columns (dialogue, answer options, correct answer) are
# used. Trailing columns are ignored via `*_`: the write-back step at the end of
# this notebook adds a model-response column to the same workbook, and a strict
# three-way unpack would then fail with "too many values to unpack" on re-run.
for dialogue, answers, correct_answer, *_ in data_list:
    formatted_string = (
        "Part of the dialogue is written here. You need to analyze it and choose one of the four options listed below, "
        "which you consider to be the most correct based on the dialogue.\n\n"
        f"{dialogue}\n\n"
        "Choose one correct option out of four:\n"
        f"{answers}"
    )
    formatted_list_with_answers.append((formatted_string, correct_answer))

In [None]:
# Your Together API key.
# Read from the TOGETHER_API_KEY environment variable so the secret is never
# saved inside the notebook; falls back to an empty string when it is not set.
TOGETHER_API_KEY = os.environ.get("TOGETHER_API_KEY", "")

In [None]:
# Function to send a prompt to the Together.ai API
def send_prompt_to_together(
    prompt,
    model="meta-llama/Llama-2-13b-chat-hf",
    temperature=0.7,
    max_tokens=50,
    timeout=60,
):
    """
    Send a single user prompt to the Together.ai chat-completions endpoint.

    Parameters:
    prompt (str): Text sent as the only 'user' message.
    model (str): Model identifier placed in the request body.
    temperature (float): Sampling temperature placed in the request body.
    max_tokens (int): Completion length limit placed in the request body.
    timeout (float): Seconds to wait for the HTTP request before giving up.

    Returns:
    dict or None: The decoded JSON body on HTTP 200; None when the request
                  fails, the status is not 200, or the body is not valid JSON.
                  Every failure is reported with print() rather than raised,
                  so a batch loop can record the miss and continue.
    """
    url = "https://api.together.xyz/v1/chat/completions"
    headers = {
        "Authorization": f"Bearer {TOGETHER_API_KEY}",
        "Content-Type": "application/json"
    }
    data = {
        "model": model,
        "messages": [{"role": "user", "content": prompt}],
        "temperature": temperature,
        "max_tokens": max_tokens
    }

    # Without a timeout requests can block forever on a stalled connection;
    # transport errors are converted to the same None result as HTTP errors.
    try:
        response = requests.post(url, headers=headers, json=data, timeout=timeout)
    except requests.RequestException as exc:
        print(f"Error: request failed: {exc}")
        return None

    if response.status_code != 200:
        print(f"Error: {response.status_code}, {response.text}")
        return None

    # A 200 with a non-JSON body would otherwise raise out of the caller's loop.
    try:
        return response.json()
    except ValueError as exc:
        print(f"Error: could not decode JSON response: {exc}")
        return None

In [None]:
# Collected (prompt, model reply or None, correct answer) tuples
response_list = []

# Pause between consecutive requests, in seconds, to stay within the rate limit
delay_between_requests = 1.5

# Query the model once per prompt; a failed call is recorded with a None reply
for formatted_string, correct_answer in formatted_list_with_answers:
    response = send_prompt_to_together(formatted_string)
    response_message = None

    if not response:
        print("Failed to get a response from the API.")
    else:
        choices = response.get('choices', [])
        if not choices:
            print("No choices returned in the response.")
        else:
            response_message = choices[0]['message']['content']
            print(f"Response: {choices[0]['message']['content']}")

    response_list.append((formatted_string, response_message, correct_answer))

    # Throttle before the next request
    time.sleep(delay_between_requests)


In [None]:
# Dump every collected record so the raw replies can be inspected
for item in response_list:
    report_lines = (
        f"Formatted String: {item[0]}",
        f"Response: {item[1]}",
        f"Correct Answer: {item[2]}",
        "-----",
    )
    print("\n".join(report_lines))

In [None]:
def clean_response_characters(response_list):
    """
    Reduce each model reply to a single answer character.

    For every record, the reply is replaced by the character that stands
    immediately before the LAST ')' in the reply text (e.g. "I pick b) ..."
    becomes "b"). The placeholder 'x' is used when no such character can be
    extracted: the reply is missing/empty, is not a string, contains no ')',
    or every ')' is either the first character or directly preceded by a
    newline (which '.' does not match).

    Parameters:
    response_list (list): A list of tuples, where each tuple contains:
                          (formatted_string, response, correct_answer)

    Returns:
    list: A new list of (formatted_string, cleaned_response, correct_answer)
          tuples in the same order; the input list is not modified.
    """
    cleaned_response_list = []

    for formatted_string, response, correct_answer in response_list:
        cleaned_response = 'x'

        if response and isinstance(response, str):
            # All characters that are directly followed by ')'. The list can be
            # empty even when ')' is present (e.g. the reply is just ")"), so it
            # must be checked before indexing.
            candidates = re.findall(r'.(?=\))', response)
            if candidates:
                cleaned_response = candidates[-1]

        cleaned_response_list.append((formatted_string, cleaned_response, correct_answer))

    return cleaned_response_list

In [None]:
# Reduce every raw model reply to a single answer character ('x' when none can be extracted)
cleaned_response_list = clean_response_characters(response_list)

In [None]:
# Dump the cleaned records to check the extracted answer characters
for item in cleaned_response_list:
    report_lines = (
        f"Prompt: {item[0]}",
        f"Response: {item[1]}",
        f"Correct Answer: {item[2]}",
        "-----",
    )
    print("\n".join(report_lines))

In [None]:
def _normalize_answer(value):
    """Return an answer label as a stripped, lower-cased string ('' when missing)."""
    if value is None:
        return ''
    if isinstance(value, float):
        if value != value:  # NaN, i.e. an empty spreadsheet cell
            return ''
        if value.is_integer():
            # Numeric labels may arrive as floats (2.0); compare them as "2"
            value = int(value)
    return str(value).strip().lower()


def calculate_accuracy(response_list):
    """
    Calculate the accuracy of the responses compared to the correct answers.

    A response counts as correct when, after stripping surrounding whitespace
    and lower-casing, it is non-empty and equal to the correct answer. Answers
    that are not strings (e.g. numeric labels read from a spreadsheet) are
    converted to text before comparison; missing values never match.

    Parameters:
    response_list (list): A list of tuples, where each tuple contains:
                          (formatted_string, response, correct_answer)

    Returns:
    float: The accuracy as a percentage of correct responses
           (0.0 for an empty list).
    """
    total_count = len(response_list)
    if total_count == 0:
        # Nothing to score; avoid dividing by zero
        return 0.0

    correct_count = 0
    for _, response, correct_answer in response_list:
        normalized_response = _normalize_answer(response)
        if normalized_response and normalized_response == _normalize_answer(correct_answer):
            correct_count += 1

    accuracy = (correct_count / total_count) * 100
    return accuracy

In [None]:
# Score the cleaned replies against the expected answers and
# report the result as a percentage with two decimal places
accuracy = calculate_accuracy(cleaned_response_list)
print(f"Accuracy: {accuracy:.2f}%")

In [None]:
def add_responses_to_excel(file_path, response_list, column_name='LLaMA-2 Chat (13B)', output_path=None):
    """
    Store the responses from the response list as a column of the Excel sheet.

    The workbook at file_path is read, the second element of every tuple in
    response_list is written to the column column_name (an existing column
    with that name is replaced), and the sheet is saved without its index.

    Parameters:
    file_path (str): The path to the Excel file to read.
    response_list (list): A list of tuples, where each tuple contains:
                          (formatted_string, response, correct_answer)
    column_name (str): Header of the column that receives the responses.
    output_path (str or None): Where to save the updated sheet. None (the
                          default) overwrites file_path in place.

    Raises:
    ValueError: If the sheet and response_list have different lengths.
    """
    # Load the Excel file into a DataFrame
    sheet_df = pd.read_excel(file_path)

    # Responses are matched to rows by position, so the lengths must agree
    if len(sheet_df) != len(response_list):
        raise ValueError("The length of the DataFrame does not match the length of the response list.")

    # Add (or replace) the response column
    sheet_df[column_name] = [response[1] for response in response_list]

    # Save the updated DataFrame; by default this goes back to the source file
    destination = file_path if output_path is None else output_path
    sheet_df.to_excel(destination, index=False)

# Write the cleaned answer characters back into the source workbook.
# NOTE: this overwrites the file at file_path in place.
file_path = '/content/data.xlsx'
add_responses_to_excel(file_path, cleaned_response_list)

# Re-read the saved workbook and show its first rows to confirm the new column was written
updated_df = pd.read_excel(file_path)
print(updated_df.head())