In [None]:
import pandas as pd
import openai

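# Set up your OpenAI API key before running this cell: either export the
# OPENAI_API_KEY environment variable (read by the openai library) or assign
# openai.api_key explicitly. Do not commit the key to source control.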

def _split_grade_and_reason(reply):
    """Split a reply shaped like 'Grade: <grade>, Reason: <reason>' into (grade, reason)."""
    # partition never raises: a missing "Reason:" marker yields an empty
    # reason, and any further "Reason:" occurrences stay inside the reason
    # text instead of breaking tuple unpacking.
    grade_part, _, reason_part = reply.partition("Reason:")
    # Drop the label and the separating comma first, then trim, so that no
    # whitespace is left behind where the comma used to be.
    grade = grade_part.replace("Grade:", "").replace(",", "").strip()
    return grade, reason_part.strip()


def tag_post(prompt, post):
    """
    Send a grading request to the GPT API for the given post.

    The prompt and the post are combined into a single user message. The
    model's reply is expected to look like ``Grade: <grade>, Reason: <reason>``.

    Args:
        prompt: Instruction text describing how to grade the post.
        post: The post content to grade.

    Returns:
        A ``(grade, reason)`` tuple of strings. If the reply has no
        ``Reason:`` marker, the reason is an empty string and the grade is
        whatever text the model returned. If the request itself fails, the
        tuple is ``("Error", <exception message>)``.
    """
    formatted_prompt = f"{prompt}\n\nThe post is: {post}"

    # Deliberately best-effort: any failure is reported in the result instead
    # of being raised, so a single bad request does not abort a whole batch.
    # NOTE(review): `openai.ChatCompletion` is the pre-1.0 interface of the
    # openai package; confirm the installed version still provides it,
    # otherwise every call ends up in the "Error" branch below.
    try:
        response = openai.ChatCompletion.create(
            model="gpt-4",
            messages=[{"role": "user", "content": formatted_prompt}],
            temperature=0.7,
            max_tokens=100
        )
        reply = response['choices'][0]['message']['content']
        return _split_grade_and_reason(reply)
    except Exception as e:
        return "Error", str(e)

def process_sheet(sheet_df, prompts, post_column='Post'):
    """
    Tag every non-empty post of one sheet with GPT, once per prompt.

    For the i-th prompt (counting from 1) the columns ``GPT Grade{i}`` and
    ``GPT Reason{i}`` are added. Rows whose post is null keep empty strings
    in all of these columns.

    Args:
        sheet_df: DataFrame holding the posts. It is modified in place.
        prompts: Sequence of prompt texts; each post is graded once per prompt.
        post_column: Name of the column containing the post text.

    Returns:
        The same DataFrame object, with the grade/reason columns filled in.

    Raises:
        KeyError: If the sheet has rows but no ``post_column`` column.
    """
    # Create all result columns up front so skipped rows still hold "".
    for number in range(1, len(prompts) + 1):
        sheet_df[f'GPT Grade{number}'] = ""
        sheet_df[f'GPT Reason{number}'] = ""

    for idx, row in sheet_df.iterrows():
        post = row[post_column]
        if pd.isnull(post):  # Skip empty posts
            continue

        for number, prompt in enumerate(prompts, start=1):
            grade, reason = tag_post(prompt, post)
            sheet_df.at[idx, f'GPT Grade{number}'] = grade
            sheet_df.at[idx, f'GPT Reason{number}'] = reason

    return sheet_df

# Load the Excel file
file_path = "posts20.xlsx"
# Read every sheet through a single workbook handle. The context manager
# closes the file once the sheets are in memory, and parsing from the open
# ExcelFile avoids reopening the workbook once per sheet. Loading everything
# first also surfaces read errors before any API calls are made.
with pd.ExcelFile(file_path) as sheets:
    loaded_sheets = {name: sheets.parse(name) for name in sheets.sheet_names}

# Define the two prompts
# NOTE: the leading spaces on the continuation lines are inside the string
# literals, so they are sent to the model as part of the prompt text.
prompts = [
    """You are a social scientist. The following posts are about art the authors are interested in. Grade how productive each author seems to be based on their related experience. Consider any clues about their productivity, either directly or by inference. The grade should be one of the following four choices: Rather productive, Rather neutral, Rather not productive, Author asks for a relative or close friend only.
    Please reply with the format:
    Grade: grade,
    Reason: reason (up to 15 words)""",

    """You are a social scientist. The following posts are about art the authors are interested in. Grade how hard and painful it is, for each author of each post, to create, based on their related experience. Consider any clues about their pain to create, either directly or by inference. The grade should be one of the following four choices:Rather hard to create, Rather neutral to creation, Rather not hard to create, Author asks for a relative or close friend only.
    Please reply with the format:
    Grade: grade,
    Reason: reason (up to 15 words)"""
]

# Process each sheet; results are keyed by the original sheet name, in
# workbook order.
updated_sheets = {}
for sheet_name, df in loaded_sheets.items():
    df = process_sheet(df, prompts)
    updated_sheets[sheet_name] = df

# Write all tagged sheets into a separate workbook, one worksheet per
# original sheet name, leaving the input file untouched.
output_file = "posts20_tagged_with_GPT4.xlsx"
writer = pd.ExcelWriter(output_file, engine='xlsxwriter')
with writer:
    for sheet_name in updated_sheets:
        df = updated_sheets[sheet_name]
        # index=False: the DataFrame index is not written as a column.
        df.to_excel(writer, sheet_name=sheet_name, index=False)

print(f"Tagging completed with two prompts. Results saved to {output_file}")


Tagging completed with two prompts. Results saved to posts20_tagged_with_GPT4.xlsx
