In [1]:
import openai
import os

# SECURITY: never hardcode API keys in a notebook -- they leak through version
# control and shared copies. The key is read from the OPENAI_API_KEY environment
# variable instead; a missing variable fails fast here with a KeyError rather
# than later on the first API call.
# Any key that was previously committed in this cell should be revoked.
openai.api_key = os.environ["OPENAI_API_KEY"]

In [25]:
import pandas as pd
import re
import selfies as sf
def _extract_action_descriptions(system_message):
    """Return the descriptions of actions (1), (2), ... found in the system prompt."""
    descriptions = []
    while True:
        # Look up each action by its own number instead of counting every "(d)"
        # in the prompt: a digit in parentheses that is merely mentioned in prose
        # no longer inflates the count, and numbers >= 10 are supported.
        entry = re.search(r'\({}\) (.+?)\.\n'.format(len(descriptions) + 1), system_message)
        if entry is None:
            break
        descriptions.append(entry.group(1))
    return descriptions


def _build_user_messages(action_descriptions, compounds, action_number):
    """Combine compounds with one action (or every action) into user messages."""
    number_of_actions = len(action_descriptions)

    if action_number == "all":
        # One message per (compound, action) pair, compounds in the outer loop.
        return [
            "Action: ({}) {}\nCompound: {}".format(number, description, compound)
            for compound in compounds
            for number, description in enumerate(action_descriptions, start=1)
        ]

    invalid_message = f"Invalid action_number. It should be between 1 and {number_of_actions} or 'all'."
    try:
        action_index = int(action_number)
    except (TypeError, ValueError):
        # Non-numeric input gets the same explanatory error as an out-of-range one.
        raise ValueError(invalid_message) from None
    if not 1 <= action_index <= number_of_actions:
        raise ValueError(invalid_message)

    description = action_descriptions[action_index - 1]
    return [
        "Action: ({}) {}\nCompound: {}".format(action_number, description, compound)
        for compound in compounds
    ]


def get_ft_responses(model,  ft_model_id, user_messages_sample,action_number="all"):
    """
    Query a chat model once per (compound, action) pair and collect the replies.

    The system prompt is read from the first row of the 'system' column of
    "Model {model}.xlsx". The actions offered to the model are parsed from that
    prompt: each is expected to appear as "(n) description." followed by a
    newline, numbered consecutively from 1.

    Parameters:
    - model (str): Model name used to load the corresponding Excel file.
    - ft_model_id (str): Model ID passed to the chat completion endpoint.
    - user_messages_sample (list): Compounds to build the user messages from.
    - action_number (str): The specific action number or "all". Default is "all".

    Returns:
    - List with the message object of the first choice of every completion, in
      request order.

    Raises:
    - ValueError: if action_number is neither "all" nor a number between 1 and
      the number of actions found in the system prompt.
    """

    # 1. LOAD THE DATA
    # keep_default_na=False with an empty na_values list keeps cells such as
    # 'N/A' as literal strings instead of converting them to NaN.
    data = pd.read_excel(f"Model {model}.xlsx", na_values=[], keep_default_na=False)
    system_message = data['system'].iloc[0]

    # 2. EXTRACT ACTION CHOICES
    action_descriptions = _extract_action_descriptions(system_message)

    # 3. ADJUST USER MESSAGES BASED ON ACTION_NUMBER
    adjusted_user_messages = _build_user_messages(
        action_descriptions, user_messages_sample, action_number
    )

    # 4. INTERACT WITH THE AI MODEL
    responses = []
    for user_message in adjusted_user_messages:
        completion = openai.ChatCompletion.create(
            model=ft_model_id,
            messages=[
                {"role": "system", "content": system_message},
                {"role": "user", "content": user_message}
            ]
        )
        # Only the first choice is kept.
        responses.append(completion.choices[0].message)

    return responses




In [26]:
def _decode_selfies_or_invalid(value):
    """Decode one SELFIES cell, mapping anything undecodable to a sentinel string."""
    if not isinstance(value, str):
        # e.g. NaN from an empty cell
        return "Invalid"
    if value in ("Invalid", "Error"):
        # Keep the sentinels as they are; "Error" is what evaluation() stores
        # for a request that failed on every attempt.
        return value
    try:
        return sf.decoder(value)
    except sf.DecoderError:
        return "Invalid"


def evaluation(model_type, model_ids=None):
    """
    Run every model over the evaluation workbook and save the answers.

    For each of the ten output columns, the matching input column of
    "Evaluation Medtod {model_type}.xlsx" is sent row by row (together with the
    row's "Action") through get_ft_responses, and the text after
    "New Compound: " in each reply is recorded. The results are written to the
    sheet 'Eva Output' of the same workbook; a copy with decoded SELFIES
    columns for FT models 2 and 5 is written to 'Eva Output Decode'. Existing
    sheets with those names are replaced, so the function can be re-run.

    Parameters:
    - model_type (str): Suffix of the model family, e.g. "R" or "S". It selects
      the workbook, the system-prompt files and the output column names.
    - model_ids (list): Ten model IDs, one per output column, in column order.
      Defaults to the notebook-level variable `model_id_list`.

    Returns:
    - None.

    Raises:
    - ValueError: if the number of model IDs does not match the number of
      output columns, or an output column has no matching input column.
    """
    if model_ids is None:
        # Backward compatible: earlier callers set the global before calling.
        model_ids = model_id_list

    output_column_list = ["GPT-3.5 (SMILES)",
                      "GPT-3.5 (IUPAC)",
                      "GPT-4 (SMILES)",
                      "GPT-4 (IUPAC)",
                      "FT Model 1"+model_type+" (SMILES)",
                      "FT Model 2"+model_type+" (SELFIES)",
                      "FT Model 3"+model_type+" (IUPAC)",
                      "FT Model 4"+model_type+" (SMILES)",
                      "FT Model 5"+model_type+" (SELFIES)",
                       "FT Model 6"+model_type+" (IUPAC)"]

    # Name of the system-prompt file used for each output column.
    model_name_list = [ "1"+model_type, # base models reuse the prompt of model 1 (SMILES)
                   "3"+model_type,  # base models reuse the prompt of model 3 (IUPAC)
                   "1"+model_type,
                   "3"+model_type,
                   "1"+model_type,
                   "2"+model_type,
                   "3"+model_type,
                   "4"+model_type,
                   "5"+model_type,
                   "6"+model_type]

    if len(model_ids) != len(output_column_list):
        # zip() would silently drop columns and fail much later.
        raise ValueError(
            f"Expected {len(output_column_list)} model IDs, got {len(model_ids)}."
        )

    file_path ="Evaluation Medtod "+model_type+".xlsx"

    # Read the Excel file
    df = pd.read_excel(file_path)

    # Initialize an empty DataFrame for the output
    output_df = pd.DataFrame(index=df.index)

    # Map the tag in an output column name to the input column to read from
    input_column_map = {
        "(SMILES)": "Input SMILES (cactus)",
        "(IUPAC)": "Input IUPAC name (cactus)",
        "(SELFIES)": "Input SELFIES"
    }

    max_attempts = 3
    columns = zip(output_column_list, model_name_list, model_ids)
    for idx, (output_column, model_name, model_id) in enumerate(columns):

        print(f"Processing column {idx+1}/{len(output_column_list)}: {output_column} using system prompt model: {model_name} for model {model_id}")

        # Determine the input column from the tag in the output column name.
        input_column = next(
            (column for tag, column in input_column_map.items() if tag in output_column),
            None,
        )
        if input_column is None:
            # Never fall back to the column of the previous iteration.
            raise ValueError(f"No input column defined for output column {output_column!r}.")

        user_messages_sample = df[input_column].tolist()
        action_numbers = df["Action"].tolist()

        # Collect AI's responses
        ai_responses = []
        for uidx, (user_message, action_number) in enumerate(zip(user_messages_sample, action_numbers)):
            print(f"\tRequesting response for message {uidx+1}/{len(user_messages_sample)}")

            for attempt in range(1, max_attempts + 1):
                try:
                    responses = get_ft_responses(model_name, model_id, [user_message], str(action_number))
                    # Keep only the part after "New Compound:". Parsing happens
                    # before anything is stored, so a failure cannot leave
                    # partial results behind that a retry would duplicate.
                    compounds = [
                        response_json["content"].split("New Compound: ")[-1].strip()
                        for response_json in responses
                    ]
                except Exception as error:
                    # Show the cause; silent retries make failures undiagnosable.
                    retry_note = " Retrying..." if attempt < max_attempts else ""
                    print(f"\tError on attempt {attempt}: {error!r}.{retry_note}")
                else:
                    ai_responses.extend(compounds)
                    break
            else:
                print(f"\tFailed after {max_attempts} attempts. Moving on to the next message.")
                ai_responses.append("Error")

        # Store the AI's responses in the output_df
        output_df[output_column] = ai_responses

    print("Processing complete!")

    # Add the results to the original workbook. if_sheet_exists='replace' lets
    # a re-run overwrite the sheet instead of raising.
    with pd.ExcelWriter(file_path, engine='openpyxl', mode='a', if_sheet_exists='replace') as writer:
        output_df.to_excel(writer, sheet_name='Eva Output', index=False)

    # Add one decoded column right after each SELFIES column (FT models 2 and 5).
    # The in-memory results are used directly; a round trip through the workbook
    # is not needed.
    eva_output_df = output_df.copy()
    for model_number in ("2", "5"):
        selfies_column = "FT Model " + model_number + model_type + " (SELFIES)"
        eva_output_df.insert(
            eva_output_df.columns.get_loc(selfies_column) + 1,
            "FT Model " + model_number + model_type + " (SELFIES_Decode)",
            eva_output_df[selfies_column].apply(_decode_selfies_or_invalid),
        )

    # Save the decoded table to its own sheet named 'Eva Output Decode'
    with pd.ExcelWriter(file_path, engine='openpyxl', mode='a', if_sheet_exists='replace') as writer:
        eva_output_df.to_excel(writer, sheet_name='Eva Output Decode', index=False)

In [27]:
# Model IDs for the "R" run. evaluation() pairs them by position with its ten
# output columns, so the order matters.
model_id_list = [
    "gpt-3.5-turbo-0613",                           # 3.5 SMILES
    "gpt-3.5-turbo-0613",                           # 3.5 IUPAC
    "gpt-4-0613",                                   # 4 SMILES
    "gpt-4-0613",                                   # 4 IUPAC
    "ft:gpt-3.5-turbo-0613:uc-berkeley::7vd4eEZu",  # model 1R
    "ft:gpt-3.5-turbo-0613:uc-berkeley::7veHJ0eR",  # model 2R
    "ft:gpt-3.5-turbo-0613:uc-berkeley::7vyL332G",  # model 3R
    "ft:gpt-3.5-turbo-0613:uc-berkeley::7vgVMad5",  # model 4R
    "ft:gpt-3.5-turbo-0613:uc-berkeley::7vi2eO10",  # model 5R
    "ft:gpt-3.5-turbo-0613:uc-berkeley::7vjOau5j",  # model 6R
]

model_type = "R"
evaluation(model_type)

Processing column 1/10: GPT-3.5 (SMILES) using system prompt model: 1R for model gpt-3.5-turbo-0613
	Requesting response for message 1/90
	Requesting response for message 2/90
	Requesting response for message 3/90
	Requesting response for message 4/90
	Requesting response for message 5/90
	Requesting response for message 6/90
	Requesting response for message 7/90
	Requesting response for message 8/90
	Requesting response for message 9/90
	Requesting response for message 10/90
	Requesting response for message 11/90
	Requesting response for message 12/90
	Requesting response for message 13/90
	Requesting response for message 14/90
	Requesting response for message 15/90
	Requesting response for message 16/90
	Requesting response for message 17/90
	Requesting response for message 18/90
	Requesting response for message 19/90
	Requesting response for message 20/90
	Requesting response for message 21/90
	Requesting response for message 22/90
	Requesting response for message 23/90
	Requesting 

	Requesting response for message 23/90
	Requesting response for message 24/90
	Requesting response for message 25/90
	Requesting response for message 26/90
	Requesting response for message 27/90
	Requesting response for message 28/90
	Requesting response for message 29/90
	Requesting response for message 30/90
	Requesting response for message 31/90
	Requesting response for message 32/90
	Requesting response for message 33/90
	Requesting response for message 34/90
	Requesting response for message 35/90
	Requesting response for message 36/90
	Requesting response for message 37/90
	Requesting response for message 38/90
	Requesting response for message 39/90
	Requesting response for message 40/90
	Requesting response for message 41/90
	Requesting response for message 42/90
	Requesting response for message 43/90
	Requesting response for message 44/90
	Requesting response for message 45/90
	Requesting response for message 46/90
	Requesting response for message 47/90
	Requesting response for 

	Requesting response for message 49/90
	Requesting response for message 50/90
	Requesting response for message 51/90
	Requesting response for message 52/90
	Requesting response for message 53/90
	Requesting response for message 54/90
	Requesting response for message 55/90
	Requesting response for message 56/90
	Requesting response for message 57/90
	Requesting response for message 58/90
	Requesting response for message 59/90
	Requesting response for message 60/90
	Requesting response for message 61/90
	Requesting response for message 62/90
	Requesting response for message 63/90
	Requesting response for message 64/90
	Requesting response for message 65/90
	Requesting response for message 66/90
	Requesting response for message 67/90
	Requesting response for message 68/90
	Requesting response for message 69/90
	Requesting response for message 70/90
	Requesting response for message 71/90
	Requesting response for message 72/90
	Requesting response for message 73/90
	Requesting response for 

	Requesting response for message 71/90
	Requesting response for message 72/90
	Requesting response for message 73/90
	Requesting response for message 74/90
	Requesting response for message 75/90
	Requesting response for message 76/90
	Requesting response for message 77/90
	Requesting response for message 78/90
	Requesting response for message 79/90
	Requesting response for message 80/90
	Requesting response for message 81/90
	Requesting response for message 82/90
	Requesting response for message 83/90
	Requesting response for message 84/90
	Requesting response for message 85/90
	Requesting response for message 86/90
	Requesting response for message 87/90
	Requesting response for message 88/90
	Requesting response for message 89/90
	Requesting response for message 90/90
Processing column 8/10: FT Model 4R (SMILES) using system prompt model: 4R for model ft:gpt-3.5-turbo-0613:uc-berkeley::7vgVMad5
	Requesting response for message 1/90
	Requesting response for message 2/90
	Requesting res

	Requesting response for message 2/90
	Requesting response for message 3/90
	Requesting response for message 4/90
	Requesting response for message 5/90
	Requesting response for message 6/90
	Requesting response for message 7/90
	Requesting response for message 8/90
	Requesting response for message 9/90
	Requesting response for message 10/90
	Requesting response for message 11/90
	Requesting response for message 12/90
	Requesting response for message 13/90
	Requesting response for message 14/90
	Requesting response for message 15/90
	Requesting response for message 16/90
	Requesting response for message 17/90
	Requesting response for message 18/90
	Requesting response for message 19/90
	Requesting response for message 20/90
	Requesting response for message 21/90
	Requesting response for message 22/90
	Requesting response for message 23/90
	Requesting response for message 24/90
	Requesting response for message 25/90
	Requesting response for message 26/90
	Requesting response for message 

In [29]:
# Model IDs for the "S" run. evaluation() pairs them by position with its ten
# output columns, so the order matters.
# NOTE(review): the six fine-tuned IDs are empty placeholders; presumably the
# requests for those columns cannot succeed until real IDs are filled in.
model_id_list = [
    "gpt-3.5-turbo-0613",  # 3.5 SMILES
    "gpt-3.5-turbo-0613",  # 3.5 IUPAC
    "gpt-4-0613",          # 4 SMILES
    "gpt-4-0613",          # 4 IUPAC
    "",                    # model 1S
    "",                    # model 2S
    "",                    # model 3S
    "",                    # model 4S
    "",                    # model 5S
    "",                    # model 6S
]

model_type = "S"
evaluation(model_type)

Processing column 1/10: GPT-3.5 (SMILES) using system prompt model: 1S for model gpt-3.5-turbo-0613
	Requesting response for message 1/150
	Requesting response for message 2/150
	Requesting response for message 3/150
	Requesting response for message 4/150
	Requesting response for message 5/150
	Requesting response for message 6/150
	Requesting response for message 7/150
	Requesting response for message 8/150
	Requesting response for message 9/150
	Requesting response for message 10/150
	Requesting response for message 11/150
	Requesting response for message 12/150
	Requesting response for message 13/150
	Requesting response for message 14/150
	Requesting response for message 15/150
	Requesting response for message 16/150
	Requesting response for message 17/150
	Requesting response for message 18/150
	Requesting response for message 19/150
	Requesting response for message 20/150
	Requesting response for message 21/150
	Requesting response for message 22/150
	Requesting response for mess

	Error on attempt 3. Retrying...
	Failed after 3 attempts. Moving on to the next message.
	Requesting response for message 115/150
	Error on attempt 1. Retrying...
	Error on attempt 2. Retrying...
	Error on attempt 3. Retrying...
	Failed after 3 attempts. Moving on to the next message.
	Requesting response for message 116/150
	Error on attempt 1. Retrying...
	Error on attempt 2. Retrying...
	Error on attempt 3. Retrying...
	Failed after 3 attempts. Moving on to the next message.
	Requesting response for message 117/150
	Error on attempt 1. Retrying...
	Error on attempt 2. Retrying...
	Error on attempt 3. Retrying...
	Failed after 3 attempts. Moving on to the next message.
	Requesting response for message 118/150
	Error on attempt 1. Retrying...
	Error on attempt 2. Retrying...
	Error on attempt 3. Retrying...
	Failed after 3 attempts. Moving on to the next message.
	Requesting response for message 119/150
	Error on attempt 1. Retrying...
	Error on attempt 2. Retrying...
	Error on attem

	Requesting response for message 25/150
	Requesting response for message 26/150
	Requesting response for message 27/150
	Requesting response for message 28/150
	Requesting response for message 29/150
	Requesting response for message 30/150
	Requesting response for message 31/150
	Requesting response for message 32/150
	Requesting response for message 33/150
	Requesting response for message 34/150
	Requesting response for message 35/150
	Requesting response for message 36/150
	Requesting response for message 37/150
	Requesting response for message 38/150
	Requesting response for message 39/150
	Requesting response for message 40/150
	Requesting response for message 41/150
	Requesting response for message 42/150
	Requesting response for message 43/150
	Requesting response for message 44/150
	Requesting response for message 45/150
	Requesting response for message 46/150
	Requesting response for message 47/150
	Requesting response for message 48/150
	Requesting response for message 49/150


	Error on attempt 2. Retrying...
	Error on attempt 3. Retrying...
	Failed after 3 attempts. Moving on to the next message.
	Requesting response for message 120/150
	Error on attempt 1. Retrying...
	Error on attempt 2. Retrying...
	Error on attempt 3. Retrying...
	Failed after 3 attempts. Moving on to the next message.
	Requesting response for message 121/150
	Error on attempt 1. Retrying...
	Error on attempt 2. Retrying...
	Error on attempt 3. Retrying...
	Failed after 3 attempts. Moving on to the next message.
	Requesting response for message 122/150
	Error on attempt 1. Retrying...
	Error on attempt 2. Retrying...
	Error on attempt 3. Retrying...
	Failed after 3 attempts. Moving on to the next message.
	Requesting response for message 123/150
	Error on attempt 1. Retrying...
	Error on attempt 2. Retrying...
	Error on attempt 3. Retrying...
	Failed after 3 attempts. Moving on to the next message.
	Requesting response for message 124/150
	Error on attempt 1. Retrying...
	Error on attem

	Requesting response for message 49/150
	Requesting response for message 50/150
	Requesting response for message 51/150
	Requesting response for message 52/150
	Requesting response for message 53/150
	Requesting response for message 54/150
	Requesting response for message 55/150
	Requesting response for message 56/150
	Requesting response for message 57/150
	Requesting response for message 58/150
	Requesting response for message 59/150
	Requesting response for message 60/150
	Requesting response for message 61/150
	Requesting response for message 62/150
	Requesting response for message 63/150
	Requesting response for message 64/150
	Requesting response for message 65/150
	Requesting response for message 66/150
	Requesting response for message 67/150
	Requesting response for message 68/150
	Requesting response for message 69/150
	Requesting response for message 70/150
	Requesting response for message 71/150
	Requesting response for message 72/150
	Requesting response for message 73/150


	Error on attempt 3. Retrying...
	Failed after 3 attempts. Moving on to the next message.
	Requesting response for message 125/150
	Error on attempt 1. Retrying...
	Error on attempt 2. Retrying...
	Error on attempt 3. Retrying...
	Failed after 3 attempts. Moving on to the next message.
	Requesting response for message 126/150
	Error on attempt 1. Retrying...
	Error on attempt 2. Retrying...
	Error on attempt 3. Retrying...
	Failed after 3 attempts. Moving on to the next message.
	Requesting response for message 127/150
	Error on attempt 1. Retrying...
	Error on attempt 2. Retrying...
	Error on attempt 3. Retrying...
	Failed after 3 attempts. Moving on to the next message.
	Requesting response for message 128/150
	Error on attempt 1. Retrying...
	Error on attempt 2. Retrying...
	Error on attempt 3. Retrying...
	Failed after 3 attempts. Moving on to the next message.
	Requesting response for message 129/150
	Error on attempt 1. Retrying...
	Error on attempt 2. Retrying...
	Error on attem

KeyboardInterrupt: 

In [None]:
# Ad-hoc check: pass one hand-written SELFIES string through the selfies decoder.
sf.decoder("[C][C][=C][Branch2][Ring1][C][NH1][C][Branch1][=Branch1][C][Branch1][C][O][=O][=C][Ring1][Branch2][C][Branch1][C][O][=O][C][Branch1][N][O][=O]")

In [None]:
# Ad-hoc check: pass one hand-written SMILES string through the selfies encoder.
sf.encoder("OC(=O)c1ccc(cc1)c2cc(cc(c2)C(O)=O)C(O)=O")

In [23]:
# Standalone version of the decode step: read the 'Eva Output' sheet, add the
# decoded SELFIES columns and save the result as 'Eva Output Decode'.
model_type = "R"
file_path ="Evaluation Medtod "+model_type+".xlsx"


with pd.ExcelFile(file_path) as xls:
    eva_output_df = pd.read_excel(xls, 'Eva Output')


def decode_or_invalid(value):
    """Decode one SELFIES cell, mapping anything undecodable to a sentinel string."""
    if not isinstance(value, str):
        # Empty cells come back from Excel as NaN, which the decoder cannot take.
        return "Invalid"
    if value in ("Invalid", "Error"):
        # Keep the sentinels as they are; "Error" marks a request that failed.
        return value
    try:
        return sf.decoder(value)
    except sf.DecoderError:
        return "Invalid"


# Decode the SELFIES answers of FT models 2 and 5 and insert each decoded
# column right after the column it was derived from.
for model_number in ("2", "5"):
    selfies_column = "FT Model " + model_number + model_type + " (SELFIES)"
    eva_output_df.insert(
        eva_output_df.columns.get_loc(selfies_column) + 1,
        "FT Model " + model_number + model_type + " (SELFIES_Decode)",
        eva_output_df[selfies_column].apply(decode_or_invalid),
    )

# Save the updated DataFrame to a sheet named 'Eva Output Decode'.
# if_sheet_exists='replace' lets the cell be re-run without raising because the
# sheet is already there.
with pd.ExcelWriter(file_path, engine='openpyxl', mode='a', if_sheet_exists='replace') as writer:
    eva_output_df.to_excel(writer, sheet_name='Eva Output Decode', index=False)