In [20]:
import Constants
from  openai import OpenAI
import os
import numpy as np
import pandas as pd
import time

import gspread
import gspread_dataframe as gd
import gspread_formatting as gf
from gspread_formatting import cellFormat, color, textFormat


## Sample OpenAI Prompt Completion

In [21]:
client = OpenAI(
    api_key = Constants.API_KEY_OPENAI,
)

def get_completion(prompt, model="gpt-4o-mini", temperature=0):
    messages = [{"role": "user", "content": prompt}]
    response = client.chat.completions.create(
        model=model,
        messages=messages,
        temperature=temperature,
    )
    return response

## Test Google Sheet API

In [22]:
# No need to provide path for service account.  Json file is in the default directory for gspread at %APPDATA%/gspread/service_account.json
sa = gspread.service_account()
sh = sa.open("New Chinese Words")

sheet_name = "Words"
wks2 = sh.worksheet(sheet_name)
print(f'Sheet {sheet_name} Num Rows: {wks2.row_count}')
print(f'Sheet {sheet_name} Num Columns: {wks2.col_count}')


Sheet Words Num Rows: 99
Sheet Words Num Columns: 8


In [23]:
current_data = pd.DataFrame(wks2.get_all_values())
current_data.columns = current_data.iloc[0]
current_data = current_data.iloc[1:]
current_data.head()

Unnamed: 0,Word,Pinyin,Type,Meaning,Sentence,Sentence Pinyin,Sentence Meaning
1,大概,da4 gai4,Adjective,Approximately,他大概已经出发了,Tā dàgài yǐjīng chūfā le.,He probably already left
2,被,bei4,Particle,Particle word to turn into passive voice. (.i...,甜甜圈被吃掉了,tian2 tian2 quan1 bei4 chi1 diao4 le,The donut was eaten
3,往前,wang3 qian2,Adjective,Go forward,往前走,wang3 qian2 zou3,
4,觉得,jue2 de,Verb,Think,你觉得她怎么样,ni3 jue2 de ta1 zen3 me yang,What do you think about her
5,已经,yi3 jing1,Adverb,Already,我已经吃过晚饭了,Wǒ yǐjīng chī guò wǎnfàn le.,I have already had dinner


## Chinese Language Translation

In [24]:
from main.translation import *

In [25]:
temp = 0.7
overwrite_mode = False

dict_path = "ChineseWords/ChineseWordList.csv"
dict_sheet_name = "AutoWordList"
gsheet_name = "New Chinese Words"

#cat = pd.DataFrame(sa.open("New Chinese Words").worksheet(dict_sheet_name).get_all_records())['Word Category'].unique()
cat = ['General', 'Grammar', 'Direction', 'Opinion', 'Time',
       'Description', 'Organization', 'Travel', 'Social', 'Technology',
       'Health', 'Object', 'Work', 'Intent', 'Geography', 'Agriculture',
       'Weather', 'Action', 'Problem Solving', 'Necessity', 'Support',
       'Business', 'Information', 'Emotion', 'Assurance', 'Economics',
       'Degree', 'Frequency', 'Question', 'Location', 'Sequence',
       'Contrast', 'Thought', 'Relationship', 'Food', 'Weather']

In [None]:
sample_response_translation = get_completion(prompt=get_prompt_for_chinese_translation("着"), temperature=temp)
content = sample_response_translation.choices[0].message.content
print(content)

In [8]:
newwords_df = parse_translation_response(content)
new_words = newwords_df['Word'].drop_duplicates().values
newwords_df

Unnamed: 0,Word,Pinyin,Pinyin Simplified,Type,Meaning,Sentence,Sentence Pinyin,Sentence Meaning,Word Category,Added Date
1,着,zhe,zhe2,Particle,Used to indicate a continuous state or action,他在看着电视。,Tā zài kànzhe diànshì.,He is watching TV.,Grammar,2024-10-11
2,着,zhe,zhe2,Verb,To arrive or reach,他终于到了！,Tā zhōngyú dào le!,He finally arrived!,Action,2024-10-11


In [9]:
save_new_words_to_dict(
    newwords_df = newwords_df,
    gsheet_mode= True,
    overwrite_mode = True,
    gsheet_name = gsheet_name,
    worksheet_name = dict_sheet_name)

ValueError: invalid literal for int() with base 10: ''

## Mandarin Word Quiz

In [81]:
def get_prompt_generate_word_quiz(
    word_dict: pd.DataFrame,
    startfrom_date_filter: str = None,
    category_filter: str = None
    ) -> str:
        
    if startfrom_date_filter:
        word_dict = word_dict.loc[word_dict['Added Date'] >= startfrom_date_filter]

    if category_filter:
            word_dict = word_dict.loc[word_dict['Word Category'] == category_filter]

    word_list = word_dict['Word'].drop_duplicates().sample(10).values
    prompt = f''' 
    Given this word list:
    {word_list}

    Can you create an Mandarin exercise where you choose 10 non-duplicated words and an example sentence using them.   
    Leaving 2 blank columns where user can input the pinyin and the meaning in English.

    The output should only be a 10x4 table with no other written text.  The table should have the following columns:
    1) Word
    2) Sentence
    3) Pinyin (Leave blank)
    4) Meaning (Leave blank)
    '''

    return prompt

# 4O Maybe Necessary to Evaluate Quiz.  Mini seems to be halluciating quite a bit. 
def get_prompt_evaluate_quiz(
        word_list, 
        sentence_list, 
        pinyin, 
        meaning
    ) -> str:
    prompt = f'''
    Here's the answer provided.  Please check them for correctness and mark any incorrect answer and provide the correct one. 

    Word List: {word_list}
    Provided Sentence: {sentence_list}
    
    Please generate the following table with the following columns: 
    Word List (Column #1):  The word list provided
    Pinyin (Column #2): Should be a value from this list {pinyin}
    Meaning (Column #3):  Should be a value from this list {meaning}
    Pinyin Correct (Column #4):  Check whether the Provided Pinyin is correct relative to the Word List.  Give just yes/no response.
    Correct Pinyin (Column #5):  Should contain the correct pinyin if the answer is incorrect.  Blank otherwise. When pinyin is incorrect only the correct pinyin should be provided, no other text is allowed
    Meaning Correct (Column #6):  Check whether the Provided Meaning is correct relative to the Word List.  Give just yes/no response.  
    Correct Meaning (Column #7):  If column #6 is "no" provide short explanation of why the meaning is incorrect and provide correct answer.  If column #6 is "yes" then must be blank. 

    The tone for the pinyin will be provided with number 1, 2, 3, 4, 5
    The special character ü⁠ can be replaced by v in the answer

    No other response should be given except the table
    '''
    return prompt


def parse_table(content: str) -> pd.DataFrame:
    '''
    Parse the table response from OpenAI into a pandas DataFrame
    '''
    data = StringIO(content)

    # Read the table into a pandas DataFrame
    df = pd.read_csv(data, delimiter='|',  engine='python')
    df = df.applymap(lambda x: x.strip() if isinstance(x, str) else x)
    col_to_keep = [col for col in df if 'Unnamed' not in col]
    df = df[col_to_keep]
    df.columns = df.columns.str.strip()

    col = df.columns[0]
    df = df.loc[~df[col].str.contains('--')]
    return df


In [82]:
word_dict = load_dict(gsheet_mode=True, gsheet_name=gsheet_name, worksheet_name=dict_sheet_name)

In [83]:
quiz_prompt = get_prompt_generate_word_quiz(
    word_dict=word_dict,
    startfrom_date_filter='2024-09-22',
)

sample_response_translation = get_completion(prompt=quiz_prompt, temperature=temp)

quiz = parse_table(sample_response_translation.choices[0].message.content)
quiz

Unnamed: 0,Word,Sentence,Pinyin,Meaning
1,预订,我想要预订一间酒店。,,
2,见面,我们下周见面吧。,,
3,想法,你的想法很有趣。,,
4,多久,你要多久才能到？,,
5,糊涂,他的回答让我感到糊涂。,,
6,提起,她提起了一个好主意。,,
7,左转,在下个路口左转。,,
8,酒店,这个酒店的服务很好。,,
9,避免,我们应该避免这个错误。,,
10,连,我们连这件事也没考虑到。,,


In [84]:
quiz_result_prompt = get_prompt_evaluate_quiz(
    word_list=', '.join(quiz['Word'].values),
    sentence_list=', '.join(quiz['Sentence'].values),
    pinyin='', 
    meaning=''
)

quiz_result_text = get_completion(prompt=quiz_result_prompt, temperature=temp)

In [85]:
quiz_result = parse_table(quiz_result_text.choices[0].message.content)
quiz_result

Unnamed: 0,Word List,Pinyin,Meaning,Pinyin Correct,Correct Pinyin,Meaning Correct,Correct Meaning
1,预订,yu4 ding4,book,yes,,yes,
2,见面,jian4 mian4,to meet,yes,,yes,
3,想法,tiao3 lun4,disagree,no,tiao3 lun4,yes,"""disagree"" is incorrect; correct meaning is ""i..."
4,多久,,,,,,
5,糊涂,hu2 tu2,confused,yes,,yes,
6,提起,,,,,,
7,左转,zuo3 zhuan3,turn left,yes,,yes,
8,酒店,you4 zhuan3,,no,you4 dian4,yes,"Missing meaning for ""hotel"""
9,避免,bi2 mian3,avoid,yes,,yes,
10,连,,,,,,


## Prompt Mentoring

In [69]:
client = OpenAI(api_key="<Insert yours>")

equality_statement = "Interprete this 'Edge_PB_Share_Avg > 0.82', 'EnterpriseMobilityCoreE3Rev > 1907.40', '5921.00 < AADPAllUp_MAU <= 15701.50',"

def interpret_equality_statement(equality_statement):
    # Initialize variables for tracking attempts
    attempt_count = 0
    max_attempts = 5
    highest_score = 0
    feedback_dict = {}
 
    while attempt_count < max_attempts:
        # MLS Simulation: Generate interpretation
        interpretation = client.chat.completions.create(
            model="gpt-4o-mini",
            messages=[
                {
                    "role": "system",
                    "content": """You are a Machine Learning Scientist who converts equality statements into proper English.
                        An equality statement has the pattern "<measure> <equality> <value>" or "<value> <equality> <measure> <equality> <value>".

                        <measure> has a mapping to proper English:
                        Edge_PB_Share_Avg : Edge Primary Browser Share
                        SPO_MAU : SharePoint Online MAU
                        EnterpriseMobilityCoreE3Rev : EMS E3 Revenue
                        AADPAllUp_MAU : Entra ID MAU

                        if <value> is less than 1, it is a percentage.
                        if <value> is greater than 999, it must have comma(s).
                        if <value> is greater than 1, it must be rounded to the nearest whole number.
                        if <measure> has MAU, round <value> to the nearest hundred.

                        You always start an English sentence with the phrase "We recommend this product for this tenant because..."

                    """
                },
                {
                    "role": "user",
                    "content": equality_statement
                }
            ]
        )
        generated_interpretation = interpretation.choices[0].message.content

        # Mentor Simulation: Generate mentor feedback
        mentor_feedback = client.chat.completions.create(
            model="gpt-4",
            messages=[
                {
                    "role": "system",
                    "content": """You are a senior leader in an organization who evaluates the output of the Machine Learning Scientist. You will give them a score of their work from 1 to 10, and provide reasons and comments.

                        Evaluation criteria:
                        1. The equality statement should have the pattern "<measure> <equality> <value>" or "<value> <equality> <measure> <equality> <value>".
                        2. The statement needs to be mapped to proper English:
                            - Edge_PB_Share_Avg: Edge Primary Browser Share
                            - SPO_MAU: SharePoint Online MAU
                            - EnterpriseMobilityCoreE3Rev: EMS E3 Revenue
                            - AADPAllUp_MAU: Entra ID MAU
                        3. If <value> is less than 1, it's a percentage.
                        4. If <value> is greater than 999, it should have commas.
                        5. If <value> is greater than 1, round to the nearest whole number.
                        6. If the <measure> has MAU, round the <value> to the nearest hundred.
                        7. The English sentence should always start with the phrase "We recommend this product for this tenant because...".
                        8. The response should be in professional English.
                        9. Your mentee should not say the result is rounded or anything related to how they finish the process in the response, they should stick with the precise interpretation

                        # Good Examples:
                        Sharepoint Online usage is above 80%
                        Azure Revenue is more than $1000
                        the Entra ID MAU is between 5,900 and 15,700

                        # Bad Examples:
                        the Entrta ID MAU is between 5921 and 15701, rounded to the nearest hundred

                     Sample output: Score: 8.5; Comments: This is good. You missed the SPO_MAU : SharePoint Online MAU converter
                    This is just a sample output, you don't have to follow 100%, but think and give your feedback, if there is no mistake, you can give a 10 as well!
                    But you need to strictly follow the sample output format with Score:<your score, one number or float>; Comments:< Your comments>  
                    """
                },
                {
                    "role": "user",
                    "content": f"Evaluate this work and provide a score and comment.\nOriginal equality statement: {equality_statement}\nWork: {generated_interpretation}"
                }
            ]
        )

        # Parse mentor's feedback
        mentor_response = mentor_feedback.choices[0].message.content
        score = float(mentor_response.split('Score: ')[1].split(';')[0])
        comment = mentor_response.split('Comments: ')[1]
 
        # Check if the score is acceptable
        if score >= 8.5:
            return f"Interpretation Approved: {generated_interpretation}"
 
        # If score is below 8.5, store the feedback
        feedback_dict[attempt_count] = {'score': score, 'comment': comment}
        highest_score = max(highest_score, score)

        # Output feedback and ask user to revise with mentor's comment attached
        print(f"Attempt {attempt_count + 1}: Score: {score}, Feedback: {comment}")
        equality_statement = input(f"Please revise your equality statement based on the mentor's feedback:\n{comment}\nYour revised statement: ")

        attempt_count += 1
 
    # If the loop ends without a passing score, output the highest score
    return f"Max attempts reached. Highest score: {highest_score}, Feedback from last attempt: {feedback_dict[attempt_count-1]['comment']}"


# Example usage
result = interpret_equality_statement("Interprete this 'Edge_PB_Share_Avg > 0.82', 'EnterpriseMobilityCoreE3Rev > 1907.40', '5921.00 < AADPAllUp_MAU <= 15701.50' to english")
print(result)

             

AuthenticationError: Error code: 401 - {'error': {'message': 'Incorrect API key provided: <Insert **urs>. You can find your API key at https://platform.openai.com/account/api-keys.', 'type': 'invalid_request_error', 'param': None, 'code': 'invalid_api_key'}}