In [3]:
import os
import re
import time

import pandas as pd
from openai import OpenAI

import Constants

## Sample OpenAI Prompt Completion

In [8]:
# Shared OpenAI client used by the helper functions below; the API key is read
# from the local Constants module rather than being written in the notebook.
client = OpenAI(api_key=Constants.API_KEY_OPENAI)

def get_completion(prompt, model="gpt-4o-mini", temperature=0):
    """Send ``prompt`` as a single user message and return the chat completion.

    Args:
        prompt: Text placed in the ``content`` of the one user message.
        model: Model name forwarded to the chat completions endpoint.
        temperature: Sampling temperature forwarded unchanged.

    Returns:
        The response object exactly as returned by
        ``client.chat.completions.create``; callers read the generated text
        from ``response.choices[0].message.content``.
    """
    return client.chat.completions.create(
        model=model,
        messages=[{"role": "user", "content": prompt}],
        temperature=temperature,
    )

In [7]:
# Few-shot prompt asking the model for short texts labelled with one of six emotions.
# The requested count is stated consistently ("ten pairs") so the opening
# instruction and the closing question no longer contradict each other; ten is
# the number of pairs shown in the recorded outputs of this notebook.
prompt = """
Can you generate ten pairs of text between two to twenty words long each and label with emotion that most strongly associate with the text.   There are six possible emotions for labeling ("sadness", "joy", "love", "anger", "fear", "surprise").  Make sure there's roughly equal number of each emotion.   

An example would look like this

1) 'i didnt feel humiliated' = 'sadness'
 2) 'i can go from feeling so hopeless to so damned hopeful just from being around someone who cares and is awake' = 'sadness'
 3)  'im grabbing a minute to post i feel greedy wrong' = 'anger'
 4) 'i am ever feeling nostalgic about the fireplace i will know that it is still on the property' = 'love'
 5)  'i am feeling grouchy' = 'anger'

 Can you generate ten pairs?
"""

In [21]:
# Run the emotion prompt at a high sampling temperature (1.5) instead of the default of 0.
sample_response_high_temp = get_completion(prompt=prompt, temperature=1.5)

In [22]:
# print() renders the newlines in the generated text of the first choice.
print(sample_response_high_temp.choices[0].message.content)

1) "It hurts to see you go and not come back" = sadness
2) "The feeling of being loved and cared for makes my heart soar" = joy
3) "I'm so in love with your smile, it lights up my world" = love
4) "How dare you treat me with such disrespect" = anger
5) "The fear of failing is always at the back of my mind" = fear
6) "I was surprised by how quickly things changed in my life" = surprise
7) "I miss the sound of your laughter echoing through the house" = sadness
8) "The sheer joy of watching a beautiful sunset overwhelms me" = joy
9) "I will always cherish the moments we shared together" = love
10) "The anger within me grows as I see injustice prevail in society" = anger.


In [8]:
# Same prompt with get_completion's default temperature (0).
sample_response = get_completion(prompt=prompt)

In [19]:
# print() renders the newlines in the generated text of the first choice.
print(sample_response.choices[0].message.content)

1) "I miss you so much it hurts" = sadness
2) "I can't stop smiling when I think of you" = joy
3) "I am so grateful for your love and support" = love
4) "I am furious at the way I was treated" = anger
5) "I am terrified of what the future holds" = fear
6) "I was completely shocked by the news" = surprise
7) "I feel so alone without you here" = sadness
8) "I am overjoyed by the news of your success" = joy
9) "I am head over heels in love with you" = love
10) "I am seething with anger over the injustice" = anger


## Chinese Language Prompt

In [38]:
# Load the existing Chinese word list; the path is relative, so it resolves
# against the kernel's current working directory.
chinese_dict = pd.read_csv("ChineseWords/ChineseWordList.csv")

In [36]:
from io import StringIO

def get_prompt_for_chinese_translation(chinese_words):
    """Build the prompt asking for a translation table of Chinese words.

    Args:
        chinese_words: Either a ready-made string of words (used verbatim, as
            before) or any iterable of words such as a list, tuple, pandas
            Series or NumPy array. Iterables are joined with " , " so the
            prompt never contains a Python/NumPy container repr.

    Returns:
        The full prompt text, ending with the input words.
    """
    if not isinstance(chinese_words, str):
        # Join word collections the same way the string form is written by hand.
        chinese_words = " , ".join(str(word).strip() for word in chinese_words)

    chinese_prompt =  f"""
    For each of the input Chinese words, please output the following as a one row in a table.  There should be the following columns in table related to the word.  

    Generate output similar to the following example: 
    1. Word:  农场
    2. Pinyin:  nong2 chang3
    3. Word Type:   Noun (This should be adjusted whether the meaning of the word is noun/adjective/verb based on the meaning and example sentence)
    4. Meaning:  Farm (This is could be a longer description of the meaning of the word if no exact translation exists in English)
    5. Sentence:  我暑假打算去爷爷的农场帮忙  
    6. Sentence Pinyin:  Wǒ shǔjià dǎsuàn qù yéye de nóngchǎng bāngmáng.
    7. Sentence Meaning:  I plan to go to my grandfather's farm to help during the summer vacation.

    For each word with multiple meanings, add more rows to the the table with alternate meaning and example sentence.  If there is only one meaning, then keep only one row for each word.  

    All input words should be included in one table.  Only return the table with no other text.

    Input Chinese Word = {chinese_words}
    """

    return chinese_prompt

def parse_table_response(content):
    """Parse a pipe-delimited (Markdown-style) table string into a DataFrame.

    Args:
        content: Text containing the table. Lines without a ``|`` (for example
            code fences or introductory prose around the table) are ignored.

    Returns:
        A DataFrame with whitespace-stripped column names and cell values, the
        ``---`` separator row removed, the empty edge columns produced by the
        leading/trailing pipes dropped, and blank ``Word`` cells filled from
        the previous row. The original row index is kept (the separator row's
        position is simply missing).

    Raises:
        pandas.errors.EmptyDataError: If ``content`` has no table lines.
        AttributeError: If the table has no ``Word`` column.
    """
    # Only lines containing a pipe can be table rows; this drops ``` fences
    # and any text the model adds around the table.
    table_lines = [line for line in content.splitlines() if '|' in line]

    # StringIO lets pandas read the text as if it were a file.
    data = StringIO('\n'.join(table_lines))
    df = pd.read_csv(data, delimiter='|', engine='python')

    # Strip padding around column names and around every string cell.
    # Series.map is used per column because DataFrame.applymap is deprecated.
    df.columns = df.columns.str.strip()
    df = df.apply(
        lambda column: column.map(lambda x: x.strip() if isinstance(x, str) else x)
    )

    # Drop the header separator row. na=False keeps rows whose Word cell is
    # missing (NaN) instead of failing when the mask is negated.
    is_separator = df.Word.str.contains('--', na=False, regex=False)
    df = df.loc[~is_separator]

    # Leading/trailing pipes create empty columns that pandas names "Unnamed: N".
    col_to_keep = [col for col in df if 'Unnamed' not in col]
    # Explicit copy so the assignment below never writes into a view.
    df = df[col_to_keep].copy()

    # Rows for alternate meanings may leave Word blank; inherit the word above.
    df['Word'] = df['Word'].replace('', pd.NA).ffill()

    return df

In [44]:
# Request the translation table for three sample words, passed as one " , "-separated string.
sample_response_translation = get_completion(prompt=get_prompt_for_chinese_translation("其实 , 主意 , 考虑"), temperature=0.5)

In [45]:
# Keep the generated text of the first choice; the prompt asks for it to be a table only.
content = sample_response_translation.choices[0].message.content

In [47]:
# Inspect the raw response text before parsing it.
content

"| Word  | Pinyin         | Word Type | Meaning                                                           | Sentence                                      | Sentence Pinyin                             | Sentence Meaning                                                |\n|-------|----------------|-----------|-------------------------------------------------------------------|-----------------------------------------------|---------------------------------------------|----------------------------------------------------------------|\n| 其实  | qí shí         | Adverb    | Actually; in fact; used to express a truth that may be surprising | 其实我并不想去参加那个聚会。                   | Qíshí wǒ bìng bù xiǎng qù cānjiā nàgè jùhuì. | Actually, I don't want to attend that gathering.               |\n| 主意  | zhǔ yì         | Noun      | Idea; suggestion; a thought or plan that one considers            | 你有什么好的主意吗？                           | Nǐ yǒu shénme hǎo de zhǔyì ma?             | Do you have any good i

In [46]:
# Parse the raw table text into a DataFrame and display it.
parse_table_response(content)

Unnamed: 0,Word,Pinyin,Word Type,Meaning,Sentence,Sentence Pinyin,Sentence Meaning
1,其实,qí shí,Adverb,Actually; in fact; used to express a truth tha...,其实我并不想去参加那个聚会。,Qíshí wǒ bìng bù xiǎng qù cānjiā nàgè jùhuì.,"Actually, I don't want to attend that gathering."
2,主意,zhǔ yì,Noun,Idea; suggestion; a thought or plan that one c...,你有什么好的主意吗？,Nǐ yǒu shénme hǎo de zhǔyì ma?,Do you have any good ideas?
3,考虑,kǎo lǜ,Verb,To consider; to think about something carefully,我需要考虑一下这个问题。,Wǒ xūyào kǎolǜ yīxià zhège wèntí.,I need to consider this issue for a moment.


In [None]:
# Distinct words from the loaded list, in order of first appearance.
chinese_dict['Word'].unique()

In [43]:
# Display the loaded word list.
chinese_dict

Unnamed: 0,Word,Pinyin,Type,Meaning,Sentence,Sentence Pinyin,Sentence Meaning
0,大概,da4 gai4,Adjective,Approximately,他大概已经出发了,Tā dàgài yǐjīng chūfā le.,He probably already left
1,被,bei4,Particle,Particle word to turn into passive voice. (.ie...,甜甜圈被吃掉了,tian2 tian2 quan1 bei4 chi1 diao4 le,The donut was eaten
2,往前,wang3 qian2,Adjective,Go forward,往前走,wang3 qian2 zou3,
3,觉得,jue2 de,Verb,Think,你觉得她怎么样,ni3 jue2 de ta1 zen3 me yang,What do you think about her
4,已经,yi3 jing1,Adverb,Already,我已经吃过晚饭了,Wǒ yǐjīng chī guò wǎnfàn le.,I have already had dinner
...,...,...,...,...,...,...,...
73,不仅,bu4 jin3,Grammar,Not only ....,他不仅喜欢音乐，还喜欢运动,Tā bùjǐn xǐhuān yīnyuè hái xǐhuān yùndòng.,He not only likes music but also likes sports.
74,然后,ran2 hou4,Grammar,Then,我先做完作业，然后去看电影,Wǒ xiān zuò wán zuòyè ránhòu qù kàn diànyǐng,I will finish my homework first then go to wat...
75,但是,dan4 shi4,Grammar,But,我想去旅游，但是没有时间。,Wǒ xiǎng qù lǚyóu dànshì méiyǒu shíjiān.,I want to travel but I don't have time.
76,可是,ke3 shi4,Grammar,But (Similar to dan4 shi4 but less strong emph...,我很喜欢这本书，可是太贵了。,Wǒ hěn xǐhuān zhè běn shū kěshì tài guì le.,I really like this book but it's too expensive.


## Prompt Mentoring

In [None]:
# NOTE(review): this rebinds the `client` created near the top of the notebook,
# replacing it with one built from a placeholder key. Substitute a real key (or
# reuse the earlier client) before running the cells below.
client = OpenAI(api_key="<Insert yours>")

# NOTE(review): this module-level value is not read by the example call at the
# end of this cell, which passes its own string literal.
equality_statement = "Interprete this 'Edge_PB_Share_Avg > 0.82', 'EnterpriseMobilityCoreE3Rev > 1907.40', '5921.00 < AADPAllUp_MAU <= 15701.50',"

# "Score" followed by punctuation/whitespace and a number, e.g. "Score: 8.5",
# "Score:9" or "**Score:** 7/10" (only the leading number is captured).
_SCORE_PATTERN = re.compile(r"Score\W*(\d+(?:\.\d+)?)", re.IGNORECASE)
# Everything after the "Comments:" (or "Comment:") label.
_COMMENTS_PATTERN = re.compile(r"Comments?\s*:\s*(.*)", re.IGNORECASE | re.DOTALL)


def _parse_mentor_feedback(mentor_response):
    """Extract ``(score, comment)`` from the mentor's free-text reply."""
    score_match = _SCORE_PATTERN.search(mentor_response)
    if score_match is None:
        raise ValueError(
            f"Could not find a numeric score in the mentor response: {mentor_response!r}"
        )
    comment_match = _COMMENTS_PATTERN.search(mentor_response)
    # Without a "Comments:" label, fall back to the whole reply as the comment.
    comment = comment_match.group(1) if comment_match else mentor_response
    return float(score_match.group(1)), comment.strip()


def interpret_equality_statement(equality_statement, max_attempts=5, passing_score=8.5):
    """Generate an English interpretation and have a "mentor" model score it.

    Each attempt makes two chat-completion calls through the module-level
    ``client``: one producing the interpretation and one scoring it. If the
    score is below ``passing_score`` the feedback is printed and, when another
    attempt remains, the user is asked via ``input()`` for a revised statement.

    Args:
        equality_statement: Text sent as the user message to the first model.
        max_attempts: Maximum number of generate/evaluate rounds (at least 1).
        passing_score: Minimum mentor score that approves an interpretation.

    Returns:
        ``"Interpretation Approved: ..."`` with the accepted interpretation, or
        a ``"Max attempts reached. ..."`` summary containing the highest score
        seen and the comment from the last attempt.

    Raises:
        ValueError: If ``max_attempts`` is less than 1, or the mentor reply
            contains no recognisable numeric score.
    """
    if max_attempts < 1:
        raise ValueError("max_attempts must be at least 1")

    highest_score = 0
    last_comment = ""

    for attempt_count in range(max_attempts):
        # MLS Simulation: Generate interpretation
        interpretation = client.chat.completions.create(
            model="gpt-4o-mini",
            messages=[
                {
                    "role": "system",
                    "content": """You are a Machine Learning Scientist who converts equality statements into proper English.
                        An equality statement has the pattern "<measure> <equality> <value>" or "<value> <equality> <measure> <equality> <value>".

                        <measure> has a mapping to proper English:
                        Edge_PB_Share_Avg : Edge Primary Browser Share
                        SPO_MAU : SharePoint Online MAU
                        EnterpriseMobilityCoreE3Rev : EMS E3 Revenue
                        AADPAllUp_MAU : Entra ID MAU

                        if <value> is less than 1, it is a percentage.
                        if <value> is greater than 999, it must have comma(s).
                        if <value> is greater than 1, it must be rounded to the nearest whole number.
                        if <measure> has MAU, round <value> to the nearest hundred.

                        You always start an English sentence with the phrase "We recommend this product for this tenant because..."

                    """
                },
                {
                    "role": "user",
                    "content": equality_statement
                }
            ]
        )
        generated_interpretation = interpretation.choices[0].message.content

        # Mentor Simulation: Generate mentor feedback
        mentor_feedback = client.chat.completions.create(
            model="gpt-4",
            messages=[
                {
                    "role": "system",
                    "content": """You are a senior leader in an organization who evaluates the output of the Machine Learning Scientist. You will give them a score of their work from 1 to 10, and provide reasons and comments.

                        Evaluation criteria:
                        1. The equality statement should have the pattern "<measure> <equality> <value>" or "<value> <equality> <measure> <equality> <value>".
                        2. The statement needs to be mapped to proper English:
                            - Edge_PB_Share_Avg: Edge Primary Browser Share
                            - SPO_MAU: SharePoint Online MAU
                            - EnterpriseMobilityCoreE3Rev: EMS E3 Revenue
                            - AADPAllUp_MAU: Entra ID MAU
                        3. If <value> is less than 1, it's a percentage.
                        4. If <value> is greater than 999, it should have commas.
                        5. If <value> is greater than 1, round to the nearest whole number.
                        6. If the <measure> has MAU, round the <value> to the nearest hundred.
                        7. The English sentence should always start with the phrase "We recommend this product for this tenant because...".
                        8. The response should be in professional English.
                        9. Your mentee should not say the result is rounded or anything related to how they finish the process in the response, they should stick with the precise interpretation

                        # Good Examples:
                        Sharepoint Online usage is above 80%
                        Azure Revenue is more than $1000
                        the Entra ID MAU is between 5,900 and 15,700

                        # Bad Examples:
                        the Entrta ID MAU is between 5921 and 15701, rounded to the nearest hundred

                     Sample output: Score: 8.5; Comments: This is good. You missed the SPO_MAU : SharePoint Online MAU converter
                    This is just a sample output, you don't have to follow 100%, but think and give your feedback, if there is no mistake, you can give a 10 as well!
                    But you need to strictly follow the sample output format with Score:<your score, one number or float>; Comments:< Your comments>  
                    """
                },
                {
                    "role": "user",
                    "content": f"Evaluate this work and provide a score and comment.\nOriginal equality statement: {equality_statement}\nWork: {generated_interpretation}"
                }
            ]
        )

        # Parse mentor's feedback; tolerant of small deviations from the
        # requested "Score: <n>; Comments: <text>" layout.
        mentor_response = mentor_feedback.choices[0].message.content
        score, comment = _parse_mentor_feedback(mentor_response)

        # Check if the score is acceptable
        if score >= passing_score:
            return f"Interpretation Approved: {generated_interpretation}"

        # Score is below the threshold: remember the feedback
        highest_score = max(highest_score, score)
        last_comment = comment

        # Output feedback for this attempt
        print(f"Attempt {attempt_count + 1}: Score: {score}, Feedback: {comment}")

        # Only ask for a revision when it will actually be used by another
        # attempt; a blank answer keeps the current statement.
        if attempt_count + 1 < max_attempts:
            revised_statement = input(f"Please revise your equality statement based on the mentor's feedback:\n{comment}\nYour revised statement: ").strip()
            if revised_statement:
                equality_statement = revised_statement

    # The loop ended without a passing score: report the best score seen
    return f"Max attempts reached. Highest score: {highest_score}, Feedback from last attempt: {last_comment}"


# Example usage. The function may call input() between attempts, so this cell
# needs an interactive kernel; it also makes live API calls through `client`.
result = interpret_equality_statement("Interprete this 'Edge_PB_Share_Avg > 0.82', 'EnterpriseMobilityCoreE3Rev > 1907.40', '5921.00 < AADPAllUp_MAU <= 15701.50' to english")
print(result)

             