In [9]:
!pip install openai



In [1]:
from dotenv import load_dotenv
import os

In [2]:
# Must run before any os.getenv lookup that expects values from the .env file.
load_dotenv()  # Load environment variables from the .env file

True

In [47]:
!pip install python-dotenv



In [61]:
# Look up the OpenAI credential in the process environment (None when unset).
api_key = os.environ.get('OPENAI_API_KEY')


In [7]:
!pip install OpenAI



In [43]:
# Testing the OpenAI API
from openai import OpenAI

# Constructed without arguments; presumably the SDK reads the API key from the
# OPENAI_API_KEY environment variable loaded earlier — TODO confirm.
client = OpenAI()

response = client.chat.completions.create(
    messages=[
        {
            "role": "user",
            # Raw string literal: in a regular literal the "\f" of \frac is a
            # form-feed escape and "\i", "\s", "\p", "\," are invalid escape
            # sequences, so the LaTeX sent to the model would be corrupted.
            "content": r"latex_expression = \int_{0}^{1} x^2 \, dx + \sum_{n=1}^{\infty} \frac{1}{n^2} = \frac{\pi^2}{6}, parse this and give me the mathematical expression"
        }
    ],
    model="gpt-3.5-turbo",
)
# Show only the text of the first returned choice.
print(response.choices[0].message.content)

The mathematical expression is:

∫_{0}^{1} x^2 \, dx + ∑_{n=1}^{∞} \frac{1}{n^2} = \frac{\pi^2}{6}


In [46]:
def extract_structure(latex_expression):
    """Ask the chat model to sort the parts of a LaTeX expression into categories.

    A new ``OpenAI`` client is created on every call. The system message
    instructs the model to answer with a JSON object keyed by
    'Variables/Constants', 'Functions', 'Operators', 'Brackets' and 'Misc';
    the user message carries ``latex_expression`` interpolated into a request.

    Args:
        latex_expression: The LaTeX source to categorize (formatted into the
            user message with an f-string).

    Returns:
        The text content of the first choice of the completion. It is returned
        as-is; nothing here parses or validates it as JSON.
    """
    llm = OpenAI()

    # The system prompt is assembled from one fragment per instruction.
    categorization_rules = "".join([
        "You are a helpful assistant that parses LaTeX expressions and categorizes their components ",
        "into the following structure: Variables/Constants, Functions, Operators, Brackets, and Misc. ",
        "For each category, provide a list of items found in the LaTeX expression. ",
        "Ensure that any mathematical functions, such as 'max', 'min', 'sin', 'cos', and custom functions like 'U_u', are correctly identified under the 'Functions' category. ",
        "Variables are any symbols or letters that represent quantities, such as 't', 'x', or 'omega_t^o'. ",
        "Constants include numeric values like '-0.20', '0.22', and '0.0361'. ",
        "Operators include symbols like '>', '>=', '+', '-', '*', and '/' that indicate operations between variables or constants. ",
        "Brackets include any grouping symbols such as '(', ')', '[', ']', '{', and '}'. ",
        "Misc should be used for any elements that do not fit into the other categories. ",
        "The output should be a JSON object with the following keys: 'Variables/Constants', 'Functions', 'Operators', 'Brackets', 'Misc'. ",
        "Numbers also fall under constants and variables, and convert the LaTeX into mathematical expressions without LaTeX-specific formatting.",
    ])
    parse_request = f"Please parse the following LaTeX expression and provide the categorized output structure: {latex_expression}"

    conversation = [
        {"role": "system", "content": categorization_rules},
        {"role": "user", "content": parse_request},
    ]
    completion = llm.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=conversation,
        max_tokens=500,
        temperature=0.5,
    )

    first_choice = completion.choices[0]
    return first_choice.message.content

# Provided LaTeX expression
latex_expression = r"t \in [0.000, 0.0361) \rightarrow & U_u(\omega_t^o) \geq \max\left(Q_{U_{\Omega^o_t}} (-0.20 \cdot t + 0.22), \bar{u_t}\right)\\"

# Get the output structure from the API
output_structure = extract_structure(latex_expression)
print(output_structure)

{
  "Variables/Constants": ["t", "0.000", "0.0361", "\omega_t^o", "-0.20", "0.22", "\\bar{u_t}"],
  "Functions": ["U_u", "max", "Q_{U_{\Omega^o_t}}"],
  "Operators": ["\\in", "[", ",", ")", "\\rightarrow", "\\geq", "*", "+", ")"],
  "Brackets": ["{", "}", "("],
  "Misc": ["-", "\\cdot"]
}


In [26]:
output_structure

'{\n  "Variables/Constants": ["t", "0.000", "0.0361", "\\omega_t^o", "Q_{U_{\\Omega^o_t}}", "-0.20", "0.22", "\\\\bar{u_t}"],\n  "Functions": ["U_u", "\\max"],\n  "Operators": ["\\in", "\\rightarrow", "\\geq", "\\cdot"],\n  "Brackets": ["[", "(", ")", ")", "("],\n  "Misc": []\n}'

In [27]:
def convert_output_to_latex(output_structure):
    """Ask the chat model to rebuild a LaTeX expression from a categorized structure.

    Uses the module-level ``client``. The request is built from the
    ``output_structure`` argument only, so the function no longer depends on a
    global ``latex_expression`` being defined.

    Args:
        output_structure: The categorized JSON to convert (for example the
            string returned by ``extract_structure``); it is formatted into the
            user message with an f-string.

    Returns:
        The text content of the first choice of the completion.
    """
    # NOTE(review): the categorized structure lists tokens per category and
    # does not record their order, so the model has to infer the arrangement.
    response = client.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=[
            {
                "role": "system",
                "content": "You are given a JSON object which contains the categorized components of a LaTeX expression "
                    "under the keys 'Variables/Constants', 'Functions', 'Operators', 'Brackets', and 'Misc'. "
                    "Reconstruct a single valid LaTeX expression from these components and reply with the LaTeX only."
            },
            {
                "role": "user",
                "content": f"Please convert the following categorized JSON structure into a LaTeX expression: {output_structure}"
            }
        ],
        max_tokens=500,
        temperature=0.5,
    )

    return response.choices[0].message.content
    
    

In [96]:
import spacy

  from .autonotebook import tqdm as notebook_tqdm


In [28]:
def extract_semantics(prompt):
    """Send one user prompt to the chat model and return the text of its reply.

    Uses the module-level ``client`` with a generic "helpful assistant" system
    message.

    Args:
        prompt: The full user message to send.

    Returns:
        The text content of the first choice of the completion.
    """
    conversation = [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": prompt},
    ]
    request_options = {
        "model": "gpt-4-turbo",
        "max_tokens": 1500,
        "temperature": 0.5,
    }

    completion = client.chat.completions.create(messages=conversation, **request_options)
    first_choice = completion.choices[0]
    return first_choice.message.content

In [33]:
# Categorize the expression with the LLM, then build a follow-up prompt that
# asks for a semantic description of every categorized item.
parsed_output = extract_structure(latex_expression)
prompt = (
    "The following is a JSON object with categorized components of a LaTeX expression. "
    "Please provide detailed semantic meanings for each item in the JSON. The categories are: "
    "Variables/Constants, Functions, Operators, Brackets, and Misc. "
    "For each item, provide its name and a detailed description of its meaning.\n\n"
    f"{parsed_output}\n\n"
    "Provide the output in the following JSON format:"
    # Template of the expected answer. Every category maps to a list of
    # {"name", "meaning"} objects; "Functions" previously lacked its opening
    # "[", which made the example invalid JSON.
    "{"
    '  "Variables/Constants": ['
    '    {"name": "item_name", "meaning": "item_meaning"}'
    '  ],'
    '  "Functions": ['
    '    {"name": "item_name", "meaning": "item_meaning"}'
    '  ],'
    '  "Operators": ['
    '    {"name": "item_name", "meaning": "item_meaning"}'
    '  ],'
    '  "Brackets": ['
    '    {"name": "item_name", "meaning": "item_meaning"}'
    '  ],'
    '  "Misc": ['
    '    {"name": "item_name", "meaning": "item_meaning"}'
    '  ]'
    "}"
)

In [34]:
print(extract_semantics(prompt))

Here is the detailed semantic meanings for each item in the provided JSON object:

```json
{
  "Variables/Constants": [
    {"name": "t", "meaning": "Typically represents a variable, often used to denote time."},
    {"name": "0.000", "meaning": "A constant value, representing zero in a decimal format."},
    {"name": "0.0361", "meaning": "A constant value, possibly representing a measurement, parameter, or specific result in decimal format."},
    {"name": "\\omega_t^o", "meaning": "A variable often used in physics and engineering, representing angular frequency at time 't' with a specific condition denoted by superscript 'o'."},
    {"name": "U_u", "meaning": "Represents a variable, possibly a specific measurement or parameter in a given context, such as velocity or energy."},
    {"name": "Q_{U_{\\Omega^o_t}}", "meaning": "A variable representing a function or quantity 'Q' related to 'U' which is dependent on another variable '\\Omega^o_t', indicating a nested or hierarchical parame

In [135]:
output_structure

'{\n  "Variables/Constants": ["t", "0.000", "0.0361", "\\omega_t^o", "-0.20", "0.22", "\\bar{u_t}"],\n  "Functions": ["U_u", "Q_{U_{\\Omega^o_t}}", "\\max"],\n  "Operators": ["\\in", "\\rightarrow", "\\geq", "\\cdot", "+"],\n  "Brackets": ["[", "]", "(", ")", "{", "}"],\n  "Misc": []\n}'

In [53]:
import spacy
from spacy.matcher import Matcher
import re

# Load the spaCy language model
# (the pipeline is called as nlp(node) further down to tokenize each item and
# read the part-of-speech tag of its first token)
nlp = spacy.load("en_core_web_sm")

# Define the context dictionary for semantic meanings
# Keys are token texts (variable names, function names, operator symbols);
# values are human-readable descriptions. It is passed as the `context`
# argument of extract_semantic_meaning below.
context_dict = {
    "t": "time variable",
    "v": "velocity variable",
    "omega": "a variable related to omega, possibly indicating a weighted term",
    "Q_U_Omega_t": "a domain or space indexed by time t, could denote a set of possible outcomes or states",
    "u_t_bar": "an average or threshold utility over time t",
    "ū_t": "an average or threshold utility over time t",
    "U_u": "utility function",
    "max": "maximum function",
    ">=": "greater than or equal to operator",
    ">": "greater than operator",
    "<=": "less than or equal to operator",
    "<": "less than operator",
    "->": "implies",
    "+": "addition operator",
    "-": "subtraction operator",
    "*": "multiplication operator",
    "/": "division operator",
    "in": "membership operator"
}

# Example parsed expression
# Hand-written stand-in using the five category keys; the items are plain-text
# tokens (e.g. "->" and ">=") rather than LaTeX commands.
parsed_output = {
    "Variables/Constants": ["t", "v", "0.000", "0.0361", "omega_t^o", "Q_U_Omega_t", "u_t_bar", "-0.20", "0.22"],
    "Functions": ["U_u", "max"],
    "Operators": ["in", "->", ">=", "*", "+"],
    "Brackets": ["[", "]", "(", ")"],
    "Misc": ["&"]
}

# Token matcher bound to the vocabulary of the loaded pipeline
matcher = Matcher(nlp.vocab)

# One (label, single-token spec) pair per rule, in registration order:
# named symbols first, then numbers, brackets and operators.
_TOKEN_RULES = [
    ("TIME", {"LOWER": "t"}),
    ("VELOCITY", {"LOWER": "v"}),
    ("OMEGA", {"LOWER": "omega"}),
    ("DOMAIN", {"LOWER": "q_u_omega_t"}),
    ("UTILITY", {"LOWER": "u_t_bar"}),
    ("UTILITY", {"LOWER": "ū_t"}),
    ("FUNCTION", {"LOWER": "u_u"}),
    ("FUNCTION", {"LOWER": "max"}),
    ("NUMERIC", {"TEXT": {"REGEX": r"^-?\d+\.\d+$"}}),  # decimal numbers
    ("NUMERIC", {"IS_DIGIT": True}),  # integer numbers
    ("BRACKET", {"ORTH": "["}),
    ("BRACKET", {"ORTH": "]"}),
    ("BRACKET", {"ORTH": "("}),
    ("BRACKET", {"ORTH": ")"}),
    ("OPERATOR", {"ORTH": ">="}),
    ("OPERATOR", {"ORTH": ">"}),
    ("OPERATOR", {"ORTH": "<="}),
    ("OPERATOR", {"ORTH": "<"}),
    ("OPERATOR", {"ORTH": "+"}),
    ("OPERATOR", {"ORTH": "-"}),
    ("OPERATOR", {"ORTH": "*"}),
    ("OPERATOR", {"ORTH": "/"}),
    ("OPERATOR", {"ORTH": "->"}),
    ("OPERATOR", {"ORTH": "in"}),
]

# Same list-of-dicts shape as before: each entry holds a label and a
# one-token pattern.
patterns = [
    {"label": rule_label, "pattern": [token_spec]}
    for rule_label, token_spec in _TOKEN_RULES
]

# Register every rule under its label, one pattern per add() call.
for rule in patterns:
    matcher.add(rule["label"], [rule["pattern"]])

# Function to identify semantic meaning using spaCy and context
def spacy_identify_role(node, context):
    """Assign a short role description to one token string of a parsed expression.

    Relies on the module-level ``nlp`` pipeline and ``matcher``. Resolution
    order:

    1. If the matcher reports any match, the label of the first match is
       lowercased and looked up in ``context``; when absent, the lowercased
       label itself is returned. The lookup key is the label (e.g. "time"),
       not ``node``.
    2. Bracket characters return "bracket".
    3. Otherwise the part-of-speech tag of the first token (plus a
       decimal-number regex on ``node``) selects a generic description.

    Args:
        node: Text of a single item, e.g. "t", ">=" or "-0.20".
        context: Mapping from names to descriptions.

    Returns:
        A description string; "unidentified element" when nothing applies,
        including when ``node`` produces no tokens (e.g. an empty string).
    """
    doc = nlp(node)

    # An empty node yields an empty doc; without this guard doc[0] below
    # raised IndexError.
    if len(doc) == 0:
        return "unidentified element"

    # Only the first reported match is used.
    matches = matcher(doc)
    if matches:
        match_id = matches[0][0]
        label = nlp.vocab.strings[match_id].lower()
        # NOTE(review): keyed by label, so `context` only contributes when a
        # label happens to equal one of its keys — confirm this is intended.
        return context.get(label, label)

    # Handle brackets explicitly (curly braces have no matcher pattern)
    if node in ("[", "]", "(", ")", "{", "}"):
        return "bracket"

    # General rules based on the part-of-speech tag of the first token
    token = doc[0]
    pos = token.pos_
    if pos == 'NUM' or re.match(r"^-?\d+\.\d+$", node):
        return "numeric constant"
    if pos == 'SYM':
        return "symbol"
    if pos == 'NOUN':
        return context[node] if node in context else "variable"
    # A token directly followed by "(" is treated as a function call; this
    # check intentionally precedes the remaining tag-based descriptions.
    if pos == 'VERB' or (len(doc) > 1 and doc[1].text == '('):
        return "function"

    remaining_tags = {
        'ADP': "preposition, indicating a relationship between elements",
        'PUNCT': "punctuation, used for structure and separation",
        'AUX': "auxiliary, helping to form a tense, mood, or voice",
        'CCONJ': "coordinating conjunction, connecting elements of equal rank",
        'SCONJ': "subordinating conjunction, connecting dependent clauses",
    }
    return remaining_tags.get(pos, "unidentified element")

# Function to extract semantic meaning
def extract_semantic_meaning(structure, context):
    """Tag every item of a categorized structure with a role description.

    Args:
        structure: Mapping of category name to an iterable of item strings.
        context: Passed through unchanged to ``spacy_identify_role``.

    Returns:
        A dict with the same categories, each mapped to a list of
        ``{"name": item, "meaning": description}`` dicts in input order.
    """
    return {
        category: [
            {"name": node, "meaning": spacy_identify_role(node, context)}
            for node in items
        ]
        for category, items in structure.items()
    }

# Extract semantic meanings using the defined function and context
semantic_explanations = extract_semantic_meaning(parsed_output, context_dict)

# pprint sorts dict keys by default, which is why the printed result lists
# 'Brackets' first and shows 'meaning' before 'name' in every entry.
import pprint
pprint.pprint(semantic_explanations)


{'Brackets': [{'meaning': 'bracket', 'name': '['},
              {'meaning': 'bracket', 'name': ']'},
              {'meaning': 'bracket', 'name': '('},
              {'meaning': 'bracket', 'name': ')'}],
 'Functions': [{'meaning': 'function', 'name': 'U_u'},
               {'meaning': 'function', 'name': 'max'}],
 'Misc': [{'meaning': 'coordinating conjunction, connecting elements of equal '
                      'rank',
           'name': '&'}],
 'Operators': [{'meaning': 'operator', 'name': 'in'},
               {'meaning': 'operator', 'name': '->'},
               {'meaning': 'operator', 'name': '>='},
               {'meaning': 'operator', 'name': '*'},
               {'meaning': 'operator', 'name': '+'}],
 'Variables/Constants': [{'meaning': 'time', 'name': 't'},
                         {'meaning': 'velocity', 'name': 'v'},
                         {'meaning': 'numeric', 'name': '0.000'},
                         {'meaning': 'numeric', 'name': '0.0361'},
                        

In [54]:
# Prompt asking a model to turn the semantic tagging into an English
# explanation. The f-string embeds the string form of the
# `semantic_explanations` dict, and assigning to `prompt` replaces any value
# previously stored under that name.
# NOTE(review): no visible cell sends this prompt to a model — confirm whether
# a call to extract_semantics(prompt) is missing.
prompt = f"""
You are an AI model trained to translate mathematical and logical expressions into natural language explanations. Given the semantic output of a mathematical expression, your task is to generate a coherent English explanation. 

The semantic output will include various categories such as Functions, Operators, Variables/Constants, and more. Use the provided information to create a detailed, understandable explanation in plain English.

Here is the semantic output:
{semantic_explanations}

Generate a natural language explanation for this semantic output, making sure to clarify each element's role in the expression.

For example, translate the semantic representation: 
t in [0.000, 0.0361) -> U_u(omega_t^o) >= max(Q_U_Omega_t (-0.20 * t + 0.22), u_t_bar)

into the natural language explanation:  
"At time t, within the range [0.000, 0.0361), the function U_u applied to omega_t^o must yield a value that is greater than or equal to the maximum value between Q_U_Omega_t of (-0.20 * t + 0.22) and the threshold utility u_t_bar."

Please follow a similar structure and level of detail for the provided semantic output.
"""

# Creating rule-based explanations

In [59]:
import re

def generate_rule_based_explanation(semantic_extractions):
    """Build a one-sentence English explanation from tagged expression items.

    The sentence is "Ensures that", followed by one phrase per recognized
    function, then per recognized variable/constant, then per recognized
    operator, followed by a fixed closing clause. Phrases are joined with
    single spaces, so neighbouring phrases can no longer run together
    (previously "time t" + "velocity variable" produced
    "time tvelocity variable").

    Args:
        semantic_extractions: Mapping with optional keys 'Functions',
            'Variables/Constants' and 'Operators', each a list of dicts.
            Function and operator items are selected by their 'name';
            variable/constant items are selected by their 'meaning' and may
            have their 'name' inserted into the phrase. Missing categories
            are treated as empty; other keys are ignored.

    Returns:
        The explanation sentence as a string.
    """
    # Phrase for each recognized function name.
    function_phrases = {
        "U_u": "the utility function U applied to",
        "max": "the maximum of",
    }
    # Keyed by the item's 'meaning'; "{name}" is replaced by the item's name.
    variable_templates = {
        "time": "time t",
        "velocity": "velocity variable",
        "variable": "the variable {name}",
        "domain": "the domain {name}",
        "utility": "a utility threshold {name}",
        "numeric": "the constant value {name}",
    }
    # Phrase for each recognized operator name.
    operator_phrases = {
        ">=": "is greater than or equal to",
        "->": "implies that",
        "in": "is within the range",
        "*": "multiplied by",
        "+": "plus",
    }

    phrases = ["Ensures that"]

    for func in semantic_extractions.get('Functions', []):
        phrase = function_phrases.get(func['name'])
        if phrase is not None:
            phrases.append(phrase)

    for var in semantic_extractions.get('Variables/Constants', []):
        template = variable_templates.get(var['meaning'])
        if template is not None:
            phrases.append(template.format(name=var.get('name', '')))

    for op in semantic_extractions.get('Operators', []):
        phrase = operator_phrases.get(op['name'])
        if phrase is not None:
            phrases.append(phrase)

    # Fixed closing clause that wraps up the sentence.
    phrases.append("meets either a calculated statistical value or a specified minimum utility requirement.")

    return " ".join(phrases)

# Example usage with the provided semantic extraction
# Hand-written fixture: each category maps to a list of
# {'meaning': ..., 'name': ...} dicts, the same shape that
# extract_semantic_meaning builds. Only 'Functions', 'Variables/Constants'
# and 'Operators' are read by generate_rule_based_explanation.
semantic_extractions = {
    'Brackets': [{'meaning': 'bracket', 'name': '['},
                 {'meaning': 'bracket', 'name': ']'},
                 {'meaning': 'bracket', 'name': '('},
                 {'meaning': 'bracket', 'name': ')'}],
    'Functions': [{'meaning': 'function', 'name': 'U_u'},
                  {'meaning': 'function', 'name': 'max'}],
    'Misc': [{'meaning': 'coordinating conjunction, connecting elements of equal rank', 'name': '&'}],
    'Operators': [{'meaning': 'operator', 'name': 'in'},
                  {'meaning': 'operator', 'name': '->'},
                  {'meaning': 'operator', 'name': '>='},
                  {'meaning': 'operator', 'name': '*'},
                  {'meaning': 'operator', 'name': '+'}],
    'Variables/Constants': [{'meaning': 'time', 'name': 't'},
                            {'meaning': 'velocity', 'name': 'v'},
                            {'meaning': 'numeric', 'name': '0.000'},
                            {'meaning': 'numeric', 'name': '0.0361'},
                            {'meaning': 'variable', 'name': 'omega_t^o'},
                            {'meaning': 'domain', 'name': 'Q_U_Omega_t'},
                            {'meaning': 'utility', 'name': 'u_t_bar'},
                            {'meaning': 'numeric', 'name': '-0.20'},
                            {'meaning': 'numeric', 'name': '0.22'}]
}

# Generate the rule-based explanation
# (phrases are emitted category by category, not in the order the tokens
# appear in the original expression)
explanation = generate_rule_based_explanation(semantic_extractions)
print(explanation)


Ensures that the utility function U applied to the maximum of time tvelocity variablethe constant value 0.000 the constant value 0.0361 the variable omega_t^o the domain Q_U_Omega_t a utility threshold u_t_bar the constant value -0.20 the constant value 0.22 is within the range implies that is greater than or equal to multiplied by plus  meets either a calculated statistical value or a specified minimum utility requirement.


In [37]:
# TODO: evaluate each part of the pipeline

In [38]:
# TODO: add a function to compare spaCy and LLM results for semantic extraction