In [10]:
from dotenv import load_dotenv
import rich.progress
from llm_utils import return_azure_llm
import pandas as pd
import rich
from ipdb import set_trace as st
from typing import List, Tuple
from typing import List

from langchain.output_parsers import PydanticOutputParser
from langchain_core.prompts import PromptTemplate
from langchain_core.pydantic_v1 import BaseModel, Field

import rich

load_dotenv('.env')

True

In [11]:
llm = return_azure_llm()

In [12]:
annotations_df = pd.read_csv('annotations.csv')
project_ids = annotations_df['ProjectID'].unique().tolist()
project_ids

[20, 35, 34, 19]

In [13]:
requirements_df = pd.read_csv('requirements_grouped.csv')
requirements_df.head()

Unnamed: 0,ProjectID,RequirementText
0,17,The system shall provide an option to withdraw...
1,19,The system must be easy to use by both manager...
2,20,All Web pages generated by the system shall be...
3,21,Programmers and application developers will ha...
4,24,The application should be connected to the Int...


### Generate Workflows for each of the SRS documents.

In [14]:
prompt = """
You are an expert business process consultant. 
You must come up with a workflow from the given software requirements that represents the logic of the process. 
You must respond with a list of lists, where each element of the inner list is a pair of nodes.
Do not provide any extra information.
You must use as few words as possible for each node.
Each node must represent one step in the workflow.
The steps in the graph must describe how the system works end to end.
The process must be representative of BPMN notation. 
If you create an end node, then that node should not have any children, ie, nodes that follow it.

For example, if the requirements say: 
'All Web pages generated by the system shall be fully downloadable in no more than 10 seconds over a 40KBps 
modem connection.', 'Responses to queries shall take no longer than 7 seconds to load onto the screen after the user submits the 
query.', 'The system shall display confirmation messages to users within 4 seconds after the user submits information to
the system.', 'All network transactions that involve financial information or personally identifiable information shall be 
encrypted.', 'Users shall be required to log in to the Cafeteria Ordering System for all operations except viewing a menu.', 'Patrons shall log in according to the restricted computer system access policy.', 'The system shall permit only cafeteria staff members who are on the list of authorized Menu Managers to create or edit menus.', 'Only users who have been authorized for home access to the corporate Intranet may use the COS from non-company locations.'

Then you would respond with: 
    [["Start", "Access System"],
    ["Access System", "View Menu"],
    ["View Menu", "Exclusive Gateway: Need to log in?"],
    ["Exclusive Gateway: Need to log in?", "User Login"],
    ["Exclusive Gateway: Need to log in?", "End"],
    ["User Login", "Verify Credentials"],
    ["Verify Credentials", "Exclusive Gateway: Company network?"],
    ["Exclusive Gateway: Company network?", "Successful Login and Redirection"],
    ["Exclusive Gateway: Company network?", "Home Access Authorization"],
    ["Home Access Authorization", "Exclusive Gateway: Authorized?"],
    ["Exclusive Gateway: Authorized?", "Successful Login and Redirection"],
    ["Exclusive Gateway: Authorized?", "End: Access Denied"],
    ["Successful Login and Redirection", "Exclusive Gateway: User role?"],
    ["Exclusive Gateway: User role?", "Menu Management"],
    ["Exclusive Gateway: User role?", "Order Placement"],
    ["Menu Management", "Save Changes"],
    ["Save Changes", "Display Confirmation"],
    ["Display Confirmation", "End"],
    ["Order Placement", "Submit Order"],
    ["Submit Order", "Encrypt Transactions"],
    ["Encrypt Transactions", "Process Order"],
    ["Process Order", "Display Confirmation"],
    ["Display Confirmation", "End"],
    ["Successful Login and Redirection", "End"],
    ["Order Placement", "End"],
    ["View Menu", "End"],
    ["Submit Query", "Process Query"],
    ["Process Query", "Load Query Results"],
    ["Load Query Results", "End"],
    ["Log Out", "Ensure Secure Termination"],
    ["Ensure Secure Termination", "End"]]

Now you must write out the process for:
{requirements}
"""

In [15]:
prompt = PromptTemplate(
    template=prompt,
    input_variables=["requirements"])

chain = prompt | llm  

In [30]:
output_df = None
for project in project_ids[:2]:
    requirements = " ".join(requirements_df.loc[requirements_df['ProjectID'] == project, 'RequirementText'])
    resp = chain.invoke({"requirements": requirements})
    lists = eval(resp.content)
    temp_df = pd.DataFrame(lists, columns=['from_node', 'to_node'])
    temp_df['ProjectID'] = project
    temp_df = temp_df[['ProjectID', 'from_node', 'to_node']]
    if output_df is None:
        output_df = temp_df
    else:
        output_df = pd.concat([output_df, temp_df], axis=0)

output_df.to_csv("generated_graphs.csv", index=False)

In [32]:
output_df = pd.read_csv("generated_graphs.csv")

In [31]:
output_df.head()

Unnamed: 0,ProjectID,from_node,to_node
0,20,Start,Access System
1,20,Access System,View Menu
2,20,View Menu,Exclusive Gateway: Need to log in?
3,20,Exclusive Gateway: Need to log in?,User Login
4,20,Exclusive Gateway: Need to log in?,End


### Evaluation
Compare the ground truth workflow against the generated workflows. 

In [None]:
def calculate_node_metrics(ground_truth_nodes, generated_nodes):
    true_positives = 0
    false_positives = 0
    false_negatives = 0
    
    # Convert node lists to sets for easier comparison
    ground_truth_set = set(tuple(node) for node in ground_truth_nodes)
    generated_set = set(tuple(node) for node in generated_nodes)
    
    # True Positives: Nodes present in both ground truth and generated set
    true_positives = len(ground_truth_set.intersection(generated_set))
    
    # False Positives: Nodes present in generated set but not in ground truth set
    false_positives = len(generated_set - ground_truth_set)
    
    # False Negatives: Nodes present in ground truth set but not in generated set
    false_negatives = len(ground_truth_set - generated_set)
    
    return true_positives, false_positives, false_negatives

In [None]:
import pickle

def dump_cache(cache):
    with open('cache.pkl', 'wb') as f:
        pickle.dump(cache, f)

def retrieve_cache():
    try:
        with open('cache.pkl', 'rb') as f:
            loaded_cache = pickle.load(f)
    except:
        print("Not loading cache as it does not exist")
        return {}
    return loaded_cache

In [None]:
from tqdm import tqdm

In [None]:
columns = ['ProjectID', 'TP', 'FP', 'FN', 'TP with Semantic', 'FP with Semantic', 'FN with Semantic']
metrics_df = pd.DataFrame(columns=columns)
cache = retrieve_cache()

for project_id in rich.progress.track(project_ids[:1]):

    results = {'ProjectID': project_id, 'TP': 0, 'FP': 0, 'FN': 0, 'TP with Semantic': 0, 'FP with Semantic': 0, 'FN with Semantic': 0}

    #retrieve the ground truth and generated nodes to compare against each other
    generated_nodes = output_df.loc[output_df['ProjectID']==project_id, ['from_node', 'to_node']].values.tolist()
    ground_truth_nodes = annotations_df.loc[annotations_df['ProjectID']==project_id, ['Step', 'Next Step']].values.tolist()

    #calculate TP, FP, FN for each project WITHOUT semantic comparison
    results['TP'], results['FP'], results['FN'] = calculate_node_metrics(ground_truth_nodes, generated_nodes)

    #calculate TP, FP, FN for each project WITH semantic comparison
    results['TP with Semantic'], results['FP with Semantic'], results['FN with Semantic'], cache = calculate_node_metrics_semantic(ground_truth_nodes, generated_nodes, embedding_function, cache)

    #calculate the edge scores for each project WITHOUT semantic comparison

    #calculate the edge scores for each project WITH semantic comparison

    #append all data to metric df
    metrics_df = pd.concat((metrics_df, pd.DataFrame([results])), axis = 0)


Output()

In [None]:
metrics_df

Unnamed: 0,ProjectID,TP,FP,FN,TP with Semantic,FP with Semantic,FN with Semantic
0,20,30,0,0,0,0,0
0,35,11,5,5,0,0,0
0,34,0,0,21,0,0,0
0,19,0,0,13,0,0,0


In [None]:
from langchain_openai import AzureOpenAIEmbeddings
import os
from dotenv import load_dotenv
load_dotenv('.env')

embedding_function = AzureOpenAIEmbeddings(          
            openai_api_key=os.getenv("AZURE_OPENAI_API_KEY"),
            deployment=os.getenv('EMBEDDING_DEPLOYMENT_NAME'),
            model=os.getenv('EMBEDDING_MODEL'),
            openai_api_version=os.getenv('AZURE_OPENAI_VERSION'),
            azure_endpoint=os.getenv('AZURE_OPENAI_ENDPOINT'),
            chunk_size=1) 

embedding_function = embedding_function.embed_query

In [None]:
def check_cache(query, cache):
    if query in cache:
        print('in the cache!')
        return cache[query]
    else:
        return None

In [None]:
from sklearn.metrics.pairwise import cosine_similarity

def calculate_node_metrics_semantic(ground_truth_nodes, generated_nodes, embed_query, cache):
    true_positives = 0
    false_positives = 0
    false_negatives = 0
    
    # Function to compute semantic similarity between two node descriptions
    def compute_similarity(node1, node2, cache):

        embedding1 = check_cache(node1, cache)
        if embedding1 is None:
            embedding1 = embed_query(node1)
            cache[node1] = embedding1

        embedding2 = check_cache(node2, cache)
        if embedding2 is None:
            embedding2 = embed_query(node2)
            cache[node2] = embedding2
        
        # Compute the cosine similarity between the two embeddings
        similarity_score = cosine_similarity([embedding1], [embedding2], cache)[0, 0]
        return similarity_score, cache
    
    # Convert node lists to sets for easier comparison
    ground_truth_set = set(tuple(node) for node in ground_truth_nodes)
    generated_set = set(tuple(node) for node in generated_nodes)
    
    # Loop through generated nodes and find matches in ground truth using semantic similarity
    for gen_node in generated_set:
        match_found = False
        for gt_node in ground_truth_set:
            similarity_score = compute_similarity(gen_node[0], gt_node[0], cache)
            # You can adjust this threshold as needed
            if similarity_score > 0.8:  # Example threshold
                true_positives += 1
                match_found = True
                break
        if not match_found:
            false_positives += 1
    
    # Count false negatives (ground truth nodes not matched by generated nodes)
    for gt_node in ground_truth_set:
        match_found = False
        for gen_node in generated_set:
            similarity_score = compute_similarity(gen_node[0], gt_node[0], cache)
            if similarity_score > 0.8:  # Example threshold
                match_found = True
                break
        if not match_found:
            false_negatives += 1
    
    return true_positives, false_positives, false_negatives, cache