In [12]:
import spacy

def process_sentence(input_sentence, ground_truth_actions, verbose=False, visualize=False):
    # Load the English language model
    nlp = spacy.load("en_core_web_sm")

    # Process the sentence using spaCy
    doc = nlp(input_sentence)
    detected_verbs = []

    # Print the tokens and their dependencies
    for token in doc:
        if token.pos_ == "VERB":
            detected_verbs.append(token.text)
    if verbose == True:
      print(detected_verbs)
      print(ground_truth_actions)

    if (verbose == True):
      spacy.displacy.serve(doc, style="dep", auto_select_port=True)
    # Compare detected verbs with ground truth actions
    if set(ground_truth_actions).issubset(detected_verbs):
        print("Dependency parser correctly identified the verbs.")
    else:
        print("Dependency parser did not identify the verbs as expected.")


def main():
    # Input sentence
    sentence = "We may store and process your personal data you provide through the usage of the app and through the account creation process solely for the purpose of providing services to you, to improve our service features and other purposes indicated in this Privacy Policy."
    ground_truth_action = ['store', 'process']

    # Process the sentence
    process_sentence(sentence, ground_truth_action, True)


if __name__ == "__main__":
    main()

['store', 'process', 'provide', 'providing', 'improve', 'indicated']
['store', 'process']

Using the 'dep' visualizer
Serving on http://0.0.0.0:5000 ...

Shutting down server on port 5000.
Dependency parser correctly identified the verbs.


# Parser Code with Example Runs

In [43]:
import spacy
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

def process_sentence(input_sentence, ground_truth_actions):
    # Load the English language model
    nlp = spacy.load("en_core_web_sm")

    # Process the sentence using spaCy
    doc = nlp(input_sentence)

    # Collect verbs identified by the dependency parser
    detected_verbs = set()
    for token in doc:
        if token.pos_ == "VERB":
            detected_verbs.add(token.text)

    # Retain only the verbs present in ground_truth_actions
    detected_verbs_substituted = [verb if verb in ground_truth_actions else 0 for verb in detected_verbs]

    return detected_verbs, detected_verbs_substituted

def evaluate_performance(ground_truth_actions, detected_verbs):
    # Replace 0s in detected_verbs with empty strings
    detected_verbs_cleaned = [verb if verb != 0 else '' for verb in detected_verbs]

    # Remove empty strings from the list
    detected_verbs_cleaned = [verb for verb in detected_verbs_cleaned if verb != '']

    while (len(detected_verbs_cleaned) != len(ground_truth_actions)):
      detected_verbs_cleaned.append('0')

    detected_verbs_cleaned = sorted(detected_verbs_cleaned)
    ground_truth_actions = sorted(ground_truth_actions)

    # Calculate evaluation metrics
    accuracy = accuracy_score(ground_truth_actions, detected_verbs_cleaned)
    precision = precision_score(ground_truth_actions, detected_verbs_cleaned, average='micro')
    recall = recall_score(ground_truth_actions, detected_verbs_cleaned, average='micro')
    f1 = f1_score(ground_truth_actions, detected_verbs_cleaned, average='micro')

    return accuracy, precision, recall, f1

def evaluate(sentences, ground_truth_actions_list):
  accuracies = []
  precisions = []
  recall_scores = []
  f1_scores = []
  # Evaluate performance for each sentence
  for idx, sentence in enumerate(sentences):
      print(f"\nEvaluation for sentence {idx + 1}:")
      ground_truth_actions = ground_truth_actions_list[idx]

      detected_verbs, detected_verbs_substituted = process_sentence(sentence, ground_truth_actions)

      accuracy, precision, recall, f1 = evaluate_performance(ground_truth_actions, detected_verbs_substituted)

      accuracies.append(accuracy)
      precisions.append(precision)
      recall_scores.append(recall)
      f1_scores.append(f1)

      print(f"Detected Verbs: {detected_verbs}")
      print(f"Detected Verbs with disjointed elements substitution : {detected_verbs_substituted}")
      print(f"Ground Truth Actions: {ground_truth_actions}")
      print(f"Accuracy: {accuracy:.2f}")
      print(f"Precision: {precision:.2f}")
      print(f"Recall: {recall:.2f}")
      print(f"F1-score: {f1:.2f}")
    # Calculate average performance metrics
  avg_accuracy = sum(accuracies) / len(accuracies)
  avg_precision = sum(precisions) / len(precisions)
  avg_recall = sum(recall_scores) / len(recall_scores)
  avg_f1 = sum(f1_scores) / len(f1_scores)

  print("\nConsolidated Performance Metrics:")
  print(f"Average Accuracy: {avg_accuracy:.2f}")
  print(f"Average Precision: {avg_precision:.2f}")
  print(f"Average Recall: {avg_recall:.2f}")
  print(f"Average F1-score: {avg_f1:.2f}")

def main():
    # Ground truth actions for each sentence
    ground_truth_actions_list = [['store', 'process'], ['provide', 'improve'], ['create', 'indicated']]

    # Input sentences
    sentences = [
        "We may store and process your personal data you provide through the usage of the app and through the account creation process solely for the purpose of providing services to you, to improve our service features and other purposes indicated in this Privacy Policy.",
        "The company will provide training to employees to improve their skills.",
        "Please follow the instructions indicated on the screen to create a new account."
    ]
    evaluate(sentences, ground_truth_actions_list)



if __name__ == "__main__":
    main()




Evaluation for sentence 1:
Detected Verbs: {'provide', 'process', 'improve', 'store', 'providing', 'indicated'}
Detected Verbs with disjointed elements substitution : [0, 'process', 0, 'store', 0, 0]
Ground Truth Actions: ['store', 'process']
Accuracy: 1.00
Precision: 1.00
Recall: 1.00
F1-score: 1.00

Evaluation for sentence 2:
Detected Verbs: {'improve', 'provide'}
Detected Verbs with disjointed elements substitution : ['improve', 'provide']
Ground Truth Actions: ['provide', 'improve']
Accuracy: 1.00
Precision: 1.00
Recall: 1.00
F1-score: 1.00

Evaluation for sentence 3:
Detected Verbs: {'create', 'indicated', 'follow'}
Detected Verbs with disjointed elements substitution : ['create', 'indicated', 0]
Ground Truth Actions: ['create', 'indicated']
Accuracy: 1.00
Precision: 1.00
Recall: 1.00
F1-score: 1.00

Consolidated Performance Metrics:
Average Accuracy: 1.00
Average Precision: 1.00
Average Recall: 1.00
Average F1-score: 1.00


# Yunik's Experiments

In [44]:
ground_truth_actions_list = [['store', 'process'],
                             ['provide', 'improve'],
                             ['create', 'indicated'],
                             ['steal', 'info']]

# Input sentences
sentences = [
        "We may store and process your personal data you provide through the usage of the app and through the account creation process solely for the purpose of providing services to you, to improve our service features and other purposes indicated in this Privacy Policy.",
        "The company will provide training to employees to improve their skills.",
        "Please follow the instructions indicated on the screen to create a new account.",
        "We steal your info"
]
evaluate(sentences, ground_truth_actions_list)


Evaluation for sentence 1:
Detected Verbs: {'provide', 'process', 'improve', 'store', 'providing', 'indicated'}
Detected Verbs with disjointed elements substitution : [0, 'process', 0, 'store', 0, 0]
Ground Truth Actions: ['store', 'process']
Accuracy: 1.00
Precision: 1.00
Recall: 1.00
F1-score: 1.00

Evaluation for sentence 2:
Detected Verbs: {'improve', 'provide'}
Detected Verbs with disjointed elements substitution : ['improve', 'provide']
Ground Truth Actions: ['provide', 'improve']
Accuracy: 1.00
Precision: 1.00
Recall: 1.00
F1-score: 1.00

Evaluation for sentence 3:
Detected Verbs: {'create', 'indicated', 'follow'}
Detected Verbs with disjointed elements substitution : ['create', 'indicated', 0]
Ground Truth Actions: ['create', 'indicated']
Accuracy: 1.00
Precision: 1.00
Recall: 1.00
F1-score: 1.00

Evaluation for sentence 4:
Detected Verbs: {'steal'}
Detected Verbs with disjointed elements substitution : ['steal']
Ground Truth Actions: ['steal', 'info']
Accuracy: 0.50
Precision