In [1]:
!pip -q install sentence-transformers

from sentence_transformers import SentenceTransformer
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
from transformers import AutoTokenizer, AutoModel
import torch

import prettytable
from prettytable import PrettyTable

# Lift the column-width limit so long text cells are displayed in full.
pd.options.display.max_colwidth = None

In [2]:
# Load the 'sentence-t5-base' SentenceTransformer once at module level;
# compare_phrases() reads this global `model` to encode every phrase.
model = SentenceTransformer('sentence-t5-base')

def sharpened_cosine_similarity(vec1, vec2, exponent=3):
    """Return the cosine similarity of two tensors raised to ``exponent``.

    Args:
        vec1: First tensor.
        vec2: Second tensor; compared with ``vec1`` along dimension 0.
        exponent: Power applied to the cosine similarity (default 3).

    Returns:
        A tensor holding ``cosine_similarity(vec1, vec2, dim=0) ** exponent``.
    """
    similarity = torch.nn.functional.cosine_similarity(vec1, vec2, dim=0)
    sharpened = torch.pow(similarity, exponent)
    return sharpened

def compare_phrases(test_phrase, phrases):
    """Print a table ranking ``phrases`` by similarity to ``test_phrase``.

    Each phrase is encoded with the module-level ``model`` and scored against
    the encoding of ``test_phrase`` via ``sharpened_cosine_similarity``. Rows
    are printed from the highest score to the lowest.

    Args:
        test_phrase: Reference text; printed on its own line above the table.
        phrases: Iterable of candidate texts to score against ``test_phrase``.

    Returns:
        None. The reference text and the table are written to stdout.
    """
    print(test_phrase)
    table = PrettyTable(align="l", max_table_width=80, hrules=prettytable.ALL, vrules=prettytable.ALL)

    score_column_title = "  T5 "
    table.field_names = [" --- Comparison Prompt (best score to worst) --- ", score_column_title]

    test_embedding = model.encode(test_phrase, convert_to_tensor=True, show_progress_bar=False)

    scored_phrases = []
    for phrase in phrases:
        compare_embedding = model.encode(phrase, convert_to_tensor=True, show_progress_bar=False)
        score = sharpened_cosine_similarity(test_embedding, compare_embedding).item()
        scored_phrases.append((score, phrase))

    # Rank on the numeric score before formatting. Letting the table sort on
    # the formatted score column compares strings, which misorders negative
    # values (e.g. "-0.50" would be ranked above "-0.05").
    scored_phrases.sort(key=lambda pair: pair[0], reverse=True)

    for score, phrase in scored_phrases:
        table.add_row([phrase, f"   {score:.2f}    "])

    print(table)

modules.json:   0%|          | 0.00/461 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/122 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/1.98k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/1.39k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/219M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/1.92k [00:00<?, ?B/s]

spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.39M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/1.79k [00:00<?, ?B/s]

1_Pooling/config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

2_Dense/config.json:   0%|          | 0.00/115 [00:00<?, ?B/s]

2_Dense/rust_model.ot:   0%|          | 0.00/2.36M [00:00<?, ?B/s]

2_Dense/pytorch_model.bin:   0%|          | 0.00/2.36M [00:00<?, ?B/s]

2_Dense/model.safetensors:   0%|          | 0.00/2.36M [00:00<?, ?B/s]

In [3]:
# Reference prompt: every candidate below is scored against this text.
actual_prompt = "Rewrite the essay with a main character that is a sentient computer"

# Candidate prompts to compare with the reference prompt.
predicted_prompts = [
    "Add a paragraph explaining that the website is a simulation created by a sentient computer named Nova.",
    "Compose a paragraph detailing how Nova, a sentient computer, simulates the website.",
    "Rethink the text to include a self-aware computer.",
    "Recreate the text with a sentient computer playing a major role.",
    "Rewrite the essay with a main character that is a dog.",
    "Reword the writting with an updated main character.",
    "Rewrite the essay with a character from Star Wars."
]

# Print the reference prompt followed by a table of candidates and their scores.
compare_phrases(actual_prompt, predicted_prompts)

Rewrite the essay with a main character that is a sentient computer
+----------------------------------------------------------------------+---------+
|  --- Comparison Prompt (best score to worst) ---                     |   T5    |
+----------------------------------------------------------------------+---------+
| Rewrite the essay with a main character that is a dog.               |    0.76 |
+----------------------------------------------------------------------+---------+
| Recreate the text with a sentient computer playing a major role.     |    0.75 |
+----------------------------------------------------------------------+---------+
| Rewrite the essay with a character from Star Wars.                   |    0.70 |
+----------------------------------------------------------------------+---------+
| Compose a paragraph detailing how Nova, a sentient computer,         |    0.70 |
| simulates the website.                                               |         |
+------------------