Install and import the necessary libraries

In [None]:
# Install the notebook's dependencies. transformers and rasa are pinned to exact
# versions; torch, torchvision and input_reader are left unpinned.
!pip install torch torchvision transformers==2.10.0 rasa==1.10.0 input_reader

Collecting transformers==2.10.0
[?25l  Downloading https://files.pythonhosted.org/packages/12/b5/ac41e3e95205ebf53439e4dd087c58e9fd371fd8e3724f2b9b4cdb8282e5/transformers-2.10.0-py3-none-any.whl (660kB)
[K     |████████████████████████████████| 665kB 4.6MB/s 
[?25hCollecting rasa==1.10.0
[?25l  Downloading https://files.pythonhosted.org/packages/50/aa/e55644c4ae80837f9f6b6fdbe61aa6c6cfff3d2fb1c5fe3d3641fafba31c/rasa-1.10.0-py3-none-any.whl (509kB)
[K     |████████████████████████████████| 512kB 10.8MB/s 
[?25hCollecting input_reader
[?25l  Downloading https://files.pythonhosted.org/packages/5e/c9/f02db78bdf9d73be52f6a28d79b3a4fcd9610649a725159781850e37973d/input_reader-1.2.2.tar.gz (99kB)
[K     |████████████████████████████████| 102kB 8.7MB/s 
Collecting sacremoses
[?25l  Downloading https://files.pythonhosted.org/packages/7d/34/09d19aff26edcc8eb2a01bed8e98f13a1537005d31e95233fd48216eed10/sacremoses-0.0.43.tar.gz (883kB)
[K     |████████████████████████████████| 890kB 17.0MB

In [2]:
import ipywidgets as widgets
import requests, os
from IPython.display import display
from ipywidgets import interact

from rasa.nlu.training_data import TrainingData,Message

Download the model

In [3]:
### Download model

#taken from this StackOverflow answer: https://stackoverflow.com/a/39225039

def download_file_from_google_drive(id, destination):
    """Download a Google Drive file to a local path.

    Sends a first request for the file. If that response carries a
    confirmation token (as reported by ``get_confirm_token``), the request is
    repeated with the token. The final response body is streamed to disk with
    ``save_response_content``.

    Args:
        id: Google Drive file id, sent as the ``id`` query parameter.
        destination: Local path the downloaded bytes are written to.

    Raises:
        requests.HTTPError: If the final response has an error status, so an
            error page is never saved in place of the requested file.
    """
    URL = "https://docs.google.com/uc?export=download"

    # The context manager closes the session (and its pooled connections)
    # even when the download fails part-way through.
    with requests.Session() as session:
        response = session.get(URL, params={'id': id}, stream=True)
        token = get_confirm_token(response)

        if token:
            # Release the unread streamed response before replacing it.
            response.close()
            params = {'id': id, 'confirm': token}
            response = session.get(URL, params=params, stream=True)

        # Fail loudly instead of writing an HTTP error body to `destination`.
        response.raise_for_status()
        save_response_content(response, destination)

def get_confirm_token(response):
    """Return the value of the first cookie whose name starts with
    ``download_warning``, or ``None`` when the response has no such cookie."""
    matching_values = (
        cookie_value
        for cookie_name, cookie_value in response.cookies.items()
        if cookie_name.startswith('download_warning')
    )
    # next() with a default yields None when no cookie matched.
    return next(matching_values, None)

def save_response_content(response, destination):
    """Stream the body of ``response`` into the file at ``destination``.

    The body is read through ``response.iter_content`` in 32768-byte chunks
    and written in binary mode; an existing file at ``destination`` is
    overwritten.
    """
    chunk_size = 32 * 1024

    with open(destination, "wb") as out_file:
        # filter(None, ...) drops empty (keep-alive) chunks before writing.
        out_file.writelines(filter(None, response.iter_content(chunk_size)))

# Google Drive file ids for the model class source and the checkpoint archive.
model_class_file_id = '1N1kn2b7i2ND7eNefzyJM-k13IM8tqZvr'
checkpoint_file_id = '1G0nwXlvzGsb8Ar-OAnYBQKFvY97WMzBy'
# Local paths the two downloads are written to.
model_class_destination = 'model.py'
checkpoint_destination = 'model.zip'
# Directory whose presence marks the checkpoint as already downloaded and extracted.
checkpoint_unzipped_destination = 'package_models'

# Skip the checkpoint download and extraction when the directory already exists.
if not os.path.exists(checkpoint_unzipped_destination):
  download_file_from_google_drive(checkpoint_file_id, checkpoint_destination)
  !unzip {checkpoint_destination}

# Skip the model class download when the file already exists.
if not os.path.exists(model_class_destination):
  download_file_from_google_drive(model_class_file_id, model_class_destination)

Archive:  model.zip
   creating: package_models/
  inflating: package_models/.DS_Store  
   creating: __MACOSX/
   creating: __MACOSX/package_models/
  inflating: __MACOSX/package_models/._.DS_Store  
   creating: package_models/lm_finetune_8/
   creating: package_models/lm_finetune_8/checkpoint-56000/
  inflating: package_models/lm_finetune_8/checkpoint-56000/added_tokens.json  
  inflating: package_models/lm_finetune_8/checkpoint-56000/tokenizer_config.json  
  inflating: package_models/lm_finetune_8/checkpoint-56000/special_tokens_map.json  
  inflating: package_models/lm_finetune_8/checkpoint-56000/optimizer.pt  
  inflating: package_models/lm_finetune_8/checkpoint-56000/config.json  
  inflating: package_models/lm_finetune_8/checkpoint-56000/scheduler.pt  
  inflating: package_models/lm_finetune_8/checkpoint-56000/merges.txt  
  inflating: package_models/lm_finetune_8/checkpoint-56000/training_args.bin  
  inflating: package_models/lm_finetune_8/checkpoint-56000/pytorch_model.bin 

In [4]:
# model.py is fetched by the download cell, so this import only works after that cell has run.
from model import ParaphraseModel
# Checkpoint directory extracted from model.zip.
model_path = 'package_models/lm_finetune_8/checkpoint-56000/'

# Accumulates every example added through the "Add to Training Data" button.
complete_td = TrainingData()
model = ParaphraseModel(model_path)

Generate paraphrases, choose an intent, and build the training data

In [5]:
# Seed message; it is paraphrased below and also added to the training data on button click.
input_phrase = input("Enter a message for which you would like to generate paraphrases: ")

Enter a message for which you would like to generate paraphrases: I want to book an appointment for tomorrow


In [6]:
# int() raises ValueError if the entry is not a whole number.
number_samples = int(input("Number of paraphrases to generate: "))
# Kept as the raw string typed by the user; no splitting happens in this notebook.
stop_words = input("Stop words to be constrained with(multiple semi-colon separated): ")

Number of paraphrases to generate: 5
Stop words to be constrained with(multiple semi-colon separated): 3


In [7]:
# NOTE(review): stop_words is passed as the raw input string; whether
# get_paraphrases splits it on ';' is not visible here — confirm against model.py.
paraphrases = model.get_paraphrases(input_phrase, number_samples, stop_words)

100%|██████████| 20/20 [00:00<00:00, 33.72it/s]


In [8]:
# Print the usage instructions; the backslash continuations keep this a single string literal.
print("Steps:\n1. Read all proposed paraphrases below.\n2. Select valid paraphrases that you would like\
 to add to your NLU training data. Use Ctrl/Cmd + Click to select multiple.\n\
3. Enter the name of the intent under which these messages should be categorized\n\
4. Click 'Add to training data'\n\
5. Copy the training data displayed in Rasa Markdown format to your existing training data file.\n\
6. You can go back to 3 cells above this to enter new messages for which you want to generate paraphrases.")

# Multi-select list of the generated paraphrases; nothing is selected initially.
paraphrase_widget = widgets.SelectMultiple(
    options=paraphrases,
    value=[],
    rows=number_samples,
    description='Paraphrases',
    disabled=False,
    layout= widgets.Layout(width='100%')
)
display(paraphrase_widget)

# Free-text field for the intent name applied to the selected paraphrases.
intent = widgets.Text(description="Intent")
display(intent)

button = widgets.Button(description="Add to Training Data")
# Output area the button callback prints into.
output = widgets.Output()

display(button, output)

def on_button_clicked(b):
    """Add the seed message and the selected paraphrases to the training data.

    Reads the current selection from ``paraphrase_widget`` and the intent name
    from the ``intent`` text field. When both are present, builds one message
    for ``input_phrase`` plus one per selected paraphrase, merges them into the
    module-level ``complete_td`` and prints the accumulated data as Rasa
    Markdown inside ``output``. When either is missing, prints an error
    message instead and leaves ``complete_td`` unchanged.

    Args:
        b: Argument supplied by ``button.on_click``; not used.
    """
    global complete_td

    # Clear what an earlier click printed: the markdown below is already the
    # full accumulated data, so keeping old dumps or errors only adds noise.
    # Done before entering the context manager to avoid nesting it.
    output.clear_output()

    with output:
        intent_value = intent.value
        selected_paraphrases = paraphrase_widget.value

        if not selected_paraphrases:
            print("Error: You haven't selected any paraphrases")
            return
        if not intent_value:
            print("Error: Please enter the intent name under which these messages should be categorized.")
            return

        # The original message is stored alongside its paraphrases.
        all_messages = [Message.build(text=input_phrase, intent=intent_value)]
        all_messages.extend(
            Message.build(text=paraphrase, intent=intent_value)
            for paraphrase in selected_paraphrases
        )

        complete_td = complete_td.merge(TrainingData(training_examples=all_messages))

        print(complete_td.nlu_as_markdown())

# Register the callback so each click on the button runs on_button_clicked.
button.on_click(on_button_clicked)

Steps:
1. Read all proposed paraphrases below.
2. Select valid paraphrases that you would like to add to your NLU training data. Use Ctrl/Cmd + Click to select multiple.
3. Enter the name of the intent under which these messages should be categorized
4. Click 'Add to training data'
5. Copy the training data displayed in Rasa Markdown format to your existing training data file.
6. You can go back to 3 cells above this to enter new messages for which you want to generate paraphrases.


SelectMultiple(description='Paraphrases', layout=Layout(width='100%'), options=('i want to book tomorrow.', 'i…

Text(value='', description='Intent')

Button(description='Add to Training Data', style=ButtonStyle())

Output()