In [73]:
import random
import spacy
from spacy.training import Example

nlp = spacy.load("en_core_web_sm")

test_sentence = "We were on that stand up comedy night last Friday, it was awesome!"
doc = nlp(test_sentence)
print("Entities in '%s'" % test_sentence)

for ent in doc.ents:
	print(ent.label_, ent.text)

Entities in 'We were on that stand up comedy night last Friday, it was awesome!'
DATE last Friday


In [74]:
examples = [
    Example.from_dict(nlp("It's a stand up comedy night in our pub this Friday"), {"entities": [(8, 28, "EVENT")]}),
    Example.from_dict(nlp("Stand up comedy night will take place in our pub this Friday"), {"entities": [(0, 21, "EVENT")]}),
    Example.from_dict(nlp("We are going to visit stand up comedy night in our pub"), {"entities": [(22, 43, "EVENT")]}),
    Example.from_dict(nlp("Last stand up comedy night was great"), {"entities": [(5, 26, "EVENT")]}),
]

In [55]:
text = "Last stand up comedy night was great"
nlp(text[5:26])

stand up comedy night

In [75]:
# teach that "stand up comedy night" is an event

other_pipes = [pipe for pipe in nlp.pipe_names if pipe != "ner"]
with nlp.disable_pipes(*other_pipes):
	optimizer = nlp.create_optimizer()
	for i in range(20):
		random.shuffle(examples)
		for example in examples:
			nlp.update([example], sgd=optimizer)
ner = nlp.get_pipe("ner")
ner.to_disk("new_ner")

In [76]:
test_sentence = "We were on that stand up comedy night last Friday, it was awesome!"
doc = nlp(test_sentence)
print("Entities in '%s'" % test_sentence)

for ent in doc.ents:
	print(ent.label_, ent.text)

Entities in 'We were on that stand up comedy night last Friday, it was awesome!'
EVENT stand up comedy night
DATE last Friday


In [72]:
import spacy
import random
from spacy.training import Example

nlp = spacy.blank("en")
ner = nlp.add_pipe("ner")
ner.add_label("EVENT")

optimizer = nlp.begin_training()
for i in range(20):
	random.shuffle(examples)
	for example in examples:
		nlp.update([example], sgd=optimizer)

test_sentence = "We were on that stand up comedy night last Friday, it was awesome!"
doc = nlp(test_sentence)
print("Entities in '%s'" % test_sentence)

for ent in doc.ents:
	print(ent.label_, ent.text)
	

Entities in 'We were on that stand up comedy night last Friday, it was awesome!'
EVENT stand up comedy night


In [165]:
import json

with open("events.json", "r") as f:
	data = json.load(f)

train_data = []
for item in data:
	for turn in item["turns"]:
		text = turn["utterance"]
		entities = []
		for frame in turn["frames"]:
			if "state" in frame.keys():
				intent = frame["state"]["active_intent"]
				if(intent == "NONE"): continue
				if intent == "GetEventDates": intent = 0  # 0 is the label for GetEventDates
				elif intent == "BuyEventTickets": intent = 1 # 1 is the label for BuyEventTickets
				entities.append(intent)

		if len(entities) > 0:
			train_data.append((text, {"entities": entities[0]}))

print(train_data)



[("Okay, I'm looking for a list of dates for special events in San Francisco. Would you help pull up a list for me, please?", {'entities': 0}), ("Well, I'm thinking the Giants Vs Dodgers game. I've heard it's really entertaining.", {'entities': 0}), ('Are there other dates listed for the events?', {'entities': 0}), ('Okay, that would be fine. Would you arrange for me to purchase 3 tickets for that event, please?', {'entities': 1}), ('Wonderful. Yes, please confirm. That is exactly what I want. Now, what is the address of the venue? And, at what time will the game start?', {'entities': 1}), ('Fine and dandy. I really appreciate your assistance.', {'entities': 1}), ("I'm interested in finding dates for events.", {'entities': 0}), ('I am looking for events in Anaheim and I head that Brown Sabbath is really good.', {'entities': 0}), ('What events are there on other dates?', {'entities': 0}), ('That sounds good.', {'entities': 0}), ('Yes, I would like to buy tickets.', {'entities': 1}), ('I

In [166]:
from spacy.training import Example
from spacy.pipeline.textcat_multilabel import DEFAULT_MULTI_TEXTCAT_MODEL

if "textcat_multilabel" in nlp.pipe_names:
    nlp.remove_pipe("textcat_multilabel")

config = { "threshold": 0.5, "model": DEFAULT_MULTI_TEXTCAT_MODEL }
textcat = nlp.add_pipe("textcat_multilabel", config=config)
textcat.add_label("event")

1

In [167]:
train_examples = [Example.from_dict(nlp.make_doc(text), {"cats": cats}) for text, cats in train_data]
train_examples[:10]

[{'doc_annotation': {'cats': {'entities': 0}, 'entities': ['O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O'], 'spans': {}, 'links': {}}, 'token_annotation': {'ORTH': ['Okay', ',', 'I', "'m", 'looking', 'for', 'a', 'list', 'of', 'dates', 'for', 'special', 'events', 'in', 'San', 'Francisco', '.', 'Would', 'you', 'help', 'pull', 'up', 'a', 'list', 'for', 'me', ',', 'please', '?'], 'SPACY': [False, True, False, True, True, True, True, True, True, True, True, True, True, True, True, False, True, True, True, True, True, True, True, True, True, False, True, False, False], 'TAG': ['', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', ''], 'LEMMA': ['', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', ''], 'POS': ['', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '

In [179]:

textcat.initialize(lambda: train_examples, nlp=nlp)

with nlp.select_pipes(enable="textcat"):
    optimizer = nlp.resume_training()
    for i in range(30):
        random.shuffle(train_examples)
        for example in train_examples:
            nlp.update([example], sgd=optimizer)

In [182]:
test_sentence_second = "We would like to book tickets for the stand up comedy night"
doc2 = nlp(test_sentence_second)
print("event(1) - BuyEventTickets || event(0) - GetEventDates")
print(doc2.cats)

test_sentence_third = "When is the stand up comedy night?"
doc3 = nlp(test_sentence_third)
print(doc3.cats)

event(1) - BuyEventTickets || event(0) - GetEventDates
{'event': 0.728073239326477, 'entities': 0.4386533200740814}
{'event': 0.08010127395391464, 'entities': 0.8308388590812683}
