# **Train and Test LUIS app**

Steps:
- create an Azure LUIS resource
- add the intents and entities
- format the data to be compatible with LUIS
- add the examples
- train the LUIS model
- test the trained model with a prediction request

## Imports and set-up

In [None]:
!pip install azure.cognitiveservices.language.luis

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting azure.cognitiveservices.language.luis
  Downloading azure_cognitiveservices_language_luis-0.7.0-py2.py3-none-any.whl (80 kB)
[K     |████████████████████████████████| 80 kB 3.4 MB/s 
[?25hCollecting msrest>=0.5.0
  Downloading msrest-0.7.1-py3-none-any.whl (85 kB)
[K     |████████████████████████████████| 85 kB 2.2 MB/s 
[?25hCollecting msrestazure<2.0.0,>=0.4.32
  Downloading msrestazure-0.6.4-py2.py3-none-any.whl (40 kB)
[K     |████████████████████████████████| 40 kB 2.6 MB/s 
[?25hCollecting azure-common~=1.1
  Downloading azure_common-1.1.28-py2.py3-none-any.whl (14 kB)
Collecting azure-core>=1.24.0
  Downloading azure_core-1.26.1-py3-none-any.whl (172 kB)
[K     |████████████████████████████████| 172 kB 47.4 MB/s 
Collecting isodate>=0.6.0
  Downloading isodate-0.6.1-py2.py3-none-any.whl (41 kB)
[K     |████████████████████████████████| 41 kB 425 kB/s 
Collecting 

In [None]:
import json
import os
import time
from pathlib import Path
from urllib.parse import quote, urlencode

import pandas as pd
import requests
from azure.cognitiveservices.language.luis.authoring import LUISAuthoringClient
from azure.cognitiveservices.language.luis.authoring.models import (
    ApplicationCreateObject,
    ExampleLabelObject,
    EntityLabelObject,
)

from msrest.authentication import CognitiveServicesCredentials
from tqdm.notebook import tqdm_notebook as tqdm

In [None]:
# Authoring endpoint of the Azure LUIS resource.
AZURE_LUIS_ENDPOINT = \
    "https://flymeluisresource-authoring.cognitiveservices.azure.com/"
# The authoring key is a secret: it is read from the environment instead of being
# committed with the notebook. Export AZURE_LUIS_KEY before starting the kernel.
AZURE_LUIS_KEY = os.environ.get("AZURE_LUIS_KEY", "")

# Name and version id of the LUIS app managed by this notebook.
AZURE_LUIS_PROJECT_NAME = "flyme-luis-app-colab"
AZURE_LUIS_PROJECT_VERSION = "0.1"

AZURE_APP_ID = ""

# Location of the raw Frames dataset, relative to the notebook.
DATA_PATH = Path("../data")
FRAMES_JSON_PATH = Path(DATA_PATH, "raw/frames.json")

In [None]:
## Create the LUIS Application

# Instantiate the LUIS authoring client for the configured resource.
client = LUISAuthoringClient(
    AZURE_LUIS_ENDPOINT, CognitiveServicesCredentials(AZURE_LUIS_KEY)
)

# Define the app basics. The initial version id is passed explicitly so the
# version created here is the one every later cell addresses through
# AZURE_LUIS_PROJECT_VERSION, rather than relying on the service default.
appDefinition = ApplicationCreateObject(
    name=AZURE_LUIS_PROJECT_NAME,
    initial_version_id=AZURE_LUIS_PROJECT_VERSION,
    culture="en-us",
)

# Create the app; the returned id is needed by all subsequent authoring calls.
app_id = client.apps.add(appDefinition)

print("Created LUIS app with ID {}".format(app_id))

Created LUIS app with ID ef464365-b28a-4c8c-a752-38ff199adcb5


## Add intents and entities

In [None]:
def add_flyme_intents(client, app_id, version_id):
    """Create the "Book" and "Info" intents on a LUIS app version.

    Args:
        client: authoring client exposing ``model.add_intent``.
        app_id: id of the LUIS application to modify.
        version_id: id of the application version to modify.

    Returns:
        None. The ids returned by the service are not needed by the rest of
        the notebook and are therefore not kept.
    """
    for intent_name in ("Book", "Info"):
        client.model.add_intent(
            app_id=app_id, version_id=version_id, name=intent_name
        )


def add_flyme_prebuilts(client, app_id, version_id):
    """Add the prebuilt extractors used by the FlyMe app to a LUIS app version.

    One ``model.add_prebuilt`` request is issued per extractor, in the order
    ``geographyV2``, ``datetimeV2``, ``number``.

    Args:
        client: authoring client exposing ``model.add_prebuilt``.
        app_id: id of the LUIS application to modify.
        version_id: id of the application version to modify.
    """
    for extractor in ("geographyV2", "datetimeV2", "number"):
        client.model.add_prebuilt(
            app_id=app_id,
            version_id=version_id,
            prebuilt_extractor_names=[extractor],
        )


def add_flyme_feature_entity(
        client,
        app_id,
        version_id,
        entity_name,
        model_name,
):
    """Create an entity and register ``model_name`` as a feature of it.

    Args:
        client: authoring client exposing ``model.add_entity`` and
            ``features.add_entity_feature``.
        app_id: id of the LUIS application to modify.
        version_id: id of the application version to modify.
        entity_name: name of the entity to create.
        model_name: name of the model attached to the new entity as a feature.
    """
    target = {"app_id": app_id, "version_id": version_id}

    # The id returned for the new entity is what the feature call attaches to.
    new_entity_id = client.model.add_entity(name=entity_name, **target)

    relation = {"model_name": model_name}
    client.features.add_entity_feature(
        entity_id=new_entity_id,
        feature_relation_create_object=relation,
        **target,
    )


def add_flyme_entities(
        client,
        app_id,
        version_id
):
    """Create the five FlyMe entities, each with its prebuilt model as a feature.

    Delegates to ``add_flyme_feature_entity`` once per (entity, model) pair,
    in the order listed below.

    Args:
        client: authoring client forwarded to ``add_flyme_feature_entity``.
        app_id: id of the LUIS application to modify.
        version_id: id of the application version to modify.
    """
    entity_features = (
        ("or_city", "geographyV2"),
        ("dst_city", "geographyV2"),
        ("str_date", "datetimeV2"),
        ("end_date", "datetimeV2"),
        ("budget", "number"),
    )

    for entity_name, model_name in entity_features:
        add_flyme_feature_entity(
            client, app_id, version_id, entity_name, model_name
        )

In [None]:
# Register the intents on the app version created above.
add_flyme_intents(
    client=client,
    app_id=app_id,
    version_id=AZURE_LUIS_PROJECT_VERSION,
)

In [None]:
# Add the prebuilt extractors to the same app version.
add_flyme_prebuilts(
    client=client,
    app_id=app_id,
    version_id=AZURE_LUIS_PROJECT_VERSION,
)

In [None]:
# Create the custom entities and attach their feature models.
add_flyme_entities(
    client=client,
    app_id=app_id,
    version_id=AZURE_LUIS_PROJECT_VERSION,
)

## Format data for LUIS

In [None]:
def format_data_for_luis(
        json_path,
        batch_size,
        app_id,
        version_id,
        luis_client=None,
):
    """Turn the Frames dialogues into labelled LUIS examples and upload them.

    Each distinct utterance whose author is not "wizard" becomes one example.
    Its intent is "Book" when one of its acts carries an ``intent`` argument
    equal to ``"book"``, and "Info" otherwise. Arguments whose key is one of
    the tracked entities and whose value occurs verbatim in the utterance are
    attached as entity labels (first occurrence only).

    Args:
        json_path: location of the Frames JSON file, passed to
            ``pandas.read_json``.
        batch_size: maximum number of examples sent per ``examples.batch`` call.
        app_id: id of the LUIS application receiving the examples.
        version_id: id of the application version receiving the examples.
        luis_client: authoring client used for the upload. When omitted, the
            notebook-level ``client`` is used, which keeps existing calls working.
    """
    if luis_client is None:
        luis_client = client

    raw_data = pd.read_json(json_path)

    entities = ("or_city", "dst_city", "str_date", "end_date", "budget")
    examples = []
    # A set keeps the duplicate check O(1) per utterance.
    seen_utterances = set()

    for turn in tqdm(raw_data["turns"]):
        for frame in turn:
            text = frame["text"]
            if frame["author"] == "wizard" or text in seen_utterances:
                continue

            seen_utterances.add(text)

            is_book = False
            labels = []

            for act in frame["labels"]["acts_without_refs"]:
                for arg in act["args"]:
                    key = arg["key"]
                    # Some arguments carry no value at all; treat them as None.
                    value = arg.get("val")

                    if key == "intent" and value == "book":
                        is_book = True

                    # Only non-empty string values can be located in the text.
                    if (
                        key not in entities
                        or not isinstance(value, str)
                        or not value
                    ):
                        continue

                    start = text.find(value)
                    if start == -1:
                        continue

                    labels.append(
                        EntityLabelObject(
                            entity_name=key,
                            start_char_index=start,
                            # LUIS expects the index of the LAST character of
                            # the entity (inclusive), hence the "- 1".
                            end_char_index=start + len(value) - 1,
                        )
                    )

            # Every user utterance is uploaded, including those that ended up
            # without any entity label.
            examples.append(
                ExampleLabelObject(
                    text=text,
                    intent_name="Book" if is_book else "Info",
                    entity_labels=labels,
                )
            )

    # add the examples in batch
    for index in tqdm(range(0, len(examples), batch_size)):
        luis_client.examples.batch(
            app_id=app_id,
            version_id=version_id,
            example_label_object_array=examples[index : index + batch_size],
        )

In [None]:
# Build the labelled examples from the Frames file and upload them 100 at a time.
format_data_for_luis(
    json_path=FRAMES_JSON_PATH,
    batch_size=100,
    app_id=app_id,
    version_id=AZURE_LUIS_PROJECT_VERSION,
)

  0%|          | 0/1369 [00:00<?, ?it/s]

  0%|          | 0/97 [00:00<?, ?it/s]

## Train the model

In [None]:
def train_flyme_data(client, app_id, version_id, poll_interval=10):
    """Start training a LUIS app version and block until it has finished.

    Args:
        client: authoring client exposing ``train.train_version`` and
            ``train.get_status``.
        app_id: id of the LUIS application to train.
        version_id: id of the application version to train. This argument is
            what gets trained and polled, not a notebook-level constant.
        poll_interval: seconds to wait between two status checks.

    Raises:
        RuntimeError: if, once nothing is pending any more, at least one model
            reports the status "Fail".
    """
    client.train.train_version(app_id=app_id, version_id=version_id)

    pending_states = ("Queued", "InProgress")
    while True:
        # get_status returns one training status per model; training is over
        # only when none of them is still queued or in progress.
        info = client.train.get_status(app_id=app_id, version_id=version_id)

        if any(model.details.status in pending_states for model in info):
            print(f"Waiting {poll_interval} seconds for training to complete...")
            time.sleep(poll_interval)
            continue

        failures = [
            getattr(model.details, "failure_reason", None)
            for model in info
            if model.details.status == "Fail"
        ]
        if failures:
            raise RuntimeError(
                "LUIS training failed for {} model(s): {}".format(
                    len(failures), failures
                )
            )

        print("trained")
        return

In [None]:
# Train the app version populated above and wait for the training to finish.
train_flyme_data(
    client=client,
    app_id=app_id,
    version_id=AZURE_LUIS_PROJECT_VERSION,
)

Waiting 10 seconds for training to complete...
Waiting 10 seconds for training to complete...
Waiting 10 seconds for training to complete...
Waiting 10 seconds for training to complete...
Waiting 10 seconds for training to complete...
Waiting 10 seconds for training to complete...
Waiting 10 seconds for training to complete...
Waiting 10 seconds for training to complete...
Waiting 10 seconds for training to complete...
Waiting 10 seconds for training to complete...
Waiting 10 seconds for training to complete...
Waiting 10 seconds for training to complete...
Waiting 10 seconds for training to complete...
Waiting 10 seconds for training to complete...
Waiting 10 seconds for training to complete...
Waiting 10 seconds for training to complete...
Waiting 10 seconds for training to complete...
Waiting 10 seconds for training to complete...
Waiting 10 seconds for training to complete...
Waiting 10 seconds for training to complete...
Waiting 10 seconds for training to complete...
Waiting 10 se

## Test the model

In [None]:
# NOTE(review): this id differs from the app id created earlier in the notebook;
# presumably it points at a previously published app - confirm before reuse.
APP_ID = "5e02b817-af9a-4335-892a-3b9acc444e45"
AZURE_LUIS_PRED_ENDPOINT = (
    "https://flyme-luis-resource.cognitiveservices.azure.com/"
    f"luis/prediction/v3.0/apps/{APP_ID}/"
    "slots/staging/predict"
)
# The prediction key is a secret: it is read from the environment instead of
# being committed with the notebook. Export AZURE_LUIS_PRED_KEY beforehand.
AZURE_LUIS_PRED_KEY = os.environ.get("AZURE_LUIS_PRED_KEY", "")

query = (
    "I want to book a trip from Paris to London for less than $100. "
    "I will leave on the first of January 2023 "
    "and come back on the 17th of january 2023."
)

query_params = {
    "verbose": "true",
    "show-all-intents": "true",
    "log": "true",
    "subscription-key": AZURE_LUIS_PRED_KEY,
    "query": query,
}

# Percent-encode every parameter so spaces, "$" and other reserved characters
# in the utterance cannot corrupt the query string.
req_url = f"{AZURE_LUIS_PRED_ENDPOINT}?{urlencode(query_params, quote_via=quote)}"

# Show the request with the key masked so the secret never lands in the
# notebook output.
redacted_params = dict(query_params, **{"subscription-key": "REDACTED"})
print(f"{AZURE_LUIS_PRED_ENDPOINT}?{urlencode(redacted_params, quote_via=quote)}")

https://flyme-luis-resource.cognitiveservices.azure.com/luis/prediction/v3.0/apps/5e02b817-af9a-4335-892a-3b9acc444e45/slots/staging/predict?verbose=true&show-all-intents=true&log=true&subscription-key=df63ca72a8894564a8bb8602771b30ad&query=I want to book a trip from Paris to London for less than $100. I will leave on the first of January 2023 and come back on the 17th of january 2023.


In [None]:
# A timeout keeps the cell from hanging on a stalled connection, and the status
# check surfaces HTTP errors (bad key, unpublished app, ...) instead of trying
# to parse an error page as a prediction.
response = requests.get(req_url, timeout=30)
response.raise_for_status()
pred = response.json()
print(json.dumps(pred, indent=4))

{
    "query": "I want to book a trip from Paris to London for less than $100. I will leave on the first of January 2023 and come back on the 17th of january 2023.",
    "prediction": {
        "topIntent": "Book",
        "intents": {
            "Book": {
                "score": 0.8679513
            },
            "Info": {
                "score": 0.2502006
            },
            "None": {
                "score": 0.00025097784
            }
        },
        "entities": {
            "or_city": [
                "Paris"
            ],
            "geographyV2": [
                {
                    "value": "Paris",
                    "type": "city"
                },
                {
                    "value": "London",
                    "type": "city"
                }
            ],
            "dst_city": [
                "London"
            ],
            "budget": [
                "$100."
            ],
            "number": [
                100,
          