CELL 1: Setup & Imports

In [None]:
# Mount Google Drive
from google.colab import drive
drive.mount('/content/drive')

# Install required libraries
!pip install -q transformers torch seaborn matplotlib safetensors datasets scikit-learn

# Imports
import torch
import re
import json
import numpy as np
import pandas as pd
from datasets import load_from_disk, DatasetDict, Dataset
from transformers import (
    AutoTokenizer,
    AutoModelForTokenClassification,
    TrainingArguments,
    Trainer,
    DataCollatorForTokenClassification
)
from sklearn.metrics import precision_recall_fscore_support, accuracy_score

# Root folder on the mounted Drive; every dataset and model path in this notebook hangs off it.
drivepath = "/content/drive/MyDrive/intent_project"

# Run on the GPU when the runtime exposes one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

print(f"Device: {device}", f"Drive path: {drivepath}", sep="\n")


Mounted at /content/drive
Device: cuda
Drive path: /content/drive/MyDrive/intent_project


CELL 2: Load Raw Data

In [None]:
# Read the three annotated splits that were previously saved under the project folder.
print("=" * 70, "LOADING RAW DATA", "=" * 70, sep="\n")

raw_train = load_from_disk(f"{drivepath}/train")
raw_val = load_from_disk(f"{drivepath}/validation")
raw_test = load_from_disk(f"{drivepath}/test")

print(
    f"\n✓ Train samples: {len(raw_train)}",
    f"✓ Validation samples: {len(raw_val)}",
    f"✓ Test samples: {len(raw_test)}",
    sep="\n",
)

# Peek at the first training record to confirm the columns the converter relies on.
print(f"\nSample train record:")
first_record = raw_train[0]
print(f"  utt: {first_record['utt']}")
print(f"  annot_utt: {first_record['annot_utt']}")
print(f"  locale: {first_record['locale']}")


LOADING RAW DATA

✓ Train samples: 230280
✓ Validation samples: 40660
✓ Test samples: 59480

Sample train record:
  utt: wake me up at nine am on friday
  annot_utt: wake me up at [time : nine am] on [date : friday]
  locale: en-US



CELL 3: Convert Bracket Format to Plain Text NER

In [None]:
print("=" * 70)
print("CONVERTING TO PLAIN TEXT NER FORMAT")
print("=" * 70)

# One "[slot_type : slot_value]" annotation; compiled once rather than once per sample.
_SLOT_PATTERN = re.compile(r'\[([^\]]+?)\s*:\s*([^\]]+?)\]')


def _locate_slots(utt, annot_utt):
    """Find the character span inside ``utt`` of every slot annotated in ``annot_utt``.

    Returns a list of dicts with keys 'type', 'value', 'start' and 'end'
    (``end`` exclusive), in annotation order. Slots whose value cannot be
    found in ``utt`` are skipped.
    """
    haystack = utt.lower()
    # str.lower() can change a string's length for some characters; offsets found
    # in the lowered text would then not index ``utt``, so match exactly instead.
    case_insensitive = len(haystack) == len(utt)
    if not case_insensitive:
        haystack = utt

    slots = []
    search_from = 0
    for match in _SLOT_PATTERN.finditer(annot_utt):
        slot_type = match.group(1).strip()
        slot_value = match.group(2).strip()
        if not slot_value:
            # A whitespace-only value cannot cover any word.
            continue

        needle = slot_value.lower() if case_insensitive else slot_value
        # Annotations are scanned left to right, so look after the previous slot
        # first: this keeps a repeated value ("queen ... queen") from collapsing
        # onto its first occurrence.
        slot_pos = haystack.find(needle, search_from)
        if slot_pos == -1:
            # Best effort for annotations that are not in utterance order.
            slot_pos = haystack.find(needle)
        if slot_pos == -1:
            continue

        slot_end = slot_pos + len(needle)
        slots.append({
            'type': slot_type,
            'value': slot_value,
            'start': slot_pos,
            'end': slot_end
        })
        search_from = max(search_from, slot_end)

    return slots


def convert_to_plain_ner_format(dataset, dataset_name=""):
    """
    Convert from bracket format [slot_type : slot_value] to plain text with BIO labels.

    Args:
        dataset: Iterable of records exposing 'utt' (plain utterance),
            'annot_utt' (utterance with bracketed slot annotations) and 'locale'.
        dataset_name: Not used by the conversion; kept so existing callers
            that pass a split name keep working.

    Returns:
        Tuple ``(plain_utts, bio_labels, locales)`` of three parallel lists.
        ``bio_labels[i]`` holds one tag per whitespace-separated word of
        ``plain_utts[i]``: 'O', 'B-<slot_type>' or 'I-<slot_type>'.
    """
    plain_utts = []
    bio_labels = []
    locales = []

    for idx, sample in enumerate(dataset):
        utt = sample['utt']
        slots = _locate_slots(utt, sample['annot_utt'])

        words = utt.split()
        labels = ['O'] * len(words)

        cursor = 0
        for word_idx, word in enumerate(words):
            # Searching from the end of the previous word keeps repeated words apart.
            word_start = utt.find(word, cursor)
            word_end = word_start + len(word)
            cursor = word_end

            for slot in slots:
                if word_start < slot['end'] and word_end > slot['start']:
                    # The word that contains the slot's first character opens it.
                    prefix = 'B' if word_start <= slot['start'] else 'I'
                    labels[word_idx] = f"{prefix}-{slot['type']}"
                    break  # Assign only one label per word

        plain_utts.append(utt)
        bio_labels.append(labels)
        locales.append(sample['locale'])

        if (idx + 1) % 5000 == 0:
            print(f"  Processed {idx + 1} samples...")

    return plain_utts, bio_labels, locales


# Run the converter over each split; every call yields (utterances, BIO labels, locales).
print("\nConverting train set...")
train_plain, train_labels, train_locales = convert_to_plain_ner_format(raw_train, "train")

print("Converting validation set...")
val_plain, val_labels, val_locales = convert_to_plain_ner_format(raw_val, "validation")

print("Converting test set...")
test_plain, test_labels, test_locales = convert_to_plain_ner_format(raw_test, "test")

print(
    f"\n✓ Conversion complete!",
    f"  Train: {len(train_plain)} samples",
    f"  Val: {len(val_plain)} samples",
    f"  Test: {len(test_plain)} samples",
    sep="\n",
)

# Spot-check the first three training samples against their source annotations.
print(f"\nConversion examples:")
for sample_pos in range(3):
    print(f"\n  Example {sample_pos+1}:")
    source_record = raw_train[sample_pos]
    print(f"    Original: {source_record['utt']}")
    print(f"    Annotated: {source_record['annot_utt']}")
    print(f"    Words: {train_plain[sample_pos].split()}")
    print(f"    Labels: {train_labels[sample_pos]}")


CONVERTING TO PLAIN TEXT NER FORMAT

Converting train set...
  Processed 5000 samples...
  Processed 10000 samples...
  Processed 15000 samples...
  Processed 20000 samples...
  Processed 25000 samples...
  Processed 30000 samples...
  Processed 35000 samples...
  Processed 40000 samples...
  Processed 45000 samples...
  Processed 50000 samples...
  Processed 55000 samples...
  Processed 60000 samples...
  Processed 65000 samples...
  Processed 70000 samples...
  Processed 75000 samples...
  Processed 80000 samples...
  Processed 85000 samples...
  Processed 90000 samples...
  Processed 95000 samples...
  Processed 100000 samples...
  Processed 105000 samples...
  Processed 110000 samples...
  Processed 115000 samples...
  Processed 120000 samples...
  Processed 125000 samples...
  Processed 130000 samples...
  Processed 135000 samples...
  Processed 140000 samples...
  Processed 145000 samples...
  Processed 150000 samples...
  Processed 155000 samples...
  Processed 160000 samples...

CELL 4: Create and Save NER Datasets

In [None]:
print("=" * 70, "CREATING NER DATASETS", "=" * 70, sep="\n")


def _as_ner_split(utterances, tag_sequences, split_locales):
    """Bundle one split's whitespace-split words, BIO tags and locales into a `Dataset`."""
    return Dataset.from_dict({
        'tokens': [utterance.split() for utterance in utterances],
        'ner_tags_str': tag_sequences,
        'locale': split_locales
    })


# Create datasets
train_dataset = _as_ner_split(train_plain, train_labels, train_locales)
val_dataset = _as_ner_split(val_plain, val_labels, val_locales)
test_dataset = _as_ner_split(test_plain, test_labels, test_locales)

# Save all three splits together so later cells can reload them in one call.
ner_dataset = DatasetDict({
    'train': train_dataset,
    'validation': val_dataset,
    'test': test_dataset
})
ner_dataset.save_to_disk(f"{drivepath}/ner_dataset_plain")

print(
    f"\n✓ Datasets saved to {drivepath}/ner_dataset_plain",
    f"  Train: {len(train_dataset)}",
    f"  Val: {len(val_dataset)}",
    f"  Test: {len(test_dataset)}",
    sep="\n",
)

# The label inventory is taken from the training tags only; ids follow alphabetical order.
all_labels = set().union(*train_labels)
ordered_labels = sorted(all_labels)
label2id = dict(zip(ordered_labels, range(len(ordered_labels))))
id2label = dict(enumerate(ordered_labels))

print(f"\nLabel mapping ({len(label2id)} labels):")
for tag, tag_id in sorted(label2id.items()):
    print(f"  {tag}: {tag_id}")

# Persist both directions of the mapping next to the dataset files.
with open(f"{drivepath}/ner_dataset_plain/label2id.json", 'w') as mapping_file:
    json.dump(label2id, mapping_file)
with open(f"{drivepath}/ner_dataset_plain/id2label.json", 'w') as mapping_file:
    json.dump(id2label, mapping_file)

print(f"\n✓ Label mappings saved!")


CREATING NER DATASETS


Saving the dataset (0/1 shards):   0%|          | 0/230280 [00:00<?, ? examples/s]

Saving the dataset (0/1 shards):   0%|          | 0/40660 [00:00<?, ? examples/s]

Saving the dataset (0/1 shards):   0%|          | 0/59480 [00:00<?, ? examples/s]


✓ Datasets saved to /content/drive/MyDrive/intent_project/ner_dataset_plain
  Train: 230280
  Val: 40660
  Test: 59480

Label mapping (111 labels):
  B-alarm_type: 0
  B-app_name: 1
  B-artist_name: 2
  B-audiobook_author: 3
  B-audiobook_name: 4
  B-business_name: 5
  B-business_type: 6
  B-change_amount: 7
  B-coffee_type: 8
  B-color_type: 9
  B-cooking_type: 10
  B-currency_name: 11
  B-date: 12
  B-definition_word: 13
  B-device_type: 14
  B-drink_type: 15
  B-email_address: 16
  B-email_folder: 17
  B-event_name: 18
  B-food_type: 19
  B-game_name: 20
  B-game_type: 21
  B-general_frequency: 22
  B-house_place: 23
  B-ingredient: 24
  B-joke_type: 25
  B-list_name: 26
  B-meal_type: 27
  B-media_type: 28
  B-movie_name: 29
  B-movie_type: 30
  B-music_album: 31
  B-music_descriptor: 32
  B-music_genre: 33
  B-news_topic: 34
  B-order_type: 35
  B-person: 36
  B-personal_info: 37
  B-place_name: 38
  B-player_setting: 39
  B-playlist_name: 40
  B-podcast_descriptor: 41
  B-podcas

CELL 5: Load Tokenizer and Create Label IDs

In [None]:
print("=" * 70, "TOKENIZER & LABEL SETUP", "=" * 70, sep="\n")

# Subword tokenizer of the checkpoint that is fine-tuned further down.
tokenizer = AutoTokenizer.from_pretrained("xlm-roberta-base")

# Reload the word-level dataset from disk rather than relying on in-memory state.
ner_dataset = load_from_disk(f"{drivepath}/ner_dataset_plain")

# JSON object keys are always strings, so id2label comes back keyed by "0", "1", ...
with open(f"{drivepath}/ner_dataset_plain/label2id.json", 'r') as mapping_file:
    label2id = json.load(mapping_file)
with open(f"{drivepath}/ner_dataset_plain/id2label.json", 'r') as mapping_file:
    id2label = json.load(mapping_file)

print(
    f"✓ Tokenizer loaded: xlm-roberta-base",
    f"✓ Labels loaded: {len(label2id)} unique labels",
    f"✓ Dataset loaded with splits: {list(ner_dataset.keys())}",
    sep="\n",
)


TOKENIZER & LABEL SETUP


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/25.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/615 [00:00<?, ?B/s]

sentencepiece.bpe.model:   0%|          | 0.00/5.07M [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/9.10M [00:00<?, ?B/s]

✓ Tokenizer loaded: xlm-roberta-base
✓ Labels loaded: 111 unique labels
✓ Dataset loaded with splits: ['train', 'validation', 'test']



CELL 6: Tokenize and Align Labels

In [None]:
print("=" * 70)
print("TOKENIZING AND ALIGNING LABELS")
print("=" * 70)

def tokenize_and_align_labels(examples):
    """Align BIO labels with XLM-RoBERTa subword tokens.

    Args:
        examples: Batch with 'tokens' (list of word lists) and 'ner_tags_str'
            (list of per-word BIO tag lists of matching length).

    Returns:
        The tokenizer output for the batch with an added 'labels' entry: one
        list of label ids per example, as long as the token sequence. Positions
        that belong to no word get -100. The first sub-token of a word gets the
        word's label. Later sub-tokens of a 'B-x' word get 'I-x'.

    Reads the module-level ``tokenizer`` and ``label2id``.
    """
    tokenized_inputs = tokenizer(
        examples['tokens'],
        truncation=True,
        is_split_into_words=True,
        max_length=128,
        padding='max_length',
        return_overflowing_tokens=False
    )

    # Tags that are missing from the mapping fall back to 'O' (id 0 if 'O' itself is missing).
    outside_id = label2id.get('O', 0)

    labels = []
    for i, word_labels in enumerate(examples['ner_tags_str']):
        label_ids = []
        previous_word_idx = None

        for word_idx in tokenized_inputs.word_ids(batch_index=i):
            if word_idx is None:
                # Token that maps to no word (special tokens and padding)
                label_ids.append(-100)
            else:
                word_label = word_labels[word_idx]
                word_label_id = label2id.get(word_label, outside_id)

                if word_idx != previous_word_idx:
                    # First sub-token of a word: use the word's label as-is
                    label_ids.append(word_label_id)
                elif word_label.startswith('B-'):
                    # Continuation of a slot-opening word: prefer the I- tag.
                    # If that slot type has no I- tag in the mapping, keep the
                    # word's own label rather than marking part of a slot as 'O'.
                    inside_label = 'I-' + word_label[2:]
                    label_ids.append(label2id.get(inside_label, word_label_id))
                else:
                    # Continuation of an 'O' or 'I-' word keeps the same label
                    label_ids.append(word_label_id)

            previous_word_idx = word_idx

        labels.append(label_ids)

    tokenized_inputs['labels'] = labels
    return tokenized_inputs

# Tokenize all splits
def _tokenize_split(split_name):
    """Run `tokenize_and_align_labels` over one split, dropping its original word-level columns."""
    split = ner_dataset[split_name]
    return split.map(
        tokenize_and_align_labels,
        batched=True,
        batch_size=32,
        remove_columns=split.column_names
    )


print("Tokenizing train set...")
tokenized_train = _tokenize_split('train')

print("Tokenizing validation set...")
tokenized_val = _tokenize_split('validation')

print("Tokenizing test set...")
tokenized_test = _tokenize_split('test')

# Keep the tokenized splits together and cache them on Drive.
tokenized_ner = DatasetDict({
    'train': tokenized_train,
    'validation': tokenized_val,
    'test': tokenized_test
})
tokenized_ner.save_to_disk(f"{drivepath}/tokenized_ner_dataset")

print(
    f"\n✓ Tokenization complete!",
    f"  Train: {len(tokenized_train)}",
    f"  Val: {len(tokenized_val)}",
    f"  Test: {len(tokenized_test)}",
    sep="\n",
)


TOKENIZING AND ALIGNING LABELS
Tokenizing train set...
Tokenizing validation set...


Map:   0%|          | 0/40660 [00:00<?, ? examples/s]

Tokenizing test set...


Saving the dataset (0/1 shards):   0%|          | 0/230280 [00:00<?, ? examples/s]

Saving the dataset (0/1 shards):   0%|          | 0/40660 [00:00<?, ? examples/s]

Saving the dataset (0/1 shards):   0%|          | 0/59480 [00:00<?, ? examples/s]


✓ Tokenization complete!
  Train: 230280
  Val: 40660
  Test: 59480


CELL 7: Define Metrics Function

In [None]:
print("=" * 70, "SETTING UP METRICS", "=" * 70, sep="\n")

def compute_metrics(eval_pred):
    """Token-level accuracy and weighted precision/recall/F1 computed from raw logits.

    ``eval_pred`` unpacks into ``(logits, labels)``. The predicted id of a token
    is the argmax over the last logits axis. Positions whose label is -100 are
    left out of every score.
    """
    logits, labels = eval_pred
    predictions = np.argmax(logits, axis=-1)

    # Pair up prediction and gold id for every position that carries a real label.
    scored_pairs = [
        (pred_id, label_id)
        for pred_row, label_row in zip(predictions, labels)
        for pred_id, label_id in zip(pred_row, label_row)
        if label_id != -100
    ]
    true_preds = [pred_id for pred_id, _ in scored_pairs]
    true_labels = [label_id for _, label_id in scored_pairs]

    precision, recall, f1, _ = precision_recall_fscore_support(
        true_labels, true_preds, average='weighted', zero_division=0
    )

    return {
        'accuracy': accuracy_score(true_labels, true_preds),
        'precision': precision,
        'recall': recall,
        'f1': f1
    }

print("✓ Metrics function ready!")


SETTING UP METRICS
✓ Metrics function ready!


In [None]:
print("=" * 70)
print("SETTING UP METRICS")
print("=" * 70)

def compute_metrics(eval_pred):
    """Compute accuracy, precision, recall, F1 for the NER task.

    Works for both trainers in this notebook. ``eval_pred`` unpacks into
    ``(predictions, labels)`` where ``predictions`` is either

    * raw logits with one trailing class axis more than ``labels``
      (what the stock ``Trainer`` passes), reduced here with argmax, or
    * label ids already, shaped like ``labels`` or flattened to 1D
      (what a custom ``prediction_step`` may return).

    Positions whose label is -100 are excluded. Returns a dict with
    'accuracy', 'precision', 'recall' and 'f1' (weighted averages); all are
    0.0 when no position carries a real label.
    """
    predictions, labels = eval_pred
    predictions = np.asarray(predictions)
    labels = np.asarray(labels)

    # This definition replaces the earlier logits-only one, so it must still
    # accept logits: an extra trailing axis means "scores per class".
    if predictions.ndim == labels.ndim + 1:
        predictions = predictions.argmax(axis=-1)

    # Flatten and remove ignored (-100) positions
    true_preds = []
    true_labels = []

    if predictions.ndim == 1:
        # Predictions already flattened
        for pred_id, label_id in zip(predictions, labels.flatten()):
            if label_id != -100:
                true_preds.append(pred_id)
                true_labels.append(label_id)
    else:
        # One row of ids per example
        for pred, label in zip(predictions, labels):
            for pred_id, label_id in zip(pred, label):
                if label_id != -100:
                    true_preds.append(pred_id)
                    true_labels.append(label_id)

    if len(true_preds) == 0 or len(true_labels) == 0:
        return {
            'accuracy': 0.0,
            'precision': 0.0,
            'recall': 0.0,
            'f1': 0.0
        }

    precision, recall, f1, _ = precision_recall_fscore_support(
        true_labels, true_preds, average='weighted', zero_division=0
    )
    accuracy = accuracy_score(true_labels, true_preds)

    return {
        'accuracy': accuracy,
        'precision': precision,
        'recall': recall,
        'f1': f1
    }

print("✓ Metrics function ready (CRF-compatible)!")


SETTING UP METRICS
✓ Metrics function ready (CRF-compatible)!


CELL 8: Initialize Model

In [None]:
print("=" * 70, "INITIALIZING MODEL", "=" * 70, sep="\n")

# Pretrained encoder with a token-classification head sized to the label set.
model = AutoModelForTokenClassification.from_pretrained(
    "xlm-roberta-base",
    label2id=label2id,
    id2label=id2label,
    num_labels=len(label2id)
)

print(
    f"✓ Model initialized with {len(label2id)} labels",
    f"  Model: xlm-roberta-base",
    f"  Device: {device}",
    sep="\n",
)

# Move to the selected device; as the cell's last expression this also echoes the module tree.
model.to(device)


INITIALIZING MODEL


model.safetensors:   0%|          | 0.00/1.12G [00:00<?, ?B/s]

Some weights of XLMRobertaForTokenClassification were not initialized from the model checkpoint at xlm-roberta-base and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


✓ Model initialized with 111 labels
  Model: xlm-roberta-base
  Device: cuda


XLMRobertaForTokenClassification(
  (roberta): XLMRobertaModel(
    (embeddings): XLMRobertaEmbeddings(
      (word_embeddings): Embedding(250002, 768, padding_idx=1)
      (position_embeddings): Embedding(514, 768, padding_idx=1)
      (token_type_embeddings): Embedding(1, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): XLMRobertaEncoder(
      (layer): ModuleList(
        (0-11): 12 x XLMRobertaLayer(
          (attention): XLMRobertaAttention(
            (self): XLMRobertaSdpaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): XLMRobertaSelfOutput(
              (dense): Linear(in_features=768, out_features=768

CELL 9: Training Arguments

In [None]:
print("=" * 70, "SETTING UP TRAINING ARGUMENTS", "=" * 70, sep="\n")

training_args = TrainingArguments(
    output_dir=f"{drivepath}/slot_filling_model_plaintext/checkpoint",
    # Optimisation
    learning_rate=2e-5,
    weight_decay=0.01,
    num_train_epochs=5,
    per_device_train_batch_size=32,
    per_device_eval_batch_size=32,
    # Evaluate and checkpoint once per epoch; the best checkpoint by F1 is restored at the end.
    eval_strategy="epoch",
    save_strategy="epoch",
    save_total_limit=3,
    load_best_model_at_end=True,
    metric_for_best_model="f1",
    # Logging
    logging_steps=500,
    report_to="none",
)

print(
    f"✓ Training arguments configured",
    f"  Output dir: {training_args.output_dir}",
    f"  Batch size: {training_args.per_device_train_batch_size}",
    f"  Epochs: {training_args.num_train_epochs}",
    f"  Learning rate: {training_args.learning_rate}",
    sep="\n",
)


SETTING UP TRAINING ARGUMENTS
✓ Training arguments configured
  Output dir: /content/drive/MyDrive/intent_project/slot_filling_model_plaintext/checkpoint
  Batch size: 32
  Epochs: 5
  Learning rate: 2e-05


In [None]:
print("=" * 70, "SETTING UP TRAINING ARGUMENTS (OPTIMIZED)", "=" * 70, sep="\n")

# NOTE: this rebinds `training_args`, replacing the per-epoch configuration defined earlier.
training_args = TrainingArguments(
    output_dir=f"{drivepath}/slot_filling_model_crf/checkpoint",
    # Optimisation
    learning_rate=2e-5,
    weight_decay=0.01,
    num_train_epochs=5,
    per_device_train_batch_size=64,
    per_device_eval_batch_size=64,
    # NOTE(review): adamw_8bit appears to rely on the bitsandbytes package, which the
    # install cell does not list; confirm it is available in the runtime.
    optim="adamw_8bit",
    # Evaluate and checkpoint every 500 optimiser steps instead of once per epoch.
    eval_strategy="steps",
    eval_steps=500,
    save_strategy="steps",
    save_steps=500,
    save_total_limit=2,
    load_best_model_at_end=True,
    metric_for_best_model="f1",
    # Data loading
    dataloader_num_workers=4,
    dataloader_pin_memory=True,
    # Logging
    logging_steps=100,
    report_to="none",
)

print(
    f"✓ Optimized training arguments configured",
    f"  Batch size: {training_args.per_device_train_batch_size} (increased)",
    f"  Eval strategy: steps (not epoch)",
    f"  Eval every: 500 steps",
    f"  Optimizer: adamw_8bit (memory efficient)",
    sep="\n",
)


SETTING UP TRAINING ARGUMENTS (OPTIMIZED)
✓ Optimized training arguments configured
  Batch size: 64 (increased)
  Eval strategy: steps (not epoch)
  Eval every: 500 steps
  Optimizer: adamw_8bit (memory efficient)


CELL 10: Create Trainer and Train

In [None]:
print("=" * 70, "TRAINING NER MODEL ON PLAIN TEXT", "=" * 70, sep="\n")

# Collator that batches the tokenized examples for token classification.
data_collator = DataCollatorForTokenClassification(tokenizer)

trainer = Trainer(
    model=model,
    args=training_args,
    data_collator=data_collator,
    compute_metrics=compute_metrics,
    train_dataset=tokenized_ner["train"],
    eval_dataset=tokenized_ner["validation"],
)

# Fine-tune; the returned object carries the loss reported below.
training_result = trainer.train()

print(
    f"\n✓ Training complete!",
    f"  Training loss: {training_result.training_loss:.4f}",
    sep="\n",
)

# Store model and tokenizer side by side so the inference cells can load both from one folder.
model.save_pretrained(f"{drivepath}/slot_filling_model_plaintext/final_model")
tokenizer.save_pretrained(f"{drivepath}/slot_filling_model_plaintext/final_model")

print(f"✓ Model saved to {drivepath}/slot_filling_model_plaintext/final_model")


TRAINING NER MODEL ON PLAIN TEXT


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.33,0.356628,0.902223,0.900526,0.902223,0.899617
2,0.2396,0.330493,0.91039,0.911192,0.91039,0.909526
3,0.1723,0.336877,0.914728,0.914221,0.914728,0.913533


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.33,0.356628,0.902223,0.900526,0.902223,0.899617
2,0.2396,0.330493,0.91039,0.911192,0.91039,0.909526
3,0.1723,0.336877,0.914728,0.914221,0.914728,0.913533
4,0.1373,0.354439,0.915452,0.915911,0.915452,0.91478
5,0.1129,0.368464,0.915839,0.917309,0.915839,0.915797



✓ Training complete!
  Training loss: 0.2434
✓ Model saved to /content/drive/MyDrive/intent_project/slot_filling_model_plaintext/final_model


CELL 11: Evaluate on Test Set

In [None]:
print("=" * 70, "EVALUATING ON TEST SET", "=" * 70, sep="\n")

# Score the held-out split with the same metric function used during training.
test_results = trainer.evaluate(tokenized_ner["test"])

print(
    f"\nTest Set Results:",
    f"  Accuracy:  {test_results['eval_accuracy']:.4f}",
    f"  Precision: {test_results['eval_precision']:.4f}",
    f"  Recall:    {test_results['eval_recall']:.4f}",
    f"  F1:        {test_results['eval_f1']:.4f}",
    sep="\n",
)

# Keep the full metrics dict next to the model for later reference.
with open(f"{drivepath}/slot_filling_model_plaintext/test_results.json", 'w') as results_file:
    json.dump(test_results, results_file, indent=2)

print(f"\n✓ Results saved!")


EVALUATING ON TEST SET



Test Set Results:
  Accuracy:  0.9102
  Precision: 0.9112
  Recall:    0.9102
  F1:        0.9101

✓ Results saved!


CELL 12: Load Model for Inference

In [None]:
print("=" * 70, "LOADING MODEL FOR INFERENCE", "=" * 70, sep="\n")

# Reload the fine-tuned weights and the tokenizer saved alongside them.
model = AutoModelForTokenClassification.from_pretrained(
    f"{drivepath}/slot_filling_model_plaintext/final_model"
)
tokenizer = AutoTokenizer.from_pretrained(
    f"{drivepath}/slot_filling_model_plaintext/final_model"
)

# Read back from JSON, so the ids used as keys are strings ("0", "1", ...).
with open(f"{drivepath}/ner_dataset_plain/id2label.json", 'r') as mapping_file:
    id2label = json.load(mapping_file)

# Put the model on the target device and switch it to evaluation mode.
model.to(device)
model.eval()

print(f"✓ Model loaded for inference", f"✓ Device: {device}", sep="\n")


LOADING MODEL FOR INFERENCE
✓ Model loaded for inference
✓ Device: cuda


CELL 13: Inference Function

In [None]:
def extract_slots_from_plain_utterance(utterance, tokenizer, model, id2label, device):
    """
    Extract slots from PLAIN utterances (no annotations needed).

    Args:
        utterance: Raw text; it is split on whitespace into words.
        tokenizer: Tokenizer called with ``is_split_into_words=True`` whose
            result exposes ``word_ids`` (token -> word index mapping).
        model: Token-classification model whose output has ``.logits``.
        id2label: Mapping from label id to BIO tag. Keys may be ints or their
            string form (as produced by a JSON round-trip).
        device: Device the input tensors are moved to before the forward pass.

    Returns:
        List of ``{'type': slot_type, 'value': text}`` dicts in utterance
        order; ``value`` joins the slot's words with single spaces. An empty
        or whitespace-only utterance yields an empty list.

    Raises:
        KeyError: if a predicted id is missing from ``id2label``.
    """
    words = utterance.split()
    if not words:
        return []

    # Tokenize once: the same encoding feeds the model and provides word_ids.
    # A single sequence needs no padding.
    encoding = tokenizer(
        words,
        is_split_into_words=True,
        return_tensors="pt",
        truncation=True,
        max_length=128
    )
    word_ids = encoding.word_ids(batch_index=0)
    inputs = {k: v.to(device) for k, v in encoding.items()}

    # Predict
    with torch.no_grad():
        logits = model(**inputs).logits
    predictions = torch.argmax(logits, dim=-1)[0].tolist()

    # A word's label is the prediction of its first sub-token. Keying by word
    # index keeps words and predictions paired even when a word produced no
    # token at all (truncated away or dropped by the tokenizer).
    first_prediction = {}
    for token_idx, word_idx in enumerate(word_ids):
        if word_idx is not None and word_idx not in first_prediction:
            first_prediction[word_idx] = predictions[token_idx]

    def tag_for(pred_id):
        # Accept both JSON-style string keys and plain int keys.
        key = int(pred_id)
        text_key = str(key)
        return id2label[text_key] if text_key in id2label else id2label[key]

    # Extract slots from BIO tags
    slots = []
    open_type = None
    open_words = []

    def close_open_slot():
        if open_type:
            slots.append({
                'type': open_type,
                'value': ' '.join(open_words)
            })

    for word_idx, word in enumerate(words):
        if word_idx not in first_prediction:
            continue
        label = tag_for(first_prediction[word_idx])

        if label == 'O':
            close_open_slot()
            open_type, open_words = None, []
        elif label.startswith('B-'):
            close_open_slot()
            open_type, open_words = label[2:], [word]
        elif label.startswith('I-'):
            slot_type = label[2:]
            if slot_type == open_type:
                open_words.append(word)
            else:
                # An I- tag without a matching open slot starts a new slot.
                close_open_slot()
                open_type, open_words = slot_type, [word]

    close_open_slot()
    return slots

print("✓ Inference function ready!")


✓ Inference function ready!


CELL 14: Test on Plain Utterances

In [None]:
print("=" * 70, "PRODUCTION SLOT EXTRACTION (PLAIN TEXT)", "=" * 70, sep="\n")

# Hand-written utterances used as a quick smoke test of the inference function.
test_utterances = [
    "wake me up at 6 AM tomorrow",
    "order a cappuccino and a croissant",
    "book a table at an Italian restaurant at 7pm",
    "play some jazz music by Miles Davis",
    "remind me to call John next Friday",
    "what's the weather in Paris this weekend",
    "set alarm for 8 AM on Monday",
]

for number, utterance in enumerate(test_utterances, start=1):
    print(f"\n{number}. Utterance: {utterance}")

    slots = extract_slots_from_plain_utterance(utterance, tokenizer, model, id2label, device)

    if not slots:
        print("   No slots detected")
    else:
        print("   Detected slots:")
        for slot in slots:
            print(f"     ✓ [{slot['type']}]: {slot['value']}")

    print("-" * 60)

print("\n✓ Production extraction complete!")


PRODUCTION SLOT EXTRACTION (PLAIN TEXT)

1. Utterance: wake me up at 6 AM tomorrow
   Detected slots:
     ✓ [time]: 6 AM
     ✓ [date]: tomorrow
------------------------------------------------------------

2. Utterance: order a cappuccino and a croissant
   Detected slots:
     ✓ [coffee_type]: cappuccino
     ✓ [drink_type]: croissant
------------------------------------------------------------

3. Utterance: book a table at an Italian restaurant at 7pm
   Detected slots:
     ✓ [food_type]: Italian
     ✓ [business_type]: restaurant
     ✓ [time]: 7pm
------------------------------------------------------------

4. Utterance: play some jazz music by Miles Davis
   Detected slots:
     ✓ [music_genre]: jazz
     ✓ [artist_name]: Miles Davis
------------------------------------------------------------

5. Utterance: remind me to call John next Friday
   Detected slots:
     ✓ [event_name]: call John
     ✓ [date]: Friday
------------------------------------------------------------

6

CELL 9B: Install pytorch-crf

In [None]:
!pip install -q pytorch-crf


CELL 10B: Custom Model with CRF Head

In [None]:
import torch
import torch.nn as nn
from transformers import AutoModel
from torchcrf import CRF

class XLMRobertaWithCRF(nn.Module):
    """Transformer encoder followed by a linear emission layer and a CRF.

    Args:
        model_name: Checkpoint name or path handed to ``AutoModel.from_pretrained``.
        num_labels: Number of tags; sizes both the emission layer and the CRF.
        id2label: Mapping from tag id to tag string, stored on the instance.
        label2id: Mapping from tag string to tag id, stored on the instance and
            used to look up the id of the 'O' tag.
    """

    def __init__(self, model_name, num_labels, id2label, label2id):
        super().__init__()
        self.transformer = AutoModel.from_pretrained(model_name)
        self.dropout = nn.Dropout(0.1)
        self.classifier = nn.Linear(self.transformer.config.hidden_size, num_labels)
        self.crf = CRF(num_labels, batch_first=True)
        self.id2label = id2label
        self.label2id = label2id
        self.num_labels = num_labels

    def forward(self, input_ids, attention_mask, token_type_ids=None, labels=None):
        """Run the encoder and either score gold tags or decode the best path.

        Returns:
            With ``labels``: ``{'loss': negated CRF score (batch mean), 'logits': emissions}``.
            Without ``labels``: ``{'logits': emissions, 'predictions': crf.decode(...)}``,
            decoded over the positions where ``attention_mask`` is set.
        """
        outputs = self.transformer(
            input_ids=input_ids,
            attention_mask=attention_mask,
            token_type_ids=token_type_ids
        )

        sequence_output = outputs[0]
        sequence_output = self.dropout(sequence_output)
        logits = self.classifier(sequence_output)

        if labels is not None:
            # Training: compute CRF loss.
            # The CRF cannot take -100 as a tag. Attended positions that carry
            # -100 (e.g. the special tokens) are scored as 'O'. Filling with id 0
            # would use whichever tag sorts first (B-alarm_type in this
            # notebook's label set) and teach the CRF a spurious slot start.
            # Falls back to 0 only when the mapping has no 'O' entry.
            outside_id = self.label2id.get('O', 0)
            labels = labels.masked_fill(labels == -100, outside_id)
            loss = -self.crf(logits, labels, mask=attention_mask.bool(), reduction='mean')
            return {'loss': loss, 'logits': logits}
        else:
            # Inference: get best path
            predictions = self.crf.decode(logits, mask=attention_mask.bool())
            return {'logits': logits, 'predictions': predictions}

print("✓ Custom CRF model defined!")


✓ Custom CRF model defined!


CELL 10C: Initialize CRF Model

In [None]:
print("=" * 70, "INITIALIZING MODEL WITH CRF HEAD", "=" * 70, sep="\n")

# NOTE: this rebinds `model`, replacing whatever model object earlier cells created.
model = XLMRobertaWithCRF(
    model_name="xlm-roberta-base",
    num_labels=len(label2id),
    label2id=label2id,
    id2label=id2label
)

# Put the encoder, emission layer and CRF on the selected device.
model.to(device)

print(
    f"✓ Model with CRF head initialized!",
    f"  Base model: xlm-roberta-base",
    f"  Num labels: {len(label2id)}",
    f"  CRF layer: ✓ Added",
    f"  Device: {device}",
    sep="\n",
)


INITIALIZING MODEL WITH CRF HEAD


model.safetensors:   0%|          | 0.00/1.12G [00:00<?, ?B/s]

✓ Model with CRF head initialized!
  Base model: xlm-roberta-base
  Num labels: 111
  CRF layer: ✓ Added
  Device: cuda


CELL 10D: Custom Trainer for CRF

In [None]:
from transformers import Trainer
import numpy as np

class CRFTrainer(Trainer):
    """
    Trainer adapted to a model whose forward() returns a dict:
    {'loss', 'logits'} when labels are given, {'logits', 'predictions'} otherwise.

    Evaluation predictions are the CRF's decoded paths, not an argmax over
    the emission scores, so reported metrics reflect what inference produces.
    """

    # Value written after the end of each decoded path so that the predictions
    # form a rectangular (batch, seq_len) tensor. -100 is the same ignore index
    # used for label padding; the decoded path covers every attended position.
    prediction_pad_id = -100

    def compute_loss(self, model, inputs, return_outputs=False, num_items_in_batch=None):
        """
        Return the CRF loss for one batch.

        Args:
            model: the CRF tagger (possibly wrapped by the Trainer).
            inputs: batch dict with 'input_ids', 'attention_mask', 'labels' and
                optionally 'token_type_ids'. 'labels' is popped from the dict.
            return_outputs: when True, return (loss, outputs) instead of loss.
            num_items_in_batch: accepted for Trainer API compatibility; unused.
        """
        labels = inputs.pop("labels")
        outputs = model(
            input_ids=inputs['input_ids'],
            attention_mask=inputs['attention_mask'],
            token_type_ids=inputs.get('token_type_ids', None),
            labels=labels
        )
        loss = outputs['loss']
        return (loss, outputs) if return_outputs else loss

    def prediction_step(self, model, inputs, prediction_loss_only, ignore_keys=None):
        """
        Run one evaluation/prediction batch.

        Returns:
            (loss, predictions, labels) where
            - loss is None when the batch carries no labels,
            - predictions is a LongTensor (batch, seq_len) holding the decoded
              CRF path per sequence, right-padded with `prediction_pad_id`,
            - predictions and labels are None when `prediction_loss_only` is set.
        """
        # Same device placement the stock Trainer.prediction_step performs.
        inputs = self._prepare_inputs(inputs)
        # Unlabeled batches (e.g. trainer.predict on raw data) must not crash.
        labels = inputs.pop("labels", None)
        attention_mask = inputs['attention_mask']

        with torch.no_grad():
            outputs = model(
                input_ids=inputs['input_ids'],
                attention_mask=attention_mask,
                token_type_ids=inputs.get('token_type_ids', None),
                labels=labels
            )
            loss = outputs.get('loss')
            if prediction_loss_only:
                return loss, None, None

            logits = outputs['logits']
            if 'predictions' in outputs:
                # Forward without labels already decoded the best paths.
                paths = outputs['predictions']
            else:
                # Forward with labels only returns loss/logits: decode explicitly.
                # Unwrap DataParallel-style wrappers to reach the CRF layer.
                crf_layer = getattr(model, 'module', model).crf
                paths = crf_layer.decode(logits, mask=attention_mask.bool())

        # decode() yields one variable-length list per sequence; pack them into
        # a rectangular tensor so the Trainer can concatenate across batches.
        predictions = logits.new_full(
            logits.shape[:2], self.prediction_pad_id, dtype=torch.long
        )
        for row, path in enumerate(paths):
            if path:
                predictions[row, :len(path)] = torch.as_tensor(
                    path, dtype=torch.long, device=logits.device
                )

        return loss, predictions, labels

print("✓ CRF Trainer defined!")


✓ CRF Trainer defined!


CELL 10E: Pre-Training Sanity Check

In [None]:
# Pre-training sanity check: model structure, device, dataset sizes, label map,
# one real forward pass, and the presence of the objects the training cell needs.
# The cell only reads state; it never trains or modifies the model.
print("=" * 70)
print("CHECKING CRF SETUP BEFORE TRAINING")
print("=" * 70)

# 1. Check model structure
print("\n1. Model Structure:")
print(f"   Model type: {type(model)}")
print(f"   Has transformer: {hasattr(model, 'transformer')}")
print(f"   Has CRF: {hasattr(model, 'crf')}")
print(f"   Num labels: {model.num_labels}")

# 2. Check device
print(f"\n2. Device:")
print(f"   Model device: {next(model.parameters()).device}")
print(f"   Expected device: {device}")

# 3. Check data
print(f"\n3. Data:")
print(f"   Train size: {len(tokenized_ner['train'])}")
print(f"   Val size: {len(tokenized_ner['validation'])}")
print(f"   Test size: {len(tokenized_ner['test'])}")

# 4. Check label mapping
print(f"\n4. Label Mapping:")
print(f"   Num unique labels: {len(label2id)}")
print(f"   Sample labels: {list(label2id.items())[:5]}")

# 5. Test forward pass on single batch
print(f"\n5. Testing Forward Pass:")
# Explicit flag: a leftover `outputs` variable from an earlier cell must not
# be able to make this check look successful.
forward_pass_ok = False
try:
    sample_batch = tokenized_ner['train'][0:2]  # Get 2 samples

    # Convert to tensors
    batch = {
        'input_ids': torch.tensor(sample_batch['input_ids']).to(device),
        'attention_mask': torch.tensor(sample_batch['attention_mask']).to(device),
        'labels': torch.tensor(sample_batch['labels']).to(device)
    }

    print(f"   Batch shapes:")
    print(f"     input_ids: {batch['input_ids'].shape}")
    print(f"     attention_mask: {batch['attention_mask'].shape}")
    print(f"     labels: {batch['labels'].shape}")

    # Forward pass
    with torch.no_grad():
        outputs = model(
            input_ids=batch['input_ids'],
            attention_mask=batch['attention_mask'],
            labels=batch['labels']
        )

    # A NaN/inf loss means the forward pass ran but is not usable for training.
    forward_pass_ok = bool(torch.isfinite(outputs['loss']).all())

    if forward_pass_ok:
        print(f"\n   ✓ Forward pass successful!")
    else:
        print(f"\n   ✗ Forward pass produced a non-finite loss!")
    print(f"   Loss: {outputs['loss']:.4f}")
    print(f"   Logits shape: {outputs['logits'].shape}")

except Exception as e:
    # Deliberately broad: this cell is a diagnostic and should report any failure.
    print(f"\n   ✗ Forward pass FAILED!")
    print(f"   Error: {type(e).__name__}: {e}")

# 6. Check Trainer setup
# The trainer itself is only created in the training cell, so report what
# exists instead of assuming it (a fresh-kernel run would raise NameError).
print(f"\n6. Trainer Setup:")
if 'trainer' in globals():
    print(f"   Trainer type: {type(trainer)}")
else:
    print(f"   Trainer not created yet (it is built in the training cell)")

prerequisites = {
    "CRFTrainer class defined": 'CRFTrainer' in globals(),
    "Training args configured": 'training_args' in globals(),
    "Compute metrics available": 'compute_metrics' in globals(),
}
for description, present in prerequisites.items():
    print(f"   {description}: {'✓' if present else '✗'}")

ready_for_training = forward_pass_ok and all(prerequisites.values())

print("\n" + "=" * 70)
print("✓ READY FOR TRAINING!" if ready_for_training else "✗ CHECK FAILED - FIX ISSUES ABOVE")
print("=" * 70)


CHECKING CRF SETUP BEFORE TRAINING

1. Model Structure:
   Model type: <class '__main__.XLMRobertaWithCRF'>
   Has transformer: True
   Has CRF: True
   Num labels: 111

2. Device:
   Model device: cuda:0
   Expected device: cuda

3. Data:
   Train size: 230280
   Val size: 40660
   Test size: 59480

4. Label Mapping:
   Num unique labels: 111
   Sample labels: [('B-alarm_type', 0), ('B-app_name', 1), ('B-artist_name', 2), ('B-audiobook_author', 3), ('B-audiobook_name', 4)]

5. Testing Forward Pass:
   Batch shapes:
     input_ids: torch.Size([2, 128])
     attention_mask: torch.Size([2, 128])
     labels: torch.Size([2, 128])

   ✓ Forward pass successful!
   Loss: 0.2783
   Logits shape: torch.Size([2, 128, 111])

6. Trainer Setup:
   Trainer type: <class '__main__.CRFTrainer'>
   Training args configured: ✓
   Compute metrics available: ✓

✓ READY FOR TRAINING!


CELL 10F: Train with CRF

In [None]:
# Train the CRF tagger and persist everything the inference cells need.
print("=" * 70)
print("TRAINING NER MODEL WITH CRF HEAD")
print("=" * 70)

data_collator = DataCollatorForTokenClassification(tokenizer)

trainer = CRFTrainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_ner["train"],
    eval_dataset=tokenized_ner["validation"],
    data_collator=data_collator,
    compute_metrics=compute_metrics,
)

# Train
training_result = trainer.train()

print(f"\n✓ Training complete!")
print(f"  Training loss: {training_result.training_loss:.4f}")

# Save final model.
# XLMRobertaWithCRF is a plain nn.Module and has no save_pretrained(); the
# Trainer knows how to persist such models (it writes the state dict), which is
# the same mechanism that produced the intermediate checkpoints.
final_model_dir = f"{drivepath}/slot_filling_model_crf/final_model"
trainer.save_model(final_model_dir)
tokenizer.save_pretrained(final_model_dir)

# Keep the label maps next to the weights so the directory can be loaded on
# its own. JSON turns integer keys into strings.
with open(f"{final_model_dir}/id2label.json", 'w') as label_file:
    json.dump(id2label, label_file, indent=2)
with open(f"{final_model_dir}/label2id.json", 'w') as label_file:
    json.dump(label2id, label_file, indent=2)

print(f"✓ Model saved to {final_model_dir}")


TRAINING NER MODEL WITH CRF HEAD


Step,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
500,1.8513,2.919159,0.908397,0.909105,0.908397,0.908032
1000,1.69,3.004918,0.904485,0.908952,0.904485,0.905483
1500,1.6373,2.979035,0.90907,0.910358,0.90907,0.908547
2000,1.5261,2.984396,0.908216,0.910592,0.908216,0.908265
2500,1.4213,3.041765,0.909736,0.911849,0.909736,0.909508
3000,1.3804,3.003329,0.908225,0.911193,0.908225,0.908617
3500,1.4461,3.018784,0.907311,0.911079,0.907311,0.907882
4000,1.2319,3.002534,0.909572,0.91331,0.909572,0.910364
4500,1.295,2.938002,0.911081,0.912964,0.911081,0.911219
5000,1.3239,3.032623,0.909522,0.912848,0.909522,0.909943



✓ Training complete!
  Training loss: 1.0497


AttributeError: 'XLMRobertaWithCRF' object has no attribute 'save_pretrained'

In [None]:
# Manual save of the in-memory CRF model (a custom nn.Module has no
# save_pretrained). Safe to run in a session where training was not executed.
import os

from safetensors.torch import save_file

# training_result only exists if the training cell ran in this kernel session.
if 'training_result' in globals():
    print(f"\n✓ Training complete!")
    print(f"  Training loss: {training_result.training_loss:.4f}")
else:
    print("\n(no training_result in this session - saving the model currently in memory)")

output_dir = f"{drivepath}/slot_filling_model_crf/final_model"
os.makedirs(output_dir, exist_ok=True)

# Save model weights as model.safetensors: that is the file name and format
# the inference loading cell reads from this directory.
# save_file needs contiguous tensors; detach + cpu keeps the file device-agnostic.
weights = {
    name: tensor.detach().cpu().contiguous()
    for name, tensor in model.state_dict().items()
}
save_file(weights, f"{output_dir}/model.safetensors", metadata={"format": "pt"})

# Save tokenizer
tokenizer.save_pretrained(output_dir)

# Save label mappings (JSON turns integer keys into strings)
with open(f"{output_dir}/id2label.json", 'w') as f:
    json.dump(id2label, f)
with open(f"{output_dir}/label2id.json", 'w') as f:
    json.dump(label2id, f)

# Save config
config = {
    "model_type": "xlm-roberta-with-crf",
    "num_labels": len(label2id),
    "hidden_size": model.transformer.config.hidden_size,
}
with open(f"{output_dir}/config.json", 'w') as f:
    json.dump(config, f)

print(f"✓ Model saved to {output_dir}")
print(f"  - model.safetensors (model weights)")
print(f"  - tokenizer.json")
print(f"  - id2label.json")
print(f"  - label2id.json")
print(f"  - config.json")



✓ Training complete!


NameError: name 'training_result' is not defined

CHECKPOINTS-SAVING

In [None]:
import shutil
import os

# Promote one Trainer checkpoint to the final_model directory by copying
# every regular file it contains (sub-directories are not copied).
drivepath = "/content/drive/MyDrive/intent_project"

checkpoint_dir = f"{drivepath}/slot_filling_model_crf/checkpoint"
final_model_dir = f"{drivepath}/slot_filling_model_crf/final_model"

print("=" * 70, "COPYING BEST CHECKPOINT (14000) TO FINAL_MODEL", "=" * 70, sep="\n")

# Use checkpoint-14000 (it has files, unlike 14500 and 15000)
best_checkpoint = os.path.join(checkpoint_dir, "checkpoint-14000")

print(f"\nSource: {best_checkpoint}")
print(f"Destination: {final_model_dir}")

# The destination may not exist yet.
os.makedirs(final_model_dir, exist_ok=True)

# Select the regular files first, then copy them one by one (metadata preserved).
regular_files = [
    entry
    for entry in os.listdir(best_checkpoint)
    if os.path.isfile(os.path.join(best_checkpoint, entry))
]
for entry in regular_files:
    shutil.copy2(
        os.path.join(best_checkpoint, entry),
        os.path.join(final_model_dir, entry),
    )
    print(f"  ✓ {entry}")
count = len(regular_files)

print(f"\n✓ Copied {count} files!")

# Show what the destination now holds.
print("\nVerifying final_model directory:")
final_files = os.listdir(final_model_dir)
print(f"Files in final_model: {final_files}")


COPYING BEST CHECKPOINT (14000) TO FINAL_MODEL

Source: /content/drive/MyDrive/intent_project/slot_filling_model_crf/checkpoint/checkpoint-14000
Destination: /content/drive/MyDrive/intent_project/slot_filling_model_crf/final_model
  ✓ model.safetensors
  ✓ tokenizer_config.json
  ✓ special_tokens_map.json
  ✓ sentencepiece.bpe.model
  ✓ tokenizer.json
  ✓ training_args.bin
  ✓ optimizer.pt
  ✓ scheduler.pt
  ✓ rng_state.pth
  ✓ trainer_state.json

✓ Copied 10 files!

Verifying final_model directory:
Files in final_model: ['model.safetensors', 'tokenizer_config.json', 'special_tokens_map.json', 'sentencepiece.bpe.model', 'tokenizer.json', 'training_args.bin', 'optimizer.pt', 'scheduler.pt', 'rng_state.pth', 'trainer_state.json']


In [None]:
import os
import json

# Complete the final_model directory with the label maps and a small config
# file. No model weights are written by this cell.
drivepath = "/content/drive/MyDrive/intent_project"
final_model_dir = f"{drivepath}/slot_filling_model_crf/final_model"

print("=" * 70, "CREATING MODEL CONFIG AND WEIGHTS", "=" * 70, sep="\n")

# 1. Read the label mappings stored with the plain-text NER dataset
label_source_dir = f"{drivepath}/ner_dataset_plain"
with open(f"{label_source_dir}/id2label.json", 'r') as source_file:
    id2label = json.load(source_file)
with open(f"{label_source_dir}/label2id.json", 'r') as source_file:
    label2id = json.load(source_file)

# ...and write a copy of each next to the model weights
for mapping_name, mapping in (("id2label", id2label), ("label2id", label2id)):
    with open(f"{final_model_dir}/{mapping_name}.json", 'w') as target_file:
        json.dump(mapping, target_file, indent=2)
        print(f"✓ Saved {mapping_name}.json ({len(mapping)} labels)")

# 2. Create config.json for CRF model
config = {
    "model_type": "xlm-roberta-with-crf",
    "num_labels": len(label2id),
    "architecture": "xlm-roberta-base",
}

with open(f"{final_model_dir}/config.json", 'w') as target_file:
    json.dump(config, target_file, indent=2)
    print("✓ Saved config.json")

print("\n✓ All files ready for inference!")
print("\nFinal model directory contains:")
for filename in sorted(os.listdir(final_model_dir)):
    print(f"  - {filename}")


CREATING MODEL CONFIG AND WEIGHTS
✓ Saved id2label.json (111 labels)
✓ Saved label2id.json (111 labels)
✓ Saved config.json

✓ All files ready for inference!

Final model directory contains:
  - config.json
  - id2label.json
  - label2id.json
  - model.safetensors
  - optimizer.pt
  - rng_state.pth
  - scheduler.pt
  - sentencepiece.bpe.model
  - special_tokens_map.json
  - tokenizer.json
  - tokenizer_config.json
  - trainer_state.json
  - training_args.bin


In [None]:
# NOTE(review): no cell visible in this part of the notebook imports
# bitsandbytes - confirm it is still needed; the install is also unpinned.
!pip install -q bitsandbytes

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m59.4/59.4 MB[0m [31m45.1 MB/s[0m eta [36m0:00:00[0m
[?25h

CELL 12B: Load CRF Model for Inference

In [None]:
# Rebuild the CRF tagger from the files in final_model and switch it to eval mode.
print("=" * 70, "LOADING CRF MODEL FOR INFERENCE", "=" * 70, sep="\n")

from safetensors.torch import load_file

crf_model_dir = f"{drivepath}/slot_filling_model_crf/final_model"

# Label maps are read from JSON, so the keys of id2label are strings.
with open(f"{crf_model_dir}/id2label.json", 'r') as mapping_file:
    id2label = json.load(mapping_file)
with open(f"{crf_model_dir}/label2id.json", 'r') as mapping_file:
    label2id = json.load(mapping_file)

# Recreate the architecture; the trained weights are loaded over it below.
model = XLMRobertaWithCRF(
    label2id=label2id,
    id2label=id2label,
    num_labels=len(label2id),
    model_name="xlm-roberta-base"
)

# Trained weights from the safetensors file.
safetensors_path = f"{crf_model_dir}/model.safetensors"
model_state = load_file(safetensors_path)
model.load_state_dict(model_state)

# Tokenizer files live in the same directory.
tokenizer = AutoTokenizer.from_pretrained(crf_model_dir)

model.to(device)
model.eval()

print(
    "✓ CRF Model loaded for inference",
    "  Model: XLMRobertaWithCRF",
    f"  Labels: {len(label2id)}",
    f"  Device: {device}",
    sep="\n",
)


LOADING CRF MODEL FOR INFERENCE
✓ CRF Model loaded for inference
  Model: XLMRobertaWithCRF
  Labels: 111
  Device: cuda


In [None]:
# Repeats the Drive mount from the setup cell (useful after a runtime
# restart); when the drive is already mounted Colab only prints a notice.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


CELL 13B: Inference Function for CRF

In [None]:
def _lookup_label(id2label, label_id):
    """Return the label name for `label_id`; accepts str-keyed (JSON-loaded) or int-keyed maps."""
    key = int(label_id)
    if str(key) in id2label:
        return id2label[str(key)]
    return id2label[key]


def _bio_to_slots(tagged_words):
    """Group (word, BIO label) pairs into [{'type': ..., 'value': ...}] in utterance order."""
    slots = []
    open_type = None
    open_words = []

    def close_open_slot():
        if open_type:
            slots.append({'type': open_type, 'value': ' '.join(open_words)})

    for word, label in tagged_words:
        if label.startswith('I-') and label[2:] == open_type:
            # Continuation of the slot that is currently open.
            open_words.append(word)
        elif label == 'O':
            close_open_slot()
            open_type, open_words = None, []
        elif label.startswith(('B-', 'I-')):
            # B- always starts a new slot; an I- that does not continue the
            # open slot is treated as the start of a new one as well.
            close_open_slot()
            open_type, open_words = label[2:], [word]
        # Any other label is ignored and leaves the open slot untouched.

    close_open_slot()
    return slots


def extract_slots_from_plain_utterance_crf(utterance, tokenizer, model, id2label, device, max_length=128):
    """
    Extract slots from a PLAIN utterance using the CRF model.

    Args:
        utterance: raw text; it is split on whitespace into words.
        tokenizer: tokenizer whose encoding exposes `word_ids()`.
        model: callable returning a dict whose 'predictions' entry holds one
            decoded label-id path per sequence (as the CRF model does when
            called without labels). It is used in whatever mode the caller
            left it in.
        id2label: label-id -> label-name map, keyed by str or int.
        device: device the input tensors are moved to.
        max_length: truncation length in tokens (default 128).

    Returns:
        List of {'type': slot_type, 'value': 'space joined words'} dicts in
        utterance order. Each word takes the label of its first sub-word
        token; words without any token (truncated away or dropped by the
        tokenizer) are skipped without shifting the labels of later words.
    """
    words = utterance.split()
    if not words:
        return []

    # Tokenize once; no padding is needed for a single sequence.
    encoding = tokenizer(
        words,
        is_split_into_words=True,
        return_tensors="pt",
        truncation=True,
        max_length=max_length
    )
    # Read the alignment before the encoding is turned into a plain dict.
    word_ids = encoding.word_ids(batch_index=0)
    model_inputs = {name: tensor.to(device) for name, tensor in encoding.items()}

    # Predict with CRF
    with torch.no_grad():
        outputs = model(
            input_ids=model_inputs['input_ids'],
            attention_mask=model_inputs['attention_mask']
        )
    best_path = outputs['predictions'][0]  # one list of label ids per sequence

    # Pair each word with the label of its first sub-word token, in one pass.
    tagged_words = []
    seen_words = set()
    for token_idx, word_idx in enumerate(word_ids):
        if word_idx is None or word_idx in seen_words or token_idx >= len(best_path):
            continue
        seen_words.add(word_idx)
        tagged_words.append((words[word_idx], _lookup_label(id2label, best_path[token_idx])))

    return _bio_to_slots(tagged_words)

print("✓ CRF Inference function ready!")


✓ CRF Inference function ready!


In [None]:
# Demo: run the CRF slot extractor over a handful of plain-text utterances.
print("=" * 70, "PRODUCTION SLOT EXTRACTION (PLAIN TEXT)-CRF", "=" * 70, sep="\n")

test_utterances = [
    "wake me up at 6 AM tomorrow",
    "order a cappuccino and a croissant",
    "book a table at an Italian restaurant at 7pm",
    "play some jazz music by Miles Davis",
    "remind me to call John next Friday",
    "what's the weather in Paris this weekend",
    "set alarm for 8 AM on Monday",
]

for position, utterance in enumerate(test_utterances, start=1):
    print(f"\n{position}. Utterance: {utterance}")

    slots = extract_slots_from_plain_utterance_crf(utterance, tokenizer, model, id2label, device)

    # One line per detected slot, or a placeholder when nothing was found.
    if not slots:
        print("   No slots detected")
    else:
        print("   Detected slots:")
        for slot in slots:
            print(f"     ✓ [{slot['type']}]: {slot['value']}")

    print("-" * 60)

print("\n✓ Production extraction complete!")


PRODUCTION SLOT EXTRACTION (PLAIN TEXT)-CRF

1. Utterance: wake me up at 6 AM tomorrow
   Detected slots:
     ✓ [time]: 6 AM
     ✓ [date]: tomorrow
------------------------------------------------------------

2. Utterance: order a cappuccino and a croissant
   Detected slots:
     ✓ [food_type]: cappuccino
     ✓ [food_type]: croissant
------------------------------------------------------------

3. Utterance: book a table at an Italian restaurant at 7pm
   Detected slots:
     ✓ [food_type]: Italian
     ✓ [business_type]: restaurant
     ✓ [time]: 7pm
------------------------------------------------------------

4. Utterance: play some jazz music by Miles Davis
   Detected slots:
     ✓ [music_genre]: jazz
     ✓ [artist_name]: Miles Davis
------------------------------------------------------------

5. Utterance: remind me to call John next Friday
   Detected slots:
     ✓ [event_name]: call John
     ✓ [date]: Friday
------------------------------------------------------------

