**MULTILINGUAL NLU SYSTEM - INTENT + SLOT FILLING**

In [1]:
!pip install -q transformers torch safetensors

import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification, AutoModelForTokenClassification
import json
import os

# Check GPU
print(f"\nGPU Info:")
print(f"Device: {torch.cuda.get_device_name(0)}")
print(f"GPU Memory: {torch.cuda.get_device_properties(0).total_memory / 1e9:.1f} GB")

print("\n Setup complete!")


GPU Info:
Device: NVIDIA A100-SXM4-40GB
GPU Memory: 42.5 GB

 Setup complete!


In [3]:
print("\n=== Loading Intent Classification Model ===")

drive_path = '/content/drive/MyDrive/intent_project'
intent_model_dir = f'{drive_path}/xlm-roberta-intent-classifier-final'

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Load with optimizations
intent_tokenizer = AutoTokenizer.from_pretrained(intent_model_dir)
intent_model = AutoModelForSequenceClassification.from_pretrained(
    intent_model_dir,
    torch_dtype=torch.float16,  # Use half precision for A100
    device_map="auto"            # Automatic device placement
)
intent_model.eval()

# Load intent mappings
with open(f'{intent_model_dir}/intent2id.json', 'r') as f:
    intent2id = json.load(f)
with open(f'{intent_model_dir}/id2intent.json', 'r') as f:
    id2intent = json.load(f)

print(f"✓ Intent model loaded ({len(id2intent)} intents)")


=== Loading Intent Classification Model ===
✓ Intent model loaded (60 intents)


In [4]:
print("\n=== Loading Slot Filling Model ===")

slot_model_dir = f'{drive_path}/slot_filling_model/final_model'

# Load with optimizations
slot_tokenizer = AutoTokenizer.from_pretrained(slot_model_dir)
slot_model = AutoModelForTokenClassification.from_pretrained(
    slot_model_dir,
    torch_dtype=torch.float16,  # Use half precision
    device_map="auto"
)
slot_model.eval()

# Load slot mappings
with open(f'{drive_path}/slot_filling_model/slot2id.json', 'r') as f:
    slot2id = json.load(f)
with open(f'{drive_path}/slot_filling_model/id2slot.json', 'r') as f:
    id2slot = json.load(f)

print(f"✓ Slot model loaded ({len(id2slot)} slot tags)")
print(f"✓ Using mixed precision (float16) for A100")


=== Loading Slot Filling Model ===
✓ Slot model loaded (111 slot tags)
✓ Using mixed precision (float16) for A100


In [5]:
@torch.inference_mode()  # Context manager for inference optimization
def predict_intent(utterance, tokenizer, model, id2intent, device):
    """Predict intent for utterance"""
    inputs = tokenizer(
        utterance,
        return_tensors='pt',
        truncation=True,
        max_length=128,
        padding='max_length'
    ).to(device)

    with torch.cuda.amp.autocast():  # Enable autocasting for A100
        outputs = model(**inputs)
        logits = outputs.logits
        pred_id = torch.argmax(logits, dim=-1).item()
        confidence = torch.softmax(logits, dim=-1).squeeze()[pred_id].item()

    intent = id2intent[str(pred_id)]
    return intent, confidence

In [6]:
@torch.inference_mode()
def extract_slots(utterance, tokenizer, model, id2slot, device):
    """Extract slot entities from utterance"""
    inputs = tokenizer(
        utterance,
        return_tensors='pt',
        truncation=True,
        max_length=128,
        padding='max_length'
    ).to(device)

    with torch.cuda.amp.autocast():
        outputs = model(**inputs)
        predictions = torch.argmax(outputs.logits, dim=-1).squeeze().cpu().numpy()

    tokens = tokenizer.convert_ids_to_tokens(inputs['input_ids'].squeeze().cpu().numpy())

    # Decode BIO tags
    slots_raw = []
    current_slot_type = None
    current_slot_tokens = []

    for token, pred_id in zip(tokens, predictions):
        slot_label = id2slot[str(int(pred_id))]

        if token in ['<s>', '</s>', '<pad>']:
            if current_slot_type and current_slot_tokens:
                slots_raw.append({'type': current_slot_type, 'tokens': current_slot_tokens})
            current_slot_type = None
            current_slot_tokens = []
            continue

        if slot_label.startswith('B-'):
            if current_slot_type and current_slot_tokens:
                slots_raw.append({'type': current_slot_type, 'tokens': current_slot_tokens})
            current_slot_type = slot_label[2:]
            current_slot_tokens = [token]
        elif slot_label.startswith('I-'):
            slot_type = slot_label[2:]
            if slot_type == current_slot_type and current_slot_type:
                current_slot_tokens.append(token)
            else:
                if current_slot_type and current_slot_tokens:
                    slots_raw.append({'type': current_slot_type, 'tokens': current_slot_tokens})
                current_slot_type = slot_type
                current_slot_tokens = [token]
        elif slot_label == 'O':
            if current_slot_type and current_slot_tokens:
                slots_raw.append({'type': current_slot_type, 'tokens': current_slot_tokens})
            current_slot_type = None
            current_slot_tokens = []

    if current_slot_type and current_slot_tokens:
        slots_raw.append({'type': current_slot_type, 'tokens': current_slot_tokens})

    # Post-process: merge subword tokens
    slots_merged = []
    i = 0
    while i < len(slots_raw):
        current = slots_raw[i]
        merged_tokens = list(current['tokens'])
        j = i + 1
        while j < len(slots_raw) and slots_raw[j]['type'] == current['type']:
            next_token = slots_raw[j]['tokens'][0]
            if not next_token.startswith('▁'):
                merged_tokens.extend(slots_raw[j]['tokens'])
                j += 1
            else:
                break

        slot_value = ''
        for token in merged_tokens:
            if token.startswith('▁'):
                if slot_value:
                    slot_value += ' '
                slot_value += token[1:]
            else:
                slot_value += token

        slot_value = slot_value.strip()
        if slot_value:
            slots_merged.append((current['type'], slot_value))
        i = j

    return slots_merged

In [7]:
# ============================================================
# STEP 6: Combined NLU Function
# ============================================================

def process_utterance(utterance):
    """Process utterance: extract intent and slots"""
    intent, confidence = predict_intent(utterance, intent_tokenizer, intent_model, id2intent, device)
    slots = extract_slots(utterance, slot_tokenizer, slot_model, id2slot, device)

    return {
        'utterance': utterance,
        'intent': intent,
        'confidence': confidence,
        'slots': slots
    }

In [8]:
# ============================================================
# STEP 7: Test Combined System
# ============================================================

print("\n" + "="*70)
print("TESTING COMBINED NLU SYSTEM (A100)")
print("="*70 + "\n")

test_utterances = [
    "Wake me up at 6 AM tomorrow",
    "Book a table at an Italian restaurant at 7pm",
    "Play some jazz music by Miles Davis",
    "Turn off the bedroom lights",
    "Remind me to call John next Friday",
    "What's the weather in Paris this weekend",
    "Set alarm for 8 AM on Monday",
    "Order a cappuccino and a croissant",
]

# Batch inference for speed
import time
start = time.time()

results = []
for i, utterance in enumerate(test_utterances, 1):
    result = process_utterance(utterance)
    results.append(result)

    print(f"{i}. Utterance: {result['utterance']}")
    print(f"   Intent: {result['intent']} (confidence: {result['confidence']:.4f})")

    if result['slots']:
        print("   Slots:")
        for slot_type, slot_value in result['slots']:
            print(f"     - [{slot_type}]: {slot_value}")
    else:
        print("   Slots: None")

    print("-" * 70)

elapsed = time.time() - start
print(f"\n✓ Processed {len(test_utterances)} utterances in {elapsed:.2f}s")
print(f"✓ Average: {elapsed/len(test_utterances)*1000:.1f}ms per utterance")



TESTING COMBINED NLU SYSTEM (A100)



  with torch.cuda.amp.autocast():  # Enable autocasting for A100


1. Utterance: Wake me up at 6 AM tomorrow
   Intent: alarm_set (confidence: 0.9998)
   Slots:
     - [time]: 6
     - [time]: AM
     - [date]: tomorrow
----------------------------------------------------------------------
2. Utterance: Book a table at an Italian restaurant at 7pm
   Intent: recommendation_locations (confidence: 0.4810)
   Slots:
     - [business_type]: Italian restaurant
     - [time]: 7pm
----------------------------------------------------------------------
3. Utterance: Play some jazz music by Miles Davis
   Intent: play_music (confidence: 0.9987)
   Slots:
     - [music_genre]: jazz
     - [artist_name]: Miles
     - [artist_name]: Davis
----------------------------------------------------------------------
4. Utterance: Turn off the bedroom lights
   Intent: iot_hue_lightoff (confidence: 0.9989)
   Slots:
     - [house_place]: bedroom
----------------------------------------------------------------------
5. Utterance: Remind me to call John next Friday
   Intent

In [9]:
# STEP 8: Save Results
# ============================================================

output_path = f'{drive_path}/combined_nlu_results.json'
with open(output_path, 'w') as f:
    json.dump(results, f, indent=2, ensure_ascii=False)

print(f"\n✓ Results saved to: {output_path}")


✓ Results saved to: /content/drive/MyDrive/intent_project/combined_nlu_results.json


In [10]:
print("\n" + "="*70)
print("INTERACTIVE NLU TESTING (A100)")
print("="*70)
print("\nType utterances to test (or 'quit' to exit)\n")

while True:
    user_input = input("Enter utterance: ").strip()

    if user_input.lower() in ['quit', 'exit', 'q', '']:
        print("\n Testing complete!")
        break

    result = process_utterance(user_input)

    print(f"\nIntent: {result['intent']} ({result['confidence']:.2%})")
    if result['slots']:
        print("Slots:")
        for slot_type, slot_value in result['slots']:
            print(f"  [{slot_type}]: {slot_value}")
    else:
        print("Slots: None")
    print()


INTERACTIVE NLU TESTING (A100)

Type utterances to test (or 'quit' to exit)

Enter utterance: రేపు జాన్ కి ఈమెయిల్ పంపమని నాకు గుర్తు చేయి.


  with torch.cuda.amp.autocast():  # Enable autocasting for A100



Intent: calendar_set (99.93%)
Slots:
  [date]: రేపు
  [person]: జాన్

Enter utterance: remind me to send an email tomorrow to john

Intent: calendar_set (99.82%)
Slots:
  [date]: tomorrow
  [date]: to
  [person]: john

Enter utterance: రేపు జాన్ కి ఒక ఈమెయిల్ పంపు.

Intent: email_sendemail (99.97%)
Slots:
  [date]: రేపు
  [person]: జాన్

Enter utterance: quit

 Testing complete!
