In [83]:
import tensorflow as tf
from tensorflow.keras import layers
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
import numpy as np

In [84]:
# Intent classes the classifier can emit; a label's list position is its class index.
# NOTE(review): "unknown" is declared here but has no examples in train_data below.
labels = ["greeting", "goodbye", "thanks", "unknown"]

# Labelled (utterance, intent) pairs used to fit the model.
train_data = [
    ("Hi there!", "greeting"),
    ("Hello!", "greeting"),
    ("Goodbye!", "goodbye"),
    ("See you later!", "goodbye"),
    ("Thanks!", "thanks"),
    ("Thank you!", "thanks")
]

# Build the vocabulary from the training utterances only.
train_texts = [utterance for utterance, _ in train_data]
tokenizer = Tokenizer()
tokenizer.fit_on_texts(train_texts)

# Map every utterance to its list of word indices and show both for inspection.
train_sequences = tokenizer.texts_to_sequences(train_texts)
print("[text for text, label in train_data]", train_texts)
print("train_sequences", train_sequences)

# Pad at the end of each sequence up to the longest training sequence.
max_length = max(map(len, train_sequences))
train_sequences = pad_sequences(train_sequences, maxlen=max_length, padding='post')

# Intent name -> class index, then one-hot targets with one column per label.
label_encoder = dict(zip(labels, range(len(labels))))
train_labels = tf.keras.utils.to_categorical(
    [label_encoder[intent] for _, intent in train_data],
    num_classes=len(labels),
)

# Embedding -> 1D convolution -> global max pool -> dense softmax classifier.
# The embedding gets one extra row beyond the fitted vocabulary size.
vocab_size = len(tokenizer.word_index) + 1
model = tf.keras.Sequential([
    layers.Embedding(input_dim=vocab_size, output_dim=64, input_length=max_length),
    layers.Conv1D(64, 5, activation='relu', padding="same"),
    layers.GlobalMaxPooling1D(),
    layers.Dense(32, activation='relu'),
    layers.Dense(len(labels), activation='softmax'),
])

# One-hot targets pair with categorical cross-entropy.
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# Fit on the six training examples.
model.fit(train_sequences, train_labels, epochs=50)

# Held-out style check.
# NOTE(review): three of these four utterances also appear in train_data,
# so the reported accuracy is not a measure of generalisation.
test_data = [
    ("Hi there!", "greeting"),
    ("Goodbye!", "goodbye"),
    ("Thanks!", "thanks"),
    ("What's the weather like today?", "unknown")
]

# Encode the test set exactly like the training set (same tokenizer, same length).
test_texts = [utterance for utterance, _ in test_data]
test_sequences = pad_sequences(
    tokenizer.texts_to_sequences(test_texts), maxlen=max_length, padding='post'
)
test_labels = tf.keras.utils.to_categorical(
    [label_encoder[intent] for _, intent in test_data],
    num_classes=len(labels),
)

loss, accuracy = model.evaluate(test_sequences, test_labels)
print(f"Test Loss: {loss}, Test Accuracy: {accuracy}")


[text for text, label in train_data] ['Hi there!', 'Hello!', 'Goodbye!', 'See you later!', 'Thanks!', 'Thank you!']
train_sequences [[2, 3], [4], [5], [6, 1, 7], [8], [9, 1]]
Train on 6 samples
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
Test Loss: 0.9598678946495056, Test Accuracy: 0.75


In [85]:
# Print the layer-by-layer output shapes and parameter counts of the fitted model.
model.summary()

Model: "sequential_6"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_6 (Embedding)      (None, 3, 64)             640       
_________________________________________________________________
conv1d_6 (Conv1D)            (None, 3, 64)             20544     
_________________________________________________________________
global_max_pooling1d_6 (Glob (None, 64)                0         
_________________________________________________________________
dense_12 (Dense)             (None, 32)                2080      
_________________________________________________________________
dense_13 (Dense)             (None, 4)                 132       
Total params: 23,396
Trainable params: 23,396
Non-trainable params: 0
_________________________________________________________________


In [95]:
# Ad-hoc utterance to classify; none of its words occur in the training texts.
questions = "i have no ideas "
# Encode with the tokenizer fitted on the training data. That tokenizer was
# created without an oov_token, so out-of-vocabulary words yield no indices.
sequence_input = tokenizer.texts_to_sequences([questions])
# Pad to the training sequence length; with no known words the row is all zeros.
padded_input = pad_sequences(sequence_input, maxlen=max_length , padding="post")
# Show the encoded input as the cell output.
padded_input

array([[0, 0, 0]], dtype=int32)

In [96]:
# Run the model on the padded input; the softmax layer gives one probability per intent label.
result = model.predict(padded_input)

In [97]:
# Show the predicted class probabilities as the cell output.
result

array([[0.40442273, 0.18461552, 0.35208997, 0.05887186]], dtype=float32)

In [98]:
# Minimum softmax probability required before a predicted intent is reported.
CONFIDENCE_THRESHOLD = 0.7

# Class probabilities for the single padded utterance (row 0 of the batch).
predicted_probs = model.predict(padded_input)[0]
# Take the argmax of the probabilities computed in this cell, so the cell does
# not depend on a variable left over from an earlier kernel session.
predicted_label_index = np.argmax(predicted_probs)

if predicted_probs[predicted_label_index] < CONFIDENCE_THRESHOLD:
    # No intent is confident enough: emit the fallback message.
    print("老賽！")
else:
    predicted_label = labels[predicted_label_index]
    print("predicted_label", predicted_label)

老賽！
