In [1]:
import tensorflow as tf
from tensorflow.keras import layers
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
import numpy as np

In [9]:
# Define the intent labels
labels = ["效率", "良率", "時間","機台", "員工"]

# Define the training data
train_data = [
    ("今天工廠整場表現", "效率"),
    ("哪幾台產出最多良品", "良率"),
    ("有哪些機台異常？", "機台"),
    ("哪些工單會遲交", "時間"),
    ("請問 小王 今天表現如何", "員工"),
    ("請問 小明 這週表現如何", "員工")
]

# Tokenize the training data
tokenizer = Tokenizer()
tokenizer.fit_on_texts([text for text, label in train_data])

# Convert the training data to sequences
train_sequences = tokenizer.texts_to_sequences([text for text, label in train_data])
print("[text for text, label in train_data]", [text for text, label in train_data])
print("train_sequences", train_sequences)

# Pad the sequences to a fixed length
max_length = max(len(seq) for seq in train_sequences)
train_sequences = pad_sequences(train_sequences, maxlen=max_length, padding='post')

# Convert the labels to one-hot encodings
label_encoder = {label: i for i, label in enumerate(labels)}
train_labels = [label_encoder[label] for text, label in train_data]
train_labels = tf.keras.utils.to_categorical(train_labels, num_classes=len(labels))

# Define the model architecture
model = tf.keras.Sequential()
model.add(layers.Embedding(input_dim=len(tokenizer.word_index)+1, output_dim=32, input_length=max_length))
model.add(layers.Conv1D(64, 5, activation='relu', padding="same"))
model.add(layers.GlobalMaxPooling1D())
model.add(layers.Dense(128, activation='relu'))
model.add(layers.Dense(64, activation='relu'))
model.add(layers.Dense(64, activation='relu'))
model.add(layers.Dense(32, activation='relu'))
model.add(layers.Dense(len(labels), activation='softmax'))

# Compile the model
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# Train the model
model.fit(train_sequences, train_labels, epochs=100)

# Evaluate the model
test_data = [
    ("今天有 Gp122. 來的急出貨嗎？", "時間"),
    ("阿華工作進度如何", "員工"),
    ("機台 H122 怎麼沒在動 ", "機台"),
    ("事不是可以提高訂單量", "效率")
]

test_sequences = tokenizer.texts_to_sequences([text for text, label in test_data])
test_sequences = pad_sequences(test_sequences, maxlen=max_length, padding='post')
test_labels = [label_encoder[label] for text, label in test_data]
test_labels = tf.keras.utils.to_categorical(test_labels, num_classes=len(labels))

loss, accuracy = model.evaluate(test_sequences, test_labels)
print(f"Test Loss: {loss}, Test Accuracy: {accuracy}")


[text for text, label in train_data] ['今天工廠整場表現', '哪幾台產出最多良品', '有哪些機台異常？', '哪些工單會遲交', '請問 小王 今天表現如何', '請問 小明 這週表現如何']
train_sequences [[2], [3], [4], [5], [1, 6, 7], [1, 8, 9]]
Train on 6 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 6

In [10]:
model.summary()

Model: "sequential_7"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_7 (Embedding)      (None, 3, 32)             320       
_________________________________________________________________
conv1d_7 (Conv1D)            (None, 3, 64)             10304     
_________________________________________________________________
global_max_pooling1d_7 (Glob (None, 64)                0         
_________________________________________________________________
dense_23 (Dense)             (None, 128)               8320      
_________________________________________________________________
dense_24 (Dense)             (None, 64)                8256      
_________________________________________________________________
dense_25 (Dense)             (None, 64)                4160      
_________________________________________________________________
dense_26 (Dense)             (None, 32)               

In [22]:
questions = "請問 陳先生表現如合？"
sequence_input = tokenizer.texts_to_sequences([questions])
padded_input = pad_sequences(sequence_input, maxlen=max_length , padding="post")
padded_input

array([[1, 0, 0]], dtype=int32)

In [23]:
result = model.predict(padded_input)

In [24]:
result

array([[1.1739888e-02, 4.5997676e-04, 4.6455115e-03, 9.8133296e-01,
        1.8216027e-03]], dtype=float32)

In [25]:
predicted_probs = model.predict(padded_input)[0]
predicted_label_index = np.argmax(result)


if predicted_probs[predicted_label_index] < 0.9:
    print("老賽！")
else:
    predicted_label = labels[predicted_label_index]
    print("predicted_label", predicted_label)

predicted_label 機台
