In [72]:
import pandas as pd

df_train = pd.read_csv("C:\\Users\\FPTSHOP\\Courses\\NLP\\Lab_05\\data\\hwu\\train.csv", sep=",", header=0, names=["text", "intent"])
df_val = pd.read_csv("C:\\Users\\FPTSHOP\\Courses\\NLP\\Lab_05\\data\\hwu\\val.csv", sep=",", header=0, names=["text", "intent"])
df_test = pd.read_csv("C:\\Users\\FPTSHOP\\Courses\\NLP\\Lab_05\\data\\hwu\\test.csv", sep=",", header=0, names=["text", "intent"])

print("Train shape: ", df_train.shape)
print("Validation shape: ", df_val.shape)
print("Test shape: ", df_test.shape)

print(df_train.head())

Train shape:  (8954, 2)
Validation shape:  (1076, 2)
Test shape:  (1076, 2)
                                                text       intent
0                what alarms do i have set right now  alarm_query
1                    checkout today alarm of meeting  alarm_query
2                              report alarm settings  alarm_query
3  see see for me the alarms that you have set to...  alarm_query
4                       is there an alarm for ten am  alarm_query


In [73]:
from sklearn.preprocessing import LabelEncoder

intents = df_train["intent"]

le = LabelEncoder()
le.fit(intents)

df_train["intent_label"] = le.transform(df_train["intent"])
df_val["intent_label"] = le.transform(df_val["intent"])
df_test["intent_label"] = le.transform(df_test["intent"])

num_classes = len(le.classes_)
print(num_classes)
print(df_train.head())

64
                                                text       intent  \
0                what alarms do i have set right now  alarm_query   
1                    checkout today alarm of meeting  alarm_query   
2                              report alarm settings  alarm_query   
3  see see for me the alarms that you have set to...  alarm_query   
4                       is there an alarm for ten am  alarm_query   

   intent_label  
0             0  
1             0  
2             0  
3             0  
4             0  


In [74]:
from sklearn.metrics import f1_score
import numpy as np

sample_texts = {
    "can you remind me to not call my mom": "reminder_create",
    "is it going to be sunny or rainy tomorrow": "weather_query",
    "find a flight from new york to london but not through paris": "flight_search"
}

# Model 1

In [75]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import make_pipeline
from sklearn.metrics import classification_report

tfidf_lr_pipeline = make_pipeline(
    TfidfVectorizer(max_features=5000),
    LogisticRegression(max_iter=1000)
)

tfidf_lr_pipeline.fit(df_train["text"], df_train["intent_label"])

y_pred = tfidf_lr_pipeline.predict(df_test["text"])
print(classification_report(df_test["intent_label"], y_pred, target_names=le.classes_))

                          precision    recall  f1-score   support

             alarm_query       0.90      0.95      0.92        19
            alarm_remove       1.00      0.73      0.84        11
               alarm_set       0.77      0.89      0.83        19
       audio_volume_down       1.00      0.75      0.86         8
       audio_volume_mute       0.92      0.80      0.86        15
         audio_volume_up       0.93      1.00      0.96        13
          calendar_query       0.45      0.53      0.49        19
         calendar_remove       0.89      0.89      0.89        19
            calendar_set       0.87      0.68      0.76        19
          cooking_recipe       0.59      0.68      0.63        19
        datetime_convert       0.67      0.75      0.71         8
          datetime_query       0.74      0.89      0.81        19
        email_addcontact       0.78      0.88      0.82         8
             email_query       0.83      0.79      0.81        19
      ema

In [76]:
# F1 marco
f1_macro = f1_score(df_test["intent_label"], y_pred, average='macro')
print("F1-score (macro):", f1_macro)

# Test sample
y_pred = tfidf_lr_pipeline.predict(list(sample_texts.keys()))
predicted_intents = le.inverse_transform(y_pred)

for text, pred in zip(sample_texts.keys(), predicted_intents):
    print(f"\nText: {text}")
    print(f"-> Predicted intent: {pred}")
    print(f"-> True intent: {sample_texts[text]}")

F1-score (macro): 0.8352983005857358

Text: can you remind me to not call my mom
-> Predicted intent: calendar_set
-> True intent: reminder_create

Text: is it going to be sunny or rainy tomorrow
-> Predicted intent: weather_query
-> True intent: weather_query

Text: find a flight from new york to london but not through paris
-> Predicted intent: general_negate
-> True intent: flight_search


# Model 2

In [77]:
from gensim.models import Word2Vec
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Input
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.callbacks import EarlyStopping

sentences = [text.lower().split() for text in df_train["text"]]
w2v_model = Word2Vec(sentences, vector_size=50, window=4, min_count=2, workers=4)

def sentence_to_avg_vector(text, model):
    words = text.lower().split()
    word_vectors = []

    for w in words:
        if (w in model.wv):
            word_vectors.append(model.wv[w])
    
    if (len(word_vectors) > 0):
        avg_vector = np.mean(word_vectors, axis=0)
    else:
        avg_vector = np.zeros(model.vector_size)

    return avg_vector

X_train = np.array([sentence_to_avg_vector(text, w2v_model) for text in df_train["text"]])
X_val = np.array([sentence_to_avg_vector(text, w2v_model) for text in df_val["text"]])
X_test = np.array([sentence_to_avg_vector(text, w2v_model) for text in df_test["text"]])

y_train = to_categorical(df_train["intent_label"], num_classes)
y_val = to_categorical(df_val["intent_label"], num_classes)
y_test = to_categorical(df_test["intent_label"], num_classes)

model = Sequential([
    Input(shape=(w2v_model.vector_size,)),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(num_classes, activation="softmax")
])

model.compile(
    optimizer="adam",
    loss="categorical_crossentropy",
    metrics=["accuracy"]
)

early_stop = EarlyStopping(
    monitor="val_loss",
    patience=4,
    restore_best_weights=True
)

history = model.fit(
    X_train, y_train,
    validation_data=(X_val, y_val),
    epochs=10,
    batch_size=32,
    callbacks=[early_stop]
)

print("\n")
loss, acc = model.evaluate(X_test, y_test)
print(f"\nTest Loss: {loss:.4f}, Test Accuracy: {acc:.4f}")

Epoch 1/10
[1m280/280[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.0255 - loss: 4.1287 - val_accuracy: 0.0567 - val_loss: 4.0750
Epoch 2/10
[1m280/280[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.0504 - loss: 4.0329 - val_accuracy: 0.0716 - val_loss: 3.9530
Epoch 3/10
[1m280/280[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.0639 - loss: 3.8996 - val_accuracy: 0.1041 - val_loss: 3.8035
Epoch 4/10
[1m280/280[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - accuracy: 0.0778 - loss: 3.7771 - val_accuracy: 0.1143 - val_loss: 3.6745
Epoch 5/10
[1m280/280[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.0915 - loss: 3.6731 - val_accuracy: 0.1245 - val_loss: 3.5795
Epoch 6/10
[1m280/280[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.1021 - loss: 3.5881 - val_accuracy: 0.1608 - val_loss: 3.4942
Epoch 7/10
[1m280/280[0m 

In [78]:
# F1 macro
y_pred = model.predict(X_test)
y_pred_classes = np.argmax(y_pred, axis=1)
y_true_classes = df_test["intent_label"]

f1_macro = f1_score(y_true_classes, y_pred_classes, average='macro')
print("F1-score (macro):", f1_macro)

# Test sample
sample = np.array([sentence_to_avg_vector(text, w2v_model) for text in list(sample_texts.keys())])

y_pred_probs = model.predict(sample)
y_pred_classes = np.argmax(y_pred_probs, axis=1)
predicted_intents = le.inverse_transform(y_pred_classes)

for text, pred in zip(sample_texts.keys(), predicted_intents):
    print(f"\nText: {text}")
    print(f"-> Predicted intent: {pred}")
    print(f"-> True intent: {sample_texts[text]}")

[1m34/34[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step 
F1-score (macro): 0.111145440233925
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step

Text: can you remind me to not call my mom
-> Predicted intent: general_explain
-> True intent: reminder_create

Text: is it going to be sunny or rainy tomorrow
-> Predicted intent: alarm_query
-> True intent: weather_query

Text: find a flight from new york to london but not through paris
-> Predicted intent: email_sendemail
-> True intent: flight_search


# Model 3

In [79]:
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.layers import Embedding, LSTM
from tensorflow.keras.utils import to_categorical


tokenizer = Tokenizer(num_words=2000, oov_token="<UNK>")
tokenizer.fit_on_texts(df_train["text"])

X_train_sequences = tokenizer.texts_to_sequences(df_train["text"])
X_val_sequences = tokenizer.texts_to_sequences(df_val["text"])
X_test_sequences = tokenizer.texts_to_sequences(df_test["text"])

max_len = 20
X_train_pad = pad_sequences(X_train_sequences, maxlen=max_len, padding="post")
X_val_pad = pad_sequences(X_val_sequences, maxlen=max_len, padding="post")
X_test_pad = pad_sequences(X_test_sequences, maxlen=max_len, padding="post")

y_train = to_categorical(df_train["intent_label"], num_classes)
y_val = to_categorical(df_val["intent_label"], num_classes)
y_test = to_categorical(df_test["intent_label"], num_classes)

vocab_size = len(tokenizer.word_index) + 1
embedding_dim = w2v_model.vector_size
embedding_matrix = np.zeros((vocab_size, embedding_dim))
for word, i in tokenizer.word_index.items():
    if (word in w2v_model.wv):
        embedding_matrix[i] = w2v_model.wv[word]

lstm_model_pretrained = Sequential([
    Embedding(
        input_dim=vocab_size,
        output_dim=embedding_dim,
        weights=[embedding_matrix],
        # input_length=max_len,
        trainable=False
    ),
    LSTM(128, dropout=0.2, recurrent_dropout=0.2),
    Dense(num_classes, activation="softmax")
])

lstm_model_pretrained.compile(
    optimizer="adam",
    loss="categorical_crossentropy",
    metrics=["accuracy"]
)

early_stop = EarlyStopping(
    monitor="val_loss",
    patience=4,
    restore_best_weights=True
)

history = lstm_model_pretrained.fit(
    X_train_pad, y_train,
    validation_data=(X_val_pad, y_val),
    epochs=10,
    batch_size=32,
    callbacks=[early_stop]
)

loss, acc = lstm_model_pretrained.evaluate(X_test_pad, y_test)
print(f"\nTest Loss: {loss:.4f}, Test Accuracy: {acc:.4f}")

Epoch 1/10
[1m280/280[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 9ms/step - accuracy: 0.0302 - loss: 4.0552 - val_accuracy: 0.0520 - val_loss: 3.8312
Epoch 2/10
[1m280/280[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 9ms/step - accuracy: 0.0572 - loss: 3.8149 - val_accuracy: 0.0771 - val_loss: 3.6178
Epoch 3/10
[1m280/280[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 8ms/step - accuracy: 0.0787 - loss: 3.6706 - val_accuracy: 0.0967 - val_loss: 3.5107
Epoch 4/10
[1m280/280[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 8ms/step - accuracy: 0.0850 - loss: 3.5838 - val_accuracy: 0.1134 - val_loss: 3.4191
Epoch 5/10
[1m280/280[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 8ms/step - accuracy: 0.1023 - loss: 3.5092 - val_accuracy: 0.1283 - val_loss: 3.3649
Epoch 6/10
[1m280/280[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 8ms/step - accuracy: 0.1101 - loss: 3.4593 - val_accuracy: 0.1515 - val_loss: 3.2999
Epoch 7/10
[1m280/280[0m 

In [80]:
# F1 macro
y_pred = lstm_model_pretrained.predict(X_test_pad)
y_pred_classes = np.argmax(y_pred, axis=1)
y_true_classes = df_test["intent_label"]

f1_macro = f1_score(y_true_classes, y_pred_classes, average='macro')
print("F1-score (macro):", f1_macro)

# Test sample
sample_sequences = tokenizer.texts_to_sequences(list(sample_texts.keys()))
sample_padded = pad_sequences(sample_sequences, maxlen=max_len, padding='post')

y_pred_probs = lstm_model_pretrained.predict(sample_padded)
y_pred = np.argmax(y_pred_probs, axis=1)
predicted_intents = le.inverse_transform(y_pred)

for text, pred in zip(sample_texts.keys(), predicted_intents):
    print(f"\nText: {text}")
    print(f"-> Predicted intent: {pred}")
    print(f"-> True intent: {sample_texts[text]}")

[1m34/34[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step
F1-score (macro): 0.11794393951425236
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step

Text: can you remind me to not call my mom
-> Predicted intent: takeaway_order
-> True intent: reminder_create

Text: is it going to be sunny or rainy tomorrow
-> Predicted intent: email_query
-> True intent: weather_query

Text: find a flight from new york to london but not through paris
-> Predicted intent: transport_ticket
-> True intent: flight_search


# Model 4

In [81]:
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.layers import Embedding, LSTM
from tensorflow.keras.utils import to_categorical


tokenizer = Tokenizer(num_words=2000, oov_token="<UNK>")
tokenizer.fit_on_texts(df_train["text"])

X_train_sequences = tokenizer.texts_to_sequences(df_train["text"])
X_val_sequences = tokenizer.texts_to_sequences(df_val["text"])
X_test_sequences = tokenizer.texts_to_sequences(df_test["text"])

max_len = 20
X_train_pad = pad_sequences(X_train_sequences, maxlen=max_len, padding="post")
X_val_pad = pad_sequences(X_val_sequences, maxlen=max_len, padding="post")
X_test_pad = pad_sequences(X_test_sequences, maxlen=max_len, padding="post")

y_train = to_categorical(df_train["intent_label"], num_classes)
y_val = to_categorical(df_val["intent_label"], num_classes)
y_test = to_categorical(df_test["intent_label"], num_classes)

vocab_size = len(tokenizer.word_index) + 1
embedding_dim = 400

lstm_model_scratch = Sequential([
    Embedding(
        input_dim=vocab_size,
        output_dim=embedding_dim,
        # input_length=max_len,
    ),
    LSTM(128, dropout=0.2, recurrent_dropout=0.2),
    Dense(num_classes, activation="softmax")
])

lstm_model_scratch.compile(
    optimizer="adam",
    loss="categorical_crossentropy",
    metrics=["accuracy"]
)

early_stop = EarlyStopping(
    monitor="val_loss",
    patience=4,
    restore_best_weights=True
)

history = lstm_model_scratch.fit(
    X_train_pad, y_train,
    validation_data=(X_val_pad, y_val),
    epochs=10,
    batch_size=32,
    callbacks=[early_stop]
)

print("\n")
loss, acc = lstm_model_scratch.evaluate(X_test_pad, y_test)
print(f"Test Loss: {loss:.4f}, Test Accuracy: {acc:.4f}")

y_pred = lstm_model_scratch.predict(X_test_pad)

y_pred_classes = np.argmax(y_pred, axis=1)
y_true_classes = df_test["intent_label"]

f1_macro = f1_score(y_true_classes, y_pred_classes, average='macro')
print("F1-score (macro):", f1_macro)

Epoch 1/10
[1m280/280[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 26ms/step - accuracy: 0.0568 - loss: 3.6976 - val_accuracy: 0.1338 - val_loss: 3.0589
Epoch 2/10
[1m280/280[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 22ms/step - accuracy: 0.2980 - loss: 2.4088 - val_accuracy: 0.5613 - val_loss: 1.5993
Epoch 3/10
[1m280/280[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 21ms/step - accuracy: 0.6399 - loss: 1.3033 - val_accuracy: 0.7639 - val_loss: 0.9478
Epoch 4/10
[1m280/280[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 21ms/step - accuracy: 0.7939 - loss: 0.7789 - val_accuracy: 0.7918 - val_loss: 0.7690
Epoch 5/10
[1m280/280[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 21ms/step - accuracy: 0.8534 - loss: 0.5420 - val_accuracy: 0.8290 - val_loss: 0.6552
Epoch 6/10
[1m280/280[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 21ms/step - accuracy: 0.8908 - loss: 0.4077 - val_accuracy: 0.8411 - val_loss: 0.6078
Epoch 7/10
[1m280/28

In [82]:
# F1 macro
y_pred = lstm_model_scratch.predict(X_test_pad)
y_pred_classes = np.argmax(y_pred, axis=1)
y_true_classes = df_test["intent_label"]

f1_macro = f1_score(y_true_classes, y_pred_classes, average='macro')
print("F1-score (macro):", f1_macro)

# Test sample
sample_sequences = tokenizer.texts_to_sequences(list(sample_texts.keys()))
sample_padded = pad_sequences(sample_sequences, maxlen=max_len, padding='post')

y_pred_probs = lstm_model_scratch.predict(sample_padded)
y_pred_classes = np.argmax(y_pred_probs, axis=1)
predicted_intents = le.inverse_transform(y_pred_classes)

for text, pred in zip(sample_texts.keys(), predicted_intents):
    print(f"\nText: {text}")
    print(f"-> Predicted intent: {pred}")
    print(f"-> True intent: {sample_texts[text]}")

[1m34/34[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step
F1-score (macro): 0.8238542875628776
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms/step

Text: can you remind me to not call my mom
-> Predicted intent: calendar_set
-> True intent: reminder_create

Text: is it going to be sunny or rainy tomorrow
-> Predicted intent: weather_query
-> True intent: weather_query

Text: find a flight from new york to london but not through paris
-> Predicted intent: transport_ticket
-> True intent: flight_search
