# CENG463 PA2

In this programming assignment, you will be dealing with word embeddings and neural networks. You will use Python for this task. You can use libraries such as `pandas`, `nltk`, `numpy` etc. for your implementations, or implement your own functions. However, you are expected to analyse and reason about your implementation and results. The assignment consists of 3 questions.

### IMPORTANT NOTE

Do not move or delete the given cells; only add cells in between the questions for your answers.

In [1]:
# UPDATE THIS CELL TO INSTALL NEEDED LIBRARIES.
# MAKE SURE TO ADD EVERYTHING THAT NEEDS TO BE INSTALLED IN THIS CELL!

# we will use pip to install packages - you can add others below
!pip install pandas
!pip install numpy
!pip install nltk
!pip install --upgrade gensim
!pip install scikit-learn
!pip install tensorflow-macos
!pip install tensorflow-metal

# and import them here - you can add others below
import multiprocessing
import time
from collections import Counter

import gensim
import nltk
import numpy as np
import pandas as pd
import tensorflow as tf
from gensim.models import Word2Vec
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import (
    confusion_matrix, accuracy_score, precision_score,
    recall_score, f1_score, roc_auc_score
)
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import (
    Embedding, LSTM, Bidirectional,
    Dense, Dropout, Conv1D, GlobalMaxPooling1D
)


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m25.2[0m[39;49m -> [0m[32;49m25.3[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m

[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m25.2[0m[39;49m -> [0m[32;49m25.3[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m

[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m25.2[0m[39;49m -> [0m[32;49m25.3[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m

[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m25.2[0m[39;49m -> [0m[32;49m25.3[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m

[1

## Q1 - Word embeddings (50 points)

In this question, you will first train a Word2Vec model, then use it to represent and reason about user reviews.

### Q1.A - training (10 points)
Load the `user_review_train.csv` file shared with you. Using `Word2Vec` module of `gensim.models`, train a **skip-gram** Word2Vec model on the train data.

#### Notes and tips

- Use the given preprocessing function `preprocess_review`.

In [2]:
# PREPROCESSING FUNCTIONS GIVEN FOR YOU

from nltk.tokenize import sent_tokenize, word_tokenize
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer

# Fetch the NLTK resources that preprocess_review relies on
# (lemmatiser data, stop-word list, sentence/word tokeniser models).
for nltk_resource in ('wordnet', 'stopwords', 'punkt_tab'):
    nltk.download(nltk_resource)

def preprocess_review(review):
    """Turn a raw review string into a flat list of lemmatised tokens.

    The review is split into sentences and each sentence is word-tokenised.
    Every token is lowercased, dropped if it is an English stop word, and
    lemmatised with WordNet. The tokens of all sentences are returned, in
    order, as a single list.
    """
    english_stop_words = set(stopwords.words('english'))
    wordnet_lemmatizer = WordNetLemmatizer()

    review_tokens = []
    for sentence in sent_tokenize(review):
        lowered_tokens = (raw_token.lower() for raw_token in word_tokenize(sentence))
        review_tokens.extend(
            wordnet_lemmatizer.lemmatize(token)
            for token in lowered_tokens
            if token not in english_stop_words
        )

    return review_tokens

[nltk_data] Downloading package wordnet to
[nltk_data]     /Users/yarkinozcan/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     /Users/yarkinozcan/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package punkt_tab to
[nltk_data]     /Users/yarkinozcan/nltk_data...
[nltk_data]   Package punkt_tab is already up-to-date!


In [3]:
# 1. Load the training data
train_df = pd.read_csv("data/user_review_train.csv")   # adjust if needed

# 2. Tokenise every review with the provided preprocessing function.
#    Non-string entries are skipped, and so are reviews that end up with no tokens.
candidate_token_lists = (
    preprocess_review(raw_review)
    for raw_review in train_df["review"]
    if isinstance(raw_review, str)
)
tokenized_reviews = [
    token_list for token_list in candidate_token_lists if len(token_list) > 0
]

print("Number of tokenized reviews:", len(tokenized_reviews))

# 3. Train the skip-gram Word2Vec model
w2v_settings = dict(
    vector_size=100,                       # embedding dimension (change if needed)
    window=5,                              # context window
    min_count=3,                           # ignore rare words
    workers=multiprocessing.cpu_count(),   # one worker thread per CPU core
    sg=1,                                  # 1 = skip-gram (as required)
    negative=5,                            # negative sampling
    epochs=5,                              # training epochs
)
w2v_model = Word2Vec(sentences=tokenized_reviews, **w2v_settings)

print("Model training completed!")

# 4. Persist the trained model so it can be reloaded without retraining
w2v_model.save("word2vec_skipgram.model")
print("Model saved as word2vec_skipgram.model")

# 5. Sanity check: look up the vector of a common word
example_word = "good"
if example_word not in w2v_model.wv:
    print("'good' not in vocabulary.")
else:
    print("Vector for 'good':")
    print(w2v_model.wv[example_word])

Number of tokenized reviews: 14671
Model training completed!
Model saved as word2vec_skipgram.model
Vector for 'good':
[-0.23602006  0.24360152 -0.07802209 -0.1251965   0.04575636 -0.3528469
  0.18785103  0.65172344 -0.14485672 -0.40459403 -0.00607558 -0.12559274
 -0.1475443   0.09000729  0.3747157  -0.14832015  0.28471318  0.09323986
 -0.5221527  -0.40019146 -0.05742176  0.24395053  0.27098238 -0.02191029
 -0.21682064  0.27232128 -0.12760954  0.10146224  0.10668667  0.00525942
  0.11954835  0.03860368 -0.13652475 -0.25755656 -0.20633948  0.33619738
  0.13563193  0.00927034 -0.25880328 -0.18385689 -0.02204607  0.15687074
  0.00668585 -0.03372012  0.39808056 -0.25141117 -0.17338398 -0.1351257
  0.09212077  0.07840645  0.04531801 -0.23774526  0.14126235 -0.04810696
 -0.03855121 -0.20607518  0.3029945  -0.16748565 -0.18449892  0.13847631
  0.20515808 -0.25355488  0.33550352 -0.00530628 -0.18030615  0.2553994
  0.36877277  0.24912655 -0.71450436  0.47260785  0.14221273  0.15705653
  0.1821

### Q1.B - word similarity (10 points)

Using the trained model, report the following:

- Similarity between "good" and "bad"
- Similar words to "good"
- Similar words to "bad"
- Similar words to "good" but not similar to "bad"
- Similar words to "bad" but not similar to "good"

and discuss the reported words and scores. Is it possible to identify specific good/bad features of the product that is being reviewed? What other words can be looked up to get more insight?

#### Notes and tips

- Check the [documentation](https://tedboy.github.io/nlps/generated/generated/gensim.models.Word2Vec.html) of `gensim.models.Word2Vec` to find relevant methods.

In [4]:
def _print_neighbours(heading, neighbours, blank_line_after=True):
    """Print a heading followed by one aligned `word  score` line per neighbour."""
    print(heading)
    for neighbour_word, neighbour_score in neighbours:
        print(f"  {neighbour_word:15s}  {neighbour_score:.4f}")
    if blank_line_after:
        print()


word_vectors = w2v_model.wv

# 1) Similarity between "good" and "bad"
sim_good_bad = word_vectors.similarity("good", "bad")
print(f'Similarity between "good" and "bad": {sim_good_bad:.4f}\n')

# 2) Similar words to "good"
similar_to_good = word_vectors.most_similar("good", topn=10)
_print_neighbours('Top similar words to "good":', similar_to_good)

# 3) Similar words to "bad"
similar_to_bad = word_vectors.most_similar("bad", topn=10)
_print_neighbours('Top similar words to "bad":', similar_to_bad)

# 4) Words similar to "good" but not similar to "bad"
good_not_bad = word_vectors.most_similar(positive=["good"], negative=["bad"], topn=10)
_print_neighbours('Words similar to "good" but NOT similar to "bad":', good_not_bad)

# 5) Words similar to "bad" but NOT similar to "good" (last section: no trailing blank line)
bad_not_good = word_vectors.most_similar(positive=["bad"], negative=["good"], topn=10)
_print_neighbours(
    'Words similar to "bad" but NOT similar to "good":',
    bad_not_good,
    blank_line_after=False,
)


Similarity between "good" and "bad": 0.6645

Top similar words to "good":
  nice             0.8896
  perfect          0.8851
  awesome          0.8694
  gud              0.8669
  superb           0.8634
  except           0.8628
  impressive       0.8620
  excellent        0.8589
  described        0.8567
  okay             0.8565

Top similar words to "bad":
  poor             0.8133
  worst            0.7557
  nd               0.7514
  bed              0.7438
  3rd              0.7429
  satisfactory     0.7413
  dull             0.7391
  vry              0.7366
  bettry           0.7345
  pathetic         0.7336

Words similar to "good" but NOT similar to "bad":
  ram              0.3879
  budget           0.3850
  gb               0.3666
  except           0.3608
  memory           0.3582
  internal         0.3545
  everything       0.3482
  cool             0.3395
  slightly         0.3384
  spec             0.3345

Words similar to "bad" but NOT similar to "good":
  customer     

### Q1.B - discussion
Write your discussion here

### Q1.C - representation (15 points)

An important use of word embeddings is representing "documents" (reviews in our case). For this question, before creating the representations, do the following:

- Randomly sample 2 reviews from sentiment label 0, refer to them as sent0_a and sent0_b.
- Randomly sample 2 reviews from sentiment label 1, refer to them as sent1_a and sent1_b.

After the sampling, follow these steps to represent each review:

- Preprocess the review with the given `preprocess_review` function.
- For each token in the review, fetch the vector of that token.
- Take the average of the token vectors in the review to represent that review.

Then, calculate and report the cosine similarity of the two vectors representing:

- sent0_a and sent0_b
- sent0_a and sent1_a
- sent1_a and sent1_b

Does this representation work to capture the labels of the reviews? Do you think there is a better way to represent each review instead of taking the average of the word vectors? Discuss your findings with respect to these questions. Repeating the sampling process several times might give you a better insight.

#### Notes and tips

- You can use `numpy` for your calculations.

In [5]:
# 1) Draw two random reviews per sentiment label

# Sentiment 0 (e.g., negative)
sent0_df = train_df.loc[train_df["sentiment"] == 0]
sent0_samples = sent0_df.sample(n=2, random_state=1)  # change random_state to resample

# Sentiment 1 (e.g., positive)
sent1_df = train_df.loc[train_df["sentiment"] == 1]
sent1_samples = sent1_df.sample(n=2, random_state=1)

# Each sample frame has exactly two rows: the first is "a", the second is "b"
sent0_a_text, sent0_b_text = sent0_samples["review"].tolist()
sent1_a_text, sent1_b_text = sent1_samples["review"].tolist()

for sample_heading, sample_text in (
    ("sent0_a (label 0):\n", sent0_a_text),
    ("sent0_b (label 0):\n", sent0_b_text),
    ("sent1_a (label 1):\n", sent1_a_text),
    ("sent1_b (label 1):\n", sent1_b_text),
):
    print(sample_heading, sample_text, "\n")


# 2) Helper: get average embedding for a review

def get_review_vector(review_text, model):
    """Represent a review as the mean of its in-vocabulary token vectors.

    Parameters
    ----------
    review_text : str
        Raw review text; it is tokenised with ``preprocess_review``.
    model : object
        Trained model exposing ``wv`` (token -> vector lookup supporting
        ``in`` and indexing) and ``vector_size``.

    Returns
    -------
    numpy.ndarray
        1-D vector of length ``model.vector_size``: the element-wise mean of
        the vectors of all tokens found in ``model.wv``. If no token of the
        review has a vector, a float32 zero vector is returned.
    """
    tokens = preprocess_review(review_text)
    vectors = [model.wv[token] for token in tokens if token in model.wv]

    if not vectors:
        # No known token: fall back to zeros. float32 keeps this fallback in the
        # same dtype as the one used by the Q1.D helper `w2v_review_vector`,
        # instead of numpy's default float64.
        return np.zeros(model.vector_size, dtype=np.float32)

    return np.mean(vectors, axis=0)


# 3) Helper: cosine similarity between two vectors

def cosine_similarity(v1, v2):
    """Return the cosine of the angle between two vectors as a Python float.

    If either vector has zero Euclidean norm the similarity is undefined,
    and 0.0 is returned instead.
    """
    length_1, length_2 = np.linalg.norm(v1), np.linalg.norm(v2)

    # Guard against division by zero for all-zero vectors
    if length_1 == 0 or length_2 == 0:
        return 0.0

    inner_product = np.dot(v1, v2)
    return float(inner_product / (length_1 * length_2))


# 4) Compute review vectors

# Average-embedding vector for each of the four sampled reviews
sent0_a_vec, sent0_b_vec, sent1_a_vec, sent1_b_vec = (
    get_review_vector(sampled_text, w2v_model)
    for sampled_text in (sent0_a_text, sent0_b_text, sent1_a_text, sent1_b_text)
)


# 5) Compute requested cosine similarities

sim_00 = cosine_similarity(sent0_a_vec, sent0_b_vec)  # same label: 0 vs 0
sim_01 = cosine_similarity(sent0_a_vec, sent1_a_vec)  # different labels: 0 vs 1
sim_11 = cosine_similarity(sent1_a_vec, sent1_b_vec)  # same label: 1 vs 1

for pair_label, pair_similarity in (
    ("sent0_a, sent0_b", sim_00),
    ("sent0_a, sent1_a", sim_01),
    ("sent1_a, sent1_b", sim_11),
):
    print(f"cosine({pair_label}) = {pair_similarity:.4f}")

sent0_a (label 0):
 It takes lot of time for charging full battery, more than 6 hours for one full charge and heating problem.. 

sent0_b (label 0):
 Bluetooth is not working properly with my speaker getting breaking sound most of the time 

sent1_a (label 1):
 Superb 

sent1_b (label 1):
 Cons: 1.Heating Issue - Phone easily gets at around 40 degrees while charging and also while playing heavy games or camera.2. Camera - After the software update you may experience depth mode little better, still the edges get blurred.3. Battery Backup - Its written 4000mah but equivalent to 3500mah.4. Slight Slutter, No Lag5. 4 Gb Ram Variant is preferable & for performance as well and also black variant looks cool and premium.6. You cannot put a tempered glass on its display as 2.5D curved edges will show up bubbles around edges.7. They did provide a turbo charger but it takes 2.2 hours for this phone to charge fully. 

cosine(sent0_a, sent0_b) = 0.7643
cosine(sent0_a, sent1_a) = 0.6476
cosine(sent1

### Q1.C - discussion
Write your discussion here

### Q1.D - training and comparing classifiers (15 points)

For this task, you will use the `user_review_train.csv` and `user_review_test.csv` files to train a binary classification model with Word2Vec representations, and compare its performance with a binary classifier using Bag-of-Words representation.

As the Bag-of-Words classifier, you can either choose the best performing classifier you have implemented in Question 3 of Programming Assignment 1, or you can follow these steps:

- Preprocess the review with the given `preprocess_review` function.
- Order all unique tokens by frequency, take the most frequent 100.
- Use these 100 words as the corpus for Bag-of-Words representation.

For the Word2Vec model, represent the reviews by following these steps:

- Preprocess the review with the given `preprocess_review` function.
- For each token in the review that is also in the most frequent 100 tokens, fetch the vector of that token.
- Take the average of the token vectors selected to represent that review.

After training both classifiers on `user_review_train.csv`, test them with `user_review_test.csv` and report the performance of your models with four metrics: accuracy, precision, recall and F1-score. Compare the performance of both models and discuss in detail.

#### Notes and tips

- You can use `CountVectorizer` from `scikit-learn` or any other library available for Bag-of-Words representation.
- You should select a classification method from the following set of classifiers: `[Naive Bayes, Support Vector Machine, Logistic Regression, Random Forest]`. You can use `scikit-learn`, `nltk`, or any other library for the classifier implementations. 
- You should **not** use the test set `user_review_test.csv` during your training process. You should use `user_review_train.csv` only.
- You may add a validation step in your training process. To do this, you can further split the `user_review_train.csv` data and apply k-fold cross validation.

In [6]:
# 1) Load train and test data
train_df = pd.read_csv("data/user_review_train.csv")
test_df  = pd.read_csv("data/user_review_test.csv")

# Fail early with a clear message if an expected column is absent
for split_name, split_df in (("train", train_df), ("test", test_df)):
    missing_columns = {"review", "sentiment"} - set(split_df.columns)
    assert not missing_columns, f"{split_name} data is missing columns: {sorted(missing_columns)}"

y_train = train_df["sentiment"].values
y_test  = test_df["sentiment"].values

# 2) Preprocess all reviews using the given function


def _tokenize_reviews(reviews):
    """Tokenise every review, keeping one (possibly empty) token list per row.

    A missing review becomes an empty token list rather than being passed
    through ``str()``, which would turn NaN into the spurious token "nan".
    Rows are never dropped, so the result stays aligned with the labels.
    """
    return [
        [] if pd.isna(review) else preprocess_review(str(review))
        for review in reviews
    ]


train_tokens_list = _tokenize_reviews(train_df["review"])
test_tokens_list  = _tokenize_reviews(test_df["review"])

# 3) Build vocabulary: most frequent 100 tokens in the TRAIN set only
#    (the test set must not influence feature selection)

freq = Counter()
for tokens in train_tokens_list:
    freq.update(tokens)

most_common_100 = [w for w, c in freq.most_common(100)]
vocab_index = {w: i for i, w in enumerate(most_common_100)}   # token -> feature column
vocab_set = set(most_common_100)                              # fast membership test

print("Top 10 of the 100 most frequent tokens:", most_common_100[:10])
print("Vocabulary size:", len(most_common_100))


# 4) Bag-of-Words representation (counts of top-100 tokens)

def bow_from_tokens(tokens, vocab_index):
    """Count how often each vocabulary token occurs in `tokens`.

    `vocab_index` maps a token to its column position. The result is a
    float32 vector with one entry per vocabulary token; tokens that are not
    in the vocabulary are ignored.
    """
    counts = np.zeros(len(vocab_index), dtype=np.float32)
    for token in tokens:
        column = vocab_index.get(token)
        if column is not None:
            counts[column] += 1.0
    return counts

# One BoW row per review, stacked into (n_reviews, vocabulary size) matrices
train_bow_rows = [bow_from_tokens(review_tokens, vocab_index) for review_tokens in train_tokens_list]
test_bow_rows = [bow_from_tokens(review_tokens, vocab_index) for review_tokens in test_tokens_list]

X_train_bow = np.vstack(train_bow_rows)
X_test_bow = np.vstack(test_bow_rows)

print("BoW feature matrix shapes:", X_train_bow.shape, X_test_bow.shape)


# 5) Word2Vec-based representation (average of W2V vectors for tokens in top-100)

def w2v_review_vector(tokens, model, allowed_tokens):
    """Average the embeddings of the tokens that are both allowed and known.

    A token contributes only if it is in `allowed_tokens` and has a vector in
    `model.wv`. When no token qualifies, a float32 zero vector of length
    `model.vector_size` is returned.
    """
    selected = [
        model.wv[token]
        for token in tokens
        if token in allowed_tokens and token in model.wv
    ]
    if not selected:
        return np.zeros(model.vector_size, dtype=np.float32)
    return np.mean(selected, axis=0)

# One averaged-embedding row per review, restricted to the top-100 vocabulary
train_w2v_rows = [
    w2v_review_vector(review_tokens, w2v_model, vocab_set)
    for review_tokens in train_tokens_list
]
test_w2v_rows = [
    w2v_review_vector(review_tokens, w2v_model, vocab_set)
    for review_tokens in test_tokens_list
]

X_train_w2v = np.vstack(train_w2v_rows)
X_test_w2v = np.vstack(test_w2v_rows)

print("Word2Vec feature matrix shapes:", X_train_w2v.shape, X_test_w2v.shape)


# 6) Train classifiers (Logistic Regression for both)

# Same classifier and settings for both representations, so that only the
# features differ. `fit` returns the fitted estimator itself.
clf_bow = LogisticRegression(max_iter=1000).fit(X_train_bow, y_train)
clf_w2v = LogisticRegression(max_iter=1000).fit(X_train_w2v, y_train)

# Predict on the held-out test features
y_pred_bow = clf_bow.predict(X_test_bow)
y_pred_w2v = clf_w2v.predict(X_test_w2v)


# 7) Compute evaluation metrics

def evaluate_model(y_true, y_pred, name="model"):
    """Print and return accuracy, precision, recall and F1 for one model.

    The four scores are printed under a `=== name ===` banner with four
    decimals, followed by a blank line, and returned as the tuple
    (accuracy, precision, recall, f1).
    """
    scorers = (accuracy_score, precision_score, recall_score, f1_score)
    captions = ("Accuracy ", "Precision", "Recall   ", "F1-score ")

    scores = tuple(scorer(y_true, y_pred) for scorer in scorers)

    print(f"=== {name} ===")
    for caption, value in zip(captions, scores):
        print(f"{caption}: {value:.4f}")
    print()
    return scores

# Score both classifiers against the same test labels
metrics_bow = evaluate_model(
    y_true=y_test,
    y_pred=y_pred_bow,
    name="Bag-of-Words + LogisticRegression",
)
metrics_w2v = evaluate_model(
    y_true=y_test,
    y_pred=y_pred_w2v,
    name="Word2Vec-avg (top-100) + LogisticRegression",
)

Top 10 of the 100 most frequent tokens: ['.', 'phone', ',', 'good', 'camera', 'battery', 'mobile', '...', '..', 'product']
Vocabulary size: 100
BoW feature matrix shapes: (14675, 100) (1675, 100)
Word2Vec feature matrix shapes: (14675, 100) (1675, 100)
=== Bag-of-Words + LogisticRegression ===
Accuracy : 0.8119
Precision: 0.8313
Recall   : 0.8442
F1-score : 0.8377

=== Word2Vec-avg (top-100) + LogisticRegression ===
Accuracy : 0.7904
Precision: 0.8408
Recall   : 0.7840
F1-score : 0.8114



In [None]:
# Q2.A - RNN (Bidirectional LSTM) MODEL

# NOTE(review): vocab_size, maxlen, X_train_pad and X_test_pad are not defined in
# any cell visible in this notebook section - confirm that the tokenisation /
# padding cell exists and runs before this one on a fresh kernel.

embedding_dim = 100

# Embedding -> BiLSTM (final state only) -> dense head with a sigmoid output
# for binary sentiment classification.
rnn_model = Sequential([
    Embedding(
        input_dim=vocab_size,
        output_dim=embedding_dim,
        input_length=maxlen
    ),
    Bidirectional(LSTM(64, return_sequences=False)),
    Dropout(0.5),
    Dense(64, activation='relu'),
    Dropout(0.5),
    Dense(1, activation='sigmoid')
])

rnn_model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy']
)

rnn_model.summary()

# Train once and measure wall-clock time with a monotonic clock.
# (The cell previously contained this whole section twice, so the model was
# built and trained two times and the first run was thrown away.)
start_time = time.perf_counter()
history_rnn = rnn_model.fit(
    X_train_pad,
    y_train,
    epochs=5,
    batch_size=64,
    validation_split=0.1,   # last 10% of the training data is held out for validation
    verbose=1
)
end_time = time.perf_counter()
rnn_train_time = end_time - start_time
print("RNN training time (seconds):", rnn_train_time)

# Predictions: sigmoid probabilities, thresholded at 0.5 for the class label
y_pred_rnn_proba = rnn_model.predict(X_test_pad).reshape(-1)
y_pred_rnn = (y_pred_rnn_proba >= 0.5).astype(int)

# Metrics (AUC is computed from the probabilities, the rest from hard labels)
cm_rnn = confusion_matrix(y_test, y_pred_rnn)
acc_rnn = accuracy_score(y_test, y_pred_rnn)
prec_rnn = precision_score(y_test, y_pred_rnn)
rec_rnn = recall_score(y_test, y_pred_rnn)
f1_rnn = f1_score(y_test, y_pred_rnn)
auc_rnn = roc_auc_score(y_test, y_pred_rnn_proba)

print("=== RNN Confusion Matrix ===")
print(cm_rnn)
print(f"RNN Accuracy : {acc_rnn:.4f}")
print(f"RNN Precision: {prec_rnn:.4f}")
print(f"RNN Recall   : {rec_rnn:.4f}")
print(f"RNN F1-score : {f1_rnn:.4f}")
print(f"RNN AUC      : {auc_rnn:.4f}")
print(f"RNN Training time (s): {rnn_train_time:.2f}")

In [None]:
# Q2.B - implementation of TextCNN
# you can add cells below if needed

# Q2.B - TextCNN MODEL

# Assemble the network layer by layer:
# Embedding -> 1-D convolution -> global max pooling -> dense head with sigmoid output
cnn_model = Sequential()
cnn_model.add(Embedding(input_dim=vocab_size, output_dim=embedding_dim, input_length=maxlen))
cnn_model.add(Conv1D(filters=128, kernel_size=3, activation='relu'))
cnn_model.add(GlobalMaxPooling1D())
cnn_model.add(Dropout(0.5))
cnn_model.add(Dense(64, activation='relu'))
cnn_model.add(Dropout(0.5))
cnn_model.add(Dense(1, activation='sigmoid'))

cnn_model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

cnn_model.summary()

# Fit with the same training settings as the RNN cell and time the run
cnn_fit_settings = dict(epochs=5, batch_size=64, validation_split=0.1, verbose=1)

start_time = time.time()
history_cnn = cnn_model.fit(X_train_pad, y_train, **cnn_fit_settings)
end_time = time.time()
cnn_train_time = end_time - start_time
print("TextCNN training time (seconds):", cnn_train_time)

# Sigmoid probabilities flattened to 1-D, then thresholded at 0.5
y_pred_cnn_proba = cnn_model.predict(X_test_pad).ravel()
y_pred_cnn = (y_pred_cnn_proba >= 0.5).astype(int)

# Metrics (AUC uses the probabilities, the others use the hard labels)
cm_cnn = confusion_matrix(y_test, y_pred_cnn)
acc_cnn = accuracy_score(y_test, y_pred_cnn)
prec_cnn = precision_score(y_test, y_pred_cnn)
rec_cnn = recall_score(y_test, y_pred_cnn)
f1_cnn = f1_score(y_test, y_pred_cnn)
auc_cnn = roc_auc_score(y_test, y_pred_cnn_proba)

print("=== TextCNN Confusion Matrix ===")
print(cm_cnn)
for metric_caption, metric_value in (
    ("Accuracy ", acc_cnn),
    ("Precision", prec_cnn),
    ("Recall   ", rec_cnn),
    ("F1-score ", f1_cnn),
    ("AUC      ", auc_cnn),
):
    print(f"TextCNN {metric_caption}: {metric_value:.4f}")
print(f"TextCNN Training time (s): {cnn_train_time:.2f}")

### Q2.C - discussion

Write your discussion here.