In [None]:
# Upgrade keras-nlp and keras before any later cell imports them.
try:
    import subprocess
    import sys

    import IPython

    get_ipython = IPython.get_ipython
    if get_ipython() is not None:
        # Notebook/Colab/Kaggle
        print("Upgrading keras-nlp and keras...")
        # Run pip through the interpreter that backs this kernel so the packages
        # are installed into the kernel's environment, not into whichever `pip`
        # executable happens to be first on PATH.
        result = subprocess.run(
            [sys.executable, '-m', 'pip', 'install', '-U', 'keras-nlp', 'keras'],
            check=False,
        )
        # Stay best-effort (no exception), but do not hide a failed install.
        if result.returncode != 0:
            print(f"pip exited with status {result.returncode}; the upgrade may not have been applied.")
except Exception:
    # Fallback for script environments (reached when the block above raises,
    # e.g. IPython is not importable).
    import os
    import sys
    os.system(f'"{sys.executable}" -m pip install -U keras-nlp keras')

Upgrading keras-nlp and keras...


In [None]:
import kagglehub
# Interactive Kaggle sign-in: renders a credentials widget in the notebook.
# NOTE(review): presumably needed so the "kaggle://" model preset loaded later
# can be downloaded — confirm.
kagglehub.login()

VBox(children=(HTML(value='<center> <img\nsrc=https://www.kaggle.com/static/images/site-logo.png\nalt=\'Kaggle…

Kaggle credentials set.
Kaggle credentials successfully validated.


In [None]:
import io

from google.colab import files
import pandas as pd
import numpy as np
import tensorflow as tf
import keras_nlp
from sklearn.model_selection import train_test_split

In [None]:
# --- Upload and Load Data ---
print("Please upload Sample.csv when prompted.")
uploaded = files.upload()
if uploaded:
    # Parse the bytes that were just uploaded instead of re-opening a hard-coded
    # path: Colab may save a repeated upload under a de-duplicated name
    # (e.g. "Sample (1).csv"), in which case 'Sample.csv' on disk is a stale copy.
    csv_name, csv_bytes = next(iter(uploaded.items()))
    df = pd.read_csv(io.BytesIO(csv_bytes))
else:
    # Nothing was uploaded (dialog dismissed): fall back to a copy already on disk.
    df = pd.read_csv('Sample.csv')

# Missing text becomes an empty string so both columns are plain Python strings.
df['Review'] = df['Review'].fillna('').astype(str)
df['Implicit'] = df['Implicit'].fillna('').astype(str)
# Fixed random_state keeps the 80/20 train/validation split reproducible.
train_df, val_df = train_test_split(df, test_size=0.2, random_state=42)

Please upload Sample.csv when prompted.


Saving Sample.csv to Sample.csv


In [None]:
# --- Prepare Dataset ---
# Length limit: assigned to the preprocessor's sequence_length and passed to generate().
max_length = 256
# Examples per batch; also used to derive steps_per_epoch for training.
BATCH_SIZE = 2
# Number of epochs passed to fit().
EPOCHS = 3

def make_dataset(df, input_col, output_col, max_length, batch_size):
    """Build a batched tf.data.Dataset of prompt/response string pairs.

    Args:
        df: DataFrame holding the examples.
        input_col: Name of the column fed to the "prompts" feature.
        output_col: Name of the column fed to the "responses" feature.
        max_length: Not used by this function; kept so existing call sites
            keep working.
        batch_size: Number of examples per batch.

    Returns:
        A dataset yielding dicts with string "prompts" and "responses"
        features, grouped into batches of `batch_size`.
    """
    def pair_generator():
        # Walk the two columns directly; iterrows() would materialize a full
        # Series for every row just to read two values from it.
        for prompt, response in zip(df[input_col], df[output_col]):
            yield {"prompts": prompt, "responses": response}

    ds = tf.data.Dataset.from_generator(
        pair_generator,
        output_signature={
            "prompts": tf.TensorSpec(shape=(), dtype=tf.string),
            "responses": tf.TensorSpec(shape=(), dtype=tf.string),
        },
    )
    return ds.batch(batch_size)

# The training stream is repeated indefinitely; the validation stream is a single pass.
train_ds = make_dataset(
    df=train_df,
    input_col="Review",
    output_col="Implicit",
    max_length=max_length,
    batch_size=BATCH_SIZE,
)
train_ds = train_ds.repeat()
val_ds = make_dataset(
    df=val_df,
    input_col="Review",
    output_col="Implicit",
    max_length=max_length,
    batch_size=BATCH_SIZE,
)

In [None]:
# --- Load and Compile Model ---
gemma_lm = keras_nlp.models.Gemma3CausalLM.from_preset(
    "kaggle://keras/gemma3/Keras/gemma3_1b/3"
)
# Enable rank-4 LoRA on the backbone.
gemma_lm.backbone.enable_lora(rank=4)
# Use the notebook-wide length limit for the preprocessor.
gemma_lm.preprocessor.sequence_length = max_length

optimizer = tf.keras.optimizers.Adam(learning_rate=5e-5)
# from_logits=True: the loss is told that the model outputs are unnormalized logits.
loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
gemma_lm.compile(optimizer=optimizer, loss=loss_fn)

In [None]:
# --- Train ---
# train_ds repeats forever, so fit() needs an explicit number of steps per epoch.
# Ceiling division counts the final partial batch, and max(1, ...) prevents a
# zero step count when the training set is smaller than one batch.
steps_per_epoch = max(1, -(-len(train_df) // BATCH_SIZE))
history = gemma_lm.fit(
    train_ds,
    validation_data=val_ds,
    epochs=EPOCHS,
    steps_per_epoch=steps_per_epoch,
)

Epoch 1/3
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1515s[0m 72s/step - loss: 0.3058 - sparse_categorical_accuracy: 0.5376 - val_loss: 0.2348 - val_sparse_categorical_accuracy: 0.4436
Epoch 2/3




[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1268s[0m 64s/step - loss: 0.2999 - sparse_categorical_accuracy: 0.5413 - val_loss: 0.2296 - val_sparse_categorical_accuracy: 0.4436
Epoch 3/3
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1255s[0m 63s/step - loss: 0.2915 - sparse_categorical_accuracy: 0.5451 - val_loss: 0.2216 - val_sparse_categorical_accuracy: 0.4436


In [None]:
# --- Evaluate on 5 samples ---
print("\nSample implicit aspect predictions (aspect phrase generation):\n")
for _, row in val_df.head(5).iterrows():
    prompt = row['Review']
    target = row['Implicit']
    generated = gemma_lm.generate(prompt, max_length=max_length)
    # generate() returns the prompt followed by the continuation. Drop the echoed
    # prompt so `pred` holds only what the model produced; if the text does not
    # start with the prompt, keep it unchanged.
    if generated.startswith(prompt):
        pred = generated[len(prompt):].strip()
    else:
        pred = generated
    print(f"Review: {prompt}\nTarget: {target}\nPred:   {pred}\n{'-'*40}")


Sample implicit aspect predictions (aspect phrase generation):

Review: terrible blue order ko pero pink yung dumating sakin para sana sa anak kong lalaki pero pink yung dumating sakin nakaka dissapoint
Target: blue order ko pero pink yung dumating - DEL#CORR, para sana sa anak kong lalaki pero pink yung dumating - DEL#CORR
Pred:   terrible blue order ko pero pink yung dumating sakin para sana sa anak kong lalaki pero pink yung dumating sakin nakaka dissapoint talaga
I ordered a blue shirt and it came in pink. I was so disappointed. I was hoping for a blue shirt.
I ordered a blue shirt and it came in pink. I was so disappointed. I was hoping for a blue shirt.
I ordered a blue shirt and it came in pink. I was so disappointed. I was hoping for a blue shirt.
I ordered a blue shirt and it came in pink. I was so disappointed. I was hoping for a blue shirt.
I ordered a blue shirt and it came in pink. I was so disappointed. I was hoping for a blue shirt.
I ordered a blue shirt and it came in

In [None]:
# --- String Match Accuracy (on 5 samples) ---
print("\nEvaluating Implicit Aspect Extraction (String Match Accuracy) on 5 samples...")
implicit_preds = []
implicit_trues = []
for _, row in val_df.head(5).iterrows():
    prompt = row['Review']
    target = row['Implicit']
    generated = gemma_lm.generate(prompt, max_length=max_length)
    # generate() echoes the prompt before the continuation. Comparing the raw
    # output with the target can therefore essentially never match, so strip the
    # echoed prompt first (left unchanged if the text does not start with it).
    if generated.startswith(prompt):
        pred = generated[len(prompt):]
    else:
        pred = generated
    implicit_preds.append(pred.strip())
    implicit_trues.append(target.strip())
# Fraction of samples whose generated text equals the target exactly.
implicit_acc = np.mean([p == t for p, t in zip(implicit_preds, implicit_trues)])
print(f"Implicit String Match Accuracy (5 samples): {implicit_acc:.3f}")


Evaluating Implicit Aspect Extraction (String Match Accuracy) on 5 samples...
Implicit String Match Accuracy (5 samples): 0.000


In [None]:
# --- Phrase-level F1 Score (on 5 samples) ---
print("\nEvaluating Implicit Aspect Extraction (Phrase-level F1 Score) on 5 samples...")
# f1_score is applied per sample to binary phrase-membership vectors further down.
from sklearn.metrics import f1_score

def split_phrases(s):
    """Return the set of non-empty, whitespace-trimmed comma-separated pieces of `s`."""
    phrases = set()
    for piece in s.split(','):
        trimmed = piece.strip()
        if trimmed:
            phrases.add(trimmed)
    return phrases

# Score each sample over the union of its predicted and gold phrases, then
# average the per-sample F1 values.
all_f1s = []
for pred_text, true_text in zip(implicit_preds, implicit_trues):
    pred_phrases = split_phrases(pred_text)
    gold_phrases = split_phrases(true_text)
    vocabulary = list(pred_phrases | gold_phrases)
    # An empty union means neither side produced a phrase; such samples are skipped.
    if not vocabulary:
        continue
    gold_flags = [1 if phrase in gold_phrases else 0 for phrase in vocabulary]
    pred_flags = [1 if phrase in pred_phrases else 0 for phrase in vocabulary]
    all_f1s.append(f1_score(gold_flags, pred_flags, zero_division=0))

if not all_f1s:
    print("No valid F1 scores to report.")
else:
    print(f"Implicit Phrase-level F1 (5 samples): {np.mean(all_f1s):.3f}")


Evaluating Implicit Aspect Extraction (Phrase-level F1 Score) on 5 samples...
Implicit Phrase-level F1 (5 samples): 0.000
