In [3]:
import tensorflow as tf
from tensorflow.keras import layers, models
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
import numpy as np

# Custom layer to handle tf.reduce_mean
class ReduceMeanLayer(layers.Layer):
    """Keras layer that averages its input along a fixed axis.

    Wraps ``tf.reduce_mean`` so the reduction can be used as a regular layer
    inside a Keras model. The layer has no trainable weights.

    Args:
        axis: Axis (or axes) forwarded to ``tf.reduce_mean``; the reduced
            axis is removed from the output.
        **kwargs: Forwarded unchanged to ``layers.Layer`` (e.g. ``name``).
    """

    def __init__(self, axis, **kwargs):
        super().__init__(**kwargs)
        self.axis = axis

    def call(self, inputs):
        """Return the mean of ``inputs`` taken over ``self.axis``."""
        return tf.reduce_mean(inputs, axis=self.axis)

    def get_config(self):
        """Return the layer configuration, including ``axis``.

        Declaring the constructor argument explicitly lets the layer be
        rebuilt from its config (``ReduceMeanLayer.from_config(cfg)``), which
        is what model saving and cloning rely on.
        """
        config = super().get_config()
        config.update({"axis": self.axis})
        return config

# Building the model
def build_model(vocab_size, max_len, embed_dim, num_heads, dropout_rate=0.2):
    """Build an uncompiled self-attention binary classifier.

    Pipeline: token ids -> embedding -> multi-head self-attention -> mean over
    the sequence axis -> dropout -> single sigmoid unit.

    Args:
        vocab_size: Size of the embedding table (``input_dim`` of the
            ``Embedding`` layer).
        max_len: Fixed number of token ids per input sequence.
        embed_dim: Embedding width; also used as the per-head ``key_dim`` of
            the attention layer.
        num_heads: Number of attention heads.
        dropout_rate: Dropout rate applied to the pooled features before the
            output layer. Defaults to 0.2, the previously hard-coded value.

    Returns:
        A ``models.Model`` mapping int32 inputs of shape ``(max_len,)`` per
        sample to one sigmoid output per sample. The model is NOT compiled;
        the caller must call ``compile`` before training.
    """
    inputs = layers.Input(shape=(max_len,), dtype=tf.int32)  # Fixed length for now

    # Token ids -> dense vectors.
    # NOTE(review): the embedding is created without ``mask_zero`` and the
    # pooling below averages every position, so if inputs are padded with 0
    # the padding positions take part in both attention and the mean.
    # Confirm whether that is intended.
    embedded = layers.Embedding(input_dim=vocab_size, output_dim=embed_dim)(inputs)

    # Self-attention: the embedded sequence is used as both query and value.
    attended = layers.MultiHeadAttention(num_heads=num_heads, key_dim=embed_dim)(
        embedded, embedded
    )

    # Collapse the sequence axis (axis 1) into one feature vector per sample.
    pooled = ReduceMeanLayer(axis=1)(attended)

    # Regularize, then project to a single probability.
    regularized = layers.Dropout(dropout_rate)(pooled)
    outputs = layers.Dense(1, activation="sigmoid")(regularized)

    # Assemble the functional model (compilation is left to the caller).
    return models.Model(inputs, outputs)

# Example hyperparameters (adjust as needed)
vocab_size = 5000  # Tokenizer word cap and embedding table size
max_len = 100  # Every sequence is padded/truncated to this length
embed_dim = 128  # Embedding dimension
num_heads = 4  # Number of attention heads

# Build the model, then compile it for binary classification
model = build_model(vocab_size, max_len, embed_dim, num_heads)
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Tiny demonstration dataset
texts = [
    "The food in the restaurant is not good",  # Negative sentiment
    "I love this place, the food is amazing",   # Positive sentiment
    "It was a terrible experience",            # Negative sentiment
    "Best meal I've had in years",             # Positive sentiment
]

# One label per sentence above (1 for positive, 0 for negative)
labels = [0, 1, 0, 1]

# Fit the vocabulary on the training sentences and encode them as id lists.
# NOTE(review): no oov_token is configured, so words that were not seen here
# are presumably dropped when later texts are encoded - confirm this is wanted.
tokenizer = Tokenizer(num_words=vocab_size)
tokenizer.fit_on_texts(texts)
sequences = tokenizer.texts_to_sequences(texts)

# Pad sequences so every row has exactly max_len entries
padded_sequences = pad_sequences(sequences, maxlen=max_len)

# Keras expects array-like targets
labels = np.array(labels)

# Model training
model.fit(padded_sequences, labels, epochs=5, batch_size=2)

# Encode a new sentence with the same tokenizer and padding as the training data
test_text = "The service was horrible and the food was cold"
test_sequence = tokenizer.texts_to_sequences([test_text])
test_padded_sequence = pad_sequences(test_sequence, maxlen=max_len)

# Predict sentiment. The model emits one sigmoid value per sample, so index
# the scalar explicitly instead of relying on the truthiness of a one-element
# array.
pred = model.predict(test_padded_sequence)
sentiment = 'Positive' if pred[0, 0] > 0.5 else 'Negative'
print(f"Predicted sentiment: {sentiment}")


Epoch 1/5
2/2 ━━━━━━━━━━━━━━━━━━━━ 2s 42ms/step - accuracy: 0.8333 - loss: 0.6947
Epoch 2/5
2/2 ━━━━━━━━━━━━━━━━━━━━ 0s 39ms/step - accuracy: 0.6667 - loss: 0.7083
Epoch 3/5
2/2 ━━━━━━━━━━━━━━━━━━━━ 0s 49ms/step - accuracy: 0.5000 - loss: 0.6980
Epoch 4/5
2/2 ━━━━━━━━━━━━━━━━━━━━ 0s 32ms/step - accuracy: 0.5000 - loss: 0.6947
Epoch 5/5
2/2 ━━━━━━━━━━━━━━━━━━━━ 0s 33ms/step - accuracy: 0.6667 - loss: 0.6897
1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 121ms/step
Predicted sentiment: Negative
