In [1]:
from tensorflow.keras.preprocessing.sequence import pad_sequences
import torch
import torch.nn as nn
import torch.optim as optim

In [2]:
data = [
    ("I love this movie", 1),
    ("This film is terrible", 0),
    ("What a great movie", 1),
    ("I hated this film", 0),
    ("Amazing acting and good story", 1),
    ("Bad plot and boring", 0),
]

In [3]:
tokens = [word.split() for word in [sentence[0].lower() for sentence in data]]

unique_tokens = set([word for sentence in tokens for word in sentence])

vocab = {word: idx for idx, word in enumerate(unique_tokens, start=2)}
(vocab['<PAD>'], vocab['<UNK>']) = (0, 1)

In [4]:
X_tensor = torch.tensor(
    pad_sequences(
        [[vocab.get(word, vocab['<PAD>']) for word in token] for token in tokens],
        padding="post"
    ),
    dtype=torch.long
)

In [5]:
y_tensor = torch.tensor([label for _, label in data], dtype=torch.long)

In [6]:
class SimpleNNetwork(nn.Module):
    def __init__(self, vocab_size, emd_dim):
        super().__init__()
        self.embedder = nn.Embedding(
            num_embeddings=vocab_size,
            embedding_dim=emd_dim,
            padding_idx=vocab['<PAD>']
        )
        self.rnn = nn.RNN(emd_dim, 64, batch_first=True)
        self.flinear = nn.Linear(64, 2) 
        
    def forward(self, x):
        emb = self.embedder(x)
        output, hidden = self.rnn(emb)
        return self.flinear(hidden.squeeze(0))

In [7]:
vocab_size = len(vocab)
embedding_dim = 50

model = SimpleNNetwork(vocab_size, embedding_dim)

criterion = nn.CrossEntropyLoss()   
optimizer = optim.Adam(model.parameters(), lr=0.01)

In [11]:
epochs = 10
for epoch in range(epochs):
    total_loss = 0
    model.train()
    optimizer.zero_grad()
    output = model(X_tensor)
    loss = criterion(output, y_tensor)
    loss.backward()
    optimizer.step()
    total_loss += loss.item()
    if (epoch + 1) % 1 == 0:
        print(f"Epoch {epoch + 1}, Loss = {total_loss:.4f}")

Epoch 1, Loss = 0.0000
Epoch 2, Loss = 0.0000
Epoch 3, Loss = 0.0000
Epoch 4, Loss = 0.0000
Epoch 5, Loss = 0.0000
Epoch 6, Loss = 0.0000
Epoch 7, Loss = 0.0000
Epoch 8, Loss = 0.0000
Epoch 9, Loss = 0.0000
Epoch 10, Loss = 0.0000


In [14]:
test_data = [
    ("I love this film", 1),
    ("The plot is boring", 0),
    ("Amazing movie", 1),
    ("I hated the story", 0),
]

test_tokens = [sentence.lower().split() for sentence, _ in test_data]

In [15]:
X_test = torch.tensor(
    pad_sequences(
        [[vocab.get(word, vocab['<PAD>']) for word in token] for token in test_tokens],
        padding="post"
    ),
    dtype=torch.long
)

In [16]:
y_test = torch.tensor([label for _, label in test_data], dtype=torch.long)

In [None]:
model.eval()
with torch.no_grad():
    logits = model(X_test)
    preds = torch.argmax(logits, dim=1)
    print("Predictions:", preds.tolist())

Predictions: [1, 1, 0, 1]


In [18]:
from sklearn.metrics import accuracy_score
print("Accuracy : ", accuracy_score(y_test, preds))

Accuracy :  0.25
