## Load IMDB Dataset (from Keras)

In [3]:
pip install tensorflow

Collecting tensorflow
  Downloading tensorflow-2.19.0-cp311-cp311-win_amd64.whl.metadata (4.1 kB)
Collecting libclang>=13.0.0 (from tensorflow)
  Using cached libclang-18.1.1-py2.py3-none-win_amd64.whl.metadata (5.3 kB)
Collecting tensorboard~=2.19.0 (from tensorflow)
  Downloading tensorboard-2.19.0-py3-none-any.whl.metadata (1.8 kB)
Collecting tensorflow-io-gcs-filesystem>=0.23.1 (from tensorflow)
  Using cached tensorflow_io_gcs_filesystem-0.31.0-cp311-cp311-win_amd64.whl.metadata (14 kB)
Downloading tensorflow-2.19.0-cp311-cp311-win_amd64.whl (375.9 MB)
   ---------------------------------------- 0.0/375.9 MB ? eta -:--:--
   ---------------------------------------- 0.0/375.9 MB ? eta -:--:--
   ---------------------------------------- 0.0/375.9 MB ? eta -:--:--
   ---------------------------------------- 0.0/375.9 MB ? eta -:--:--
   ---------------------------------------- 0.0/375.9 MB ? eta -:--:--
   ---------------------------------------- 0.5/375.9 MB 1.9 MB/s eta 0:03:21
   

ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
tensorflow-cpu 2.18.1 requires tensorboard<2.19,>=2.18, but you have tensorboard 2.19.0 which is incompatible.


In [1]:
from tensorflow.keras.datasets import imdb
from tensorflow.keras.preprocessing.sequence import pad_sequences
import torch
from torch.utils.data import DataLoader, Dataset
import torch.nn as nn

ModuleNotFoundError: No module named 'tensorflow.keras'

In [None]:
# parameters

vocab_size = 10000
max_len = 500

# Load the dataset

(X_train,y_train), (X_test, y_test) = imdb.load_data(num_words=vocab_size)

In [None]:
X_train = pad_sequences(X_train,maxlen=max_len, padding='post',truncating='post')
X_test = pad_sequences(X_test,maxlen=max_len, padding='post',truncating='post')

##  PyTorch Dataset and DataLoader

In [None]:
class IMDBDataset(Dataset):
  def __init__(self, texts, labels):
    self.texts = torch.LongTensor(texts)
    self.labels = torch.FloatTensor(labels)

  def __len__(self):
    return len(self.labels)

  def __getitem__(self, idx):
    return self.texts[idx], self.labels[idx]

In [None]:
## DataLoader

train_data = IMDBDataset(X_train, y_train)
test_data = IMDBDataset(X_test, y_test)

train_loader = DataLoader(train_data, batch_size=64, shuffle=True)

test_loader = DataLoader(test_data, batch_size=64, shuffle=False)

## Define the LSTM Model

In [None]:
class LSTM(nn.Module):
  def __init__(self,vocab_size, embed_dim, hidden_dim):
    super(LSTM, self).__init__()
    self.embedding = nn.Embedding(vocab_size, embed_dim)
    self.lstm = nn.LSTM(embed_dim, hidden_dim,batch_first=True)
    self.fc = nn.Linear(hidden_dim, 1)

  def forward(self,x):
    x = self.embedding(x)
    output, _ = self.lstm(x)
    last_hidden = output[:,-1,:] # last time step
    out = self.fc(last_hidden)
    return torch.sigmoid(out).squeeze(1)

## ⚙️ Model Initialization


In [2]:
device = "cuda" if torch.cuda.is_available() else "cpu"

model_1 = LSTM(vocab_size=vocab_size, embed_dim=128, hidden_dim=64).to(device)

loss_fn = nn.BCELoss()

optimizer = torch.optim.Adam(model_1.parameters(),lr=0.001)

NameError: name 'torch' is not defined

## Training and Evaluation Functions

In [8]:
def train(model,dataloader):
  model.train()
  total_loss = 0

  for x, y in dataloader:
    x, y = x.to(device), y.to(device)

    outputs = model(x)
    loss = loss_fn(outputs, y)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    total_loss += loss.item()

  return total_loss / len(dataloader)

In [9]:
def evaluate(model, dataloader):
  model.eval()

  correct, total = 0, 0

  with torch.no_grad():
    for x, y in dataloader:
      x, y = x.to(device), y.to(device)

      outputs = model(x)
      preds = (outputs > 0.5).float()

      correct += (preds == y).sum().item()
      total += y.size(0)

    return correct / total

In [10]:
for epoch in range(5):
  train_loss = train(model_1, train_loader)
  test_acc = evaluate(model_1, test_loader)
  print(f"Epoch {epoch+1}, Loss : {train_loss:.4f}, Test Acc :{test_acc:.4f}")

Epoch 1, Loss : 0.6940, Test Acc :0.5042
Epoch 2, Loss : 0.6912, Test Acc :0.4990
Epoch 3, Loss : 0.6837, Test Acc :0.5002
Epoch 4, Loss : 0.6649, Test Acc :0.5066
Epoch 5, Loss : 0.6478, Test Acc :0.5068


## Define the BI-LSTM Model

In [None]:
class LSTM(nn.Module):
  def __init__(self,vocab_size, embed_dim, hidden_dim):
    super(LSTM, self).__init__()
    self.embedding = nn.Embedding(vocab_size, embed_dim)
    self.lstm = nn.LSTM(embed_dim, hidden_dim,batch_first=True)
    self.fc = nn.Linear(hidden_dim, 1)

  def forward(self,x):
    x = self.embedding(x)
    output, _ = self.lstm(x)
    last_hidden = output[:,-1,:] # last time step
    out = self.fc(last_hidden)
    return torch.sigmoid(out).squeeze(1)