In [None]:
# Install all the required dependencies for the project
!pip install spacy==2.2.4 --quiet
!python -m spacy download en_core_web_md
!pip install pytorch-lightning==1.6.5 spacy==2.2.4

In [2]:
import numpy as np
import en_core_web_md
import torch
from torch import nn
from torch.utils.data import DataLoader, Dataset

In [3]:
nlp = en_core_web_md.load()

In [4]:
# Positive sentences
pos_sentences = ['I love this product',
                 'This is a great movie',
                 'The food was delicious',
                 'The book was awesome']

# Negative sentences
neg_sentences = ['I hate this product',
                 'This is a terrible movie',
                 'The food was awful',
                 'The book was boring']

# Neutral sentences
neu_sentences = ['The weather is nice today',
                 'I am feeling okay',
                 'This book is okay',
                 'I am ok with this breakfast']

# Combine all sentences and create labels
sentences = pos_sentences + neg_sentences + neu_sentences
labels = [1, 1, 1, 1, 0, 0, 0, 0, 2, 2, 2, 2]

In [5]:
# Tokenize and vectorize sentences
# Convert sentence to list of word embeddings, e.g.[(300), (300), (300), (300)]

# maximum number of words embeddings in one sentence
max_len = 20

sentences_vectors = []
for sentence in sentences:
  tokens = nlp(sentence)
  vectors = []
  for token in tokens:
    if token.has_vector:
      vectors.append(token.vector)
  vectors = vectors[:max_len] # Truncate vectors if they exceed max length
  vectors += [[0] * 300] * (max_len - len(vectors)) # Pad vectors with zeros
  sentences_vectors.append(vectors)

sentences_vectors = np.array(sentences_vectors)

In [6]:
# Split data into training and testing sets
train_threshold = 7
train_vectors = sentences_vectors[:train_threshold]
train_labels = labels[:train_threshold]
test_vectors = sentences_vectors[train_threshold:]
test_labels = labels[train_threshold:]

# Define PyTorch dataset
class SentimentDataset(Dataset):
  def __init__(self, vectors, labels):
    self.vectors = torch.tensor(vectors).float()
    self.labels = torch.tensor(labels).long()

  def __getitem__(self, index):
    return self.vectors[index], self.labels[index]

  def __len__(self):
    return len(self.labels)

In [7]:
# Define PyTorch LSTM model
class SentimentLSTM(nn.Module):
  def __init__(self, input_size, hidden_size, num_layers, num_classes):
    super(SentimentLSTM, self).__init__()
    self.hidden_size = hidden_size
    self.num_layers = num_layers
    self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
    self.fc = nn.Linear(hidden_size, num_classes)

  def forward(self, batch):
    h0 = torch.zeros(self.num_layers, batch.size(0), self.hidden_size).to(batch.device)
    c0 = torch.zeros(self.num_layers, batch.size(0), self.hidden_size).to(batch.device)
    # out_lstm - contains the hidden state of the LSTM at each time step
    # I could use it if I need to access all these states.
    # If I need just combined, resulted state I could use "hidden_state" instead.
    out_lstm, (hidden_state, cell_state) = self.lstm(batch, (h0, c0))
    out_linear = self.fc(hidden_state[-1])
    return out_linear

In [8]:
# Define model hyperparameters
input_size = 300
hidden_size = 128
num_layers = 1
num_classes = 3
batch_size = 1

# Create PyTorch data loaders
train_dataset = SentimentDataset(train_vectors, train_labels)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=False)
test_dataset = SentimentDataset(test_vectors, test_labels)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

# Create model
model = SentimentLSTM(input_size, hidden_size, num_layers, num_classes)

In [14]:
# Train model
num_epochs = 100

# Define loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

for epoch in range(num_epochs):
  for i, (vectors, labels) in enumerate(train_loader):
    outputs = model(vectors)
    loss = criterion(outputs, labels)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    if (i+1) % 2 == 0:
      print ('Epoch [{}/{}], Loss: {:.4f}'
             .format(epoch+1, num_epochs, loss.item()))


Epoch [1/100], Loss: 0.0003
Epoch [1/100], Loss: 0.0002
Epoch [1/100], Loss: 8.5776
Epoch [2/100], Loss: 0.0003
Epoch [2/100], Loss: 0.0004
Epoch [2/100], Loss: 0.0002
Epoch [3/100], Loss: 0.0006
Epoch [3/100], Loss: 0.0006
Epoch [3/100], Loss: 0.0002
Epoch [4/100], Loss: 0.0300
Epoch [4/100], Loss: 0.0008
Epoch [4/100], Loss: 0.0002
Epoch [5/100], Loss: 0.0008
Epoch [5/100], Loss: 0.0008
Epoch [5/100], Loss: 0.0002
Epoch [6/100], Loss: 0.0009
Epoch [6/100], Loss: 0.0009
Epoch [6/100], Loss: 6.7296
Epoch [7/100], Loss: 0.0021
Epoch [7/100], Loss: 0.0030
Epoch [7/100], Loss: 0.0002
Epoch [8/100], Loss: 0.0058
Epoch [8/100], Loss: 0.0068
Epoch [8/100], Loss: 0.0002
Epoch [9/100], Loss: 0.0093
Epoch [9/100], Loss: 0.0097
Epoch [9/100], Loss: 0.0002
Epoch [10/100], Loss: 0.0104
Epoch [10/100], Loss: 0.0101
Epoch [10/100], Loss: 0.0002
Epoch [11/100], Loss: 0.0099
Epoch [11/100], Loss: 0.0094
Epoch [11/100], Loss: 0.0002
Epoch [12/100], Loss: 0.0091
Epoch [12/100], Loss: 0.0086
Epoch [12/10

In [15]:
# Evaluate the model on the test set
correct = 0
total = 0
with torch.no_grad():
  for batch in test_loader:
    inputs, labels = batch
    outputs = model(inputs)
    _, predicted = torch.max(outputs.data, 1)
    total += labels.size(0)
    correct += (predicted == labels).sum().item()

accuracy = 100 * correct / total
print('Accuracy: {:.2f}%'.format(accuracy))

Accuracy: 20.00%


In [16]:
# Test model

# Tokenize the text
text = "bad"

tokens = nlp(text)
vectors = []
for token in tokens:
  if token.has_vector:
    vectors.append(token.vector)
vectors = vectors[:max_len] # Truncate vectors if they exceed max length
vectors += [[0] * 300] * (max_len - len(vectors)) # Pad vectors with zeros

# Convert to PyTorch tensor
inputs = torch.tensor([vectors])

# Pass input through model
model.eval()
with torch.no_grad():
  outputs = model(inputs)

# Get predicted class
probs = torch.softmax(outputs, dim=1)
_, predicted = torch.max(probs.data, 1)

# Print predicted class
print("Predicted class:", predicted)

Predicted class: tensor([0])
