In [6]:
# Install all the required dependencies for the project
!pip install spacy==2.2.4 --quiet
!python -m spacy download en_core_web_md
!pip install pytorch-lightning==1.6.5 spacy==2.2.4


You should consider upgrading via the '/Users/vitalii.mishchenko/Documents/experiments/2302-nlp-course/venv/bin/python -m pip install --upgrade pip' command.[0m
Collecting en_core_web_md==2.2.5
  Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_md-2.2.5/en_core_web_md-2.2.5.tar.gz (96.4 MB)
     |████████████████████████████████| 96.4 MB 1.8 MB/s            
[?25h  Preparing metadata (setup.py) ... [?25ldone
You should consider upgrading via the '/Users/vitalii.mishchenko/Documents/experiments/2302-nlp-course/venv/bin/python -m pip install --upgrade pip' command.[0m
[38;5;2m✔ Download and installation successful[0m
You can now load the model via spacy.load('en_core_web_md')
You should consider upgrading via the '/Users/vitalii.mishchenko/Documents/experiments/2302-nlp-course/venv/bin/python -m pip install --upgrade pip' command.[0m


In [1]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import train_test_split
import en_core_web_md


In [2]:
nlp = en_core_web_md.load()

In [3]:
# Positive sentences
pos_sentences = ['I love this product',
                 'This is a great movie',
                 'The food was delicious']

# Negative sentences
neg_sentences = ['I hate this product',
                 'This is a terrible movie',
                 'The food was awful']

# Neutral sentences
neu_sentences = ['The weather is nice today',
                 'I am feeling okay',
                 'This book is okay']

# Combine all sentences and create labels
sentences = pos_sentences + neg_sentences + neu_sentences
labels = np.concatenate((np.ones(len(pos_sentences)),
                         np.zeros(len(neg_sentences)),
                         np.full(len(neu_sentences), 2)))

In [4]:
# Tokenize and vectorize sentences
max_len = 20
sentences_vectors = []
for sentence in sentences:
  tokens = nlp(sentence)
  vectors = []
  for token in tokens:
    if token.has_vector:
      vectors.append(token.vector)
  vectors = vectors[:max_len] # Truncate vectors if they exceed max length
  vectors += [[0] * 300] * (max_len - len(vectors)) # Pad vectors with zeros
  sentences_vectors.append(vectors)

sentences_vectors = np.array(sentences_vectors)
sentences_vectors

array([[[ 1.87329993e-01,  4.05950010e-01, -5.11740029e-01, ...,
          1.64949998e-01,  1.87570006e-01,  5.38739979e-01],
        [ 1.39489993e-01,  5.34529984e-01, -2.52469987e-01, ...,
         -1.52279995e-02,  8.84080008e-02,  3.02170008e-01],
        [-8.75950009e-02,  3.55019987e-01,  6.38680011e-02, ...,
          3.44600007e-02, -1.50270000e-01,  4.06729996e-01],
        ...,
        [ 0.00000000e+00,  0.00000000e+00,  0.00000000e+00, ...,
          0.00000000e+00,  0.00000000e+00,  0.00000000e+00],
        [ 0.00000000e+00,  0.00000000e+00,  0.00000000e+00, ...,
          0.00000000e+00,  0.00000000e+00,  0.00000000e+00],
        [ 0.00000000e+00,  0.00000000e+00,  0.00000000e+00, ...,
          0.00000000e+00,  0.00000000e+00,  0.00000000e+00]],

       [[-8.75950009e-02,  3.55019987e-01,  6.38680011e-02, ...,
          3.44600007e-02, -1.50270000e-01,  4.06729996e-01],
        [-8.49609971e-02,  5.01999974e-01,  2.38230010e-03, ...,
         -2.15110004e-01, -2.63040006e

In [5]:
import torch
from torch import nn
from torch.utils.data import DataLoader, Dataset

# Split data into training and testing sets
train_vectors = sentences_vectors[:7]
train_labels = labels[:7]
test_vectors = sentences_vectors[7:]
test_labels = labels[7:]

# Define PyTorch dataset
class SentimentDataset(Dataset):
  def __init__(self, vectors, labels):
    self.vectors = torch.tensor(vectors).float()
    self.labels = torch.tensor(labels).long()

  def __getitem__(self, index):
    return self.vectors[index], self.labels[index]

  def __len__(self):
    return len(self.labels)

In [6]:
# Define PyTorch LSTM model
class SentimentLSTM(nn.Module):
  def __init__(self, input_size, hidden_size, num_layers, num_classes):
    super(SentimentLSTM, self).__init__()
    self.hidden_size = hidden_size
    self.num_layers = num_layers
    self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
    self.fc = nn.Linear(hidden_size, num_classes)

  def forward(self, x):
    h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(x.device)
    c0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(x.device)
    out, _ = self.lstm(x, (h0, c0))
    out = self.fc(out[:, -1, :])
    return out

In [7]:
# Define model hyperparameters
input_size = 300
hidden_size = 128
num_layers = 2
num_classes = 3
batch_size = 1

model = SentimentLSTM(input_size, hidden_size, num_layers, num_classes)



In [8]:
# Create PyTorch data loaders
train_dataset = SentimentDataset(train_vectors, train_labels)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_dataset = SentimentDataset(test_vectors, test_labels)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

In [9]:
# Train model
num_epochs = 100
total_step = len(train_loader)
# Define loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

for epoch in range(num_epochs):
  for i, (vectors, labels) in enumerate(train_loader):
    outputs = model(vectors)
    loss = criterion(outputs, labels)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    if (i+1) % 2 == 0:
      print ('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}'
             .format(epoch+1, num_epochs, i+1, total_step, loss.item()))


Epoch [1/100], Step [2/7], Loss: 1.1282
Epoch [1/100], Step [4/7], Loss: 1.1109
Epoch [1/100], Step [6/7], Loss: 1.0754
Epoch [2/100], Step [2/7], Loss: 1.0561
Epoch [2/100], Step [4/7], Loss: 1.0961
Epoch [2/100], Step [6/7], Loss: 1.2054
Epoch [3/100], Step [2/7], Loss: 1.0163
Epoch [3/100], Step [4/7], Loss: 1.2442
Epoch [3/100], Step [6/7], Loss: 1.0510
Epoch [4/100], Step [2/7], Loss: 1.0111
Epoch [4/100], Step [4/7], Loss: 1.0328
Epoch [4/100], Step [6/7], Loss: 1.0122
Epoch [5/100], Step [2/7], Loss: 1.0038
Epoch [5/100], Step [4/7], Loss: 1.0026
Epoch [5/100], Step [6/7], Loss: 0.9938
Epoch [6/100], Step [2/7], Loss: 0.9688
Epoch [6/100], Step [4/7], Loss: 1.5254
Epoch [6/100], Step [6/7], Loss: 0.9391
Epoch [7/100], Step [2/7], Loss: 0.8308
Epoch [7/100], Step [4/7], Loss: 0.9008
Epoch [7/100], Step [6/7], Loss: 0.8297
Epoch [8/100], Step [2/7], Loss: 0.7008
Epoch [8/100], Step [4/7], Loss: 0.8532
Epoch [8/100], Step [6/7], Loss: 0.8098
Epoch [9/100], Step [2/7], Loss: 0.7525
