In [6]:
import torch
import torch.nn as nn
import os
from pyprojroot import here
os.environ["HF_HOME"] = str(here("cache/HF/"))
from transformers import AutoTokenizer, AutoModel

class TokenizationAndSentimentModel(nn.Module):
  """Sentiment scorer that tokenizes raw text inside ``forward``.

  The module bundles a Hugging Face tokenizer, a pretrained encoder and a
  linear classification head, so callers pass plain strings instead of
  pre-tokenized tensors.

  Args:
    model_name: Hugging Face checkpoint used for both tokenizer and encoder.
    num_classes: Number of output scores produced by the classification head.
  """

  def __init__(self, model_name="prajjwal1/bert-tiny", num_classes=3):
    super().__init__()
    self.tokenizer = AutoTokenizer.from_pretrained(model_name)
    self.encoder = AutoModel.from_pretrained(model_name)
    # The head maps the encoder's hidden size to one score per sentiment class.
    self.classifier = nn.Linear(self.encoder.config.hidden_size, num_classes)

  def forward(self, text):
    """Score a string or a list of strings.

    Args:
      text: A single string or a list of strings to classify.

    Returns:
      A float tensor of shape ``(batch, num_classes)`` holding the
      element-wise sigmoid of the classifier logits.
    """
    # truncation=True is a no-op unless a maximum length is known. This
    # tokenizer does not define one (it warns and skips truncation), so take
    # the limit from the encoder config when it is available.
    max_length = getattr(self.encoder.config, "max_position_embeddings", None)
    tokens = self.tokenizer(
      text,
      return_tensors="pt",
      padding=True,
      truncation=True,
      max_length=max_length,
    )

    # The tokenizer builds its tensors on the CPU; move them next to the
    # weights so the module keeps working after `.to(device)`.
    device = self.classifier.weight.device
    tokens = {name: tensor.to(device) for name, tensor in tokens.items()}

    outputs = self.encoder(**tokens)
    # Position 0 of each sequence is used as the pooled representation
    # (the [CLS] token for BERT-style tokenizers).
    cls_rep = outputs.last_hidden_state[:, 0, :]
    logits = self.classifier(cls_rep)
    # NOTE(review): sigmoid scores every class independently, so a row does
    # not sum to 1. If the classes are mutually exclusive, softmax over the
    # last dimension is probably what is wanted - confirm before changing,
    # since it alters the returned values.
    return torch.sigmoid(logits)


In [7]:
# Build the model and put it in evaluation mode; eval() returns the module
# itself, so construction and mode switch can be chained.
model = TokenizationAndSentimentModel().eval()

# One-element batch of raw text; tokenization happens inside the model.
input_text = ["I love this!"]

# Forward-only smoke test, so gradient tracking is disabled.
with torch.no_grad():
  output = model(input_text)

print(f"Input text: {input_text[0]}")
print(f"Output probabilities: {output.numpy()}")

config.json:   0%|          | 0.00/285 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/17.8M [00:00<?, ?B/s]

Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


Input text: I love this!
Output probabilities: [[0.40785298 0.5385456  0.53477716]]


In [8]:
# Persist only the module's state_dict (the learned weights); the tokenizer is
# not part of it and is reloaded from the checkpoint name when the class is built.
weights_path = here("models/dummy_model_3.pt")
# Create the target directory first so a fresh checkout without models/ does
# not make torch.save fail.
weights_path.parent.mkdir(parents=True, exist_ok=True)
torch.save(model.state_dict(), weights_path)

model.safetensors:   0%|          | 0.00/17.7M [00:00<?, ?B/s]