In [None]:
!pip install transformers
!pip install datasets

In [None]:
from datasets import load_dataset
# Load only the first 10% of the "train" split (see the slice in `split`).
dataset = load_dataset('turkish_product_reviews', split = "train[:10%]")

In [None]:
from transformers import AutoTokenizer, AutoModel

# Checkpoint name passed to from_pretrained for both the tokenizer and the model.
PRE_TRAINED_MODEL_NAME = 'savasy/bert-base-turkish-sentiment-cased'

# Tokenizer and model are loaded from the same checkpoint.
tokenizer = AutoTokenizer.from_pretrained(PRE_TRAINED_MODEL_NAME)
# NOTE(review): the training cell further down calls from_pretrained again and
# rebinds `model`, so this instance is not the one that gets trained.
model = AutoModel.from_pretrained(PRE_TRAINED_MODEL_NAME)

In [None]:
# Display the loaded dataset object.
dataset

In [None]:
# Peek at the "sentence" field of the first record.
dataset[0]["sentence"]

In [None]:
def read_split(split_dir):
    """Collect the review texts and sentiment labels from a dataset split.

    Args:
        split_dir: Iterable of records (despite the name, not a directory
            path); each record must support ``record["sentence"]`` and
            ``record["sentiment"]``.

    Returns:
        A ``(texts, labels)`` tuple of two equally long lists, in iteration
        order.
    """
    texts = []
    labels = []
    # Iterate over the argument rather than the notebook-level `dataset`
    # global, so the function works for whichever split the caller passes in.
    for record in split_dir:
        texts.append(record["sentence"])
        labels.append(record["sentiment"])

    return texts, labels

In [None]:
# Flatten the dataset into parallel lists of review texts and sentiment labels.
texts, labels = read_split(dataset)

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the examples as the test set. A fixed random_state makes the
# shuffle reproducible across kernel restarts.
train_texts, test_texts, train_labels, test_labels = train_test_split(
    texts, labels, test_size=.2, random_state=42
)

In [None]:
# Carve a validation set (20% of the current training examples) out of the
# training split; random_state keeps the split reproducible.
# NOTE: this rebinds train_texts/train_labels, so re-running only this cell
# shrinks the training set again -- re-run the previous split cell first.
train_texts, val_texts, train_labels, val_labels = train_test_split(
    train_texts, train_labels, test_size=.2, random_state=42
)

In [None]:
# Tokenize the train, validation and test texts separately, with truncation
# and padding enabled for each call.
train_encodings = tokenizer(train_texts, truncation=True, padding=True)
val_encodings = tokenizer(val_texts, truncation=True, padding=True)
test_encodings = tokenizer(test_texts, truncation=True, padding=True)

In [None]:
import torch

class ReviewDataset(torch.utils.data.Dataset):
    """Map-style dataset pairing tokenizer encodings with their labels."""

    def __init__(self, encodings, labels):
        # encodings: mapping of field name -> per-example sequences
        # labels: one label per example, indexed in parallel with encodings
        self.labels = labels
        self.encodings = encodings

    def __len__(self):
        """Return the number of examples, defined by the label count."""
        return len(self.labels)

    def __getitem__(self, idx):
        """Return example ``idx`` as a dict of tensors, label under 'labels'."""
        sample = {}
        for field, values in self.encodings.items():
            sample[field] = torch.tensor(values[idx])
        sample['labels'] = torch.tensor(self.labels[idx])
        return sample

# Wrap each split's encodings and labels in a ReviewDataset.
train_dataset = ReviewDataset(train_encodings, train_labels)
val_dataset = ReviewDataset(val_encodings, val_labels)
test_dataset = ReviewDataset(test_encodings, test_labels)

In [None]:
from torch.utils.data import DataLoader
from torch.optim import AdamW  # transformers' own AdamW is deprecated in favour of torch's
from transformers import AutoModelForSequenceClassification

# Training hyperparameters.
NUM_EPOCHS = 3
BATCH_SIZE = 4
LEARNING_RATE = 5e-5

device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# Load the checkpoint with a sequence-classification head: the bare AutoModel
# only returns hidden states, so it produces no loss to optimise.
model = AutoModelForSequenceClassification.from_pretrained(PRE_TRAINED_MODEL_NAME)
model.to(device)
model.train()

train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)

# weight_decay=0.0 keeps the default of the transformers optimizer this replaces
# (torch's AdamW would otherwise default to 0.01).
optim = AdamW(model.parameters(), lr=LEARNING_RATE, weight_decay=0.0)

for epoch in range(NUM_EPOCHS):
    for batch in train_loader:
        optim.zero_grad()
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        # Separate name so the notebook-level `labels` list is not overwritten.
        batch_labels = batch['labels'].to(device)
        # Passing labels makes the model compute the classification loss itself.
        outputs = model(input_ids, attention_mask=attention_mask, labels=batch_labels)
        loss = outputs.loss
        loss.backward()
        optim.step()

# Switch to inference mode; the trailing ';' hides the long module repr.
model.eval();

In [None]:
#torch.save(model, "/content/drive/MyDrive/Colab_Notebooks/pytorch_model")

In [None]:
review_text = "Bu ürünü çok sevdim"
# Call the tokenizer directly: encode_plus is deprecated in favour of
# tokenizer(...), which returns the same fields for a single string.
encoded_review = tokenizer(review_text)

In [None]:
# Pull the token ids and the attention mask out of the encoded review.
input_ids = encoded_review['input_ids']
attention_mask = encoded_review['attention_mask']