In [1]:
import pandas as pd
from transformers import BertTokenizer, BertModel
import torch
import numpy as np
from torch import nn
from torch.optim import Adam
from tqdm import tqdm
import re

In [2]:
df = pd.read_csv('../../dataframe.csv')
df = df.dropna(subset=['text', 'is_offensive'])
df['is_offensive'] = df['is_offensive'].astype('int')
df['target'] = df['target'].map({'INSULT': 0, 'RACIST': 1, 'SEXIST': 2, 'PROFANITY': 3, 'OTHER': 4})
df = df[~(((df['is_offensive'] == 0) & (df['target'] != 4)) | df['text'].isnull() | (df['is_offensive'] == 1) & (df['target'] == 4))]
df = df[df['is_offensive'] == 1]
df = df.drop(['id','is_offensive'], axis=1)
df.reset_index(drop=True, inplace=True)
for i in range(len(df)):
    if len(df['text'][i]) == 1:
        #delete row
        df = df.drop(i)
df.reset_index(drop=True, inplace=True)
df

Unnamed: 0,text,target
0,çürük dişli,0
1,Bu adamın islama ve müslümanlara verdiği zarar...,1
2,erkekler zora gelmez,2
3,Utanmazın götüne kazık sokmuşlar bu tıkırtı ne...,3
4,amını siktiğimin yarrağı,3
...,...,...
8878,Azıcık bile beynin olsa anlardın durumu,0
8879,zenciler hayata bir sıfır yenik başlar,1
8880,Bu Yunanlılar herşeyimizi çalmış resmen,1
8881,Siyahi gençler potansiyel suçlu olarak görünme...,1


In [3]:
manually_cheked = pd.read_csv('../../augmentation/new_generated/manually_checked/combined.csv')
df = pd.concat([df, manually_cheked], ignore_index=True)
df

Unnamed: 0,text,target
0,çürük dişli,0
1,Bu adamın islama ve müslümanlara verdiği zarar...,1
2,erkekler zora gelmez,2
3,Utanmazın götüne kazık sokmuşlar bu tıkırtı ne...,3
4,amını siktiğimin yarrağı,3
...,...,...
10846,lan amina koydugumun şeriatçı orospu çocuğu seni,3
10847,elbistanı görünce tüylerim diken diken oldu...,3
10848,ateist misin dinle aran nasıl sanane aq https...,0
10849,seviye yine yaşa indi bana müsade,0


In [4]:
def preprocess(text):
    text = text.lower()
    text = re.sub(r'[^\w\s]', '', text)
    text = re.sub(r'\s+', ' ', text)
    text = re.sub('[0-9]', '', text)
    return text

for i in range(len(df)):
    df['text'][i] = preprocess(df['text'][i])

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['text'][i] = preprocess(df['text'][i])


In [5]:
tokenizer = BertTokenizer.from_pretrained("dbmdz/bert-base-turkish-cased")

class Dataset(torch.utils.data.Dataset):
    
    def __init__(self, df):

        self.labels = [label for label in df['target']]
        self.texts = [tokenizer(text, 
                               padding='max_length', max_length = 512, truncation=True,
                                return_tensors="pt") for text in df['text']]

    def classes(self):
        return self.labels

    def __len__(self):
        return len(self.labels)

    def get_batch_labels(self, idx):
        # Fetch a batch of labels
        return np.array(self.labels[idx])

    def get_batch_texts(self, idx):
        # Fetch a batch of inputs
        return self.texts[idx]

    def __getitem__(self, idx):

        batch_texts = self.get_batch_texts(idx)
        batch_y = self.get_batch_labels(idx)

        return batch_texts, batch_y

In [6]:
# np.random.seed(42)
# df_train, df_val, df_test = np.split(df.sample(frac=1, random_state=42), 
#                                      [int(.8*len(df)), int(.9*len(df))])

# print(len(df_train),len(df_val), len(df_test))

8680 1085 1086


In [6]:
class BertClassifier(nn.Module):

    def __init__(self, dropout=0.5):

        super(BertClassifier, self).__init__()

        self.bert = BertModel.from_pretrained("dbmdz/bert-base-turkish-cased")
        self.dropout = nn.Dropout(dropout)
        self.linear = nn.Linear(768, 4)
        self.relu = nn.ReLU()

    def forward(self, input_id, mask):

        _, pooled_output = self.bert(input_ids= input_id, attention_mask=mask, return_dict=False)
        dropout_output = self.dropout(pooled_output)
        linear_output = self.linear(dropout_output)
        final_layer = self.relu(linear_output)

        return final_layer

In [7]:
def train(model, train_data, val_data, learning_rate, epochs):

    train, val = Dataset(train_data), Dataset(val_data)

    train_dataloader = torch.utils.data.DataLoader(train, batch_size=2, shuffle=True)
    val_dataloader = torch.utils.data.DataLoader(val, batch_size=2)

    use_cuda = torch.cuda.is_available()
    device = torch.device("cuda" if use_cuda else "cpu")

    criterion = nn.CrossEntropyLoss()
    optimizer = Adam(model.parameters(), lr= learning_rate)

    if use_cuda:
        model = model.cuda()
        criterion = criterion.cuda()

    for epoch_num in range(epochs):

        total_acc_train = 0
        total_loss_train = 0

        for train_input, train_label in tqdm(train_dataloader):

            train_label = train_label.to(device)
            mask = train_input['attention_mask'].to(device)
            input_id = train_input['input_ids'].squeeze(1).to(device)

            output = model(input_id, mask)
            
            batch_loss = criterion(output, train_label.long())
            total_loss_train += batch_loss.item()
            
            acc = (output.argmax(dim=1) == train_label).sum().item()
            total_acc_train += acc

            model.zero_grad()
            batch_loss.backward()
            optimizer.step()
        
        total_acc_val = 0
        total_loss_val = 0

        with torch.no_grad():

            for val_input, val_label in val_dataloader:

                val_label = val_label.to(device)
                mask = val_input['attention_mask'].to(device)
                input_id = val_input['input_ids'].squeeze(1).to(device)

                output = model(input_id, mask)

                batch_loss = criterion(output, val_label.long())
                total_loss_val += batch_loss.item()
                
                acc = (output.argmax(dim=1) == val_label).sum().item()
                total_acc_val += acc
        
        print(
            f'Epochs: {epoch_num + 1} | Train Loss: {total_loss_train / len(train_data): .3f} \
            | Train Accuracy: {total_acc_train / len(train_data): .3f} \
            | Val Loss: {total_loss_val / len(val_data): .3f} \
            | Val Accuracy: {total_acc_val / len(val_data): .3f}')                  

In [8]:
np.random.seed(42)
df_train = df[:-1]
df_val = df[-1:]

EPOCHS = 2
model = BertClassifier()
LR = 1e-6
              
train(model, df_train, df_val, LR, EPOCHS)

Some weights of the model checkpoint at dbmdz/bert-base-turkish-cased were not used when initializing BertModel: ['cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
100%|██████████| 5425/5425 [38:58<00:00,  2.32it/s]


Epochs: 1 | Train Loss:  0.445             | Train Accuracy:  0.631             | Val Loss:  0.145             | Val Accuracy:  1.000


100%|██████████| 5425/5425 [38:56<00:00,  2.32it/s]

Epochs: 2 | Train Loss:  0.127             | Train Accuracy:  0.928             | Val Loss:  0.231             | Val Accuracy:  1.000





In [8]:
from sklearn.model_selection import KFold
from sklearn.model_selection import train_test_split
EPOCHS = 2
LR = 1e-6

# make 10 folds cross validation separating train test validation sets
kf = KFold(n_splits=10, shuffle=True, random_state=42)
i = 1
for train_index, test_index in kf.split(df):
    print(f'This is the {i}th fold')
    df_train, df_test = df.iloc[train_index], df.iloc[test_index]
    df_train, df_val = train_test_split(df_train, test_size=0.1, random_state=42)
    model = BertClassifier()
    train(model, df_train, df_val, LR, EPOCHS)
    i+=1

This is the 1th fold


Some weights of the model checkpoint at dbmdz/bert-base-turkish-cased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.predictions.decoder.bias', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
100%|██████████| 4394/4394 [31:37<00:00,  2.32it/s]


Epochs: 1 | Train Loss:  0.477             | Train Accuracy:  0.624             | Val Loss:  0.234             | Val Accuracy:  0.864


100%|██████████| 4394/4394 [31:26<00:00,  2.33it/s]


Epochs: 2 | Train Loss:  0.154             | Train Accuracy:  0.910             | Val Loss:  0.156             | Val Accuracy:  0.898
This is the 2th fold


Some weights of the model checkpoint at dbmdz/bert-base-turkish-cased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.predictions.decoder.bias', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
100%|██████████| 4395/4395 [31:25<00:00,  2.33it/s]


Epochs: 1 | Train Loss:  0.509             | Train Accuracy:  0.568             | Val Loss:  0.231             | Val Accuracy:  0.870


100%|██████████| 4395/4395 [31:25<00:00,  2.33it/s]


Epochs: 2 | Train Loss:  0.165             | Train Accuracy:  0.903             | Val Loss:  0.120             | Val Accuracy:  0.933
This is the 3th fold


Some weights of the model checkpoint at dbmdz/bert-base-turkish-cased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.predictions.decoder.bias', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
100%|██████████| 4395/4395 [31:25<00:00,  2.33it/s]


Epochs: 1 | Train Loss:  0.507             | Train Accuracy:  0.591             | Val Loss:  0.256             | Val Accuracy:  0.847


100%|██████████| 4395/4395 [31:25<00:00,  2.33it/s]


Epochs: 2 | Train Loss:  0.171             | Train Accuracy:  0.900             | Val Loss:  0.142             | Val Accuracy:  0.909
This is the 4th fold


Some weights of the model checkpoint at dbmdz/bert-base-turkish-cased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.predictions.decoder.bias', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
100%|██████████| 4395/4395 [31:25<00:00,  2.33it/s]


Epochs: 1 | Train Loss:  0.494             | Train Accuracy:  0.591             | Val Loss:  0.229             | Val Accuracy:  0.864


100%|██████████| 4395/4395 [31:25<00:00,  2.33it/s]


Epochs: 2 | Train Loss:  0.150             | Train Accuracy:  0.910             | Val Loss:  0.139             | Val Accuracy:  0.915
This is the 5th fold


Some weights of the model checkpoint at dbmdz/bert-base-turkish-cased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.predictions.decoder.bias', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
100%|██████████| 4395/4395 [31:25<00:00,  2.33it/s]


Epochs: 1 | Train Loss:  0.553             | Train Accuracy:  0.514             | Val Loss:  0.282             | Val Accuracy:  0.835


100%|██████████| 4395/4395 [31:25<00:00,  2.33it/s]


Epochs: 2 | Train Loss:  0.172             | Train Accuracy:  0.899             | Val Loss:  0.148             | Val Accuracy:  0.913
This is the 6th fold


Some weights of the model checkpoint at dbmdz/bert-base-turkish-cased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.predictions.decoder.bias', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
100%|██████████| 4395/4395 [31:25<00:00,  2.33it/s]


Epochs: 1 | Train Loss:  0.458             | Train Accuracy:  0.637             | Val Loss:  0.245             | Val Accuracy:  0.855


100%|██████████| 4395/4395 [31:26<00:00,  2.33it/s]


Epochs: 2 | Train Loss:  0.150             | Train Accuracy:  0.915             | Val Loss:  0.145             | Val Accuracy:  0.901
This is the 7th fold


Some weights of the model checkpoint at dbmdz/bert-base-turkish-cased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.predictions.decoder.bias', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
100%|██████████| 4395/4395 [31:25<00:00,  2.33it/s]


Epochs: 1 | Train Loss:  0.487             | Train Accuracy:  0.611             | Val Loss:  0.258             | Val Accuracy:  0.849


100%|██████████| 4395/4395 [31:25<00:00,  2.33it/s]


Epochs: 2 | Train Loss:  0.169             | Train Accuracy:  0.904             | Val Loss:  0.152             | Val Accuracy:  0.905
This is the 8th fold


Some weights of the model checkpoint at dbmdz/bert-base-turkish-cased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.predictions.decoder.bias', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
100%|██████████| 4395/4395 [31:25<00:00,  2.33it/s]


Epochs: 1 | Train Loss:  0.546             | Train Accuracy:  0.518             | Val Loss:  0.325             | Val Accuracy:  0.811


100%|██████████| 4395/4395 [31:25<00:00,  2.33it/s]


Epochs: 2 | Train Loss:  0.179             | Train Accuracy:  0.900             | Val Loss:  0.162             | Val Accuracy:  0.901
This is the 9th fold


Some weights of the model checkpoint at dbmdz/bert-base-turkish-cased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.predictions.decoder.bias', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
100%|██████████| 4395/4395 [31:24<00:00,  2.33it/s]


Epochs: 1 | Train Loss:  0.557             | Train Accuracy:  0.502             | Val Loss:  0.343             | Val Accuracy:  0.801


100%|██████████| 4395/4395 [31:26<00:00,  2.33it/s]


Epochs: 2 | Train Loss:  0.191             | Train Accuracy:  0.889             | Val Loss:  0.148             | Val Accuracy:  0.909
This is the 10th fold


Some weights of the model checkpoint at dbmdz/bert-base-turkish-cased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.predictions.decoder.bias', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
100%|██████████| 4395/4395 [31:27<00:00,  2.33it/s]


Epochs: 1 | Train Loss:  0.477             | Train Accuracy:  0.613             | Val Loss:  0.222             | Val Accuracy:  0.886


100%|██████████| 4395/4395 [31:28<00:00,  2.33it/s]


Epochs: 2 | Train Loss:  0.169             | Train Accuracy:  0.899             | Val Loss:  0.137             | Val Accuracy:  0.916


In [10]:
# 1. fold 1. epoch -> Train Loss:  0.477             | Train Accuracy:  0.624             | Val Loss:  0.234             | Val Accuracy:  0.864
# 1. fold 2. epoch -> Train Loss:  0.154             | Train Accuracy:  0.910             | Val Loss:  0.156             | Val Accuracy:  0.898

# 2. fold 1. epoch -> Train Loss:  0.509             | Train Accuracy:  0.568             | Val Loss:  0.231             | Val Accuracy:  0.870
# 2. fold 2. epoch -> Train Loss:  0.165             | Train Accuracy:  0.903             | Val Loss:  0.120             | Val Accuracy:  0.933

# 3. fold 1. epoch -> Train Loss:  0.507             | Train Accuracy:  0.591             | Val Loss:  0.256             | Val Accuracy:  0.847
# 3. fold 2. epoch -> Train Loss:  0.171             | Train Accuracy:  0.900             | Val Loss:  0.142             | Val Accuracy:  0.909

# 4. fold 1. epoch -> Train Loss:  0.494             | Train Accuracy:  0.591             | Val Loss:  0.229             | Val Accuracy:  0.864
# 4. fold 2. epoch -> Train Loss:  0.150             | Train Accuracy:  0.910             | Val Loss:  0.139             | Val Accuracy:  0.915

# 5. fold 1. epoch -> Train Loss:  0.553             | Train Accuracy:  0.514             | Val Loss:  0.282             | Val Accuracy:  0.835
# 5. fold 2. epoch -> Train Loss:  0.172             | Train Accuracy:  0.899             | Val Loss:  0.148             | Val Accuracy:  0.913

# 6. fold 1. epoch -> Train Loss:  0.458             | Train Accuracy:  0.637             | Val Loss:  0.245             | Val Accuracy:  0.855
# 6. fold 2. epoch -> Train Loss:  0.150             | Train Accuracy:  0.915             | Val Loss:  0.145             | Val Accuracy:  0.901

# 7. fold 1. epoch -> Train Loss:  0.487             | Train Accuracy:  0.611             | Val Loss:  0.258             | Val Accuracy:  0.849
# 7. fold 2. epoch -> Train Loss:  0.169             | Train Accuracy:  0.904             | Val Loss:  0.152             | Val Accuracy:  0.905

# 8. fold 1. epoch -> Train Loss:  0.546             | Train Accuracy:  0.518             | Val Loss:  0.325             | Val Accuracy:  0.811
# 8. fold 2. epoch -> Train Loss:  0.179             | Train Accuracy:  0.900             | Val Loss:  0.162             | Val Accuracy:  0.901 
 
# 9. fold 1. epoch -> Train Loss:  0.557             | Train Accuracy:  0.502             | Val Loss:  0.343             | Val Accuracy:  0.801
# 9. fold 2. epoch -> Train Loss:  0.191             | Train Accuracy:  0.889             | Val Loss:  0.148             | Val Accuracy:  0.909

# 10. fold 1. epoch ->Train Loss:  0.477             | Train Accuracy:  0.613             | Val Loss:  0.222             | Val Accuracy:  0.886
# 10. fold 2. epoch ->Train Loss:  0.169             | Train Accuracy:  0.899             | Val Loss:  0.137             | Val Accuracy:  0.916

train_acc = [0.910, 0.903, 0.900, 0.910, 0.899, 0.915, 0.904, 0.900, 0.889, 0.899]
val_acc = [0.898, 0.933, 0.909, 0.915, 0.913, 0.901, 0.905, 0.901, 0.909, 0.916]
print(f'Mean Train Accuracy: {np.mean(train_acc)}')
print(f'Mean Val Accuracy: {np.mean(val_acc)}')

Mean Train Accuracy: 0.9029
Mean Val Accuracy: 0.9100000000000001


In [9]:
def evaluate(model, test_data):

    test = Dataset(test_data)

    test_dataloader = torch.utils.data.DataLoader(test, batch_size=2)

    use_cuda = torch.cuda.is_available()
    device = torch.device("cuda" if use_cuda else "cpu")

    if use_cuda:

        model = model.cuda()

    total_acc_test = 0
    with torch.no_grad():

        for test_input, test_label in test_dataloader:

              test_label = test_label.to(device)
              mask = test_input['attention_mask'].to(device)
              input_id = test_input['input_ids'].squeeze(1).to(device)

              output = model(input_id, mask)

              acc = (output.argmax(dim=1) == test_label).sum().item()
              total_acc_test += acc
    
    print(f'Test Accuracy: {total_acc_test / len(test_data): .3f}')
    
evaluate(model, df_test)

Test Accuracy:  0.901


In [9]:
# try to predict a sentence
def predict(model, sentence):
    
        use_cuda = torch.cuda.is_available()
        device = torch.device("cuda" if use_cuda else "cpu")
    
        if use_cuda:
            model = model.cuda()
    
        with torch.no_grad():
    
            input_id = tokenizer(sentence, padding='max_length', max_length = 512, truncation=True, return_tensors="pt")
            mask = input_id['attention_mask'].to(device)
            input_id = input_id['input_ids'].squeeze(1).to(device)
    
            output = model(input_id, mask)
    
            return output.argmax(dim=1).item()

In [13]:
predict(model, "zenci gibi kıvırtma karı mısın birader")

2

In [15]:
from sklearn.metrics import f1_score

predicted = []
for i in range(len(df_test.text)):
    predicted.append(predict(model, df_test.text.iloc[i]))
    
f1_score(df_test.target, predicted, average='macro')

0.9056443348296539

In [16]:
true = 0
for i in range(len(df_test.text)):
    if predicted[i] == df_test.target.iloc[i]:
        true += 1

acc = true / len(df_test.text)

In [17]:
acc

0.9014732965009208

In [18]:
df_test['target'].value_counts()

0    324
3    317
1    241
2    204
Name: target, dtype: int64

In [14]:
torch.save(model, './models/bert_model_target.pt')