In [30]:
import pandas as pd
import numpy as np
import torch
from torch.utils.data import DataLoader
from datasets import Dataset
from transformers import BertTokenizer, BertForSequenceClassification
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_recall_fscore_support, confusion_matrix, classification_report
import torch.nn as nn

In [31]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [32]:
from datasets import Dataset

df = pd.read_csv("/content/drive/MyDrive/Colab/IntelliGuard/All_Datasets.csv")
df = df.dropna(subset=["text", "label"])

print(len(df))

21082


In [33]:
train_df, test_df = train_test_split(
    df,
    test_size=0.2,
    stratify=df['label'],
    random_state=42
)

train_dataset = Dataset.from_pandas(train_df)
test_dataset = Dataset.from_pandas(test_df)

In [34]:
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

def tokenize_function(example):
    return tokenizer(
        example["text"],
        truncation=True,
        padding="max_length",
        max_length=128
    )

train_dataset = train_dataset.map(tokenize_function, batched=True)
test_dataset = test_dataset.map(tokenize_function, batched=True)

Map:   0%|          | 0/16865 [00:00<?, ? examples/s]

Map:   0%|          | 0/4217 [00:00<?, ? examples/s]

In [35]:
train_dataset.set_format(type='torch', columns=['input_ids', 'attention_mask', 'label'])
test_dataset.set_format(type='torch', columns=['input_ids', 'attention_mask', 'label'])

BATCH_SIZE = 16
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE)

In [36]:
labels = np.array(train_df['label'])
unique_classes, class_counts = np.unique(labels, return_counts=True)
weights = 1.0 / class_counts
print("Class counts:", dict(zip(unique_classes, class_counts)))
print("Class weights:", weights)

Class counts: {np.int64(0): np.int64(11200), np.int64(1): np.int64(5665)}
Class weights: [8.92857143e-05 1.76522507e-04]


In [37]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

model = BertForSequenceClassification.from_pretrained(
    'bert-base-uncased',
    num_labels=len(unique_classes)
).to(device)

class_weights = torch.tensor(weights, dtype=torch.float).to(device)
loss_fn = nn.CrossEntropyLoss(weight=class_weights)

optimizer = torch.optim.AdamW(model.parameters(), lr=2e-5)

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [38]:
def evaluate(model, dataloader, device):
    model.eval()
    preds, true_labels = [], []
    with torch.no_grad():
        for batch in dataloader:
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            labels = batch['label'].to(device)

            outputs = model(input_ids=input_ids, attention_mask=attention_mask)
            logits = outputs.logits
            predictions = torch.argmax(logits, dim=-1)

            preds.extend(predictions.cpu().numpy())
            true_labels.extend(labels.cpu().numpy())

    acc = accuracy_score(true_labels, preds)
    precision, recall, f1, _ = precision_recall_fscore_support(true_labels, preds, average='binary')
    print(f"Accuracy: {acc:.4f}, Precision: {precision:.4f}, Recall: {recall:.4f}, F1: {f1:.4f}")
    print("Confusion Matrix:")
    print(confusion_matrix(true_labels, preds))
    print(classification_report(true_labels, preds, digits=4))
    return acc

In [39]:
from tqdm.notebook import tqdm

EPOCHS = 40
for epoch in range(EPOCHS):
    model.train()
    total_loss = 0
    loop = tqdm(train_loader, desc=f"Epoch {epoch+1}/{EPOCHS}", leave=False)
    for batch in loop:
        optimizer.zero_grad()
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        labels = batch['label'].to(device)

        outputs = model(input_ids=input_ids, attention_mask=attention_mask)
        loss = loss_fn(outputs.logits, labels)
        loss.backward()
        optimizer.step()

        total_loss += loss.item()

        loop.set_postfix(loss=loss.item())

    print(f"Epoch {epoch+1}/{EPOCHS}, Average Loss: {total_loss/len(train_loader):.4f}")
    evaluate(model, test_loader, device)

Epoch 1/40:   0%|          | 0/1055 [00:00<?, ?it/s]

Epoch 1/40, Average Loss: 0.4970
Accuracy: 0.8295, Precision: 0.7570, Recall: 0.7255, F1: 0.7409
Confusion Matrix:
[[2470  330]
 [ 389 1028]]
              precision    recall  f1-score   support

           0     0.8639    0.8821    0.8729      2800
           1     0.7570    0.7255    0.7409      1417

    accuracy                         0.8295      4217
   macro avg     0.8105    0.8038    0.8069      4217
weighted avg     0.8280    0.8295    0.8286      4217



Epoch 2/40:   0%|          | 0/1055 [00:00<?, ?it/s]

Epoch 2/40, Average Loss: 0.3394
Accuracy: 0.8357, Precision: 0.7855, Recall: 0.7029, F1: 0.7419
Confusion Matrix:
[[2528  272]
 [ 421  996]]
              precision    recall  f1-score   support

           0     0.8572    0.9029    0.8795      2800
           1     0.7855    0.7029    0.7419      1417

    accuracy                         0.8357      4217
   macro avg     0.8214    0.8029    0.8107      4217
weighted avg     0.8331    0.8357    0.8332      4217



Epoch 3/40:   0%|          | 0/1055 [00:00<?, ?it/s]

Epoch 3/40, Average Loss: 0.2220
Accuracy: 0.8176, Precision: 0.7093, Recall: 0.7749, F1: 0.7406
Confusion Matrix:
[[2350  450]
 [ 319 1098]]
              precision    recall  f1-score   support

           0     0.8805    0.8393    0.8594      2800
           1     0.7093    0.7749    0.7406      1417

    accuracy                         0.8176      4217
   macro avg     0.7949    0.8071    0.8000      4217
weighted avg     0.8230    0.8176    0.8195      4217



Epoch 4/40:   0%|          | 0/1055 [00:00<?, ?it/s]

Epoch 4/40, Average Loss: 0.1309
Accuracy: 0.8203, Precision: 0.7312, Recall: 0.7354, F1: 0.7333
Confusion Matrix:
[[2417  383]
 [ 375 1042]]
              precision    recall  f1-score   support

           0     0.8657    0.8632    0.8644      2800
           1     0.7312    0.7354    0.7333      1417

    accuracy                         0.8203      4217
   macro avg     0.7985    0.7993    0.7989      4217
weighted avg     0.8205    0.8203    0.8204      4217



Epoch 5/40:   0%|          | 0/1055 [00:00<?, ?it/s]

Epoch 5/40, Average Loss: 0.0912
Accuracy: 0.8105, Precision: 0.7038, Recall: 0.7530, F1: 0.7276
Confusion Matrix:
[[2351  449]
 [ 350 1067]]
              precision    recall  f1-score   support

           0     0.8704    0.8396    0.8548      2800
           1     0.7038    0.7530    0.7276      1417

    accuracy                         0.8105      4217
   macro avg     0.7871    0.7963    0.7912      4217
weighted avg     0.8144    0.8105    0.8120      4217



Epoch 6/40:   0%|          | 0/1055 [00:00<?, ?it/s]

Epoch 6/40, Average Loss: 0.0684
Accuracy: 0.8191, Precision: 0.7433, Recall: 0.7050, F1: 0.7237
Confusion Matrix:
[[2455  345]
 [ 418  999]]
              precision    recall  f1-score   support

           0     0.8545    0.8768    0.8655      2800
           1     0.7433    0.7050    0.7237      1417

    accuracy                         0.8191      4217
   macro avg     0.7989    0.7909    0.7946      4217
weighted avg     0.8171    0.8191    0.8178      4217



Epoch 7/40:   0%|          | 0/1055 [00:00<?, ?it/s]

Epoch 7/40, Average Loss: 0.0658
Accuracy: 0.8143, Precision: 0.6974, Recall: 0.7904, F1: 0.7410
Confusion Matrix:
[[2314  486]
 [ 297 1120]]
              precision    recall  f1-score   support

           0     0.8863    0.8264    0.8553      2800
           1     0.6974    0.7904    0.7410      1417

    accuracy                         0.8143      4217
   macro avg     0.7918    0.8084    0.7981      4217
weighted avg     0.8228    0.8143    0.8169      4217



Epoch 8/40:   0%|          | 0/1055 [00:00<?, ?it/s]

Epoch 8/40, Average Loss: 0.0492
Accuracy: 0.8210, Precision: 0.7570, Recall: 0.6881, F1: 0.7209
Confusion Matrix:
[[2487  313]
 [ 442  975]]
              precision    recall  f1-score   support

           0     0.8491    0.8882    0.8682      2800
           1     0.7570    0.6881    0.7209      1417

    accuracy                         0.8210      4217
   macro avg     0.8030    0.7881    0.7946      4217
weighted avg     0.8181    0.8210    0.8187      4217



Epoch 9/40:   0%|          | 0/1055 [00:00<?, ?it/s]

Epoch 9/40, Average Loss: 0.0495
Accuracy: 0.8143, Precision: 0.7186, Recall: 0.7354, F1: 0.7269
Confusion Matrix:
[[2392  408]
 [ 375 1042]]
              precision    recall  f1-score   support

           0     0.8645    0.8543    0.8593      2800
           1     0.7186    0.7354    0.7269      1417

    accuracy                         0.8143      4217
   macro avg     0.7915    0.7948    0.7931      4217
weighted avg     0.8155    0.8143    0.8148      4217



Epoch 10/40:   0%|          | 0/1055 [00:00<?, ?it/s]

Epoch 10/40, Average Loss: 0.0451
Accuracy: 0.8233, Precision: 0.7410, Recall: 0.7290, F1: 0.7350
Confusion Matrix:
[[2439  361]
 [ 384 1033]]
              precision    recall  f1-score   support

           0     0.8640    0.8711    0.8675      2800
           1     0.7410    0.7290    0.7350      1417

    accuracy                         0.8233      4217
   macro avg     0.8025    0.8000    0.8012      4217
weighted avg     0.8227    0.8233    0.8230      4217



Epoch 11/40:   0%|          | 0/1055 [00:00<?, ?it/s]

Epoch 11/40, Average Loss: 0.0451
Accuracy: 0.8198, Precision: 0.7218, Recall: 0.7544, F1: 0.7378
Confusion Matrix:
[[2388  412]
 [ 348 1069]]
              precision    recall  f1-score   support

           0     0.8728    0.8529    0.8627      2800
           1     0.7218    0.7544    0.7378      1417

    accuracy                         0.8198      4217
   macro avg     0.7973    0.8036    0.8002      4217
weighted avg     0.8221    0.8198    0.8207      4217



Epoch 12/40:   0%|          | 0/1055 [00:00<?, ?it/s]

Epoch 12/40, Average Loss: 0.0381
Accuracy: 0.8221, Precision: 0.7422, Recall: 0.7212, F1: 0.7316
Confusion Matrix:
[[2445  355]
 [ 395 1022]]
              precision    recall  f1-score   support

           0     0.8609    0.8732    0.8670      2800
           1     0.7422    0.7212    0.7316      1417

    accuracy                         0.8221      4217
   macro avg     0.8016    0.7972    0.7993      4217
weighted avg     0.8210    0.8221    0.8215      4217



Epoch 13/40:   0%|          | 0/1055 [00:00<?, ?it/s]

Epoch 13/40, Average Loss: 0.0413
Accuracy: 0.8198, Precision: 0.7502, Recall: 0.6951, F1: 0.7216
Confusion Matrix:
[[2472  328]
 [ 432  985]]
              precision    recall  f1-score   support

           0     0.8512    0.8829    0.8668      2800
           1     0.7502    0.6951    0.7216      1417

    accuracy                         0.8198      4217
   macro avg     0.8007    0.7890    0.7942      4217
weighted avg     0.8173    0.8198    0.8180      4217



Epoch 14/40:   0%|          | 0/1055 [00:00<?, ?it/s]

Epoch 14/40, Average Loss: 0.0378
Accuracy: 0.8157, Precision: 0.7279, Recall: 0.7212, F1: 0.7246
Confusion Matrix:
[[2418  382]
 [ 395 1022]]
              precision    recall  f1-score   support

           0     0.8596    0.8636    0.8616      2800
           1     0.7279    0.7212    0.7246      1417

    accuracy                         0.8157      4217
   macro avg     0.7938    0.7924    0.7931      4217
weighted avg     0.8153    0.8157    0.8155      4217



Epoch 15/40:   0%|          | 0/1055 [00:00<?, ?it/s]

Epoch 15/40, Average Loss: 0.0329
Accuracy: 0.7892, Precision: 0.6485, Recall: 0.8137, F1: 0.7218
Confusion Matrix:
[[2175  625]
 [ 264 1153]]
              precision    recall  f1-score   support

           0     0.8918    0.7768    0.8303      2800
           1     0.6485    0.8137    0.7218      1417

    accuracy                         0.7892      4217
   macro avg     0.7701    0.7952    0.7760      4217
weighted avg     0.8100    0.7892    0.7938      4217



Epoch 16/40:   0%|          | 0/1055 [00:00<?, ?it/s]

Epoch 16/40, Average Loss: 0.0362
Accuracy: 0.8264, Precision: 0.7674, Recall: 0.6937, F1: 0.7287
Confusion Matrix:
[[2502  298]
 [ 434  983]]
              precision    recall  f1-score   support

           0     0.8522    0.8936    0.8724      2800
           1     0.7674    0.6937    0.7287      1417

    accuracy                         0.8264      4217
   macro avg     0.8098    0.7936    0.8005      4217
weighted avg     0.8237    0.8264    0.8241      4217



Epoch 17/40:   0%|          | 0/1055 [00:00<?, ?it/s]

Epoch 17/40, Average Loss: 0.0331
Accuracy: 0.8120, Precision: 0.6990, Recall: 0.7735, F1: 0.7343
Confusion Matrix:
[[2328  472]
 [ 321 1096]]
              precision    recall  f1-score   support

           0     0.8788    0.8314    0.8545      2800
           1     0.6990    0.7735    0.7343      1417

    accuracy                         0.8120      4217
   macro avg     0.7889    0.8024    0.7944      4217
weighted avg     0.8184    0.8120    0.8141      4217



Epoch 18/40:   0%|          | 0/1055 [00:00<?, ?it/s]

Epoch 18/40, Average Loss: 0.0349
Accuracy: 0.8210, Precision: 0.7445, Recall: 0.7114, F1: 0.7275
Confusion Matrix:
[[2454  346]
 [ 409 1008]]
              precision    recall  f1-score   support

           0     0.8571    0.8764    0.8667      2800
           1     0.7445    0.7114    0.7275      1417

    accuracy                         0.8210      4217
   macro avg     0.8008    0.7939    0.7971      4217
weighted avg     0.8193    0.8210    0.8199      4217



Epoch 19/40:   0%|          | 0/1055 [00:00<?, ?it/s]

Epoch 19/40, Average Loss: 0.0319
Accuracy: 0.8167, Precision: 0.7209, Recall: 0.7417, F1: 0.7311
Confusion Matrix:
[[2393  407]
 [ 366 1051]]
              precision    recall  f1-score   support

           0     0.8673    0.8546    0.8609      2800
           1     0.7209    0.7417    0.7311      1417

    accuracy                         0.8167      4217
   macro avg     0.7941    0.7982    0.7960      4217
weighted avg     0.8181    0.8167    0.8173      4217



Epoch 20/40:   0%|          | 0/1055 [00:00<?, ?it/s]

Epoch 20/40, Average Loss: 0.0305
Accuracy: 0.8210, Precision: 0.7914, Recall: 0.6344, F1: 0.7043
Confusion Matrix:
[[2563  237]
 [ 518  899]]
              precision    recall  f1-score   support

           0     0.8319    0.9154    0.8716      2800
           1     0.7914    0.6344    0.7043      1417

    accuracy                         0.8210      4217
   macro avg     0.8116    0.7749    0.7879      4217
weighted avg     0.8183    0.8210    0.8154      4217



Epoch 21/40:   0%|          | 0/1055 [00:00<?, ?it/s]

Epoch 21/40, Average Loss: 0.0323
Accuracy: 0.8134, Precision: 0.7163, Recall: 0.7361, F1: 0.7261
Confusion Matrix:
[[2387  413]
 [ 374 1043]]
              precision    recall  f1-score   support

           0     0.8645    0.8525    0.8585      2800
           1     0.7163    0.7361    0.7261      1417

    accuracy                         0.8134      4217
   macro avg     0.7904    0.7943    0.7923      4217
weighted avg     0.8147    0.8134    0.8140      4217



Epoch 22/40:   0%|          | 0/1055 [00:00<?, ?it/s]

Epoch 22/40, Average Loss: 0.0301
Accuracy: 0.8165, Precision: 0.7105, Recall: 0.7657, F1: 0.7371
Confusion Matrix:
[[2358  442]
 [ 332 1085]]
              precision    recall  f1-score   support

           0     0.8766    0.8421    0.8590      2800
           1     0.7105    0.7657    0.7371      1417

    accuracy                         0.8165      4217
   macro avg     0.7936    0.8039    0.7981      4217
weighted avg     0.8208    0.8165    0.8180      4217



Epoch 23/40:   0%|          | 0/1055 [00:00<?, ?it/s]

Epoch 23/40, Average Loss: 0.0290
Accuracy: 0.8214, Precision: 0.7570, Recall: 0.6902, F1: 0.7220
Confusion Matrix:
[[2486  314]
 [ 439  978]]
              precision    recall  f1-score   support

           0     0.8499    0.8879    0.8685      2800
           1     0.7570    0.6902    0.7220      1417

    accuracy                         0.8214      4217
   macro avg     0.8034    0.7890    0.7953      4217
weighted avg     0.8187    0.8214    0.8193      4217



Epoch 24/40:   0%|          | 0/1055 [00:00<?, ?it/s]

Epoch 24/40, Average Loss: 0.0316
Accuracy: 0.8191, Precision: 0.7353, Recall: 0.7212, F1: 0.7282
Confusion Matrix:
[[2432  368]
 [ 395 1022]]
              precision    recall  f1-score   support

           0     0.8603    0.8686    0.8644      2800
           1     0.7353    0.7212    0.7282      1417

    accuracy                         0.8191      4217
   macro avg     0.7978    0.7949    0.7963      4217
weighted avg     0.8183    0.8191    0.8186      4217



Epoch 25/40:   0%|          | 0/1055 [00:00<?, ?it/s]

Epoch 25/40, Average Loss: 0.0279
Accuracy: 0.8120, Precision: 0.7005, Recall: 0.7692, F1: 0.7333
Confusion Matrix:
[[2334  466]
 [ 327 1090]]
              precision    recall  f1-score   support

           0     0.8771    0.8336    0.8548      2800
           1     0.7005    0.7692    0.7333      1417

    accuracy                         0.8120      4217
   macro avg     0.7888    0.8014    0.7940      4217
weighted avg     0.8178    0.8120    0.8140      4217



Epoch 26/40:   0%|          | 0/1055 [00:00<?, ?it/s]

Epoch 26/40, Average Loss: 0.0277
Accuracy: 0.8229, Precision: 0.7349, Recall: 0.7396, F1: 0.7372
Confusion Matrix:
[[2422  378]
 [ 369 1048]]
              precision    recall  f1-score   support

           0     0.8678    0.8650    0.8664      2800
           1     0.7349    0.7396    0.7372      1417

    accuracy                         0.8229      4217
   macro avg     0.8014    0.8023    0.8018      4217
weighted avg     0.8231    0.8229    0.8230      4217



Epoch 27/40:   0%|          | 0/1055 [00:00<?, ?it/s]

Epoch 27/40, Average Loss: 0.0278
Accuracy: 0.8255, Precision: 0.7692, Recall: 0.6867, F1: 0.7256
Confusion Matrix:
[[2508  292]
 [ 444  973]]
              precision    recall  f1-score   support

           0     0.8496    0.8957    0.8720      2800
           1     0.7692    0.6867    0.7256      1417

    accuracy                         0.8255      4217
   macro avg     0.8094    0.7912    0.7988      4217
weighted avg     0.8226    0.8255    0.8228      4217



Epoch 28/40:   0%|          | 0/1055 [00:00<?, ?it/s]

Epoch 28/40, Average Loss: 0.0303
Accuracy: 0.8193, Precision: 0.7382, Recall: 0.7163, F1: 0.7271
Confusion Matrix:
[[2440  360]
 [ 402 1015]]
              precision    recall  f1-score   support

           0     0.8586    0.8714    0.8649      2800
           1     0.7382    0.7163    0.7271      1417

    accuracy                         0.8193      4217
   macro avg     0.7984    0.7939    0.7960      4217
weighted avg     0.8181    0.8193    0.8186      4217



Epoch 29/40:   0%|          | 0/1055 [00:00<?, ?it/s]

Epoch 29/40, Average Loss: 0.0261
Accuracy: 0.8267, Precision: 0.7663, Recall: 0.6965, F1: 0.7298
Confusion Matrix:
[[2499  301]
 [ 430  987]]
              precision    recall  f1-score   support

           0     0.8532    0.8925    0.8724      2800
           1     0.7663    0.6965    0.7298      1417

    accuracy                         0.8267      4217
   macro avg     0.8097    0.7945    0.8011      4217
weighted avg     0.8240    0.8267    0.8245      4217



Epoch 30/40:   0%|          | 0/1055 [00:00<?, ?it/s]

Epoch 30/40, Average Loss: 0.0299
Accuracy: 0.8214, Precision: 0.7385, Recall: 0.7255, F1: 0.7319
Confusion Matrix:
[[2436  364]
 [ 389 1028]]
              precision    recall  f1-score   support

           0     0.8623    0.8700    0.8661      2800
           1     0.7385    0.7255    0.7319      1417

    accuracy                         0.8214      4217
   macro avg     0.8004    0.7977    0.7990      4217
weighted avg     0.8207    0.8214    0.8210      4217



Epoch 31/40:   0%|          | 0/1055 [00:00<?, ?it/s]

Epoch 31/40, Average Loss: 0.0255
Accuracy: 0.8122, Precision: 0.7166, Recall: 0.7297, F1: 0.7231
Confusion Matrix:
[[2391  409]
 [ 383 1034]]
              precision    recall  f1-score   support

           0     0.8619    0.8539    0.8579      2800
           1     0.7166    0.7297    0.7231      1417

    accuracy                         0.8122      4217
   macro avg     0.7892    0.7918    0.7905      4217
weighted avg     0.8131    0.8122    0.8126      4217



Epoch 32/40:   0%|          | 0/1055 [00:00<?, ?it/s]

Epoch 32/40, Average Loss: 0.0302
Accuracy: 0.8165, Precision: 0.7168, Recall: 0.7502, F1: 0.7331
Confusion Matrix:
[[2380  420]
 [ 354 1063]]
              precision    recall  f1-score   support

           0     0.8705    0.8500    0.8601      2800
           1     0.7168    0.7502    0.7331      1417

    accuracy                         0.8165      4217
   macro avg     0.7937    0.8001    0.7966      4217
weighted avg     0.8189    0.8165    0.8175      4217



Epoch 33/40:   0%|          | 0/1055 [00:00<?, ?it/s]

Epoch 33/40, Average Loss: 0.0281
Accuracy: 0.8191, Precision: 0.7277, Recall: 0.7375, F1: 0.7326
Confusion Matrix:
[[2409  391]
 [ 372 1045]]
              precision    recall  f1-score   support

           0     0.8662    0.8604    0.8633      2800
           1     0.7277    0.7375    0.7326      1417

    accuracy                         0.8191      4217
   macro avg     0.7970    0.7989    0.7979      4217
weighted avg     0.8197    0.8191    0.8194      4217



Epoch 34/40:   0%|          | 0/1055 [00:00<?, ?it/s]

Epoch 34/40, Average Loss: 0.0249
Accuracy: 0.8233, Precision: 0.7534, Recall: 0.7050, F1: 0.7284
Confusion Matrix:
[[2473  327]
 [ 418  999]]
              precision    recall  f1-score   support

           0     0.8554    0.8832    0.8691      2800
           1     0.7534    0.7050    0.7284      1417

    accuracy                         0.8233      4217
   macro avg     0.8044    0.7941    0.7987      4217
weighted avg     0.8211    0.8233    0.8218      4217



Epoch 35/40:   0%|          | 0/1055 [00:00<?, ?it/s]

Epoch 35/40, Average Loss: 0.0280
Accuracy: 0.8072, Precision: 0.7057, Recall: 0.7311, F1: 0.7182
Confusion Matrix:
[[2368  432]
 [ 381 1036]]
              precision    recall  f1-score   support

           0     0.8614    0.8457    0.8535      2800
           1     0.7057    0.7311    0.7182      1417

    accuracy                         0.8072      4217
   macro avg     0.7836    0.7884    0.7858      4217
weighted avg     0.8091    0.8072    0.8080      4217



Epoch 36/40:   0%|          | 0/1055 [00:00<?, ?it/s]

Epoch 36/40, Average Loss: 0.0260
Accuracy: 0.8074, Precision: 0.7015, Recall: 0.7431, F1: 0.7217
Confusion Matrix:
[[2352  448]
 [ 364 1053]]
              precision    recall  f1-score   support

           0     0.8660    0.8400    0.8528      2800
           1     0.7015    0.7431    0.7217      1417

    accuracy                         0.8074      4217
   macro avg     0.7838    0.7916    0.7873      4217
weighted avg     0.8107    0.8074    0.8088      4217



Epoch 37/40:   0%|          | 0/1055 [00:00<?, ?it/s]

Epoch 37/40, Average Loss: 0.0248
Accuracy: 0.8070, Precision: 0.7039, Recall: 0.7347, F1: 0.7189
Confusion Matrix:
[[2362  438]
 [ 376 1041]]
              precision    recall  f1-score   support

           0     0.8627    0.8436    0.8530      2800
           1     0.7039    0.7347    0.7189      1417

    accuracy                         0.8070      4217
   macro avg     0.7833    0.7891    0.7860      4217
weighted avg     0.8093    0.8070    0.8080      4217



Epoch 38/40:   0%|          | 0/1055 [00:00<?, ?it/s]

Epoch 38/40, Average Loss: 0.0262
Accuracy: 0.8103, Precision: 0.7153, Recall: 0.7234, F1: 0.7193
Confusion Matrix:
[[2392  408]
 [ 392 1025]]
              precision    recall  f1-score   support

           0     0.8592    0.8543    0.8567      2800
           1     0.7153    0.7234    0.7193      1417

    accuracy                         0.8103      4217
   macro avg     0.7872    0.7888    0.7880      4217
weighted avg     0.8108    0.8103    0.8106      4217



Epoch 39/40:   0%|          | 0/1055 [00:00<?, ?it/s]

Epoch 39/40, Average Loss: 0.0243
Accuracy: 0.8193, Precision: 0.7494, Recall: 0.6944, F1: 0.7209
Confusion Matrix:
[[2471  329]
 [ 433  984]]
              precision    recall  f1-score   support

           0     0.8509    0.8825    0.8664      2800
           1     0.7494    0.6944    0.7209      1417

    accuracy                         0.8193      4217
   macro avg     0.8002    0.7885    0.7936      4217
weighted avg     0.8168    0.8193    0.8175      4217



Epoch 40/40:   0%|          | 0/1055 [00:00<?, ?it/s]

Epoch 40/40, Average Loss: 0.0243
Accuracy: 0.8226, Precision: 0.7472, Recall: 0.7135, F1: 0.7300
Confusion Matrix:
[[2458  342]
 [ 406 1011]]
              precision    recall  f1-score   support

           0     0.8582    0.8779    0.8679      2800
           1     0.7472    0.7135    0.7300      1417

    accuracy                         0.8226      4217
   macro avg     0.8027    0.7957    0.7990      4217
weighted avg     0.8209    0.8226    0.8216      4217



In [40]:
save_path = "/content/drive/MyDrive/Colab/IntelliGuard/bert_rumor"
model.save_pretrained(save_path)
tokenizer.save_pretrained(save_path)
print(f"Model and tokenizer saved to {save_path}")

Model and tokenizer saved to /content/drive/MyDrive/Colab/IntelliGuard/bert_rumor


In [41]:
from transformers import pipeline
clf = pipeline("text-classification", model=save_path, tokenizer=save_path, device=0 if torch.cuda.is_available() else -1)
print(clf("This is a rumor tweet"))

Device set to use cuda:0


[{'label': 'LABEL_1', 'score': 0.9990455508232117}]
