In [None]:
import numpy as np
import pandas as pd
import torch
from torch import nn
from torch.utils.data import Dataset, DataLoader, WeightedRandomSampler
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
from transformers import BertModel, get_linear_schedule_with_warmup
from torch.optim import AdamW
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import f1_score

# Select the compute device: CUDA when available, otherwise the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Kullanılan cihaz: {device}")

# Load the dataset once; later cells read its "text" column from `df`.
df = pd.read_csv('dataset.csv')

# Take the labels from the frame that is already in memory instead of
# parsing 'dataset.csv' a second time.
labels = df["label"].values.tolist()

Kullanılan cihaz: cuda


In [None]:
import numpy as np
import pandas as pd
import torch
from torch import nn
from torch.utils.data import Dataset, DataLoader
from torch.optim import AdamW
from transformers import AutoModel, AutoTokenizer, get_linear_schedule_with_warmup
from sklearn.metrics import f1_score, classification_report
from sklearn.model_selection import train_test_split
import os

# Two Turkish tokenizers are loaded; only `tokenizer2` (RoBERTa) is used below.
tokenizer = AutoTokenizer.from_pretrained("dbmdz/bert-base-turkish-cased")
tokenizer2 = AutoTokenizer.from_pretrained("TURKCELL/roberta-base-turkish-uncased")

# Raw texts as a plain Python list of strings.
metinler = df["text"].values.tolist()

# Tokenize the whole corpus in one call: pad to the longest sequence,
# truncate anything longer than 512 tokens, and return PyTorch tensors.
encodings = tokenizer2(metinler, padding=True, truncation=True, max_length=512, return_tensors='pt')

# Token ids and the matching attention mask.
input_ids, attention_mask = encodings['input_ids'], encodings['attention_mask']

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/60.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/385 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/251k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/650 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/976k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/634k [00:00<?, ?B/s]

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import copy

class FocalLoss(nn.Module):
    """Multi-class focal loss: ``alpha_t * (1 - p_t) ** gamma * (-log p_t)``.

    Args:
        alpha: Optional per-class weights (tensor or sequence indexed by class id).
            ``None`` disables class weighting.
        gamma: Focusing parameter; ``0`` reduces the loss to (weighted) cross-entropy.
        reduction: ``'mean'`` (plain mean over elements), ``'sum'``, or anything
            else to return the unreduced per-element loss.
    """

    def __init__(self, alpha=None, gamma=2.0, reduction='mean'):
        super(FocalLoss, self).__init__()
        self.alpha = alpha  # Weight factor for each class
        self.gamma = gamma  # Focusing parameter
        self.reduction = reduction

    def forward(self, inputs, targets):
        """Compute the focal loss for raw logits ``inputs`` and class-index ``targets``."""
        log_softmax = F.log_softmax(inputs, dim=1)

        # Unweighted cross-entropy is exactly -log(p_t); it is the only correct
        # source for p_t. Deriving p_t from the alpha-weighted CE (as before)
        # yields p_t ** alpha and distorts the focusing term for weighted classes.
        ce_loss = F.nll_loss(log_softmax, targets, reduction='none')
        p_t = torch.exp(-ce_loss)

        if self.alpha is not None:
            # Align the weights with the logits so a CPU-created alpha also works
            # for GPU inputs and for non-float32 dtypes.
            alpha = torch.as_tensor(self.alpha, dtype=inputs.dtype, device=inputs.device)
            weighted_ce = F.nll_loss(log_softmax, targets, weight=alpha, reduction='none')
        else:
            weighted_ce = ce_loss

        # Focusing term times the (optionally class-weighted) cross-entropy.
        focal_loss = (1 - p_t) ** self.gamma * weighted_ce

        if self.reduction == 'mean':
            return focal_loss.mean()
        elif self.reduction == 'sum':
            return focal_loss.sum()
        else:
            return focal_loss


In [None]:
# Constants shared by the model and training cells.
MAX_LEN = 128  # NOTE(review): not referenced in the visible cells; the tokenizer call uses max_length=512 — confirm intent
BATCH_SIZE = 48
EPOCHS = 4  # number of epochs run by train_model1
LEARNING_RATE = 2e-5  # optimizer learning rate used by train_model1
DROPOUT_RATE = 0.3  # default dropout probability of the model classes
DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
MODEL_NAME = "dbmdz/bert-base-turkish-cased"  # BERTurk model
MODEL_NAME2 = "TURKCELL/roberta-base-turkish-uncased"  # checkpoint loaded by BERTurkCNN

In [None]:
MAX_FEATURES = 5000  # Maximum number of features for TF-IDF
EMBEDDING_DIM = 64   # Projection size for the TF-IDF vectors (default embedding_dim of TFIDFCNNEnsemble)

class TFIDFCNNEnsemble(nn.Module):
    """1D-CNN classifier over a linear projection of TF-IDF features.

    The TF-IDF vector is projected to ``embedding_dim`` channels, treated as a
    length-1 sequence, passed through three parallel convolutions (kernel
    sizes 3, 4 and 5), global-max-pooled, concatenated and classified into
    two classes.

    Args:
        input_size: Number of TF-IDF features per sample.
        embedding_dim: Size of the projected representation (conv input channels).
        dropout_rate: Dropout probability applied before the classifier.
    """

    def __init__(self, input_size, embedding_dim=EMBEDDING_DIM, dropout_rate=DROPOUT_RATE):
        super(TFIDFCNNEnsemble, self).__init__()

        # Project the TF-IDF vector to a lower-dimensional embedding.
        self.projection = nn.Linear(input_size, embedding_dim)

        # Parallel convolutions with different kernel sizes.
        # conv2 needs padding=2: with padding=1 the padded length-1 sequence is
        # only 3 long, shorter than its kernel of 4, and Conv1d raises at runtime.
        self.conv1 = nn.Conv1d(embedding_dim, 128, kernel_size=3, padding=1)
        self.conv2 = nn.Conv1d(embedding_dim, 128, kernel_size=4, padding=2)
        self.conv3 = nn.Conv1d(embedding_dim, 128, kernel_size=5, padding=2)

        # Dropout layer
        self.dropout = nn.Dropout(dropout_rate)

        # Final classifier over the three concatenated conv outputs (128 * 3).
        self.classifier = nn.Linear(128 * 3, 2)  # Binary classification

    def forward(self, tfidf_features):
        """Return class logits of shape ``[batch_size, 2]`` for a TF-IDF batch."""
        # [batch_size, input_size] -> [batch_size, embedding_dim]
        x = self.projection(tfidf_features)

        # Conv1d expects [batch_size, channels, seq_len]; the projected vector is
        # fed as a sequence of length 1: [batch_size, embedding_dim, 1].
        x = x.unsqueeze(2)

        # Convolution + ReLU + global max pooling for every branch.
        pooled = [
            F.adaptive_max_pool1d(F.relu(conv(x)), 1).squeeze(2)
            for conv in (self.conv1, self.conv2, self.conv3)
        ]

        # Concatenate the branch features, regularize, classify.
        cat = torch.cat(pooled, dim=1)
        cat = self.dropout(cat)
        logits = self.classifier(cat)

        return logits

class TFIDFCNNEnsemble2D(nn.Module):
    """
    2D-CNN classifier for TF-IDF features.

    The TF-IDF vector is zero-padded and reshaped into a roughly square
    ``side1 x side2`` single-channel matrix, passed through three
    conv + ReLU + 2x2 max-pool stages and two fully connected layers.

    Args:
        input_size: Number of TF-IDF features per sample.
        dropout_rate: Dropout probability applied between the two FC layers.
    """
    def __init__(self, input_size, dropout_rate=DROPOUT_RATE):
        super(TFIDFCNNEnsemble2D, self).__init__()

        # Local import: `math` is not imported by any of the visible notebook cells.
        import math

        # Pick matrix sides so that side1 * side2 >= input_size
        # (e.g. 5000 -> 70 x 72, a roughly square matrix).
        self.side1 = int(math.sqrt(input_size))
        self.side2 = input_size // self.side1
        if self.side1 * self.side2 < input_size:
            self.side2 += 1

        # 2D convolution layers
        self.conv1 = nn.Conv2d(1, 32, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.conv3 = nn.Conv2d(64, 128, kernel_size=3, padding=1)

        # Pooling (halves each spatial side, flooring)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)

        # Flattened size after three poolings: [side1 // 8, side2 // 8] with 128 channels.
        self.fc_input_size = 128 * max(1, self.side1 // 8) * max(1, self.side2 // 8)

        # Fully connected layers
        self.fc1 = nn.Linear(self.fc_input_size, 256)
        self.fc2 = nn.Linear(256, 2)

        # Dropout
        self.dropout = nn.Dropout(dropout_rate)

    def forward(self, tfidf_features):
        """Return class logits of shape ``[batch_size, 2]`` for a TF-IDF batch."""
        batch_size = tfidf_features.size(0)

        # Bring the feature vector to exactly side1 * side2 entries: zero-pad on
        # the right when it is shorter, drop the surplus columns otherwise.
        padded_size = self.side1 * self.side2
        missing = padded_size - tfidf_features.size(1)
        if missing > 0:
            # F.pad keeps the dtype and device of the input.
            x = F.pad(tfidf_features, (0, missing))
        else:
            x = tfidf_features[:, :padded_size]

        # Reshape to [batch_size, 1, side1, side2]; reshape (not view) also
        # accepts the non-contiguous slice produced by the truncation branch.
        x = x.reshape(batch_size, 1, self.side1, self.side2)

        # Convolution and pooling
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = self.pool(F.relu(self.conv3(x)))

        # Flatten
        x = x.reshape(batch_size, -1)

        # Fully connected layers
        x = F.relu(self.fc1(x))
        x = self.dropout(x)
        x = self.fc2(x)

        return x

class CustomTFIDFDataset(Dataset):
    """Dataset yielding one TF-IDF feature vector (and optional label) per item.

    Args:
        tfidf_features: Row-indexable feature container: a dense array or list
            of rows, or a scipy sparse matrix (rows are densified on access).
        labels: Optional sequence of integer class labels aligned with the rows.
    """

    def __init__(self, tfidf_features, labels=None):
        self.tfidf_features = tfidf_features
        self.labels = labels

    def __len__(self):
        # Sparse matrices do not support len(); the first shape entry is the row count.
        shape = getattr(self.tfidf_features, "shape", None)
        if shape is not None:
            return shape[0]
        return len(self.tfidf_features)

    def __getitem__(self, idx):
        row = self.tfidf_features[idx]
        # A sparse row cannot be passed to torch.tensor; densify and flatten it first.
        if hasattr(row, "toarray"):
            row = row.toarray().ravel()

        item = {
            'tfidf_features': torch.tensor(row, dtype=torch.float)
        }

        if self.labels is not None:
            item['labels'] = torch.tensor(self.labels[idx], dtype=torch.long)

        return item

In [None]:
class BERTurkCNN(nn.Module):
    """Transformer encoder followed by three parallel 1D convolutions and a linear head.

    Args:
        dropout_rate: Dropout probability applied before the classifier.
        freeze: When True, every encoder parameter is excluded from training.
        model_name: Hugging Face checkpoint for the encoder; ``None`` (default)
            keeps the previous behavior of loading ``MODEL_NAME2``.
        num_classes: Number of output logits (default 2, binary classification).
    """

    def __init__(self, dropout_rate=DROPOUT_RATE, freeze=False, model_name=None, num_classes=2):
        super(BERTurkCNN, self).__init__()

        # Encoder. The attribute keeps the name `bert` (freeze_bert_layers relies
        # on it) even though the default checkpoint, MODEL_NAME2, is a RoBERTa model.
        self.bert = AutoModel.from_pretrained(model_name if model_name is not None else MODEL_NAME2)

        # Parallel convolutions with different kernel sizes to capture
        # n-gram-like features of different widths.
        hidden_size = self.bert.config.hidden_size
        self.conv1 = nn.Conv1d(hidden_size, 128, kernel_size=3, padding=1)
        self.conv2 = nn.Conv1d(hidden_size, 128, kernel_size=4, padding=1)
        self.conv3 = nn.Conv1d(hidden_size, 128, kernel_size=5, padding=2)

        # Dropout layer
        self.dropout = nn.Dropout(dropout_rate)

        # Final classifier over the three concatenated conv outputs (128 * 3).
        self.classifier = nn.Linear(128 * 3, num_classes)

        # Optionally freeze the whole encoder.
        if freeze:
            for param in self.bert.parameters():
                param.requires_grad = False

    def forward(self, input_ids, attention_mask):
        """Return class logits of shape ``[batch_size, num_classes]``."""
        # Encode the tokens.
        outputs = self.bert(input_ids=input_ids, attention_mask=attention_mask)

        # Hidden states of the last layer: [batch_size, seq_len, hidden_size]
        sequence_output = outputs.last_hidden_state

        # Conv1d expects [batch_size, channels, seq_len].
        x = sequence_output.permute(0, 2, 1)

        # Convolution + ReLU + global max pooling for every branch.
        # NOTE(review): pooling spans every position, including padded ones; the
        # attention mask is only passed to the encoder.
        pooled = []
        for conv in (self.conv1, self.conv2, self.conv3):
            feature_map = F.relu(conv(x))
            pooled.append(F.max_pool1d(feature_map, feature_map.size(2)).squeeze(2))

        # Concatenate the branch features, regularize, classify.
        cat = torch.cat(pooled, dim=1)
        cat = self.dropout(cat)
        logits = self.classifier(cat)

        return logits

class CustomDataset(Dataset):
    """Dataset yielding token ids, attention mask and (optionally) a label per item.

    Args:
        input_ids: Indexable container of token-id rows (tensor, array or list).
        attention_masks: Indexable container of attention-mask rows, aligned with ``input_ids``.
        labels: Optional indexable container of integer class labels.
    """

    def __init__(self, input_ids, attention_masks, labels=None):
        self.input_ids = input_ids
        self.attention_masks = attention_masks
        self.labels = labels

    @staticmethod
    def _as_long_tensor(value):
        """Return an independent ``torch.long`` copy of ``value``."""
        # torch.tensor(existing_tensor) emits a copy-construct UserWarning on
        # every call; clone().detach() is the recommended equivalent.
        if torch.is_tensor(value):
            return value.clone().detach().to(torch.long)
        return torch.tensor(value, dtype=torch.long)

    def __len__(self):
        return len(self.input_ids)

    def __getitem__(self, idx):
        item = {
            'input_ids': self._as_long_tensor(self.input_ids[idx]),
            'attention_mask': self._as_long_tensor(self.attention_masks[idx]),
        }

        if self.labels is not None:
            item['labels'] = self._as_long_tensor(self.labels[idx])

        return item

class EarlyStoppingWithClassF1:
    """Early stopping driven by a blend of overall F1 and one class's F1."""

    def __init__(self, patience=5, min_delta=0.001, class_index=1, class_weight=0.5):
        """
        Args:
            patience: Number of non-improving calls tolerated before stopping
            min_delta: Minimum gain of the blended score that counts as progress;
                also the largest tolerated drop of the monitored class F1
            class_index: Position of the monitored class in the per-class F1 list
            class_weight: Share of the monitored class F1 in the blended score
                (the remainder goes to the overall F1)
        """
        self.patience, self.min_delta = patience, min_delta
        self.class_index, self.class_weight = class_index, class_weight
        self.counter = 0
        self.best_score = None
        self.best_class_f1 = None
        self.early_stop = False
        self.best_weights = None

    def _flag_stop_if_exhausted(self):
        """Raise the stop flag once the stall counter has reached the patience."""
        if self.counter >= self.patience:
            self.early_stop = True

    def __call__(self, model, val_overall_f1, val_class_f1s):
        """
        Update the stopping state with the metrics of one validation round.

        Args:
            model: Model whose weights are snapshotted on improvement
            val_overall_f1: Overall validation F1 score
            val_class_f1s: Per-class F1 scores (list or array)
        """
        class_f1 = val_class_f1s[self.class_index]
        combined_score = (1 - self.class_weight) * val_overall_f1 + self.class_weight * class_f1

        # Very first call: whatever we got is the baseline.
        if self.best_score is None or self.best_class_f1 is None:
            self.best_score = combined_score
            self.best_class_f1 = class_f1
            self.save_checkpoint(model)
            return

        # The blended score did not gain at least min_delta.
        if combined_score < self.best_score + self.min_delta:
            self.counter += 1
            print(f'EarlyStopping counter: {self.counter} out of {self.patience}')
            self._flag_stop_if_exhausted()
            return

        # The blended score improved; accept it only when the monitored class
        # did not lose more than min_delta.
        class_f1_decrease = self.best_class_f1 - class_f1
        if class_f1_decrease <= self.min_delta:
            print(f'Validation score improved! Overall F1: {val_overall_f1:.4f}, Class 1 F1: {class_f1:.4f}')
            self.best_score = combined_score
            self.best_class_f1 = class_f1
            self.save_checkpoint(model)
            self.counter = 0
            return

        # Monitored class regressed too far: count this round as a stall.
        print(f'Overall score improved but Class 1 F1 dropped too much (from {self.best_class_f1:.4f} to {class_f1:.4f})')
        self.counter += 1
        self._flag_stop_if_exhausted()

    def save_checkpoint(self, model):
        """Keep a deep copy of the model's current state dict as the best weights."""
        snapshot = model.state_dict()
        self.best_weights = copy.deepcopy(snapshot)

    def restore_best_weights(self, model):
        """Load the stored best weights back into the model."""
        model.load_state_dict(self.best_weights)

def _run_epoch(model, dataloader, criterion, device, optimizer=None):
    """Run one pass over ``dataloader``: a training pass when ``optimizer`` is given, else evaluation.

    Returns:
        (mean batch loss, predicted class ids, true class ids)
    """
    training = optimizer is not None
    model.train(training)

    running_loss = 0.0
    all_preds = []
    all_labels = []

    # Gradients are only tracked during a training pass.
    with torch.set_grad_enabled(training):
        for batch in dataloader:
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            labels = batch['labels'].to(device)

            # Forward pass
            outputs = model(input_ids=input_ids, attention_mask=attention_mask)
            loss = criterion(outputs, labels)

            # Backward and optimize
            if training:
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

            running_loss += loss.item()

            # Store predictions and labels for metrics
            _, preds = torch.max(outputs, 1)
            all_preds.extend(preds.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())

    return running_loss / len(dataloader), all_preds, all_labels


def train_model(model, train_dataloader, test_dataloader=None, class_weights=None, gamma=2.0,
                num_epochs=10, patience=3, class_f1_weight=0.6, learning_rate=2e-5):
    """
    Train model with Focal Loss and balanced early stopping

    Args:
        model: Model to train (called as ``model(input_ids=..., attention_mask=...)``)
        train_dataloader: Training data loader yielding dicts with
            'input_ids', 'attention_mask' and 'labels'
        test_dataloader: Validation/test data loader; early stopping and
            best-weight restoration only happen when it is given
        class_weights: Dictionary mapping class indices to weights (e.g., {0: 1, 1: 6})
        gamma: Focal loss focusing parameter
        num_epochs: Maximum number of training epochs
        patience: Early stopping patience
        class_f1_weight: Weight to give to Class 1 F1 score in early stopping
            (the remainder goes to the overall weighted F1)
        learning_rate: Learning rate for optimizer

    Returns:
        trained_model: The trained model
        history: Dictionary containing training metrics
    """
    # Run on whatever device the model already lives on.
    device = next(model.parameters()).device

    # Create alpha tensor for focal loss based on class weights
    if class_weights is not None:
        alpha = torch.ones(len(class_weights))
        for cls_idx, weight in class_weights.items():
            alpha[cls_idx] = weight
        alpha = alpha.to(device)
    else:
        alpha = None

    criterion = FocalLoss(alpha=alpha, gamma=gamma)
    optimizer = torch.optim.AdamW(model.parameters(), lr=learning_rate)

    # Early stopping on a blend of overall F1 and Class 1 F1. The blend weight
    # now honours `class_f1_weight`; it used to be hard-coded to 1.0, which
    # silently ignored the argument.
    early_stopping = EarlyStoppingWithClassF1(
        patience=patience,
        class_index=1,  # Monitor Class 1 F1
        class_weight=class_f1_weight
    )

    # Track metrics
    history = {
        'train_loss': [],
        'train_f1': [],
        'train_class_f1s': [],
        'train_class_0_acc': [],
        'train_class_1_acc': [],
        'val_loss': [],
        'val_f1': [],
        'val_class_f1s': [],
        'val_class_0_acc': [],
        'val_class_1_acc': []
    }

    for epoch in range(num_epochs):
        # Training phase
        train_loss, train_preds, train_labels = _run_epoch(
            model, train_dataloader, criterion, device, optimizer=optimizer
        )
        train_f1 = f1_score(train_labels, train_preds, average='weighted')
        train_class_f1s = f1_score(train_labels, train_preds, average=None)

        # Calculate class-based accuracies
        train_metrics = calculate_metrics(train_labels, train_preds)

        # Save training metrics
        history['train_loss'].append(train_loss)
        history['train_f1'].append(train_f1)
        history['train_class_f1s'].append(train_class_f1s.tolist())
        history['train_class_0_acc'].append(train_metrics['class_0_acc'])
        history['train_class_1_acc'].append(train_metrics['class_1_acc'])

        if test_dataloader is None:
            # Print training metrics when no validation set provided
            print(f'Epoch {epoch+1}/{num_epochs}')
            print(f'Train Loss: {train_loss:.4f}, Train F1: {train_f1:.4f}, Train Class 1 F1: {train_class_f1s[1]:.4f}')
            print(f'Train Class 0 Acc: {train_metrics["class_0_acc"]:.4f}, Train Class 1 Acc: {train_metrics["class_1_acc"]:.4f}')
            continue

        # Validation phase
        val_loss, val_preds, val_labels = _run_epoch(model, test_dataloader, criterion, device)
        val_f1 = f1_score(val_labels, val_preds, average='weighted')
        val_class_f1s = f1_score(val_labels, val_preds, average=None)

        # Calculate class-based accuracies for validation
        val_metrics = calculate_metrics(val_labels, val_preds)

        # Save validation metrics
        history['val_loss'].append(val_loss)
        history['val_f1'].append(val_f1)
        history['val_class_f1s'].append(val_class_f1s.tolist())
        history['val_class_0_acc'].append(val_metrics['class_0_acc'])
        history['val_class_1_acc'].append(val_metrics['class_1_acc'])

        # Print epoch results
        print(f'Epoch {epoch+1}/{num_epochs}')
        print(f'Train Loss: {train_loss:.4f}, Train F1: {train_f1:.4f}, Train Class 1 F1: {train_class_f1s[1]:.4f}')
        print(f'Train Class 0 Acc: {train_metrics["class_0_acc"]:.4f}, Train Class 1 Acc: {train_metrics["class_1_acc"]:.4f}')
        print(f'Val Loss: {val_loss:.4f}, Val F1: {val_f1:.4f}, Val Class 1 F1: {val_class_f1s[1]:.4f}')
        print(f'Val Class 0 Acc: {val_metrics["class_0_acc"]:.4f}, Val Class 1 Acc: {val_metrics["class_1_acc"]:.4f}')

        # Early stopping check
        early_stopping(model, val_f1, val_class_f1s)

        if early_stopping.early_stop:
            print("Early stopping triggered")
            # The best weights are restored once, right after the loop.
            break

    # Load best weights if early stopping was used
    if test_dataloader is not None and early_stopping.best_weights is not None:
        early_stopping.restore_best_weights(model)
        print("Loaded best model weights (highest Class 1 performance)")

    return model, history


def train_model1(model, train_dataloader, test_dataloader=None, class_weights=None, gamma=2.0):
    """Train the model for ``EPOCHS`` epochs and record per-epoch metrics.

    Uses AdamW at ``LEARNING_RATE`` with a linear decay schedule (no warmup),
    focal loss, and gradient-norm clipping at 1.0. After every epoch the weights
    are written to 'best_berturk_bigru.pt' when the training weighted F1 improved.

    Args:
        model: Model called as ``model(input_ids, attention_mask)``.
        train_dataloader: Loader yielding dicts with 'input_ids',
            'attention_mask' and 'labels'.
        test_dataloader: Optional evaluation loader; when given, test metrics
            are computed each epoch through ``evaluate_model``.
        class_weights: Optional dict mapping class index -> focal-loss weight.
        gamma: Focal loss focusing parameter.

    Returns:
        (model, history) where history maps metric names to per-epoch lists.
    """
    # Run on the device the model already lives on (same convention as
    # train_model / evaluate_model).
    device = next(model.parameters()).device
    has_test = bool(test_dataloader)

    # Optimizer and scheduler (trainable parameters only)
    optimizer = AdamW([p for p in model.parameters() if p.requires_grad], lr=LEARNING_RATE)
    total_steps = len(train_dataloader) * EPOCHS
    scheduler = get_linear_schedule_with_warmup(
        optimizer,
        num_warmup_steps=0,
        num_training_steps=total_steps
    )

    # Focal loss, class-weighted for imbalanced data when weights are given.
    if class_weights is not None:
        # Order the weights by class index so the result does not depend on
        # the insertion order of the dict.
        ordered_weights = [class_weights[cls] for cls in sorted(class_weights)]
        weights = torch.tensor(ordered_weights).float().to(device)
        loss_fn = FocalLoss(alpha=weights, gamma=gamma)
    else:
        loss_fn = FocalLoss(gamma=gamma)

    # History of training metrics
    history = {
        "train_loss": [], "train_f1": [], "train_acc": [],
        "train_class_0_acc": [], "train_class_1_acc": []
    }

    if has_test:
        history.update({
            "test_loss": [], "test_f1": [], "test_acc": [],
            "test_class_0_acc": [], "test_class_1_acc": []
        })

    # Best training F1 seen so far. It was previously never initialized, which
    # raised UnboundLocalError at the first checkpoint comparison.
    best_train_f1 = 0

    # Training loop
    for epoch in range(EPOCHS):
        print(f"\n{'='*50}")
        print(f"Epoch {epoch+1}/{EPOCHS}")
        print(f"{'='*50}")

        # Training phase
        model.train()
        train_predictions = []
        train_true_labels = []
        train_loss = 0

        for batch in train_dataloader:
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            labels = batch['labels'].to(device)

            optimizer.zero_grad()
            outputs = model(input_ids, attention_mask)
            loss = loss_fn(outputs, labels)
            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
            optimizer.step()
            scheduler.step()

            train_loss += loss.item()

            # Collect predictions for the training metrics
            _, preds = torch.max(outputs, dim=1)
            train_predictions.extend(preds.cpu().tolist())
            train_true_labels.extend(labels.cpu().tolist())

        # Training metrics
        train_metrics = calculate_metrics(train_true_labels, train_predictions)
        train_metrics["loss"] = train_loss / len(train_dataloader)

        # Test metrics (only when a test loader was given)
        if has_test:
            test_metrics = evaluate_model(model, test_dataloader, loss_fn)

        # Record the metrics
        history["train_loss"].append(train_metrics["loss"])
        history["train_f1"].append(train_metrics["f1_weighted"])
        history["train_acc"].append(train_metrics["accuracy"])
        history["train_class_0_acc"].append(train_metrics["class_0_acc"])
        history["train_class_1_acc"].append(train_metrics["class_1_acc"])

        if has_test:
            history["test_loss"].append(test_metrics["loss"])
            history["test_f1"].append(test_metrics["f1_weighted"])
            history["test_acc"].append(test_metrics["accuracy"])
            history["test_class_0_acc"].append(test_metrics["class_0_acc"])
            history["test_class_1_acc"].append(test_metrics["class_1_acc"])

        # Print the metrics
        print_metrics(train_metrics, "Training")
        if has_test:
            print_metrics(test_metrics, "Test")

        # Highlight the per-class accuracies at the end of the epoch
        print("\n" + "="*20 + " SINIF BAZLI DOĞRULUK ORANLARI " + "="*20)
        print(f"Epoch {epoch+1}/{EPOCHS}")
        print(f"Eğitim Sınıf 0 Doğruluk: {train_metrics['class_0_acc']:.4f}")
        print(f"Eğitim Sınıf 1 Doğruluk: {train_metrics['class_1_acc']:.4f}")

        if has_test:
            print(f"Test Sınıf 0 Doğruluk: {test_metrics['class_0_acc']:.4f}")
            print(f"Test Sınıf 1 Doğruluk: {test_metrics['class_1_acc']:.4f}")
        print("="*70)

        # Save the best model (by training weighted F1)
        if train_metrics["f1_weighted"] > best_train_f1:
            best_train_f1 = train_metrics["f1_weighted"]
            torch.save(model.state_dict(), 'best_berturk_bigru.pt')
            print(f"\nModel kaydedildi! En iyi Training F1 Skor: {best_train_f1:.4f}")

    # Plot the curves once training is done
    plot_training_history(history)

    return model, history


def plot_training_history(history):
    """Draw the per-epoch training (and, if present, test) curves on a 3x2 grid.

    The figure is saved to 'training_history.png' and then shown.
    """
    epochs = range(1, len(history["train_loss"]) + 1)

    # Test curves are drawn only when the history carries test metrics.
    has_test = "test_loss" in history

    # One row per panel:
    # (train key, test key, train legend, test legend, title, y-axis label)
    panels = [
        ("train_loss", "test_loss", 'Training Loss', 'Test Loss', 'Loss', 'Loss'),
        ("train_f1", "test_f1", 'Training F1', 'Test F1', 'F1 Score', 'F1 Score'),
        ("train_acc", "test_acc", 'Training Accuracy', 'Test Accuracy', 'Accuracy', 'Accuracy'),
        ("train_class_0_acc", "test_class_0_acc", 'Training Class 0 Acc', 'Test Class 0 Acc',
         'Class 0 Accuracy', 'Accuracy'),
        ("train_class_1_acc", "test_class_1_acc", 'Training Class 1 Acc', 'Test Class 1 Acc',
         'Class 1 Accuracy', 'Accuracy'),
    ]

    plt.figure(figsize=(15, 12))
    for position, (train_key, test_key, train_label, test_label, title, y_label) in enumerate(panels, start=1):
        plt.subplot(3, 2, position)
        plt.plot(epochs, history[train_key], 'b-', label=train_label)
        if has_test:
            plt.plot(epochs, history[test_key], 'g-', label=test_label)
        plt.title(title)
        plt.xlabel('Epochs')
        plt.ylabel(y_label)
        plt.legend()

    # Sixth panel: class 1 minus class 0 accuracy (useful on imbalanced data).
    if has_test:
        plt.subplot(3, 2, 6)
        train_class_diff = [
            acc1 - acc0
            for acc0, acc1 in zip(history["train_class_0_acc"], history["train_class_1_acc"])
        ]
        test_class_diff = [
            acc1 - acc0
            for acc0, acc1 in zip(history["test_class_0_acc"], history["test_class_1_acc"])
        ]
        plt.plot(epochs, train_class_diff, 'b-', label='Train Class Diff (1-0)')
        plt.plot(epochs, test_class_diff, 'g-', label='Test Class Diff (1-0)')
        plt.title('Class Accuracy Difference (1-0)')
        plt.xlabel('Epochs')
        plt.ylabel('Difference')
        plt.axhline(y=0, color='r', linestyle='-', alpha=0.3)
        plt.legend()

    plt.tight_layout()
    plt.savefig('training_history.png')
    plt.show()

# Partial freezing: the embeddings plus the first N encoder layers stop training.
def freeze_bert_layers(model, num_layers_to_freeze):
    """Disable gradients for ``model.bert``'s embeddings and its first ``num_layers_to_freeze`` encoder layers."""
    leading_layers = list(model.bert.encoder.layer)[:num_layers_to_freeze]
    modules_to_freeze = [model.bert.embeddings] + leading_layers

    for module in modules_to_freeze:
        for weight in module.parameters():
            weight.requires_grad = False

def calculate_metrics(y_true, y_pred):
    """
    Compute classification metrics for aligned true/predicted label sequences.

    Returns:
        dict with
        - "f1_weighted": weighted F1 score
        - "accuracy": overall accuracy
        - "class_<label>_acc": fraction of the samples whose true label is
          <label> that were predicted correctly; one entry for every label seen
          in y_true or y_pred (0 for a label that never occurs in y_true)
        - "confusion_matrix": confusion matrix
        - "classification_report": per-class precision/recall/F1 as a dict
    """
    # Overall metrics
    f1 = f1_score(y_true, y_pred, average='weighted')
    acc = accuracy_score(y_true, y_pred)

    # Per-class report
    class_report = classification_report(y_true, y_pred, output_dict=True)

    # Confusion Matrix
    cm = confusion_matrix(y_true, y_pred)

    # Per-class accuracy in a single pass: count the samples and the correct
    # predictions per true label.
    totals = {}
    hits = {}
    for truth, guess in zip(y_true, y_pred):
        totals[truth] = totals.get(truth, 0) + 1
        hits.setdefault(truth, 0)
        if guess == truth:
            hits[truth] += 1
        # Register predicted-only labels so they still get an entry (accuracy 0).
        totals.setdefault(guess, 0)

    class_accuracies = {}
    for cls, count in totals.items():
        class_accuracies[f"class_{cls}_acc"] = hits.get(cls, 0) / count if count > 0 else 0

    metrics = {
        "f1_weighted": f1,
        "accuracy": acc,
        **class_accuracies,
        "confusion_matrix": cm,
        "classification_report": class_report
    }

    return metrics


def evaluate_model(model, dataloader, criterion=None):
    """
    Evaluate model performance and print a short summary.

    The model is switched to eval mode (and is not switched back), and the
    forward passes run under ``torch.no_grad()``.

    Args:
        model: Trained model to evaluate; called as
            ``model(input_ids=..., attention_mask=...)``
        dataloader: Data loader yielding dict batches with the keys
            'input_ids', 'attention_mask' and 'labels'
        criterion: Loss function called as ``criterion(outputs, labels)``
            (optional)

    Returns:
        metrics: Dictionary with 'accuracy', 'f1_weighted', 'f1_macro',
            'f1_by_class', 'precision_weighted', 'recall_weighted',
            'confusion_matrix' (nested list) and 'classification_report'
            (dict); plus 'loss' (mean of the per-batch losses) when a
            criterion is given
    """
    # Function-scope import so the block does not depend on which metric
    # helpers earlier notebook cells happened to import.
    from sklearn.metrics import (
        classification_report,
        confusion_matrix,
        accuracy_score,
        f1_score,
        precision_score,
        recall_score,
    )

    # Run on whatever device the model's parameters live on.
    device = next(model.parameters()).device
    model.eval()

    all_preds = []
    all_labels = []
    total_loss = 0.0

    with torch.no_grad():
        for batch in dataloader:
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            labels = batch['labels'].to(device)

            # Forward pass
            outputs = model(input_ids=input_ids, attention_mask=attention_mask)

            # Calculate loss if criterion provided
            if criterion is not None:
                loss = criterion(outputs, labels)
                total_loss += loss.item()

            # Predicted class = index of the largest score along dim 1
            _, preds = torch.max(outputs, 1)
            all_preds.extend(preds.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())

    # zero_division=0 gives the same values as the default but suppresses the
    # UndefinedMetricWarning raised when a class is never predicted.
    metrics = {
        'accuracy': accuracy_score(all_labels, all_preds),
        'f1_weighted': f1_score(all_labels, all_preds, average='weighted', zero_division=0),
        'f1_macro': f1_score(all_labels, all_preds, average='macro', zero_division=0),
        'f1_by_class': f1_score(all_labels, all_preds, average=None, zero_division=0),
        'precision_weighted': precision_score(all_labels, all_preds, average='weighted', zero_division=0),
        'recall_weighted': recall_score(all_labels, all_preds, average='weighted', zero_division=0),
        'confusion_matrix': confusion_matrix(all_labels, all_preds).tolist(),
        'classification_report': classification_report(
            all_labels, all_preds, output_dict=True, zero_division=0
        )
    }

    # Add loss if criterion was provided
    if criterion is not None:
        metrics['loss'] = total_loss / len(dataloader)

    # Print key metrics
    print("Evaluation Results:")
    print(f"Accuracy: {metrics['accuracy']:.4f}")
    print(f"F1 Score (Weighted): {metrics['f1_weighted']:.4f}")
    # 'f1_by_class' has one entry per label seen in labels/predictions, so
    # index 1 does not exist when only a single class is present.
    per_class_f1 = metrics['f1_by_class']
    if len(per_class_f1) > 1:
        print(f"F1 Score for Class 1: {per_class_f1[1]:.4f}")
    else:
        print("F1 Score for Class 1: n/a (fewer than two classes present)")
    print(f"Confusion Matrix:\n{metrics['confusion_matrix']}")

    return metrics


def evaluate_model1(model, dataloader, loss_fn):
    """Evaluate the model and return ``calculate_metrics`` output plus the mean loss.

    The model is put into eval mode and all batches are processed under
    ``torch.no_grad()``.

    Args:
        model: Model called as ``model(input_ids, attention_mask)``.
        dataloader: Iterable (with ``len``) of dict batches holding
            'input_ids', 'attention_mask' and 'labels' tensors.
        loss_fn: Loss called as ``loss_fn(outputs, labels)``.

    Returns:
        The dict produced by ``calculate_metrics(true_labels, predictions)``
        with an extra "loss" entry: the sum of per-batch losses divided by
        the number of batches.
    """
    # Use the device the model actually lives on (as evaluate_model does), so
    # the batches can never end up on a different device than the weights.
    # The module-level DEVICE is only consulted for a parameterless model.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else DEVICE

    model.eval()
    predictions = []
    true_labels = []
    total_loss = 0

    with torch.no_grad():
        for batch in dataloader:
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            labels = batch['labels'].to(device)

            outputs = model(input_ids, attention_mask)
            loss = loss_fn(outputs, labels)
            total_loss += loss.item()

            # Predicted class = index of the largest score along dim 1
            _, preds = torch.max(outputs, dim=1)

            predictions.extend(preds.cpu().tolist())
            true_labels.extend(labels.cpu().tolist())

    # Compute the metrics and attach the average batch loss
    metrics = calculate_metrics(true_labels, predictions)
    metrics["loss"] = total_loss / len(dataloader)

    return metrics


def print_metrics(metrics, set_name=""):
    """Print a metrics dictionary in a readable layout.

    Args:
        metrics: Dict with "f1_weighted", "accuracy", "classification_report"
            (dict form) and "confusion_matrix"; optionally "loss" and any
            number of "class_*" entries holding per-class accuracies.
        set_name: Label for the header line (e.g. "Test").
    """
    print(f"\n{set_name} Metrikleri:")
    # 'loss' is absent when the metrics were computed without a loss function
    # (e.g. evaluate_model with criterion=None); skip the line instead of
    # raising KeyError.
    if "loss" in metrics:
        print(f"Loss: {metrics['loss']:.4f}")
    print(f"F1 Score (Weighted): {metrics['f1_weighted']:.4f}")
    print(f"Accuracy: {metrics['accuracy']:.4f}")

    # Per-class accuracies
    for key, value in metrics.items():
        if key.startswith("class_"):
            print(f"{key}: {value:.4f}")

    # Per-class rows of the classification report
    print("\nSınıf Bazlı Detaylı Metrikler:")
    for cls, metrics_dict in metrics["classification_report"].items():
        # Show every per-class row, not only '0' and '1': the scalar
        # 'accuracy' entry and the '... avg' aggregate rows are skipped.
        if isinstance(metrics_dict, dict) and not str(cls).endswith(" avg"):
            print(f"  Sınıf {cls}:")
            print(f"    Precision: {metrics_dict['precision']:.4f}")
            print(f"    Recall: {metrics_dict['recall']:.4f}")
            print(f"    F1-score: {metrics_dict['f1-score']:.4f}")
            print(f"    Support: {metrics_dict['support']}")

    print("\nConfusion Matrix:")
    print(metrics["confusion_matrix"])


def plot_training_history(history):
    """Plot the metrics recorded during training as a 2x2 grid of curves.

    Panels, in order: loss, F1, accuracy and class-1 accuracy. Training curves
    are always drawn; test curves are added only when ``history`` contains a
    "test_loss" entry. The figure is saved to 'training_history.png' and then
    shown.

    Args:
        history: Dict of per-epoch lists keyed "train_loss", "train_f1",
            "train_acc", "train_class_1_acc" and, optionally, the matching
            "test_*" keys.
    """
    epochs = range(1, len(history["train_loss"]) + 1)

    # Presence of "test_loss" decides whether any test curve is drawn.
    has_test = "test_loss" in history

    # One row per panel:
    # (train key, test key, train legend label, test legend label, title, y label)
    panels = (
        ("train_loss", "test_loss",
         'Training Loss', 'Test Loss', 'Loss', 'Loss'),
        ("train_f1", "test_f1",
         'Training F1', 'Test F1', 'F1 Score', 'F1 Score'),
        ("train_acc", "test_acc",
         'Training Accuracy', 'Test Accuracy', 'Accuracy', 'Accuracy'),
        ("train_class_1_acc", "test_class_1_acc",
         'Training Class 1 Acc', 'Test Class 1 Acc', 'Class 1 Accuracy', 'Accuracy'),
    )

    plt.figure(figsize=(12, 8))
    for slot, panel in enumerate(panels, start=1):
        train_key, test_key, train_label, test_label, title, y_label = panel
        plt.subplot(2, 2, slot)
        plt.plot(epochs, history[train_key], 'b-', label=train_label)
        if has_test:
            plt.plot(epochs, history[test_key], 'g-', label=test_label)
        plt.title(title)
        plt.xlabel('Epochs')
        plt.ylabel(y_label)
        plt.legend()

    plt.tight_layout()
    plt.savefig('training_history.png')
    plt.show()

In [None]:
# Stratified 80/20 split of the tokenized inputs, attention masks and labels.
train_inputs, test_inputs, train_masks, test_masks, train_labels, test_labels = train_test_split(
    input_ids, attention_mask, labels, test_size=0.2, random_state=42, stratify=labels
)

# Build the datasets and data loaders
train_dataset = CustomDataset(train_inputs, train_masks, train_labels)
test_dataset = CustomDataset(test_inputs, test_masks, test_labels)

train_dataloader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
test_dataloader = DataLoader(test_dataset, batch_size=BATCH_SIZE)

# Build the model
model = BERTurkCNN(freeze=False).to(DEVICE)  # Don't freeze everything
#freeze_bert_layers(model, num_layers_to_freeze=8)  # Freeze the first 8 layers

# Class weights and Focal Loss parameters
class_weights = {0: 1, 1: 6}  # or None
gamma = 2.0  # Focal Loss focusing parameter

# Train the model (Focal Loss with balanced early stopping).
# NOTE(review): the same split is handed to train_model as test_dataloader
# (where it drives early stopping / best-weight selection) and is then used
# for the final "test" evaluation below, so the reported test metrics are
# optimistic. Consider carving out a separate validation split.
trained_model, history = train_model(
    model=model,
    train_dataloader=train_dataloader,
    test_dataloader=test_dataloader,
    class_weights=class_weights,
    gamma=gamma,
    patience=3,            # Early-stopping patience
    class_f1_weight=0.6    # Weight of the class-1 F1 score (higher = class 1 matters more)
)

# Final test evaluation with Focal Loss.
# The alpha vector is derived from class_weights (ordered by class id) instead
# of repeating the literal, so the two cannot drift apart; it falls back to
# the previous hard-coded [1, 6] when class_weights is None.
# Presumably train_model builds its own loss from class_weights in the same
# way — TODO confirm.
focal_alpha = (
    [class_weights[cls] for cls in sorted(class_weights)]
    if class_weights
    else [1, 6]
)
focal_loss = FocalLoss(
    alpha=torch.tensor(focal_alpha).float().to(DEVICE),
    gamma=gamma
)
test_metrics = evaluate_model(trained_model, test_dataloader, focal_loss)

# Show the results
print(f"Test Metrics:")
print(f"Overall F1 score: {test_metrics['f1_weighted']:.4f}")
print(f"Class 1 F1 score: {test_metrics['f1_by_class'][1]:.4f}")

pytorch_model.bin:   0%|          | 0.00/499M [00:00<?, ?B/s]

Some weights of RobertaModel were not initialized from the model checkpoint at TURKCELL/roberta-base-turkish-uncased and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  'input_ids': torch.tensor(self.input_ids[idx], dtype=torch.long),
  'attention_mask': torch.tensor(self.attention_masks[idx], dtype=torch.long),


model.safetensors:   0%|          | 0.00/499M [00:00<?, ?B/s]

  'input_ids': torch.tensor(self.input_ids[idx], dtype=torch.long),
  'attention_mask': torch.tensor(self.attention_masks[idx], dtype=torch.long),


Epoch 1/10
Train Loss: 0.5366, Train F1: 0.5326, Train Class 1 F1: 0.3711
Train Class 0 Acc: 0.3976, Train Class 1 Acc: 0.9348
Val Loss: 0.4588, Val F1: 0.7580, Val Class 1 F1: 0.5132
Val Class 0 Acc: 0.6873, Val Class 1 Acc: 0.9013


  'input_ids': torch.tensor(self.input_ids[idx], dtype=torch.long),
  'attention_mask': torch.tensor(self.attention_masks[idx], dtype=torch.long),
  'input_ids': torch.tensor(self.input_ids[idx], dtype=torch.long),
  'attention_mask': torch.tensor(self.attention_masks[idx], dtype=torch.long),


Epoch 2/10
Train Loss: 0.3900, Train F1: 0.7846, Train Class 1 F1: 0.5434
Train Class 0 Acc: 0.7251, Train Class 1 Acc: 0.9013
Val Loss: 0.3852, Val F1: 0.7915, Val Class 1 F1: 0.5536
Val Class 0 Acc: 0.7335, Val Class 1 Acc: 0.9082
Validation score improved! Overall F1: 0.7915, Class 1 F1: 0.5536


  'input_ids': torch.tensor(self.input_ids[idx], dtype=torch.long),
  'attention_mask': torch.tensor(self.attention_masks[idx], dtype=torch.long),
  'input_ids': torch.tensor(self.input_ids[idx], dtype=torch.long),
  'attention_mask': torch.tensor(self.attention_masks[idx], dtype=torch.long),


Epoch 3/10
Train Loss: 0.3004, Train F1: 0.8415, Train Class 1 F1: 0.6285
Train Class 0 Acc: 0.8015, Train Class 1 Acc: 0.9268
Val Loss: 0.4212, Val F1: 0.8748, Val Class 1 F1: 0.6602
Val Class 0 Acc: 0.8783, Val Class 1 Acc: 0.8017
Validation score improved! Overall F1: 0.8748, Class 1 F1: 0.6602


  'input_ids': torch.tensor(self.input_ids[idx], dtype=torch.long),
  'attention_mask': torch.tensor(self.attention_masks[idx], dtype=torch.long),
  'input_ids': torch.tensor(self.input_ids[idx], dtype=torch.long),
  'attention_mask': torch.tensor(self.attention_masks[idx], dtype=torch.long),


Epoch 4/10
Train Loss: 0.3818, Train F1: 0.7299, Train Class 1 F1: 0.4936
Train Class 0 Acc: 0.6420, Train Class 1 Acc: 0.9320
Val Loss: 0.7104, Val F1: 0.8297, Val Class 1 F1: 0.4107
Val Class 0 Acc: 0.9451, Val Class 1 Acc: 0.3316
EarlyStopping counter: 1 out of 3


  'input_ids': torch.tensor(self.input_ids[idx], dtype=torch.long),
  'attention_mask': torch.tensor(self.attention_masks[idx], dtype=torch.long),
  'input_ids': torch.tensor(self.input_ids[idx], dtype=torch.long),
  'attention_mask': torch.tensor(self.attention_masks[idx], dtype=torch.long),
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  'input_ids': torch.tensor(self.input_ids[idx], dtype=torch.long),
  'attention_mask': torch.tensor(self.attention_masks[idx], dtype=torch.long),


Epoch 5/10
Train Loss: 0.7125, Train F1: 0.1147, Train Class 1 F1: 0.2863
Train Class 0 Acc: 0.0425, Train Class 1 Acc: 0.9913
Val Loss: 0.7409, Val F1: 0.0455, Val Class 1 F1: 0.2796
Val Class 0 Acc: 0.0000, Val Class 1 Acc: 1.0000
EarlyStopping counter: 2 out of 3


  'input_ids': torch.tensor(self.input_ids[idx], dtype=torch.long),
  'attention_mask': torch.tensor(self.attention_masks[idx], dtype=torch.long),
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  'input_ids': torch.tensor(self.input_ids[idx], dtype=torch.long),
  'attention_mask': torch.tensor(self.attention_masks[idx], dtype=torch.long),


Epoch 6/10
Train Loss: 0.7298, Train F1: 0.0462, Train Class 1 F1: 0.2797
Train Class 0 Acc: 0.0005, Train Class 1 Acc: 0.9998
Val Loss: 0.7465, Val F1: 0.0455, Val Class 1 F1: 0.2796
Val Class 0 Acc: 0.0000, Val Class 1 Acc: 1.0000
EarlyStopping counter: 3 out of 3
Early stopping triggered
Loaded best model weights (highest Class 1 performance)
Evaluation Results:
Accuracy: 0.8659
F1 Score (Weighted): 0.8748
F1 Score for Class 1: 0.6602
Confusion Matrix:
[[5227, 724], [229, 926]]
Test Metrics:
Overall F1 score: 0.8748
Class 1 F1 score: 0.6602


In [None]:
# Save the trained model's weights (state_dict only) to disk

torch.save(trained_model.state_dict(), 'best_berturk_cnn_roberta.pt')

In [None]:
# Re-evaluate the trained model on the test loader with evaluate_model1 and
# print the full per-class breakdown.
# NOTE: relies on `trained_model`, `test_dataloader` and `focal_loss` left in
# the kernel by the training cell; run that cell first.
best_test_metrics = evaluate_model1(trained_model, test_dataloader, focal_loss)
print("\nEn İyi Modelin Test Metrikleri:")
print_metrics(best_test_metrics, "Test")

  'input_ids': torch.tensor(self.input_ids[idx], dtype=torch.long),
  'attention_mask': torch.tensor(self.attention_masks[idx], dtype=torch.long),



En İyi Modelin Test Metrikleri:

Test Metrikleri:
Loss: 0.4212
F1 Score (Weighted): 0.8748
Accuracy: 0.8659
class_0_acc: 0.8783
class_1_acc: 0.8017

Sınıf Bazlı Detaylı Metrikler:
  Sınıf 0:
    Precision: 0.9580
    Recall: 0.8783
    F1-score: 0.9165
    Support: 5951.0
  Sınıf 1:
    Precision: 0.5612
    Recall: 0.8017
    F1-score: 0.6602
    Support: 1155.0

Confusion Matrix:
[[5227  724]
 [ 229  926]]
