In [1]:
import pandas as pd
import numpy as np
import wfdb
from pathlib import Path
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader

In [3]:
# Veri yolu
data_path = Path("d:/ecg/data/raw/ptbxl")

# Meta verileri yükle
df = pd.read_csv(data_path / "ptbxl_database.csv")

# Test için küçük bir veri seti (ilk 100 kayıt)
n_samples = 100
df_subset = df.head(n_samples)

# Tanı sınıfları
diagnostic_classes = ['NORM', 'MI', 'STTC', 'CD', 'HYP']

# Dataset sınıfı
class ECGDataset(Dataset):
    def __init__(self, waveforms, labels):
        self.waveforms = torch.FloatTensor(waveforms).transpose(1, 2)
        self.labels = torch.FloatTensor(labels)
        
    def __len__(self):
        return len(self.waveforms)
    
    def __getitem__(self, idx):
        return self.waveforms[idx], self.labels[idx]

# Veri yükleme ve ön işleme
def prepare_data(df):
    waveforms = []
    labels = np.zeros((len(df), len(diagnostic_classes)))
    
    for idx, row in df.iterrows():
        if idx % 10 == 0:
            print(f"Kayıt yükleniyor: {idx}/{len(df)}")
            
        # EKG sinyalini yükle
        record_path = data_path / row['filename_hr'].replace('.hea', '')
        record = wfdb.rdrecord(str(record_path))
        waveform = record.p_signal
        
        # Normalize et
        scaler = StandardScaler()
        waveform_normalized = scaler.fit_transform(waveform)
        waveforms.append(waveform_normalized)
        
        # Etiketleri hazırla
        scp_codes = eval(row['scp_codes']) if isinstance(row['scp_codes'], str) else row['scp_codes']
        for diagnosis in scp_codes:
            if diagnosis in diagnostic_classes:
                labels[idx, diagnostic_classes.index(diagnosis)] = 1
                
    return np.array(waveforms), labels

# Veriyi hazırla
print("Veri hazırlanıyor...")
X, y = prepare_data(df_subset)

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# DataLoader'ları oluştur
train_dataset = ECGDataset(X_train, y_train)
test_dataset = ECGDataset(X_test, y_test)

train_loader = DataLoader(train_dataset, batch_size=8, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=8, shuffle=False)

Veri hazırlanıyor...
Kayıt yükleniyor: 0/100
Kayıt yükleniyor: 10/100
Kayıt yükleniyor: 20/100
Kayıt yükleniyor: 30/100
Kayıt yükleniyor: 40/100
Kayıt yükleniyor: 50/100
Kayıt yükleniyor: 60/100
Kayıt yükleniyor: 70/100
Kayıt yükleniyor: 80/100
Kayıt yükleniyor: 90/100


In [4]:
# Model tanımı
class ECGClassifier(nn.Module):
    def __init__(self, n_leads=12, n_classes=5):
        super().__init__()
        
        self.conv_layers = nn.Sequential(
            # Input: (batch, 12, 5000)
            nn.Conv1d(n_leads, 32, kernel_size=50, stride=3),  # -> (32, 1651)
            nn.BatchNorm1d(32),  # Batch normalization ekledik
            nn.ReLU(),
            nn.MaxPool1d(kernel_size=2, stride=2),             # -> (32, 825)
            nn.Dropout(0.2),
            
            nn.Conv1d(32, 64, kernel_size=15, stride=2),       # -> (64, 406)
            nn.BatchNorm1d(64),
            nn.ReLU(),
            nn.MaxPool1d(kernel_size=2, stride=2),             # -> (64, 203)
            nn.Dropout(0.2),
            
            nn.Conv1d(64, 128, kernel_size=10, stride=1),      # -> (128, 194)
            nn.BatchNorm1d(128),
            nn.ReLU(),
            nn.MaxPool1d(kernel_size=2, stride=2),             # -> (128, 97)
            nn.Dropout(0.2)
        )
        
        # Çıktı boyutunu hesapla
        self.feature_size = self._get_conv_output_size()
        
        # Fully connected katmanlar
        self.fc_layers = nn.Sequential(
            nn.Linear(self.feature_size, 256),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(256, n_classes),
            nn.Sigmoid()  # Çoklu etiket sınıflandırma için sigmoid
        )
    
    def _get_conv_output_size(self):
        # Dummy input ile conv katmanlarının çıktı boyutunu hesapla
        x = torch.randn(1, 12, 5000)
        x = self.conv_layers(x)
        return x.numel() // x.size(0)
        
    def forward(self, x):
        x = self.conv_layers(x)
        x = x.view(x.size(0), -1)  # Flatten
        x = self.fc_layers(x)
        return x

# Model, optimizer ve loss function
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Kullanılan cihaz: {device}")

model = ECGClassifier().to(device)
criterion = nn.BCELoss()  # Binary Cross Entropy
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Test için bir batch yükle
for batch_waveforms, batch_labels in train_loader:
    batch_waveforms = batch_waveforms.to(device)
    batch_labels = batch_labels.to(device)
    
    print("\nGiriş boyutları:")
    print(f"Waveforms: {batch_waveforms.shape}")
    print(f"Labels: {batch_labels.shape}")
    
    # Forward pass test
    with torch.no_grad():
        output = model(batch_waveforms)
        print(f"Model çıktı boyutu: {output.shape}")
    break

print("\nModel özeti:")
print(model)

Kullanılan cihaz: cpu

Giriş boyutları:
Waveforms: torch.Size([8, 12, 5000])
Labels: torch.Size([8, 5])
Model çıktı boyutu: torch.Size([8, 5])

Model özeti:
ECGClassifier(
  (conv_layers): Sequential(
    (0): Conv1d(12, 32, kernel_size=(50,), stride=(3,))
    (1): BatchNorm1d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): MaxPool1d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (4): Dropout(p=0.2, inplace=False)
    (5): Conv1d(32, 64, kernel_size=(15,), stride=(2,))
    (6): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (7): ReLU()
    (8): MaxPool1d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (9): Dropout(p=0.2, inplace=False)
    (10): Conv1d(64, 128, kernel_size=(10,), stride=(1,))
    (11): BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (12): ReLU()
    (13): MaxPool1d(kernel_size=2, stride=2, padding=0, dilation=

In [6]:
# Eğitim parametreleri
n_epochs = 20
best_loss = float('inf')

# Metrik hesaplama fonksiyonu
def calculate_metrics(y_true, y_pred):
    """Çoklu etiket sınıflandırma metrikleri"""
    y_pred_binary = (y_pred > 0.5).float()
    correct = (y_pred_binary == y_true).float()
    accuracy = correct.mean(dim=0)
    return accuracy

# Eğitim döngüsü
print("Eğitim başlıyor...")
for epoch in range(n_epochs):
    # Training
    model.train()
    train_loss = 0
    train_acc = torch.zeros(5).to(device)
    n_batches = 0
    
    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = data.to(device), target.to(device)
        
        # Forward pass
        optimizer.zero_grad()
        output = model(data)
        loss = criterion(output, target)
        
        # Backward pass
        loss.backward()
        optimizer.step()
        
        # Metrikleri hesapla
        train_loss += loss.item()
        train_acc += calculate_metrics(target, output)
        n_batches += 1
        
        # Her 5 batch'te bir durum raporu
        if batch_idx % 5 == 0:
            print(f'Epoch {epoch+1}/{n_epochs} '
                  f'[{batch_idx}/{len(train_loader)}] '
                  f'Loss: {loss.item():.4f}')
    
    # Epoch sonunda ortalama kayıp ve doğruluk
    train_loss /= n_batches
    train_acc /= n_batches
    
    # Validation
    model.eval()
    val_loss = 0
    val_acc = torch.zeros(5).to(device)
    n_val_batches = 0
    
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            val_loss += criterion(output, target).item()
            val_acc += calculate_metrics(target, output)
            n_val_batches += 1
    
    val_loss /= n_val_batches
    val_acc /= n_val_batches
    
    # Sonuçları yazdır
    print(f'\nEpoch {epoch+1}/{n_epochs}:')
    print(f'Train Loss: {train_loss:.4f}')
    print('Train Acc per class:', ' '.join(f'{acc.item():.2f}' for acc in train_acc))
    print(f'Val Loss: {val_loss:.4f}')
    print('Val Acc per class:', ' '.join(f'{acc.item():.2f}' for acc in val_acc))
    print('-' * 60)
    
    # En iyi modeli kaydet
    if val_loss < best_loss:
        best_loss = val_loss
        torch.save({
            'epoch': epoch,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            'loss': best_loss,
        }, 'best_model.pt')

print("Eğitim tamamlandı!")

Eğitim başlıyor...
Epoch 1/20 [0/10] Loss: 0.1630
Epoch 1/20 [5/10] Loss: 0.8397

Epoch 1/20:
Train Loss: 0.3036
Train Acc per class: 0.62 1.00 1.00 1.00 1.00
Val Loss: 0.7136
Val Acc per class: 0.67 1.00 1.00 1.00 1.00
------------------------------------------------------------
Epoch 2/20 [0/10] Loss: 0.0003
Epoch 2/20 [5/10] Loss: 0.0628

Epoch 2/20:
Train Loss: 0.1971
Train Acc per class: 0.80 1.00 1.00 1.00 1.00
Val Loss: 0.4024
Val Acc per class: 0.67 1.00 1.00 1.00 1.00
------------------------------------------------------------
Epoch 3/20 [0/10] Loss: 0.0876
Epoch 3/20 [5/10] Loss: 0.0388

Epoch 3/20:
Train Loss: 0.1626
Train Acc per class: 0.81 1.00 1.00 1.00 1.00
Val Loss: 1.4397
Val Acc per class: 0.67 1.00 1.00 1.00 1.00
------------------------------------------------------------
Epoch 4/20 [0/10] Loss: 0.0090
Epoch 4/20 [5/10] Loss: 0.0364

Epoch 4/20:
Train Loss: 0.1127
Train Acc per class: 0.85 1.00 1.00 1.00 1.00
Val Loss: 0.4182
Val Acc per class: 0.71 1.00 1.00 1.00