# Détection automatique de maladies pulmonaires à partir de radiographies

### Pré-traitement des données

In [32]:
import os
import random
from pathlib import Path
from PIL import Image
from sklearn.model_selection import train_test_split

import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from torchvision import models
from torchvision.models import ResNet18_Weights

from sklearn.metrics import confusion_matrix, classification_report
import seaborn as sns
import matplotlib.pyplot as plt

In [33]:
_IMAGE_EXTENSIONS = ('.png', '.jpg', '.jpeg', '.webp')


def _list_labelled_images(directory, label):
    """Return ``(file_path, label)`` pairs for the image files in *directory*, sorted by file name."""
    return [
        (os.path.join(directory, image_name), label)
        # os.listdir gives no ordering guarantee; sorting keeps the sample
        # list identical from one run (and one machine) to the next.
        for image_name in sorted(os.listdir(directory))
        if image_name.lower().endswith(_IMAGE_EXTENSIONS)
    ]


def buildSampleFromPath(path1, path2, path3, path4):
    """Build the labelled sample lists for the three classes.

    Only entries whose name ends with .png, .jpg, .jpeg or .webp
    (case-insensitive) are kept.

    Args:
        path1: Directory whose images are labelled "Normal".
        path2: Directory whose images are labelled "COVID".
        path3: Directory whose images are labelled "Pneumonia".
        path4: Second directory whose images are also labelled "Pneumonia";
            its samples are appended after those of ``path3``.

    Returns:
        A ``(normal_list, covid_list, pneumonia_list)`` tuple; each list holds
        ``(file_path, label)`` tuples sorted by file name within a directory.

    Raises:
        OSError: Propagated from ``os.listdir`` when a directory cannot be read.
    """
    normal_list = _list_labelled_images(path1, "Normal")
    covid_list = _list_labelled_images(path2, "COVID")
    pneumonia_list = (
        _list_labelled_images(path3, "Pneumonia")
        + _list_labelled_images(path4, "Pneumonia")
    )
    return normal_list, covid_list, pneumonia_list

In [34]:
# Root folder of the extracted radiography archive.
base_path = Path("archive/COVID-19_Radiography_Dataset")

# One image directory per source category.
path_normal = base_path.joinpath("Normal/images")
path_covid = base_path.joinpath("COVID/images")
path_pneumonia = base_path.joinpath("Viral Pneumonia/images")
path_lung_opacity = base_path.joinpath("Lung_Opacity/images")

# Viral-pneumonia and lung-opacity images both come back in the pneumonia list.
img_normals, img_covids, img_pneumonias = buildSampleFromPath(
    path1=path_normal,
    path2=path_covid,
    path3=path_pneumonia,
    path4=path_lung_opacity,
)

# Report how many samples each class received.
print(f"Nombre d'images normales: {len(img_normals)}")
print(f"Nombre d'images COVID: {len(img_covids)}")
print(f"Nombre d'images pneumonies: {len(img_pneumonias)}")

Nombre d'images normales: 10192
Nombre d'images COVID: 3616
Nombre d'images pneumonies: 7357


In [35]:
# Pool the three per-class lists into a single list of (path, label) samples.
all_images = img_normals + img_covids + img_pneumonias
print(f"Total d’images : {len(all_images)}")

# Shuffle in place with a dedicated, seeded generator: the order is repeatable
# across runs and the global `random` state is left untouched.
random.Random(42).shuffle(all_images)

Total d’images : 21165


In [None]:
# Stratified 70 / 15 / 15 split. A fixed random_state keeps the membership of
# each split repeatable, so a checkpoint saved in one session is never scored
# on images it was trained on in another.
train_set, test_set = train_test_split(
    all_images,
    test_size=0.15,
    stratify=[label for _, label in all_images],
    random_state=42,
)
# 0.1765 ~= 0.15 / 0.85: validation ends up at about 15 % of the full dataset.
# NOTE(review): the name `train` is rebound further down by `def train(...)`;
# re-running the dataset cell after that definition would pass the function
# instead of this list.
train, validation = train_test_split(
    train_set,
    test_size=0.1765,
    stratify=[label for _, label in train_set],
    random_state=42,
)

print(f"Train : {len(train)}")
print(f"Val : {len(validation)}")
print(f"Test : {len(test_set)}")


Train : 14814
Val : 3176
Test : 3175


In [37]:
class CovidDataset(Dataset):
    """Map-style dataset over ``(image_path, label_name)`` samples.

    Args:
        data: Indexable, sized collection of ``(image_path, label_name)``
            pairs; ``label_name`` must be a key of ``label_map``.
        transform: Optional callable applied to the RGB PIL image before it
            is returned.
    """

    def __init__(self, data, transform=None):
        self.data = data
        self.transform = transform
        # Class name -> integer target.
        self.label_map = {"Normal": 0, "COVID": 1, "Pneumonia": 2}

    def __len__(self):
        """Return the number of samples."""
        return len(self.data)

    def __getitem__(self, idx):
        """Load sample *idx* and return ``(image, label_index)``.

        Raises:
            KeyError: If the sample's label name is not in ``label_map``.
        """
        img_path, label = self.data[idx]
        # Close the underlying file deterministically; convert() returns a
        # new, fully loaded image that stays valid after the handle is gone.
        with Image.open(img_path) as raw_image:
            image = raw_image.convert("RGB")
        # Explicit None check so a falsy-but-valid callable is still applied.
        if self.transform is not None:
            image = self.transform(image)
        return image, self.label_map[label]


In [38]:
# Pretrained ResNet-18 weights; the preprocessing callable that ships with
# them is reused as the transform for every dataset split.
weights = ResNet18_Weights.DEFAULT
preprocess = ResNet18_Weights.DEFAULT.transforms()

In [39]:
# Wrap each split in a dataset that applies the shared preprocessing.
train_dataset, val_dataset, test_dataset = (
    CovidDataset(samples, transform=preprocess)
    for samples in (train, validation, test_set)
)

# Only the training loader reshuffles; validation and test keep a fixed order.
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader, test_loader = (
    DataLoader(split_dataset, batch_size=32)
    for split_dataset in (val_dataset, test_dataset)
)

In [40]:
import numpy as np
from sklearn.utils.class_weight import compute_class_weight

# Run on the GPU when one is available, otherwise on the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Numeric form of every sample's label.
# NOTE(review): these are taken from `all_images`, i.e. from all splits,
# not only from the training samples.
label_map = {"Normal": 0, "COVID": 1, "Pneumonia": 2}
all_labels = [sample[1] for sample in all_images]
numerical_labels = [label_map[name] for name in all_labels]

# The class indices as an array, as expected by compute_class_weight.
classes_array = np.array([0, 1, 2])

# "balanced" class weights to compensate for the uneven class sizes.
class_weights = compute_class_weight(
    class_weight="balanced",
    classes=classes_array,
    y=numerical_labels,
)

# Same weights as a float tensor on the training device.
weights_tensor = torch.tensor(class_weights, dtype=torch.float, device=device)

# Weighted cross-entropy loss.
criterion = nn.CrossEntropyLoss(weight=weights_tensor)


# Pretrained ResNet-18 whose final layer is replaced by a 3-class head.
model = models.resnet18(weights=weights)
model.fc = nn.Linear(in_features=model.fc.in_features, out_features=3)
model = model.to(device)

In [None]:
# Optimizer: Adam over every parameter of the model.
optimizer = torch.optim.Adam(params=model.parameters(), lr=1e-3)

# Training function
# NOTE(review): this definition rebinds the module-level name `train`, which
# the split cell earlier assigned to the list of training samples.
def train(model, dataloader, criterion, optimizer, epoch):
    """Run one optimisation pass over *dataloader* and report loss and accuracy.

    Args:
        model: Network to optimise; it is switched to training mode.
        dataloader: Iterable of ``(inputs, labels)`` tensor batches.
        criterion: Callable ``criterion(outputs, labels)`` returning the loss.
        optimizer: Optimizer that updates the parameters of ``model``.
        epoch: Epoch number, used only in the printed summary.

    Returns:
        ``(epoch_loss, epoch_acc)``: the mean of the per-batch losses and the
        accuracy in percent. Both are 0.0 when ``dataloader`` yields nothing.
    """
    model.train()

    # Send batches to wherever the model's parameters live instead of relying
    # on a module-level `device` variable.
    first_param = next(model.parameters(), None)
    model_device = first_param.device if first_param is not None else torch.device("cpu")

    running_loss = 0.0
    correct = 0
    total = 0
    num_batches = 0  # counted here so any iterable works, not only sized ones

    for inputs, labels in dataloader:
        inputs, labels = inputs.to(model_device), labels.to(model_device)

        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        num_batches += 1
        _, predicted = outputs.max(1)
        total += labels.size(0)
        correct += predicted.eq(labels).sum().item()

    # Guard the divisions so an empty loader reports zeros instead of raising.
    epoch_loss = running_loss / num_batches if num_batches else 0.0
    epoch_acc = 100. * correct / total if total else 0.0
    print(f'Train Epoch {epoch} | Loss: {epoch_loss:.4f} | Acc: {epoch_acc:.2f}%')
    return epoch_loss, epoch_acc

# Evaluation function
def evaluate(model, dataloader):
    """Measure the classification accuracy of *model* over *dataloader*.

    The model is switched to evaluation mode and gradients are disabled.

    Args:
        model: Network to evaluate.
        dataloader: Iterable of ``(inputs, labels)`` tensor batches.

    Returns:
        Accuracy in percent (0.0 when ``dataloader`` yields no samples).
    """
    model.eval()

    # Send batches to wherever the model's parameters live instead of relying
    # on a module-level `device` variable.
    first_param = next(model.parameters(), None)
    model_device = first_param.device if first_param is not None else torch.device("cpu")

    correct = 0
    total = 0

    with torch.no_grad():
        for inputs, labels in dataloader:
            inputs, labels = inputs.to(model_device), labels.to(model_device)
            outputs = model(inputs)
            _, predicted = outputs.max(1)
            total += labels.size(0)
            correct += predicted.eq(labels).sum().item()

    # Guard the division so an empty loader reports 0 instead of raising.
    acc = 100. * correct / total if total else 0.0
    print(f'Validation Accuracy: {acc:.2f}%')
    return acc

In [42]:
# Training loop: train for a fixed number of epochs and keep the checkpoint
# with the highest validation accuracy seen so far.
num_epochs = 20
best_acc = 0.0

for epoch in range(1, num_epochs + 1):
    train(model, train_loader, criterion, optimizer, epoch)
    val_acc = evaluate(model, val_loader)
    # Nothing to save unless this epoch strictly beats the best so far.
    if not val_acc > best_acc:
        continue
    best_acc = val_acc
    torch.save(model.state_dict(), 'best_model.pth')

Train Epoch 1 | Loss: 0.4218 | Acc: 83.79%
Validation Accuracy: 83.75%
Train Epoch 2 | Loss: 0.2735 | Acc: 89.50%
Validation Accuracy: 70.21%
Train Epoch 3 | Loss: 0.2309 | Acc: 90.85%
Validation Accuracy: 86.15%
Train Epoch 4 | Loss: 0.1934 | Acc: 92.43%
Validation Accuracy: 89.17%
Train Epoch 5 | Loss: 0.1699 | Acc: 93.41%
Validation Accuracy: 88.66%
Train Epoch 6 | Loss: 0.1416 | Acc: 94.31%
Validation Accuracy: 92.41%
Train Epoch 7 | Loss: 0.1314 | Acc: 94.79%
Validation Accuracy: 91.50%
Train Epoch 8 | Loss: 0.1132 | Acc: 95.46%
Validation Accuracy: 90.24%
Train Epoch 9 | Loss: 0.0794 | Acc: 96.72%
Validation Accuracy: 91.85%
Train Epoch 10 | Loss: 0.0751 | Acc: 96.96%
Validation Accuracy: 90.68%
Train Epoch 11 | Loss: 0.0689 | Acc: 97.41%
Validation Accuracy: 88.04%
Train Epoch 12 | Loss: 0.0604 | Acc: 97.73%
Validation Accuracy: 91.25%
Train Epoch 13 | Loss: 0.0490 | Acc: 98.11%
Validation Accuracy: 89.48%
Train Epoch 14 | Loss: 0.0425 | Acc: 98.30%
Validation Accuracy: 87.50%
T