In [None]:
%pip install matplotlib pandas seaborn kagglehub tensorflow keras scipy

In [None]:
import os
import shutil
import kagglehub
import numpy as np
import seaborn as sns
import tensorflow as tf
import matplotlib.pyplot as plt

from tensorflow.keras import Sequential
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.applications import DenseNet121
from sklearn.metrics import classification_report, confusion_matrix
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications.densenet import preprocess_input
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D, Dropout
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau

In [None]:
# Fetch the chest X-ray dataset through kagglehub and report where it was placed.
DATASET_HANDLE = "paultimothymooney/chest-xray-pneumonia"
path = kagglehub.dataset_download(DATASET_HANDLE)

print("Path to dataset files:", path)

In [None]:
# Fraction of each class's training images that is moved into the validation folder.
VAL_FRACTION = 0.2
# Fixed seed so a fresh run selects the same files every time.
split_rng = np.random.default_rng(42)

train_dir, val_dir, test_dir = [os.path.join(path, "chest_xray", d) for d in ["train", "val", "test"]]

for cls in ["NORMAL", "PNEUMONIA"]:
    src, dst = os.path.join(train_dir, cls), os.path.join(val_dir, cls)
    os.makedirs(dst, exist_ok=True)

    # Only move files while the validation folder holds fewer than 50 entries,
    # so re-running the cell after a successful split does not move a second batch.
    if os.path.exists(src) and len(os.listdir(dst)) < 50:
        # Sort first: os.listdir order is arbitrary, which would defeat the seed.
        files = sorted(os.listdir(src))
        n_move = int(len(files) * VAL_FRACTION)
        for img in split_rng.choice(files, n_move, replace=False):
            shutil.move(os.path.join(src, img), os.path.join(dst, img))
        # The message reports the fraction actually used (it previously said 10% while moving 20%).
        print(f"Movido {VAL_FRACTION:.0%} de {cls} para validação ({n_move} arquivos).")

In [None]:
BATCH_SIZE = 32
TARGET_SIZE = (224, 224)

# Random augmentation is configured for the training generator only;
# both generators apply the DenseNet preprocessing function.
augmentation_options = dict(
    rotation_range=20,
    zoom_range=0.15,
    width_shift_range=0.1,
    height_shift_range=0.1,
    horizontal_flip=True,
    fill_mode="nearest",
)
train_datagen = ImageDataGenerator(
    preprocessing_function=preprocess_input,
    **augmentation_options,
)
test_datagen = ImageDataGenerator(preprocessing_function=preprocess_input)

# Keyword arguments shared by the three directory iterators.
flow_options = dict(
    target_size=TARGET_SIZE,
    batch_size=BATCH_SIZE,
    class_mode='binary',
)

print("Carregando datasets...")
# Only the training iterator shuffles; validation and test keep directory order
# so that predictions line up with the iterator's `classes` array.
train_ds = train_datagen.flow_from_directory(train_dir, shuffle=True, **flow_options)
val_ds = test_datagen.flow_from_directory(val_dir, shuffle=False, **flow_options)
test_ds = test_datagen.flow_from_directory(test_dir, shuffle=False, **flow_options)

In [None]:
# Balanced class weights, w_c = total / (n_classes * count_c), computed from the
# labels the training iterator actually loaded rather than from raw directory
# listings (which would also count any non-image files).
class_counts = np.bincount(train_ds.classes, minlength=2)

if class_counts.min() > 0:
    total = class_counts.sum()
    class_weights = {
        idx: float(total / (len(class_counts) * count))
        for idx, count in enumerate(class_counts)
    }
else:
    # Visible fallback instead of a bare `except:` that silently hid every error:
    # an empty class would otherwise cause a division by zero.
    print("Aviso: classe sem amostras no conjunto de treino; usando pesos uniformes.")
    class_weights = {idx: 1.0 for idx in range(len(class_counts))}

In [None]:
import numpy as np

def print_dataset_counts(dataset, name):
    """Print the number of samples per class and the total for one dataset split.

    Args:
        dataset: Object exposing `class_indices` (mapping of class label to
            integer index) and `classes` (sequence of integer labels).
        name: Human-readable split name used in the header line.
    """
    print(f"--- {name} set ---")

    # Invert the label -> index mapping so indices can be shown as labels.
    index_to_label = {}
    for label, index in dataset.class_indices.items():
        index_to_label[index] = label

    present, tallies = np.unique(dataset.classes, return_counts=True)
    for position in range(len(present)):
        print(f"{index_to_label[present[position]]}: {tallies[position]}")

    print(f"Total: {len(dataset.classes)}\n")

# Report the per-class sample counts of every split, in train/val/test order.
for split_name, split_ds in (("Training", train_ds), ("Validation", val_ds), ("Test", test_ds)):
    print_dataset_counts(split_ds, split_name)

In [None]:
from keras import layers
# DenseNet121 backbone loaded with ImageNet weights and without its classification
# top; it is frozen so that only the layers stacked after it are trainable.
base_model = DenseNet121(
    include_top=False,
    weights='imagenet',
    input_shape=(224, 224, 3),
)
base_model.trainable = False

# Backbone followed by a small binary classification head with a sigmoid output.
classifier_stack = [
    base_model,
    GlobalAveragePooling2D(),
    Dense(512, activation='relu'),
    Dropout(0.5),
    Dense(1, activation='sigmoid'),
]
model = Sequential(classifier_stack)
model.summary()

In [None]:
# Metrics tracked during training and evaluation, in this order.
tracked_metrics = [
    'accuracy',
    tf.keras.metrics.Precision(name='precision'),
    tf.keras.metrics.Recall(name='recall'),
    tf.keras.metrics.AUC(name='auc'),
]

model.compile(
    loss='binary_crossentropy',
    optimizer=Adam(learning_rate=1e-4),
    metrics=tracked_metrics,
)

In [None]:
# Save the model to disk whenever validation AUC improves.
checkpoint_cb = ModelCheckpoint(
    'pneumonia_model.keras',
    monitor='val_auc',
    mode='max',
    save_best_only=True,
    verbose=1,
)
# Stop after 5 epochs without validation-loss improvement and restore the best weights.
early_stop_cb = EarlyStopping(
    monitor='val_loss',
    patience=5,
    restore_best_weights=True,
    verbose=1,
)
# Halve the learning rate after 3 stagnant epochs, never going below 1e-6.
reduce_lr_cb = ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.5,
    patience=3,
    min_lr=1e-6,
    verbose=1,
)

callbacks = [checkpoint_cb, early_stop_cb, reduce_lr_cb]

In [None]:
# Train with class weighting; the callbacks defined earlier handle checkpointing,
# early stopping and learning-rate reduction.
fit_options = dict(
    epochs=30,
    validation_data=val_ds,
    callbacks=callbacks,
    class_weight=class_weights,
    verbose=1,
)
history = model.fit(train_ds, **fit_options)

In [None]:
# Side-by-side training/validation curves: loss on the left, AUC on the right.
fig, (loss_ax, auc_ax) = plt.subplots(1, 2, figsize=(15, 5))

curve_panels = [
    (loss_ax, 'loss', 'Loss', 'Curva de Perda (Loss)'),
    (auc_ax, 'auc', 'AUC', 'Curva ROC-AUC'),
]
for panel_ax, metric_key, metric_label, panel_title in curve_panels:
    panel_ax.plot(history.history[metric_key], label=f'Train {metric_label}')
    panel_ax.plot(history.history[f'val_{metric_key}'], label=f'Val {metric_label}')
    panel_ax.set_title(panel_title)
    panel_ax.legend()

plt.show()

print("\n--- Avaliação no Dataset de Teste ---")
# Read metrics by name rather than by position, so that reordering or extending
# the compiled metric list cannot silently print the wrong value.
eval_results = model.evaluate(test_ds, return_dict=True)
print(f"Test Loss: {eval_results['loss']:.4f}")
print(f"Test Accuracy: {eval_results['accuracy']:.4f}")
print(f"Test AUC: {eval_results['auc']:.4f}")
print("\nRelatório de Classificação Detalhado:")
# Threshold the sigmoid outputs at 0.5 and flatten the (N, 1) result to 1-D so it
# has the same shape as the true labels.
y_pred = (model.predict(test_ds) > 0.5).astype("int32").ravel()
# test_ds was created with shuffle=False, so `classes` is in prediction order.
y_true = test_ds.classes
# Class names ordered by their integer index, taken from the iterator itself.
target_names = sorted(test_ds.class_indices, key=test_ds.class_indices.get)
print(classification_report(y_true, y_pred, target_names=target_names))

In [None]:
# Show up to nine images from the next test batch with their true and predicted labels.
images, labels = next(test_ds)
class_names = list(train_ds.class_indices.keys())

n_show = min(9, len(images))
# One batched forward pass instead of a separate predict() call per image in the loop.
pred_probs = model.predict(images[:n_show], verbose=0).ravel() if n_show else np.empty(0)

plt.figure(figsize=(12, 12))
for i in range(n_show):
    ax = plt.subplot(3, 3, i + 1)

    img = images[i]
    # The batch went through preprocess_input, so min-max rescale to [0, 1] for
    # display only; a constant image would otherwise divide by zero.
    value_range = img.max() - img.min()
    img_display = (img - img.min()) / value_range if value_range > 0 else np.zeros_like(img)
    plt.imshow(img_display)

    pred_prob = pred_probs[i]

    pred_label = class_names[1] if pred_prob > 0.5 else class_names[0]
    true_label = class_names[int(labels[i])]

    # Confidence of the predicted class: p for class 1, 1 - p for class 0.
    conf = pred_prob if pred_prob > 0.5 else 1 - pred_prob

    color = "green" if pred_label == true_label else "red"
    plt.title(f"Real: {true_label}\nPred: {pred_label} ({conf:.1%})", color=color)
    plt.axis("off")
plt.tight_layout()
plt.show()