# 📊 IA para Detecção de Cardiomegalia em Radiografias de Tórax

In [10]:
from google.colab import drive
drive.mount('/content/drive')


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [11]:
import pandas as pd

csv_path = '/content/drive/MyDrive/Dataset/Data_Entry_2017.csv'
df = pd.read_csv(csv_path)

# Verificar labels únicos
print(df['Finding Labels'].unique())


['Cardiomegaly' 'Cardiomegaly|Emphysema' 'Cardiomegaly|Effusion'
 'No Finding' 'Hernia' 'Hernia|Infiltration' 'Mass|Nodule' 'Infiltration'
 'Effusion|Infiltration' 'Nodule' 'Emphysema' 'Effusion' 'Atelectasis'
 'Effusion|Mass' 'Emphysema|Pneumothorax' 'Pleural_Thickening'
 'Effusion|Emphysema|Infiltration|Pneumothorax'
 'Emphysema|Infiltration|Pleural_Thickening|Pneumothorax'
 'Effusion|Pneumonia|Pneumothorax' 'Pneumothorax'
 'Effusion|Infiltration|Pneumothorax' 'Infiltration|Mass'
 'Infiltration|Mass|Pneumothorax' 'Mass'
 'Cardiomegaly|Infiltration|Mass|Nodule'
 'Cardiomegaly|Effusion|Emphysema|Mass'
 'Atelectasis|Cardiomegaly|Emphysema|Mass|Pneumothorax' 'Emphysema|Mass'
 'Emphysema|Mass|Pneumothorax' 'Atelectasis|Pneumothorax'
 'Cardiomegaly|Emphysema|Pneumothorax' 'Mass|Pleural_Thickening'
 'Cardiomegaly|Mass|Pleural_Thickening' 'Effusion|Infiltration|Nodule'
 'Atelectasis|Effusion|Pleural_Thickening' 'Fibrosis|Infiltration'
 'Fibrosis|Infiltration|Pleural_Thickening' 'Fibrosis'
 '

In [12]:
# Instalar versões específicas dos pacotes
!pip install numpy==2.0.2 pandas==2.2.2 matplotlib==3.10.0 seaborn==0.13.2 opencv-python==4.11.0.86 \
kagglehub==0.3.12 tqdm==4.67.1 tensorflow==2.18.0 scikit-learn==1.6.1 Pillow==11.2.1



In [13]:
import pandas as pd
import numpy as np
import cv2
import os
from sklearn.model_selection import train_test_split
from sklearn.utils import resample
import tensorflow as tf
from tensorflow.keras import layers, models
from tensorflow.keras.preprocessing.image import ImageDataGenerator


# Caminhos
csv_path = '/content/drive/MyDrive/Dataset/Data_Entry_2017.csv'
img_folder = '/content/drive/MyDrive/Dataset/'

# CSV
df = pd.read_csv(csv_path)

# Imagens disponíveis
available_imgs = os.listdir(img_folder)
df = df[df['Image Index'].isin(available_imgs)]

# Criar coluna binária
df['Emphysema'] = df['Finding Labels'].apply(lambda x: 1 if 'Emphysema' in x else 0)

# Verificar distribuição
print(df['Emphysema'].value_counts())



Emphysema
0    9807
1     193
Name: count, dtype: int64


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['Emphysema'] = df['Finding Labels'].apply(lambda x: 1 if 'Emphysema' in x else 0)


In [14]:
# Separar
df_majority = df[df.Emphysema == 0]
df_minority = df[df.Emphysema == 1]

# Oversample
df_minority_upsampled = resample(df_minority,
                                 replace=True,
                                 n_samples=len(df_majority),
                                 random_state=42)

# Juntar
df_balanced = pd.concat([df_majority, df_minority_upsampled])

# Embaralhar
df_balanced = df_balanced.sample(frac=1).reset_index(drop=True)

# Verificar
print(df_balanced['Emphysema'].value_counts())


Emphysema
0    9807
1    9807
Name: count, dtype: int64


In [15]:
X = []
y = []
img_size = 128

for idx, row in df_balanced.iterrows():
    img_path = os.path.join(img_folder, row['Image Index'])
    img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
    img = cv2.resize(img, (img_size, img_size))
    img = img / 255.0
    X.append(img)
    y.append(row['Emphysema'])

X = np.array(X).reshape(-1, img_size, img_size, 1)
y = np.array(y)


In [17]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


In [37]:
# Criar datagen só pra minoria
minority_gen = ImageDataGenerator(
    rotation_range=20,
    width_shift_range=0.15,
    height_shift_range=0.15,
    zoom_range=0.2,
    horizontal_flip=True
)

# Gerar imagens aumentadas da minoria e juntar


In [39]:
model = models.Sequential([
    layers.Conv2D(32, (3,3), activation='relu', input_shape=(img_size, img_size, 1)),
    layers.MaxPooling2D((2,2)),
    layers.BatchNormalization(),

    layers.Conv2D(64, (3,3), activation='relu'),
    layers.MaxPooling2D((2,2)),
    layers.BatchNormalization(),

    layers.Conv2D(128, (3,3), activation='relu'),
    layers.MaxPooling2D((2,2)),
    layers.BatchNormalization(),

    layers.Flatten(),
    layers.Dense(128, activation='relu'),
    layers.Dropout(0.4),
    layers.Dense(1, activation='sigmoid')
])


In [40]:
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint

callbacks = [
    EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True),
    ModelCheckpoint('best_model.h5', save_best_only=True)
]


In [43]:
model.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=['accuracy'])


In [44]:
history = model.fit(datagen.flow(X_train, y_train, batch_size=32),
                    epochs=8,
                    validation_data=(X_test, y_test))


Epoch 1/8
[1m491/491[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m568s[0m 1s/step - accuracy: 0.5528 - loss: 1.0884 - val_accuracy: 0.5126 - val_loss: 0.7558
Epoch 2/8
[1m491/491[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m541s[0m 1s/step - accuracy: 0.6014 - loss: 0.6741 - val_accuracy: 0.6113 - val_loss: 0.6696
Epoch 3/8
[1m491/491[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m536s[0m 1s/step - accuracy: 0.6071 - loss: 0.6576 - val_accuracy: 0.6248 - val_loss: 0.6857
Epoch 4/8
[1m491/491[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m528s[0m 1s/step - accuracy: 0.6262 - loss: 0.6348 - val_accuracy: 0.6240 - val_loss: 0.8171
Epoch 5/8
[1m491/491[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m551s[0m 1s/step - accuracy: 0.6331 - loss: 0.6270 - val_accuracy: 0.5384 - val_loss: 0.7964
Epoch 6/8
[1m491/491[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m555s[0m 1s/step - accuracy: 0.6357 - loss: 0.6355 - val_accuracy: 0.6436 - val_loss: 0.8523
Epoch 7/8
[1m491/491[0m [

In [45]:
import pickle

with open('/content/drive/MyDrive/history.pkl', 'wb') as f:
    pickle.dump(history.history, f)


In [46]:
with open('/content/drive/MyDrive/history.pkl', 'rb') as f:
    history = pickle.load(f)


In [47]:
def preprocess_image(img_path, img_size=(128, 128)):
    img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
    if img is None:
        print(f'❌ Imagem não encontrada: {img_path}')
        return None
    img = cv2.resize(img, img_size)
    img = img / 255.0
    img = img.reshape(1, img_size[0], img_size[1], 1)
    return img


In [48]:
# Caminho da pasta com imagens externas
test_folder = '/content/drive/MyDrive/teste_imagem/'

# Listar imagens na pasta
test_images = os.listdir(test_folder)

# Testar todas
for file_name in test_images:
    img_path = os.path.join(test_folder, file_name)
    new_img = preprocess_image(img_path)

    if new_img is None:
        continue

    pred = model.predict(new_img)

    if pred[0][0] >= 0.5:
        print(f'❌ {file_name}: Enfisema detectado (score: {pred[0][0]:.4f})')
    else:
        print(f'✅ {file_name}: Normal (score: {pred[0][0]:.4f})')



[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 264ms/step
✅ raio-xtorax.png: Normal (score: 0.4413)
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 49ms/step
❌ teste-raioxenf.jpg: Enfisema detectado (score: 0.8980)
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 53ms/step
✅ 00001507_002.png: Normal (score: 0.4528)
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 52ms/step
❌ 00001437_053.png: Enfisema detectado (score: 0.7521)


In [52]:
model.save("modelo_enfisema.keras")


In [None]:
from tensorflow import keras
model = keras.models.load_model("modelo_enfisema.keras")
