In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
import pandas as pd
from sklearn.model_selection import StratifiedShuffleSplit
import numpy as np
import os

# Ruta al directorio con las imágenes
directorio_imagenes = '/content/drive/My Drive/Proyecto de Grado G/ProcessedImages_Disc_Exp2'

imagenes_info = []
for filename in os.listdir(directorio_imagenes):
    if filename.endswith('.jpg') or filename.endswith('.png'):
        base = filename.split('_')[0]
        id_paciente = base[3:-2]  # Extrae correctamente el ID del paciente
        tipo_ojo = base[-2:]
        full_path = os.path.join(directorio_imagenes, filename)
        imagenes_info.append({'filename': full_path, 'id': id_paciente, 'eye': tipo_ojo})

imagenes_df = pd.DataFrame(imagenes_info)

# Cargar y preparar los datos clínicos
data_od = pd.read_excel('/content/drive/My Drive/Proyecto de Grado G/patient_data_od.xlsx')
data_os = pd.read_excel('/content/drive/My Drive/Proyecto de Grado G/patient_data_os.xlsx')

data_od['id'] = data_od['Unnamed: 0'].str.replace('#', '').astype(str)
data_os['id'] = data_os['Unnamed: 0'].str.replace('#', '').astype(str)

data_od['eye'] = 'OD'
data_os['eye'] = 'OS'

data_clinica = pd.concat([data_od[['id', 'eye', 'Diagnosis']], data_os[['id', 'eye', 'Diagnosis']]])

full_df = pd.merge(imagenes_df, data_clinica, on=['id', 'eye'], how='left')
full_df = full_df[full_df['Diagnosis'] != 2]

dominant_diagnosis = full_df.groupby('id')['Diagnosis'].agg(lambda x: x.mode()[0])


patient_df = pd.DataFrame(dominant_diagnosis).reset_index()

splitter = StratifiedShuffleSplit(n_splits=1, test_size=0.2, random_state=42)
train_patients, test_patients = next(splitter.split(patient_df, patient_df['Diagnosis']))

train_ids = patient_df.iloc[train_patients]['id']
test_ids = patient_df.iloc[test_patients]['id']

train_df = full_df[full_df['id'].isin(train_ids)]
test_df = full_df[full_df['id'].isin(test_ids)]

print("Train DataFrame:")
print(train_df[['filename', 'id', 'eye', 'Diagnosis']].head())

print("\nTest DataFrame:")
print(test_df[['filename', 'id', 'eye', 'Diagnosis']].head())

Train DataFrame:
                                             filename   id eye Diagnosis
2   /content/drive/My Drive/Proyecto de Grado G/Pr...  004  OD         1
3   /content/drive/My Drive/Proyecto de Grado G/Pr...  004  OS         1
4   /content/drive/My Drive/Proyecto de Grado G/Pr...  005  OD         1
5   /content/drive/My Drive/Proyecto de Grado G/Pr...  005  OS         1
14  /content/drive/My Drive/Proyecto de Grado G/Pr...  010  OD         1

Test DataFrame:
                                             filename   id eye Diagnosis
20  /content/drive/My Drive/Proyecto de Grado G/Pr...  015  OD         1
21  /content/drive/My Drive/Proyecto de Grado G/Pr...  015  OS         1
32  /content/drive/My Drive/Proyecto de Grado G/Pr...  023  OD         1
33  /content/drive/My Drive/Proyecto de Grado G/Pr...  023  OS         1
42  /content/drive/My Drive/Proyecto de Grado G/Pr...  028  OD         0


In [None]:
import numpy as np
import pandas as pd
import os
from tensorflow.keras.applications.inception_v3 import InceptionV3, preprocess_input
from tensorflow.keras.layers import GlobalAveragePooling2D, Dense, Input
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import StratifiedShuffleSplit


# Preparar los generadores de datos (esto permanece igual)
train_datagen = ImageDataGenerator(
    preprocessing_function=preprocess_input,
    rescale=1.0 / 255,
    horizontal_flip=True,
    width_shift_range=0.1,
    height_shift_range=0.1,
    zoom_range=0.1
)

test_datagen = ImageDataGenerator(
    preprocessing_function=preprocess_input,
    rescale=1.0 / 255
)

# Convertir 'Diagnosis' a tipo cadena
train_df['Diagnosis'] = train_df['Diagnosis'].astype(str)
test_df['Diagnosis'] = test_df['Diagnosis'].astype(str)

train_data = train_datagen.flow_from_dataframe(
    dataframe=train_df,
    x_col='filename',
    y_col='Diagnosis',
    target_size=(299, 299),
    batch_size=20,
    class_mode='binary'
)

test_data = test_datagen.flow_from_dataframe(
    dataframe=test_df,
    x_col='filename',
    y_col='Diagnosis',
    target_size=(299, 299),
    batch_size=32,
    class_mode='binary'
)

valid_data = test_datagen.flow_from_dataframe(
    dataframe=test_df,
    x_col='filename',
    y_col='Diagnosis',
    target_size=(299, 299),
    batch_size=32,
    class_mode='binary'
)

# Configuración de los experimentos con diferentes tasas de aprendizaje
learning_rates = [0.001, 0.0001, 0.00001, 0.000001, 0.000001]

# Resultados para cada experimento
results = []

for lr in learning_rates:
    print(f"\n\n--- Experimento con learning rate = {lr} ---\n")

    # Base del modelo InceptionV3
    input_tensor = Input(shape=(299, 299, 3))
    base_model = InceptionV3(input_tensor=input_tensor, weights='imagenet', include_top=False, pooling='avg')

    # Añadir capas superiores
    x = base_model.output
    x = Dense(2048, activation='relu')(x)
    predictions = Dense(1, activation='sigmoid')(x)
    model = Model(inputs=base_model.input, outputs=predictions)

    # Warm-Up (entrenar solo capas superiores)
    for layer in base_model.layers:
        layer.trainable = False

    model.compile(optimizer=Adam(learning_rate=lr), loss='binary_crossentropy', metrics=['accuracy', 'AUC'])

    checkpoint = ModelCheckpoint(f'best_model_warmup_lr_{lr}.keras', monitor='val_auc', save_best_only=True, mode='min', verbose=1)
    early_stop = EarlyStopping(monitor='val_auc', patience=10, restore_best_weights=True, mode='min')

    print("Inicio de Warm-Up Training...\n")
    warmup_history = model.fit(
        train_data,
        validation_data=valid_data,
        epochs=10,  # Por ejemplo, entrenamos por 10 épocas antes de hacer fine-tuning
        callbacks=[checkpoint, early_stop]
    )

    # Fine-Tuning (descongelar todas las capas)
    for layer in base_model.layers:
        layer.trainable = True

    model.compile(optimizer=Adam(learning_rate=lr/10), loss='binary_crossentropy', metrics=['accuracy', 'AUC'])  # Reducir el LR para fine-tuning
    checkpoint = ModelCheckpoint(f'best_model_finetune_lr_{lr}.keras', monitor='val_auc', save_best_only=True, mode='min', verbose=1)

    print("Inicio de Fine-Tuning Training...\n")
    finetune_history = model.fit(
        train_data,
        validation_data=valid_data,
        epochs=50,  # Añadimos más épocas para el fine-tuning
        callbacks=[checkpoint, early_stop]
    )

    # Evaluación final en el conjunto de prueba
    evaluation = model.evaluate(test_data)
    print(f"Test Loss: {evaluation[0]}, Test Accuracy: {evaluation[1]}, Test AUC: {evaluation[2]}")

    # Guardar los resultados del experimento
    results.append({
        'learning_rate': lr,
        'test_loss': evaluation[0],
        'test_accuracy': evaluation[1],
        'test_auc': evaluation[2]
    })

    # Predicciones y AUC en el conjunto de entrenamiento
    y_train = train_data.classes
    predictions_train = model.predict(train_data, steps=len(train_data), verbose=1)
    predicted_classes_train = np.round(predictions_train).astype(int)
    print(classification_report(y_train, predicted_classes_train, target_names=['Clase 0', 'Clase 1']))

    auc_score_train = roc_auc_score(y_train, predictions_train)
    print(f"AUC Score en entrenamiento: {auc_score_train}")

# Mostrar todos los resultados al final de los 5 experimentos
for result in results:
    print(f"Learning Rate: {result['learning_rate']}, Test Loss: {result['test_loss']}, Test Accuracy: {result['test_accuracy']}, Test AUC: {result['test_auc']}")


Found 335 validated image filenames belonging to 2 classes.
Found 84 validated image filenames belonging to 2 classes.
Found 84 validated image filenames belonging to 2 classes.


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  train_df['Diagnosis'] = train_df['Diagnosis'].astype(str)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test_df['Diagnosis'] = test_df['Diagnosis'].astype(str)




--- Experimento con learning rate = 0.001 ---

Inicio de Warm-Up Training...

Epoch 1/10


  self._warn_if_super_not_called()


[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m37s[0m 1s/step - AUC: 0.4672 - accuracy: 0.7463 - loss: 0.9355 - val_AUC: 0.6067 - val_accuracy: 0.7976 - val_loss: 0.8053
Epoch 2/10


  self._save_model(epoch=epoch, batch=None, logs=logs)
  current = self.get_monitor_value(logs)


[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 302ms/step - AUC: 0.4991 - accuracy: 0.7066 - loss: 0.7842 - val_AUC: 0.5764 - val_accuracy: 0.7976 - val_loss: 0.6696
Epoch 3/10
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 303ms/step - AUC: 0.5628 - accuracy: 0.8025 - loss: 0.5714 - val_AUC: 0.5413 - val_accuracy: 0.7976 - val_loss: 0.5262
Epoch 4/10
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 409ms/step - AUC: 0.4730 - accuracy: 0.7597 - loss: 0.5904 - val_AUC: 0.5329 - val_accuracy: 0.7976 - val_loss: 0.5021
Epoch 5/10
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 303ms/step - AUC: 0.5279 - accuracy: 0.7933 - loss: 0.5307 - val_AUC: 0.5316 - val_accuracy: 0.7976 - val_loss: 0.5138
Epoch 6/10
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 304ms/step - AUC: 0.4868 - accuracy: 0.7365 - loss: 0.6447 - val_AUC: 0.5566 - val_accuracy: 0.7976 - val_loss: 0.5490
Epoch 7/10
[1m17/17[0m [32m━━━━━━━━

  self._save_model(epoch=epoch, batch=None, logs=logs)
  current = self.get_monitor_value(logs)


[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m22s[0m 420ms/step - AUC: 0.4529 - accuracy: 0.7839 - loss: 0.5485 - val_AUC: 0.5474 - val_accuracy: 0.7976 - val_loss: 0.5136
Epoch 3/10
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 300ms/step - AUC: 0.5266 - accuracy: 0.8001 - loss: 0.5042 - val_AUC: 0.5219 - val_accuracy: 0.7976 - val_loss: 0.5049
Epoch 4/10
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 372ms/step - AUC: 0.4883 - accuracy: 0.7618 - loss: 0.5524 - val_AUC: 0.5536 - val_accuracy: 0.7976 - val_loss: 0.5078
Epoch 5/10
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 322ms/step - AUC: 0.5403 - accuracy: 0.8030 - loss: 0.4951 - val_AUC: 0.5439 - val_accuracy: 0.7976 - val_loss: 0.5043
Epoch 6/10
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 304ms/step - AUC: 0.5046 - accuracy: 0.7667 - loss: 0.5437 - val_AUC: 0.5347 - val_accuracy: 0.7976 - val_loss: 0.5039
Epoch 7/10
[1m17/17[0m [32m━━━━━━━━

  self._save_model(epoch=epoch, batch=None, logs=logs)
  current = self.get_monitor_value(logs)


[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 318ms/step - AUC: 0.5792 - accuracy: 0.7576 - loss: 0.5685 - val_AUC: 0.5856 - val_accuracy: 0.7976 - val_loss: 0.5086
Epoch 3/10
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 361ms/step - AUC: 0.5110 - accuracy: 0.7985 - loss: 0.5059 - val_AUC: 0.4925 - val_accuracy: 0.7976 - val_loss: 0.5032
Epoch 4/10
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 334ms/step - AUC: 0.5152 - accuracy: 0.8039 - loss: 0.4949 - val_AUC: 0.5000 - val_accuracy: 0.7976 - val_loss: 0.5033
Epoch 5/10
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 298ms/step - AUC: 0.4930 - accuracy: 0.7648 - loss: 0.5500 - val_AUC: 0.5689 - val_accuracy: 0.7976 - val_loss: 0.5033
Epoch 6/10
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 488ms/step - AUC: 0.5675 - accuracy: 0.7814 - loss: 0.5249 - val_AUC: 0.5316 - val_accuracy: 0.7976 - val_loss: 0.5031
Epoch 7/10
[1m17/17[0m [32m━━━━━━━━

  self._save_model(epoch=epoch, batch=None, logs=logs)
  current = self.get_monitor_value(logs)


[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 368ms/step - AUC: 0.4879 - accuracy: 0.8144 - loss: 0.6042 - val_AUC: 0.5000 - val_accuracy: 0.7976 - val_loss: 0.5958
Epoch 3/10
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 309ms/step - AUC: 0.5108 - accuracy: 0.7930 - loss: 0.5940 - val_AUC: 0.5364 - val_accuracy: 0.7976 - val_loss: 0.5806
Epoch 4/10
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 320ms/step - AUC: 0.4513 - accuracy: 0.8172 - loss: 0.5685 - val_AUC: 0.5606 - val_accuracy: 0.7976 - val_loss: 0.5677
Epoch 5/10
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 366ms/step - AUC: 0.5196 - accuracy: 0.7844 - loss: 0.5724 - val_AUC: 0.6150 - val_accuracy: 0.7976 - val_loss: 0.5567
Epoch 6/10
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 311ms/step - AUC: 0.5413 - accuracy: 0.7826 - loss: 0.5638 - val_AUC: 0.5996 - val_accuracy: 0.7976 - val_loss: 0.5474
Epoch 7/10
[1m17/17[0m [32m━━━━━━━━