In [None]:
import numpy as np
import torch
from torch.utils.data import DataLoader
from EDAspy.optimization import UMDAc
from torch.optim import Adam
import torch.nn.functional as F
from tqdm import tqdm

# Project-local module imports
from limpieza_datos import read_item_index_to_entity_id_file, convert_rating, convert_kg, dataset_split, TrainSet
from custom_mkr import MultiKR
from train_and_evaluate import train_and_evaluate

In [None]:
embed_dim = 64  # Embedding dimensionality (number of columns in every embedding matrix)

def initialize_population(entity_num, relation_num, embed_dim):
    """Draw random starting embedding matrices for entities and relations.

    Both matrices are sampled with ``np.random.rand`` (uniform on [0, 1)).

    Args:
        entity_num: Number of rows of the entity matrix.
        relation_num: Number of rows of the relation matrix.
        embed_dim: Number of columns of both matrices.

    Returns:
        Tuple ``(entity_population, relation_population)`` with shapes
        ``(entity_num, embed_dim)`` and ``(relation_num, embed_dim)``.
    """
    # The entity matrix is drawn first, then the relation matrix, so the
    # global NumPy random stream is consumed in a fixed order.
    shapes = ((entity_num, embed_dim), (relation_num, embed_dim))
    entity_population, relation_population = (np.random.rand(*shape) for shape in shapes)
    return entity_population, relation_population


def auc_cost_function(entity_pop, relation_pop, model, train_loader, val_loader, optimizer, loss_function, epoch=5):
    """Score one candidate pair of embedding matrices by negated validation AUC.

    The candidate matrices are written into ``model.entity_embed`` and
    ``model.relation_embed``, the model is then trained and evaluated through
    ``train_and_evaluate`` (``task_type='rec'``), and the validation AUC it
    reports is negated so that a minimiser maximises AUC.

    Args:
        entity_pop: Array whose shape must equal ``model.entity_embed.weight.shape``.
        relation_pop: Array whose shape must equal ``model.relation_embed.weight.shape``.
        model: Model exposing ``entity_embed`` and ``relation_embed`` embedding layers.
        train_loader: Training data loader forwarded to ``train_and_evaluate``.
        val_loader: Validation data loader forwarded to ``train_and_evaluate``.
        optimizer: Optimizer forwarded to ``train_and_evaluate``.
        loss_function: Loss forwarded to ``train_and_evaluate``.
        epoch: Number of epochs passed as ``num_epochs``.

    Returns:
        ``-val_auc`` on success, or ``float('inf')`` (worst possible cost) when
        either candidate matrix does not match the model's embedding shape.
    """
    entity_weight = model.entity_embed.weight
    relation_weight = model.relation_embed.weight

    # Reject malformed candidates with the worst cost instead of raising, so
    # the optimisation loop keeps running.
    if entity_pop.shape != entity_weight.shape or relation_pop.shape != relation_weight.shape:
        print("Mismatch in embedding shapes!")
        return float('inf')

    # Copy the values in place rather than rebinding ``weight.data``: the
    # parameter keeps its storage, dtype and device, and autograd does not
    # record the overwrite.
    with torch.no_grad():
        entity_weight.copy_(
            torch.as_tensor(entity_pop, dtype=entity_weight.dtype, device=entity_weight.device)
        )
        relation_weight.copy_(
            torch.as_tensor(relation_pop, dtype=relation_weight.dtype, device=relation_weight.device)
        )

    _, _, val_auc = train_and_evaluate(
        model, train_loader, val_loader, optimizer, loss_function,
        num_epochs=epoch, task_type='rec', edge_index=None, relation_index=None,
    )
    return -val_auc



def run_eda(entity_num, relation_num, embed_dim, model, train_loader, val_loader,
            population_size=10, max_iter=150, dead_iter=10,
            lower_bound=0, upper_bound=1, alpha=0.5, epoch=5, lr=0.0001):
    """Search for entity/relation embeddings with a continuous UMDA.

    Every individual is a flat vector of ``(entity_num + relation_num) * embed_dim``
    values: the first ``entity_num * embed_dim`` entries are reshaped into the
    entity matrix and the remainder into the relation matrix. The cost of an
    individual is whatever ``auc_cost_function`` returns for those matrices.

    Each candidate is scored from the same starting point: before every
    evaluation the model is reset to the weights it had when ``run_eda`` was
    called and a fresh Adam optimizer is created. Without this, training done
    for one candidate would carry over to the next and the cost would depend
    on evaluation order. The model is also restored when the run finishes, so
    consecutive ``run_eda`` calls on the same model are comparable.

    Args:
        entity_num: Number of entity embeddings (rows of the entity matrix).
        relation_num: Number of relation embeddings (rows of the relation matrix).
        embed_dim: Embedding dimensionality (columns of both matrices).
        model: Model passed to ``auc_cost_function``.
        train_loader: Training data loader passed to ``auc_cost_function``.
        val_loader: Validation data loader passed to ``auc_cost_function``.
        population_size: Forwarded to ``UMDAc`` as ``size_gen``.
        max_iter: Forwarded to ``UMDAc``.
        dead_iter: Forwarded to ``UMDAc``.
        lower_bound: Forwarded to ``UMDAc``.
        upper_bound: Forwarded to ``UMDAc``.
        alpha: Forwarded to ``UMDAc``.
        epoch: Training epochs per candidate evaluation.
        lr: Learning rate of the Adam optimizer built for each evaluation.

    Returns:
        The object returned by ``UMDAc.minimize``.
    """
    import copy  # local import so this cell does not rely on a separate import cell

    loss_function = torch.nn.BCEWithLogitsLoss()
    entity_size = entity_num * embed_dim
    n_variables = (entity_num + relation_num) * embed_dim

    # One optimisation variable per embedding coordinate.
    umda = UMDAc(
        size_gen=population_size,
        max_iter=max_iter,
        dead_iter=dead_iter,
        n_variables=n_variables,
        lower_bound=lower_bound,
        upper_bound=upper_bound,
        alpha=alpha
    )

    # Snapshot of the untouched model; costs one extra copy of all weights.
    initial_state = copy.deepcopy(model.state_dict())

    def cost_function_wrapper(x):
        # Rebuild the two embedding matrices from the flat individual and
        # score them starting from the pristine model and optimizer state.
        model.load_state_dict(initial_state)
        optimizer = Adam(model.parameters(), lr=lr)
        x = np.asarray(x)
        return auc_cost_function(
            x[:entity_size].reshape(entity_num, embed_dim),
            x[entity_size:n_variables].reshape(relation_num, embed_dim),
            model, train_loader, val_loader, optimizer, loss_function, epoch
        )

    try:
        return umda.minimize(cost_function_wrapper)
    finally:
        # Leave the model exactly as it was handed in.
        model.load_state_dict(initial_state)

In [None]:
# Data and loader configuration.
# The first return value gets a real name instead of `_`, which IPython
# reserves for the previous cell output (presumably the item-index mapping —
# TODO confirm against limpieza_datos).
item_index_map, entity_id2index = read_item_index_to_entity_id_file()
convert_rating(item_index_map)
# convert_kg() rebinds entity_id2index; its return values are the mappings used from here on.
entity_id2index, relation_id2index = convert_kg()
ratings = np.loadtxt('./MKR-data/ratings_final.txt', dtype=np.int32)
train_data, eval_data, test_data = dataset_split(ratings)
train_loader = DataLoader(TrainSet(train_data), batch_size=64, shuffle=True)
# Evaluation loaders keep a fixed order (as the test loader already did), so
# repeated validation passes iterate over identical batches.
val_loader = DataLoader(TrainSet(eval_data), batch_size=64, shuffle=False)
test_loader = DataLoader(TrainSet(test_data), batch_size=64, shuffle=False)

In [None]:
# Display the loaded ratings array as this cell's output.
ratings

In [None]:
entity_id2index ## TODO: verify this mapping

In [None]:
def check_indices_in_dataloader(data_loader):
    """Print the largest user and item index found across all batches.

    Each batch is unpacked into three fields; the first two (user, item) are
    inspected and the third is ignored. Both running maxima start at 0, so a
    loader with no batches reports 0 for each.

    Args:
        data_loader: Iterable yielding ``(user, item, <ignored>)`` batches whose
            first two fields support ``.max().item()``.
    """
    highest_user = 0
    highest_item = 0
    for users, items, _ in data_loader:
        batch_user_max = users.max().item()
        batch_item_max = items.max().item()
        # Keep the running maximum only when the batch strictly exceeds it.
        if batch_user_max > highest_user:
            highest_user = batch_user_max
        if batch_item_max > highest_item:
            highest_item = batch_item_max
    print(f'Max user index in DataLoader: {highest_user}')
    print(f'Max item index in DataLoader: {highest_item}')

# Report the maximum user/item indices seen in the training and validation loaders.
check_indices_in_dataloader(train_loader)
check_indices_in_dataloader(val_loader)


In [None]:
# Sizes of each id space: distinct users and items present in the ratings
# (columns 0 and 1), and the number of entries in the two KG index mappings.
user_num = np.unique(ratings[:, 0]).size
item_num = np.unique(ratings[:, 1]).size
entity_num, relation_num = len(entity_id2index), len(relation_id2index)

print([user_num, item_num, entity_num, relation_num])


In [None]:
# Presumably the selected configuration, listed in the order of the grid keys below:
# [batch_size lr embed_dim hidden_layers_config dropout_rate output_rec epochs]
# [64 0.0001 64 '64' 0.3 1 5]


# 'batch_size': [32, 64, 128],
# 'lr': [0.01, 0.001, 0.0001],
# 'embed_dim': [64, 128, 256],
# 'hidden_layers_config': [
#     '64', '128', '256',  # Single-layer configurations
#     '64_64', '128_128', '256_256',  # Two equal layers
#     '64_128', '128_256',  # Two layers of increasing width
#     '128_64', '256_128',  # Two layers of decreasing width
#     '64_128_256', '256_128_64',  # Three-layer configurations
#     '64_64_64', '128_128_128', '256_256_256',  # Three equal layers
# ],
# 'dropout_rate': [0.5, 0.3, 0.1],
# 'output_rec': [1],
# 'epochs': [5, 10, 15]  # Add the desired values for epochs

In [None]:
# Largest index used for users and items in the ratings.
max_user_idx = ratings[:, 0].max()
max_item_idx = ratings[:, 1].max()

# Build the model from those maxima. The embedding width comes from the
# notebook-level `embed_dim`, the same value run_eda uses to reshape
# candidates, so the model tables and the candidate matrices cannot diverge.
model = MultiKR(
    user_num=int(max_user_idx) + 1,  # every user index from 0 to max_user_idx
    item_num=int(max_item_idx) + 1,  # every item index from 0 to max_item_idx
    entity_num=len(entity_id2index),
    relation_num=len(relation_id2index),
    n_layer=1,
    embed_dim=embed_dim,
    hidden_layers=[64],
    dropouts=[0.3],
    output_rec=1
)


In [None]:
# # Model initialization (alternative configuration, commented out)
# model = MultiKR(user_num=user_num, item_num=item_num, entity_num=entity_num, relation_num=relation_num, n_layer=2, embed_dim=64, hidden_layers=[128, 64], dropouts=[0.5, 0.5], output_rec=1)

In [None]:
# Print the shapes of the model's entity and relation embedding tables;
# auc_cost_function requires candidate matrices to match these exactly.
print("Forma esperada de embeddings de entidades:", model.entity_embed.weight.data.shape)
print("Forma esperada de embeddings de relaciones:", model.relation_embed.weight.data.shape)

In [None]:
# Run the EDA: population 25, alpha 0.4, search bounds [-1, 1].
best_embeddings = run_eda(entity_num, relation_num, embed_dim, model, train_loader, val_loader, population_size=25,max_iter=150, dead_iter=10,lower_bound=-1, upper_bound=1,alpha=0.4 )

In [None]:
# Show the object returned by the first EDA run.
print("Optimal embeddings found:", best_embeddings)

In [None]:
# Run the EDA again: population 30, alpha 0.2, search bounds [-1, 1].
best_embeddings2 = run_eda(entity_num, relation_num, embed_dim, model, train_loader, val_loader, population_size=30,max_iter=150, dead_iter=10,lower_bound=-1, upper_bound=1,alpha=0.2 )

In [None]:
# Show the object returned by the second EDA run.
print("Optimal embeddings found:", best_embeddings2)

In [None]:
# Run the EDA a third time: population 50, dead_iter 15, alpha 0.5, search bounds [-1, 1].
best_embeddings3 = run_eda(entity_num, relation_num, embed_dim, model, train_loader, val_loader, population_size=50,max_iter=150, dead_iter=15,lower_bound=-1, upper_bound=1,alpha=0.5 )

In [None]:
# Show the object returned by the third EDA run.
print("Optimal embeddings found:", best_embeddings3)