In [None]:
# Importar funciones de procesamiento de datos
from limpieza_datos import *

# Importar la clase del modelo y funciones de entrenamiento/evaluación
# from GNN import MultiKRWithGCN
from train_and_evaluate import *

from KGs import *

# Importar funciones para modelar el EDA
from edas import *

import numpy as np
import matplotlib.pyplot as plt

In [None]:
# Process the data and capture the ID mappings that later cells consume.
# The helper's two return values are unpacked into the item-index mapping and
# the entity-ID mapping.
# NOTE(review): the helper arrives through one of the star imports at the top
# of the notebook and presumably reads its input from the data directory — confirm.
item_index_old2new, entity_id2index = read_item_index_to_entity_id_file()

In [None]:
# convert_rating's return value is discarded here.
# NOTE(review): it presumably does its work through side effects (e.g. writing
# a converted ratings file) — confirm against its definition.
convert_rating(item_index_old2new)
# Rebinds entity_id2index (first assigned from read_item_index_to_entity_id_file)
# and introduces relation_id2index.
entity_id2index, relation_id2index = convert_kg()

In [None]:
# Load or build edge_index a single time, before any training starts.
edge_index = load_kg_and_create_edge_index(
    entity_id2index,
    relation_id2index,
)

In [None]:
# Relation name -> pair of entity types attached to that relation.
# NOTE(review): the pair is presumably (head type, tail type) — confirm against
# generate_entity_to_type_mapping. Entry order is kept stable on purpose.
relation_to_entity_types = {
    # Relations on individual books / written works.
    "book.book.genre": ("book", "genre"),
    "book.written_work.date_of_first_publication": ("book", "date"),
    # Series-level relations.
    "book.literary_series.author": ("series", "author"),
    "comic_books.series.publisher": ("comic_series", "publisher"),
    "book.written_work.author": ("book", "author"),
    "book.literary_series.works_in_this_series": ("series", "work"),
    # Relations between written works.
    "book.written_work.translation": ("original_work", "translation"),
    "book.written_work.subject": ("work", "subject"),
    "book.written_work.literary_series": ("work", "series"),
    "book.written_work.previous_in_series": ("work", "previous_work"),
}


# Build the entity-ID -> entity-type mapping from the kg.txt file and the
# relation_to_entity_types table.
# NOTE(review): this path literal duplicates the kg_file_path variable that a
# later cell defines — consider sharing one definition.
entity_to_type = generate_entity_to_type_mapping('./MKR-data/kg.txt', relation_to_entity_types)

# Relation index -> relation name. The position of each name in the tuple is
# its index, so this mapping has to be kept in line with the relation indices
# of your own data.
index_to_relation_name = dict(enumerate((
    'book.book.genre',
    'book.written_work.date_of_first_publication',
    'book.literary_series.author',
    'comic_books.series.publisher',
    'book.written_work.author',
    'book.literary_series.works_in_this_series',
    'book.written_work.translation',
    'book.written_work.subject',
    'book.written_work.literary_series',
    'book.written_work.previous_in_series',
)))

In [None]:
# Path of the KG file and the output location; both are only referenced by the
# disabled call below.
kg_file_path = './MKR-data/kg.txt'
output_path = './MKR-data/'

# Disabled call, kept for reference. Per the original note it is invoked as
# before, but without 'relation_id2index' and 'index_to_relation_name'.
# adapt_and_split_kg_data_with_slashes(entity_id2index, relation_to_entity_types, kg_file_path, output_path)

In [None]:
# Load the final ratings table and split it into train / eval / test parts.
ratings = np.loadtxt('./MKR-data/ratings_final.txt', dtype=np.int32)
train_data, eval_data, test_data = dataset_split(ratings)

# Wrap each split in a DataLoader. Note the naming: val_loader serves the
# eval split and eval_loader serves the test split; only the latter is
# iterated in a fixed order.
train_loader, val_loader, eval_loader = (
    DataLoader(TrainSet(split), batch_size=64, shuffle=reshuffle)
    for split, reshuffle in (
        (train_data, True),
        (eval_data, True),
        (test_data, False),
    )
)


In [None]:
# Summaries of the train and eval splits.
print("Training dataset summary:")
summarize_dataset(TrainSet(train_data))

print("\nEvaluation dataset summary:")
summarize_dataset(TrainSet(eval_data))

# Sanity check: pull the first batch from eval_loader and show its three parts.
sample_user, sample_item, sample_target = next(iter(eval_loader))
print(
    "\nSample batch from eval_loader:",
    f"User tensor: {sample_user}",
    f"Item tensor: {sample_item}",
    f"Target tensor: {sample_target}",
    sep="\n",
)


## DATA LOADER

### KG

In [None]:
# Integer KG table loaded from kg_final.txt.
kg_data = np.loadtxt('./MKR-data/kg_final.txt', dtype=int)

# Total entity count: the largest ID seen in column 0 or column 2, plus one.
num_entities = np.max(kg_data[:, [0, 2]]) + 1

# Shuffled mini-batches over the KG rows.
kg_train_set = KGTrainSet(kg_data)
kg_train_loader = DataLoader(
    kg_train_set,
    batch_size=64,
    shuffle=True,
)

### REC

In [None]:
# Recommendation data: load the ratings table and split it for training,
# validation and testing.
# NOTE(review): this repeats the load + split already done in an earlier cell
# and rebinds ratings / train_data / eval_data / test_data. If dataset_split is
# not deterministic, these loaders hold a different split than train_loader /
# val_loader / eval_loader — confirm whether that is intended.
ratings = np.loadtxt('./MKR-data/ratings_final.txt', dtype=np.int32)
train_data, eval_data, test_data = dataset_split(ratings)

# One DataLoader per split; only the test-split loader keeps a fixed order.
rec_train_loader, rec_val_loader, rec_eval_loader = (
    DataLoader(TrainSet(split), batch_size=64, shuffle=reshuffle)
    for split, reshuffle in (
        (train_data, True),
        (eval_data, True),
        (test_data, False),
    )
)


In [None]:
# Size parameters for initializing the MultiKR model.
# Distinct values in column 0 of the ratings table (unique users).
user_num = np.unique(ratings[:, 0]).size
# Entries in the item mapping (unique items).
item_num = len(item_index_old2new)
# Entries in the entity mapping (unique entities).
entity_num = len(entity_id2index)
# Distinct values in column 1 of the KG table (unique relations).
relation_num = np.unique(kg_data[:, 1]).size

## EDA para optimización

In [None]:
# Fetch the variables and their candidate values for the KG side and the
# recommendation side.
variables_kg, variables_rec = define_variables_for_KG_and_rec()

# Merge both into one dict: KG entries first, then recommendation entries.
# If a key appears in both, the recommendation value wins.
combined_variables_and_values = dict(variables_kg)
combined_variables_and_values.update(variables_rec)


In [None]:
# Re-key the search space by position (0, 1, 2, ...) instead of by variable
# name, which is the structure handed to EBNA.
possible_values_numeric = dict(enumerate(combined_variables_and_values.values()))

# Uniform starting frequencies: each candidate value of a variable gets 1/k,
# where k is the number of candidates for that variable.
frequency_numeric = {
    position: [1 / len(candidates)] * len(candidates)
    for position, candidates in possible_values_numeric.items()
}

In [None]:
# Show how many variables are in the search space (the same value is later
# passed to EBNA as n_variables).
len(possible_values_numeric)

In [None]:
from EDAspy.optimization import EBNA

# Set up EBNA over the discrete search space built above.
# NOTE(review): size_gen / max_iter / dead_iter / alpha are EBNA tuning knobs;
# check the EDAspy documentation for their exact semantics before changing them.
ebna = EBNA(
    # Search-space description.
    n_variables=len(possible_values_numeric),
    possible_values=possible_values_numeric,
    frequency=frequency_numeric,
    # Run configuration.
    size_gen=100,
    max_iter=50,
    dead_iter=10,
    alpha=0.8,
)

In [None]:
# Cost-function wrapper handed to EBNA. Written as a named function rather than
# a lambda bound to a name, so it carries a real __name__ and a docstring.
def multiKR_cost_wrapper_with_params(solution):
    """Forward one candidate solution to multiKR_cost_wrapper_eda.

    Args:
        solution: Candidate solution supplied by the optimizer; passed
            through unchanged.

    Returns:
        Whatever multiKR_cost_wrapper_eda returns for that solution.
    """
    return multiKR_cost_wrapper_eda(solution)

In [None]:
# Run EBNA: minimize the wrapped cost function and keep the returned result.
ebna_result = ebna.minimize(multiKR_cost_wrapper_with_params)