# Inteligencia Artificial 2
## P1 Project: Implement a Recommender system using a bipartite network projection
### Autores: Miguel Brito, Diana Cuenca, José Escudero, Danny Huacon, Steveen Terán

## Importar y analizar datos
1. Importar librerías

In [None]:
import numpy as np
import pandas as pd
import os
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"
import math
import json
import time
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.model_selection import train_test_split
from sklearn.neighbors import NearestNeighbors
import joblib
import scipy.sparse
from scipy.sparse import csr_matrix
from scipy.sparse.linalg import svds


2. Importar datos

In [None]:
# Read the ratings CSV. Column names are supplied explicitly, so the first
# line of the file is read as data rather than as a header.
electronics_data = pd.read_csv(
    "./ratings_electronics.csv",
    names=['userId', 'productId', 'Rating', 'timestamp'],
)
# Discard the timestamp column in place.
electronics_data.drop(columns='timestamp', inplace=True)
electronics_data.head()


3. Analizar rango de ratings y revisar integridad de datos

In [None]:
# Report the observed rating range, then the number of missing values per column.
lowest_rating = electronics_data['Rating'].min()
highest_rating = electronics_data['Rating'].max()
print('Minimum rating is: %d' % lowest_rating)
print('Maximum rating is: %d' % highest_rating)
print("=====================================================================================================")
missing_per_column = electronics_data.isna().sum()
print('Number of missing values across columns: \n', missing_per_column)


4. Graficar distribucion de ratings

In [None]:
# Plot how often each rating value occurs.

# Table indexed by rating value with a single 'count' column holding the
# number of rows that carry that rating.
rating_distribution = (
    electronics_data.groupby('Rating')['Rating'].count().to_frame('count')
)
sns.barplot(x=rating_distribution.index, y=rating_distribution['count'])


5. Imprimir cantidad de ratings, usuarios unicos y productos unicos

In [None]:
# Dataset size summary: number of rating rows, distinct users and distinct products.
# Series.nunique() counts distinct values by hashing instead of sorting the
# whole column the way np.unique() does; dropna=False keeps a missing ID
# (if any) counted as its own value.
print("Total no of ratings :", len(electronics_data))
print("Total No of Users   :", electronics_data['userId'].nunique(dropna=False))
print("Total No of products  :", electronics_data['productId'].nunique(dropna=False))


## Desarrollo de taller

1. Generar red bipartita

In [None]:
import networkx as nx
from operator import itemgetter

# Keep only the 5-star reviews; the graph is built from these rows alone.
is_five_star = electronics_data['Rating'] == 5
five_star_reviews = electronics_data.loc[is_five_star]

# Distinct endpoints for each side of the bipartite graph.
user_nodes = set(five_star_reviews['userId'])
product_nodes = set(five_star_reviews['productId'])

# Users form partition 0 (blue) and products form partition 1 (red).
# An edge joins every (user, product) pair that appears in a 5-star review.
B = nx.Graph()
B.add_nodes_from(user_nodes, bipartite=0, color='blue')
B.add_nodes_from(product_nodes, bipartite=1, color='red')
B.add_edges_from(
    five_star_reviews[['userId', 'productId']].itertuples(index=False, name=None)
)


2.a Ejemplo de sistema de recomendación con datos de la red bipartita. \
   En este bloque se selecciona un usuario y se obtienen los productos revisados por otros usuarios que han revisado los mismos productos que el usuario en cuestión.

In [None]:
# Build recommendations for one specific user, e.g. the user of the first
# 5-star review in the dataset.
user_node = five_star_reviews.iloc[0]['userId']

# Products this user has reviewed. Graph.neighbors() returns a one-shot
# iterator, and the result is needed twice (the loop below and the final
# subtraction), so it is materialised as a set up front. Reusing the bare
# iterator would leave the subtraction with nothing to remove.
products_reviewed_by_user = set(B.neighbors(user_node))

# Candidates: every product reviewed by any user who shares at least one
# reviewed product with the target user.
recommendations = set()
for product in products_reviewed_by_user:
    for user in B.neighbors(product):
        recommendations.update(B.neighbors(user))

# Remove the products the user has already reviewed.
recommendations -= products_reviewed_by_user

recommendations


2.b Ejemplo de sistema de recomendación con datos de la red bipartita. \
   En este bloque se ordenan los productos obtenidos en el bloque anterior de acuerdo a la cantidad de usuarios que han revisado cada producto, y se seleccionan los 10 productos con mayor cantidad de usuarios que los han revisado.

In [None]:
# Rank the candidate products by how many users are adjacent to each one in B
# and keep the ten with the highest count.
product_recommendation_counts = [
    (product, len(B[product])) for product in recommendations
]
ranked_by_reviewers = sorted(
    product_recommendation_counts, key=lambda pair: pair[1], reverse=True
)
top_recommendations = ranked_by_reviewers[:10]

# Show the recommendations.
for product, count in top_recommendations:
    print(f"Producto: {product}, revisado por: {count} usuarios")


3.a Se crea un subconjunto de datos para construir un grafo bipartito para visualizar. \
    En este bloque se arma el grafo con un subconjunto de 100 datos.

In [None]:
# Build a small graph from the first 100 five-star reviews so that it can be drawn.
subset_data = five_star_reviews.iloc[:100]

sample_users = set(subset_data['userId'])
sample_products = set(subset_data['productId'])

# Users are partition 0 (blue), products are partition 1 (red).
B_sub = nx.Graph()
B_sub.add_nodes_from(sample_users, bipartite=0, color='blue')
B_sub.add_nodes_from(sample_products, bipartite=1, color='red')
B_sub.add_edges_from(
    subset_data[['userId', 'productId']].itertuples(index=False, name=None)
)


3.b Se crea un subconjunto de datos para construir un grafo bipartito para visualizar. \
    En este bloque se dibuja el grafo bipartito generado en el bloque anterior.

In [None]:

# Draw the sample graph. The nodes passed to bipartite_layout are the user
# nodes (bipartite == 0).
pos = nx.bipartite_layout(
    B_sub,
    nodes=[node for node, attrs in B_sub.nodes(data=True) if attrs['bipartite'] == 0],
)
# One colour per node, in the graph's node order, as nx.draw expects.
colors = [attrs['color'] for _, attrs in B_sub.nodes(data=True)]

# Legend entries: a white line with a coloured round marker for each node type.
blue_patch = plt.Line2D([0], [0], marker='o', color='w', label='Users', markersize=10, markerfacecolor='blue')
red_patch = plt.Line2D([0], [0], marker='o', color='w', label='Products', markersize=10, markerfacecolor='red')

plt.figure(figsize=(12, 12))
nx.draw(B_sub, pos=pos, node_color=colors, with_labels=False, node_size=20)
plt.legend(handles=[blue_patch, red_patch])
plt.title("Subset of Bipartite User-Product Network")
plt.show()


4. Poda de grafo bipartito e impresión del mismo


In [None]:
# 1. Prune by degree (all nodes): drop every node whose degree in B is below
#    the threshold, i.e. keep nodes with degree >= 5.
degree_threshold = 5
to_remove = [node for node, degree in B.degree() if degree < degree_threshold]

trimmed_B = B.subgraph(B.nodes - set(to_remove))

# 2. Prune by node type: among the remaining nodes, drop product nodes
#    (bipartite == 1) whose degree in the already-pruned graph is below the
#    threshold, i.e. keep products with degree >= 10. User nodes are not
#    removed in this step.
product_degree_threshold = 10
products_to_remove = [
    node
    for node, degree in trimmed_B.degree()
    if trimmed_B.nodes[node]['bipartite'] == 1 and degree < product_degree_threshold
]
trimmed_B = trimmed_B.subgraph(trimmed_B.nodes - set(products_to_remove))

# Take at most 4000 nodes of the pruned graph (in node order) for drawing.
subset_nodes = list(trimmed_B.nodes)[:4000]
B_sub = trimmed_B.subgraph(subset_nodes)

# Draw the graph. The nodes passed to bipartite_layout are the user nodes
# (bipartite == 0).
pos = nx.bipartite_layout(B_sub, nodes=[n for n, d in B_sub.nodes(data=True) if d['bipartite'] == 0])
colors = [d['color'] for n, d in B_sub.nodes(data=True)]

plt.figure(figsize=(12, 12))
nx.draw(B_sub, pos=pos, node_color=colors, with_labels=False, node_size=20)
# Legend entries: a white line with a coloured round marker for each node type.
blue_patch = plt.Line2D([0], [0], marker='o', color='w', label='Usuarios', markersize=10, markerfacecolor='blue')
red_patch = plt.Line2D([0], [0], marker='o', color='w', label='Productos', markersize=10, markerfacecolor='red')
plt.legend(handles=[blue_patch, red_patch])
plt.title("Subconjunto del Grafo Bipartito Usuarios-Productos después de la poda")
plt.show()


## Función de recomendaciones e interfaz

In [None]:
def get_product_recommendation_network(product, limit):
    """Build a star graph of the products most often co-reviewed with ``product``.

    Reads the module-level bipartite graph ``B``. Every neighbour of
    ``product`` in ``B`` (a user) casts one vote for each of its own
    neighbours (products); the ``limit`` products with the most votes are kept.

    Args:
        product: Node of ``B`` on which the network is centred.
        limit: Maximum number of recommended products to include.

    Returns:
        An ``nx.Graph`` containing ``product`` plus one edge from it to each
        recommended product. Each edge's ``weight`` attribute is the number of
        users adjacent to both products in ``B``.
    """
    from collections import Counter

    # One vote per (user, product) adjacency, over all users adjacent to `product`.
    co_review_counts = Counter(
        other_product
        for user in B.neighbors(product)
        for other_product in B.neighbors(user)
    )

    # The queried product is reachable from each of its own users, so it is
    # counted too; it is not a recommendation. A default is passed so that a
    # product with no adjacent users does not raise KeyError.
    co_review_counts.pop(product, None)

    # most_common() orders by count, highest first, with ties kept in
    # first-encountered order; slicing then applies the limit.
    strongest = co_review_counts.most_common()[:limit]

    # Star graph: the queried product in the centre, weighted edges to the
    # recommended products.
    G = nx.Graph()
    G.add_node(product)
    for recommended_product, weight in strongest:
        G.add_edge(product, recommended_product, weight=weight)

    return G




In [None]:
import ipywidgets as widgets

def query(btn):
    """Button callback: draw the recommendation network for the current selections.

    Reads the product ID and the number of recommendations from the
    module-level ``product_id_widget`` and ``n_limit_widget`` and plots the
    graph returned by ``get_product_recommendation_network``, labelling each
    edge with its ``weight`` attribute.

    Args:
        btn: Argument supplied by the button's ``on_click`` hook; not used.
    """
    graph = get_product_recommendation_network(
        product_id_widget.value, n_limit_widget.value
    )

    plt.figure(figsize=(5, 5))
    layout = nx.spring_layout(graph)
    nx.draw(graph, pos=layout, with_labels=True)
    # Annotate every edge with its weight.
    nx.draw_networkx_edge_labels(
        graph,
        pos=layout,
        edge_labels=nx.get_edge_attributes(graph, 'weight'),
    )
    plt.show()


# Product nodes of B (bipartite == 1), capped at the first 1000 in node order
# to bound the number of dropdown entries.
product_nodes = [n for n, d in B.nodes(data=True) if d['bipartite'] == 1][:1000]

# Preferred initial selection. A Dropdown rejects a value that is not among
# its options, and which products make the 1000-entry cut depends on B's node
# order, so fall back to the first option (or to no selection when the list
# is empty) instead of failing.
preferred_product_id = 'B002N5WAM6'
if preferred_product_id in product_nodes:
    initial_product_id = preferred_product_id
elif product_nodes:
    initial_product_id = product_nodes[0]
else:
    initial_product_id = None

product_id_widget = widgets.Dropdown(
    options=product_nodes,
    value=initial_product_id,
    description='Product ID:',
    disabled=False,
)
n_limit_widget = widgets.Dropdown(
    options=list(range(1,11)),
    value=5,
    description='N recommendations:',
    disabled=False,
)

button = widgets.Button(
    description='Consultar',
    disabled=False,
    button_style='', # 'success', 'info', 'warning', 'danger' or ''
    tooltip='Click me',
    icon='check' # (FontAwesome names without the `fa-` prefix)
)

# Run the query each time the button is clicked.
button.on_click(query)

# Bare expressions: the notebook renders each widget as cell output.
product_id_widget
n_limit_widget
button
