**Maestría en Inteligencia Artificial Aplicada**

**6.2 Avance de proyecto 2: Sistema de Recomendación**

In [20]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import os

from sklearn.decomposition import TruncatedSVD

from google.colab import drive
drive.mount('/content/drive')

DIR = "/content/drive/MyDrive/Colab Notebooks/MNA/Big Data/Modulo 2 Sistema de Recomendacion/"
os.chdir(DIR)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [21]:
# Lee los dos archivos que contienen el listado de bebidas y los ratings dado por los usuarios
data1 = pd.read_csv("starbucks_ratings.csv", header='infer', sep=",")
data2 = pd.read_csv("starbucks_beverages.csv", header='infer')

print(data1.shape, data2.shape)

(1161, 3) (130, 3)


In [22]:
#Del primer archivo obtenemos los datos con las evaluaciones de las bebidas dadas por los usuarios

data1.head()

Unnamed: 0,userID,Beverage,rating
0,U1050,2584,5
1,U1082,2594,5
2,U1050,2594,5
3,U1082,2608,5
4,U1050,2608,5


In [23]:
#Del segundo archivo obtenemos el catalogo con el nombre de las bebidas
data2.head()

Unnamed: 0,Beverage,Beverage name,Beverage_category
0,5042,Classic Espresso Drinks-Cappuccino-2% Milk,Classic Espresso Drinks
1,5047,Classic Espresso Drinks-Cappuccino-2% Milk,Classic Espresso Drinks
2,5060,Classic Espresso Drinks-Cappuccino-2% Milk,Classic Espresso Drinks
3,5106,Classic Espresso Drinks-Cappuccino-2% Milk,Classic Espresso Drinks
4,5063,Classic Espresso Drinks-Cappuccino-Grande Nonf...,Classic Espresso Drinks


In [24]:
# Hacemos un dataframe conmbianado para tener en un mismo dataset los ratings y nombres de las bebidas

data = pd.merge(data1,data2, on='Beverage')

data.head()

Unnamed: 0,userID,Beverage,rating,Beverage name,Beverage_category
0,U1050,2584,5,Coffee-Brewed Coffee-Venti,Brewed Coffee
1,U1103,2584,1,Coffee-Brewed Coffee-Venti,Brewed Coffee
2,U1123,2584,1,Coffee-Brewed Coffee-Venti,Brewed Coffee
3,U1067,2584,2,Coffee-Brewed Coffee-Venti,Brewed Coffee
4,U1107,2584,2,Coffee-Brewed Coffee-Venti,Brewed Coffee


Implementaremos un modelo más avanzado de KNN para el sistema de recomendación, utilizando la libreria "surprise" la cual proporciona una implementación optimizada del algoritmo KNN para sistemas de recomendacion. Se recomienda el uso de este modelo,que no solo tiene en cuenta la similitud entre los usuarios y las bebidas, sino también las medias de las calificaciones, lo que puede mejorar la precisión de las predicciones.

In [7]:
pip install pandas scikit-learn surprise # Instalamos la libreria a utilizar


Collecting surprise
  Downloading surprise-0.1-py2.py3-none-any.whl (1.8 kB)
Collecting scikit-surprise (from surprise)
  Downloading scikit_surprise-1.1.4.tar.gz (154 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m154.4/154.4 kB[0m [31m3.4 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Building wheels for collected packages: scikit-surprise
  Building wheel for scikit-surprise (pyproject.toml) ... [?25l[?25hdone
  Created wheel for scikit-surprise: filename=scikit_surprise-1.1.4-cp310-cp310-linux_x86_64.whl size=2357241 sha256=da4f9a05bdb993ef2ed972d7b238c8f84dbe77eb19f62828cba2b5d75f29cec8
  Stored in directory: /root/.cache/pip/wheels/4b/3f/df/6acbf0a40397d9bf3ff97f582cc22fb9ce66adde75bc71fd54
Successfully built scikit-surprise
Installing collected packages: scikit-surprise, surprise
Successfully inst

In [18]:

#Al igual los modulos para generar las metricas para medir la efectividad del modelo
from surprise import Dataset, Reader, KNNWithMeans
from surprise.model_selection import train_test_split
from surprise import accuracy
from sklearn.metrics import mean_squared_error

In [25]:
# Crear un dataset Surprise
df = pd.DataFrame(data)
reader = Reader(rating_scale=(1, 5))
data = Dataset.load_from_df(df[['Beverage_category','Beverage name', 'rating']], reader)


In [26]:
# Dividir los datos en conjunto de entrenamiento y prueba
trainset, testset = train_test_split(data, test_size=0.2)

# Usar el algoritmo KNNWithMeans
sim_options = {
    'name': 'cosine',
    'user_based': False  # Utilizar item-item similarity
}

algo = KNNWithMeans(sim_options=sim_options)
algo.fit(trainset)

# Hacer predicciones en el conjunto de prueba
predictions = algo.test(testset)

Computing the cosine similarity matrix...
Done computing similarity matrix.


In [27]:
# Evaluar el modelo con métricas
rmse = accuracy.rmse(predictions)
mae = accuracy.mae(predictions)
mse = mean_squared_error([pred.r_ui for pred in predictions], [pred.est for pred in predictions])

print(f'RMSE: {rmse}')
print(f'MAE: {mae}')
print(f'MSE: {mse}')


RMSE: 1.0602
MAE:  0.8066
RMSE: 1.060168711799959
MAE: 0.8066187163682643
MSE: 1.1239576974795844


In [28]:
# Recomendaciones para una bebida seleccionada
selected_beverage = 'Classic Espresso Drinks-Coffee Americano-Tall'

# Buscar ítems similares
inner_id = algo.trainset.to_inner_iid(selected_beverage)
neighbors = algo.get_neighbors(inner_id, k=5)

# Convertir inner ids a nombres de bebidas
similar_beverages = [algo.trainset.to_raw_iid(inner_id) for inner_id in neighbors]
print(f'Bebidas similares a {selected_beverage}: {similar_beverages}')

Bebidas similares a Classic Espresso Drinks-Coffee Americano-Tall: ['Classic Espresso Drinks-Coffee Mocha (Without Whipped Cream)-Short Nonfat Milk', 'Classic Espresso Drinks-Skinny Latte (Any Flavour)-Venti Nonfat Milk', 'Classic Espresso Drinks-Cappuccino-Grande Nonfat Milk', 'Classic Espresso Drinks-Coffee Americano-Venti', 'Classic Espresso Drinks-Vanilla Latte (Or Other Flavoured Latte)-Grande Nonfat Milk']
