In [127]:
# import libraries (you may add additional imports but you may not have to)
import numpy as np
import pandas as pd
from scipy.sparse import csr_matrix
from sklearn.neighbors import NearestNeighbors
import matplotlib.pyplot as plt
#las que voy a usar
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import StandardScaler

In [6]:
# get data files
#!wget https://cdn.freecodecamp.org/project-data/books/book-crossings.zip

#!unzip book-crossings.zip

#books_filename = 'BX-Books.csv'
#ratings_filename = 'BX-Book-Ratings.csv'

books_filename = 'https://github.com/valemicolgarcia/TensorFlow/raw/main/Book%20Recomendation/book-crossings/BX-Books.csv'
ratings_filename = 'https://github.com/valemicolgarcia/TensorFlow/raw/main/Book%20Recomendation/book-crossings/BX-Book-Ratings.csv'

In [121]:
# import csv data into dataframes
df_books = pd.read_csv(
    books_filename,
    encoding = "ISO-8859-1",
    sep=";",
    header=0,
    names=['isbn', 'title', 'author'],
    usecols=['isbn', 'title', 'author'],
    dtype={'isbn': 'str', 'title': 'str', 'author': 'str'})

df_ratings = pd.read_csv(
    ratings_filename,
    encoding = "ISO-8859-1",
    sep=";",
    header=0,
    names=['user', 'isbn', 'rating'],
    usecols=['user', 'isbn', 'rating'],
    dtype={'user': 'int32', 'isbn': 'str', 'rating': 'float32'})

In [None]:
# add your code here - consider creating a new cell for each section of code

# Cleaning the data

In [122]:
df_books.head()

Unnamed: 0,isbn,title,author
0,195153448,Classical Mythology,Mark P. O. Morford
1,2005018,Clara Callan,Richard Bruce Wright
2,60973129,Decision in Normandy,Carlo D'Este
3,374157065,Flu: The Story of the Great Influenza Pandemic...,Gina Bari Kolata
4,393045218,The Mummies of Urumchi,E. J. W. Barber


In [123]:
df_ratings.head()

Unnamed: 0,user,isbn,rating
0,276725,034545104X,0.0
1,276726,0155061224,5.0
2,276727,0446520802,0.0
3,276729,052165615X,3.0
4,276729,0521795028,6.0


In [124]:
df_ratings.isnull().sum()

user      0
isbn      0
rating    0
dtype: int64

In [125]:
df_ratings.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1149780 entries, 0 to 1149779
Data columns (total 3 columns):
 #   Column  Non-Null Count    Dtype  
---  ------  --------------    -----  
 0   user    1149780 non-null  int32  
 1   isbn    1149780 non-null  object 
 2   rating  1149780 non-null  float32
dtypes: float32(1), int32(1), object(1)
memory usage: 17.5+ MB


In [113]:
# Verifica valores NaN
print(df_ratings.isna().sum())


user      0
isbn      0
rating    0
dtype: int64


In [114]:
# Verifica los tipos de datos
print(df_ratings.dtypes)


user        int32
isbn       object
rating    float32
dtype: object


In [130]:
df_ratings['user'] = pd.Categorical(df_ratings['user']).codes
df_ratings['isbn'] = pd.Categorical(df_ratings['isbn']).codes

from scipy.sparse import coo_matrix
user_ids = df_ratings['user'].values
isbn_ids = df_ratings['isbn'].values
ratings = df_ratings['rating'].values

# Construir la matriz dispersa (sparse matrix) en formato COO
user_book_matrix = coo_matrix((ratings, (isbn_ids, user_ids)))

scaler = StandardScaler(with_mean=False)
user_book_matrix_scaled = scaler.fit_transform(user_book_matrix)

In [150]:
print(user_book_matrix_scaled[:5])


  (0, 33805)	0.0
  (0, 68556)	485.56244
  (1, 33805)	19.248482
  (1, 100774)	0.0
  (2, 33805)	0.0
  (3, 4213)	5.9203987
  (3, 33805)	0.0
  (4, 33805)	0.0


# MODELO

In [143]:
# Configura el modelo KNN
model_knn = NearestNeighbors(metric='cosine',algorithm='brute')

# Ajusta el modelo con la matriz de características
model_knn.fit(user_book_matrix_scaled)


# FUNCION RECOMENDATION

In [147]:
# function to return recommended books - this will be tested
def get_recommends(book = ""):

  isbn = df_books[df_books['title'] == book ]['isbn'].values #encuentro el isbn del titulo del libro en df_books
  if len(isbn) == 0:
        return "Book title not found."
  
  book_isbn = isbn[0] 
  book_index = df_books[df_books['isbn'] == book_isbn].index[0] #encuentro el indice del libro en la matriz
  
  
  #if book_index >= len(user_book_matrix_scaled):
   #     return "ISBN not found in the dataset."
  
  distances, indices = model_knn.kneighbors(user_book_matrix_scaled[book_index:book_index+1], n_neighbors=6)

  print("Distances:", distances)
  print("Indices:", indices)

  recommended_indices = indices[0] #indices de los libros recomendados
  recommended_distances = distances[0]

  recommended_books = []
  for idx, distance in zip(recommended_indices, recommended_distances):
      # Asegúrate de que el índice sea válido
      if 0 <= idx < len(df_books):
          title = df_books.iloc[idx]['title']
          recommended_books.append([title, distance])

  result = [book,recommended_books]


  return result

In [148]:
books = get_recommends("Where the Heart Is (Oprah's Book Club (Paperback))")
print(books)

Distances: [[1. 1. 1. 1. 1. 1.]]
Indices: [[227039 227040 227037 227038 227035 227041]]
["Where the Heart Is (Oprah's Book Club (Paperback))", [['The Colour : A Novel', 1.0], ['Swedenborg: Life and Teaching', 1.0], ['FranzÃ?Â¶sische Gedichte / Poemes francais.', 1.0], ['Alice at 80', 1.0], ['The Path: The Inner Life of Jesus Christ', 1.0], ["Woman's Book of Soul", 1.0]]]


In [149]:
books = get_recommends("The Queen of the Damned (Vampire Chronicles (Paperback))")
print(books)

Distances: [[1. 1. 1. 1. 1. 1.]]
Indices: [[227039 227040 227037 227038 227035 227041]]
['The Queen of the Damned (Vampire Chronicles (Paperback))', [['The Colour : A Novel', 1.0], ['Swedenborg: Life and Teaching', 1.0], ['FranzÃ?Â¶sische Gedichte / Poemes francais.', 1.0], ['Alice at 80', 1.0], ['The Path: The Inner Life of Jesus Christ', 1.0], ["Woman's Book of Soul", 1.0]]]


In [146]:


def test_book_recommendation():
  test_pass = True
  recommends = get_recommends("Where the Heart Is (Oprah's Book Club (Paperback))")
  if recommends[0] != "Where the Heart Is (Oprah's Book Club (Paperback))":
    test_pass = False
  recommended_books = ["I'll Be Seeing You", 'The Weight of Water', 'The Surgeon', 'I Know This Much Is True']
  recommended_books_dist = [0.8, 0.77, 0.77, 0.77]
  for i in range(2):
    if recommends[1][i][0] not in recommended_books:
      test_pass = False
    if abs(recommends[1][i][1] - recommended_books_dist[i]) >= 0.05:
      test_pass = False
  if test_pass:
    print("You passed the challenge! 🎉🎉🎉🎉🎉")
  else:
    print("You haven't passed yet. Keep trying!")

test_book_recommendation()

You haven't passed yet. Keep trying!
