<a href="https://colab.research.google.com/github/valenlopez993/KNN_Book_Recomendation/blob/main/KNN_Book_Recommendation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Import libraries

In [None]:
import numpy as np
import pandas as pd
from scipy.sparse import csr_matrix
from sklearn.neighbors import NearestNeighbors
import matplotlib.pyplot as plt

# Get data files

In [None]:
import sys
import os

if 'google.colab' in sys.modules:
    !wget https://cdn.freecodecamp.org/project-data/books/book-crossings.zip
    !unzip book-crossings.zip

else:
    import subprocess

    def runCommand(command, verbose=False):
        process = subprocess.run(command, stdout = subprocess.PIPE, stderr = subprocess.PIPE, text = True, shell = True)
        std_out, std_err = process.stdout, process.stderr
        if verbose:
            print(std_out.strip(), std_err)

    files = os.listdir()
    if not ('book-crossings.zip' in files):
        runCommand('wget "https://cdn.freecodecamp.org/project-data/books/book-crossings.zip"')
    if (('BX-Book-Ratings.csv' in files) or ('BX-Books.csv' in files)) or ('BX-Users.csv' in files):
        runCommand('rm BX-Book-Ratings.csv BX-Books.csv BX-Users.csv')
    runCommand('tar -xf book-crossings.zip')

# Preprocessing Data

## Creating the dataframes

In [None]:
books_filename = 'BX-Books.csv'
ratings_filename = 'BX-Book-Ratings.csv'

# Column name -> dtype for the columns kept from each file. The same mapping
# drives `names`, `usecols` and `dtype`, so the three always stay in sync.
book_columns = {'isbn': 'str', 'title': 'str', 'author': 'str'}
rating_columns = {'user': 'int32', 'isbn': 'str', 'rating': 'float32'}

# Both files are ';'-separated and ISO-8859-1 encoded. `header=0` combined
# with `names` replaces the header row found in the file with our own names.
df_books = pd.read_csv(
    books_filename,
    encoding="ISO-8859-1",
    sep=";",
    header=0,
    names=list(book_columns),
    usecols=list(book_columns),
    dtype=book_columns,
)

df_ratings = pd.read_csv(
    ratings_filename,
    encoding="ISO-8859-1",
    sep=";",
    header=0,
    names=list(rating_columns),
    usecols=list(rating_columns),
    dtype=rating_columns,
)

In [None]:
# Sanity check: (number of rows, number of columns) of the books table.
df_books.shape

(271379, 3)

In [None]:
# Sanity check: (number of rows, number of columns) of the ratings table.
df_ratings.shape

(1149780, 3)

## Decreasing the size of the data

##### Only books with at least 100 reviews will be taken into account

In [None]:
# Number of ratings per ISBN; keep the ISBNs rated at least 100 times.
# The result is a Series indexed by ISBN whose values are the rating counts.
isbn_counts = df_ratings['isbn'].value_counts()
books100 = isbn_counts[isbn_counts >= 100]

##### Similarly, only users who have made at least 200 reviews will be considered

In [None]:
# Number of ratings per user; keep the users who rated at least 200 times.
# The result is a Series indexed by user id whose values are the rating counts.
user_counts = df_ratings['user'].value_counts()
users200 = user_counts[user_counts >= 200]

##### `df_toFit` will be the data frame passed to the KNN algorithm in order to find the k nearest neighbors

In [None]:
# Keep only the ratings made by the retained users on the retained books.
from_active_user = df_ratings['user'].isin(users200.index)
on_popular_book = df_ratings['isbn'].isin(books100.index)

# Reshape into a book x user matrix: one row per ISBN, one column per user,
# each cell holding that user's rating of that book. Pairs without a rating
# are filled with 0.
df_toFit = (
    df_ratings.loc[from_active_user & on_popular_book]
    .pivot(index='isbn', columns='user', values='rating')
    .fillna(0)
)

# KNN Algorithm

In [None]:
# Fit a cosine-distance nearest-neighbour model on the book x user matrix
# (`fit` returns the estimator itself, so it can be chained).
# NOTE(review): 6 neighbours are presumably requested because each book's own
# row is normally its closest match, which leaves 5 real recommendations.
knn = NearestNeighbors(metric='cosine', n_neighbors=6).fit(df_toFit)

# For every book (row of df_toFit): the distances to its 6 nearest rows and
# the positional indices of those rows.
distances, indices = knn.kneighbors(df_toFit)

# Making Recommendations

## The recommendation function

In [None]:
def get_recommends(book = ""):
  """Return the books most similar to `book` according to the fitted KNN model.

  Relies on the notebook-level objects `df_books` (isbn/title table),
  `df_toFit` (book x user rating matrix indexed by ISBN) and the
  `distances` / `indices` arrays produced by `knn.kneighbors(df_toFit)`.

  Parameters
  ----------
  book : str
      Exact title of the book, as it appears in `df_books.title`.

  Returns
  -------
  list
      `[book, [[title, distance], ...]]`, with the neighbours ordered from the
      most distant to the closest. If no edition of the title is present in
      `df_toFit`, returns `["Book requested not found", []]`.
  """
  # A title can be listed under several ISBNs (different editions). Use the
  # first one, in `df_books` order, that actually survived the filtering and
  # therefore has a row in the rating matrix.
  title_isbns = df_books.loc[df_books.title == book, 'isbn']
  fitted_isbns = title_isbns[title_isbns.isin(df_toFit.index)]
  if fitted_isbns.empty:
    return ["Book requested not found", []]

  # Positional row of the book in the matrix (the ISBN index produced by
  # `pivot` is unique, so this is a plain integer).
  row = df_toFit.index.get_loc(fitted_isbns.iloc[0])

  recommended_books = [book, []]
  for nn, distance in zip(indices[row], distances[row]):
    # Skip the query book itself by position rather than by `distance == 0`:
    # floating-point cosine distance to itself is not guaranteed to be
    # exactly zero, and a different book may legitimately be at distance 0.
    if nn == row:
      continue
    neighbor_isbn = df_toFit.index[nn]
    titles = df_books.loc[df_books.isbn == neighbor_isbn, 'title']
    # Fall back to the ISBN when the rated book has no entry in `df_books`
    # instead of failing with an IndexError.
    title = titles.iloc[0] if not titles.empty else neighbor_isbn
    recommended_books[1].append([title, distance])

  # `kneighbors` returns neighbours closest-first; the expected output lists
  # them farthest-first.
  recommended_books[1].reverse()
  return recommended_books

## Getting recommendations for different books

In [None]:
# Titles used to showcase the recommender.
book = [
    "The Queen of the Damned (Vampire Chronicles (Paperback))",
    "Where the Heart Is (Oprah's Book Club (Paperback))",
    "Divine Secrets of the Ya-Ya Sisterhood : A Novel",
]

for r in book:
  books = get_recommends(r)
  # Header: the requested title followed by a blank line.
  print(books[0], "\n")
  # One [title, distance] pair per line.
  for entry in books[1]:
    print(entry)
  # Visual separator between books, omitted after the last title.
  if r != book[-1]:
    print("\n<========================================================>\n")

The Queen of the Damned (Vampire Chronicles (Paperback)) 

['Catch 22', 0.7939835]
['The Witching Hour (Lives of the Mayfair Witches)', 0.74486566]
['Interview with the Vampire', 0.73450685]
['The Tale of the Body Thief (Vampire Chronicles (Paperback))', 0.53763384]
['The Vampire Lestat (Vampire Chronicles, Book II)', 0.5178411]


Where the Heart Is (Oprah's Book Club (Paperback)) 

["I'll Be Seeing You", 0.8016211]
['The Weight of Water', 0.77085835]
['The Surgeon', 0.7699411]
['I Know This Much Is True', 0.7677075]
['The Lovely Bones: A Novel', 0.7234864]


Divine Secrets of the Ya-Ya Sisterhood : A Novel 

['All the Pretty Horses (The Border Trilogy, Vol 1)', 0.8012066]
["Tuesdays with Morrie: An Old Man, a Young Man, and Life's Greatest Lesson", 0.8010227]
["ANGELA'S ASHES", 0.7961572]
['A Walk in the Woods: Rediscovering America on the Appalachian Trail (Official Guides to the Appalachian Trail)', 0.75931203]
["The Pilot's Wife : A Novel Tag: Author of the Weight of Water (Oprah's

# Final Test

In [None]:
# Print the raw value returned by get_recommends for the title used by the
# checker below, so it can be inspected next to the pass/fail message.
books = get_recommends("Where the Heart Is (Oprah's Book Club (Paperback))")
print(books)

def test_book_recommendation():
  test_pass = True
  recommends = get_recommends("Where the Heart Is (Oprah's Book Club (Paperback))")
  if recommends[0] != "Where the Heart Is (Oprah's Book Club (Paperback))":
    test_pass = False
  recommended_books = ["I'll Be Seeing You", 'The Weight of Water', 'The Surgeon', 'I Know This Much Is True']
  recommended_books_dist = [0.8, 0.77, 0.77, 0.77]
  for i in range(2): 
    if recommends[1][i][0] not in recommended_books:
      test_pass = False
    if abs(recommends[1][i][1] - recommended_books_dist[i]) >= 0.05:
      test_pass = False
  if test_pass:
    print("You passed the challenge! 🎉🎉🎉🎉🎉")
  else:
    print("You haven't passed yet. Keep trying!")

# Run the check; it prints whether the challenge has been passed.
test_book_recommendation()

["Where the Heart Is (Oprah's Book Club (Paperback))", [["I'll Be Seeing You", 0.8016211], ['The Weight of Water', 0.77085835], ['The Surgeon', 0.7699411], ['I Know This Much Is True', 0.7677075], ['The Lovely Bones: A Novel', 0.7234864]]]
You passed the challenge! 🎉🎉🎉🎉🎉
