In [None]:
# Import required libraries
import numpy as np
import pandas as pd
from sklearn.neighbors import NearestNeighbors
import matplotlib.pyplot as plt


In [None]:
# Load the dataset
# The three Book-Crossing CSVs are ';'-separated and latin-1 encoded.
# `error_bad_lines=False` was deprecated in pandas 1.3 and removed in 2.0, where it
# raises TypeError; `on_bad_lines='skip'` (pandas >= 1.3) is the replacement and keeps
# the same behaviour of dropping rows that cannot be parsed.
csv_options = dict(sep=';', encoding='latin-1', on_bad_lines='skip')
books = pd.read_csv('/content/BX-Books.csv', **csv_options)
ratings = pd.read_csv('/content/BX-Book-Ratings.csv', **csv_options)
users = pd.read_csv('/content/BX-Users.csv', **csv_options)

In [None]:
# Fix column names and examine dataframes
# Each frame gets its full list of column labels assigned positionally, in place.
for frame, column_names in (
    (
        books,
        ['ISBN', 'Book-Title', 'Book-Author', 'Year-Of-Publication', 'Publisher',
         'Image-URL-S', 'Image-URL-M', 'Image-URL-L'],
    ),
    (ratings, ['User-ID', 'ISBN', 'Book-Rating']),
    (users, ['User-ID', 'Location', 'Age']),
):
    frame.columns = column_names

In [None]:
# Display basic information about the datasets
# One line per frame, reporting its (rows, columns) shape.
print(
    f"Books dataset shape: {books.shape}",
    f"Ratings dataset shape: {ratings.shape}",
    f"Users dataset shape: {users.shape}",
    sep="\n",
)

In [None]:
# Filter users with at least 200 ratings
# Count rating rows per user, keep the IDs that reach the threshold, then
# restrict the ratings table to rows written by those users.
user_counts = ratings['User-ID'].value_counts()
qualified_users = user_counts.index[user_counts >= 200].tolist()
is_qualified_user = ratings['User-ID'].isin(qualified_users)
filtered_ratings_by_users = ratings.loc[is_qualified_user]

In [None]:
# Filter books with at least 100 ratings
# The per-book count is taken over the full `ratings` table, not over the rows that
# survived the user filter. Counting after the user filter would require 100 ratings
# from heavy raters alone, which is a much stricter rule than "books with at least
# 100 ratings" and yields a different (far smaller) book set.
book_counts = ratings['ISBN'].value_counts()
qualified_books = book_counts[book_counts >= 100].index.tolist()
# Apply the book filter on top of the user filter so both conditions hold.
filtered_ratings = filtered_ratings_by_users[filtered_ratings_by_users['ISBN'].isin(qualified_books)]

In [None]:
# Display information about the filtered dataset
# Reports the filtered table's shape and the sizes of the two qualifying ID lists.
print(
    f"Filtered ratings dataset shape: {filtered_ratings.shape}",
    f"Number of qualified users: {len(qualified_users)}",
    f"Number of qualified books: {len(qualified_books)}",
    sep="\n",
)

In [None]:
# Merge with the books dataset to get titles
# Inner join on ISBN: ratings whose ISBN has no entry in `books` are dropped.
title_lookup = books[['ISBN', 'Book-Title']]
filtered_ratings_with_titles = filtered_ratings.merge(title_lookup, how='inner', on='ISBN')

In [None]:
# Create a pivot table: rows are books, columns are users
# Cells hold the mean rating for a (title, user) pair; pairs with no rating become 0.
book_user_matrix = pd.pivot_table(
    filtered_ratings_with_titles,
    values='Book-Rating',
    index='Book-Title',
    columns='User-ID',
    aggfunc='mean',
    fill_value=0,
)

In [None]:
# Display the shape of the book-user matrix
# Shape is (number of book titles, number of users).
matrix_shape_message = f"Book-user matrix shape: {book_user_matrix.shape}"
print(matrix_shape_message)

In [None]:
# Cast the book-user matrix to float32 before fitting the model.
# NOTE: despite the variable name, no SciPy sparse conversion happens here; the
# result is still a dense pandas DataFrame, and later code indexes it with `.iloc`.
book_user_matrix_sparse = book_user_matrix.astype(np.float32)

In [None]:
# Train KNN model
# Brute-force search with cosine distance; the default neighbour count is 6.
knn_settings = {
    'metric': 'cosine',
    'algorithm': 'brute',
    'n_neighbors': 6,
}
model = NearestNeighbors(**knn_settings)
model.fit(book_user_matrix_sparse)

In [None]:
# Optional: Visualize the ratings distribution
# Draw one bar per distinct rating value. A histogram with `bins=10` splits
# [min, max] into ten equal-width bins, so an integer scale with eleven values
# (e.g. 0-10) would have its two highest ratings merged into a single bar.
rating_counts = ratings['Book-Rating'].value_counts().sort_index()

fig, ax = plt.subplots(figsize=(10, 6))
ax.bar(rating_counts.index, rating_counts.to_numpy(), edgecolor='black')
ax.set_xticks(rating_counts.index)  # label every rating value that occurs
ax.set_title('Distribution of Book Ratings')
ax.set_xlabel('Rating')
ax.set_ylabel('Count')
plt.show()

In [None]:
# Create a dictionary to map book titles to their indices in the matrix
# Keys are the row labels of the book-user matrix; values are their 0-based positions.
matrix_titles = book_user_matrix.index
book_to_index = dict(zip(matrix_titles, range(len(matrix_titles))))

def get_recommends(book_title):
    """
    Get up to 5 book recommendations based on similarity to the input book.

    The whole function lives in a single cell: a function body cannot be split
    across notebook cells.

    Args:
        book_title: The title of the book to find recommendations for. It must be
            a row label of the book-user matrix (a key of `book_to_index`).

    Returns:
        `[book_title, similar_books]`, where `similar_books` is a list of
        `[title, distance]` pairs (cosine distance as a Python float). The pairs
        are ordered from the largest distance to the smallest, which is the order
        the reference distances in this notebook's `test_book_recommendation`
        check are listed in. Fewer than 5 pairs are returned only when the matrix
        holds fewer than 6 books.
        Returns None (after printing a message) when the title is unknown.
    """
    # Check if the book title exists in our dataset
    if book_title not in book_to_index:
        print(f"Book '{book_title}' not found in the dataset.")
        return None

    # Row position of the book in the matrix, and its rating vector as a 1 x n_users array
    book_idx = book_to_index[book_title]
    book_features = book_user_matrix_sparse.iloc[book_idx].to_numpy().reshape(1, -1)

    # The query book is part of the fitted data, so ask for one extra neighbour;
    # never ask for more neighbours than there are rows, which kneighbors rejects.
    n_recommendations = 5
    n_query = min(n_recommendations + 1, len(book_user_matrix_sparse))
    distances, indices = model.kneighbors(book_features, n_neighbors=n_query)

    # Drop the query book by row index rather than by position: with tied distances
    # (e.g. another book with an identical rating vector) it is not guaranteed to be
    # the first hit. kneighbors returns hits in ascending distance, so the slice
    # keeps the nearest ones.
    nearest = [
        (int(idx), float(dist))
        for idx, dist in zip(indices[0], distances[0])
        if idx != book_idx
    ][:n_recommendations]

    # Emit farthest-first (see Returns).
    similar_books = [
        [book_user_matrix.index[idx], dist]
        for idx, dist in reversed(nearest)
    ]

    # Format the result as required
    return [book_title, similar_books]


In [None]:
# Example usage (for testing)
# Look up one known title and print the raw [title, [[neighbour, distance], ...]] result.
query_title = "The Queen of the Damned (Vampire Chronicles (Paperback))"
recommendations = get_recommends(query_title)
print(recommendations)

In [None]:
# Second example lookup. The result is stored under its own name: assigning it to
# `books` would overwrite the books DataFrame and break any later re-run of the
# cells that read `books` (e.g. the merge that attaches titles).
heart_recommendations = get_recommends("Where the Heart Is (Oprah's Book Club (Paperback))")
print(heart_recommendations)

def test_book_recommendation():
  """
  Check get_recommends against reference results for one known title.

  Passes when the returned title echoes the query and each of the first two
  recommendations (a) has a title from the reference list and (b) has a
  distance within 0.05 of the reference distance at the same position.
  A None result (title not found) or fewer than two recommendations counts as
  a failure instead of raising. Prints the outcome; returns nothing.
  """
  query_title = "Where the Heart Is (Oprah's Book Club (Paperback))"
  recommended_books = ["I'll Be Seeing You", 'The Weight of Water', 'The Surgeon', 'I Know This Much Is True']
  recommended_books_dist = [0.8, 0.77, 0.77, 0.77]

  recommends = get_recommends(query_title)
  # get_recommends returns None for an unknown title; indexing it would raise TypeError.
  test_pass = recommends is not None and recommends[0] == query_title
  if test_pass:
    first_two = recommends[1][:2]
    if len(first_two) < 2:
      test_pass = False
    for (title, distance), expected_distance in zip(first_two, recommended_books_dist):
      if title not in recommended_books:
        test_pass = False
      if abs(distance - expected_distance) >= 0.05:
        test_pass = False
  if test_pass:
    print("You passed the challenge! 🎉🎉🎉🎉🎉")
  else:
    print("You haven't passed yet. Keep trying!")

test_book_recommendation()