In [None]:
# Import necessary libraries
import numpy as np
import pandas as pd
from scipy.sparse import csr_matrix
from sklearn.neighbors import NearestNeighbors

In [None]:
# Download and unzip data files.
# -O pins the download to a fixed file name: without it, re-running the cell
# makes wget save numbered copies (book-crossings.zip.1, .2, ...) while unzip
# keeps reading the oldest archive.
# -o lets unzip overwrite previously extracted CSVs instead of stopping at an
# interactive "replace?" prompt, so the cell works under Restart & Run All.
!wget -O book-crossings.zip https://cdn.freecodecamp.org/project-data/books/book-crossings.zip
!unzip -o book-crossings.zip

--2023-05-22 11:11:39--  https://cdn.freecodecamp.org/project-data/books/book-crossings.zip
Resolving cdn.freecodecamp.org (cdn.freecodecamp.org)... 172.67.70.149, 104.26.2.33, 104.26.3.33, ...
Connecting to cdn.freecodecamp.org (cdn.freecodecamp.org)|172.67.70.149|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 26085508 (25M) [application/zip]
Saving to: ‘book-crossings.zip.6’


2023-05-22 11:11:40 (302 MB/s) - ‘book-crossings.zip.6’ saved [26085508/26085508]

Archive:  book-crossings.zip
replace BX-Book-Ratings.csv? [y]es, [n]o, [A]ll, [N]one, [r]ename: Y
  inflating: BX-Book-Ratings.csv     
replace BX-Books.csv? [y]es, [n]o, [A]ll, [N]one, [r]ename: A
  inflating: BX-Books.csv            
  inflating: BX-Users.csv            


In [None]:
# Names of the CSV files (extracted from book-crossings.zip) that
# import_data() reads: the book catalogue and the user ratings.
books_filename = "BX-Books.csv"
ratings_filename = "BX-Book-Ratings.csv"

In [None]:
# Import CSV data into dataframes
def import_data(books_path=None, ratings_path=None):
    """Load the book catalogue and the ratings CSV files into dataframes.

    Args:
        books_path: Path of the books CSV. When None, the module-level
            ``books_filename`` is used.
        ratings_path: Path of the ratings CSV. When None, the module-level
            ``ratings_filename`` is used.

    Returns:
        A tuple ``(df_books, df_ratings)``. ``df_books`` has the string
        columns ``isbn``, ``title`` and ``author``; ``df_ratings`` has
        ``user`` (int32), ``isbn`` (str) and ``rating`` (float32).
    """
    # Resolve the defaults at call time so a later reassignment of the
    # module-level file names is still honoured.
    if books_path is None:
        books_path = books_filename
    if ratings_path is None:
        ratings_path = ratings_filename

    # Both files share the same layout options. header=0 makes pandas discard
    # the header row of the file in favour of the names supplied below.
    common_options = dict(encoding="ISO-8859-1", sep=";", header=0)

    book_dtypes = {'isbn': 'str', 'title': 'str', 'author': 'str'}
    rating_dtypes = {'user': 'int32', 'isbn': 'str', 'rating': 'float32'}

    df_books = pd.read_csv(
        books_path,
        names=list(book_dtypes),
        usecols=list(book_dtypes),
        dtype=book_dtypes,
        **common_options,
    )
    df_ratings = pd.read_csv(
        ratings_path,
        names=list(rating_dtypes),
        usecols=list(rating_dtypes),
        dtype=rating_dtypes,
        **common_options,
    )
    return df_books, df_ratings

In [None]:
# Filter ratings data based on user and book counts
def filter_ratings(df_ratings, min_user_ratings=200, min_book_ratings=100):
    """Drop ratings from infrequent users and for rarely rated books.

    Both counts are taken over the unfiltered ``df_ratings``, so a row is
    kept only when its user and its book each reach their threshold in the
    original data.

    Args:
        df_ratings: Dataframe with at least the columns ``user`` and ``isbn``.
        min_user_ratings: Users with fewer ratings than this are removed.
        min_book_ratings: Books (ISBNs) with fewer ratings than this are
            removed.

    Returns:
        The rows of ``df_ratings`` that survive both filters, with their
        original index labels.
    """
    user_counts = df_ratings['user'].value_counts()
    isbn_counts = df_ratings['isbn'].value_counts()

    # Collect the labels to exclude rather than the ones to keep; this keeps
    # the exclusion-based semantics of the thresholds unchanged.
    sparse_users = user_counts[user_counts < min_user_ratings].index
    sparse_books = isbn_counts[isbn_counts < min_book_ratings].index

    keep_row = (
        ~df_ratings['user'].isin(sparse_users)
        & ~df_ratings['isbn'].isin(sparse_books)
    )
    return df_ratings[keep_row]

In [None]:
# Create a pivot table from filtered ratings
def create_pivot_table(df_ratings, df_books):
    """Build a book-by-user rating matrix whose rows are labelled by title.

    Args:
        df_ratings: Dataframe with the columns ``isbn``, ``user`` and
            ``rating``.
        df_books: Dataframe with at least the columns ``isbn`` and ``title``.

    Returns:
        A dataframe with one row per ISBN found in ``df_ratings`` and one
        column per user. Missing ratings are filled with 0. The row index is
        named ``title`` and holds the title of each ISBN (NaN when the ISBN
        is absent from ``df_books``).
    """
    df_table = df_ratings.pivot_table(index='isbn', columns='user', values='rating').fillna(0)

    # Look titles up through an isbn -> title series instead of joining the
    # whole wide table. A duplicated ISBN in the catalogue would multiply rows
    # in a join and make the index assignment fail with a length mismatch;
    # keeping the first occurrence guarantees exactly one title per row.
    title_by_isbn = df_books.drop_duplicates(subset='isbn').set_index('isbn')['title']
    df_table.index = pd.Index(title_by_isbn.reindex(df_table.index), name='title')
    return df_table

In [None]:
# Function to return recommended books
def get_recommends(book="", df_table=None):
    """Find the books whose rating vectors are closest to ``book``.

    Args:
        book: Row label in ``df_table`` of the book to look up.
        df_table: Rating matrix with one row per book; its index supplies
            the labels returned for the neighbours.

    Returns:
        ``[book, recommended_books]`` where ``recommended_books`` is a list
        of five ``[label, cosine_distance]`` pairs.
    """
    knn = NearestNeighbors(n_neighbors=6, metric="cosine")
    knn.fit(df_table.values)

    query_vector = [df_table.loc[book].values]
    distances, indices = knn.kneighbors(query_vector, n_neighbors=6)

    # Walk the six results from the last one backwards and stop before
    # position 0 (presumably the queried book itself), so the five pairs come
    # out in the reverse of the order kneighbors returned them.
    recommended_books = [
        [df_table.index[indices[0][-rank]], distances[0][-rank]]
        for rank in range(1, 6)
    ]

    return [book, recommended_books]

In [None]:
# Testing the book recommendation
def test_book_recommendation():
    """Print the recommendations for a reference book and check them.

    Reads the module-level ``df_table``. The first two recommendations must
    each carry a title from the expected list and a distance within 0.05 of
    the expected value at the same position; otherwise "Test failed." is
    printed and the function returns early.
    """
    result = get_recommends("Where the Heart Is (Oprah's Book Club (Paperback))", df_table)
    queried_title = result[0]
    recommended_books = result[1]

    print(f"Book: {queried_title}")
    print("Recommended Books:")
    for title, dist in recommended_books:
        print(f"- {title} (Distance: {dist})")

    expected_books = ["I'll Be Seeing You", 'The Weight of Water', 'The Surgeon', 'I Know This Much Is True']
    expected_distances = [0.8, 0.77, 0.77, 0.77]

    # Only the first two recommendations are verified.
    for position in (0, 1):
        candidate = recommended_books[position]
        if candidate[0] not in expected_books:
            print("Test failed.")
            return
        if abs(candidate[1] - expected_distances[position]) >= 0.05:
            print("Test failed.")
            return

    print("You passed the challenge! 🎉🎉🎉🎉🎉")

In [None]:
# Main execution: load the CSVs, filter the ratings, build the title-indexed
# rating matrix, then run the recommendation check.
if __name__ == "__main__":
    df_books, df_ratings = import_data()
    # Rebinds df_ratings to the filtered frame; the unfiltered ratings are no
    # longer reachable under this name afterwards.
    df_ratings = filter_ratings(df_ratings)
    # df_table must be bound at module level: test_book_recommendation()
    # reads it as a global instead of taking it as an argument.
    df_table = create_pivot_table(df_ratings, df_books)
    test_book_recommendation()

Book: Where the Heart Is (Oprah's Book Club (Paperback))
Recommended Books:
- I'll Be Seeing You (Distance: 0.8016210794448853)
- The Weight of Water (Distance: 0.7708583474159241)
- The Surgeon (Distance: 0.7699410915374756)
- I Know This Much Is True (Distance: 0.7677075266838074)
- The Lovely Bones: A Novel (Distance: 0.7234864234924316)
You passed the challenge! 🎉🎉🎉🎉🎉
