In [1]:
import pandas as pd
import numpy as np

In [30]:

input_book = "1984"


In [3]:
from pathlib import Path
import argparse
import os
import zipfile
import requests

DOWNLOAD_URI = "https://www.kaggle.com/api/v1/datasets/download/arashnic/book-recommendation-dataset"
RATINGS_FNAME = "Ratings.csv"
BOOKS_FNAME = "Books.csv"

### LOADING DATA
def download_kaggle_ds(data_dir):
    # Check if files already exist:
    if all(os.path.exists(os.path.join(data_dir, f)) for f in [BOOKS_FNAME, RATINGS_FNAME]):
        print("Files already exist.")
        return
    url = DOWNLOAD_URI
    dest_path = data_dir / "book-recommendation-dataset.zip"

    os.makedirs(dest_path.parent, exist_ok=True)
    os.makedirs(data_dir, exist_ok=True)
    print(f"Starting download from:\n{url}\n")
    response = requests.get(url, stream=True)
    response.raise_for_status()

    total_size = int(response.headers.get('content-length', 0))
    block_size = 1024
    downloaded_size = 0

    with open(dest_path, 'wb') as file:
        for data in response.iter_content(block_size):
            file.write(data)
            downloaded_size += len(data)
            percent = downloaded_size * 100 / total_size if total_size else 0
            print(f"\rDownloaded: {percent:.2f}%", end="")

    print(f"\nDownload complete: {dest_path}")

    print(f"\nExtracting files to:\n{data_dir}\n")
    with zipfile.ZipFile(dest_path, 'r') as zip_ref:
        file_list = zip_ref.namelist()
        for file in file_list:
            print(f"Extracting: {file}")
        zip_ref.extractall(data_dir)
    print("\nExtraction complete.")

def load_data(download=False, data_dir="csv_data/"):
    data_dir = Path(data_dir)
    # Download from kaggle
    if download:
        download_kaggle_ds(data_dir)
        pass
    # filenames
    # RATINGS_FNAME = "BX-Book-Ratings.csv"
    # BOOKS_FNAME = "BX-Books.csv"

    # read csv files
    print("\n")
    ratings = pd.read_csv(data_dir / RATINGS_FNAME, on_bad_lines="warn")
    print("\n")
    books = pd.read_csv(data_dir / BOOKS_FNAME, on_bad_lines="warn")
    print("\n")
    return (ratings, books)
###

### PREPROCESSING DATA
def preprocess_data(ratings, books):
    # Remove 0 ratings
    ratings = ratings[ratings['Book-Rating'] != 0]

    # Merge Ratings and Books
    dataset = pd.merge(ratings, books, on=['ISBN']) ### INNER JOIN of 2 tables
    # Lowercase all non-numeric columns
    dataset_lowercase=dataset.apply(lambda x: x.str.lower() if(x.dtype == 'object') else x)
    dataset_lowercase["id"] = range(len(dataset_lowercase))
    return (ratings, dataset_lowercase)
###


In [4]:
ratings, books = load_data()









  books = pd.read_csv(data_dir / BOOKS_FNAME, on_bad_lines="warn")


In [5]:
ratings, dataset_lowercase = preprocess_data(ratings, books)

In [7]:
dataset_lowercase

Unnamed: 0,User-ID,ISBN,Book-Rating,Book-Title,Book-Author,Year-Of-Publication,Publisher,Image-URL-S,Image-URL-M,Image-URL-L,id
0,276726,0155061224,5,rites of passage,judith rae,2001,heinle,http://images.amazon.com/images/p/0155061224.0...,http://images.amazon.com/images/p/0155061224.0...,http://images.amazon.com/images/p/0155061224.0...,0
1,276729,052165615x,3,help!: level 1,philip prowse,1999,cambridge university press,http://images.amazon.com/images/p/052165615x.0...,http://images.amazon.com/images/p/052165615x.0...,http://images.amazon.com/images/p/052165615x.0...,1
2,276729,0521795028,6,the amsterdam connection : level 4 (cambridge ...,sue leather,2001,cambridge university press,http://images.amazon.com/images/p/0521795028.0...,http://images.amazon.com/images/p/0521795028.0...,http://images.amazon.com/images/p/0521795028.0...,2
3,276744,038550120x,7,a painted house,john grisham,,doubleday,http://images.amazon.com/images/p/038550120x.0...,http://images.amazon.com/images/p/038550120x.0...,http://images.amazon.com/images/p/038550120x.0...,3
4,276747,0060517794,9,little altars everywhere,rebecca wells,,harpertorch,http://images.amazon.com/images/p/0060517794.0...,http://images.amazon.com/images/p/0060517794.0...,http://images.amazon.com/images/p/0060517794.0...,4
...,...,...,...,...,...,...,...,...,...,...,...
383837,276704,0743211383,7,dreamcatcher,stephen king,,scribner,http://images.amazon.com/images/p/0743211383.0...,http://images.amazon.com/images/p/0743211383.0...,http://images.amazon.com/images/p/0743211383.0...,383837
383838,276704,0806917695,5,perplexing lateral thinking puzzles: scholasti...,paul sloane,,sterling publishing,http://images.amazon.com/images/p/0806917695.0...,http://images.amazon.com/images/p/0806917695.0...,http://images.amazon.com/images/p/0806917695.0...,383838
383839,276704,1563526298,9,get clark smart : the ultimate guide for the s...,clark howard,,longstreet press,http://images.amazon.com/images/p/1563526298.0...,http://images.amazon.com/images/p/1563526298.0...,http://images.amazon.com/images/p/1563526298.0...,383839
383840,276709,0515107662,10,the sherbrooke bride (bride trilogy (paperback)),catherine coulter,,jove books,http://images.amazon.com/images/p/0515107662.0...,http://images.amazon.com/images/p/0515107662.0...,http://images.amazon.com/images/p/0515107662.0...,383840


In [14]:

def extract_interesting_books(ratings, dataset_lowercase, input_book, ratings_num_th=8):
    assert not ratings is None, f"Ratings should be non None, but {ratings=} was provided"
    assert not dataset_lowercase is None, f"`dataset_lowercase` should be non None, but {dataset_lowercase=} was provided"
    assert input_book, f"`input_book` should be non empty, but {input_book=} was provided"

    ### EXTRACT USER IDS WHO RATED THE INPUT BOOK
    # TODO: do search by regex or other simillar technology. UPD: actually this function should get exact name or id,
    #  and another function should extract the exact name/id by the suggested technique

    user_ids_rated_input_book = dataset_lowercase['User-ID'][
        dataset_lowercase['Book-Title'] == input_book
        ]
    user_ids_rated_input_book = np.unique(user_ids_rated_input_book.tolist())
    ###

    ### EXTRACT DATAROWS WHICH CONTAIN USER ID OF ANY PREVIOUSLY EXTRACTED USER IDS
    # final dataset
    relevant_books = dataset_lowercase[(dataset_lowercase['User-ID'].isin(user_ids_rated_input_book))]
    ###

    ### FILTER DATAROWS (USERS WHICH LEFT MORE THAN th RATINGS) AND EXTRACT "INTERESTING" BOOK TITLES
    # Number of ratings per other books in dataset
    number_of_rating_per_book = relevant_books.groupby(['Book-Title']).agg(
        'count').reset_index()

    # select only books which have actually higher number of ratings than threshold
    books_to_compare = number_of_rating_per_book['Book-Title'][number_of_rating_per_book['User-ID'] >= ratings_num_th]
    books_to_compare = books_to_compare.tolist()
    ###

    ratings_data_raw = relevant_books[['User-ID', 'Book-Rating', 'Book-Title', 'id']][
        relevant_books['Book-Title'].isin(books_to_compare)]

    return ratings_data_raw


In [33]:
ratings_data_raw = extract_interesting_books(ratings, dataset_lowercase, input_book, 8)

In [26]:

def compute_final_rating(ratings_data_raw, input_book):
    ### COMPUTE FINAL RATING
    # TODO: suggest using other rating computation (for example by weighted rating or by number of ratings
    # group by User and Book and compute mean
    ratings_data_raw_nodup = ratings_data_raw.groupby(['User-ID', 'Book-Title'])['Book-Rating'].mean()

    # reset index to see User-ID in every row
    ratings_data_raw_nodup = ratings_data_raw_nodup.to_frame().reset_index()

    dataset_for_corr = ratings_data_raw_nodup.pivot(index='User-ID', columns='Book-Title', values='Book-Rating')
    ###

    ### REMOVE INPUT BOOK FROM DATAFRAME
    # Take out the Lord of the Rings selected book from correlation dataframe
    dataset_of_other_books: pd.DataFrame = dataset_for_corr.copy(deep=False)
    dataset_of_other_books.drop([input_book], axis=1, inplace=True)
    ###

    # empty lists
    book_titles = []
    correlations = []
    avgrating = []
    indices = []

    # corr computation
    for book_title in list(dataset_of_other_books.columns.values):
        book_titles.append(book_title)
        # TODO: maybe add a method to calculate the correlation. Default='pearson'
        # Calculate correlation between (dataframe of input book) and (dataframe without the input book)
        correlations.append(dataset_for_corr[input_book].corr(dataset_of_other_books[book_title]))

        tab = ratings_data_raw[ratings_data_raw['Book-Title'] == book_title].mean(numeric_only=True)

        avgrating.append(tab['Book-Rating'].min())

        indices.append(ratings_data_raw["id"][0])

    # final dataframe of all correlation of each book
    corr_fellowship = pd.DataFrame(list(zip(book_titles, correlations, avgrating, indices)),
                                   columns=['book', 'corr', 'avg_rating', 'id'])

    # top 10 books with highest corr
    sorted_corr_fellowship = corr_fellowship.sort_values('corr', ascending=False)
    best_list = sorted_corr_fellowship.head(10)

    # worst 10 books
    worst_list = sorted_corr_fellowship.tail(10)
    return best_list, worst_list


In [34]:
best_list, _ = compute_final_rating(ratings_data_raw, input_book)
best_list

Unnamed: 0,book,corr,avg_rating,id
0,animal farm,0.821863,8.65,2257 2257 2283 2283 2284 ...
20,slaughterhouse five or the children's crusade:...,0.723364,8.0,2257 2257 2283 2283 2284 ...
5,hannibal,0.717494,7.444444,2257 2257 2283 2283 2284 ...
25,"the drawing of the three (the dark tower, book 2)",0.712461,8.25,2257 2257 2283 2283 2284 ...
16,lord of the flies,0.697109,8.272727,2257 2257 2283 2283 2284 ...
1,brave new world,0.625402,7.5,2257 2257 2283 2283 2284 ...
17,me talk pretty one day,0.614192,8.3,2257 2257 2283 2283 2284 ...
21,the alchemist: a fable about following your dream,0.58527,8.444444,2257 2257 2283 2283 2284 ...
28,the great gatsby,0.571818,6.375,2257 2257 2283 2283 2284 ...
3,dreamcatcher,0.557,7.090909,2257 2257 2283 2283 2284 ...


In [37]:

book_title = input_book
variants = dataset_lowercase[dataset_lowercase['Book-Title'] == book_title].groupby('ISBN')

variants.describe()

Unnamed: 0_level_0,User-ID,User-ID,User-ID,User-ID,User-ID,User-ID,User-ID,User-ID,Book-Rating,Book-Rating,Book-Rating,Book-Rating,Book-Rating,id,id,id,id,id,id,id,id
Unnamed: 0_level_1,count,mean,std,min,25%,50%,75%,max,count,mean,...,75%,max,count,mean,std,min,25%,50%,75%,max
ISBN,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
0151660387,1.0,260647.0,,260647.0,260647.0,260647.0,260647.0,260647.0,1.0,10.0,...,10.0,10.0,1.0,363953.0,,363953.0,363953.0,363953.0,363953.0,363953.0
0451519841,18.0,143323.277778,85908.402765,7210.0,102852.5,140900.0,209569.5,273159.0,18.0,8.888889,...,10.0,10.0,18.0,203960.722222,119120.273446,10312.0,153178.75,202994.0,294631.25,379719.0
0451524934,101.0,127640.247525,80715.571913,254.0,61147.0,121251.0,193248.0,275520.0,101.0,8.772277,...,10.0,10.0,101.0,182456.742574,111276.16683,2293.0,91105.0,177261.0,274249.0,382486.0
0452262933,11.0,131731.272727,70419.982085,11676.0,90221.5,129388.0,162304.5,246759.0,11.0,8.727273,...,10.0,10.0,11.0,188862.272727,97297.429821,18523.0,132408.5,188505.0,232194.5,345385.0
207036822x,8.0,94804.375,72629.673526,32791.0,56900.5,74941.0,98974.75,259057.0,8.0,8.5,...,9.25,10.0,8.0,137256.375,99684.373922,52904.0,85156.75,108333.5,142521.25,362278.0
8423328651,2.0,269215.5,7165.113014,264149.0,266682.25,269215.5,271748.75,274282.0,2.0,10.0,...,10.0,10.0,2.0,374532.0,9239.057203,367999.0,371265.5,374532.0,377798.5,381065.0
848328006x,2.0,141561.5,31744.144728,119115.0,130338.25,141561.5,152784.75,164008.0,2.0,9.0,...,9.5,10.0,2.0,204548.0,41939.917406,174892.0,189720.0,204548.0,219376.0,234204.0
b00006h3bo,1.0,129255.0,,129255.0,129255.0,129255.0,129255.0,129255.0,1.0,7.0,...,7.0,7.0,1.0,188308.0,,188308.0,188308.0,188308.0,188308.0,188308.0


# Inspecting if correlation is a good metric of recommendations

In [38]:

books.head()

Unnamed: 0,ISBN,Book-Title,Book-Author,Year-Of-Publication,Publisher,Image-URL-S,Image-URL-M,Image-URL-L
0,195153448,Classical Mythology,Mark P. O. Morford,2002,Oxford University Press,http://images.amazon.com/images/P/0195153448.0...,http://images.amazon.com/images/P/0195153448.0...,http://images.amazon.com/images/P/0195153448.0...
1,2005018,Clara Callan,Richard Bruce Wright,2001,HarperFlamingo Canada,http://images.amazon.com/images/P/0002005018.0...,http://images.amazon.com/images/P/0002005018.0...,http://images.amazon.com/images/P/0002005018.0...
2,60973129,Decision in Normandy,Carlo D'Este,1991,HarperPerennial,http://images.amazon.com/images/P/0060973129.0...,http://images.amazon.com/images/P/0060973129.0...,http://images.amazon.com/images/P/0060973129.0...
3,374157065,Flu: The Story of the Great Influenza Pandemic...,Gina Bari Kolata,1999,Farrar Straus Giroux,http://images.amazon.com/images/P/0374157065.0...,http://images.amazon.com/images/P/0374157065.0...,http://images.amazon.com/images/P/0374157065.0...
4,393045218,The Mummies of Urumchi,E. J. W. Barber,1999,W. W. Norton &amp; Company,http://images.amazon.com/images/P/0393045218.0...,http://images.amazon.com/images/P/0393045218.0...,http://images.amazon.com/images/P/0393045218.0...


In [39]:
books

Unnamed: 0,ISBN,Book-Title,Book-Author,Year-Of-Publication,Publisher,Image-URL-S,Image-URL-M,Image-URL-L
0,0195153448,Classical Mythology,Mark P. O. Morford,2002,Oxford University Press,http://images.amazon.com/images/P/0195153448.0...,http://images.amazon.com/images/P/0195153448.0...,http://images.amazon.com/images/P/0195153448.0...
1,0002005018,Clara Callan,Richard Bruce Wright,2001,HarperFlamingo Canada,http://images.amazon.com/images/P/0002005018.0...,http://images.amazon.com/images/P/0002005018.0...,http://images.amazon.com/images/P/0002005018.0...
2,0060973129,Decision in Normandy,Carlo D'Este,1991,HarperPerennial,http://images.amazon.com/images/P/0060973129.0...,http://images.amazon.com/images/P/0060973129.0...,http://images.amazon.com/images/P/0060973129.0...
3,0374157065,Flu: The Story of the Great Influenza Pandemic...,Gina Bari Kolata,1999,Farrar Straus Giroux,http://images.amazon.com/images/P/0374157065.0...,http://images.amazon.com/images/P/0374157065.0...,http://images.amazon.com/images/P/0374157065.0...
4,0393045218,The Mummies of Urumchi,E. J. W. Barber,1999,W. W. Norton &amp; Company,http://images.amazon.com/images/P/0393045218.0...,http://images.amazon.com/images/P/0393045218.0...,http://images.amazon.com/images/P/0393045218.0...
...,...,...,...,...,...,...,...,...
271355,0440400988,There's a Bat in Bunk Five,Paula Danziger,1988,Random House Childrens Pub (Mm),http://images.amazon.com/images/P/0440400988.0...,http://images.amazon.com/images/P/0440400988.0...,http://images.amazon.com/images/P/0440400988.0...
271356,0525447644,From One to One Hundred,Teri Sloat,1991,Dutton Books,http://images.amazon.com/images/P/0525447644.0...,http://images.amazon.com/images/P/0525447644.0...,http://images.amazon.com/images/P/0525447644.0...
271357,006008667X,Lily Dale : The True Story of the Town that Ta...,Christine Wicker,2004,HarperSanFrancisco,http://images.amazon.com/images/P/006008667X.0...,http://images.amazon.com/images/P/006008667X.0...,http://images.amazon.com/images/P/006008667X.0...
271358,0192126040,Republic (World's Classics),Plato,1996,Oxford University Press,http://images.amazon.com/images/P/0192126040.0...,http://images.amazon.com/images/P/0192126040.0...,http://images.amazon.com/images/P/0192126040.0...


In [61]:
interesting_ds = dataset_lowercase[dataset_lowercase["Book-Title"] == "1984"]
interesting_ds

Unnamed: 0,User-ID,ISBN,Book-Rating,Book-Title,Book-Author,Year-Of-Publication,Publisher,Image-URL-S,Image-URL-M,Image-URL-L,id
2293,254,0451524934,9,1984,george orwell,,signet book,http://images.amazon.com/images/p/0451524934.0...,http://images.amazon.com/images/p/0451524934.0...,http://images.amazon.com/images/p/0451524934.0...,2293
3663,1706,0451524934,10,1984,george orwell,,signet book,http://images.amazon.com/images/p/0451524934.0...,http://images.amazon.com/images/p/0451524934.0...,http://images.amazon.com/images/p/0451524934.0...,3663
4404,2179,0451524934,8,1984,george orwell,,signet book,http://images.amazon.com/images/p/0451524934.0...,http://images.amazon.com/images/p/0451524934.0...,http://images.amazon.com/images/p/0451524934.0...,4404
4436,2198,0451524934,10,1984,george orwell,,signet book,http://images.amazon.com/images/p/0451524934.0...,http://images.amazon.com/images/p/0451524934.0...,http://images.amazon.com/images/p/0451524934.0...,4436
5106,2799,0451524934,10,1984,george orwell,,signet book,http://images.amazon.com/images/p/0451524934.0...,http://images.amazon.com/images/p/0451524934.0...,http://images.amazon.com/images/p/0451524934.0...,5106
...,...,...,...,...,...,...,...,...,...,...,...
378425,271705,0451524934,10,1984,george orwell,,signet book,http://images.amazon.com/images/p/0451524934.0...,http://images.amazon.com/images/p/0451524934.0...,http://images.amazon.com/images/p/0451524934.0...,378425
379719,273159,0451519841,10,1984,george orwell,,new amer library,http://images.amazon.com/images/p/0451519841.0...,http://images.amazon.com/images/p/0451519841.0...,http://images.amazon.com/images/p/0451519841.0...,379719
380406,273976,0451524934,7,1984,george orwell,,signet book,http://images.amazon.com/images/p/0451524934.0...,http://images.amazon.com/images/p/0451524934.0...,http://images.amazon.com/images/p/0451524934.0...,380406
381065,274282,8423328651,10,1984,george orwell,,la marca editora,http://images.amazon.com/images/p/8423328651.0...,http://images.amazon.com/images/p/8423328651.0...,http://images.amazon.com/images/p/8423328651.0...,381065


In [62]:
books[books["ISBN"] == "0451524934"]

Unnamed: 0,ISBN,Book-Title,Book-Author,Year-Of-Publication,Publisher,Image-URL-S,Image-URL-M,Image-URL-L
2104,451524934,1984,George Orwell,1990,Signet Book,http://images.amazon.com/images/P/0451524934.0...,http://images.amazon.com/images/P/0451524934.0...,http://images.amazon.com/images/P/0451524934.0...


In [46]:
interesting_ds["rating_count"] = interesting_ds.groupby("ISBN")['ISBN'].transform('count')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  interesting_ds["rating_count"] = interesting_ds.groupby("ISBN")['ISBN'].transform('count')


In [47]:
interesting_ds

Unnamed: 0,User-ID,ISBN,Book-Rating,Book-Title,Book-Author,Year-Of-Publication,Publisher,Image-URL-S,Image-URL-M,Image-URL-L,id,rating_count
2293,254,0451524934,9,1984,george orwell,,signet book,http://images.amazon.com/images/p/0451524934.0...,http://images.amazon.com/images/p/0451524934.0...,http://images.amazon.com/images/p/0451524934.0...,2293,101
3663,1706,0451524934,10,1984,george orwell,,signet book,http://images.amazon.com/images/p/0451524934.0...,http://images.amazon.com/images/p/0451524934.0...,http://images.amazon.com/images/p/0451524934.0...,3663,101
4404,2179,0451524934,8,1984,george orwell,,signet book,http://images.amazon.com/images/p/0451524934.0...,http://images.amazon.com/images/p/0451524934.0...,http://images.amazon.com/images/p/0451524934.0...,4404,101
4436,2198,0451524934,10,1984,george orwell,,signet book,http://images.amazon.com/images/p/0451524934.0...,http://images.amazon.com/images/p/0451524934.0...,http://images.amazon.com/images/p/0451524934.0...,4436,101
5106,2799,0451524934,10,1984,george orwell,,signet book,http://images.amazon.com/images/p/0451524934.0...,http://images.amazon.com/images/p/0451524934.0...,http://images.amazon.com/images/p/0451524934.0...,5106,101
...,...,...,...,...,...,...,...,...,...,...,...,...
378425,271705,0451524934,10,1984,george orwell,,signet book,http://images.amazon.com/images/p/0451524934.0...,http://images.amazon.com/images/p/0451524934.0...,http://images.amazon.com/images/p/0451524934.0...,378425,101
379719,273159,0451519841,10,1984,george orwell,,new amer library,http://images.amazon.com/images/p/0451519841.0...,http://images.amazon.com/images/p/0451519841.0...,http://images.amazon.com/images/p/0451519841.0...,379719,18
380406,273976,0451524934,7,1984,george orwell,,signet book,http://images.amazon.com/images/p/0451524934.0...,http://images.amazon.com/images/p/0451524934.0...,http://images.amazon.com/images/p/0451524934.0...,380406,101
381065,274282,8423328651,10,1984,george orwell,,la marca editora,http://images.amazon.com/images/p/8423328651.0...,http://images.amazon.com/images/p/8423328651.0...,http://images.amazon.com/images/p/8423328651.0...,381065,2


In [50]:
most_common_isbn = interesting_ds['ISBN'].value_counts().idxmax()

'0451524934'

In [52]:
ratings

Unnamed: 0,User-ID,ISBN,Book-Rating
1,276726,0155061224,5
3,276729,052165615X,3
4,276729,0521795028,6
6,276736,3257224281,8
7,276737,0600570967,6
...,...,...,...
1149773,276704,0806917695,5
1149775,276704,1563526298,9
1149777,276709,0515107662,10
1149778,276721,0590442449,10


In [53]:
dataset_lowercase

Unnamed: 0,User-ID,ISBN,Book-Rating,Book-Title,Book-Author,Year-Of-Publication,Publisher,Image-URL-S,Image-URL-M,Image-URL-L,id
0,276726,0155061224,5,rites of passage,judith rae,2001,heinle,http://images.amazon.com/images/p/0155061224.0...,http://images.amazon.com/images/p/0155061224.0...,http://images.amazon.com/images/p/0155061224.0...,0
1,276729,052165615x,3,help!: level 1,philip prowse,1999,cambridge university press,http://images.amazon.com/images/p/052165615x.0...,http://images.amazon.com/images/p/052165615x.0...,http://images.amazon.com/images/p/052165615x.0...,1
2,276729,0521795028,6,the amsterdam connection : level 4 (cambridge ...,sue leather,2001,cambridge university press,http://images.amazon.com/images/p/0521795028.0...,http://images.amazon.com/images/p/0521795028.0...,http://images.amazon.com/images/p/0521795028.0...,2
3,276744,038550120x,7,a painted house,john grisham,,doubleday,http://images.amazon.com/images/p/038550120x.0...,http://images.amazon.com/images/p/038550120x.0...,http://images.amazon.com/images/p/038550120x.0...,3
4,276747,0060517794,9,little altars everywhere,rebecca wells,,harpertorch,http://images.amazon.com/images/p/0060517794.0...,http://images.amazon.com/images/p/0060517794.0...,http://images.amazon.com/images/p/0060517794.0...,4
...,...,...,...,...,...,...,...,...,...,...,...
383837,276704,0743211383,7,dreamcatcher,stephen king,,scribner,http://images.amazon.com/images/p/0743211383.0...,http://images.amazon.com/images/p/0743211383.0...,http://images.amazon.com/images/p/0743211383.0...,383837
383838,276704,0806917695,5,perplexing lateral thinking puzzles: scholasti...,paul sloane,,sterling publishing,http://images.amazon.com/images/p/0806917695.0...,http://images.amazon.com/images/p/0806917695.0...,http://images.amazon.com/images/p/0806917695.0...,383838
383839,276704,1563526298,9,get clark smart : the ultimate guide for the s...,clark howard,,longstreet press,http://images.amazon.com/images/p/1563526298.0...,http://images.amazon.com/images/p/1563526298.0...,http://images.amazon.com/images/p/1563526298.0...,383839
383840,276709,0515107662,10,the sherbrooke bride (bride trilogy (paperback)),catherine coulter,,jove books,http://images.amazon.com/images/p/0515107662.0...,http://images.amazon.com/images/p/0515107662.0...,http://images.amazon.com/images/p/0515107662.0...,383840


In [55]:
ratings_data_raw

Unnamed: 0,User-ID,Book-Rating,Book-Title,id
2257,254,9,the secret life of bees,2257
2283,254,9,the bonesetter's daughter,2283
2284,254,9,harry potter and the chamber of secrets (book 2),2284
2285,254,9,harry potter and the chamber of secrets (book 2),2285
2286,254,9,harry potter and the prisoner of azkaban (book 3),2286
...,...,...,...,...
380411,273976,3,the great gatsby,380411
381065,274282,10,1984,381065
382478,275520,9,the catcher in the rye,382478
382484,275520,8,to kill a mockingbird,382484
