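This notebook takes the list of books produced by Goodreads Genre Processing and uses heuristics to pick, for each book, one similar book (another title by the same author) and one dissimilar book (a title by a different author that shares none of its genres). The resulting triples are intended to help train a similarity model.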

In [129]:
import pandas as pd
import numpy as np

In [130]:
# Import the data.
# A forward slash is used as the separator: the previous 'Datasets\g...'
# literal relied on '\g' being an unrecognised escape sequence (which Python
# warns about) and only resolved as a path on Windows.
rawdatadf = pd.read_csv('Datasets/goodreads_data_onehot_genres.csv')

# Print the first few rows of the dataframe
print(rawdatadf.head())

                                                Book  \
0                              To Kill a Mockingbird   
1  Harry Potter and the Philosopher’s Stone (Harr...   
2                                Pride and Prejudice   
3                                  The Little Prince   
4                                               1984   

                                         Description  \
0  The unforgettable novel of a childhood in a sl...   
1  Harry Potter thinks he is an ordinary boy - un...   
2  Since its immediate success in 1813, Pride and...   
3  A pilot stranded in the desert awakes one morn...   
4  The new novel by George Orwell is the major wo...   

                     Author  Fantasy  Adult  Romance  Young Adult  Historical  \
0                Harper Lee        0      1        0            1           1   
1              J.K. Rowling        1      1        0            1           0   
2               Jane Austen        0      0        1            0           1   
3 

In [131]:
# Count rows per author, keep the authors that occur more than once, and
# store their names as a plain list (value_counts order is preserved)
authors = (
    rawdatadf['Author']
    .value_counts()
    .loc[lambda counts: counts > 1]
    .index
    .tolist()
)
print(authors)

['Stephen King', 'William Shakespeare', 'Terry Pratchett', 'Agatha Christie', 'Rick Riordan', 'John Grisham', 'Chuck Palahniuk', 'Isaac Asimov', 'Lee Child', 'James Rollins', 'Roald Dahl', 'James Patterson', 'David Baldacci', 'Cassandra Clare', 'Richelle Mead', 'Julie Mannino', 'Lemony Snicket', 'Robert Jordan', 'Sarah J. Maas', 'Edgar Allan Poe', 'Lucian Bane', 'Nicholas Sparks', 'Tom Clancy', 'Kirsten Fullmer', 'Michael Crichton', 'Dr. Seuss', 'Irvine Welsh', 'Charles Bukowski', 'Haruki Murakami', 'Neil Gaiman', "Louis L'Amour", 'Terry Brooks', 'Arthur C. Clarke', 'George R.R. Martin', 'Jodi Picoult', 'J.K. Rowling', 'Arthur Conan Doyle', 'Ken Follett', 'Michael Connelly', 'Ursula K. Le Guin', 'C.S. Lewis', 'Charles Dickens', 'Brandon Sanderson', 'J.R.R. Tolkien', 'Sidney Sheldon', 'Nora Roberts', 'Mark Twain', 'Dean Koontz', 'Douglas Adams', 'Jim Butcher', 'Kristin Hannah', 'Karin Slaughter', 'Colleen Hoover', 'Suzanne Collins', 'James A. Michener', 'Orson Scott Card', 'Zoe Saadia',

In [132]:
# Restrict the dataset to rows whose author is in `authors`
filtereddf = rawdatadf.loc[rawdatadf['Author'].isin(authors)]

# Preview the filtered rows
print(filtereddf.head())

# Report the (rows, columns) size of the filtered dataframe
print(filtereddf.shape)

                                                Book  \
0                              To Kill a Mockingbird   
1  Harry Potter and the Philosopher’s Stone (Harr...   
2                                Pride and Prejudice   
4                                               1984   
5                                   The Great Gatsby   

                                         Description               Author  \
0  The unforgettable novel of a childhood in a sl...           Harper Lee   
1  Harry Potter thinks he is an ordinary boy - un...         J.K. Rowling   
2  Since its immediate success in 1813, Pride and...          Jane Austen   
4  The new novel by George Orwell is the major wo...        George Orwell   
5  Alternate Cover Edition ISBN: 0743273567 (ISBN...  F. Scott Fitzgerald   

   Fantasy  Adult  Romance  Young Adult  Historical  Historical Fiction  \
0        0      1        0            1           1                   1   
1        1      1        0            1           

In [133]:
# A function to recommend a book by the same author
def get_similar_author_book(title, rawdatadf):
    """Return a randomly chosen other title written by the author of `title`.

    The author is read from the first row whose 'Book' equals `title`; an
    IndexError is raised when no row matches. Every title equal to `title`
    is excluded from the candidates. Returns None when the author has no
    other title in `rawdatadf`.
    """
    # Author of the first row matching the requested title
    requested_rows = rawdatadf.loc[rawdatadf['Book'] == title]
    author = requested_rows['Author'].values[0]

    # Titles by that author, minus the requested title itself
    titles_by_author = rawdatadf.loc[rawdatadf['Author'] == author, 'Book'].values
    candidates = titles_by_author[titles_by_author != title]

    # Nothing else by this author
    if candidates.size == 0:
        return None

    # Uniform random pick among the remaining titles
    return np.random.choice(candidates)

# Quick check: request another title by the author of "Pride and Prejudice"
print(get_similar_author_book(title="Pride and Prejudice", rawdatadf=rawdatadf))

Mansfield Park


In [134]:
# Now, a function to recommend a dissimilar book
def get_dissimilar_book(title, rawdatadf):
    """Return a random title by another author that shares no genre with `title`.

    Parameters
    ----------
    title : str
        Value looked up in the 'Book' column. If several rows match, the
        first one supplies the author and the genre flags.
    rawdatadf : pandas.DataFrame
        Must contain 'Book', 'Author' and 'Description' columns. Every other
        column is treated as a genre flag, where a value of 1 marks the book
        as belonging to that genre (assumes 0/1 one-hot columns - confirm
        against the dataset).

    Returns
    -------
    str or None
        A randomly chosen title whose author differs from the requested
        book's author and whose flag is 0 in every genre the requested book
        has set to 1. None when no such book exists.

    Raises
    ------
    IndexError
        If `title` does not occur in the 'Book' column.
    """
    # Rows for the requested title; the first one defines author and genres
    requested_rows = rawdatadf[rawdatadf['Book'] == title]
    author = requested_rows['Author'].values[0]
    genre_flags = requested_rows.drop(columns=['Author', 'Book', 'Description']).iloc[0]

    # Genres the requested book belongs to
    shared_genres = [genre for genre, flag in genre_flags.items() if flag == 1]

    # Candidates: written by someone else and flagged 0 in all of those genres.
    # The mask is built on rawdatadf itself, so no index re-alignment is needed.
    candidate_mask = rawdatadf['Author'] != author
    if shared_genres:
        candidate_mask = candidate_mask & (rawdatadf[shared_genres] == 0).all(axis=1)
    candidates = rawdatadf.loc[candidate_mask, 'Book'].values

    # np.random.choice raises ValueError on an empty array; callers check for
    # None instead, so report "no dissimilar book" explicitly.
    if len(candidates) == 0:
        return None

    # Randomly select one of the remaining books and return it
    return np.random.choice(candidates)

In [135]:
# Quick check: request a title sharing no genre with "Pride and Prejudice"
print(get_dissimilar_book(title="Pride and Prejudice", rawdatadf=rawdatadf))

Eeny Meeny (Helen Grace, #1)


In [136]:
# Accumulate one {'Book', 'Similar', 'Dissimilar'} record per usable book
recommendations = []

# Walk every title in the filtered dataframe
for book in filtereddf['Book'].values:
    # Draw the same-author pick first, then the dissimilar pick
    similar = get_similar_author_book(book, filtereddf)
    dissimilar = get_dissimilar_book(book, filtereddf)
    # Record the triple only when both picks are available
    if similar is not None and dissimilar is not None:
        recommendations.append({'Book': book, 'Similar': similar, 'Dissimilar': dissimilar})

# Turn the collected records into a DataFrame (one row per record)
recommendations_titles = pd.DataFrame(recommendations)

# Show the resulting table
print(recommendations_titles)

                                                   Book  \
0                                 To Kill a Mockingbird   
1     Harry Potter and the Philosopher’s Stone (Harr...   
2                                   Pride and Prejudice   
3                                                  1984   
4                                      The Great Gatsby   
...                                                 ...   
3810                                  Wolf Among Wolves   
3811                                         Play Dirty   
3812             The Rise of Nine (Lorien Legacies, #3)   
3813                                       Rats Saw God   
3814         The Evolution of Mara Dyer (Mara Dyer, #2)   

                                                Similar  \
0                                     Go Set a Watchman   
1     Harry Potter and the Order of the Phoenix (Har...   
2                                 Sense and Sensibility   
3                                    Animal Farm / 1984

In [137]:
# Function to get a book description from the title
def get_book_description(title, rawdatadf):
    """Return the 'Description' of the first row whose 'Book' equals `title`.

    Raises IndexError when no row in `rawdatadf` has that title.
    """
    title_matches = rawdatadf['Book'] == title
    descriptions = rawdatadf.loc[title_matches, 'Description']
    return descriptions.values[0]

# Quick check: look up the description stored for "Pride and Prejudice"
print(get_book_description(title="Pride and Prejudice", rawdatadf=rawdatadf))

Since its immediate success in 1813, Pride and Prejudice has remained one of the most popular novels in the English language. Jane Austen called this brilliant work "her own darling child" and its vivacious heroine, Elizabeth Bennet, "as delightful a creature as ever appeared in print." The romantic clash between the opinionated Elizabeth and her proud beau, Mr. Darcy, is a splendid performance of civilized sparring. And Jane Austen's radiant wit sparkles as her characters dance a delicate quadrille of flirtation and intrigue, making this book the most superb comedy of manners of Regency England.Alternate cover edition of ISBN 9780679783268


In [138]:
# Convert the recommendation_titles dataframe to a dataframe with descriptions.
# Each title column gets a companion column holding the description that
# get_book_description finds for that title in rawdatadf.
recommendations_desc = recommendations_titles.copy()
recommendations_desc['Book Description'] = recommendations_desc['Book'].apply(lambda x: get_book_description(x, rawdatadf))
recommendations_desc['Similar Description'] = recommendations_desc['Similar'].apply(lambda x: get_book_description(x, rawdatadf))
recommendations_desc['Dissimilar Description'] = recommendations_desc['Dissimilar'].apply(lambda x: get_book_description(x, rawdatadf))

# Print the recommendations with descriptions
print(recommendations_desc)
print(recommendations_desc.shape)

# Write the recommendations to a CSV file.
# A forward slash is used as the separator: the previous 'Datasets\g...'
# literal relied on '\g' being an unrecognised escape sequence (which Python
# warns about) and only resolved as a path on Windows.
recommendations_desc.to_csv('Datasets/goodreads_recommendations.csv', index=False)

                                                   Book  \
0                                 To Kill a Mockingbird   
1     Harry Potter and the Philosopher’s Stone (Harr...   
2                                   Pride and Prejudice   
3                                                  1984   
4                                      The Great Gatsby   
...                                                 ...   
3810                                  Wolf Among Wolves   
3811                                         Play Dirty   
3812             The Rise of Nine (Lorien Legacies, #3)   
3813                                       Rats Saw God   
3814         The Evolution of Mara Dyer (Mara Dyer, #2)   

                                                Similar  \
0                                     Go Set a Watchman   
1     Harry Potter and the Order of the Phoenix (Har...   
2                                 Sense and Sensibility   
3                                    Animal Farm / 1984