In [2]:
# import libraries
import numpy as np
import pandas as pd
from scipy.sparse import csr_matrix
from sklearn.neighbors import NearestNeighbors
import matplotlib.pyplot as plt

In [3]:
# get data files
!wget https://cdn.freecodecamp.org/project-data/books/book-crossings.zip

!unzip book-crossings.zip

books_filename = 'BX-Books.csv'
ratings_filename = 'BX-Book-Ratings.csv'

--2023-07-23 19:29:48--  https://cdn.freecodecamp.org/project-data/books/book-crossings.zip
Resolving cdn.freecodecamp.org (cdn.freecodecamp.org)... 172.67.70.149, 104.26.3.33, 104.26.2.33, ...
Connecting to cdn.freecodecamp.org (cdn.freecodecamp.org)|172.67.70.149|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 26085508 (25M) [application/zip]
Saving to: ‘book-crossings.zip.1’


2023-07-23 19:29:55 (5.11 MB/s) - ‘book-crossings.zip.1’ saved [26085508/26085508]

Archive:  book-crossings.zip
replace BX-Book-Ratings.csv? [y]es, [n]o, [A]ll, [N]one, [r]ename: ^C


In [4]:
df_books = pd.read_csv(
    books_filename,
    encoding = "ISO-8859-1",
    sep=";",
    header=0,
    names=['isbn', 'title', 'author'],
    usecols=['isbn', 'title', 'author'],
    dtype={'isbn': 'str', 'title': 'str', 'author': 'str'})

df_ratings = pd.read_csv(
    ratings_filename,
    encoding = "ISO-8859-1",
    sep=";",
    header=0,
    names=['user', 'isbn', 'rating'],
    usecols=['user', 'isbn', 'rating'],
    dtype={'user': 'int32', 'isbn': 'str', 'rating': 'float32'})

In [4]:
print(df_books.shape)
print(df_ratings.shape)

(271379, 3)
(1149780, 3)


In [5]:
user_review_counts = df_ratings['user'].value_counts()

book_review_counts = df_ratings['isbn'].value_counts()

filtered_ratings = df_ratings[
    (df_ratings['user'].isin(user_review_counts[user_review_counts > 100].index)) &
    (df_ratings['isbn'].isin(book_review_counts[book_review_counts > 5].index))
]

final_data = filtered_ratings.merge(df_books, on='isbn')

In [6]:
final_data

Unnamed: 0,user,isbn,rating,title,author
0,276925,002542730X,10.0,Politically Correct Bedtime Stories: Modern Ta...,James Finn Garner
1,277427,002542730X,10.0,Politically Correct Bedtime Stories: Modern Ta...,James Finn Garner
2,3363,002542730X,0.0,Politically Correct Bedtime Stories: Modern Ta...,James Finn Garner
3,10030,002542730X,7.0,Politically Correct Bedtime Stories: Modern Ta...,James Finn Garner
4,11676,002542730X,6.0,Politically Correct Bedtime Stories: Modern Ta...,James Finn Garner
...,...,...,...,...,...
353727,276018,3423125772,0.0,Zauber gegen die KÃ?Â¤lte. Erotische Gedichte....,Gioconda Belli
353728,276018,3423128399,0.0,Die Entdeckung der Currywurst.,Uwe Timm
353729,276018,3453042905,0.0,Gevatter Tod. Roman. ( Fantasy).,Terry Pratchett
353730,276018,3498044761,0.0,Der TÃ?Â¤nzer.,Colum McCann


In [7]:
titles=final_data['title']
tr=titles.drop_duplicates()
tr

0         Politically Correct Bedtime Stories: Modern Ta...
102           Sushi for Beginners : A Novel (Keyes, Marian)
118               Wasted : A Memoir of Anorexia and Bulimia
130                               La casa de los espÃ­ritus
138                                     The Music of Chance
                                ...                        
353727    Zauber gegen die KÃ?Â¤lte. Erotische Gedichte....
353728                       Die Entdeckung der Currywurst.
353729                     Gevatter Tod. Roman. ( Fantasy).
353730                                       Der TÃ?Â¤nzer.
353731                                            Mondlaub.
Name: title, Length: 28681, dtype: object

In [8]:
table=final_data.pivot_table(index='title',columns='user',values='rating')
table.fillna(0,inplace=True)
table

user,183,254,507,882,1424,1435,1733,1903,2033,2110,...,276463,276538,276680,276925,277427,277478,277639,278137,278188,278418
title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
"Earth Prayers From around the World: 365 Prayers, Poems, and Invocations for Honoring the Earth",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
"Good Wives: Image and Reality in the Lives of Women in Northern New England, 1650-1750",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
"Goosebumps Monster Edition 1: Welcome to Dead House, Stay Out of the Basement, and Say Cheese and Die!",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Murder of a Sleeping Beauty (Scumble River Mysteries (Paperback)),0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
"Q-Space (Star Trek The Next Generation, Book 47)",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
operation : mistletoe (the special agents),0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
stardust,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
together by christmas,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
why I'm like this : True Stories,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [9]:
table.shape

(28681, 1822)

In [10]:
table.info()

<class 'pandas.core.frame.DataFrame'>
Index: 28681 entries,  Earth Prayers From around the World: 365 Prayers, Poems, and Invocations for Honoring the Earth to Â¿QuÃ© me quieres, amor?
Columns: 1822 entries, 183 to 278418
dtypes: float32(1822)
memory usage: 199.6+ MB


In [8]:
import sqlite3


In [42]:

# Connect to the SQLite database (create a new database if it doesn't exist)
conn = sqlite3.connect('my_database.db')

# Create a new table 'titles_table' in the database with a column 'title'
tr.to_sql('table_name', conn, if_exists='replace', index=False, dtype={'title': 'TEXT'})

# Close the connection
conn.close()

In [45]:
def get_db_connection():
    conn = sqlite3.connect('my_database.db')
    return conn

def get_book_titles():
    conn = get_db_connection()
    cursor = conn.cursor()
    cursor.execute('SELECT title FROM table_name')
    titles = [row[0] for row in cursor.fetchall()]
    conn.close()
    return titles

# Assuming you have already connected to the SQLite database and assigned the connection to 'conn'

titles2 = get_book_titles()

In [36]:
def get_table_schema(table_name):
    query = f"PRAGMA table_info({table_name})"
    cursor = conn.execute(query)
    schema = cursor.fetchall()

    # Convert the result to a dictionary for easy access
    table_schema = {}
    for column_info in schema:
        column_name, data_type, _, _, _, _ = column_info
        table_schema[column_name] = data_type

    return table_schema

# Get the schema of the newly created table
schema = get_table_schema('table_name')
print(schema)

# Close the connection
conn.close()

{0: '183', 1: '254', 2: '507', 3: '882', 4: '1424', 5: '1435', 6: '1733', 7: '1903', 8: '2033', 9: '2110', 10: '2276', 11: '2358', 12: '2442', 13: '2766', 14: '2891', 15: '2977', 16: '3363', 17: '3371', 18: '3757', 19: '3827', 20: '4017', 21: '4157', 22: '4385', 23: '5582', 24: '5903', 25: '6242', 26: '6251', 27: '6323', 28: '6543', 29: '6563', 30: '6575', 31: '6703', 32: '7105', 33: '7125', 34: '7158', 35: '7286', 36: '7346', 37: '7915', 38: '8066', 39: '8067', 40: '8245', 41: '8253', 42: '8284', 43: '8362', 44: '8454', 45: '8487', 46: '8681', 47: '8782', 48: '8890', 49: '8936', 50: '8971', 51: '9141', 52: '9417', 53: '9747', 54: '9856', 55: '9908', 56: '10030', 57: '10314', 58: '10354', 59: '10447', 60: '10560', 61: '10819', 62: '11120', 63: '11601', 64: '11629', 65: '11676', 66: '11724', 67: '11993', 68: '12538', 69: '12657', 70: '12824', 71: '12835', 72: '12982', 73: '13082', 74: '13093', 75: '13273', 76: '13540', 77: '13552', 78: '13582', 79: '13850', 80: '13935', 81: '13995', 82:

In [85]:
from fuzzywuzzy import fuzz, process




In [11]:
nbrs = NearestNeighbors(n_neighbors=6, algorithm='auto', metric='cosine')
nbrs.fit(table)

# Compute the cosine similarity with indices
similarities, indices = nbrs.kneighbors(table)

# Transform the cosine similarity to get similarity between 0 and 1
closeness =1- similarities

print(closeness)
print(indices)

[[1.         0.6772855  0.6772855  0.6095569  0.6095569  0.6095569 ]
 [0.99999994 0.70710677 0.70710677 0.70710677 0.70710677 0.70710677]
 [1.         1.         1.         1.         0.78086877 0.70710677]
 ...
 [0.9999999  0.8389777  0.83896166 0.8389195  0.83752984 0.83319163]
 [1.         0.7624929  0.7624929  0.620466   0.5674536  0.5391639 ]
 [0.         0.         0.         0.         0.         0.        ]]
[[    0 19921   242 27915  3597  1024]
 [    1   300 15198 28151 15835 28018]
 [18107  7297     2 21337 26392  3212]
 ...
 [28678  6746 15273 20367  9468 18078]
 [28679 18492  6281 10154  5425 25029]
 [19120 19118 19122 19119 19123 19121]]


In [12]:
def get_recommends(book = ""):
  ind=np.where(table.index==book)[0][0]
  temp=indices[ind]
  rc=[]
  for i in range(len(temp)):

    b=table.iloc[temp[i]].name
    # temp_df=df_books[df_books['title']==b]
    # temp_df.drop_duplicates('title')['title']
    s=closeness[ind][i]
    t2=[b]
    rc.append(b)
  recommended_books=[rc]


  return recommended_books

In [13]:
books = get_recommends("Anna Karenina")
print(books)

[['Anna Karenina', '2041: Twelve Short Stories About the Future by Top Science Fiction Writers', "Cleveland Amory's Compleat Cat: The Cat Who Came for Christmas : The Cat and the Curmudgeon, the Best Cat Ever", 'The Ultimate Star Trek Quiz Book', 'The Book of Lost Tales, Part 1 (The History of Middle-Earth, Volume 1)', 'Crossroad (Star Trek, Book 71)']]


In [22]:
import pickle
pickle.dump(table,open('table.pkl','wb'))
pickle.dump(indices,open('indices.pkl','wb'))
pickle.dump(closeness,open('closeness.pkl','wb'))

In [32]:
booksdb = pd.read_csv(
    books_filename,
    encoding = "ISO-8859-1",
    sep=";",
    header=0,
    names=['isbn', 'title', 'author','y','p','img2','img'],
    usecols=['isbn', 'title', 'author','y','p','img2','img'],
    dtype={'isbn': 'str', 'title': 'str', 'author': 'str','y':'str','p':'str','img2':'str','img':'str'})

In [33]:
pickle.dump(booksdb,open('booksdb.pkl','wb'))

In [46]:
pickle.dump(titles2,open('titles.pkl','wb'))

In [34]:
booksdb

Unnamed: 0,isbn,title,author,y,p,img2,img
0,0195153448,Classical Mythology,Mark P. O. Morford,2002,Oxford University Press,http://images.amazon.com/images/P/0195153448.0...,http://images.amazon.com/images/P/0195153448.0...
1,0002005018,Clara Callan,Richard Bruce Wright,2001,HarperFlamingo Canada,http://images.amazon.com/images/P/0002005018.0...,http://images.amazon.com/images/P/0002005018.0...
2,0060973129,Decision in Normandy,Carlo D'Este,1991,HarperPerennial,http://images.amazon.com/images/P/0060973129.0...,http://images.amazon.com/images/P/0060973129.0...
3,0374157065,Flu: The Story of the Great Influenza Pandemic...,Gina Bari Kolata,1999,Farrar Straus Giroux,http://images.amazon.com/images/P/0374157065.0...,http://images.amazon.com/images/P/0374157065.0...
4,0393045218,The Mummies of Urumchi,E. J. W. Barber,1999,W. W. Norton &amp; Company,http://images.amazon.com/images/P/0393045218.0...,http://images.amazon.com/images/P/0393045218.0...
...,...,...,...,...,...,...,...
271374,0440400988,There's a Bat in Bunk Five,Paula Danziger,1988,Random House Childrens Pub (Mm),http://images.amazon.com/images/P/0440400988.0...,http://images.amazon.com/images/P/0440400988.0...
271375,0525447644,From One to One Hundred,Teri Sloat,1991,Dutton Books,http://images.amazon.com/images/P/0525447644.0...,http://images.amazon.com/images/P/0525447644.0...
271376,006008667X,Lily Dale : The True Story of the Town that Ta...,Christine Wicker,2004,HarperSanFrancisco,http://images.amazon.com/images/P/006008667X.0...,http://images.amazon.com/images/P/006008667X.0...
271377,0192126040,Republic (World's Classics),Plato,1996,Oxford University Press,http://images.amazon.com/images/P/0192126040.0...,http://images.amazon.com/images/P/0192126040.0...
