In [1]:
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import linear_kernel

In [2]:
df_books = pd.read_csv("data/books_data/books.csv",nrows=100,sep=",",encoding='utf-8', on_bad_lines='skip',skip_blank_lines=True)

In [3]:
df_books.head()

Unnamed: 0,ISBN,Book-Title,Book-Author,Year-Of-Publication,Publisher,Image-URL-S,Image-URL-M,Image-URL-L
0,195153448,Classical Mythology,Mark P. O. Morford,2002,Oxford University Press,http://images.amazon.com/images/P/0195153448.0...,http://images.amazon.com/images/P/0195153448.0...,http://images.amazon.com/images/P/0195153448.0...
1,2005018,Clara Callan,Richard Bruce Wright,2001,HarperFlamingo Canada,http://images.amazon.com/images/P/0002005018.0...,http://images.amazon.com/images/P/0002005018.0...,http://images.amazon.com/images/P/0002005018.0...
2,60973129,Decision in Normandy,Carlo D'Este,1991,HarperPerennial,http://images.amazon.com/images/P/0060973129.0...,http://images.amazon.com/images/P/0060973129.0...,http://images.amazon.com/images/P/0060973129.0...
3,374157065,Flu: The Story of the Great Influenza Pandemic...,Gina Bari Kolata,1999,Farrar Straus Giroux,http://images.amazon.com/images/P/0374157065.0...,http://images.amazon.com/images/P/0374157065.0...,http://images.amazon.com/images/P/0374157065.0...
4,393045218,The Mummies of Urumchi,E. J. W. Barber,1999,W. W. Norton &amp; Company,http://images.amazon.com/images/P/0393045218.0...,http://images.amazon.com/images/P/0393045218.0...,http://images.amazon.com/images/P/0393045218.0...


In [4]:
df_books['Book-Title']

0                                   Classical Mythology
1                                          Clara Callan
2                                  Decision in Normandy
3     Flu: The Story of the Great Influenza Pandemic...
4                                The Mummies of Urumchi
                            ...                        
95                            Pretend You Don't See Her
96                                           Fast Women
97                                  Female Intelligence
98        Pasquale's Nose: Idle Days in an Italian Town
99                         The Gospel of Judas: A Novel
Name: Book-Title, Length: 100, dtype: object

Declare the stop words: words that dont count in a language

In [7]:
tfidf = TfidfVectorizer(stop_words='english')

Fill all empty columns with empty stings

In [5]:
df_books['Book-Title'] = df_books['Book-Title'].fillna('')

Build the vector space model matrix of all the words for the movies dataframe

In [8]:
tfidf_matrix = tfidf.fit_transform(df_books['Book-Title'])

Develop the linear kernel that will be used to calculate the cosine similarity

In [9]:
cosine_sim = linear_kernel(tfidf_matrix, tfidf_matrix)

Extract the indices from the df

In [10]:
indices = pd.Series(df_books.index, index=df_books['Book-Title'])
indices

Book-Title
Classical Mythology                                                                                    0
Clara Callan                                                                                           1
Decision in Normandy                                                                                   2
Flu: The Story of the Great Influenza Pandemic of 1918 and the Search for the Virus That Caused It     3
The Mummies of Urumchi                                                                                 4
                                                                                                      ..
Pretend You Don't See Her                                                                             95
Fast Women                                                                                            96
Female Intelligence                                                                                   97
Pasquale's Nose: Idle Days in an Italian Tow

In [11]:
indices['Fast Women']

96

Function to get the recommendations

In [25]:
def get_recommendations(title, cosine_sim = cosine_sim):
    index = indices[title]
    similarity_scores = enumerate(cosine_sim[index])
    similarity_scores = sorted(similarity_scores, key = lambda x:x[1], reverse=True)
    #similarity_scores = similarity_scores[1:11]
    similarity_scores_index = [i[0] for i in similarity_scores]
    recommendations = df_books["Book-Title"].iloc[similarity_scores_index]
    print( recommendations )
    return recommendations.to_json()

In [26]:
get_recommendations('Fast Women')

96                                           Fast Women
0                                   Classical Mythology
1                                          Clara Callan
2                                  Decision in Normandy
3     Flu: The Story of the Great Influenza Pandemic...
                            ...                        
94    Midnight in the Garden of Good and Evil: A Sav...
95                            Pretend You Don't See Her
97                                  Female Intelligence
98        Pasquale's Nose: Idle Days in an Italian Town
99                         The Gospel of Judas: A Novel
Name: Book-Title, Length: 100, dtype: object


'{"96":"Fast Women","0":"Classical Mythology","1":"Clara Callan","2":"Decision in Normandy","3":"Flu: The Story of the Great Influenza Pandemic of 1918 and the Search for the Virus That Caused It","4":"The Mummies of Urumchi","5":"The Kitchen God\'s Wife","6":"What If?: The World\'s Foremost Military Historians Imagine What Might Have Been","7":"PLEADING GUILTY","8":"Under the Black Flag: The Romance and the Reality of Life Among the Pirates","9":"Where You\'ll Find Me: And Other Stories","10":"Nights Below Station Street","11":"Hitler\'s Secret Bankers: The Myth of Swiss Neutrality During the Holocaust","12":"The Middle Stories","13":"Jane Doe","14":"A Second Chicken Soup for the Woman\'s Soul (Chicken Soup for the Soul Series)","15":"The Witchfinder (Amos Walker Mystery Series)","16":"More Cunning Than Man: A Social History of Rats and Man","17":"Goodbye to the Buttermilk Sky","18":"The Testament","19":"Beloved (Plume Contemporary Fiction)","20":"Our Dumb Century: The Onion Presents 

In [16]:
get_recommendations('Classical Mythology')

0                                   Classical Mythology
1                                          Clara Callan
2                                  Decision in Normandy
3     Flu: The Story of the Great Influenza Pandemic...
4                                The Mummies of Urumchi
                            ...                        
95                            Pretend You Don't See Her
96                                           Fast Women
97                                  Female Intelligence
98        Pasquale's Nose: Idle Days in an Italian Town
99                         The Gospel of Judas: A Novel
Name: Book-Title, Length: 100, dtype: object
