Creating a feature-based perfume recommendation system

In [1]:
#importing the libraries
import pandas as pd
import numpy as np

In [2]:
#importing libraries for recommendation modelling, to be moved to modelling
from sklearn.metrics.pairwise import cosine_similarity

from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.metrics.pairwise import linear_kernel

In [3]:
#loading the perfume table from the CSV file sitting next to the notebook
perfume = pd.read_csv(filepath_or_buffer='perfume_cl.csv')

In [4]:
#discarding every row that has a missing value, then renumbering the rows;
#reset_index() keeps the previous row labels as a regular column
perfume = perfume.dropna().reset_index()

In [5]:
#features used for recommendation: everything except the row-number, brand,
#title and rating columns (drop() already returns a new frame)
perfume_features = perfume.drop(columns=['index', 'brand', 'title', 'rating_score'])

In [6]:
#perfume titles, in the same row order as the perfume table
perfume_title = perfume.loc[:, 'title']

In [7]:
#user input: title of the perfume for which recommendations are produced;
#it is compared for exact equality against the 'title' column
perfume_example = 'Mon Paris Yves Saint Laurent for women'

In [8]:
#row labels of every perfume whose title equals the requested one, as a numpy array
ex_index = perfume_title.loc[perfume_title.eq(perfume_example)].index.to_numpy()

In [9]:
#taking the scalar row label of the first match out of the index array;
#an unknown title yields an empty array, so fail with a message that names it
#(still an IndexError, the same exception type ex_index[0] would raise)
if len(ex_index) == 0:
    raise IndexError(f"perfume {perfume_example!r} was not found among the perfume titles")
index_val = ex_index[0]

Both the cosine similarity and the correlation functions give similarity matrices that are very close to each other, and the perfumes recommended by the two approaches are mostly identical. Both formulations are kept here for future use.

In [10]:
#pairwise cosine similarity between the feature rows of all perfumes
cosine_sim = cosine_similarity(X=perfume_features)

In [11]:
#similarity of the target perfume to every perfume (row(s) picked by ex_index)
cosine_sim[ex_index, :]

array([[0.54422138, 0.54025221, 1.        , ..., 0.26604358, 0.28166789,
        0.22783051]])

In [12]:
#wrapping the target row in a data frame labelled with the perfume titles,
#then transposing so that each perfume title becomes a row
cosine_concat = pd.DataFrame(
    data=cosine_sim[ex_index],
    index=ex_index,
    columns=perfume_title,
).transpose()

In [13]:
#previewing the frame with the titles moved into a column; the result is only
#displayed here, cosine_concat itself is left unchanged
cosine_concat.reset_index(drop=False)

Unnamed: 0,title,2
0,Miss Dior Cherie Eau de Parfum Christian Dior ...,0.544221
1,Hanae Mori Hanae Mori for women,0.540252
2,Mon Paris Yves Saint Laurent for women,1.000000
3,Mukhallat Montale for women and men,0.707721
4,Roses Elixir Montale for women,0.521192
...,...,...
4267,Vamp à NY Honore des Pres for women,0.534640
4268,So Pretty Cartier for women,0.303978
4269,Ultraviolet Paco Rabanne for men,0.266044
4270,Venice Yves Rocher for women,0.281668


In [14]:
#checking that the target perfume is the one most similar to itself:
#idxmax(axis=0) gives, for each similarity column, the row label (perfume title)
#holding the largest value. Note that max(axis=0).index would only give back the
#column labels and therefore says nothing about which perfume is the maximum.
maxindex = cosine_concat.idxmax(axis=0)

In [15]:
#ordering perfumes from most to least similar to the target, then moving the
#titles into a 'title' column so that rows are numbered by rank
cosine_concat = (
    cosine_concat
    .sort_values(by=[index_val], ascending=False)
    .reset_index()
)

In [16]:
#listing the 10 most similar perfumes; the target is excluded by title instead of
#by skipping row 0, because ties in similarity (e.g. perfumes with identical
#features) do not guarantee that the target itself sorts into the first row
cosine_concat.loc[cosine_concat['title'] != perfume_example, 'title'].head(10)

1                        Deci Dela Nina Ricci for women
2                            Twirl Kate Spade for women
3                        Heiress Paris Hilton for women
4                     Hedonist Viktoria Minya for women
5                      Play It Lovely Playboy for women
6            Fath de Fath (1993) Jacques Fath for women
7                  Gorgeous Victoria's Secret for women
8              Glow after Dark Jennifer Lopez for women
9     La Petite Robe Noire Eau de Toilette Guerlain ...
10                    Dalissime Salvador Dali for women
Name: title, dtype: object

In [17]:
#correlation matrix between perfumes: with rowvar=True (the numpy default) every
#row of the feature table is treated as one variable
corrolation_sim = np.corrcoef(perfume_features, rowvar=True)

In [18]:
#correlation of the target perfume with every perfume (row(s) picked by ex_index)
corrolation_sim[ex_index, :]

array([[0.51443562, 0.51022136, 1.        , ..., 0.21610528, 0.23541527,
        0.17582032]])

In [19]:
#wrapping the target row in a data frame labelled with the perfume titles,
#then transposing so that each perfume title becomes a row
corrolation_concat = pd.DataFrame(
    data=corrolation_sim[ex_index],
    index=ex_index,
    columns=perfume_title,
).transpose()

In [20]:
#previewing the frame with the titles moved into a column; the result is only
#displayed here, corrolation_concat itself is left unchanged
corrolation_concat.reset_index(drop=False)

Unnamed: 0,title,2
0,Miss Dior Cherie Eau de Parfum Christian Dior ...,0.514436
1,Hanae Mori Hanae Mori for women,0.510221
2,Mon Paris Yves Saint Laurent for women,1.000000
3,Mukhallat Montale for women and men,0.687688
4,Roses Elixir Montale for women,0.489899
...,...,...
4267,Vamp à NY Honore des Pres for women,0.504287
4268,So Pretty Cartier for women,0.258121
4269,Ultraviolet Paco Rabanne for men,0.216105
4270,Venice Yves Rocher for women,0.235415


In [21]:
#ordering perfumes from most to least correlated with the target, then moving the
#titles into a 'title' column so that rows are numbered by rank
corrolation_concat = (
    corrolation_concat
    .sort_values(by=[index_val], ascending=False)
    .reset_index()
)

In [22]:
#listing the 10 most similar perfumes; the target is excluded by title instead of
#by skipping row 0, because ties in correlation (e.g. perfumes with identical
#features) do not guarantee that the target itself sorts into the first row
corrolation_concat.loc[corrolation_concat['title'] != perfume_example, 'title'].head(10)

1                        Deci Dela Nina Ricci for women
2                            Twirl Kate Spade for women
3                        Heiress Paris Hilton for women
4                     Hedonist Viktoria Minya for women
5                      Play It Lovely Playboy for women
6            Fath de Fath (1993) Jacques Fath for women
7                  Gorgeous Victoria's Secret for women
8              Glow after Dark Jennifer Lopez for women
9     La Petite Robe Noire Eau de Toilette Guerlain ...
10                    Dalissime Salvador Dali for women
Name: title, dtype: object