## Importing Libraries

In [1]:
import numpy as np
import pandas as pd
import nltk

In [17]:
# Download the punkt resource

nltk.download('punkt')

[nltk_data] Downloading package punkt to /Users/apple/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.


True

## Loading the dataset

In [3]:
amazon_df = pd.read_csv("amazon_product.csv")
amazon_df.head()

Unnamed: 0,id,Title,Description,Category
0,1,Swissmar Capstore Select Storage Rack for 18-...,Swissmar's capstore select 18 storage unit kee...,Home & Kitchen Kitchen & Dining Kitchen Utens...
1,2,Gemini200 Delta CV-880 Gold Crown Livery Airc...,Welcome to the exciting world of GeminiJets! O...,Toys & Games Hobbies Models & Model Kits Pre-...
2,5,Superior Threads 10501-2172 Magnifico Cream P...,"For quilting and embroidery, this product is m...","Arts, Crafts & Sewing Sewing Thread & Floss S..."
3,6,Fashion Angels Color Rox Hair Chox Kit,Experiment with the haute trend of hair chalki...,Beauty & Personal Care Hair Care Hair Colorin...
4,8,Union Creative Giant Killing Figure 05: Daisu...,From Union Creative. Turn your display shelf i...,Toys & Games › Action Figures & Statues › Sta...


In [8]:
amazon_df.drop("id", axis=1, inplace=True)
amazon_df.head()

Unnamed: 0,Title,Description,Category
0,Swissmar Capstore Select Storage Rack for 18-...,Swissmar's capstore select 18 storage unit kee...,Home & Kitchen Kitchen & Dining Kitchen Utens...
1,Gemini200 Delta CV-880 Gold Crown Livery Airc...,Welcome to the exciting world of GeminiJets! O...,Toys & Games Hobbies Models & Model Kits Pre-...
2,Superior Threads 10501-2172 Magnifico Cream P...,"For quilting and embroidery, this product is m...","Arts, Crafts & Sewing Sewing Thread & Floss S..."
3,Fashion Angels Color Rox Hair Chox Kit,Experiment with the haute trend of hair chalki...,Beauty & Personal Care Hair Care Hair Colorin...
4,Union Creative Giant Killing Figure 05: Daisu...,From Union Creative. Turn your display shelf i...,Toys & Games › Action Figures & Statues › Sta...


## Handling Null Values

In [9]:
amazon_df.isnull().sum()

Title          0
Description    0
Category       0
dtype: int64

## NLP

In [10]:
from nltk.stem.snowball import SnowballStemmer
stemmer = SnowballStemmer('english')

def tokenize_stem(text):
    tokens = nltk.word_tokenize(text.lower())
    stemmed = [stemmer.stem(w) for w in tokens]
    return " ".join(stemmed)

In [18]:
amazon_df["stemmed_tokens"] = amazon_df.apply(lambda row:tokenize_stem(row["Title"] + " " + row["Description"]),axis=1)
amazon_df.head()

Unnamed: 0,Title,Description,Category,stemmed_tokens
0,Swissmar Capstore Select Storage Rack for 18-...,Swissmar's capstore select 18 storage unit kee...,Home & Kitchen Kitchen & Dining Kitchen Utens...,swissmar capstor select storag rack for 18-pac...
1,Gemini200 Delta CV-880 Gold Crown Livery Airc...,Welcome to the exciting world of GeminiJets! O...,Toys & Games Hobbies Models & Model Kits Pre-...,gemini200 delta cv-880 gold crown liveri aircr...
2,Superior Threads 10501-2172 Magnifico Cream P...,"For quilting and embroidery, this product is m...","Arts, Crafts & Sewing Sewing Thread & Floss S...",superior thread 10501-2172 magnifico cream puf...
3,Fashion Angels Color Rox Hair Chox Kit,Experiment with the haute trend of hair chalki...,Beauty & Personal Care Hair Care Hair Colorin...,fashion angel color rox hair chox kit experi w...
4,Union Creative Giant Killing Figure 05: Daisu...,From Union Creative. Turn your display shelf i...,Toys & Games › Action Figures & Statues › Sta...,union creativ giant kill figur 05 : daisuk tsu...


In [21]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

tfidfv = TfidfVectorizer(tokenizer=tokenize_stem)

def cosine_sim(txt1, txt2):
    matrix = tfidfv.fit_transform([txt1,txt2])
    return cosine_similarity(matrix)

In [35]:
def search_product(query):
    stemmed_query = tokenize_stem(query)
    
    # Calculating cosine similarity between query and stemmed tokens columns
    amazon_df["similarity"] = amazon_df["stemmed_tokens"].apply(lambda x: cosine_sim(stemmed_query, x)[0, 1] if len(x) > 0 else 0)
    res = amazon_df.sort_values(by=["similarity"], ascending=False).head(10)[["Title", "Description", "Category"]]
    return res

In [38]:
search_product(amazon_df["Title"][1])



Unnamed: 0,Title,Description,Category
1,Gemini200 Delta CV-880 Gold Crown Livery Airc...,Welcome to the exciting world of GeminiJets! O...,Toys & Games Hobbies Models & Model Kits Pre-...
150,PM Company Perfection Credit/Debit Verificati...,"Sold as a carton of 10 rolls, 1 ribbon, 1 clea...",Office Products Office & School Supplies Pape...
451,Premium Compatibles Inc. 310-8401PC Ink and T...,Premium Compatibles Inc. PCI Brand 310-8400PCI...,Office Products › Office & School Supplies › ...
482,Arthur Imaging Compatible Ink Cartridge Repla...,Arthur Imaging Compatible Ink Cartridge Replac...,Office Products › Office & School Supplies › ...
597,Cooper-Atkins 212-158-8 Bi-Metals Dry Storage...,The HACCP thermometer is brightly colored and ...,"Industrial & Scientific › Test, Measure & Ins..."
248,Insinger DE1-115 Auxiliary Contact 98220-332,"DE1-115, AUX CONTACT 98220-332. Insinger Genui...",Appliances › Parts & Accessories › Cooktop Pa...
206,Filofax 2018 Personal/Compact Academic Week t...,"Over the years, the Filofax organizer has evol...",Office Products › Office & School Supplies › ...
183,Patch Cable - RJ-45 - Male - RJ-45 - Male - 1...,As a global leader in power availability solut...,Electronics › Computers & Accessories › Compu...
589,Akashiya Barrier Mini Card Sleeves (50 Piece)...,"High quality card sleeves from KMC, card sleev...",Toys & Games Games Card Games
435,Vickerman Wreath with 180 PVC tips & 50 Dura-...,"24"" Purple wreath featuring 180 PVC tips and 5...","Home & Kitchen › Seasonal Décor › Wreaths, Ga..."
