##  Consumer Oriented Amazon Product Recommendation Engine

In [76]:
import pandas as pd
import numpy as np
from neo4j import GraphDatabase
import csv

In [77]:
df = pd.read_csv('amazon.csv')

### Data Cleaning
- Limit the dataset to 1000 entries for faster processing
- Convert necessary string column types like price, discount_price to float
- Drop unneeded columns like product link and image link
- Assign categorical value to Category column of dataframe
- Convert non-dollar currencies to dollar values
- Limit the number of words in each Amazon product name to 10

In [78]:
#strip the product name to three words 
df['product_name'] = df['product_name'].str.split().str[:10].str.join(' ')
df['product_name']

0      Reffair AX30 [MAX] Portable Air Purifier for C...
1           rts [2 Pack] Mini USB C Type C Adapter Plug,
2            Kanget [2 Pack] Type C Female to USB A Male
3        Hp Wired On Ear Headphones With Mic With 3.5 Mm
4      JBL Commercial CSLM20B Auxiliary Omnidirection...
                             ...                        
994    Havells Ambrose 1200mm Ceiling Fan (Gold Mist ...
995              Bajaj Frore 1200 mm Ceiling Fan (Brown)
996    Crompton Sea Sapphira 1200 mm Ultra High Speed...
997    Havells Glaze 74W Pearl Ivory Gold Ceiling Fan...
998    Crompton Hill Briz Deco 1200mm (48 inch) High ...
Name: product_name, Length: 999, dtype: object

In [104]:
#add a more general category 
df['general_cat'] = df['category'].str.split('|', n=2).str[0]
set(df['general_cat'])

KeyError: 'category'

In [80]:
df.to_csv('amazon.csv')

In [4]:
# Establish a connection to your Neo4j database
uri = "bolt://localhost:7687"
username = "neo4j"
password = "password"
driver = GraphDatabase.driver(uri, auth=(username, password)) # Connect to Neo4j database

In [5]:
# Create a product node in Neo4j
def create_product(tx, product):
    query = """
    CREATE (p:Product {product_name: $product_name, price: $price, rating: $rating, 
    category: $category, discount_percentage: $discount_percentage, 
    review_title: $review_title, discounted_price: $discounted_price})
    """
    tx.run(query, product_name=product['product_name'], price=product['price'], 
           rating=product['rating'], category=product['category'], 
           discount_percentage=product['discount_percentage'], 
           review_title=product['review_title'], discounted_price=product['discounted_price'])


In [6]:
# Read product data from CSV file
def read_product_data_from_csv(file_path):
    products = []
    with open(file_path, newline='') as csvfile:
        reader = csv.DictReader(csvfile)
        for row in reader:
            product = {
                'product_name': row['product_name'],
                'price': float(row['price']),
                'rating': float(row['rating']),
                'category': row['category'],
                'discount_percentage': float(row['discount_percentage']),
                'review_title': row['review_title'],
                'discounted_price': float(row['discounted_price'])
                'general_cat': row['general_cat'],
                'product_id': row['product_id'],
                'image_link': row['img_link']
            }
            products.append(product)
    return products

In [20]:
# Amazon CSV file path
csv_file = 'amazon.csv'

# Create product nodes in Neo4j
with driver.session() as session:
    products = read_product_data_from_csv(csv_file)
    for product in products:
        session.execute_write(create_product, product)

In [26]:
# Create an edge between two products in Neo4j
def create_edge(tx, product1, product2):
    query = """
    MATCH (p1:Product {product_name: $product_name_1})
    MATCH (p2:Product {product_name: $product_name_2})
    CREATE (p1)-[:SIMILAR]->(p2)
    """
    tx.run(query, product_name_1=product1['product_name'], product_name_2=product2['product_name'])

In [27]:
with driver.session() as session:        
    # Create edges between products based on criteria
    for i in range(len(products)):
        for j in range(i + 1, len(products)):
            product1 = products[i]
            product2 = products[j]

            # Check criteria for creating edge
            if (product1['category'] == product2['category'] and
                    abs(product1['price'] - product2['price']) <= 10 and
                    product1['rating'] == product2['rating'] and
                    abs(product1['discount_percentage'] - product2['discount_percentage']) <= 0.1):
                session.execute_write(create_edge, product1, product2)


  session.write_transaction(create_edge, product1, product2)
Failed to write data to connection IPv4Address(('localhost', 7687)) (IPv4Address(('127.0.0.1', 7687)))
Transaction failed and will be retried in 1.0619362631833467s (Failed to write data to connection IPv4Address(('localhost', 7687)) (IPv4Address(('127.0.0.1', 7687))))


In [28]:
#delete some edges in neo4j

# MATCH ()-[r]->()
# WITH r LIMIT 10000
# DELETE r


### User Provided Input 
Categories of products available: 
1. 'Car & Motorbike',
2. 'Computers & Accessories',
3. 'Electronics',
4. 'Health & PersonalCare',
5. 'Home & Kitchen'

In [74]:
cat_num = {1: 'Car&Motorbike',
 2: 'Computers&Accessories',
 3: 'Electronics',
 4: 'Health&PersonalCare',
 5: 'Home&Kitchen'}

In [83]:
print("Welcome to the Amazon Product Recommender! ")
print("For the questions, enter only an integer")
input_category = int(input("From the list above of Product Categories, What type of product are you interested in? "))
input_max_price = int(input("What is your maximum budget? "))
input_rating = int(input("What is the minimum number of stars you want for the product: "))
input_num_ratings = int(input("What is the minumum number of ratings you want for the product: "))
        

Welcome to the Amazon Product Recommender! 
For the questions, enter only an integer
From the list above of Product Categories, What type of product are you interested in? 2
What is your maximum budget? 100
What is the minimum number of stars you want for the product: 4
What is the minumum number of ratings you want for the product: 100


In [93]:
from neo4j import GraphDatabase

# # Define the user input values for price, rating, and category
# input_price = 100
# input_rating = 4.5
# input_category = "Electronics"

# Define the Cypher query with parameters for user input
query = """
MATCH (inputProduct:Product)
WHERE inputProduct.price <= $inputPrice 
AND inputProduct.rating >= $inputRating 
AND inputProduct.general_cat = $inputCategory
WITH inputProduct
MATCH (similarProduct:Product)
WHERE similarProduct.price <= $inputPrice 
AND similarProduct.rating >= $inputRating 
AND similarProduct.category = $inputCategory
AND inputProduct <> similarProduct
WITH inputProduct, similarProduct
RETURN inputProduct, similarProduct, 
gds.similarity.euclideanDistance([inputProduct.price, inputProduct.rating, inputProduct.general_cat, inputProduct.category], 
[similarProduct.price, similarProduct.rating, similarProduct.general_cat, similarProduct.category]) AS similarity
ORDER BY similarity DESC
LIMIT 5
"""


In [98]:
#store results in a df 
# empty lists to store the data
input_product_list = []
similar_product_list = []
similarity_list = []

# Execute the Cypher query with parameters
with driver.session() as session:
    result = session.run(query, inputPrice=input_max_price, inputRating=input_rating, inputCategory=input_category)

    # Process the query result
    for record in result:
        input_product = record["inputProduct"]["product_name"]
        similar_product = record["similarProduct"]["product_name"]
        similarity = record["similarity"]
        
        # Process the results as needed, e.g., display or store recommendations
        #print(f"Input Product: {input_product}, Similar Product: {similar_product}, Similarity: {similarity}")
        # Append the data to the respective lists
        input_product_list.append(input_product)
        similar_product_list.append(similar_product)
        similarity_list.append(similarity)

# Create a DataFrame from the lists
df = pd.DataFrame({
    "Input Product": input_product_list,
    "Similar Product": similar_product_list,
    "Similarity": similarity_list
})

In [100]:
display(df)

Unnamed: 0,Input Product,Similar Product,Similarity
0,Seagate One Touch 2TB External HDD with Passwo...,E-COSMOS Plug in LED Night Light Mini USB LED ...,94.800475
1,Seagate One Touch 2TB External HDD with Passwo...,E-COSMOS Plug in LED Night Light Mini USB LED ...,94.800475
2,E-COSMOS Plug in LED Night Light Mini USB LED ...,Seagate One Touch 2TB External HDD with Passwo...,94.800475
3,E-COSMOS Plug in LED Night Light Mini USB LED ...,Seagate One Touch 2TB External HDD with Passwo...,94.800475
4,"realme narzo 50i (Mint Green, 2GB RAM+32GB Sto...",Apsara Platinum Pencils Value Pack - Pack of 20,94.800475


In [None]:
# Close the Neo4j driver connection
driver.close()
