## Consumer-Oriented Amazon Product Recommendation Engine

In [33]:
import csv
import os

import numpy as np
import pandas as pd
from neo4j import GraphDatabase

In [36]:
# Load the product listings from 'amazon.csv' (path is relative to the working directory) into a DataFrame.
df = pd.read_csv('amazon.csv')

### Data Cleaning
- Limit the dataset to 1000 entries for faster processing
- Convert string columns that hold numeric values, such as `price` and `discounted_price`, to float
- Drop unneeded columns like product link and image link
- Assign categorical value to Category column of dataframe
- Convert non-dollar currencies to dollar values
- Limit the number of words in each Amazon product name to 10

In [37]:
# Truncate each product name to its first 10 whitespace-separated words
df['product_name'] = df['product_name'].str.split().str[:10].str.join(' ')
# Show the truncated names as the cell output
df['product_name']

0      Reffair AX30 [MAX] Portable Air Purifier for C...
1           rts [2 Pack] Mini USB C Type C Adapter Plug,
2            Kanget [2 Pack] Type C Female to USB A Male
3        Hp Wired On Ear Headphones With Mic With 3.5 Mm
4      JBL Commercial CSLM20B Auxiliary Omnidirection...
                             ...                        
994    Havells Ambrose 1200mm Ceiling Fan (Gold Mist ...
995              Bajaj Frore 1200 mm Ceiling Fan (Brown)
996    Crompton Sea Sapphira 1200 mm Ultra High Speed...
997    Havells Glaze 74W Pearl Ivory Gold Ceiling Fan...
998    Crompton Hill Briz Deco 1200mm (48 inch) High ...
Name: product_name, Length: 999, dtype: object

In [4]:
# Establish a connection to your Neo4j database.
# Connection settings are read from the environment (NEO4J_URI, NEO4J_USERNAME,
# NEO4J_PASSWORD) so real credentials never have to be saved in the notebook;
# the fallbacks below are the local-development values used previously.
uri = os.environ.get("NEO4J_URI", "bolt://localhost:7687")
username = os.environ.get("NEO4J_USERNAME", "neo4j")
password = os.environ.get("NEO4J_PASSWORD", "password")
driver = GraphDatabase.driver(uri, auth=(username, password))  # Connect to Neo4j database

In [5]:
# Create a product node in Neo4j
def create_product(tx, product):
    """Insert one ``Product`` node whose properties are taken from ``product``.

    Args:
        tx: Object exposing ``run(query, **params)``; in this notebook it is
            the transaction passed in by ``session.execute_write``.
        product: Mapping that contains every key in ``property_names``.
            Any additional keys are ignored.

    Raises:
        KeyError: If ``product`` lacks one of the required keys (raised before
            the query is run).
    """
    property_names = (
        'product_name', 'price', 'rating', 'category',
        'discount_percentage', 'review_title', 'discounted_price',
    )
    # Bind exactly the properties the Cypher statement refers to.
    bindings = {name: product[name] for name in property_names}
    cypher = """
    CREATE (p:Product {product_name: $product_name, price: $price, rating: $rating, 
    category: $category, discount_percentage: $discount_percentage, 
    review_title: $review_title, discounted_price: $discounted_price})
    """
    tx.run(cypher, **bindings)


In [6]:
# Read product data from CSV file
def read_product_data_from_csv(file_path, encoding='utf-8'):
    """Parse the product CSV at ``file_path`` into a list of product dicts.

    Each returned dict has the keys ``product_name``, ``category`` and
    ``review_title`` (kept as strings) plus ``price``, ``rating``,
    ``discount_percentage`` and ``discounted_price`` (converted with
    ``float``). Any other CSV columns are ignored.

    Args:
        file_path: Path of a CSV file whose header row contains the columns
            listed above.
        encoding: Text encoding used to decode the file. Defaults to UTF-8 so
            the result does not depend on the platform's locale encoding.

    Returns:
        list[dict]: One dict per data row, in file order (empty if the file
        has no data rows).

    Raises:
        KeyError: If a required column is absent from the header.
        ValueError: If a numeric column holds text ``float`` cannot parse.
    """
    # Output key order; membership in numeric_fields decides float conversion.
    field_order = (
        'product_name', 'price', 'rating', 'category',
        'discount_percentage', 'review_title', 'discounted_price',
    )
    numeric_fields = {'price', 'rating', 'discount_percentage', 'discounted_price'}

    # newline='' lets the csv module handle embedded newlines in quoted fields.
    with open(file_path, newline='', encoding=encoding) as csvfile:
        return [
            {
                name: float(row[name]) if name in numeric_fields else row[name]
                for name in field_order
            }
            for row in csv.DictReader(csvfile)
        ]

In [20]:
# Amazon CSV file path
csv_file = 'amazon.csv'

# Create product nodes in Neo4j
# NOTE(review): this reads the CSV from disk again instead of using `df`, so
# the product-name truncation applied to `df` earlier is not reflected in the
# nodes unless it was also written back to 'amazon.csv' — confirm intent.
with driver.session() as session:
    # `products` stays defined after this cell; the edge-creation cell iterates over it.
    products = read_product_data_from_csv(csv_file)
    for product in products:
        # One execute_write call (and therefore one transaction function run) per product.
        session.execute_write(create_product, product)

In [26]:
# Create an edge between two products in Neo4j
def create_edge(tx, product1, product2):
    """Link two products with a directed ``SIMILAR`` relationship.

    Nodes are looked up by ``product_name`` only, so every ``Product`` node
    carrying ``product1``'s name is linked to every node carrying
    ``product2``'s name.

    ``MERGE`` is used instead of ``CREATE`` so the function is idempotent:
    running it again for the same pair (a re-executed cell, or a retried
    transaction function) does not stack duplicate relationships.

    Args:
        tx: Object exposing ``run(query, **params)``; in this notebook it is
            the transaction passed in by ``session.execute_write``.
        product1: Mapping with a ``product_name`` key; source of the edge.
        product2: Mapping with a ``product_name`` key; target of the edge.

    Raises:
        KeyError: If either mapping has no ``product_name`` key.
    """
    query = """
    MATCH (p1:Product {product_name: $product_name_1})
    MATCH (p2:Product {product_name: $product_name_2})
    MERGE (p1)-[:SIMILAR]->(p2)
    """
    tx.run(query, product_name_1=product1['product_name'], product_name_2=product2['product_name'])

In [27]:
def _is_similar_pair(first, second):
    """Return True when two product dicts meet every edge criterion.

    A pair qualifies when the categories match, the prices differ by at most
    10, the ratings are equal, and the discount percentages differ by at most
    0.1. The checks short-circuit in that order.
    """
    return (
        first['category'] == second['category']
        and abs(first['price'] - second['price']) <= 10
        and first['rating'] == second['rating']
        and abs(first['discount_percentage'] - second['discount_percentage']) <= 0.1
    )


with driver.session() as session:
    # Walk every unordered pair of products once (i < j) and link the similar ones.
    product_count = len(products)
    for i in range(product_count):
        for j in range(i + 1, product_count):
            product1, product2 = products[i], products[j]
            if not _is_similar_pair(product1, product2):
                continue
            # One write transaction per qualifying pair.
            session.execute_write(create_edge, product1, product2)


  session.write_transaction(create_edge, product1, product2)
Failed to write data to connection IPv4Address(('localhost', 7687)) (IPv4Address(('127.0.0.1', 7687)))
Transaction failed and will be retried in 1.0619362631833467s (Failed to write data to connection IPv4Address(('localhost', 7687)) (IPv4Address(('127.0.0.1', 7687))))


In [28]:
# Cypher query that deletes up to 10,000 relationships of any type (kept commented out; not executed by this notebook)

# MATCH ()-[r]->()
# WITH r LIMIT 10000
# DELETE r


In [None]:
#TODO- recommendation algorithm

In [None]:
# Close the Neo4j driver connection; run this last, after all graph reads/writes are finished
driver.close()
