In [None]:
import pandas as pd
import numpy as np
import networkx as nx
from gensim.models import Word2Vec
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics.pairwise import cosine_similarity

# Load Data
data = pd.read_csv("sample30.csv")

# Encode Users
user_encoder = LabelEncoder()
data['user_id'] = user_encoder.fit_transform(data['reviews_username'])

# Encode Products
product_encoder = LabelEncoder()
data['product_id'] = product_encoder.fit_transform(data['name'])

# Prepare Category Data for Word2Vec
category_sentences = data['categories'].apply(lambda x: x.split()).tolist()
word2vec_model = Word2Vec(sentences=category_sentences, vector_size=50, window=5, min_count=1, workers=4)

# Compute Category Embeddings
def get_category_embedding(category):
    words = category.split()
    vectors = [word2vec_model.wv[word] for word in words if word in word2vec_model.wv]
    return np.mean(vectors, axis=0) if vectors else np.zeros(50)

data['category_embedding'] = data['categories'].apply(get_category_embedding)
category_embeddings_matrix = np.vstack(data['category_embedding'].values)

# Compute Similarity Based on Word2Vec Embeddings
category_similarity = cosine_similarity(category_embeddings_matrix)
category_similarity_df = pd.DataFrame(category_similarity, index=data['product_id'], columns=data['product_id'])

# Create Graph
graph = nx.Graph()
for i in range(len(data)):
    for j in range(i + 1, len(data)):
        similarity = category_similarity[i, j]
        if similarity > 0.5:  # Threshold to create meaningful edges
            graph.add_edge(data['product_id'][i], data['product_id'][j], weight=similarity)

# Recommendation Function using Graph-Based Approach
def recommend_products(username, num_recommendations=3):
    if username not in user_encoder.classes_:
        print("User not found!")
        return []

    user_id = user_encoder.transform([username])[0]
    user_products = data[data['user_id'] == user_id]['product_id'].unique()

    recommended_items = set()
    for product in user_products:
        if product in graph:
            neighbors = sorted(graph[product].items(), key=lambda x: x[1]['weight'], reverse=True)
            similar_products = [p[0] for p in neighbors[:num_recommendations]]
            recommended_items.update(similar_products)

    return product_encoder.inverse_transform(list(recommended_items))

# Example Usage
recommended_items = recommend_products("liviasuexo")
print("Recommended Products:", recommended_items)

Recommended Products: ['Creme Of Nature Intensive Conditioning Treatment, 32'
 "L'or233al Paris Elvive Extraordinary Clay Rebalancing Conditioner - 12.6 Fl Oz"
 "L'oreal Paris Advanced Hairstyle Boost It High Lift Creation Spray"]


In [None]:
recommended_items = recommend_products("liviasuexo")
print("Recommended Products:", recommended_items)

Recommended Products: ['Creme Of Nature Intensive Conditioning Treatment, 32'
 "L'or233al Paris Elvive Extraordinary Clay Rebalancing Conditioner - 12.6 Fl Oz"
 "L'oreal Paris Advanced Hairstyle Boost It High Lift Creation Spray"]


In [None]:
recommended_items = recommend_products("charlie")
print("Recommended Products:", recommended_items)

Recommended Products: ['Queen Helene Cocoa Butter Solid'
 "Stargate (ws) (ultimate Edition) (director's Cut) (dvdvideo)"
 'Red (special Edition) (dvdvideo)' 'Alex Cross (dvdvideo)'
 'Creme Of Nature Intensive Conditioning Treatment, 32'
 'Romantic Drama: Greatest Classic Films Collection 2 Discs'
 'Yes To Grapefruit Rejuvenating Body Wash'
 'The Resident Evil Collection 5 Discs (blu-Ray)'
 'Aussie Aussome Volume Shampoo, 13.5 Oz' "Cars Toon: Mater's Tall Tales"
 'Caress Moisturizing Body Bar Natural Silk, 4.75oz'
 'Just For Men Touch Of Gray Gray Hair Treatment, Black T-55'
 'Godzilla 3d Includes Digital Copy Ultraviolet 3d/2d Blu-Ray/dvd'
 'Equals (blu-Ray)' 'Mike Dave Need Wedding Dates (dvd + Digital)'
 'Planes: Fire Rescue (2 Discs) (includes Digital Copy) (blu-Ray/dvd)'
 'My Big Fat Greek Wedding 2 (blu-Ray + Dvd + Digital)']


In [None]:
import pickle

# Save the graph
with open("product_graph.pkl", "wb") as f:
    pickle.dump(graph, f)

In [None]:
import pickle
import networkx as nx

# Load the graph
with open("product_graph.pkl", "rb") as f:
    loaded_graph = pickle.load(f)

print("Graph Loaded Successfully!")
print("Number of Nodes:", len(loaded_graph.nodes()))
print("Number of Edges:", len(loaded_graph.edges()))

Graph Loaded Successfully!
Number of Nodes: 271
Number of Edges: 12525


In [None]:
data['reviews_username'].value_counts()[:30]

Unnamed: 0_level_0,count
reviews_username,Unnamed: 1_level_1
mike,41
byamazon customer,41
chris,32
lisa,16
sandy,15
tony,15
rick,15
dave,14
john,14
jojo,13
