In [1]:
import collections
import pandas as pd
import networkx as nx
import community as community_louvain
from collections import Counter, defaultdict

In [2]:
# Notebook inputs: the feature table (CSV) and the graph stored as GML.
# The two reads are independent of each other.
df = pd.read_csv('../dataset/new_features.csv')
G = nx.read_gml('../network/products.gml')

In [7]:
# Adamic-Adar index for every (u, v, score) triple yielded by networkx.
# No ebunch is passed, so networkx scores the node pairs that are not
# already connected in G.
aa_list = list(nx.adamic_adar_index(G))

# One row per scored pair.
adamic_adar_df = pd.DataFrame(
    aa_list,
    columns=['product_u', 'product_v', 'AA_score'],
)

In [8]:
# Jaccard coefficient for every (u, v, score) triple yielded by networkx.
# No ebunch is passed, so networkx scores the node pairs that are not
# already connected in G.
jc_list = list(nx.jaccard_coefficient(G))

# One row per scored pair.
jaccard_df = pd.DataFrame(
    jc_list,
    columns=['product_u', 'product_v', 'Jaccard'],
)

In [9]:
# Combine both link-prediction scores into one table and save it as CSV.
#
# The Jaccard column is attached by row-index alignment, not by product pair.
# That is only correct when both frames list exactly the same
# (product_u, product_v) pairs in the same order, so verify it first;
# otherwise scores would silently end up on the wrong pairs.
pair_columns = ['product_u', 'product_v']
if not adamic_adar_df[pair_columns].equals(jaccard_df[pair_columns]):
    raise ValueError(
        'adamic_adar_df and jaccard_df do not list the same product pairs in '
        'the same order; re-run both scoring cells on the same graph'
    )

proj_df = adamic_adar_df.copy()
proj_df['jaccard'] = jaccard_df['Jaccard']
proj_df.to_csv('../dataset/products_projection.csv', index=False)

In [None]:
# Louvain community detection: `c` maps each node of G to a community id.
# The algorithm is randomised, so a fixed seed is passed to make the partition
# (and the community ids later written to the feature CSV) reproducible
# across kernel restarts.
LOUVAIN_SEED = 42
c = community_louvain.best_partition(G, random_state=LOUVAIN_SEED)

In [None]:
# number of communities

# Count the distinct community ids among the values of the partition mapping.
len({community_id for community_id in c.values()})

In [None]:
# number of products per community

# Tally how many nodes the partition assigns to each community id.
Counter(c.values())

In [None]:
# top 5 main_categories per community

# Gather the main_category attribute of every node, keyed by its community.
# Nodes whose attribute is missing or empty are left out.
communities_categories = defaultdict(list)
for product, community_id in c.items():
    label = G.nodes[product].get('main_category')
    if not label:
        continue
    communities_categories[community_id].append(label)

# For each community (ascending id) print its five most frequent categories
# together with their share of the community's categorised nodes.
for community_id in sorted(communities_categories):
    labels = communities_categories[community_id]
    total = len(labels)
    ranking = Counter(labels).most_common(5)

    print(f"\n community {community_id}")
    for rank, (label, n_products) in enumerate(ranking, start=1):
        share = n_products / total * 100
        print(f"   {rank}. {label.title()}: {n_products} products ({share:.1f}%)")

In [None]:
# average price per community

# Collect the usable prices of each community. A price is usable when it is
# present, converts to a float and is not NaN.
communities_prices = defaultdict(list)

for node, comm in c.items():
    price = G.nodes[node].get('price')

    # Falsy values (None, '', 0) are treated as a missing price.
    if not price:
        continue

    try:
        value = float(price)
    except (TypeError, ValueError):
        # Neither a number nor numeric text: leave this node out of the average.
        continue

    # NaN is truthy and converts cleanly, but a single NaN would turn the
    # whole community average into NaN, so it is excluded explicitly.
    if pd.isna(value):
        continue

    communities_prices[comm].append(value)

# Only communities with at least one usable price have an entry, so the
# division below never sees an empty list.
for comm in sorted(communities_prices):
    prices = communities_prices[comm]
    avg_price = sum(prices) / len(prices)

    print(f"\n community {comm}")
    print(f" Average price: ${avg_price:.2f}")

In [None]:
# Attach each row's community id to the feature table and write it back.
# The table is re-read from disk and the same CSV is then overwritten;
# rows whose parent_asin is not a key of `c` get NaN in product_community.
features_path = '../dataset/new_features.csv'
df = pd.read_csv(features_path).assign(
    product_community=lambda frame: frame['parent_asin'].map(c)
)
df.to_csv(features_path, index=False)