In [146]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

# Loading Data

In [147]:
baby_meta_df= pd.read_csv('baby_meta.csv', dtype={'salesRank': dict}) 

In [148]:
baby_rating_df= pd.read_csv('ratings_Baby.csv', names = ['reviewerID', 'asin', 'overall', 'unixReviewTime']) 

In [149]:
baby_cluster_df = pd.read_csv('baby_4000_cluster_df.csv')[['asin', 'clusterId']]

In [150]:
products_count_df = baby_cluster_df.groupby('clusterId').count()\
                    .rename(index=str, columns={"asin": "product_count"}).reset_index()

# Finding Cluster Attributes

## Product Ratings

In [151]:
def create_product_rating_count_df(rating_x_meta_df):
    return pd.DataFrame(rating_x_meta_df.groupby('asin').overall.count())\
            .rename(index=str, columns={"overall": "rating_count"})
    

In [152]:
def create_product_avg_rating_df(rating_x_meta_df):
    return pd.DataFrame(rating_x_meta_df.groupby('asin').overall.mean())\
            .rename(index=str, columns={"overall": "avg_rating"})

In [153]:
def create_product_rating_summary_df(rating_df, meta_df):
    rating_x_meta_df = rating_df.join(meta_df.set_index('asin'), on='asin')
    count_df = create_product_rating_count_df(rating_x_meta_df)
    avg_df = create_product_avg_rating_df(rating_x_meta_df)
    return meta_df[['asin']].join(count_df, on='asin').join(avg_df, on='asin')

In [154]:
baby_product_rating_summary_df=create_product_rating_summary_df(baby_rating_df, baby_meta_df)

In [173]:
baby_product_summary_df = baby_product_rating_summary_df.join(baby_meta_df.set_index('asin'), on='asin')\
                            .join(baby_cluster_df.set_index('asin'), on='asin')

## Cluster rating attributes

In [156]:
def theil_index(array):
    mean = np.mean(array)
    count = len(array)-np.isnan(array).sum()
    return np.sum(((array/mean)*np.log(array/mean))/count)

In [157]:
def create_cluster_avg_rating_df(cluster_x_rating_summary_df):
    return cluster_x_rating_summary_df.groupby('clusterId').avg_rating.mean().to_frame()\
        .rename(index=str, columns={"avg_rating": "cluster_avg_rating"})

In [158]:
def create_cluster_rating_var_df(cluster_x_rating_summary_df):
    return pd.DataFrame(data=cluster_x_rating_summary_df.groupby('clusterId').avg_rating.var())\
           .rename(index=str, columns={"avg_rating": "cluster_avg_rating_var"})

In [159]:
def create_cluster_rating_theil_index_df(cluster_x_rating_summary_df):
    return pd.DataFrame(data=cluster_x_rating_summary_df.groupby('clusterId').avg_rating.apply(theil_index))\
            .rename(index=str, columns={"avg_rating": "cluster_rating_theil_index"})

In [160]:
def create_cluster_rating_count_df(cluster_x_rating_summary_df):
    return pd.DataFrame(cluster_x_rating_summary_df.groupby('clusterId').rating_count.sum())\
            .rename(index=str, columns={"rating_count": "cluster_rating_count"})

In [161]:
def create_cluster_rating_summary_df(cluster_df, product_rating_summary_df):
    cluster_x_rating_summary_df = cluster_df.join(product_rating_summary_df.set_index('asin'), on='asin')
    avg_df = create_cluster_avg_rating_df(cluster_x_rating_summary_df)
    count_df = create_cluster_rating_count_df(cluster_x_rating_summary_df)
    var_df = create_cluster_rating_var_df(cluster_x_rating_summary_df)
    t_df = create_cluster_rating_theil_index_df(cluster_x_rating_summary_df)
    return avg_df.join(count_df).join(var_df).join(t_df).reset_index()



In [162]:
baby_cluster_rating_summary_df = create_cluster_rating_summary_df(baby_cluster_df, baby_product_rating_summary_df)

## Cluster price attributes

In [163]:
def create_cluster_price_var_df(cluster_x_meta_df):
    return pd.DataFrame(data=cluster_x_meta_df.groupby('clusterId').price.var())\
           .rename(index=str, columns={"price": "cluster_price_var"})

In [164]:
def create_cluster_price_theil_index_df(cluster_x_meta_df):
     return pd.DataFrame(data=cluster_x_meta_df.groupby('clusterId').price.apply(theil_index))\
            .rename(index=str, columns={"price": "cluster_price_theil_index"})

In [165]:
def create_cluster_price_summary_df(cluster_df, meta_df):
    cluster_x_meta_df = cluster_df.join(meta_df.set_index('asin'), on='asin')
    var_df = create_cluster_price_var_df(cluster_x_meta_df)
    t_df = create_cluster_price_theil_index_df(cluster_x_meta_df)
    return var_df.join(t_df).reset_index()

In [166]:
baby_cluster_price_summary_df = create_cluster_price_summary_df(baby_cluster_df, baby_meta_df)

## Combine attributes

In [167]:
pd.options.display.max_columns = 20
pd.options.display.max_rows = 200

In [168]:
baby_cluster_summary_df = baby_cluster_rating_summary_df.join(baby_cluster_price_summary_df.\
                                                              set_index('clusterId'), on='clusterId')
#baby_cluster_summary_df['clusterId'] = baby_cluster_summary_df['clusterId'].astype(int)
baby_cluster_summary_df =baby_cluster_summary_df.join(products_count_df.set_index('clusterId'), on='clusterId')
baby_cluster_summary_df['ratings per product']= baby_cluster_summary_df['cluster_rating_count']/baby_cluster_summary_df['product_count']

In [169]:
baby_cluster_summary_df.sort_values(by='cluster_avg_rating')

Unnamed: 0,clusterId,cluster_avg_rating,cluster_rating_count,cluster_avg_rating_var,cluster_rating_theil_index,cluster_price_var,cluster_price_theil_index,product_count,ratings per product
3074,3074,1.000000,3.0,,0.000000,,0.000000e+00,1,3.000000
3490,3490,1.000000,1.0,,0.000000,,0.000000e+00,1,1.000000
3742,3742,1.000000,1.0,,0.000000,,0.000000e+00,1,1.000000
460,460,1.000000,1.0,,0.000000,,0.000000e+00,1,1.000000
2517,2517,1.000000,1.0,,0.000000,,0.000000e+00,1,1.000000
1135,1135,1.000000,1.0,,0.000000,,0.000000e+00,1,1.000000
3382,3382,1.000000,1.0,,0.000000,,0.000000e+00,1,1.000000
1942,1942,1.000000,1.0,,0.000000,,0.000000e+00,1,1.000000
2219,2219,1.000000,1.0,,0.000000,,0.000000e+00,1,1.000000
2535,2535,1.000000,1.0,,0.000000,,0.000000e+00,1,1.000000


# Viewing Clusters and Products of interest

In [170]:
from IPython.core.display import display, HTML
display(HTML("<style>.container { width:80% !important; }</style>"))
pd.set_option('display.expand_frame_repr', False)
pd.set_option('max_colwidth', 120)

## View single product ('asin')

In [171]:
baby_product_summary_df.loc[baby_product_summary_df['asin'] == '0980027500']

Unnamed: 0.1,asin,rating_count,avg_rating,Unnamed: 0,categories,description,title,price,imUrl,brand,related,salesRank
10,980027500,12.0,4.583333,10,[['Baby']],This calendar provides busy parents with a simple way to note both big and little moments from baby's first year. A ...,Nature's Lullabies First Year Sticker Calendar,9.99,http://ecx.images-amazon.com/images/I/31PYzNf0RBL._SY300_.jpg,,"{'also_bought': ['0980027594', '0980027586', '0307342301', 'B003NMTJGS', 'B004LE8TAE', '0307461971', '1593596103', '...",


## View products by cluster ('clusterId')

In [174]:
baby_product_summary_df.loc[baby_product_summary_df['clusterId'] == 3208]

Unnamed: 0.1,asin,rating_count,avg_rating,Unnamed: 0,categories,description,title,price,imUrl,brand,related,salesRank,clusterId
5191,B000H46YIQ,2.0,4.5,5191,[['Baby']],Wider 8 x 8 Inch Corner Cushion in Brown for Safety Use this wide hearth guard size edge cushion for industrial safe...,"Kids Edge Fire Resistant Safety Pad Corner, BROWN 8x8 Inches",12.0,http://ecx.images-amazon.com/images/I/31VTM1xwt%2BL._SX300_.jpg,,"{'also_viewed': ['B000056OVS', 'B00FG6G7XM', 'B000CSK15E', 'B002NG6TV8', 'B004GCJMLG', 'B00081MHJI', 'B0082IZ8ZO', '...",,3208.0
5196,B000H48FBA,1.0,5.0,5196,[['Baby']],54 Inch Stick of Gray Hearth Padding Cushions Edges for Child Safety Use Kids Edge Hearth Guard Pads for child safet...,"Kids Edge Wide Profile Pad, One Stick W/Tape GRAY 54 inch",32.95,http://ecx.images-amazon.com/images/I/41GFE56US8L._SY300_.jpg,,"{'also_viewed': ['B001KXEE58', 'B001KXKAF6', 'B000056OVS', 'B00FG6G7XM', 'B0019L6ZDA', 'B0015SBQZ4', 'B000F1S9A2', '...",,3208.0
5199,B000H4C660,1.0,4.0,5199,[['Baby']],Gray Long 8 Inch by 8 Inch Safety Product Pads Corners Are 2 3/8 Inch Wide Use these corner protectors to improve sa...,"Kids Edge Fire Resistant Safety Pad Corner, GRAY, 8x8 Inches",12.0,http://ecx.images-amazon.com/images/I/31sNYQQLLEL._SX300_.jpg,,"{'also_viewed': ['B00FG6G7XM', 'B00GX65WZI', 'B001MTELKI'], 'buy_after_viewing': ['B001KXEE58', 'B002SG7JSU', 'B0008...",,3208.0
22951,B002UPR542,4.0,1.0,22951,[['Baby']],New 4 Pack Corner Cushions. These child safety cushions prevent injury on sharp corner and edges. Keep your child al...,4 Pc Child Safety Table Corner Cushions Pad Baby Edge Protection Kids Edge Guard,1.26,http://ecx.images-amazon.com/images/I/410oLDIUTbL._SY300_.jpg,4SGM,"{'also_viewed': ['B000USV8JO', 'B002SG7JSU', 'B005EJWQIQ', 'B007A2ZR36', 'B009TO9UZE', 'B009B390T4', 'B001MTELKI', '...",,3208.0
53371,B0091SNNFG,2.0,1.0,53371,[['Baby']],"They have adhesive pads to stick to all even surfaces\n Easy to install, first, open the safety lock and peel off...",Child Safety Products Baby Care Drawer Safety Lock Door Baby Infant Safety Cabinet Locker 10pcs/5pack,16.99,http://ecx.images-amazon.com/images/I/518sYKIw9FL._SX300_.jpg,,"{'buy_after_viewing': ['B008BVY9CK', 'B002FQKB42', 'B007A2ZSZ8', 'B005QU55EA']}",,3208.0
59871,B00BLQOGNO,1.0,1.0,59871,[['Baby']],"This item is 2M Length 3.5cm Width Baby Safety Anti-crash Desk Table Protection Strip. It can be used for wood, glas...",200cm.x3.5cm. Beautiful and Practical Baby &amp; Kids Safety Anti-crash Table Edge Kushions (Random Color),19.99,http://ecx.images-amazon.com/images/I/51b8RBv%2B8jL._SY300_.jpg,,"{'buy_after_viewing': ['B002SG7JSK', 'B000MHXFTO', 'B00081J3N6', 'B00313J2UC']}",,3208.0
65051,B00EDZUICI,1.0,1.0,65051,[['Baby']],"DescriptionThe item is a 2M L-shaped baby safety corner anti-collision strip, which is made of non-toxic soft thicke...",Pixnor&reg; 2M Baby Bumper Strip Safety Table Edge Corner Protector Guard Cushion Anti-collision Strip (Random Color),11.39,http://ecx.images-amazon.com/images/I/517fz1iX%2BDL._SY300_.jpg,,"{'buy_after_viewing': ['B000MHXFTO', 'B002SG7JSK', 'B001KXKAF6', 'B00FG6G7XM']}",,3208.0
70138,B00IJTTRZI,2.0,1.5,70138,[['Baby']],"From the manufacturer:1. Easy to install: Firstly peel off the backing from the adhesive pad, and then affix the pad...",5pcs Infant Child Kid Lock Safety Fridge Latch Babysafe Cabinet Door Cupboard Fridge Drawer Baby Smiling Face Safety...,6.6,http://ecx.images-amazon.com/images/I/31bTMAKDQ6L._SY300_.jpg,,"{'also_viewed': ['B007A2ZSZ8', 'B00CHHA8EW', 'B008BVY9CK'], 'buy_after_viewing': ['B007A2ZSZ8']}",,3208.0
70531,B00IZTMVLO,1.0,2.0,70531,[['Baby']],Features:1. Color: White2. Material: Plastic3. Dimensions: 19 x4 x 1.4 cm(L x W x H)4. They have adhesive pads to st...,4pcs Baby Kids Toddler Children Infant U Shaped Safety Door Fridge Drawer Wardrobe Cabinet Slide Sliding Lock,6.29,http://ecx.images-amazon.com/images/I/31%2BQaYFJ2fL._SY300_.jpg,,"{'also_viewed': ['B004ALNH9Q', 'B0014J0W8G', 'B003NSAY7U', 'B00B91OR5S', 'B005QU55EA', 'B00H7XUKRQ', 'B0009ET8EG', '...",,3208.0
70585,B00J1ZLOZ0,1.0,1.0,70585,[['Baby']],Features:1. Color: Blue2. Material: Plastic3. Dimensions: 15 x 2 cm(L x W )4. They have adhesive pads to stick to al...,4 Pack Baby Child Infant Toddler Kids Safe Safety Home Toilet Door Wardrobe Cabinet Cupboard Drawer Fridge Lock Latc...,4.39,http://ecx.images-amazon.com/images/I/31ZiNWVvRWL._SY300_.jpg,,"{'also_viewed': ['B007A2ZSZ8', 'B008BVY9CK', 'B00JIMN044', 'B00CHHA8EW', 'B003NSAY7U', 'B00J1YKQVE', 'B00068O250', '...",,3208.0


# Output csv

In [175]:
baby_cluster_summary_df.to_csv('baby_cluster_summary.csv')
baby_product_summary_df.to_csv('baby_product_summary.csv')