In [1]:
import pandas as pd
data = pd.read_csv("data.csv")
data.head()

Unnamed: 0,product_title,category,product_subcategory,brand,selling_price,original_price,product_type,product_rating,product_description,customer_reviews_count,seasonal_indicator,promotion_indicator,shipping_weight,bundle_indicator,customer_demographics,discount_offered,brand_scale
0,Garlic Oil - Vegetarian Capsule 500 mg,Beauty & Hygiene,Hair Care,Sri Sri Ayurveda,220.0,220.0,Hair Oil & Serum,4.1,This Product contains Garlic Oil that is known...,7,Spring,Yes,0.658145,Individual,Female,0.0,medium
1,Water Bottle - Orange,"Kitchen, Garden & Pets",Storage & Accessories,Mastercook,180.0,180.0,Water & Fridge Bottles,2.3,"Each product is microwave safe (without lid), ...",54,Winter,No,4.734585,Bundle,Female,0.0,medium
2,"Brass Angle Deep - Plain, No.2",Cleaning & Household,Pooja Needs,Trm,119.0,250.0,Lamp & Lamp Oil,3.4,"A perfect gift for all occasions, be it your m...",31,Winter,No,2.02414,Bundle,Female,52.4,medium
3,Cereal Flip Lid Container/Storage Jar - Assort...,Cleaning & Household,Bins & Bathroom Ware,Nakoda,149.0,176.0,"Laundry, Storage Baskets",3.7,Multipurpose container with an attractive desi...,49,Spring,No,3.869239,Individual,Other,15.34,medium
4,Creme Soft Soap - For Hands & Body,Beauty & Hygiene,Bath & Hand Wash,Nivea,162.0,162.0,Bathing Bars & Soaps,4.4,Nivea Creme Soft Soap gives your skin the best...,74,Winter,Yes,0.812489,Individual,Male,0.0,medium


In [2]:
category_stats = data.groupby(['category', 'product_subcategory']).agg({'product_rating': 'mean', 'customer_reviews_count': 'sum', 'selling_price': 'mean'}).reset_index()
category_stats = category_stats.rename(columns={'product_rating':'avg_product_rating','customer_reviews_count':'total_reviews','selling_price':'avg_selling_price'})
category_stats = category_stats.sort_values(by=['avg_product_rating', 'total_reviews'], ascending=False)
category_stats

Unnamed: 0,category,product_subcategory,avg_product_rating,total_reviews,avg_selling_price
34,Cleaning & Household,Detergents & Dishwash,4.269349,12864,269.750536
71,Gourmet & World Food,Oils & Vinegar,4.231590,12919,952.411297
30,Beverages,Water,4.170588,1108,153.191176
26,Beverages,Energy & Soft Drinks,4.168018,5560,150.441982
63,Gourmet & World Food,"Atta, Flours & Sooji",4.166667,550,195.500000
...,...,...,...,...,...
13,"Bakery, Cakes & Dairy",Ice Creams & Desserts,3.622917,2289,170.054167
0,Baby Care,"Atta, Flours & Sooji",3.600000,96,300.000000
79,"Kitchen, Garden & Pets",Cookware & Non Stick,3.580085,17533,1204.869774
1,Baby Care,Baby Accessories,3.145946,2169,409.648649


In [3]:
top_category = pd.pivot_table(category_stats,index='category', values=['avg_product_rating','total_reviews','avg_selling_price'],aggfunc={'avg_product_rating': 'mean',
                                      'total_reviews': 'sum',
                                      'avg_selling_price': 'mean'})

top_category= top_category.sort_values(by='avg_product_rating', ascending=False).reset_index()

In [24]:
import plotly.express as px

# Create bubble chart with greenish color theme and adjusted category labels
fig = px.scatter(top_category, 
                 x='avg_product_rating', 
                 y='total_reviews', 
                 size='avg_selling_price',  # Size of bubbles determined by avg_selling_price
                 color='avg_product_rating',  # Color of bubbles determined by avg_product_rating
                 color_continuous_scale='greens',  # Greenish color theme
                 hover_data={'category': True,
                             'avg_selling_price': True,
                             'avg_product_rating': True},
                 title='Distribution of Avg Selling Price, Total Customer reviews and Avg Rating across all categories',
                 labels={'category': 'Category', 
                         'avg_selling_price': 'Avg Selling Price',
                         'avg_product_rating': 'Avg Product Rating'},
                 size_max=30,  # Adjust maximum bubble size as needed
                 text='category',  # Text labels for bubbles
                )

# Adjust label position, color, and size
fig.update_traces(textposition='top center',  # Place labels outside the bubbles
                  textfont_color='black',    # Set label color to black
                  textfont=dict(size=10),     # Set font size to 10
                  marker=dict(line=dict(width=1, color='black'))  # Add outline borders to bubbles
                 )

# Update layout to include visible axis lines
fig.update_layout(
    xaxis_title='Avg Product Rating',
    yaxis_title='Customer Review Count',
    plot_bgcolor='white',
    coloraxis_colorbar=dict(title='Avg Product Rating'),  # Color bar title for the color scale
    xaxis=dict(showline=True, linewidth=1, linecolor='black'),  # Show x-axis line
    yaxis=dict(showline=True, linewidth=1, linecolor='black')   # Show y-axis line
)

# Show the plot
fig.show()


## Subcategory Analysis

In [5]:
# Find the index of the maximum average product_rating for each category
max_rating_index = category_stats.groupby('category')['avg_product_rating'].idxmax()
min_rating_index = category_stats.groupby('category')['avg_product_rating'].idxmin()

# Find the index of the maximum average customer_reviews_count for each category
max_reviews_index = category_stats.groupby('category')['total_reviews'].idxmax()
min_reviews_index = category_stats.groupby('category')['total_reviews'].idxmin()

# Extract the corresponding sub-categories with the highest average rating and customer reviews count for each category
best_subcategories_rating = category_stats.loc[max_rating_index]
low_subcategories_rating = category_stats.loc[min_rating_index]
best_subcategories_reviews = category_stats.loc[max_reviews_index]
low_subcategories_reviews = category_stats.loc[min_reviews_index]

In [6]:
best_subcategories_rating

Unnamed: 0,category,product_subcategory,avg_product_rating,total_reviews,avg_selling_price
3,Baby Care,Baby Food & Formula,4.165957,2363,361.152128
12,"Bakery, Cakes & Dairy",Gourmet Breads,4.105714,1881,107.455714
23,Beauty & Hygiene,Oral Care,4.124723,14320,182.256827
30,Beverages,Water,4.170588,1108,153.191176
34,Cleaning & Household,Detergents & Dishwash,4.269349,12864,269.750536
44,"Eggs, Meat & Fish",Mutton & Lamb,4.023684,841,721.110526
51,"Foodgrains, Oil & Masala",Edible Oils & Ghee,4.125605,12037,540.90871
56,Fruits & Vegetables,Cuts & Sprouts,4.000526,4875,35.368947
71,Gourmet & World Food,Oils & Vinegar,4.23159,12919,952.411297
84,"Kitchen, Garden & Pets",Pet Food & Accessories,4.138202,17233,857.348034


In [7]:
low_subcategories_rating

Unnamed: 0,category,product_subcategory,avg_product_rating,total_reviews,avg_selling_price
1,Baby Care,Baby Accessories,3.145946,2169,409.648649
13,"Bakery, Cakes & Dairy",Ice Creams & Desserts,3.622917,2289,170.054167
17,Beauty & Hygiene,Fragrances & Deos,3.74695,48037,893.09959
25,Beverages,Coffee,3.967978,4193,208.07236
32,Cleaning & Household,Bins & Bathroom Ware,3.760077,20064,272.68648
45,"Eggs, Meat & Fish",Pork & Other Meats,2.64,102,263.0
50,"Foodgrains, Oil & Masala",Dry Fruits,3.890394,10139,255.474877
60,Fruits & Vegetables,Fresh Vegetables,4.0,8294,30.773452
66,Gourmet & World Food,Chocolates & Biscuits,3.875205,31412,269.818177
79,"Kitchen, Garden & Pets",Cookware & Non Stick,3.580085,17533,1204.869774


In [8]:
best_subcategories_reviews

Unnamed: 0,category,product_subcategory,avg_product_rating,total_reviews,avg_selling_price
4,Baby Care,Diapers & Wipes,4.095547,12573,741.90996
11,"Bakery, Cakes & Dairy",Dairy,4.054156,19498,149.439169
24,Beauty & Hygiene,Skin Care,3.947319,115724,412.119608
29,Beverages,Tea,4.080389,16106,269.842874
31,Cleaning & Household,All Purpose Cleaners,4.126211,24219,271.590611
46,"Eggs, Meat & Fish","Sausages, Bacon & Salami",4.02327,7610,282.237736
52,"Foodgrains, Oil & Masala",Masalas & Spices,4.094094,46249,90.488481
60,Fruits & Vegetables,Fresh Vegetables,4.0,8294,30.773452
75,Gourmet & World Food,"Snacks, Dry Fruits, Nuts",3.988333,42189,227.910952
86,"Kitchen, Garden & Pets",Storage & Accessories,3.827833,49877,303.489655


In [9]:
low_subcategories_reviews

Unnamed: 0,category,product_subcategory,avg_product_rating,total_reviews,avg_selling_price
0,Baby Care,"Atta, Flours & Sooji",3.6,96,300.0
7,"Bakery, Cakes & Dairy",Bakery Snacks,3.938,1315,104.4484
22,Beauty & Hygiene,Mothers & Maternity,3.885714,332,333.714286
30,Beverages,Water,4.170588,1108,153.191176
33,Cleaning & Household,Car & Shoe Care,3.888542,2056,198.100417
43,"Eggs, Meat & Fish",Marinades,4.0,48,225.0
48,"Foodgrains, Oil & Masala",Baby Food & Formula,4.0,52,225.0
58,Fruits & Vegetables,"Flower Bouquets, Bunches",4.0,473,83.25
70,Gourmet & World Food,Mutton & Lamb,4.05,97,619.0
78,"Kitchen, Garden & Pets",Bakeware,3.65,2288,214.666667


In [10]:
merged_df = pd.concat([best_subcategories_rating, best_subcategories_reviews], ignore_index=True)
merged_df_low = pd.concat([low_subcategories_rating, low_subcategories_reviews], ignore_index=True)

# Sort the merged dataframe by category and then by either product_rating or customer_reviews_count
merged_df.sort_values(by=['category', 'avg_product_rating', 'total_reviews'], ascending=[True, False, False], inplace=True)
merged_df_low.sort_values(by=['category', 'avg_product_rating', 'total_reviews'], ascending=[True, False, False], inplace=True)

merged_df

Unnamed: 0,category,product_subcategory,avg_product_rating,total_reviews,avg_selling_price
0,Baby Care,Baby Food & Formula,4.165957,2363,361.152128
11,Baby Care,Diapers & Wipes,4.095547,12573,741.90996
1,"Bakery, Cakes & Dairy",Gourmet Breads,4.105714,1881,107.455714
12,"Bakery, Cakes & Dairy",Dairy,4.054156,19498,149.439169
2,Beauty & Hygiene,Oral Care,4.124723,14320,182.256827
13,Beauty & Hygiene,Skin Care,3.947319,115724,412.119608
3,Beverages,Water,4.170588,1108,153.191176
14,Beverages,Tea,4.080389,16106,269.842874
4,Cleaning & Household,Detergents & Dishwash,4.269349,12864,269.750536
15,Cleaning & Household,All Purpose Cleaners,4.126211,24219,271.590611


In [11]:
merged_df_low

Unnamed: 0,category,product_subcategory,avg_product_rating,total_reviews,avg_selling_price
11,Baby Care,"Atta, Flours & Sooji",3.6,96,300.0
0,Baby Care,Baby Accessories,3.145946,2169,409.648649
12,"Bakery, Cakes & Dairy",Bakery Snacks,3.938,1315,104.4484
1,"Bakery, Cakes & Dairy",Ice Creams & Desserts,3.622917,2289,170.054167
13,Beauty & Hygiene,Mothers & Maternity,3.885714,332,333.714286
2,Beauty & Hygiene,Fragrances & Deos,3.74695,48037,893.09959
14,Beverages,Water,4.170588,1108,153.191176
3,Beverages,Coffee,3.967978,4193,208.07236
15,Cleaning & Household,Car & Shoe Care,3.888542,2056,198.100417
4,Cleaning & Household,Bins & Bathroom Ware,3.760077,20064,272.68648


In [12]:
import pandas as pd
import plotly.express as px

# Data preparation
top_data = {
    'category': ['Baby Care', 'Baby Care', 'Bakery, Cakes & Dairy', 'Bakery, Cakes & Dairy', 'Beauty & Hygiene', 
                 'Beauty & Hygiene', 'Beverages', 'Beverages', 'Cleaning & Household', 'Cleaning & Household',
                 'Eggs, Meat & Fish', 'Eggs, Meat & Fish', 'Foodgrains, Oil & Masala', 'Foodgrains, Oil & Masala',
                 'Fruits & Vegetables', 'Fruits & Vegetables', 'Gourmet & World Food', 'Gourmet & World Food',
                 'Kitchen, Garden & Pets', 'Kitchen, Garden & Pets', 'Snacks & Branded Foods', 'Snacks & Branded Foods'],
    'product_subcategory': ['Baby Food & Formula', 'Diapers & Wipes', 'Gourmet Breads', 'Dairy', 'Oral Care', 'Skin Care',
                            'Water', 'Tea', 'Detergents & Dishwash', 'All Purpose Cleaners', 'Mutton & Lamb', 
                            'Sausages, Bacon & Salami', 'Edible Oils & Ghee', 'Masalas & Spices', 'Cuts & Sprouts', 
                            'Fresh Vegetables', 'Oils & Vinegar', 'Snacks, Dry Fruits, Nuts', 'Pet Food & Accessories',
                            'Storage & Accessories', 'Biscuits & Cookies', 'Ready To Cook & Eat'],
    'avg_product_rating': [4.165957447, 4.095546559, 4.105714286, 4.054155844, 4.124723247, 3.947319093,
                           4.170588235, 4.080389222, 4.269348659, 4.126210526, 4.023684211, 4.02327044,
                           4.125604839, 4.094094488, 4.000526316, 4, 4.231589958, 3.988333333,
                           4.138202247, 3.827832512, 4.160819672, 3.91874028],
    'total_reviews': [2363, 12573, 1881, 19498, 14320, 115724, 1108, 16106, 12864, 24219, 841, 7610,
                      12037, 46249, 4875, 8294, 12919, 42189, 17233, 49877, 14369, 32204]
}

low_data = {
    'category': ['Baby Care', 'Baby Care', 'Bakery, Cakes & Dairy', 'Bakery, Cakes & Dairy', 'Beauty & Hygiene', 
                 'Beauty & Hygiene', 'Beverages', 'Beverages', 'Cleaning & Household', 'Cleaning & Household',
                 'Eggs, Meat & Fish', 'Eggs, Meat & Fish', 'Foodgrains, Oil & Masala', 'Foodgrains, Oil & Masala',
                 'Fruits & Vegetables', 'Fruits & Vegetables', 'Gourmet & World Food', 'Gourmet & World Food',
                 'Kitchen, Garden & Pets', 'Kitchen, Garden & Pets', 'Snacks & Branded Foods', 'Snacks & Branded Foods'],
    'product_subcategory': ['Atta, Flours & Sooji', 'Baby Accessories', 'Bakery Snacks', 'Ice Creams & Desserts',
                            'Mothers & Maternity', 'Fragrances & Deos', 'Water', 'Coffee', 'Car & Shoe Care',
                            'Bins & Bathroom Ware', 'Marinades', 'Pork & Other Meats', 'Baby Food & Formula',
                            'Dry Fruits', 'Fresh Vegetables', 'Flower Bouquets, Bunches', 'Mutton & Lamb',
                            'Chocolates & Biscuits', 'Bakeware', 'Cookware & Non Stick', 'Atta, Flours & Sooji',
                            'Frozen Veggies & Snacks'],
    'avg_product_rating': [3.6, 3.145945946, 3.938, 3.622916667, 3.885714286, 3.74695, 4.170588235, 3.967977528,
                           3.888541667, 3.760076531, 4, 2.64, 4, 3.890394089, 4, 4, 4.05, 3.875205255,
                           3.65, 3.580084746, 4.05, 3.752459016],
    'total_reviews': [96, 2169, 1315, 2289, 332, 48037, 1108, 4193, 2056, 20064, 48, 102, 52, 10139,
                      8294, 473, 97, 31412, 2288, 17533, 6, 8855]
}

df_top = pd.DataFrame(top_data)
df_low = pd.DataFrame(low_data)

# Combine the dataframes
df_combined = pd.concat([df_top, df_low])

# Calculate median values
median_rating = df_combined['avg_product_rating'].median()
median_reviews = df_combined['total_reviews'].median()

# Create scatter plot
fig = px.scatter(df_combined, x='avg_product_rating', y='total_reviews', color='category', 
                 size='avg_product_rating', hover_name='product_subcategory', 
                 title='Product Subcategories by Rating and Review Count',
                 labels={'avg_product_rating': 'Average Rating', 'total_reviews': 'Total Reviews'},
                 width=1000, height=00,)

# Add median lines
fig.add_shape(type='line', x0=median_rating, y0=0, x1=median_rating, y1=df_combined['total_reviews'].max(),
              line=dict(color='Red', dash='dash'))

fig.add_shape(type='line', x0=0, y0=median_reviews, x1=df_combined['avg_product_rating'].max(), y1=median_reviews,
              line=dict(color='Red', dash='dash'))

# Add quadrant labels
fig.add_annotation(text='Median', x=median_rating, y=median_reviews, showarrow=True, arrowhead=1)
fig.add_annotation(text='High Rating, High Reviews', x=df_combined['avg_product_rating'].max(), y=median_reviews,
                   showarrow=True, arrowhead=1)
fig.add_annotation(text='Low Rating, High Reviews', x=median_rating, y=df_combined['total_reviews'].max(),
                   showarrow=True, arrowhead=1)

# Show plot
fig.show()






### Combination of category and sub-category list to be shown in ppt

In [13]:
import plotly.graph_objects as go

# Create figure for best product rating
fig1 = go.Figure(go.Bar(
    y=best_subcategories_rating['category'] + ' - ' + best_subcategories_rating['product_subcategory'],
    x=best_subcategories_rating['avg_product_rating'],
    name='Average Product Rating',
    orientation='h',
    marker=dict(color='rgba(255, 153, 51, 0.7)'),
    text=best_subcategories_rating['avg_product_rating'].round(2),
    textposition='auto'
))

fig1.update_layout(
    title='Subcategories with Best Average Rating for Each Category',
    yaxis=dict(title='Category - Subcategory'),
    xaxis=dict(title='Average Rating', range=[0, best_subcategories_rating['avg_product_rating'].max() + 1]), 
    bargap=0.2,
    margin=dict(l=150),
)

# Create figure for best customer reviews count
fig2 = go.Figure(go.Bar(
    y=best_subcategories_reviews['category'] + ' - ' + best_subcategories_reviews['product_subcategory'],
    x=best_subcategories_reviews['total_reviews'],
    name='Total Customer Reviews Count',
    orientation='h',
    marker=dict(color='rgba(51, 153, 255, 0.7)'),
    text=best_subcategories_reviews['total_reviews'],
    textposition='auto'
))

fig2.update_layout(
    title='Subcategories with High Reviews for Each Category',
    yaxis=dict(title='Category - Subcategory'),
    xaxis=dict(title='Total Reviews Count'),
    bargap=0.2,
    margin=dict(l=150)
)

# Show the plots one below another
fig1.show()
fig2.show()


In [14]:
low_subcategories_rating

Unnamed: 0,category,product_subcategory,avg_product_rating,total_reviews,avg_selling_price
1,Baby Care,Baby Accessories,3.145946,2169,409.648649
13,"Bakery, Cakes & Dairy",Ice Creams & Desserts,3.622917,2289,170.054167
17,Beauty & Hygiene,Fragrances & Deos,3.74695,48037,893.09959
25,Beverages,Coffee,3.967978,4193,208.07236
32,Cleaning & Household,Bins & Bathroom Ware,3.760077,20064,272.68648
45,"Eggs, Meat & Fish",Pork & Other Meats,2.64,102,263.0
50,"Foodgrains, Oil & Masala",Dry Fruits,3.890394,10139,255.474877
60,Fruits & Vegetables,Fresh Vegetables,4.0,8294,30.773452
66,Gourmet & World Food,Chocolates & Biscuits,3.875205,31412,269.818177
79,"Kitchen, Garden & Pets",Cookware & Non Stick,3.580085,17533,1204.869774


In [15]:
import plotly.graph_objs as go

# Create figure for best product rating
fig1 = go.Figure(go.Bar(
    y=low_subcategories_rating['category'] + ' - ' + low_subcategories_rating['product_subcategory'],
    x=low_subcategories_rating['avg_product_rating'],
    name='Average Product Rating',
    orientation='h',
    marker=dict(color='rgba(255, 153, 51, 0.7)'),
    text=low_subcategories_rating['avg_product_rating'].round(2),
    textposition='auto'
))

fig1.update_layout(
    title='Subcategories with Low Average Rating for Each Category',
    yaxis=dict(title='Category - Subcategory'),
    xaxis=dict(title='Average Rating', range=[0, low_subcategories_rating['avg_product_rating'].max() + 1]), 
    bargap=0.2,
    margin=dict(l=150),
)

# Create figure for best customer reviews count
fig2 = go.Figure(go.Bar(
    y=low_subcategories_reviews['category'] + ' - ' + low_subcategories_reviews['product_subcategory'],
    x=low_subcategories_reviews['total_reviews'],
    name='Total Customer Reviews Count',
    orientation='h',
    marker=dict(color='rgba(51, 153, 255, 0.7)'),
    text=low_subcategories_reviews['total_reviews'],
    textposition='auto'
))

fig2.update_layout(
    title='Subcategories with Low Reviews for Each Category',
    yaxis=dict(title='Category - Subcategory'),
    xaxis=dict(title='Total Reviews Count'),
    bargap=0.2,
    margin=dict(l=150)
)

# Show the plots one below another
fig1.show()
fig2.show()


In [16]:
growth_subcategories = data[data['category'] == 'Gourmet & World Food'].groupby(['product_subcategory']).agg({'product_rating':'mean', 'customer_reviews_count':'sum', 'product_title':'count'})
promotion_yes = data[(data['promotion_indicator'] == 'Yes') & (data['category'] == 'Gourmet & World Food')].groupby('product_subcategory')['promotion_indicator'].count().reset_index()
growth_subcategories['product_subcategory'] = growth_subcategories.index
growth_subcategories = growth_subcategories.reset_index(drop=True)
growth_subcategories = pd.merge(growth_subcategories, promotion_yes, on='product_subcategory', how='left')
growth_subcategories

Unnamed: 0,product_rating,customer_reviews_count,product_title,product_subcategory,promotion_indicator
0,4.166667,550,12,"Atta, Flours & Sooji",7.0
1,3.985714,781,14,Bakery Snacks,8.0
2,4.079412,10596,204,Cereals & Breakfast,100.0
3,3.875205,31412,609,Chocolates & Biscuits,288.0
4,4.026335,34642,693,Cooking & Baking Needs,391.0
5,4.083992,12997,253,Dairy & Cheese,122.0
6,3.992663,36592,736,Drinks & Beverages,362.0
7,4.05,97,1,Mutton & Lamb,1.0
8,4.23159,12919,239,Oils & Vinegar,116.0
9,4.155976,12260,251,"Pasta, Soup & Noodles",126.0


In [17]:
import plotly.graph_objects as go
required_data = growth_subcategories[['product_rating', 'customer_reviews_count', 'product_title', 'promotion_indicator']]
required_data.columns = ['avg_product_rating', 'total_reviews_count', 'product_count', 'promotion_count']
fig = go.Figure(data=go.Heatmap(
                   z=required_data,
                   x=['avg_product_rating', 'total_reviews_count', 'product_count', 'promotion_count'],
                   y=['avg_product_rating', 'total_reviews_count', 'product_count', 'promotion_count'],
                   colorscale='Greens'))

# Set the title and axis labels
fig.update_layout(
    title='Gourmet & World Food Subcategories features heatmap',
    xaxis=dict(title='X Axis'),
    yaxis=dict(title='Y Axis'))

# Show the plot
fig.show()

In [18]:
import plotly.graph_objects as go

# Sample data for product subcategories
subcategories = growth_subcategories['product_subcategory']
reviews_count = (growth_subcategories['customer_reviews_count']/1000).round(2)

# Sample data for average ratings
ratings = growth_subcategories['product_rating']

# Create the bar chart for reviews count
fig = go.Figure()

fig.add_trace(go.Bar(
    x=subcategories,
    y=reviews_count,
    name='Reviews Count (in thousands)',
    marker=dict(color='#53E8C8'),
    text=reviews_count,  # Add labels to the bars
    textposition='auto'  # Automatically position the labels
))

# Create the line graph for ratings
fig.add_trace(go.Scatter(
    x=subcategories,
    y=ratings,
    name='Average Rating',
    yaxis='y2',
    mode='lines+markers',
    line=dict(color='#0F8C37', width=2),  
    marker=dict(color='#0F8C37', size=10),
    text=[f'{rate:.1f}' for rate in ratings],  # Format ratings to one decimal place
    textposition='top center',  # Position the labels above the data points
    textfont=dict(color='#0F8C37', size=10)
))

# Update layout to have dual y-axes and improve aesthetics
fig.update_layout(
    title='Subcategories for Gourmet & World Food with Reviews and Ratings',
    xaxis=dict(title='Product Subcategories', tickfont=dict(size=14)),
    yaxis=dict(title='Reviews Count (in thousands)', side='left', color='#53E8C8', showgrid=False, tickfont=dict(size=12)),
    yaxis2=dict(title='Average Rating', overlaying='y', side='right', color='#0F8C37', range=[0, 5], showgrid=False, tickfont=dict(size=12)),
    legend=dict(x=1, y=1.3, bgcolor='rgba(255,255,255,0.7)', bordercolor='rgba(0,0,0,0.7)', borderwidth=1),
    plot_bgcolor='rgba(245,245,245,0.9)',  # Light gray background
    margin=dict(l=50, r=50, t=70, b=50)  # Add margins for better readability
)

# Show the plot
fig.show()


In [19]:
growth_subcategories_low_category = data[data['category'] == 'Eggs, Meat & Fish'].groupby(['product_subcategory']).agg({'product_rating':'mean', 'customer_reviews_count':'sum', 'product_title':'count'})
promotion_yes_low_category = data[(data['promotion_indicator'] == 'Yes') & (data['category'] == 'Eggs, Meat & Fish')].groupby('product_subcategory')['promotion_indicator'].count().reset_index()
growth_subcategories_low_category['product_subcategory'] = growth_subcategories_low_category.index
growth_subcategories_low_category = growth_subcategories_low_category.reset_index(drop=True)
growth_subcategories_low_category = pd.merge(growth_subcategories_low_category, promotion_yes_low_category, on='product_subcategory', how='left')
growth_subcategories_low_category

Unnamed: 0,product_rating,customer_reviews_count,product_title,product_subcategory,promotion_indicator
0,4.004412,1608,34,Eggs,16.0
1,3.898106,6936,132,Fish & Seafood,60.0
2,4.0,48,1,Marinades,1.0
3,4.023684,841,19,Mutton & Lamb,8.0
4,2.64,102,5,Pork & Other Meats,
5,4.02327,7610,159,"Sausages, Bacon & Salami",78.0


In [20]:
import plotly.graph_objects as go
required_data_low_category = growth_subcategories_low_category[['product_rating', 'customer_reviews_count', 'product_title', 'promotion_indicator']]
required_data_low_category.columns = ['avg_product_rating', 'total_reviews_count', 'product_count', 'promotion_count']
fig = go.Figure(data=go.Heatmap(
                   z=required_data_low_category,
                   x=['avg_product_rating', 'total_reviews_count', 'product_count', 'promotion_count'],
                   y=['avg_product_rating', 'total_reviews_count', 'product_count', 'promotion_count'],
                   colorscale='Greens'))

# Set the title and axis labels
fig.update_layout(
    title='Low Category Subcategories features heatmap',
    xaxis=dict(title='X Axis'),
    yaxis=dict(title='Y Axis'))

# Show the plot
fig.show()

In [21]:
import plotly.graph_objects as go

# Sample data for product subcategories
subcategories_low_category = growth_subcategories_low_category['product_subcategory']
reviews_count_low_category = (growth_subcategories_low_category['customer_reviews_count']/1000).round(2)

# Sample data for average ratings
ratings_low_category = growth_subcategories_low_category['product_rating']

# Create the bar chart for reviews count
fig = go.Figure()

fig.add_trace(go.Bar(
    x=subcategories_low_category,
    y=reviews_count_low_category,
    name='Reviews Count (in thousands)',
    marker=dict(color='#53E8C8'),
    text=reviews_count_low_category,  # Add labels to the bars
    textposition='auto'  # Automatically position the labels
))

# Create the line graph for ratings
fig.add_trace(go.Scatter(
    x=subcategories_low_category,
    y=ratings_low_category,
    name='Average Rating',
    yaxis='y2',
    mode='lines+markers',
    line=dict(color='#0F8C37', width=2),  
    marker=dict(color='#0F8C37', size=10),
    text=[f'{rate:.1f}' for rate in ratings_low_category],  # Format ratings to one decimal place
    textposition='top center',  # Position the labels above the data points
    textfont=dict(color='#0F8C37', size=10)
))

# Update layout to have dual y-axes and improve aesthetics
fig.update_layout(
    title='Subcategories for Eggs, Meat & Fish with Reviews and Ratings',
    xaxis=dict(title='Product Subcategories', tickfont=dict(size=14)),
    yaxis=dict(title='Reviews Count (in thousands)', side='left', color='#53E8C8', showgrid=False, tickfont=dict(size=12)),
    yaxis2=dict(title='Average Rating', overlaying='y', side='right', color='#0F8C37', range=[0, 5], showgrid=False, tickfont=dict(size=12)),
    legend=dict(x=1, y=1.3, bgcolor='rgba(255,255,255,0.7)', bordercolor='rgba(0,0,0,0.7)', borderwidth=1),
    plot_bgcolor='rgba(245,245,245,0.9)',  # Light gray background
    margin=dict(l=50, r=50, t=70, b=50)  # Add margins for better readability
)

# Show the plot
fig.show()
