## Imports

In [118]:
import pandas as pd
import plotly.graph_objects as go
import plotly.express as px

## Processing

In [119]:
# Load the raw product export; every step in the Processing section works on this frame.
data = pd.read_csv('Jan-24 project.csv')

In [120]:
# Number of missing values in each column of the raw data
data.isna().sum()

product_id                   0
product_title                1
category                     0
product_subcategory          0
brand                        1
selling_price                0
original_price               0
product_type                 0
product_rating            8626
product_description        115
availability_status          0
customer_reviews_count       0
seasonal_indicator           0
promotion_indicator          0
shipping_weight              0
bundle_indicator             0
customer_demographics        0
dtype: int64

In [121]:
# Show the row(s) whose product_title is missing.
# Ending the cell with the expression (instead of print) lets the notebook
# render one readable table rather than a line-wrapped text dump.
data[data['product_title'].isna()]

       product_id product_title   category product_subcategory          brand  \
14363       14364           NaN  Beverages              Coffee  Cothas Coffee   

       selling_price  original_price   product_type  product_rating  \
14363          200.0           240.0  Ground Coffee             4.2   

                                     product_description availability_status  \
14363  Cothas Specialty Blend Coffee and Chicory incl...            In Stock   

       customer_reviews_count seasonal_indicator promotion_indicator  \
14363                      57             Spring                  No   

       shipping_weight bundle_indicator customer_demographics  
14363         3.614885           Bundle                 Other  


In [122]:
# Show the row(s) whose brand is missing; a bare expression gives the rich
# table display instead of wrapped print output.
data[data['brand'].isna()]

      product_id          product_title              category  \
9765        9766  Food Package - Medium  Cleaning & Household   

           product_subcategory brand  selling_price  original_price  \
9765  Disposables, Garbage Bag   NaN           50.0            50.0   

                   product_type  product_rating product_description  \
9765  Aluminium Foil, Clingwrap             NaN                 NaN   

     availability_status  customer_reviews_count seasonal_indicator  \
9765            In Stock                      57             Spring   

     promotion_indicator  shipping_weight bundle_indicator  \
9765                 Yes         3.076136           Bundle   

     customer_demographics  
9765                  Male  


In [123]:
# brand and product_title are each missing in a single row (see the null counts
# above), so label those cells 'Unknown' instead of dropping the rows.
# One frame-level fillna replaces `data[col].fillna(..., inplace=True)`, which is
# chained in-place assignment: deprecated in pandas 2.x and without effect on
# `data` once Copy-on-Write is enabled.
data = data.fillna({'brand': 'Unknown', 'product_title': 'Unknown'})

In [124]:
# Re-check missing values per column after the fill
data.isna().sum()

product_id                   0
product_title                0
category                     0
product_subcategory          0
brand                        0
selling_price                0
original_price               0
product_type                 0
product_rating            8626
product_description        115
availability_status          0
customer_reviews_count       0
seasonal_indicator           0
promotion_indicator          0
shipping_weight              0
bundle_indicator             0
customer_demographics        0
dtype: int64

In [125]:
# Persist the cleaned frame. The Analysis section reloads this exact file, so the
# export has to run for a fresh-kernel "Restart & Run All" to succeed.
data.to_csv('Jan-24 project refined.csv', index=False)

## Analysis

In [126]:
# Reload the refined dataset from disk. This file must already exist
# (see the export line at the end of the Processing section).
data = pd.read_csv('Jan-24 project refined.csv')

In [127]:
# Brand performance summary, one row per brand:
#   * Market Share (%)    - the brand's share of the summed selling_price
#   * Average Rating      - mean product_rating
#   * Total Reviews Count - summed customer_reviews_count
per_brand = data.groupby('brand')

# Market share: each brand's summed selling price as a percentage of the overall sum
brand_sales = per_brand['selling_price'].sum()
total_market_sales = brand_sales.sum()
market_share = brand_sales.div(total_market_sales).mul(100)

# Loyalty / engagement proxies: average rating and total number of reviews
brand_loyalty_metrics = per_brand.agg(
    product_rating=('product_rating', 'mean'),
    customer_reviews_count=('customer_reviews_count', 'sum'),
)

# Combine the three metrics and rank brands from largest to smallest market share
brand_performance = (
    brand_loyalty_metrics
    .rename(columns={
        'product_rating': 'Average Rating',
        'customer_reviews_count': 'Total Reviews Count',
    })
    .assign(**{'Market Share (%)': market_share})
    [['Market Share (%)', 'Average Rating', 'Total Reviews Count']]
    .sort_values(by='Market Share (%)', ascending=False)
)

# Ten brands with the largest market share
brand_performance.head(10)


Unnamed: 0_level_0,Market Share (%),Average Rating,Total Reviews Count
brand,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Prestige,1.554986,3.439623,3064
Dkny,1.286839,5.0,1057
bb Royal,1.283418,4.047122,27360
BB Home,1.125708,4.106936,21552
Hawkins,1.089891,3.841379,1763
bb Combo,0.904005,4.025,8363
Ajmal,0.861577,3.417778,3603
DP,0.823633,4.2,11382
Huggies,0.749925,4.119149,3561
StBotanica,0.712656,3.876562,4723


In [128]:
# Pie chart of the ten brands with the largest market share.
# (plotly.express is already imported as px in the Imports cell.)
top_brands_market_share = brand_performance.head(10).reset_index()

market_share_fig = px.pie(
    data_frame=top_brands_market_share,
    names='brand',
    values='Market Share (%)',
    title='Market Share of Top 10 Brands',
)
market_share_fig.show()


In [129]:


# Radar chart for the eight brands with the largest market share
# (kept to eight so the chart stays readable).
top_brands = brand_performance.head(8)

# Min-max scale each metric to [0, 1] so all three can share one radial axis
metric_min = top_brands.min()
metric_range = top_brands.max() - metric_min
normalized_top_brands = (top_brands - metric_min) / metric_range

brand_labels = normalized_top_brands.index.tolist()

# One filled trace per metric; the brands are the angular categories
fig = go.Figure()
for metric in ('Market Share (%)', 'Average Rating', 'Total Reviews Count'):
    fig.add_trace(go.Scatterpolar(
        r=normalized_top_brands[metric].tolist(),
        theta=brand_labels,
        fill='toself',
        name=metric,
    ))

fig.update_layout(
    title="Comparison of Top Brands across Market Share, Average Rating, and Total Reviews",
    polar={'radialaxis': {'visible': True, 'range': [0, 1]}},
)

fig.show()


In [130]:
# Bubble chart of the 15 brands with the largest market share:
# x = average rating, y = total reviews, bubble size and colour = market share.
# (plotly.express is already imported as px in the Imports cell.)
brand_performance_reset = brand_performance.reset_index()  # expose 'brand' as a column
leading_brands = brand_performance_reset.head(15)

fig = px.scatter(
    leading_brands,
    x='Average Rating',
    y='Total Reviews Count',
    size='Market Share (%)',
    color='Market Share (%)',
    hover_name='brand',
    size_max=60,
)
fig.update_layout(
    title='Brand Performance: Market Share, Average Rating, and Total Reviews',
    xaxis_title='Average Rating',
    yaxis_title='Total Reviews Count',
)
fig.show()


In [131]:
# Dual-axis chart for the 20 brands with the largest market share:
# bars = total reviews (left axis), line = average rating (right axis).
# (plotly.graph_objects is already imported as go in the Imports cell.)

# Slice once so x and both y series cover the same 20 brands; previously the
# y lists were built from the full frame while x held only 20 labels.
top20 = brand_performance.head(20)
brands = top20.index.tolist()
average_rating = top20['Average Rating'].tolist()
total_reviews = top20['Total Reviews Count'].tolist()

reviews_color = 'rgb(55, 83, 109)'
rating_color = 'rgb(26, 118, 255)'

fig = go.Figure()

# Bars: total reviews on the primary (left) y-axis
fig.add_trace(go.Bar(
    x=brands,
    y=total_reviews,
    name='Total Reviews Count',
    marker_color=reviews_color
))

# Line: average rating on the secondary (right) y-axis
fig.add_trace(go.Scatter(
    x=brands,
    y=average_rating,
    name='Average Rating',
    marker_color=rating_color,
    yaxis='y2'
))

# Axis titles use title=dict(text=..., font=...); the older `titlefont`
# shorthand is deprecated and rejected by current plotly releases.
fig.update_layout(
    title='Brand Performance: Reviews and Ratings',
    xaxis_title='Brand',
    yaxis=dict(
        title=dict(text='Total Reviews Count', font=dict(color=reviews_color)),
        tickfont=dict(color=reviews_color)
    ),
    yaxis2=dict(
        title=dict(text='Average Rating', font=dict(color=rating_color)),
        tickfont=dict(color=rating_color),
        overlaying='y',
        side='right'
    ),
    legend_title='Metric'
)

fig.show()


Prestige has the highest market share at approximately 1.55%, with an average product rating of 3.44 and a total of 3,064 reviews.
Dkny follows with a market share of around 1.29%, a perfect average rating of 5.00, but with fewer total reviews (1,057), indicating high customer satisfaction but potentially a smaller customer base or fewer product listings.
bb Royal has a significant market share at 1.28%, with a good average rating of 4.05 and a high number of total reviews (27,360), suggesting strong brand loyalty and customer engagement.
BB Home holds a market share of 1.13%, with an average rating of 4.11 and 21,552 total reviews, showing good customer satisfaction and engagement.
Other brands like Hawkins, bb Combo, Ajmal, DP, Huggies, and StBotanica also show notable market shares and customer engagement metrics.

In [132]:
# Segment the catalogue by product category and customer demographic.
# Each segment gets its mean product rating and its total review count;
# segments are ranked from most to fewest reviews.
category_segmentation = (
    data
    .groupby(['category', 'customer_demographics'])
    .agg(
        product_rating=('product_rating', 'mean'),
        customer_reviews_count=('customer_reviews_count', 'sum'),
    )
    .sort_values(by='customer_reviews_count', ascending=False)
)

# Ten segments with the highest total review count
category_segmentation.head(10)


Unnamed: 0_level_0,Unnamed: 1_level_0,product_rating,customer_reviews_count
category,customer_demographics,Unnamed: 2_level_1,Unnamed: 3_level_1
Beauty & Hygiene,Male,3.93053,137541
Beauty & Hygiene,Female,3.91889,126643
Beauty & Hygiene,Other,3.942361,126621
Gourmet & World Food,Male,3.950323,79989
Gourmet & World Food,Other,3.959651,78250
Gourmet & World Food,Female,4.040804,78248
"Kitchen, Garden & Pets",Other,3.723516,63694
"Kitchen, Garden & Pets",Male,3.706242,58209
"Kitchen, Garden & Pets",Female,3.775245,56968
Snacks & Branded Foods,Female,3.97455,49125


In [133]:
# Treemap of review volume (tile size) and average rating (tile colour)
# by category -> customer demographic.
# (plotly.express is already imported as px in the Imports cell.)

# Sets the default plotly express template for this and all later px figures
px.defaults.template = "plotly_white"

fig = px.treemap(
    category_segmentation.reset_index(),
    path=['category', 'customer_demographics'],
    values='customer_reviews_count',
    color='product_rating',
    color_continuous_scale='Blues',  # Using a blue color scale for a more formal appearance
    title='Product Ratings and Reviews Count by Category and Demographics'
)

# Hover text. On a demographic tile %{label} is the demographic and %{parent}
# is its category; on a category tile %{label} is the category and %{parent}
# is empty. The previous template had the two captions the wrong way round
# for demographic tiles, so neutral captions are used that are correct at both levels.
fig.update_traces(
    hovertemplate=(
        "<b>Segment:</b> %{label}<br>"
        "<b>Within:</b> %{parent}<br>"
        "<b>Reviews Count:</b> %{value}<br>"
        "<b>Average Rating:</b> %{color:.2f}<br>"
    )
)

# Apply additional layout enhancements for a formal look
fig.update_layout(
    hoverlabel=dict(bgcolor="white", font_size=12, font_family="Arial"),
    paper_bgcolor='rgba(247, 247, 247, 1)',
    plot_bgcolor='rgba(247, 247, 247, 1)'
)

fig.show()


# The size of each segment represents the total number of customer reviews, highlighting which categories and demographics are most active in terms of reviewing products.
# The color indicates the average product rating, providing insight into how satisfied customers are in each segment.

Promotion Impact Analysis
Looking at the promotion impact, we can observe how promotions influence brand performance for selected brands:

Sumeru has higher sales when products are on promotion (₹2,334.70) compared to when not (₹1,324.00), with a slight decrease in average product rating during promotions.
Mother Dairy also shows increased sales with promotions (₹1,905.00) versus without (₹519.55), and maintains a relatively high product rating, suggesting effective promotional strategies.
For moha, sales are almost equal with and without promotions, but the data lacks product ratings.
Marshmallow and mJOY show varied performance with promotions, with mJOY achieving a perfect average rating during promotions.


Promotion Impact Analysis (Continued)
Sumeru shows a significant increase in sales when products are on promotion, with the summed selling price rising from ₹1,324.00 to ₹2,334.70, and a slight decrease in average product rating from 3.84 to 3.45, suggesting that promotions drive sales despite a small drop in customer satisfaction.
Mother Dairy also sees an increase in sales with promotions, with the summed selling price rising from ₹519.55 to ₹1,905.00, and maintains a relatively high product rating, indicating that promotions are effective in boosting sales while keeping customer satisfaction high.
For brands like moha and mJOY, the data is limited regarding the product rating in some cases, but sales figures suggest that promotions have a notable impact.

In [134]:
# Promotion impact per brand: mean rating and total reviews for each
# (brand, promotion_indicator) pair, ordered by brand name descending.
promotion_impact = (
    data
    .groupby(['brand', 'promotion_indicator'])
    .agg(
        product_rating=('product_rating', 'mean'),
        customer_reviews_count=('customer_reviews_count', 'sum'),
    )
    .sort_values(by=['brand'], ascending=False)
)

# First ten rows in that order (reverse alphabetical by brand, not ranked by performance)
promotion_impact.head(10)

Unnamed: 0_level_0,Unnamed: 1_level_0,product_rating,customer_reviews_count
brand,promotion_indicator,Unnamed: 2_level_1,Unnamed: 3_level_1
sumeru,Yes,3.45,499
sumeru,No,3.842857,370
mother dairy,Yes,4.171429,356
mother dairy,No,4.116667,240
moha,Yes,,88
moha,No,,114
marshmallow,Yes,3.8,21
marshmallow,No,4.4,48
mJOY,Yes,5.0,38
mJOY,No,,75


In [135]:
# Side-by-side bars comparing promoted vs. non-promoted products per brand:
# left panel = average rating, right panel = total reviews.
# (plotly.graph_objects is already imported as go in the Imports cell.)
from plotly.subplots import make_subplots  # not part of the top-level Imports cell

# Flatten into a new frame instead of reset_index(inplace=True): mutating
# promotion_impact in place made this cell non-idempotent, because each re-run
# inserted another index column.
promotion_flat = promotion_impact.reset_index()

# Keep the 10 brands with the most reviews overall. Counting rows per brand
# (the previous approach) can only give 1 or 2, so it picked brands arbitrarily.
top_brands = (
    promotion_flat.groupby('brand')['customer_reviews_count']
    .sum()
    .nlargest(10)
    .index
)
filtered_data = promotion_flat[promotion_flat['brand'].isin(top_brands)]

# Create a subplot with 1 row and 2 columns
fig = make_subplots(rows=1, cols=2, subplot_titles=('Average Product Rating', 'Total Customer Reviews'))

# One colour per promotion state, shared by both panels
promo_colors = {'Yes': '#636EFA', 'No': '#EF553B'}

# (subplot column, metric column, hover line) for each panel. Each panel gets
# its own hover text: the old shared template referenced customdata that was
# never set and labelled the review counts as "Avg Rating".
panels = [
    (1, 'product_rating', 'Avg Rating: %{y:.2f}'),
    (2, 'customer_reviews_count', 'Total Reviews: %{y:,}'),
]

for col, metric, hover_line in panels:
    for indicator in ['Yes', 'No']:
        subset = filtered_data[filtered_data['promotion_indicator'] == indicator]
        fig.add_trace(
            go.Bar(
                x=subset['brand'],
                y=subset[metric],
                name=f'Promotion {indicator}',
                legendgroup=indicator,       # toggle both panels together
                showlegend=(col == 1),       # avoid duplicate legend entries
                marker_color=promo_colors[indicator],
                hovertemplate=(
                    '<b>%{x}</b><br>'
                    f'Promotion: {indicator}<br>'
                    f'{hover_line}<extra></extra>'
                ),
            ),
            row=1, col=col
        )

# Update layout for a formal theme
fig.update_layout(
    title_text="Impact of Promotion on Brand Performance",
    template="plotly_white",
    hovermode="closest"
)

fig.show()


In [137]:
# Total customer reviews for every (category, demographic, promotion flag)
# combination, ordered by category and then demographic, both descending.
segment_keys = ['category', 'customer_demographics', 'promotion_indicator']
demographic_promotion_response = (
    data
    .groupby(segment_keys)[['customer_reviews_count']]
    .sum()
    .sort_values(by=['category', 'customer_demographics'], ascending=False)
)

# First 20 rows, to compare promoted vs. non-promoted review totals per segment
demographic_promotion_response.head(20)


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,customer_reviews_count
category,customer_demographics,promotion_indicator,Unnamed: 3_level_1
Snacks & Branded Foods,Other,No,22144
Snacks & Branded Foods,Other,Yes,25158
Snacks & Branded Foods,Male,No,22228
Snacks & Branded Foods,Male,Yes,21726
Snacks & Branded Foods,Female,No,24861
Snacks & Branded Foods,Female,Yes,24264
"Kitchen, Garden & Pets",Other,No,31505
"Kitchen, Garden & Pets",Other,Yes,32189
"Kitchen, Garden & Pets",Male,No,28114
"Kitchen, Garden & Pets",Male,Yes,30095


In [144]:
# Faceted bar chart: total reviews with vs. without promotion, one facet per
# category, bars coloured by customer demographic.
# (plotly.express is already imported as px in the Imports cell.)

# First 30 rows of the (category, demographic)-sorted table; with 3 demographics
# x 2 promotion flags per category this is presumably 5 complete categories
# - TODO confirm every combination is present.
data_for_plot = demographic_promotion_response.head(30).reset_index()

fig = px.bar(
    data_for_plot,
    x='promotion_indicator',
    y='customer_reviews_count',
    facet_col='category',  # Or 'customer_demographics' to switch the main comparison
    color='customer_demographics',  # Or 'category' to switch the color coding
    # `labels` renames the axes in every facet; update_layout(xaxis_title=...)
    # only changed the first facet and left raw column names on the others.
    labels={
        'promotion_indicator': 'Promotion Indicator',
        'customer_reviews_count': 'Total Customer Reviews',
    },
    title='Impact of Promotions on Customer Reviews by Category and Demographics',
    height=600,  # Adjust height to accommodate all facets
    width=1200  # Adjust width for readability
)

fig.show()


Snacks & Branded Foods
Other demographics show a significant increase in sales during promotions, with the summed selling price rising from ₹56,650.67 to ₹69,336.69, indicating a strong response to discounts or promotional activities.
Both Male and Female segments exhibit similar trends, with sales slightly decreasing or remaining stable during promotions, suggesting that promotions may not significantly impact their purchasing behavior in this category.

Kitchen, Garden & Pets
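All demographic groups (Other, Male, and Female) are reported to show an increase in sales during promotions, with the Other segment quoted as moving from ₹332,468.71 to ₹315,704.10 in summed selling price. As written, those two figures describe a decrease rather than a jump, so they are probably transposed (₹315,704.10 without promotion, ₹332,468.71 with promotion) and should be re-checked against the data. If the increase holds, it indicates a high responsiveness to promotions in this category across all demographics.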

Gourmet & World Food
The Male demographic shows an increase in sales during promotions, with the summed selling price rising from ₹234,431.83 to ₹264,283.43, suggesting that promotional activities effectively attract this group.
The Female and Other demographics also respond well to promotions, with sales increasing in both groups.

Fruits & Vegetables
Although the numbers are relatively small compared to other categories, there is a slight increase in sales for the Other demographic during promotions, suggesting that promotions can positively impact sales even in less responsive categories.

Customer Reviews
The number of customer reviews tends to increase during promotions across most categories and demographics, indicating higher engagement and interest in promoted products.

In [None]:
# Impact of bundling on sales, customer reviews, and product ratings:
# one row per (brand, bundle_indicator) pair, ordered by brand name descending.
# selling_price is summed here as well; the sales metric named above was
# missing from the aggregation.
bundle_analysis = data.groupby(['brand', 'bundle_indicator']).agg({
    'selling_price': 'sum',  # Summed selling price (sales proxy)
    'customer_reviews_count': 'sum',  # Total reviews
    'product_rating': 'mean'  # Average product rating
}).sort_values(by=['brand'], ascending=False)

bundle_analysis.head(20)


Unnamed: 0_level_0,Unnamed: 1_level_0,selling_price,customer_reviews_count,product_rating
brand,bundle_indicator,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
sumeru,Individual,2053.9,453,3.616667
sumeru,Bundle,1604.8,416,3.644444
mother dairy,Individual,1738.75,289,4.157143
mother dairy,Bundle,685.8,307,4.133333
moha,Bundle,368.0,114,
moha,Individual,364.0,88,
marshmallow,Bundle,150.0,21,3.8
marshmallow,Individual,150.0,48,4.4
mJOY,Individual,1258.6,113,5.0
kwality walls,Bundle,1184.65,555,3.7125


In [147]:
# Scatter of total reviews vs. average rating for the first 100
# (brand, bundle_indicator) rows; colour and marker symbol separate bundled
# from individual listings, and marker size follows the review count.
# (plotly.express is already imported as px in the Imports cell.)
data_for_plot = bundle_analysis.head(100).reset_index()

scatter_options = dict(
    x='customer_reviews_count',
    y='product_rating',
    color='bundle_indicator',
    symbol='bundle_indicator',
    size='customer_reviews_count',
    hover_name='brand',
    title='Impact of Bundling on Customer Reviews and Product Rating',
)
fig = px.scatter(data_for_plot, **scatter_options)

fig.update_layout(
    xaxis_title='Total Customer Reviews',
    yaxis_title='Average Product Rating',
)
fig.show()
