In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from itertools import combinations
from collections import Counter

In [2]:
# Load the month/year/store/product roll-up extract into the working frame.
DATA_PATH = 'month_yr_store_product_rollup_data.csv'
df = pd.read_csv(DATA_PATH)

In [3]:
# Preview the first five rows to sanity-check the load.
df.head(5)

Unnamed: 0,Year,Month,Store_ID,Product_ID,Product_Name,Product_Category,Product_Cost,Product_Price,Store_Name,Store_City,Store_Location,Store_Open_Date,Units,Actual_Product_Price,Revenue,xRevenue,Profit
0,2022,1,1,1,Action Figure,Toys,9.99,15.99,Maven Toys Guadalajara 1,Guadalajara,Residential,1992-09-18,63,15.517358,978.12,1007.37,348.75
1,2022,1,1,2,Animal Figures,Toys,9.99,12.99,Maven Toys Guadalajara 1,Guadalajara,Residential,1992-09-18,7,12.4,86.8,90.93,16.87
2,2022,1,1,4,Chutes & Ladders,Games,9.99,12.99,Maven Toys Guadalajara 1,Guadalajara,Residential,1992-09-18,5,12.98,64.9,64.95,14.95
3,2022,1,1,5,Classic Dominoes,Games,7.99,9.99,Maven Toys Guadalajara 1,Guadalajara,Residential,1992-09-18,11,9.424545,103.67,109.89,15.78
4,2022,1,1,6,Colorbuds,Electronics,6.99,14.99,Maven Toys Guadalajara 1,Guadalajara,Residential,1992-09-18,5,14.98,74.9,74.95,39.95


In [4]:
# Structural overview of the loaded frame: column dtypes, summary statistics
# from describe(), and the per-column count of null values.
# sep="\n" places each table on its own line. Embedding the "\n" in the label
# and relying on print's default " " separator indented the first row of every
# table by one space.
print("Data Types:", df.dtypes, sep="\n")
print("\nSummary Statistics:", df.describe(), sep="\n")
print("\nMissing Values:", df.isnull().sum(), sep="\n")

Data Types:
 Year                      int64
Month                     int64
Store_ID                  int64
Product_ID                int64
Product_Name             object
Product_Category         object
Product_Cost            float64
Product_Price           float64
Store_Name               object
Store_City               object
Store_Location           object
Store_Open_Date          object
Units                     int64
Actual_Product_Price    float64
Revenue                 float64
xRevenue                float64
Profit                  float64
dtype: object

Summary Statistics:
                Year         Month      Store_ID    Product_ID  Product_Cost  \
count  22931.000000  22931.000000  22931.000000  22931.000000  22931.000000   
mean    2022.488029      6.003532     25.423401     16.870089     10.555235   
std        0.499868      3.193121     14.517608     10.078692      7.093882   
min     2022.000000      1.000000      1.000000      1.000000      1.990000   
25%     2022

In [4]:
# Revenue leaders inside every (city, month, category, store) slice.
# NOTE(review): Month is grouped without Year, so rows sharing a month number
# but belonging to different years are summed together — confirm this is intended.

# Step 1: Total revenue per product within each city / month / category / store slice
grouped = (
    df.groupby(
        ['Store_City', 'Month', 'Product_Category', 'Store_Name', 'Product_ID', 'Product_Name']
    )['Revenue']
    .sum()
    .reset_index()
)

# Step 2: Order all rows from highest to lowest revenue
top_products = grouped.sort_values(by='Revenue', ascending=False)

# Step 3: Keep the first five rows of each slice; given the ordering above,
# these are the five highest-revenue products of that slice
top_n_products = (
    top_products
    .groupby(['Store_City', 'Month', 'Product_Category', 'Store_Name'])
    .head(5)
)

# Step 4: Print the result, then show its first rows as the cell output
print(top_n_products)
top_n_products.head()

             Store_City  Month Product_Category  \
6380         Guanajuato      3             Toys   
7433         Hermosillo      4             Toys   
7517         Hermosillo      5             Toys   
3579   Cuidad de Mexico      6             Toys   
9866          Monterrey      5             Toys   
...                 ...    ...              ...   
11508           Pachuca     10     Art & Crafts   
9199           Mexicali     10     Art & Crafts   
13190   San Luis Potosi      2            Games   
2852         Cuernavaca     10     Art & Crafts   
7937         Hermosillo     11     Art & Crafts   

                          Store_Name  Product_ID     Product_Name   Revenue  
6380         Maven Toys Guanajuato 3          18      Lego Bricks  17636.84  
7433         Maven Toys Hermosillo 3          18      Lego Bricks  14216.30  
7517         Maven Toys Hermosillo 3          18      Lego Bricks  12390.11  
3579   Maven Toys Ciudad de Mexico 2          18      Lego Bricks  11161.21

Unnamed: 0,Store_City,Month,Product_Category,Store_Name,Product_ID,Product_Name,Revenue
6380,Guanajuato,3,Toys,Maven Toys Guanajuato 3,18,Lego Bricks,17636.84
7433,Hermosillo,4,Toys,Maven Toys Hermosillo 3,18,Lego Bricks,14216.3
7517,Hermosillo,5,Toys,Maven Toys Hermosillo 3,18,Lego Bricks,12390.11
3579,Cuidad de Mexico,6,Toys,Maven Toys Ciudad de Mexico 2,18,Lego Bricks,11161.21
9866,Monterrey,5,Toys,Maven Toys Monterrey 2,18,Lego Bricks,11124.8


## Top products by profit, grouped by store

In [5]:
# Per-store profit leaders: the three most profitable products of each store.

# Step 1: Total profit for every (store, product) pair
grouped = (
    df.groupby(['Store_Name', 'Product_ID', 'Product_Name'])['Profit']
    .sum()
    .reset_index()
)

# Step 2: Order all rows from highest to lowest total profit
top_products_by_profit = grouped.sort_values(by='Profit', ascending=False)

# Step 3: Keep the first three rows per store; given the ordering above,
# these are each store's three most profitable products
top_n_products_by_store = (
    top_products_by_profit
    .groupby('Store_Name')
    .head(3)
)

# Step 4: Display the result
print(top_n_products_by_store)

                         Store_Name  Product_ID     Product_Name    Profit
308   Maven Toys Ciudad de Mexico 2           6        Colorbuds  43201.56
894           Maven Toys Mexicali 1           6        Colorbuds  37556.75
792         Maven Toys Hermosillo 3           6        Colorbuds  37112.87
566        Maven Toys Guadalajara 3           6        Colorbuds  29378.18
596        Maven Toys Guadalajara 4           6        Colorbuds  28638.75
...                             ...         ...              ...       ...
72            Maven Toys Campeche 2           3  Barrel O' Slime   4192.34
1185            Maven Toys Puebla 1           1    Action Figure   4182.27
122           Maven Toys Chetumal 1          18      Lego Bricks   4148.93
435           Maven Toys Culiacan 1           3  Barrel O' Slime   4047.87
1316          Maven Toys Saltillo 2           8    Deck Of Cards   3460.01

[150 rows x 4 columns]


## Top products by profit, grouped by store and location

In [6]:
# Profit leaders per (store, location): the five most profitable products of
# each pair. Store_Location is part of the grouping key, so it is carried
# through as a column of the result.

# Step 1: Total profit for every (store, location, product) combination
grouped = (
    df.groupby(['Store_Name', 'Store_Location', 'Product_Name'])['Profit']
    .sum()
    .reset_index()
)

# Step 2: Order all rows from highest to lowest total profit
top_products_by_profit = grouped.sort_values(by='Profit', ascending=False)

# Step 3: Keep the first five rows of each (store, location) pair; given the
# ordering above, these are that pair's five most profitable products
top_n_products_by_store_location = (
    top_products_by_profit
    .groupby(['Store_Name', 'Store_Location'])
    .head(5)
)

# Step 4: Display the result
print(top_n_products_by_store_location)

                         Store_Name Store_Location     Product_Name    Profit
308   Maven Toys Ciudad de Mexico 2        Airport        Colorbuds  43201.56
894           Maven Toys Mexicali 1     Commercial        Colorbuds  37556.75
792         Maven Toys Hermosillo 3     Commercial        Colorbuds  37112.87
566        Maven Toys Guadalajara 3        Airport        Colorbuds  29378.18
596        Maven Toys Guadalajara 4       Downtown        Colorbuds  28638.75
...                             ...            ...              ...       ...
1513      Maven Toys Villahermosa 1       Downtown         Dart Gun   2844.44
869             Maven Toys Merida 1       Downtown            Jenga   2837.40
689         Maven Toys Guanajuato 3    Residential  Barrel O' Slime   2789.47
534        Maven Toys Guadalajara 2     Commercial   Animal Figures   2724.86
483            Maven Toys Durango 1       Downtown      Lego Bricks   2488.91

[250 rows x 4 columns]


## Top products by profit, grouped by store and city

In [7]:
# Profit leaders per (store, city): the five most profitable products of each
# pair. Store_City is part of the grouping key, so it is carried through as a
# column of the result.

# Step 1: Total profit for every (store, city, product) combination
grouped = (
    df.groupby(['Store_Name', 'Store_City', 'Product_ID', 'Product_Name'])['Profit']
    .sum()
    .reset_index()
)

# Step 2: Order all rows from highest to lowest total profit
top_products_by_profit = grouped.sort_values(by='Profit', ascending=False)

# Step 3: Keep the first five rows of each (store, city) pair; given the
# ordering above, these are that pair's five most profitable products
top_n_products_by_store_city = (
    top_products_by_profit
    .groupby(['Store_Name', 'Store_City'])
    .head(5)
)

# Step 4: Display the result
print(top_n_products_by_store_city)

                         Store_Name        Store_City  Product_ID  \
308   Maven Toys Ciudad de Mexico 2  Cuidad de Mexico           6   
894           Maven Toys Mexicali 1          Mexicali           6   
792         Maven Toys Hermosillo 3        Hermosillo           6   
566        Maven Toys Guadalajara 3       Guadalajara           6   
596        Maven Toys Guadalajara 4       Guadalajara           6   
...                             ...               ...         ...   
1513      Maven Toys Villahermosa 1      Villahermosa           7   
869             Maven Toys Merida 1            Merida          16   
689         Maven Toys Guanajuato 3        Guanajuato           3   
534        Maven Toys Guadalajara 2       Guadalajara           2   
483            Maven Toys Durango 1           Durango          18   

         Product_Name    Profit  
308         Colorbuds  43201.56  
894         Colorbuds  37556.75  
792         Colorbuds  37112.87  
566         Colorbuds  29378.18  
5

## Top products by profit, grouped by month

In [9]:
# Top five products by total profit for each month number.
# NOTE(review): grouping uses Month only, so rows sharing a month number but
# belonging to different years are summed together — confirm this is intended.

# Step 1: Group by Month and Product, and calculate total profit
grouped = df.groupby(['Month', 'Product_Name'])['Profit'].sum().reset_index()

# Step 2: Sort by month (ascending), then by total profit (descending) within each month
top_products_by_profit = grouped.sort_values(by=['Month', 'Profit'], ascending=[True, False])

# Step 3: Keep the first five rows of each month; given the ordering above,
# these are that month's five most profitable products
top_n_products_by_month = top_products_by_profit.groupby('Month').head(5)

# Step 4: Display the result
print(top_n_products_by_month)


     Month     Product_Name    Profit
5        1        Colorbuds  91527.60
0        1    Action Figure  33236.43
7        1    Deck Of Cards  27221.50
12       1    Glass Marbles  20045.44
2        1  Barrel O' Slime  18695.17
38       2        Colorbuds  81641.59
33       2    Action Figure  28992.21
40       2    Deck Of Cards  21106.29
45       2    Glass Marbles  20618.19
35       2  Barrel O' Slime  19000.05
71       3        Colorbuds  88399.01
66       3    Action Figure  40472.63
73       3    Deck Of Cards  25110.26
68       3  Barrel O' Slime  23950.99
79       3    Glass Marbles  22364.32
105      4        Colorbuds  82025.84
100      4    Action Figure  36397.95
107      4    Deck Of Cards  26248.07
102      4  Barrel O' Slime  22365.60
113      4    Glass Marbles  20309.40
139      5        Colorbuds  77472.09
134      5    Action Figure  36166.82
141      5    Deck Of Cards  21945.07
151      5      Lego Bricks  21031.60
150      5  Kids Makeup Kit  19591.06
173      6  

## Top products by profit, grouped by year

In [10]:
# Top five products by total profit for each year.

# Step 1: Total profit for every (year, product) pair
grouped = (
    df.groupby(['Year', 'Product_ID', 'Product_Name'])['Profit']
    .sum()
    .reset_index()
)

# Step 2: Order by year (ascending), then by total profit (descending) within each year
top_products_by_profit = grouped.sort_values(
    by=['Year', 'Profit'],
    ascending=[True, False],
)

# Step 3: Keep the first five rows of each year; given the ordering above,
# these are that year's five most profitable products
top_n_products_by_year = (
    top_products_by_profit
    .groupby('Year')
    .head(5)
)

# Step 4: Display the result
print(top_n_products_by_year)


    Year  Product_ID     Product_Name     Profit
5   2022           6        Colorbuds  565469.20
0   2022           1    Action Figure  219270.26
7   2022           8    Deck Of Cards  143764.85
12  2022          14    Glass Marbles   97318.64
16  2022          18      Lego Bricks   97146.44
38  2023           6        Colorbuds  241049.44
35  2023           3  Barrel O' Slime  161509.06
33  2023           1    Action Figure  128555.05
51  2023          19       Magic Sand  108870.85
40  2023           8    Deck Of Cards  108152.53


## Top 5 products by total profit overall

In [11]:
# Overall profit leaders: the five products with the highest total profit
# across the whole dataset.

# Step 1: Total profit per product
grouped = (
    df.groupby(['Product_ID', 'Product_Name'])['Profit']
    .sum()
    .reset_index()
)

# Step 2: Order products from highest to lowest total profit
top_products_by_profit = grouped.sort_values(by='Profit', ascending=False)

# Step 3: Take the first five rows, i.e. the five most profitable products
top_5_unique_products = top_products_by_profit.head(5)

# Step 4: Display the result
print(top_5_unique_products)

    Product_ID     Product_Name     Profit
5            6        Colorbuds  806518.64
0            1    Action Figure  347825.31
7            8    Deck Of Cards  251917.38
2            3  Barrel O' Slime  217607.83
17          18      Lego Bricks  191680.99
