In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from itertools import combinations
from collections import Counter

In [2]:
df = pd.read_csv('month_yr_store_rollup_data.csv')

## The dataset seems to offer a regional view of performance, showing how different cities and types of store locations (residential, commercial, downtown) impact the overall business results.


In [3]:
df.head()

Unnamed: 0,Year,Month,Store_ID,Store_Name,Store_City,Store_Location,Store_Open_Date,Units,nUnique_Products,Actual_Product_Price,Revenue,xRevenue,Profit
0,2022,1,1,Maven Toys Guadalajara 1,Guadalajara,Residential,1992-09-18,563,20,14.612684,8352.78,8546.87,1694.91
1,2022,1,2,Maven Toys Monterrey 1,Monterrey,Residential,1995-04-27,854,16,13.763446,11814.6,12083.46,3521.64
2,2022,1,3,Maven Toys Guadalajara 2,Guadalajara,Commercial,1999-12-27,993,16,13.863232,13526.77,13871.57,3960.2
3,2022,1,4,Maven Toys Saltillo 1,Saltillo,Downtown,2000-01-01,959,17,13.886959,13629.73,13889.41,4284.32
4,2022,1,5,Maven Toys La Paz 1,La Paz,Downtown,2001-05-31,710,18,14.498741,9735.14,10012.4,2783.74


In [4]:
print("Data Types:\n", df.dtypes)
print("\nSummary Statistics:\n", df.describe())
print("\nMissing Values:\n", df.isnull().sum())

Data Types:
 Year                      int64
Month                     int64
Store_ID                  int64
Store_Name               object
Store_City               object
Store_Location           object
Store_Open_Date          object
Units                     int64
nUnique_Products          int64
Actual_Product_Price    float64
Revenue                 float64
xRevenue                float64
Profit                  float64
dtype: object

Summary Statistics:
               Year        Month     Store_ID        Units  nUnique_Products  \
count  1050.000000  1050.000000  1050.000000  1050.000000       1050.000000   
mean   2022.428571     5.857143    25.500000  1038.633333         21.839048   
std       0.495107     3.198035    14.437746   363.042820          4.145564   
min    2022.000000     1.000000     1.000000   334.000000         12.000000   
25%    2022.000000     3.000000    13.000000   786.000000         19.000000   
50%    2022.000000     6.000000    25.500000   987.000000    

#Top stores by profit

In [5]:
# Group by 'Store_Name' and calculate the maximum profit for each store
max_profit_stores = df.groupby('Store_Name')['Profit'].max().reset_index()

# Sort the stores by 'Profit' in descending order to get the top stores
top_5_stores_by_max_profit = max_profit_stores.sort_values(by='Profit', ascending=False).head(5)

# Print the result
print(top_5_stores_by_max_profit)

                       Store_Name   Profit
9   Maven Toys Ciudad de Mexico 2  9539.09
31         Maven Toys Monterrey 3  8141.11
17       Maven Toys Guadalajara 3  7634.83
10  Maven Toys Ciudad de Mexico 3  7455.90
11  Maven Toys Ciudad de Mexico 4  7375.58


## Top Stores by revenue

In [7]:
# Group by 'Store_Name' and calculate the maximum revenue for each store
max_revenue_stores = df.groupby('Store_Name')['Revenue'].max().reset_index()

# Sort the stores by 'Revenue' in descending order to get the top stores
top_5_stores_by_max_revenue = max_revenue_stores.sort_values(by='Revenue', ascending=False).head(5)

# Print the result
print(top_5_stores_by_max_revenue)

                       Store_Name   Revenue
9   Maven Toys Ciudad de Mexico 2  36191.65
17       Maven Toys Guadalajara 3  32207.34
43            Maven Toys Toluca 1  30357.11
8   Maven Toys Ciudad de Mexico 1  28607.37
30         Maven Toys Monterrey 2  27842.28


## Top stores by profit, month

In [8]:
# Group by 'Store_Name' and 'Month', then calculate the maximum profit for each combination
max_profit_by_month = df.groupby(['Store_Name', 'Month'])['Profit'].max().reset_index()

# Sort the results by 'Profit' in descending order to get the top stores
top_5_stores_by_max_profit_month = max_profit_by_month.sort_values(by='Profit', ascending=False).head(5)

# Print the result
print(top_5_stores_by_max_profit_month)

                        Store_Name  Month   Profit
110  Maven Toys Ciudad de Mexico 2      3  9539.09
108  Maven Toys Ciudad de Mexico 2      1  9518.24
112  Maven Toys Ciudad de Mexico 2      5  9293.04
114  Maven Toys Ciudad de Mexico 2      7  9076.49
111  Maven Toys Ciudad de Mexico 2      4  9028.05


#Top stores by profit, year

In [9]:
# Group by 'Store_Name' and 'Year', then calculate the maximum profit for each combination
max_profit_by_year = df.groupby(['Store_Name', 'Year'])['Profit'].max().reset_index()

# Sort the results by 'Profit' in descending order to get the top stores
top_5_stores_by_max_profit_year = max_profit_by_year.sort_values(by='Profit', ascending=False).head(5)

# Print the result
print(top_5_stores_by_max_profit_year)

                       Store_Name  Year   Profit
19  Maven Toys Ciudad de Mexico 2  2023  9539.09
18  Maven Toys Ciudad de Mexico 2  2022  8371.92
63         Maven Toys Monterrey 3  2023  8141.11
35       Maven Toys Guadalajara 3  2023  7634.83
34       Maven Toys Guadalajara 3  2022  7615.61


## Top stores by revenue, month

In [10]:
# Group by 'Store_Name' and 'Month', then calculate the maximum revenue for each combination
max_revenue_by_month = df.groupby(['Store_Name', 'Month'])['Revenue'].max().reset_index()

# Sort the results by 'Profit' in descending order to get the top stores
top_5_stores_by_max_revenue_month = max_revenue_by_month.sort_values(by='Revenue', ascending=False).head(5)

# Print the result
print(top_5_stores_by_max_revenue_month)

                        Store_Name  Month   Revenue
114  Maven Toys Ciudad de Mexico 2      7  36191.65
112  Maven Toys Ciudad de Mexico 2      5  32234.08
210       Maven Toys Guadalajara 3      7  32207.34
111  Maven Toys Ciudad de Mexico 2      4  31748.68
110  Maven Toys Ciudad de Mexico 2      3  31358.32


## Top stores by revenue, year

In [11]:
# Group by 'Store_Name' and 'Year', then calculate the maximum revenue for each combination
max_revenue_by_year = df.groupby(['Store_Name', 'Year'])['Revenue'].max().reset_index()

# Sort the results by 'Profit' in descending order to get the top stores
top_5_stores_by_max_revenue_year = max_revenue_by_year.sort_values(by='Revenue', ascending=False).head(5)

# Print the result
print(top_5_stores_by_max_profit_month)

                        Store_Name  Month   Profit
110  Maven Toys Ciudad de Mexico 2      3  9539.09
108  Maven Toys Ciudad de Mexico 2      1  9518.24
112  Maven Toys Ciudad de Mexico 2      5  9293.04
114  Maven Toys Ciudad de Mexico 2      7  9076.49
111  Maven Toys Ciudad de Mexico 2      4  9028.05


## Stores with maximum profit city wise

In [12]:
max_profit_by_city = df.groupby(['Store_Name', 'Store_City'])['Profit'].max().reset_index()

# Sort the results by 'Profit' in descending order to get the top stores
top_5_stores_by_max_profit_city = max_profit_by_city.sort_values(by='Profit', ascending=False).head(5)

# Print the result
print(top_5_stores_by_max_profit_city)

                       Store_Name        Store_City   Profit
9   Maven Toys Ciudad de Mexico 2  Cuidad de Mexico  9539.09
31         Maven Toys Monterrey 3         Monterrey  8141.11
17       Maven Toys Guadalajara 3       Guadalajara  7634.83
10  Maven Toys Ciudad de Mexico 3  Cuidad de Mexico  7455.90
11  Maven Toys Ciudad de Mexico 4  Cuidad de Mexico  7375.58


## Stores with max profit location wise

In [13]:
max_profit_by_location = df.groupby(['Store_Name', 'Store_Location'])['Profit'].max().reset_index()

# Sort the results by 'Profit' in descending order to get the top stores
top_5_stores_by_max_profit_location= max_profit_by_location.sort_values(by='Profit', ascending=False).head(5)

# Print the result
print(top_5_stores_by_max_profit_location)

                       Store_Name Store_Location   Profit
9   Maven Toys Ciudad de Mexico 2        Airport  9539.09
31         Maven Toys Monterrey 3        Airport  8141.11
17       Maven Toys Guadalajara 3        Airport  7634.83
10  Maven Toys Ciudad de Mexico 3    Residential  7455.90
11  Maven Toys Ciudad de Mexico 4     Commercial  7375.58


## Stores with max revenue city wise

In [16]:
max_revenue_by_city = df.groupby(['Store_Name', 'Store_City'])['Revenue'].max().reset_index()

# Sort the results by 'Revenue' in descending order to get the top stores
top_5_stores_by_max_revenue_city = max_revenue_by_city.sort_values(by='Revenue', ascending=False).head(5)

# Print the result
print(top_5_stores_by_max_revenue_city)

                       Store_Name        Store_City   Revenue
9   Maven Toys Ciudad de Mexico 2  Cuidad de Mexico  36191.65
17       Maven Toys Guadalajara 3       Guadalajara  32207.34
43            Maven Toys Toluca 1            Toluca  30357.11
8   Maven Toys Ciudad de Mexico 1  Cuidad de Mexico  28607.37
30         Maven Toys Monterrey 2         Monterrey  27842.28


## Stores with max revenue location wise

In [17]:
max_revenue_by_location = df.groupby(['Store_Name', 'Store_Location'])['Revenue'].max().reset_index()

# Sort the results by 'Revenue' in descending order to get the top stores
top_5_stores_by_max_revenue_location = max_revenue_by_location.sort_values(by='Revenue', ascending=False).head(5)

# Print the result
print(top_5_stores_by_max_revenue_location)

                       Store_Name Store_Location   Revenue
9   Maven Toys Ciudad de Mexico 2        Airport  36191.65
17       Maven Toys Guadalajara 3        Airport  32207.34
43            Maven Toys Toluca 1       Downtown  30357.11
8   Maven Toys Ciudad de Mexico 1       Downtown  28607.37
30         Maven Toys Monterrey 2       Downtown  27842.28
