In [None]:
#1. import libraries
import json
from pathlib import Path

import matplotlib.pyplot as plt
import pandas as pd
import psycopg2
from sqlalchemy import create_engine, text
from sqlalchemy.engine import URL

In [86]:
#2. Load the database configuration
# Connection settings are read from db_config.json instead of being written
# in the notebook. The file must provide the five keys read below; a missing
# key raises KeyError right here rather than later at connection time.
#
# JSON is UTF-8 by specification, so decode it explicitly: the platform
# default encoding (e.g. cp1252 on Windows) would garble any non-ASCII
# character in the stored values.
with open("db_config.json", "r", encoding="utf-8") as file:
    config = json.load(file)

username = config["username"]
password = config["password"]
host = config["host"]
port = config["port"]
database = config["database"]

print ('Succesful loaded the credential!')

Succesful loaded the credential!


In [87]:
#3. Connect to PostgreSQL
# Build the URL from its components so that special characters in the
# username or password (e.g. "@", "/", ":") are escaped for us; pasting raw
# credentials into an f-string URL makes such values unparseable.
db_url = URL.create(
    "postgresql",
    username=username,
    password=password,
    host=host,
    port=port,
    database=database,
)
engine = create_engine(db_url)

# create_engine() is lazy and opens no connection by itself, so run a trivial
# statement first: the success message is then only printed when the database
# is actually reachable with these credentials.
with engine.connect() as connection:
    connection.execute(text("select 1"))
print (f'Connected to database {database}')

Connected to database shoes


In [88]:
#4. Query
#1. What are the 5 products that generated the highest total revenue?
# Revenue is the sum of `amount` per (product_name, brand), cast to
# numeric(16,2).
# NOTE(review): no date filter is applied, so every row of sales_shoes is
# counted. The original heading said 2025 while the exported chart title says
# 2022 -- confirm which period is intended before adding a filter.
# product_name and brand are added as tie-breakers: with LIMIT, rows that tie
# on revenue would otherwise be cut off in an arbitrary, run-dependent order.
query = """
select
	product_name,
	brand,
	SUM(amount)::numeric(16,2) as revenue
from
	sales_shoes
group by
	product_name, brand
order by
	revenue desc,
	product_name,
	brand
limit 5;
"""
df_1 = pd.read_sql(query, engine)
display (df_1)

Unnamed: 0,product_name,brand,revenue
0,Off-White Hoodie,Off-White,24214.82
1,Nike Dunk Low,Nike,23634.39
2,Nike Tech Fleece,Nike,23247.12
3,Yeezy Boost 350,Adidas,21658.65
4,Puma Joggers,Puma,21342.02


1. What are the 5 products that generated the highest total revenue in 2025?

In [89]:
# Question 2 -- total sales amount by brand.
# The statement sums `amount` per brand, casts the total to numeric(16,2) and
# lists the brands from the highest to the lowest revenue.
query = """
select
	brand,
	SUM(amount):: numeric(16,2) as revenue
from 
	sales_shoes
group by
	brand 
order by 
	revenue desc;
"""
# Run the statement through the shared engine and render the frame.
df_2 = pd.read_sql(sql=query, con=engine)
display(df_2)

Unnamed: 0,brand,revenue
0,Nike,67975.58
1,Adidas,36858.43
2,Off-White,24214.82
3,Puma,21342.02
4,Supreme,15647.51
5,New Era,13915.46
6,Essentials,13149.25


2. What is the total sales amount by brand?

In [90]:
#3. Which product type performs best in each country (based on revenue)?
# Step 1 (revenue_product_type): total `amount` per (country, product_type).
# Step 2 (rank_revenue): rank the product types inside each country by that
#   total, highest first. product_type is a tie-breaker so that two types with
#   equal revenue always resolve to the same winner instead of an arbitrary one.
# Final select: keep only the top-ranked type per country. The cast column is
#   aliased explicitly, and country is appended to the ordering so the row
#   order is fully determined.
query = """
with revenue_product_type as (
	select
		country,
		product_type,
		sum(amount) as revenue
	from sales_shoes
	group by
		product_type,
		country
	),
rank_revenue as (
	select
		*,
		row_number() over (
			partition by country
			order by revenue desc, product_type
		) as sales_rank
	from
		revenue_product_type
	)
select
	country,
	product_type,
	revenue::numeric(16,2) as revenue
from
	rank_revenue
where
	sales_rank = 1
order by
	product_type, revenue desc, country;
"""
df_3 = pd.read_sql(query, engine)
display (df_3)

Unnamed: 0,country,product_type,revenue
0,Germany,Hoodie,12784.53
1,USA,Hoodie,12076.48
2,Canada,Hoodie,12030.6
3,Japan,Sneakers,12966.86
4,Australia,Sneakers,11677.69
5,UK,Sneakers,11447.89
6,India,Sneakers,11208.55


3. Which product type (e.g. Sneakers, T-shirt, Hoodie) performs best in each country, based on revenue?

In [91]:
# Question 4 -- average unit price per product type, broken down by brand.
# The statement averages `unit_price` for every (product_type, brand) pair,
# casts it to numeric(16,2) and, within each product type, lists the brands
# from the highest to the lowest average price.
query = """
select
	product_type,
	brand,
	avg (unit_price):: numeric(16,2) as avg_price
from 
	sales_shoes
group by 
	product_type, brand
order by 
	product_type, avg_price desc;
"""
# Run the statement through the shared engine and render the frame.
df_4 = pd.read_sql(sql=query, con=engine)
display(df_4)

Unnamed: 0,product_type,brand,avg_price
0,Cap,New Era,166.12
1,Hoodie,Nike,168.83
2,Hoodie,Off-White,164.05
3,Hoodie,Supreme,161.31
4,Joggers,Puma,184.83
5,Sneakers,Nike,193.08
6,Sneakers,Adidas,173.0
7,T-shirt,Essentials,170.34


4. What is the average unit price per product type, and how does it vary by brand?

In [92]:
#5. Which sales contributed the most to total revenue per country?
# Rows are aggregated per (date, country, product_name), so a "transaction"
# here is one product's sales in one country on one day, not a single order
# line.
# Inside each country the aggregates are ranked by summed `amount`, highest
# first; date and product_name break ties so the selected row is always the
# same one. The outer query keeps the top row per country and orders by
# country explicitly -- without an ORDER BY the row order is unspecified.
query = """
with revenue_payment as (
	select
		date,
		country,
		product_name,
		sum(quantity) as total_sold,
		sum(amount) as revenue,
		row_number() over (
			partition by country
			order by sum(amount) desc, date, product_name
		) as rank_transaction
	from
		sales_shoes
	group by
		date, country, product_name
	)
select
	date,
	country,
	product_name,
	total_sold,
	revenue::numeric(16,2) as revenue
from
	revenue_payment
where
	rank_transaction = 1
order by
	country;
"""
df_5 = pd.read_sql(query, engine)
display (df_5)

Unnamed: 0,date,country,product_name,total_sold,revenue
0,2022-01-14,Australia,Puma Joggers,5.0,1447.35
1,2022-03-26,Canada,Fear of God Essentials Tee,5.0,1338.55
2,2022-04-07,Germany,Supreme Hoodie,5.0,1466.05
3,2022-07-27,India,Jordan 1 High,5.0,1458.5
4,2022-07-17,Japan,Nike Dunk Low,5.0,1350.7
5,2022-03-08,UK,Off-White Hoodie,5.0,1174.2
6,2022-02-07,USA,Off-White Hoodie,8.0,1649.08


5. Which sales transactions contributed the most to total revenue per country?

In [93]:
# Export every query result as a PNG image of a table, one file per question
# (sales_1.png ... sales_5.png) with the matching title above the table.
data_table = [df_1, df_2, df_3, df_4, df_5]
name_table = [1,2,3,4,5]
title = ["Top 5 Products by Total Revenue (2022)","Total Sales Amount by Brand", "Best-Performing Product Types by Country",
         "Average Unit Price by Product Type and Brand","Highest-Revenue Sales Transactions"]

# NOTE(review): absolute, machine-specific location kept as-is so the images
# land where they did before; consider a configurable project-relative folder.
dir_img = Path(r'C:\Users\user\Documents\Portfolio_Vista\Retail_Fashion\data\sales_query')
# savefig() does not create missing folders, so make sure the target exists.
dir_img.mkdir(parents=True, exist_ok=True)

for frame, number, heading in zip(data_table, name_table, title):
    if frame.empty:
        # No rows means a zero-height figure and no cells to draw; skip the
        # export instead of failing halfway through the loop.
        print(f"Skipping sales_{number}.png: the query returned no rows")
        continue

    # Size the canvas from the table shape: 3 in per column, 0.7 in per row.
    fig, ax = plt.subplots(figsize=(len(frame.columns) * 3, len(frame) * 0.7))
    ax.axis('tight')
    ax.axis('off')

    table = ax.table(
        cellText=frame.values,
        colLabels=frame.columns,
        loc='center',
        cellLoc='center'
    )
    table.scale(1, 1.5)
    # Fix the font size; automatic sizing would vary it from table to table.
    table.auto_set_font_size(False)
    table.set_fontsize(10)

    ax.set_title(heading, fontsize=14, fontweight='bold', pad=20)
    fig.tight_layout()
    # Join the path with pathlib: the former f"{dir_img}\sales_..." literal
    # contained "\s", an invalid escape sequence that triggers a SyntaxWarning.
    fig.savefig(dir_img / f"sales_{number}.png", bbox_inches='tight', dpi=300)
    # Release the figure explicitly so figures do not pile up across the loop.
    plt.close(fig)


  plt.savefig(f"{dir_img}\sales_{j}.png", bbox_inches='tight', dpi=300)
