In [10]:
#1. import libraries
import json
from pathlib import Path

import matplotlib.pyplot as plt
import pandas as pd
import psycopg2
from sqlalchemy import create_engine, text
from sqlalchemy.engine import URL

In [11]:
#2. Load the database configuration
# Reads the PostgreSQL connection settings from a JSON file and exposes them as
# the module-level names used by the connection cell below.
# NOTE(review): the path is absolute and machine-specific; consider deriving it
# from a configurable base directory so the notebook runs on other machines.
# "utf-8-sig" decodes UTF-8 with or without a byte-order mark, so the result no
# longer depends on the Windows locale code page.
with open (r"C:\Users\user\Documents\Portfolio_Vista\Retail_Fashion\scripts\db_config.json", "r", encoding="utf-8-sig") as file:
    config = json.load(file)

# Fail early, naming every missing setting, instead of stopping at the first one.
required_keys = ("username", "password", "host", "port", "database")
missing_keys = [key for key in required_keys if key not in config]
if missing_keys:
    raise KeyError(f"db_config.json is missing required key(s): {', '.join(missing_keys)}")

username = config["username"]
password = config["password"]
host = config["host"]
port = config["port"]
database = config["database"]

print ('Succesful loaded the credential!')

Succesful loaded the credential!


In [12]:
#3. Connect to PostgreSQL
# Build the URL from its components rather than by string formatting, so that
# characters such as "@", ":", "/" or "%" in the password are escaped correctly.
db_url = URL.create(
    drivername="postgresql",
    username=username,
    password=password,
    host=host,
    port=int(port),  # the JSON file may store the port as a string or a number
    database=database,
)
engine = create_engine(db_url)

# create_engine() does not open a connection by itself; run a trivial query so
# the message below is only printed once the database has actually answered.
with engine.connect() as connection:
    connection.execute(text("select 1"))

print (f'Connected to database {database}')

Connected to database shoes


In [13]:
#4. Query
#1. Which regions show the highest revenue growth potential for marketing campaigns?
# The query sums `amount` per `country` in `sales_shoes` and sorts the totals
# from highest to lowest. It reports total revenue only; no period-over-period
# growth is calculated here.
# NOTE(review): the currency/unit of `amount` is not visible in this notebook - confirm.
query = """
select 
	country,
	sum (amount) as total_revenue
from
	sales_shoes
group by
	country
order by 
	total_revenue desc;
"""
# Run the SQL through `engine` and load the result set into a DataFrame.
df_1 = pd.read_sql(query, engine)
# Render the whole result table as the cell output.
display (df_1)

Unnamed: 0,country,total_revenue
0,Japan,31587.39
1,Canada,30851.19
2,Germany,29822.89
3,UK,29739.25
4,USA,26095.59
5,Australia,24094.02
6,India,20912.74


In [14]:
#2. How do payment modes correlate with high-value purchases?
# Assumption: a purchase is "high value" when its amount is 1000 or more
# (the filter below is `amount >= 1000`, so exactly 1000 is included).
# NOTE(review): the original note gives the unit as euro - confirm the currency of `amount`.
# The query counts the qualifying rows per `payment_mode` and lists the modes
# from most to fewest high-value transactions.
query = """
select
	payment_mode,
	count (payment_mode) as transaction_high_value
from
	sales_shoes
where 
	amount >= 1000
group by
	payment_mode
order by
	transaction_high_value desc;
"""
# Run the SQL through `engine` and load the result set into a DataFrame.
df_2 = pd.read_sql(query, engine)
# Render the whole result table as the cell output.
display (df_2)

Unnamed: 0,payment_mode,transaction_high_value
0,Card,16
1,Wallet,14
2,Cash on Delivery,7
3,UPI,5


In [15]:
#3. Which top 5 products could benefit from targeted campaigns based on gender?
# For every gender, products are ranked by total quantity sold and the first
# five are kept.
# - row_number() is used so that exactly five rows come back per gender.
# - product_name is the tie-breaker: products with the same quantity would
#   otherwise be numbered in an arbitrary order that can change between runs.
# - The final ORDER BY fixes the row order of the result; without it the
#   database is free to return the rows in any order.
query = """
with ranked_products as (
	select
		gender,
		product_name,
		sum (quantity) as total_sold,
		row_number () over (
			partition by gender
			order by sum (quantity) desc, product_name
		) as rank_sold_qty
	from 
		sales_shoes
	group by 
		gender, product_name
	)
select 
	gender,
	product_name,
	total_sold,
	rank_sold_qty
from
	ranked_products
where 
	rank_sold_qty <= 5
order by
	gender,
	rank_sold_qty;
"""
# Run the SQL through `engine` and load the result set into a DataFrame.
df_3 = pd.read_sql(query, engine)
display (df_3)

Unnamed: 0,gender,product_name,total_sold,rank_sold_qty
0,Men,Off-White Hoodie,60.0,1
1,Men,Nike Tech Fleece,51.0,2
2,Men,Adidas Ultraboost,46.0,3
3,Men,Supreme Hoodie,46.0,4
4,Men,Jordan 1 High,45.0,5
5,Unisex,Yeezy Boost 350,38.0,1
6,Unisex,Nike Tech Fleece,36.0,2
7,Unisex,Puma Joggers,33.0,3
8,Unisex,Off-White Hoodie,32.0,4
9,Unisex,Jordan 1 High,31.0,5


In [16]:
#4. Are there trends indicating emerging popular products that Nike could push in campaigns?
# Step 1 (nike_products): quantity sold per Nike product and month number.
# Step 2 (with_prev):     attach the quantity of the previous row of the same
#                         product (0 when there is no previous row).
# Step 3 (final select):  percentage change against that previous quantity;
#                         nullif() turns a previous quantity of 0 into NULL so
#                         the first row of each product has no growth value
#                         instead of raising a division-by-zero error.
# The ordering is applied in the final SELECT: an ORDER BY inside a CTE does not
# guarantee the order of the rows that are ultimately returned.
# NOTE(review): month_sales is only the month number, so rows from different
# years that share a month would be added together - confirm the data covers a
# single year.
# NOTE(review): lag() looks at the previous row that exists for the product, so
# after a month without sales the comparison skips that gap.
query = """
with nike_products as (
	select
		extract (month from date) as month_sales,
		product_name,
		brand,
		sum (quantity) as cur_sold
	from
		sales_shoes
	where 
		brand = 'Nike'
	group by 
		month_sales,
		product_name,
		brand
	),
with_prev as (
	select
		*,
		lag (cur_sold,1,0) over (partition by product_name order by month_sales) as prev_sold
	from 
		nike_products
	)
select 
	*,
	round((cur_sold - prev_sold)/nullif(prev_sold,0) * 100,2) as percent_growth
from
	with_prev
order by
	product_name,
	month_sales;
"""
# Run the SQL through `engine` and load the result set into a DataFrame.
df_4 = pd.read_sql(query, engine)
display (df_4)

Unnamed: 0,month_sales,product_name,brand,cur_sold,prev_sold,percent_growth
0,1.0,Jordan 1 High,Nike,15.0,0.0,
1,2.0,Jordan 1 High,Nike,11.0,15.0,-26.67
2,3.0,Jordan 1 High,Nike,12.0,11.0,9.09
3,4.0,Jordan 1 High,Nike,11.0,12.0,-8.33
4,5.0,Jordan 1 High,Nike,9.0,11.0,-18.18
5,6.0,Jordan 1 High,Nike,28.0,9.0,211.11
6,7.0,Jordan 1 High,Nike,16.0,28.0,-42.86
7,8.0,Jordan 1 High,Nike,11.0,16.0,-31.25
8,1.0,Nike Dunk Low,Nike,24.0,0.0,
9,2.0,Nike Dunk Low,Nike,3.0,24.0,-87.5


In [None]:
# Export every query result as a PNG picture of a table, one file per question:
# <dir_img>\marketing_promotions_<number>.png
data_table = [df_1, df_2, df_3, df_4]
titles =  [
    "Regions with Highest Revenue Growth Potential",
    "Correlation Between Payment Modes and High-Value Purchases",
    "Products Suitable for Targeted Marketing Campaigns",
    "Emerging Product Trends for Nike Campaign Opportunities"
]
# zip() silently drops unmatched items, so make a mismatch visible instead.
if len(titles) != len(data_table):
    raise ValueError(
        f"Expected one title per table, got {len(titles)} titles for {len(data_table)} tables"
    )
# File numbers are derived from the tables so the two lists cannot drift apart.
name_table = list(range(1, len(data_table) + 1))

dir_img = r'C:\Users\user\Documents\Portfolio_Vista\Retail_Fashion\data\marketing_promotions_query'
# Build file names with pathlib instead of "\" inside an f-string: "\m" is an
# invalid escape sequence and triggers a SyntaxWarning.
out_dir = Path(dir_img)
out_dir.mkdir(parents=True, exist_ok=True)  # savefig does not create folders

for frame, number, title in zip(data_table, name_table, titles):
    # A table needs at least one row: with no rows the figure height would be 0.
    if frame.empty:
        print(f"Skipped table {number} ({title}): the query returned no rows")
        continue

    # Size the figure from the table shape: 3 inches per column, 0.7 per row.
    fig, ax = plt.subplots(figsize=(len(frame.columns) * 3, len(frame) * 0.7))
    ax.axis('tight')
    ax.axis('off')

    table = ax.table(
        cellText=frame.values,
        colLabels=frame.columns,
        loc='center',
        cellLoc='center'
    )
    table.scale(1, 1.5)
    # Use a fixed font size instead of letting matplotlib shrink the text.
    table.auto_set_font_size(False)
    table.set_fontsize(10)

    ax.set_title(title, fontsize=14, fontweight='bold', pad=20)
    fig.tight_layout()
    fig.savefig(out_dir / f"marketing_promotions_{number}.png", bbox_inches='tight', dpi=300)
    # Release the figure explicitly so figures do not pile up in memory.
    plt.close(fig)


  plt.savefig(f"{dir_img}\marketing_promotions_{j}.png", bbox_inches='tight', dpi=300)
