### Connect to PostgreSQL and Load Data

In [1]:
import os
from urllib.parse import quote_plus

import pandas as pd
from sqlalchemy import create_engine

# PostgreSQL connection
# Connection settings are read from environment variables so that no
# credential is stored in the notebook. Only the password is mandatory; the
# remaining settings fall back to the values this notebook was written against.
db_password = os.environ.get("PGPASSWORD")
if db_password is None:
    raise RuntimeError(
        "Set the PGPASSWORD environment variable before running this notebook."
    )

db_user = os.environ.get("PGUSER", "postgres")
db_host = os.environ.get("PGHOST", "localhost")
db_port = os.environ.get("PGPORT", "5432")
db_name = os.environ.get("PGDATABASE", "Superstore_db")

# quote_plus escapes characters such as "@" or "/" that would otherwise
# corrupt the URL when they appear in the user name or password.
engine = create_engine(
    f"postgresql+psycopg2://{quote_plus(db_user)}:{quote_plus(db_password)}"
    f"@{db_host}:{db_port}/{db_name}"
)


In [2]:

# Load tables into Pandas
# Each query reads one table in full; the order of the queries below must
# match the order of the names on the left-hand side of the unpacking.
table_queries = (
    "SELECT * FROM fact_sales;",
    "SELECT * FROM dim_customer;",
    "SELECT * FROM dim_product;",
    "SELECT * FROM dim_region;",
    "SELECT * FROM dim_date;",
)
fact_sales, dim_customer, dim_product, dim_region, dim_date = (
    pd.read_sql(query, engine) for query in table_queries
)
# Preview the first rows of the fact table (the dimension tables are not shown)
print(fact_sales.head())

         order_id  order_date   ship_date customer_id       product_id  \
0  CA-2016-152156  2016-11-08  2016-11-11    CG-12520  FUR-BO-10001798   
1  CA-2016-152156  2016-11-08  2016-11-11    CG-12520  FUR-CH-10000454   
2  CA-2016-138688  2016-06-12  1900-01-01    DV-13045  OFF-LA-10000240   
3  CA-2016-161389  2016-12-05  2016-12-10    IM-15070  OFF-BI-10003656   
4  US-2015-108966  2015-10-11  1900-01-01    SO-20335  FUR-TA-10000577   

   region_id     sales  quantity  discount    profit  order_date_id  \
0          7  261.9600         2      0.00   41.9136            1.0   
1          7  731.9400         3      0.00  219.5820            1.0   
2          8   14.6200         2      0.00    6.8714            2.0   
3          8  407.9760         3      0.20  132.5922            6.0   
4          7  957.5775         5      0.45 -383.0310            3.0   

   ship_date_id  
0         584.0  
1         584.0  
2        1238.0  
3         108.0  
4        1238.0  


### Merge Dimensions with Fact Table

In [3]:
# Merge dimensions for complete analysis
# Every dimension is left-joined so that a fact row is kept even when its
# lookup is missing. validate="many_to_one" makes pandas raise MergeError when
# a dimension contains duplicate join keys; without it such duplicates would
# silently repeat fact rows and inflate every sum computed from df.
df = (
    fact_sales
    .merge(dim_customer, on="customer_id", how="left", validate="many_to_one")
    .merge(dim_product, on="product_id", how="left", validate="many_to_one")
    .merge(dim_region, on="region_id", how="left", validate="many_to_one")
    .merge(
        dim_date,
        left_on="order_date",
        right_on="date",
        how="left",
        validate="many_to_one",
    )
)


### Generate Summary Metrics

In [4]:
# Headline totals over every row of the merged frame
total_sales = df['sales'].sum()
total_profit = df['profit'].sum()
total_quantity = df['quantity'].sum()

print(f"Total Sales: {total_sales}")
print(f"Total Profit: {total_profit}")
print(f"Total Quantity Sold: {total_quantity}")

# Columns summed in the regional and time-based breakdowns below
measures = ['sales', 'profit']

# Ten customers with the highest summed sales
sales_by_customer = df.groupby('customer_id')['sales'].sum()
top_customers = sales_by_customer.sort_values(ascending=False).head(10)
print(top_customers)

# Ten products with the highest summed sales
sales_by_product = df.groupby('product_name')['sales'].sum()
top_products = sales_by_product.sort_values(ascending=False).head(10)
print(top_products)

# Sales and profit per region, largest sales first
region_totals = df.groupby('region')[measures].sum()
region_summary = region_totals.sort_values('sales', ascending=False)
print(region_summary)

# Sales and profit per (year, month); grouping keys become ordinary columns
monthly_totals = df.groupby(['year', 'month'])[measures].sum()
monthly_trend = monthly_totals.reset_index()
print(monthly_trend)

# Sales and profit per (year, quarter)
quarterly_totals = df.groupby(['year', 'quarter'])[measures].sum()
quarterly_trend = quarterly_totals.reset_index()
print(quarterly_trend)

# Sales and profit per year
yearly_totals = df.groupby('year')[measures].sum()
yearly_trend = yearly_totals.reset_index()
print(yearly_trend)

# Write the merged fact-and-dimension frame to CSV without the row index
df.to_csv("sales_analysis_complete_data.csv", index=False)

Total Sales: 4594401.7206
Total Profit: 572794.0434
Total Quantity Sold: 75746
customer_id
SM-20320    50086.100
TC-20980    38104.436
RB-19360    30234.678
TA-21385    29191.240
AB-10105    28947.142
KL-16645    28350.458
SC-20095    28284.668
HL-15040    25746.596
SE-20110    24418.876
CC-12370    24258.144
Name: sales, dtype: float64
product_name
Canon imageCLASS 2200 Advanced Copier                                          123199.648
Fellowes PB500 Electric Punch Plastic Comb Binding Machine with Manual Bind     54906.768
Cisco TelePresence System EX90 Videoconferencing Unit                           45276.960
HON 5400 Series Task Chairs for Big and Tall                                    43741.152
GBC DocuBind TL300 Electric Binding System                                      39646.958
GBC Ibimaster 500 Manual ProClick Binding System                                38049.000
Hewlett Packard LaserJet 3310 Copier                                            37679.372
HP Designjet T520 

### Save Summary to CSV

In [5]:
# These summaries are indexed by their grouping key (customer_id,
# product_name, region), so the index is written as the first CSV column.
top_customers.to_csv("top_customers.csv", index=True)
top_products.to_csv("top_products.csv", index=True)
region_summary.to_csv("region_summary.csv", index=True)

# The trend tables were built with reset_index(), so their grouping keys are
# already ordinary columns and the positional index is omitted.
monthly_trend.to_csv("monthly_trend.csv", index=False)
quarterly_trend.to_csv("quarterly_trend.csv", index=False)
# yearly_trend is computed alongside the other trends; export it as well so
# every summary table produced by the notebook is saved.
yearly_trend.to_csv("yearly_trend.csv", index=False)
