# Exploratory Data Analysis (EDA)

---
Import Libraries:

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Notebook-wide presentation settings: use the ggplot theme for every chart
# and never truncate the column list when a DataFrame is displayed.
plt.style.use("ggplot")
pd.options.display.max_columns = None

Load Processed Data:

In [None]:
# Read the processed CSV and convert the Date column to datetime in one step.
file_path = "../Data/03_Processed_Data/Phoenix_Global_Sales_Processed_Data.csv"
df = pd.read_csv(file_path).assign(Date=lambda frame: pd.to_datetime(frame['Date']))

# Show the first rows as the cell output.
df.head()

Revenue & Profit Trend Over Time:

In [None]:
# Plot total Revenue and Profit per calendar month.

# Re-assert the datetime dtype so the .dt accessor below works even when this
# cell is run on its own; harmless if the column is already datetime.
df['Date'] = pd.to_datetime(df['Date'])

# Month key as a "YYYY-MM" string; this also adds a Year_Month column to df.
df['Year_Month'] = df['Date'].dt.to_period('M').astype(str)

# Monthly totals. groupby sorts the string keys, and "YYYY-MM" strings sort
# chronologically, so the rows come out in time order.
monthly_trend = df.groupby('Year_Month')[['Revenue', 'Profit']].sum()

monthly_trend.plot(figsize = (12,6))

plt.title("Monthly Revenue & Profit Trend")
plt.xlabel("Year-Month")
plt.ylabel("Amount")
plt.grid(True)
plt.xticks(rotation = 45)
# tight_layout must run after the tick labels are rotated; otherwise the
# margins are computed for unrotated labels and the rotated ones can be clipped.
plt.tight_layout()
plt.show()

Profit Margin Trend:

In [None]:
# Monthly profit margin (%) = monthly Profit / monthly Revenue * 100, stored as
# a new column on the monthly_trend table built by the revenue/profit trend cell.
monthly_trend['Profit_Margin_%'] = (monthly_trend['Profit'] / monthly_trend['Revenue']) * 100

monthly_trend['Profit_Margin_%'].plot(figsize = (12,5))
plt.title("Monthly Profit Margin Trend (%)")
# Explicit x-axis label, matching the revenue/profit trend chart, instead of
# the raw index name "Year_Month" that pandas would show by default.
plt.xlabel("Year-Month")
plt.ylabel("Profit Margin %")
plt.grid(True)
plt.xticks(rotation = 45)
# Run after the rotation so the layout leaves room for the slanted labels.
plt.tight_layout()
plt.show()

Country-wise Revenue:

In [None]:
# Total revenue per country, largest first.
country_revenue = df.groupby('Country')['Revenue'].sum().sort_values(ascending=False)

# Bar chart, configured through the Axes object returned by pandas.
ax = country_revenue.plot.bar(figsize=(10, 5))
ax.set_title("Revenue by Country")
ax.set_ylabel("Revenue")
ax.grid(True)
ax.tick_params(axis='x', labelrotation=0)  # keep country names horizontal
plt.show()

Region-wise Profit:

In [None]:
# Total profit per region, most profitable first.
region_profit = (
    df.groupby('Region')['Profit']
      .sum()
      .sort_values(ascending=False)
)

ax = region_profit.plot.bar(figsize=(10, 5))
ax.set_title("Profit by Region")
ax.set_ylabel("Profit")
ax.grid(True)
ax.tick_params(axis='x', labelrotation=0)  # keep region names horizontal
plt.show()

Product Performance:

In [None]:
# Revenue and Profit totals for each product type, drawn as grouped bars.
product_summary = df.groupby('Product_Type')[['Revenue', 'Profit']].sum()

ax = product_summary.plot.bar(figsize=(10, 5))
ax.set_title("Revenue & Profit by Product Type")
ax.tick_params(axis='x', labelrotation=0)  # horizontal category labels
ax.grid(True)
plt.show()

Sales Channel Comparison:

In [None]:
# Revenue and Profit totals for each sales channel, drawn as grouped bars.
channel_summary = df.groupby('Sales_Channel')[['Revenue', 'Profit']].sum()

ax = channel_summary.plot.bar(figsize=(8, 5))
ax.set_title("Performance by Sales Channel")
ax.tick_params(axis='x', labelrotation=0)  # horizontal category labels
ax.grid(True)
plt.show()


Customer Type Analysis:

In [None]:
# Share of total revenue contributed by each customer type.
customer_summary = df.groupby('Customer_Type')['Revenue'].sum()

# Pie chart with each slice labelled by its percentage (one decimal place).
ax = customer_summary.plot.pie(autopct='%1.1f%%', figsize=(6, 6))
ax.set_title("Revenue Contribution by Customer Type")
plt.show()


Discount vs Profit:

In [None]:
# Scatter of per-row Profit against Discount_%; the low alpha keeps
# overlapping points readable.
fig, ax = plt.subplots(figsize=(8, 5))
sns.scatterplot(data=df, x='Discount_%', y='Profit', alpha=0.4, ax=ax)
ax.set_title("Discount vs Profit Relationship")
plt.show()

Delivery Performance Impact:

In [None]:
# Mean profit for each Delivery_Performance category.
delivery_profit = df.groupby('Delivery_Performance')['Profit'].mean()

ax = delivery_profit.plot.bar(figsize=(8, 5))
ax.set_title("Average Profit by Delivery Speed")
ax.set_ylabel("Avg Profit")
ax.tick_params(axis='x', labelrotation=0)  # horizontal category labels
ax.grid(True)
plt.show()

Top 10 Branches by Revenue:

In [None]:
# Ten branches with the highest total revenue, largest first.
top_branches = (
    df.groupby('Phoenix_Branch_ID')['Revenue']
      .sum()
      .sort_values(ascending=False)
      .head(10)
)

ax = top_branches.plot.bar(figsize=(10, 5))
ax.set_title("Top 10 Phoenix Branches by Revenue")
ax.tick_params(axis='x', labelrotation=0)  # horizontal branch IDs
ax.grid(True)
plt.show()

Loss-Making Transactions:

In [None]:
# Count the rows whose Profit is negative and express them as a percentage of
# all rows; the trailing tuple is displayed as the cell output.
is_loss = df['Profit'].lt(0)
total_rows = len(df)
loss_count = is_loss.sum()
loss_percentage = loss_count / total_rows * 100
loss_count, loss_percentage

Profit Margin Distribution:

In [None]:
# Histogram (50 bins) of the per-row Profit_Margin_% values with a KDE overlay.
fig, ax = plt.subplots(figsize=(8, 5))
sns.histplot(df['Profit_Margin_%'], bins=50, kde=True, ax=ax)
ax.set_title("Profit Margin Distribution")
ax.tick_params(axis='x', labelrotation=0)
ax.grid(True)
plt.show()

---

# Observations
1. **Revenue and Profit show a consistent upward trend over multiple years**, indicating strong business growth for Phoenix.
2. A few countries contribute the majority of total revenue, highlighting key international markets driving business performance.
3. Certain regions outperform others in profitability, suggesting better operational efficiency and market demand in those areas.
4. Among product types, some categories generate significantly higher revenue and profit, making them core revenue drivers for the company.
5. The online sales channel contributes the highest share of revenue compared to retail and partner channels, showing strong digital presence.
6. Returning customers generate a larger portion of revenue compared to new customers, indicating good customer retention.
7. Higher discount levels tend to reduce profit margins, showing that aggressive discounting impacts profitability.
8. Faster delivery performance is associated with higher average profit, emphasizing logistics efficiency; the link to better customer ratings is not charted in this notebook and should be verified separately.
9. A small number of Phoenix branches contribute a large portion of total sales, reflecting revenue concentration.
10. Although most transactions are profitable, a noticeable percentage result in losses, highlighting areas for cost control and pricing optimization.

---