# Data Processing & Feature Engineering

---

Import Libraries:

In [None]:
import pandas as pd
import numpy as np

# Notebook display settings: show every column and render floats with
# two decimal places.
pd.options.display.max_columns = None
pd.options.display.float_format = "{:.2f}".format

Load Cleaned Data:

In [None]:
# Read the cleaned sales extract and convert the Date column to datetime
# so that the .dt accessors are available on it.
file_path = "../Data/02_Clean_Data/Phoenix_Global_Sales_Clean_Data.csv"
df = pd.read_csv(file_path).assign(
    Date=lambda frame: pd.to_datetime(frame['Date'])
)

# Preview the first rows of the loaded frame.
df.head()

Time-Based Features:

In [None]:
# Derive calendar fields from the Date column through its datetime accessor.
sale_date = df['Date'].dt

df['Month'] = sale_date.month
df['Month_Name'] = sale_date.month_name()

# Period labels are stored as plain strings.
quarter_period = sale_date.to_period('Q')
month_period = sale_date.to_period('M')
df['Quarter'] = quarter_period.astype(str)
df['Year_Month'] = month_period.astype(str)

Profitability Metrics:

- Profit Margin (%):

In [None]:
# Profit as a percentage of revenue. Rows whose Revenue is not strictly
# positive (zero, negative or missing) are assigned a margin of 0 instead
# of the raw ratio.
has_revenue = df['Revenue'] > 0
margin_pct = df['Profit'] / df['Revenue'] * 100
df['Profit_Margin_%'] = np.where(has_revenue, margin_pct, 0)

Revenue & Profit Buckets (Segmentation):

In [None]:
# Bucket Revenue into five ordered bands.
# pd.cut builds right-closed intervals and, by default, leaves the lowest
# edge open, so a Revenue of exactly 0 would fall outside (0, 1000] and be
# returned as NaN. include_lowest=True closes the first interval on the
# left so zero-revenue rows are labelled 'Very Low'. Negative revenue is
# still outside every bin and stays NaN.
df['Revenue_Category'] = pd.cut(
    df['Revenue'],
    bins=[0, 1000, 3000, 6000, 10000, np.inf],
    labels=['Very Low', 'Low', 'Medium', 'High', 'Very High'],
    include_lowest=True,
)

# Bucket Profit into five ordered bands. Intervals are right-closed
# (pd.cut default), so a Profit of exactly 0 lands in the 'Loss' band.
profit_edges = [-np.inf, 0, 1000, 3000, 5000, np.inf]
profit_labels = [
    'Loss',
    'Low Profit',
    'Medium Profit',
    'High Profit',
    'Very High Profit',
]
df['Profit_Category'] = pd.cut(df['Profit'], bins=profit_edges, labels=profit_labels)

Sales Performance Flag:

In [None]:
# Flag a sale as high-performing only when BOTH thresholds are exceeded:
# margin above 20 % and revenue above 3000.
margin_ok = df['Profit_Margin_%'] > 20
revenue_ok = df['Revenue'] > 3000
df['High_Performance_Sale'] = np.where(margin_ok & revenue_ok, 'Yes', 'No')

Delivery Performance Category:

In [None]:
# Classify delivery speed from Delivery_Days.
# pd.cut intervals are right-closed with the lowest edge excluded by
# default, so a value of 0 days would fall outside (0, 3] and become NaN.
# include_lowest=True puts 0-day deliveries in the 'Very Fast' band.
# Negative values remain NaN.
df['Delivery_Performance'] = pd.cut(
    df['Delivery_Days'],
    bins=[0, 3, 7, 14, np.inf],
    labels=['Very Fast', 'Fast', 'Normal', 'Delayed'],
    include_lowest=True,
)

Discount Impact Category:

In [None]:
# Classify the discount percentage into four bands.
# pd.cut intervals are right-closed with the lowest edge excluded by
# default, so a Discount_% of exactly 0 (no discount) would fall outside
# (0, 5] and become NaN. include_lowest=True assigns those rows to 'Low'.
# Values below 0 or above 100 are outside every bin and stay NaN.
df['Discount_Level'] = pd.cut(
    df['Discount_%'],
    bins=[0, 5, 15, 30, 100],
    labels=['Low', 'Medium', 'High', 'Very High'],
    include_lowest=True,
)

Branch Sales Volume:

In [None]:
# Attach to every row the total Units_Sold of its branch. transform('sum')
# returns a result aligned to the original rows, so it can be assigned
# directly as a new column.
units_by_branch = df.groupby('Phoenix_Branch_ID')['Units_Sold']
branch_sales = units_by_branch.transform('sum')
df['Branch_Total_Units'] = branch_sales


Customer Value Indicator:

In [None]:
# A row is 'High Value' when revenue exceeds 4000 AND the customer rating
# is at least 4; every other row is 'Standard'.
big_ticket = df['Revenue'] > 4000
well_rated = df['Customer_Rating'] >= 4
df['Customer_Value'] = np.where(big_ticket & well_rated, 'High Value', 'Standard')

Final Dataset Check:

In [None]:
# Final sanity check of the engineered dataset.
# df.info() prints its report itself.
df.info()

# A bare expression in the middle of a cell is evaluated and then
# discarded, so the preview has to be printed explicitly to be seen.
print(df.head())

# Summary statistics; as the last expression of the cell this is rendered
# by the notebook.
df.describe()

Export Processed Dataset:

In [15]:
# Write the engineered dataset to the processed-data folder; the row
# index is left out of the CSV.
processed_file_path = "../Data/03_Processed_Data/Phoenix_Global_Sales_Processed_Data.csv"
df.to_csv(processed_file_path, index=False)

print("Processed data exported successfully!")


Processed data exported successfully!


---

# Observations:
1. Created Time Dimensions (Month, Quarter, Year-Month).
2. Added Profit Margin KPI.
3. Segmented Revenue and Profit Levels.
4. Created Performance Indicators.
5. Categorized Delivery Speed.
6. Added Discount Impact Levels.
7. Prepared Analytical-Ready Dataset.
8. Exported Processed CSV.

---