In [None]:
#Import Libraries & Load Dataset

In [27]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.backends.backend_pdf import PdfPages



In [4]:
# Load the retail sales dataset.
# The CSV location can be overridden with the RETAIL_SALES_CSV environment
# variable so the notebook is not tied to one machine; when the variable is
# unset, the original local path is used unchanged.
import os

DATA_PATH = os.environ.get(
    "RETAIL_SALES_CSV",
    r"C:\Users\pc\Downloads\archive (4)\retail_sales_dataset.csv",
)
df = pd.read_csv(DATA_PATH)

# Convert "Date" to datetime so the .dt accessor can be used on it later.
df["Date"] = pd.to_datetime(df["Date"])


In [5]:
# Data understanding & cleaning:
# print the schema summary (dtypes, non-null counts, memory usage), then
# leave the per-column missing-value count as the cell's displayed result.
df.info()
df.isna().sum()


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000 entries, 0 to 999
Data columns (total 9 columns):
 #   Column            Non-Null Count  Dtype         
---  ------            --------------  -----         
 0   Transaction ID    1000 non-null   int64         
 1   Date              1000 non-null   datetime64[ns]
 2   Customer ID       1000 non-null   object        
 3   Gender            1000 non-null   object        
 4   Age               1000 non-null   int64         
 5   Product Category  1000 non-null   object        
 6   Quantity          1000 non-null   int64         
 7   Price per Unit    1000 non-null   int64         
 8   Total Amount      1000 non-null   int64         
dtypes: datetime64[ns](1), int64(5), object(3)
memory usage: 70.4+ KB


Transaction ID      0
Date                0
Customer ID         0
Gender              0
Age                 0
Product Category    0
Quantity            0
Price per Unit      0
Total Amount        0
dtype: int64

In [9]:
 #DATA PREPARATION
# Seasonality helpers derived from the parsed "Date" column:
#   "Month"      -> month number
#   "Month Name" -> month name (used as the grouping key for the monthly trend)
date_parts = df["Date"].dt
df["Month"] = date_parts.month
df["Month Name"] = date_parts.month_name()





In [14]:
#CREATE REGION COLUMN
# NOTE: "Region" is SYNTHETIC. Each row is assigned one of four labels at
# random, so any per-region figure built from it reflects the random draw and
# not real geography.
# A fixed seed keeps the assignment (and every region chart/KPI derived from
# it) identical across "Restart & Run All"; without it the results change on
# every execution.
REGION_SEED = 42
region_rng = np.random.default_rng(REGION_SEED)
df['Region'] = region_rng.choice(['North', 'South', 'East', 'West'], size=len(df))


In [16]:
 #BUSINESS QUESTIONS ANSWERED
# Top products: total revenue per product category, highest first.
revenue_by_category = df.groupby("Product Category")["Total Amount"].sum()
top_products = revenue_by_category.sort_values(ascending=False)

# Seasonality: total revenue per month name, re-ordered from the alphabetical
# groupby order into calendar order. Months absent from the data come out of
# the reindex as NaN.
MONTH_ORDER = [
    "January", "February", "March", "April", "May", "June",
    "July", "August", "September", "October", "November", "December",
]
revenue_by_month = df.groupby("Month Name")["Total Amount"].sum()
monthly_sales = revenue_by_month.reindex(MONTH_ORDER)



In [17]:
# KPI calculations
order_totals = df["Total Amount"]

# Total revenue: sum of every transaction's total amount.
total_revenue = order_totals.sum()

# Average order value (AOV): mean total amount per transaction row.
average_order_value = order_totals.mean()

# Revenue per region, highest first.
revenue_by_region = df.groupby("Region")["Total Amount"].sum()
top_regions = revenue_by_region.sort_values(ascending=False)


In [33]:
# Monthly sales trend: line chart with point markers, written to a PNG file.
# The figure is closed after saving, so nothing is rendered inline.
fig, ax = plt.subplots()
monthly_sales.plot(ax=ax, marker="o")
ax.set_title("Monthly Sales Trend")
ax.set_xlabel("Month")
ax.set_ylabel("Revenue")
fig.tight_layout()
fig.savefig("monthly_sales_trend.png")
plt.close(fig)


In [20]:
# Top product categories: bar chart of revenue per category, written to a PNG
# file. The figure is closed after saving, so nothing is rendered inline.
fig, ax = plt.subplots()
top_products.plot(ax=ax, kind="bar")
ax.set_title("Top Product Categories by Revenue")
ax.set_xlabel("Product Category")
ax.set_ylabel("Revenue")
fig.tight_layout()
fig.savefig("top_products.png")
plt.close(fig)


In [30]:
#Revenue by Region
# Bar chart of revenue per region, written to a PNG file and then displayed.
fig, ax = plt.subplots()
top_regions.plot(kind="bar", ax=ax)
ax.set_title("Revenue by Region")
ax.set_xlabel("Region")
ax.set_ylabel("Revenue")
fig.tight_layout()
fig.savefig("revenue_by_region.png")
# Show BEFORE closing: once the figure has been closed there is no open
# figure left for plt.show() to display, so the previous close-then-show
# order rendered nothing.
plt.show()
plt.close(fig)


In [22]:
# KEY INSIGHTS
# Static narrative bullets (fixed strings, not computed from the data).
insights = [
    "Sales show clear seasonal variation across months.",
    "Certain product categories generate significantly higher revenue.",
    "Revenue is concentrated in specific regions.",
    "Average order value indicates strong per-transaction spending.",
]


In [23]:
# BUSINESS RECOMMENDATIONS
# Static narrative bullets (fixed strings, not computed from the data).
recommendations = [
    "Increase inventory before peak sales months.",
    "Focus marketing on top-performing product categories.",
    "Expand operations in high-revenue regions.",
    "Introduce promotions to boost low-performing months.",
]


In [None]:
# Build the PDF report: one text page (KPIs, insights, recommendations)
# followed by one page per previously saved chart image. Despite the file
# name, the resulting document therefore has four pages.
# Requires the three PNG files written by the plotting cells to exist in the
# working directory.
with PdfPages("Sales_Analysis_One_Page_Summary.pdf") as pdf:

    # KPI PAGE
    # Explicit figure/axes handles are used so that savefig/close always act
    # on this page rather than on whatever the "current figure" happens to be.
    fig, ax = plt.subplots(figsize=(8, 10))
    ax.axis("off")
    ax.text(0.1, 0.9, "Sales Analysis Summary", fontsize=18, weight="bold")

    ax.text(0.1, 0.8, f"Total Revenue: {total_revenue:,.2f}")
    ax.text(0.1, 0.75, f"Average Order Value: {average_order_value:,.2f}")

    # Lay the two bullet sections out with a running y-cursor instead of
    # fixed offsets, so a longer or shorter list never overlaps the next
    # heading. For the current 4 + 4 bullets this gives the same positions as
    # before: headings at 0.65 / 0.35, bullets from 0.60 and 0.30 downwards.
    line_step = 0.05
    cursor_y = 0.65
    sections = (
        ("Key Insights:", insights),
        ("Recommendations:", recommendations),
    )
    for heading, bullets in sections:
        ax.text(0.1, cursor_y, heading, fontsize=14, weight="bold")
        for bullet in bullets:
            cursor_y -= line_step
            ax.text(0.1, cursor_y, f"- {bullet}")
        # Leave one blank line between the last bullet and the next heading.
        cursor_y -= 2 * line_step

    pdf.savefig(fig)
    plt.close(fig)

    # Charts
    # Each saved PNG is read back and drawn as an image on its own page.
    chart_files = (
        "monthly_sales_trend.png",
        "top_products.png",
        "revenue_by_region.png",
    )
    for chart_file in chart_files:
        fig, ax = plt.subplots(figsize=(8, 10))
        ax.imshow(plt.imread(chart_file))
        ax.axis("off")
        pdf.savefig(fig)
        # Close every page figure so open figures do not accumulate.
        plt.close(fig)

