# Retail Sales Analysis 2025

This notebook analyzes a small retail transactions dataset and produces:
1) Top products by total sales
2) Monthly sales trend

**Stack**: Python, Pandas, Matplotlib.
Dataset path: `../data/sales.csv` (relative to this notebook)

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
from pathlib import Path

# Exploration aid: never elide columns when a DataFrame is rendered
pd.options.display.max_columns = None


In [None]:
# Read the raw transactions CSV into a DataFrame.
# The path is relative, so it resolves against the kernel's working directory
# (normally the folder that contains this notebook).
DATA_PATH = Path('..').joinpath('data', 'sales.csv')
df = pd.read_csv(DATA_PATH)

# Quick look at what was loaded: dimensions first, then the leading rows
print("Shape:", df.shape)
df.head()

## Basic Data Cleaning

In [None]:
# Fail fast if the CSV lacks any column that the later cells rely on
expected_cols = {'TransactionID','Product','Month','Quantity','Price','Total'}
missing = expected_cols - set(df.columns)
if missing:
    raise ValueError(f"Missing columns: {missing}")

# Keep only the first row seen for each TransactionID
df = df.drop_duplicates(subset='TransactionID')

# Sanity checks: every remaining row must have a strictly positive quantity and price
assert df['Quantity'].gt(0).all(), "Quantity must be positive"
assert df['Price'].gt(0).all(), "Price must be positive"

# Summary statistics for every column (numeric and non-numeric alike)
df.describe(include='all')

## Top 10 Products by Total Sales

In [None]:
# Sum 'Total' per product and rank products from highest to lowest revenue
product_sales = (
    df.groupby('Product')['Total']
    .sum()
    .sort_values(ascending=False)
)

# The first ten entries of the ranking are the top sellers
top10 = product_sales.head(10)
top10

In [None]:
# Bar chart of the ten products with the highest summed 'Total'.
# Uses an explicit Figure/Axes pair so the calls below cannot leak onto another figure.
fig, ax = plt.subplots(figsize=(10, 5))
top10.plot(kind='bar', ax=ax)
ax.set_title('Top 10 Products by Total Sales')
ax.set_ylabel('Total Sales')
ax.set_xlabel('Product')
# Slant the product names and right-align them so each label ends under its own bar
plt.setp(ax.get_xticklabels(), rotation=45, ha='right')
fig.tight_layout()

# Save figure
out_path = Path('..') / 'dashboard' / 'sales_top10.png'
# savefig does not create missing folders; without this a fresh checkout
# that has no ../dashboard directory fails with FileNotFoundError.
out_path.parent.mkdir(parents=True, exist_ok=True)
fig.savefig(out_path)
plt.show()
print('Saved:', out_path)

## Monthly Sales Trend

In [None]:
# Sum 'Total' per value of 'Month', ordered by the month key
monthly = (
    df.groupby('Month')['Total']
    .sum()
    .sort_index()
)
monthly

In [None]:
# Line chart of summed 'Total' per month.
# Uses an explicit Figure/Axes pair so the calls below cannot leak onto another figure.
fig, ax = plt.subplots(figsize=(10, 5))
monthly.plot(kind='line', marker='o', ax=ax)
ax.set_title('Monthly Sales Trend')
ax.set_ylabel('Total Sales')
ax.set_xlabel('Month')
# NOTE(review): fixed ticks 1..12 assume 'Month' holds integer month numbers;
# confirm against the dataset before relying on the axis labels.
ax.set_xticks(range(1, 13))
ax.grid(True)
fig.tight_layout()

# Save figure
out_path2 = Path('..') / 'dashboard' / 'sales_trends.png'
# savefig does not create missing folders; without this a fresh checkout
# that has no ../dashboard directory fails with FileNotFoundError.
out_path2.parent.mkdir(parents=True, exist_ok=True)
fig.savefig(out_path2)
plt.show()
print('Saved:', out_path2)

## Quick Insights

In [None]:
# Headline numbers: the label and the value of the maximum in each aggregate
best_product, best_product_value = product_sales.idxmax(), product_sales.max()
best_month, best_month_value = monthly.idxmax(), monthly.max()

# Report both findings with totals rounded to two decimals
print(f"Best-selling product: {best_product} with total sales of {best_product_value:.2f}")
print(f"Peak month: {best_month} with total sales of {best_month_value:.2f}")