In [1]:
pip install pandas matplotlib seaborn



In [3]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import os

# Set styles
sns.set(style="whitegrid")

# Create output folder for visualizations
output_dir = "superstore_charts"
os.makedirs(output_dir, exist_ok=True)

# Load data
df = pd.read_csv("superstore.csv", encoding='ISO-8859-1')

# Preprocessing
df['Order Date'] = pd.to_datetime(df['Order Date'])
df['Year'] = df['Order Date'].dt.year
df['Month'] = df['Order Date'].dt.to_period("M")

# 1. Sales Over Time
plt.figure(figsize=(12, 6))
monthly_sales = df.groupby('Month')['Sales'].sum()
monthly_sales.plot()
plt.title("Monthly Sales Trend")
plt.ylabel("Sales ($)")
plt.xlabel("Month")
plt.xticks(rotation=45)
plt.tight_layout()
plt.savefig(f"{output_dir}/01_monthly_sales_trend.png")
plt.close()

# 2. Sales by Category
plt.figure(figsize=(8, 6))
category_sales = df.groupby('Category')['Sales'].sum().sort_values()
sns.barplot(x=category_sales.values, y=category_sales.index)
plt.title("Total Sales by Category")
plt.xlabel("Sales ($)")
plt.tight_layout()
plt.savefig(f"{output_dir}/02_sales_by_category.png")
plt.close()

# 3. Profit by Region
plt.figure(figsize=(8, 6))
region_profit = df.groupby('Region')['Profit'].sum().sort_values()
sns.barplot(x=region_profit.values, y=region_profit.index, palette="coolwarm")
plt.title("Profit by Region")
plt.xlabel("Profit ($)")
plt.tight_layout()
plt.savefig(f"{output_dir}/03_profit_by_region.png")
plt.close()

# 4. Top 10 Products by Sales
plt.figure(figsize=(10, 6))
top_products = df.groupby('Product Name')['Sales'].sum().nlargest(10)
sns.barplot(x=top_products.values, y=top_products.index)
plt.title("Top 10 Products by Sales")
plt.xlabel("Sales ($)")
plt.tight_layout()
plt.savefig(f"{output_dir}/04_top10_products.png")
plt.close()

# 5. Discount vs Profit (Scatter Plot)
plt.figure(figsize=(8, 6))
sns.scatterplot(data=df, x='Discount', y='Profit', alpha=0.5)
plt.title("Discount vs Profit")
plt.xlabel("Discount")
plt.ylabel("Profit")
plt.tight_layout()
plt.savefig(f"{output_dir}/05_discount_vs_profit.png")
plt.close()

# 6. Sales by Segment and Region (Heatmap)
pivot_table = pd.pivot_table(df, values='Sales', index='Region', columns='Segment', aggfunc='sum')
plt.figure(figsize=(8, 6))
sns.heatmap(pivot_table, annot=True, fmt=".0f", cmap="YlGnBu")
plt.title("Sales by Segment and Region")
plt.tight_layout()
plt.savefig(f"{output_dir}/06_segment_region_heatmap.png")
plt.close()

print(f"✅ All charts saved in the folder: {output_dir}")



Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `y` variable to `hue` and set `legend=False` for the same effect.

  sns.barplot(x=region_profit.values, y=region_profit.index, palette="coolwarm")


✅ All charts saved in the folder: superstore_charts
