# Importing necessary libraries

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns


# Load datasets

In [None]:
# Read the three source tables, in order, from CSV files whose relative paths
# resolve against the current working directory.
customers, products, transactions = (
    pd.read_csv(csv_name)
    for csv_name in ('Customers.csv', 'Products.csv', 'Transactions.csv')
)


# Data Cleaning

In [None]:
# Convert the date columns to pandas datetimes in place so that the `.dt`
# accessor (used later for monthly grouping) is available.
for frame, date_column in (
    (customers, 'SignupDate'),
    (transactions, 'TransactionDate'),
):
    frame[date_column] = pd.to_datetime(frame[date_column])

# Check for missing values

In [None]:
# Print, for each table, the number of missing entries in every column.
for heading, table in (
    ("Missing values in Customers:", customers),
    ("Missing values in Products:", products),
    ("Missing values in Transactions:", transactions),
):
    print(heading, table.isna().sum())


# Merge datasets for analysis

In [None]:
# Attach customer and product attributes to every transaction (pandas' default
# inner joins on the shared ID columns).
#
# `products` has a 'Price' column.  If the left-hand side also has one (e.g.
# a per-transaction price in Transactions.csv -- TODO confirm against the
# data), pandas' default suffixes would rename both to 'Price_x'/'Price_y',
# leaving no plain 'Price' column for later selections such as
# merged_data[['Quantity', 'TotalValue', 'Price']].  Suffixing only the
# product-side duplicates keeps the left-hand names intact; when no column
# names overlap, the result is identical to a plain merge.
merged_data = (
    transactions
    .merge(customers, on='CustomerID')
    .merge(products, on='ProductID', suffixes=(None, '_product'))
)


# EDA: Customer Analysis

In [None]:
# Count how many customer rows fall in each region (value_counts sorts the
# result from the most to the least frequent region) and print the tally.
customer_region_count = customers.loc[:, 'Region'].value_counts()
print("Customer distribution by region:\n", customer_region_count)


# EDA: Product Analysis

In [None]:
# Ten product names with the largest total quantity sold, highest first.
top_products = (
    merged_data
    .groupby(by='ProductName')['Quantity']
    .sum()
    .sort_values(ascending=False)
    .head(10)
)

# Mean catalogue price per product category (computed from the products table,
# not from the merged transactions).
avg_price_category = products['Price'].groupby(products['Category']).mean()

# EDA: Transaction Analysis

In [None]:
# Total transaction value per calendar month; the index is a monthly period
# derived from each row's TransactionDate.
monthly_sales = (
    merged_data['TotalValue']
    .groupby(merged_data['TransactionDate'].dt.to_period('M'))
    .sum()
)

# Ten customers with the largest summed transaction value, highest first.
high_value_customers = (
    merged_data
    .groupby(by='CustomerID')['TotalValue']
    .sum()
    .sort_values(ascending=False)
    .head(10)
)


# Visualization: Customer Distribution by Region

In [None]:
# Bar chart of the number of customers in each region.
plt.figure(figsize=(8, 5))
customer_region_count.plot.bar(
    color='skyblue',
    title='Customer Distribution by Region',
)
# Axis labels are applied to the current axes, i.e. the bar chart drawn above.
plt.xlabel('Region')
plt.ylabel('Number of Customers')
plt.show()


# Visualization: Monthly Sales Trends

In [None]:
# Line chart of total sales value per month, one marker per month.
plt.figure(figsize=(10, 6))
monthly_sales.plot.line(
    marker='o',
    title='Monthly Sales Trends',
    color='green',
)
plt.xlabel('Month')
plt.ylabel('Total Sales Value')
plt.grid()
plt.show()

# Visualization: Correlation Heatmap (Numerical Variables)

In [None]:
# Pairwise correlations (DataFrame.corr defaults) between the three numeric
# columns, drawn as an annotated heatmap.
correlation_matrix = merged_data.loc[:, ['Quantity', 'TotalValue', 'Price']].corr()
sns.heatmap(data=correlation_matrix, annot=True, cmap='coolwarm')
plt.title('Correlation Heatmap')
plt.show()