# In [None]:  (Jupyter notebook cell prompt kept as a comment so the file parses as Python)
# 1. Import Libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# 2. Load the Dataset
# The CSV path is relative, so it is resolved against the current working
# directory of the process running this script.
df = pd.read_csv('shopping_trends.csv')

# 3. Data Exploration
print("First 5 rows of the dataset:")
print(df.head())

print("\nDataset Information:")
# DataFrame.info() writes its report to stdout itself and returns None, so it
# must not be wrapped in print() (that would emit a stray "None" line).
df.info()

print("\nMissing Values:")
# Per-column count of null cells.
print(df.isnull().sum())

# 4. Basic Statistics
print("\nStatistical Summary:")
# include='all' summarises non-numeric columns alongside numeric ones.
print(df.describe(include='all'))

# 5. Data Cleaning
# Remove duplicates (rows that are identical across every column).
df.drop_duplicates(inplace=True)

# Fill missing values: median for numeric columns, mode (most frequent value)
# for everything else.
for column in df.columns:
    values = df[column]
    if not values.isnull().any():
        # Nothing to impute in this column.
        continue

    if pd.api.types.is_numeric_dtype(values):
        fill_value = values.median()
    else:
        modes = values.mode()
        if modes.empty:
            # Every entry is missing, so there is no observed value to
            # impute from; leave the column as-is instead of raising.
            continue
        fill_value = modes.iloc[0]

    # Assign the result back to the frame. Calling
    # `df[column].fillna(..., inplace=True)` operates on an intermediate
    # Series; pandas warns about that pattern and, with Copy-on-Write
    # enabled, it leaves `df` unchanged.
    df[column] = values.fillna(fill_value)

# Verify missing values are handled
print("\nMissing Values after handling:")
print(df.isnull().sum())

# 6. Data Visualization
# Every chart is drawn on its own explicitly created Figure/Axes pair and is
# displayed with plt.show() before the next one is built.
# NOTE(review): newer seaborn releases (0.13+) are documented to warn when
# `palette` is passed without `hue`; confirm against the installed version
# before changing these calls.

# Gender Distribution
fig, ax = plt.subplots(figsize=(6, 4))
sns.countplot(data=df, x='Gender', palette='viridis', ax=ax)
ax.set_title('Gender Distribution')
plt.show()

# Purchase Amount Distribution (histogram with a KDE overlay)
fig, ax = plt.subplots(figsize=(8, 5))
sns.histplot(df['Purchase Amount (USD)'], bins=30, kde=True, color='blue', ax=ax)
ax.set_title('Purchase Amount Distribution')
ax.set_xlabel('Purchase Amount (USD)')
plt.show()

# Purchases by Category, bars ordered from most to least frequent
category_order = df['Category'].value_counts().index
fig, ax = plt.subplots(figsize=(10, 6))
sns.countplot(data=df, y='Category', order=category_order, palette='coolwarm', ax=ax)
ax.set_title('Number of Purchases by Category')
plt.show()

# Top Locations by Purchase Count
fig, ax = plt.subplots(figsize=(10, 6))
top_locations = df['Location'].value_counts().head(10)
sns.barplot(
    x=top_locations.values,
    y=top_locations.index,
    palette='magma',
    ax=ax,
)
ax.set_title('Top 10 Locations by Purchase Count')
ax.set_xlabel('Number of Purchases')
plt.show()

# Purchase Frequency by Season
fig, ax = plt.subplots(figsize=(8, 5))
sns.countplot(data=df, x='Season', palette='Set2', ax=ax)
ax.set_title('Purchases by Season')
plt.show()

# 7. Insights Summary (printed directly)
# The statements below are filled in from the data rather than left as
# "[insert ...]" template placeholders.
def _most_frequent(series, n=1):
    """Return the ``n`` most frequent non-null values of ``series`` as text.

    Values are ranked by ``Series.value_counts()`` (descending frequency) and
    joined with ", ". Returns ``'n/a'`` when the series has no non-null
    values, so the summary still prints instead of raising.
    """
    leaders = series.value_counts().head(n).index
    if len(leaders) == 0:
        return 'n/a'
    return ', '.join(str(value) for value in leaders)


leading_gender = _most_frequent(df['Gender'])
leading_season = _most_frequent(df['Season'])
leading_category = _most_frequent(df['Category'])
# Locations are ranked by number of purchases (row count), not by amount.
leading_locations = _most_frequent(df['Location'], n=3)

print("\nKey Insights:")
print(f"- Gender distribution shows the majority of customers are {leading_gender}.")
print(f"- Most purchases occur in the {leading_season} season.")
print(f"- The {leading_category} category is the most popular among customers.")
print(f"- Locations like {leading_locations} contribute significantly to overall sales.")

