## 3. Category-Based Rating and Price Analysis

### Step 1: Import Libraries and Load Data

In [None]:
import pandas as pd
import plotly.express as px
import numpy as np

# Load dataset (replace 'dataset.csv' with your file path)
df = pd.read_csv('dataset.csv')

# Display dataset overview.
# DataFrame.info() writes its report to stdout itself and returns None, so it
# is called directly; wrapping it in print() would emit a stray "None" line.
print("Dataset Overview:")
df.info()
print("\nSample Data:")
print(df.head())


### Step 2: Create Treemap Visualization

In [None]:
# Initial treemap of the data as loaded.
#
# One tile per 'Category'; tile size is driven by the 'Price' column and
# tile colour by the 'Rating' column. The tile text shows the label, the
# value and the share of the current entry.
fig = px.treemap(
    data_frame=df,
    title="Category-Based Price and Rating Analysis",
    path=['Category'],
    values='Price',
    color='Rating',
    color_continuous_scale='viridis',
).update_traces(textinfo='label+value+percent entry')
fig.show()

### Step 3: Feedback Loop for Transformation

In [None]:
# Handle Outliers and Inconsistencies
#
# Shows summary statistics for 'Price', asks the user which transformation
# to apply, and rebinds/updates `df` accordingly so that the following
# cells visualize the transformed data.

# Identify potential issues in Price column (e.g., extreme values)
print("\nPrice Summary:")
print(df['Price'].describe())

# Provide user options for transformation
print("\nFeedback Options:")
print("1. Remove outliers in the 'Price' column (above 99th percentile).")
print("2. Normalize the 'Price' column (min-max scaling).")
print("3. Proceed without transformation.")

# User input. Text that is not an integer (e.g. "", "one", "2.0") is treated
# as an invalid choice instead of crashing the cell with a ValueError.
raw_choice = input("Enter your choice (1, 2, or 3): ")
try:
    choice = int(raw_choice)
except ValueError:
    choice = None

if choice == 1:
    # Remove outliers in Price: keep only rows at or below the 99th percentile.
    price_cap = df['Price'].quantile(0.99)
    # .copy() makes the result an independent frame, so a later column
    # assignment (e.g. re-running this cell with option 2) does not hit
    # pandas' chained-assignment warning.
    df = df[df['Price'] <= price_cap].copy()
    print(f"Outliers removed. Price capped at 99th percentile: {price_cap:.2f}")
elif choice == 2:
    # Normalize Price column to the [0, 1] range (min-max scaling).
    min_price = df['Price'].min()
    max_price = df['Price'].max()
    price_range = max_price - min_price
    if price_range > 0:
        # NOTE(review): scaling maps the minimum price to 0; confirm the
        # downstream charts cope with zero-valued rows.
        df['Price'] = (df['Price'] - min_price) / price_range
        print(f"Price column normalized (min: {min_price:.2f}, max: {max_price:.2f}).")
    else:
        # A zero (or NaN) range would turn the whole column into NaN through
        # division by zero, so the column is left untouched instead.
        print("Price column has no usable range (min equals max); normalization skipped.")
elif choice == 3:
    print("Proceeding without transformation.")
else:
    print("Invalid choice. No transformations applied.")

### Step 4: Re-Visualize Treemap Post-Transformation

In [None]:
# Redraw the treemap from the current contents of `df`, using the 'plasma'
# colour scale and an "Updated" title.
fig = px.treemap(
    data_frame=df,
    title="Updated Category-Based Price and Rating Analysis",
    color_continuous_scale='plasma',
    path=['Category'],   # one tile per category
    color='Rating',      # tile colour follows the rating column
    values='Price',      # tile size follows the price column
)
# Tile text: label, value, and share of the current entry.
fig.update_traces(textinfo='label+value+percent entry')
fig.show()

### Step 5: Sunburst Visualization (Optional Alternative)

In [None]:
# Sunburst alternative to the treemap, drawn from the current `df`.
#
# One sector per 'Category'; sector size is driven by 'Price' and sector
# colour by 'Rating'. Sector text shows the label plus its percentage of
# the parent and of the current entry.
fig = px.sunburst(
    data_frame=df,
    title="Category-Based Price and Rating Analysis (Sunburst)",
    path=['Category'],
    values='Price',
    color='Rating',
    color_continuous_scale='magma',
).update_traces(textinfo='label+percent parent+percent entry')
fig.show()
