# In [None]:
# Import necessary libraries
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Load the cleaned data file
cleaned_file_path = 'Pet memorial data (clean).csv'
try:
    df_pet_memorial_cleaned = pd.read_csv(cleaned_file_path)
except FileNotFoundError:
    print(f"The file {cleaned_file_path} does not exist. Please run the data cleaning process and try again.")
    # Fall back to an empty DataFrame so the emptiness check below skips the analysis
    df_pet_memorial_cleaned = pd.DataFrame()

if not df_pet_memorial_cleaned.empty:
    # Step 1: Data Analysis
    # Every statistic and plot in this section uses only rows whose 'Deal' value is > 0.

    # Descriptive Statistics
    print("Descriptive Statistics for Price, Deal (Ignoring Deal of 0):")
    df_pet_memorial_filtered = df_pet_memorial_cleaned[df_pet_memorial_cleaned['Deal'] > 0]
    print(df_pet_memorial_filtered[['Price', 'Deal']].describe())

    # Sales Performance Analysis
    # Top products by 'Deal' (presumably the monthly sales count; confirm against the cleaning step)
    top_sales = df_pet_memorial_filtered.sort_values(by='Deal', ascending=False).head(10)
    print("Top 10 Products by Monthly Sales (Ignoring Deal of 0):")
    print(top_sales[['Title', 'Deal']])

    # Price Distribution (Ignoring Deal of 0)
    plt.figure(figsize=(12, 8))
    sns.histplot(df_pet_memorial_filtered['Price'], bins=10, kde=True, color='skyblue')
    plt.title('Price Distribution (Ignoring Deal of 0)', fontsize=16)
    plt.xlabel('Price', fontsize=14)
    plt.ylabel('Frequency', fontsize=14)
    plt.xticks(fontsize=12)
    plt.yticks(fontsize=12)
    plt.grid(axis='y', linestyle='--', alpha=0.7)
    plt.show()

    # Price vs. Deal Scatter Plot (Ignoring Deal of 0)
    plt.figure(figsize=(12, 8))
    # `sizes` only takes effect when a `size` variable is given, so map marker
    # size to 'Deal' as well; otherwise the (20, 200) range is silently ignored.
    sns.scatterplot(
        data=df_pet_memorial_filtered,
        x='Price',
        y='Deal',
        hue='Deal',
        size='Deal',
        palette='viridis',
        sizes=(20, 200),
        alpha=0.6,
    )
    plt.title('Price vs. Deal (Ignoring Deal of 0)', fontsize=16)
    plt.xlabel('Price', fontsize=14)
    plt.ylabel('Deal', fontsize=14)
    plt.xticks(fontsize=12)
    plt.yticks(fontsize=12)
    plt.legend(title='Deal', fontsize=12, title_fontsize=14, loc='upper right')
    plt.grid(axis='both', linestyle='--', alpha=0.7)
    plt.show()

else:
    print("The DataFrame is empty. No further processing will be done.")

# Categorize products based on keywords in the title
def categorize_product(title):
    """Return the product category for a listing title via keyword matching.

    The title is lower-cased and tested against each category's keyword list
    in a fixed order; the first category with any keyword contained in the
    title wins.

    Args:
        title: Listing title. Non-string values (e.g. NaN from a missing CSV
            cell) are accepted and classified as 'Other'.

    Returns:
        One of 'Urn & Keepsake Box', 'Photo Frame', 'Memorial Stone & Statue',
        'Memorial Jewelry', 'Memorial Decoration' or 'Other'.
    """
    # Missing titles arrive as float NaN / None; they have no .lower() method.
    if not isinstance(title, str):
        return 'Other'

    title = title.lower()
    # '宝石' (gemstone) contains '石' (stone). Mask it for the stone check only,
    # otherwise the earlier stone branch makes the '宝石' jewelry keyword unreachable.
    title_without_gemstone = title.replace('宝石', '')

    if any(keyword in title for keyword in ['骨灰', '盒', '葬', '祭', '罐']):
        return 'Urn & Keepsake Box'
    elif any(keyword in title for keyword in ['框', '图', '画', '照片', '相', '影']):
        return 'Photo Frame'
    elif any(keyword in title_without_gemstone for keyword in ['石', '墓', '碑', '雕', '塑']):
        return 'Memorial Stone & Statue'
    elif any(keyword in title for keyword in ['宝石', '链', '戒', '牙', '坠', '环']):
        return 'Memorial Jewelry'
    elif any(keyword in title for keyword in ['爪', '爪印', '玻璃', '灯', '铃', '挂件', '亚克力', '公仔', '装饰', '模']):
        return 'Memorial Decoration'
    else:
        # If no other categories match, classify as 'Other'
        return 'Other'

# Category analysis. Skipped when no data was loaded: the fallback empty
# DataFrame has no 'Title' column, so indexing it would raise KeyError.
if not df_pet_memorial_cleaned.empty:
    # Apply categorization function to the DataFrame
    df_pet_memorial_cleaned['Category'] = df_pet_memorial_cleaned['Title'].apply(categorize_product)

    # Analyze Product Categories
    category_counts = df_pet_memorial_cleaned['Category'].value_counts().reset_index()
    # Rename explicitly: the column names produced by reset_index() differ between pandas versions
    category_counts.columns = ['Category', 'Count']

    # Plot the Distribution of Product Categories
    plt.figure(figsize=(12, 8))
    sns.barplot(data=category_counts, x='Category', y='Count', hue='Category', palette='Oranges_r', dodge=False, legend=False)
    plt.title('Distribution of Pet Memorial Product Categories on Taobao', fontsize=16)
    plt.xlabel('Category', fontsize=14)
    plt.ylabel('Number of Products', fontsize=14)
    plt.xticks(rotation=45)
    plt.grid(axis='y', linestyle='--', alpha=0.7)
    plt.tight_layout()
    # Save before show(): showing can leave the current figure empty afterwards
    plt.savefig('pet_memorial_category_distribution.png')
    plt.show()

    # Display the top categories in a table format
    print("Top Pet Memorial Product Categories by Number of Listings:")
    print(category_counts.head(10))
else:
    print("The DataFrame is empty. Skipping product category analysis.")
