In [11]:
import pandas as pd

# --- Configuration -----------------------------------------------------------
# File locations and risk thresholds live in one place so they can be tuned
# without touching the logic below.
INPUT_PATH = r'C:\Users\TEJASWINI\Desktop\ecommerce.csv'
OUTPUT_PATH = 'order_risk_analysis.csv'

RETURNED_STATUS = 'Returned'            # Return_Status value that marks a returned order
DEFECTIVE_REASON = 'Defective'          # Return_Reason value that marks a defective item
PRODUCT_RETURN_RATE_THRESHOLD = 0.3     # 30% return rate threshold for product
CATEGORY_RETURN_RATE_THRESHOLD = 0.25   # 25% return rate threshold for category
HIGH_VALUE_ORDER_THRESHOLD = 500        # $500+ order value threshold
FAST_RETURN_MAX_DAYS = 7                # Returned within 7 days


def return_rate_stats(orders, key, rate_column):
    """Aggregate order and return counts per value of ``key``.

    Args:
        orders: DataFrame with at least the columns ``key``, ``Order_ID`` and
            ``Return_Status``.
        key: Name of the column to group by (e.g. ``'Product_ID'``).
        rate_column: Name to give the computed return-rate column.

    Returns:
        DataFrame indexed by ``key`` with the columns ``total_orders``
        (non-null ``Order_ID`` count), ``returns`` (rows whose
        ``Return_Status`` equals ``RETURNED_STATUS``) and ``rate_column``
        (``returns / total_orders``).
    """
    # A 0/1 indicator column lets pandas sum the returns in vectorized code
    # instead of calling a Python lambda once per group.
    indicator = orders[[key, 'Order_ID']].assign(
        returns=orders['Return_Status'].eq(RETURNED_STATUS).astype('int64')
    )
    stats = indicator.groupby(key).agg(
        total_orders=('Order_ID', 'count'),
        returns=('returns', 'sum'),
    )
    stats[rate_column] = stats['returns'] / stats['total_orders']
    return stats


def add_risk_flags(orders, product_stats, category_stats):
    """Return a copy of ``orders`` with return rates and risk flags added.

    Args:
        orders: Order-level DataFrame with the columns ``Product_ID``,
            ``Product_Category``, ``Product_Price``, ``Order_Quantity``,
            ``Return_Status``, ``Days_to_Return`` and ``Return_Reason``.
        product_stats: Frame indexed by ``Product_ID`` holding a
            ``product_return_rate`` column (see ``return_rate_stats``).
        category_stats: Frame indexed by ``Product_Category`` holding a
            ``category_return_rate`` column.

    Returns:
        A new DataFrame; the input is not modified. Added columns, in order:
        ``product_return_rate``, ``category_return_rate``,
        ``high_risk_product``, ``high_risk_category``, ``high_value_order``,
        ``high_risk_order``, ``fast_return``, ``defective_return``.
    """
    flagged = orders.copy()

    # Attach the per-product / per-category rate to every order. The stats
    # index is unique (it comes from a groupby), so a keyed lookup is enough;
    # keys missing from the stats yield NaN, which compares False below.
    flagged['product_return_rate'] = flagged['Product_ID'].map(
        product_stats['product_return_rate']
    )
    flagged['category_return_rate'] = flagged['Product_Category'].map(
        category_stats['category_return_rate']
    )

    # Define risk flags for each order
    flagged['high_risk_product'] = (
        flagged['product_return_rate'] >= PRODUCT_RETURN_RATE_THRESHOLD
    )
    flagged['high_risk_category'] = (
        flagged['category_return_rate'] >= CATEGORY_RETURN_RATE_THRESHOLD
    )
    order_value = flagged['Product_Price'] * flagged['Order_Quantity']
    flagged['high_value_order'] = order_value >= HIGH_VALUE_ORDER_THRESHOLD
    # NOTE(review): the recorded output of this cell reports the same count
    # (10000) for high_risk_category and high_risk_order, i.e. the category
    # flag alone decided the combined flag in that run. The category threshold
    # may be too low to discriminate -- confirm against the data.
    flagged['high_risk_order'] = (
        flagged['high_risk_product']
        | flagged['high_risk_category']
        | flagged['high_value_order']
    )

    # Add additional risk indicators (only meaningful for returned orders)
    was_returned = flagged['Return_Status'] == RETURNED_STATUS
    flagged['fast_return'] = (
        (flagged['Days_to_Return'] <= FAST_RETURN_MAX_DAYS) & was_returned
    )
    flagged['defective_return'] = (
        (flagged['Return_Reason'] == DEFECTIVE_REASON) & was_returned
    )
    return flagged


# The guard keeps the file I/O from running when the helpers are imported.
# In a notebook cell or a script run __name__ is "__main__", so the pipeline
# still executes and df / product_stats / category_stats remain globals.
if __name__ == "__main__":
    # Load the dataset
    df = pd.read_csv(INPUT_PATH)

    # Calculate return statistics for each product and product category.
    # The rates are computed from the same rows that are flagged afterwards,
    # so each order's own outcome is included in its rate.
    product_stats = return_rate_stats(df, 'Product_ID', 'product_return_rate')
    category_stats = return_rate_stats(
        df, 'Product_Category', 'category_return_rate'
    )

    df = add_risk_flags(df, product_stats, category_stats)

    # Save the enhanced dataset with risk flags
    df.to_csv(OUTPUT_PATH, index=False)

    print("Order-level risk analysis completed.")
    print(f"Dataset with risk flags saved to '{OUTPUT_PATH}'")
    print("\nRisk Summary:")
    print(f"- {df['high_risk_product'].sum()} orders with high-risk products")
    print(f"- {df['high_risk_category'].sum()} orders in high-risk categories")
    print(f"- {df['high_value_order'].sum()} high-value orders")
    print(f"- {df['high_risk_order'].sum()} total high-risk orders identified")
    print(f"- {df['fast_return'].sum()} fast returns (within 7 days)")
    print(f"- {df['defective_return'].sum()} defective product returns")

Order-level risk analysis completed.
Dataset with risk flags saved to 'order_risk_analysis.csv'

Risk Summary:
- 5052 orders with high-risk products
- 10000 orders in high-risk categories
- 5480 high-value orders
- 10000 total high-risk orders identified
- 2565 fast returns (within 7 days)
- 1327 defective product returns
