In [None]:
# Cost-per-usage: benefit cost divided by how often the benefit was used.
# Zero usage is replaced by NaN first so the division yields NaN, not inf.
merged['cost_per_usage'] = merged['BenefitCost'] / merged['UsageFrequency'].replace(0, np.nan)


def _min_max_scale(values):
    """Rescale *values* linearly onto [0, 1], leaving NaN entries as NaN.

    A column with no spread (max == min) would divide by zero and turn every
    row into NaN; in that case the non-NaN rows are mapped to 0 instead.
    """
    low = values.min()
    spread = values.max() - low
    if not spread > 0:  # zero spread, or NaN for an empty / all-NaN column
        return values - low  # 0 where a value exists, NaN where it is missing
    return (values - low) / spread


# Normalize and ROI (scale 0-1, ROI = normalized satisfaction / normalized cost-per-usage)
merged['norm_cost'] = _min_max_scale(merged['cost_per_usage'])
merged['norm_sat'] = _min_max_scale(merged['SatisfactionScore'])
# The epsilon keeps the row with the lowest cost-per-usage (norm_cost == 0)
# finite; that row's ROI can therefore reach 1 / 1e-5 = 1e5.
merged['ROI'] = merged['norm_sat'] / (merged['norm_cost'] + 1e-5)

# Mean ROI by BenefitID and by BenefitSubType
roi_per_benefit = merged.groupby('BenefitID')['ROI'].mean().reset_index()
roi_per_subtype = merged.groupby('BenefitSubType')['ROI'].mean().reset_index()

# Flag high-cost underutilized rows: cost-per-usage in the top quartile and
# usage in the bottom quartile.
high_cost = merged['cost_per_usage'] > merged['cost_per_usage'].quantile(0.75)
low_usage = merged['UsageFrequency'] < merged['UsageFrequency'].quantile(0.25)
# Rows with zero usage have NaN cost_per_usage, so they can never satisfy
# `high_cost` even though a paid-for benefit nobody used is the clearest case
# of under-utilization. Include them explicitly.
paid_never_used = (merged['UsageFrequency'] == 0) & (merged['BenefitCost'] > 0)
flags = merged[(high_cost & low_usage) | paid_never_used]

# Quadrant plot (ROI vs Cost-per-Usage)
# The two medians are drawn as dashed reference lines that split the scatter
# into four quadrants. Each column of `merged` is already a pandas Series, so
# its median can be taken directly.
roi_median = merged['ROI'].median()
cost_per_usage_median = merged['cost_per_usage'].median()

quadrant_axis_labels = {'cost_per_usage': 'Cost per Usage', 'ROI': 'ROI Score'}
fig6 = px.scatter(
    merged,
    x='cost_per_usage',
    y='ROI',
    color='BenefitSubType',
    title='ROI Quadrant Plot',
    labels=quadrant_axis_labels,
)

# Horizontal rule at the median ROI, vertical rule at the median cost.
fig6.add_hline(y=roi_median, line_dash="dash", annotation_text="Median ROI")
fig6.add_vline(x=cost_per_usage_median, line_dash="dash", annotation_text="Median Cost")
fig6.show()
# Cost-per-usage distributions, colored by benefit subtype.
cost_axis_label = {'cost_per_usage': 'Cost per Usage'}

# Histogram of Cost per Usage (30 bins)
fig7 = px.histogram(
    merged,
    x='cost_per_usage',
    color='BenefitSubType',
    nbins=30,
    labels=cost_axis_label,
    title='Distribution of Cost per Usage by Benefit Subtype',
)
fig7.show()

# Boxplot of Cost per Usage, one box per Benefit Subtype
fig8 = px.box(
    merged,
    x='BenefitSubType',
    y='cost_per_usage',
    color='BenefitSubType',
    labels={**cost_axis_label, 'BenefitSubType': 'Benefit Subtype'},
    title='Cost per Usage by Benefit Subtype',
)
fig8.show()

# ROI distributions, colored by benefit subtype.

# Histogram of ROI (30 bins)
roi_histogram_options = dict(
    x='ROI',
    nbins=30,
    color='BenefitSubType',
    title='Distribution of ROI by Benefit Subtype',
    labels={'ROI': 'ROI Score'},
)
fig9 = px.histogram(merged, **roi_histogram_options)
fig9.show()

# Boxplot of ROI, one box per Benefit Subtype
roi_box_options = dict(
    x='BenefitSubType',
    y='ROI',
    color='BenefitSubType',
    title='ROI by Benefit Subtype',
    labels={'ROI': 'ROI Score', 'BenefitSubType': 'Benefit Subtype'},
)
fig10 = px.box(merged, **roi_box_options)
fig10.show()

# PDF-style insights
# Markdown template for the written summary; it is printed as-is. "X" and "Y"
# are literal placeholders -- nothing in this block substitutes computed values.
# The dash in the "Flags" bullet was mojibake (a UTF-8 en dash decoded as
# Windows-1252) and has been restored to a real en dash.
# NOTE(review): the quadrant plot puts cost-per-usage on x and ROI on y, so
# low ROI with high cost would sit bottom-right -- confirm that "bottom-left"
# in the last bullet is the quadrant intended.
insights_pdf = """
### Cost Efficiency Insights
- **High ROI Benefits:** Subtype 'Health' has avg ROI of X, low cost-per-usage.
- **Flags:** Y benefits (e.g., 'Luxury Wellness') are high-cost & underutilized – consider discontinuation.
- **Recommendations:** Reallocate budget from low-ROI (bottom-left quadrant) to high-ROI areas.
"""
print(insights_pdf)