In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import plotly.express as px

import utils.loader as DATA
import utils.col_refs as REF

Loaded 50000 benefits usage data!
Loaded 50000 employees benefits usage data!


In [2]:
# Cost per usage by benefit.
#
# Builds `cost_analysis`: one row per benefit listed in DATA.BENEFITS (in that
# order) with the summed cost, the summed usage frequency and the resulting
# cost per single usage, then writes it to data/cost_analysis.csv.
#
# The totals come from a single groupby over the usage table instead of
# re-grouping the whole table twice for every benefit.
benefit_ids = pd.Index(DATA.BENEFITS[REF.benefitID], name=REF.benefitID)

usage_totals = (
    DATA.EMPLOYEES_BENEFITS_USAGE
    .groupby(REF.benefitID)[[REF.benefit_cost, REF.usage_freq]]
    .sum()
    # Benefits with no usage rows get zero totals rather than raising
    # KeyError, which is what a per-benefit get_group() lookup would do.
    .reindex(benefit_ids, fill_value=0)
)

cost_analysis = usage_totals.reset_index()

# Mask zero usage before dividing so an unused benefit yields NaN
# (an undefined cost per usage) instead of inf or a divide-by-zero warning.
total_usage = cost_analysis[REF.usage_freq]
cost_analysis["UsageCost"] = cost_analysis[REF.benefit_cost] / total_usage.where(total_usage != 0)

cost_analysis.to_csv("data/cost_analysis.csv", index=False)
print(cost_analysis)

    BenefitID  BenefitCost  UsageFrequency   UsageCost
0           1   1470280.38            5721  256.997095
1           2   1143105.81            5267  217.031671
2           3    551525.00            5503  100.222606
3           4    459088.64            6105   75.198794
4           5    127200.00            5644   22.537208
5           6   1008969.84            5721  176.362496
6           7    762750.00            5682  134.239704
7           8    206250.00            5412   38.109756
8           9   1422050.00            5684  250.184729
9          10   1479415.68            5244  282.115881
10         11   1520363.75            5385  282.333101
11         12   1336563.13            5417  246.734933
12         13    106795.00            5304   20.134804
13         14    120237.04            5237   22.959145
14         15    581201.78            5354  108.554684
15         16    145087.80            5709   25.413873
16         17    820536.50            5474  149.897059
17        

In [3]:
# Load the benefits table from disk; it is joined onto cost_analysis by
# BenefitID in the merge cell that follows.
benefits_data = pd.read_csv(filepath_or_buffer="data/benefits_data.csv")

In [5]:
# Merge cost_analysis with benefits_data on BenefitID so every cost row also
# carries the descriptive columns from benefits_data.
merged_costs = cost_analysis.merge(
    benefits_data,
    on="BenefitID",              # join key
    how="left",                  # keep all rows from cost_analysis
    suffixes=("", "_benefits"),  # columns present in both frames: right-hand copy gets "_benefits"
    # Fail loudly if benefits_data lists a BenefitID more than once: a
    # duplicated key would silently multiply rows here and inflate every
    # total computed from merged_costs.
    validate="many_to_one",
)

print(merged_costs.head())
print("Shape:", merged_costs.shape)

   BenefitID  BenefitCost  UsageFrequency   UsageCost         BenefitType  \
0          1   1470280.38            5721  256.997095     Retirement Plan   
1          2   1143105.81            5267  217.031671    Health Insurance   
2          3    551525.00            5503  100.222606   Commuter Benefits   
3          4    459088.64            6105   75.198794     Retirement Plan   
4          5    127200.00            5644   22.537208  Technology Stipend   

               BenefitSubType  BenefitCost_benefits  
0         401k Basic Matching                876.21  
1              PPO Individual                706.93  
2             Transit Subsidy                325.00  
3      401k High Contribution                261.44  
4  Monthly Internet Allowance                 75.00  
Shape: (30, 7)


In [6]:
# Pie chart of total cost per benefit subtype, built from merged_costs.
# plotly.express (px) is imported with the other dependencies in the first cell.
fig = px.pie(
    merged_costs,
    values="BenefitCost",
    names="BenefitSubType",  # slice labels
    # Only BenefitType is passed as custom data: the hover template below
    # already gets the subtype from %{label} and the cost from %{value}, and
    # with a single custom column %{customdata[0]} cannot pick up any other
    # field.
    hover_data=["BenefitType"],
    title="Benefit Cost Distribution by SubType",
)

# Replace the default hover text with subtype name, formatted cost and type.
# NOTE(review): pie traces may hand customdata to the template grouped per
# slice; with one custom column the placeholder should still render as just
# the benefit type -- confirm in the rendered hover.
fig.update_traces(
    hovertemplate="<b>%{label}</b><br>Cost: $%{value:,.2f}<br>Type: %{customdata[0]}"
)

fig.show()


In [8]:
# Roll the per-benefit totals up to one row per BenefitType.
benefit_type_costs = (
    merged_costs
    .groupby("BenefitType")["BenefitCost"]
    .sum()
    .reset_index()
)

# Interactive pie of the per-type totals; the default hover text is replaced
# with the type name and its formatted total cost.
fig = px.pie(
    data_frame=benefit_type_costs,
    names="BenefitType",
    values="BenefitCost",
    hover_data=["BenefitCost"],
    title="Benefit Cost Distribution by Benefit Type",
).update_traces(
    hovertemplate="<b>%{label}</b><br>Total Cost: $%{value:,.2f}"
)

fig.show()

In [9]:
# Persist the merged frame. This writes to the same path the cost-per-usage
# cell used for cost_analysis, so that earlier file is overwritten with the
# wider merged table.
merged_costs.to_csv(path_or_buf="data/cost_analysis.csv", index=False)
