In [1]:
! pip install dash

Collecting dash
  Downloading dash-3.1.1-py3-none-any.whl.metadata (10 kB)
Collecting Flask<3.2,>=1.0.4 (from dash)
  Downloading flask-3.1.1-py3-none-any.whl.metadata (3.0 kB)
Collecting Werkzeug<3.2 (from dash)
  Downloading werkzeug-3.1.3-py3-none-any.whl.metadata (3.7 kB)
Collecting importlib-metadata (from dash)
  Downloading importlib_metadata-8.7.0-py3-none-any.whl.metadata (4.8 kB)
Collecting retrying (from dash)
  Downloading retrying-1.4.0-py3-none-any.whl.metadata (7.5 kB)
Collecting blinker>=1.9.0 (from Flask<3.2,>=1.0.4->dash)
  Downloading blinker-1.9.0-py3-none-any.whl.metadata (1.6 kB)
Collecting click>=8.1.3 (from Flask<3.2,>=1.0.4->dash)
  Downloading click-8.2.1-py3-none-any.whl.metadata (2.5 kB)
Collecting itsdangerous>=2.2.0 (from Flask<3.2,>=1.0.4->dash)
  Downloading itsdangerous-2.2.0-py3-none-any.whl.metadata (1.9 kB)
Collecting zipp>=3.20 (from importlib-metadata->dash)
  Downloading zipp-3.23.0-py3-none-any.whl.metadata (3.6 kB)
Downloading dash-3.1.1-py3-non

In [2]:
import pandas as pd
import plotly.express as px
import dash
from dash import dcc, html, Input, Output
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler

In [4]:
pip install openpyxl


Collecting openpyxl
  Downloading openpyxl-3.1.5-py2.py3-none-any.whl.metadata (2.5 kB)
Collecting et-xmlfile (from openpyxl)
  Downloading et_xmlfile-2.0.0-py3-none-any.whl.metadata (2.7 kB)
Downloading openpyxl-3.1.5-py2.py3-none-any.whl (250 kB)
Downloading et_xmlfile-2.0.0-py3-none-any.whl (18 kB)
Installing collected packages: et-xmlfile, openpyxl
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2/2[0m [openpyxl]1/2[0m [openpyxl]
[1A[2KSuccessfully installed et-xmlfile-2.0.0 openpyxl-3.1.5
Note: you may need to restart the kernel to use updated packages.


#### Load Dataset

In [5]:
file_path = "Provider Sample DataSet MRP 2025 Spring.xlsx"
df = pd.read_excel(file_path, sheet_name="MetricData")

#### Data Cleaning

In [6]:
df.columns = df.columns.str.strip().str.replace(" ", "_").str.lower()
df["reportingperiodstartdate"] = pd.to_datetime(df["reportingperiodstartdate"], errors="coerce")
df["reportingperiodenddate"] = pd.to_datetime(df["reportingperiodenddate"], errors="coerce")

# Drop missing values
df = df.dropna(subset=["value", "specialty"])

### Machine Learning - Clustering Physicians by Workload

In [None]:
# Initialize Dash App
app = dash.Dash(__name__)

app.layout = html.Div([
    html.H1("Physician Burnout Analysis Dashboard"),

    # Dropdown for Specialty Selection
    html.Label("Select Specialty:"),
    dcc.Dropdown(
        id="specialty-dropdown",
        options=[{"label": spec, "value": spec} for spec in df["specialty"].unique()],
        value=df["specialty"].unique()[0],
        multi=False
    ),

    # Line Chart
    dcc.Graph(id="trend-graph"),

    # Scatter Plot
    dcc.Graph(id="cluster-graph")
])

# Callbacks to update charts
@app.callback(
    Output("trend-graph", "figure"),
    Input("specialty-dropdown", "value")
)
def update_trend_chart(specialty):
    df_filtered = df[df["specialty"] == specialty]
    fig = px.line(df_filtered, x="reportingperiodstartdate", y="value", title=f"Workload Trend: {specialty}")
    return fig

@app.callback(
    Output("cluster-graph", "figure"),
    Input("specialty-dropdown", "value")
)
def update_cluster_chart(specialty):
    df_filtered = df[df["specialty"] == specialty]
    fig = px.scatter(df_filtered, x="reportingperiodstartdate", y="value", color=df_filtered["cluster"].astype(str),
                     title=f"Clustering of Workload: {specialty}")
    return fig

# Run App
if __name__ == "__main__":
    app.run(debug=True)

[2025-07-16 23:40:22,534] ERROR in app: Exception on /_dash-update-component [POST]
Traceback (most recent call last):
  File "/usr/local/python/3.12.1/lib/python3.12/site-packages/flask/app.py", line 917, in full_dispatch_request
    rv = self.dispatch_request()
         ^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/python/3.12.1/lib/python3.12/site-packages/flask/app.py", line 902, in dispatch_request
    return self.ensure_sync(self.view_functions[rule.endpoint])(**view_args)  # type: ignore[no-any-return]
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/python/3.12.1/lib/python3.12/site-packages/dash/dash.py", line 1484, in dispatch
    response_data = ctx.run(partial_func)
                    ^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/python/3.12.1/lib/python3.12/site-packages/dash/_callback.py", line 698, in add_context
    raise err
  File "/usr/local/python/3.12.1/lib/python3.12/site-packages/dash/_callback.py", line 689, in add_context

In [8]:
scaler = StandardScaler()
df["scaled_value"] = scaler.fit_transform(df[["value"]])
kmeans = KMeans(n_clusters=3, random_state=42)
df["cluster"] = kmeans.fit_predict(df[["scaled_value"]])