In [2]:
! pip install dash


Collecting dash
  Downloading dash-2.18.2-py3-none-any.whl.metadata (10 kB)
Collecting dash-html-components==2.0.0 (from dash)
  Downloading dash_html_components-2.0.0-py3-none-any.whl.metadata (3.8 kB)
Collecting dash-core-components==2.0.0 (from dash)
  Downloading dash_core_components-2.0.0-py3-none-any.whl.metadata (2.9 kB)
Collecting dash-table==5.0.0 (from dash)
  Downloading dash_table-5.0.0-py3-none-any.whl.metadata (2.4 kB)
Collecting retrying (from dash)
  Downloading retrying-1.3.4-py3-none-any.whl.metadata (6.9 kB)
Downloading dash-2.18.2-py3-none-any.whl (7.8 MB)
   ---------------------------------------- 0.0/7.8 MB ? eta -:--:--
   ---------------------------------------- 0.0/7.8 MB 653.6 kB/s eta 0:00:12
    --------------------------------------- 0.1/7.8 MB 1.1 MB/s eta 0:00:08
   - -------------------------------------- 0.2/7.8 MB 1.7 MB/s eta 0:00:05
   -- ------------------------------------- 0.5/7.8 MB 2.8 MB/s eta 0:00:03
   ---- ----------------------------------

In [5]:
import pandas as pd
import plotly.express as px
import dash
from dash import dcc, html, Input, Output
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler

#### Load Dataset

In [10]:
# Workbook read by this notebook; the path is relative to the working directory.
file_path = "Provider Sample DataSet MRP 2025 Spring.xlsx"

# Only the "MetricData" sheet is loaded.
df = pd.read_excel(io=file_path, sheet_name="MetricData")

#### Data Cleaning

In [11]:
# Normalise the header row step by step: trim surrounding whitespace,
# turn inner spaces into underscores, then lowercase everything.
normalized_columns = df.columns.str.strip()
normalized_columns = normalized_columns.str.replace(" ", "_")
df.columns = normalized_columns.str.lower()

# Parse both reporting-period boundaries as datetimes; values that cannot be
# parsed become NaT instead of raising.
for date_col in ("reportingperiodstartdate", "reportingperiodenddate"):
    df[date_col] = pd.to_datetime(df[date_col], errors="coerce")

# Drop missing values: a row is removed when either required field is null.
required_columns = ["value", "specialty"]
df = df.dropna(subset=required_columns)

### Machine Learning - Clustering Physicians by Workload

In [12]:
# Standardise "value" (zero mean, unit variance) before clustering.
scaler = StandardScaler()
df["scaled_value"] = scaler.fit_transform(df[["value"]])

# Partition the rows into three clusters on the standardised value.
# n_init is set explicitly: its default changed from 10 to "auto" in
# scikit-learn 1.4, so leaving it implicit makes the labels depend on the
# installed version (and raises a FutureWarning on 1.2-1.3).
# random_state fixes the centroid initialisation so re-runs give the same labels.
kmeans = KMeans(n_clusters=3, n_init=10, random_state=42)
df["cluster"] = kmeans.fit_predict(df[["scaled_value"]])





#### Initialize Dash App

In [9]:
# Create the Dash application object.
app = dash.Dash(__name__)

# Distinct specialties, in order of first appearance in the data; they feed
# both the dropdown options and its initial selection.
specialty_choices = df["specialty"].unique()

# Page layout: heading, specialty selector, then the two graphs that the
# callbacks fill in.
app.layout = html.Div(
    children=[
        html.H1("Physician Burnout Analysis Dashboard"),
        # Specialty selector (single choice, defaults to the first specialty)
        html.Label("Select Specialty:"),
        dcc.Dropdown(
            id="specialty-dropdown",
            options=[{"label": name, "value": name} for name in specialty_choices],
            value=specialty_choices[0],
            multi=False,
        ),
        # Trend line chart
        dcc.Graph(id="trend-graph"),
        # Cluster scatter plot
        dcc.Graph(id="cluster-graph"),
    ]
)

# Callbacks to update charts
@app.callback(
    Output("trend-graph", "figure"),
    Input("specialty-dropdown", "value")
)
def update_trend_chart(specialty):
    """Build the trend line chart for the selected specialty.

    Args:
        specialty: Value currently selected in the ``specialty-dropdown``
            component.

    Returns:
        A Plotly Express line figure of ``value`` against
        ``reportingperiodstartdate`` for the rows of ``df`` whose
        ``specialty`` equals ``specialty``.
    """
    df_filtered = df[df["specialty"] == specialty]
    # Plotly connects points in row order, so order the rows chronologically;
    # otherwise the line doubles back whenever the source rows are not already
    # date-sorted. A stable sort keeps same-date rows in their original order.
    df_filtered = df_filtered.sort_values("reportingperiodstartdate", kind="stable")
    fig = px.line(df_filtered, x="reportingperiodstartdate", y="value", title=f"Workload Trend: {specialty}")
    return fig

@app.callback(
    Output("cluster-graph", "figure"),
    Input("specialty-dropdown", "value"),
)
def update_cluster_chart(specialty):
    """Build the cluster scatter plot for the selected specialty.

    Args:
        specialty: Value currently selected in the ``specialty-dropdown``
            component.

    Returns:
        A Plotly Express scatter figure of ``value`` against
        ``reportingperiodstartdate`` for the matching rows of ``df``,
        coloured by cluster label.
    """
    selected_rows = df[df["specialty"] == specialty]
    # Cast the labels to strings so Plotly treats them as discrete categories
    # rather than a continuous colour scale.
    cluster_labels = selected_rows["cluster"].astype(str)
    return px.scatter(
        selected_rows,
        x="reportingperiodstartdate",
        y="value",
        color=cluster_labels,
        title=f"Clustering of Workload: {specialty}",
    )

# Run App
# Start the Dash development server with debug tooling enabled.
# `app.run` is the supported entry point; `app.run_server` is the legacy
# spelling and is rejected by newer Dash releases.
if __name__ == "__main__":
    app.run(debug=True)


The behavior of DatetimeProperties.to_pydatetime is deprecated, in a future version this will return a Series containing python datetime objects instead of an ndarray. To retain the old behavior, call `np.array` on the result


The behavior of DatetimeProperties.to_pydatetime is deprecated, in a future version this will return a Series containing python datetime objects instead of an ndarray. To retain the old behavior, call `np.array` on the result


The behavior of DatetimeProperties.to_pydatetime is deprecated, in a future version this will return a Series containing python datetime objects instead of an ndarray. To retain the old behavior, call `np.array` on the result

