In [None]:

import pandas as pd
import plotly.express as px
import plotly.graph_objects as go

# Load the data from the file
# NOTE: machine-specific absolute path; point it at the export to analyse.
file_path = r"C:\Users\szhang\Downloads\00001_12876_Dept-Shift-Counts_v1.0_11-13-2019 1_rs.txt"
# skipinitialspace=True makes the parser ignore spaces that follow a delimiter.
data = pd.read_csv(file_path, delimiter=',', skipinitialspace=True)

# Drop unnecessary columns
# 'Unnamed: N' is the name pandas gives to header-less columns (presumably
# caused here by trailing delimiters -- TODO confirm against the export).
# errors='ignore' keeps the script working for files that lack some of them.
data = data.drop(
    columns=['Unnamed: 7', 'Unnamed: 8', 'Unnamed: 9', 'LINE'],
    errors='ignore',
)

# Function to extract hour and minute safely
def extract_time(s):
    """Convert a schedule value into minutes since midnight.

    The last whitespace-separated token of ``str(s)`` is read as a time whose
    first two characters are the hour and whose next two are the minute
    (for example ``"0930"`` or ``"11/13/2019 1445"``).

    Args:
        s: Raw cell value; anything accepted by ``str()``.

    Returns:
        Minutes since midnight as an ``int`` in the range 0-1439, or ``None``
        when the value is blank, not numeric, or not a valid time of day.
    """
    tokens = str(s).split()
    if not tokens:
        # Blank / whitespace-only cell: there is no token to parse.
        return None

    time_str = tokens[-1]
    try:
        hour = int(time_str[:2])
        minute = int(time_str[2:4])
    except ValueError:
        return None

    # Reject values such as "9930" or "1275" that parse as integers but are
    # not real clock times; they would otherwise land outside the 24h axis.
    if not (0 <= hour <= 23 and 0 <= minute <= 59):
        return None

    return hour * 60 + minute  # convert time to minutes since midnight

# Extract the time from 'Sched Start Time' (minutes since midnight, or None)
data['Time'] = data['Sched Start Time'].apply(extract_time)

# Drop rows with invalid time values. .copy() gives an independent frame so
# the column assignments below do not trigger SettingWithCopyWarning.
data = data.dropna(subset=['Time']).copy()

# If any row failed to parse, the column can come back as float (NaN for the
# missing values). Cast to int so the HH:MM labels built below render as
# "09:30" instead of "9.0:30.0".
data['Time'] = data['Time'].astype(int)

# Define a custom sorting order for weekdays.
# Values of 'Week Day' that are not in this list become NaN in the
# categorical and are therefore left out of the grouping below.
weekday_order = ['Mon', 'Tues', 'Wed', 'Thurs', 'Fri', 'Sat', 'Sun']
data['Week Day'] = pd.Categorical(data['Week Day'], categories=weekday_order, ordered=True)

# Resample to 15-minute intervals (floor each time to the start of its bin)
data['Time'] = (data['Time'] // 15) * 15

# Group by 'Week Day' and 'Time' to count the number of employees.
# observed=False is spelled out on purpose: every weekday gets a row for every
# time bin (count 0 where nothing was seen), so all traces share one time grid
# and the result does not depend on the pandas version's default.
employee_counts = (
    data.groupby(['Week Day', 'Time'], observed=False)
    .size()
    .reset_index(name='Employee Count')
)

# Convert 'Time' back to HH:MM format for better readability in plots
employee_counts['Time'] = employee_counts['Time'].apply(lambda x: f"{x // 60:02}:{x % 60:02}")

# Create an interactive plot
fig = go.Figure()

# Plot each day with a unique color and add annotations for peak and low points
colors = px.colors.qualitative.Plotly
for i, day in enumerate(weekday_order):
    day_data = employee_counts[employee_counts['Week Day'] == day]
    if day_data.empty:
        # Nothing to draw; idxmax()/idxmin() would raise on an empty Series.
        continue

    color = colors[i % len(colors)]
    fig.add_trace(go.Scatter(
        x=day_data['Time'],
        y=day_data['Employee Count'],
        mode='lines+markers',
        name=day,
        line=dict(color=color),
    ))

    # Annotate peak point (first bin with the highest count)
    peak = day_data.loc[day_data['Employee Count'].idxmax()]
    fig.add_trace(go.Scatter(
        x=[peak['Time']],
        y=[peak['Employee Count']],
        mode='markers+text',
        name=f'Peak {day}',
        text=[f"Peak: {peak['Employee Count']}"],
        textposition='top center',
        marker=dict(color=color, size=10),
    ))

    # Annotate low point (first bin with the lowest count)
    low = day_data.loc[day_data['Employee Count'].idxmin()]
    fig.add_trace(go.Scatter(
        x=[low['Time']],
        y=[low['Employee Count']],
        mode='markers+text',
        name=f'Low {day}',
        text=[f"Low: {low['Employee Count']}"],
        textposition='bottom center',
        marker=dict(color=color, size=10),
    ))

# Update layout for better readability.
# The x values are "HH:MM" strings, i.e. categories rather than dates, so a
# date tickformat would have no effect. Declare the axis as categorical and
# sort it ascending, which is chronological for zero-padded HH:MM labels.
fig.update_layout(title='Employee Count Throughout the Day by Day of the Week',
                  xaxis_title='Time of the Day',
                  yaxis_title='Employee Count',
                  xaxis=dict(type='category', categoryorder='category ascending'),
                  legend_title='Day of the Week')

# Show plot
fig.show()
