In [364]:
import os
import sys
# Resolve the directory two levels above the current working directory and make
# sure it is on sys.path, so that `from src import ...` works from this notebook.
module_path = os.path.abspath('../..')
print(module_path)
if sys.path.count(module_path) == 0:
    sys.path.append(module_path)

/Users/andrew/src/otp-scheduler


In [365]:
import pandas as pd
from src import parse

In [397]:
def sort_schedule(df):
    """Return a copy of `df` ordered by zone, then priority_rank, then code.

    The result gets a fresh 0..n-1 index (the old index is dropped), so row
    labels and row positions coincide in the returned frame.
    """
    sort_keys = ['zone', 'priority_rank', 'code']
    ordered = df.sort_values(by=sort_keys)
    return ordered.reset_index(drop=True)

In [398]:
# ASSUMPTIONS
# Total meal budget: the summed meal_log quantity must stay below this, and it is
# the denominator of the utilization KPI.
MEAL_CAPACITY = 10000
# Maximum number of hospital rows allowed per (meal, zone, restaurant) group.
MAX_DROPOFFS_PER_RUN = 4
# Minimum summed quantity required per (meal, restaurant) group.
MIN_MEALS_PER_RUN = 50

In [399]:
# Restaurant table, indexed by restaurant code so rows can be looked up by code.
restaurants_csv = '../../examples/boston/20200405/restaurants.csv'
restaurants = parse.parse_from_restaurants_csv(restaurants_csv).set_index('code')

In [400]:
# Quick sanity check: (rows, columns) of the restaurant table after indexing by code.
restaurants.shape

(8, 23)

In [401]:
# Hospital requests, put into the shared (zone, priority_rank, code) row order.
hospital_requests = parse.parse_from_hospital_requests_csv('../../examples/boston/20200405/hospitals.csv')
hospitals = sort_schedule(hospital_requests)
# Everything from column position 5 onward is treated as the requested-quantity grid
# (presumably one column per meal time; the first five columns look like metadata — TODO confirm).
schedule = hospitals.iloc[:, 5:]

In [522]:
# VIP requests use the same parser and the same row ordering as the regular hospital requests.
vip_requests = parse.parse_from_hospital_requests_csv('../../examples/boston/20200405/vips.csv')
vip_hospitals = sort_schedule(vip_requests)
# Same column-position convention as `schedule`: position 5 onward is the per-meal grid.
vip_schedule = vip_hospitals.iloc[:, 5:]
vip_schedule.shape

(30, 21)

In [523]:
# Scheduler output, sorted the same way as the request tables so rows can be matched by position.
results_raw = parse.parse_from_results_csv('../../examples/boston/20200405/results.csv')
results = sort_schedule(results_raw)
results.shape

(30, 26)

In [524]:
# Assignment grid: every column of `results` from position 5 onward, plus its column labels
# (used later as the meal names).
first_meal_col = 5
results_schedule_columns = results.columns[first_meal_col:]
results_schedule = results.iloc[:, first_meal_col:]

In [525]:
# Flatten the assignment grid into one record per non-null (hospital row, meal column) cell.
# Rows of `results_schedule`, `hospitals` and `schedule` are matched purely by position,
# so this relies on all three sharing the same row order.
log = []
vip_log = []  # initialised here but not filled in by this cell
for row_pos, assignments in results_schedule.iterrows():
    for col_pos, restaurant_code in enumerate(assignments):
        if pd.isnull(restaurant_code):
            continue
        meal_name = results_schedule_columns[col_pos]
        record = [
            hospitals.iloc[row_pos, 1],                # hospital name
            hospitals.iloc[row_pos, 4],                # zone
            meal_name,
            restaurant_code,
            schedule.iloc[row_pos, col_pos],           # quantity requested for this meal
            restaurants[meal_name][restaurant_code],   # restaurant's value in that meal column
        ]
        log.append(record)

In [526]:
# One row per scheduled delivery; column order mirrors the records appended to `log`.
meal_log_columns = ['hospital', 'zone', 'meal', 'restaurant', 'quantity', 'restaurant_capacity']
meal_log = pd.DataFrame(log, columns=meal_log_columns)
meal_log.head(10)

Unnamed: 0,hospital,zone,meal,restaurant,quantity,restaurant_capacity
0,BIDMC - West Campus,1,lunch_mon,PO,60.0,200.0
1,BIDMC - West Campus,1,lunch_tues,FC,60.0,300.0
2,BIDMC - West Campus,1,lunch_wed,PG,60.0,600.0
3,BIDMC - West Campus,1,lunch_thurs,PO,60.0,200.0
4,BIDMC - West Campus,1,lunch_fri,PG,60.0,600.0
5,BIDMC - East Campus,1,lunch_mon,PO,30.0,200.0
6,BIDMC - East Campus,1,lunch_tues,FC,30.0,300.0
7,BIDMC - East Campus,1,lunch_wed,PG,30.0,600.0
8,BIDMC - East Campus,1,lunch_thurs,PO,30.0,200.0
9,BIDMC - East Campus,1,lunch_fri,PG,30.0,600.0


## Rules / Validation Checks:
High-level
- [x] Within budget (# meals < capacity)

Restaurants
- [x] Restaurant has capacity for that order
- [x] Restaurant is delivering to one cluster per meal time
- [x] Restaurant is delivering at least 50 meals for a meal time they are assigned
- [x] Restaurant is not delivering to more than 4 dropoffs per meal time
- [x] Restaurant is within their max days per week

Allocation
- [x] VIP Orders are all fulfilled
- [x] All orders are fully-served (this is pretty much intrinsically assumed, given the format of our data)
- [x] Every HP has > 0 orders

In [527]:
def test(condition, *print_args):
    if not condition:
        print('TEST FAILURE:', print_args[0])
        for arg in print_args[1:]:
            print(arg)

In [528]:
# The dense meal log must not contain a single null: a null means the results do not
# line up with the request/restaurant spreadsheets they were joined against.
null_cell_count = meal_log.isnull().sum().sum()
test(null_cell_count == 0, 'we have nulls in our dense meal log set!')

In [529]:
# Within budget: total scheduled meals must be strictly below MEAL_CAPACITY.
total_quantity = meal_log['quantity'].sum()
test(total_quantity < MEAL_CAPACITY, 'we are above meal capacity', total_quantity, MEAL_CAPACITY)

In [530]:
# Restaurant assertions
# Rows where a single order's quantity exceeds the restaurant_capacity recorded for that row.
# NOTE(review): this compares each hospital order on its own; it does not sum the orders a
# restaurant has for the same meal before comparing. Confirm that per-order is the intended rule.
under_capacity = meal_log[meal_log['restaurant_capacity'] < meal_log['quantity']]
# Pass the offending rows so a failure shows which orders are over capacity,
# in line with the other validation cells.
test(under_capacity.empty, 'Some restaurants are assinged a meal that is over their capacity', under_capacity)

In [531]:
# Restaurant is delivering to one cluster per meal time:
# count the distinct zones behind every (meal, restaurant) pair and flag pairs with more than one.
restaurant_delivery_zones = (
    meal_log
    .groupby(['meal', 'restaurant'])[['zone']]
    .nunique()
)
multiple_zones = restaurant_delivery_zones.loc[restaurant_delivery_zones['zone'] > 1]
test(
    multiple_zones.empty,
    'Some restaurants are delivering to more than one zone for a meal!',
    multiple_zones,
)

TEST FAILURE: Some restaurants are delivering to more than one zone for a meal!
                         zone
meal         restaurant      
dinner_fri   PG             2
dinner_mon   FC             2
dinner_sat   MM             2
dinner_sun   PG             2
dinner_thurs LD             2
dinner_tues  FC             2
             LD             2
dinner_wed   CS             2
lunch_fri    FC             2
             PG             2
lunch_mon    FC             2
             LD             2
             PO             2
lunch_sun    LD             2
             MM             2
lunch_thurs  LD             2
lunch_wed    FC             2
             PG             3
             PO             2


In [532]:
# Restaurant is delivering at least `MIN_MEALS_PER_RUN` per run:
# sum the quantity of every (meal, restaurant) pair and flag totals below the minimum.
restaurant_quantity_per_meal = (
    meal_log
    .groupby(['meal', 'restaurant'])[['quantity']]
    .sum()
)
small_orders_per_meal = restaurant_quantity_per_meal.loc[
    restaurant_quantity_per_meal['quantity'] < MIN_MEALS_PER_RUN
]
test(
    small_orders_per_meal.empty,
    'Some restaurants are delivery a low quantity of meals for an order!',
    small_orders_per_meal,
)

TEST FAILURE: Some restaurants are delivery a low quantity of meals for an order!
                      quantity
meal      restaurant          
lunch_fri PO              30.0


In [533]:
# Restaurant is delivering no more than `MAX_DROPOFFS_PER_RUN` dropoffs per run.
# A "run" here is one (meal, zone, restaurant) group; its dropoffs are the hospital rows in it.
dropoffs_per_run = (
    meal_log
    .groupby(['meal', 'zone', 'restaurant'])[['hospital']]
    .count()
)
too_many_dropoffs = dropoffs_per_run.loc[dropoffs_per_run['hospital'] > MAX_DROPOFFS_PER_RUN]
test(
    too_many_dropoffs.empty,
    'Some restaurants are delivering to too many dropoff points per run!',
    too_many_dropoffs,
)

In [534]:
# Restaurant is within their max days per week

# Roll up all the meals into days
def column_to_day(series):
    """Count the distinct days referenced by a Series of meal names.

    Each name has the substrings 'lunch', 'breakfast', 'dinner' and '_' removed
    (in that order); whatever remains is treated as the day. The number of
    distinct remainders is returned.
    """
    def strip_meal_tokens(meal_name):
        for token in ('lunch', 'breakfast', 'dinner', '_'):
            meal_name = meal_name.replace(token, '')
        return meal_name

    return len(set(series.map(strip_meal_tokens)))
    

# Distinct days each restaurant is scheduled on, joined with the maximum days per week
# it listed (restaurants is indexed by 'code', which the merge matches against 'restaurant').
days_worked = meal_log.groupby(['restaurant']).agg({'meal': [('Days Worked', column_to_day)]}).reset_index()
days_worked.columns = ['restaurant', 'Days Worked']
days_with_capacity = days_worked.merge(restaurants[['max_days_per_week']], left_on='restaurant', right_on='code')
days_over_capacity = days_with_capacity[days_with_capacity['Days Worked'] > days_with_capacity['max_days_per_week']]
# Assert on days_over_capacity itself. The previous version tested too_many_dropoffs.empty
# (left over from the dropoff check), so this rule was never actually verified.
test(days_over_capacity.empty, 'Some restaurants are working more days than requested this week', days_over_capacity)

In [542]:
# Ensure all VIP orders are fulfilled
# Keep only the hospitals (rows) and meals (columns) that have at least one VIP request.
vip_schedule_dense = vip_schedule.dropna(how='all', axis=0).dropna(how='all', axis=1)
vip_orders = []
missing_meals = []
for i, row in vip_schedule_dense.iterrows():
    # `i` is a row label of vip_hospitals (a fresh 0..n-1 index from sort_schedule), so the
    # hospital name is read from that same frame rather than from `hospitals`, which would
    # only be right if both files happened to share an identical row order.
    # NOTE(review): assumes hospital names in vips.csv match those in hospitals.csv — confirm.
    hospital_name = vip_hospitals.iloc[i, 1]
    for meal_name, val in row.items():
        if pd.isnull(val):
            continue
        order = meal_log[(meal_log['hospital'] == hospital_name) & (meal_log['meal'] == meal_name)]
        vip_orders.append(order)
        if order.empty:
            missing_meals.append((hospital_name, meal_name))

test(len(missing_meals) == 0, 'Some VIP meal orders have not been fulfilled', missing_meals)

TEST FAILURE: Some VIP meal orders have not been fulfilled
[('Boston Healthcare for the Homeless', 'breakfast_mon'), ('Boston Healthcare for the Homeless', 'breakfast_tues'), ('Boston Healthcare for the Homeless', 'breakfast_wed'), ('Boston Healthcare for the Homeless', 'breakfast_thurs'), ('Boston Healthcare for the Homeless', 'breakfast_fri'), ('Boston Healthcare for the Homeless', 'breakfast_sat'), ('Boston Healthcare for the Homeless', 'breakfast_sun')]


In [611]:
# Stack every matched VIP order into one frame; reset_index() keeps the originating
# meal_log row label in an 'index' column, which the VIP rollup counts.
# pd.concat raises ValueError on an empty list, so when there are no VIP requests at all
# fall back to an empty slice of meal_log (same columns, zero rows).
vip_orders_df = (pd.concat(vip_orders) if vip_orders else meal_log.iloc[:0]).reset_index()

In [614]:
# Per-hospital totals: summed meal quantity and number of meal_log rows, largest total first.
# (The same rollup is rebuilt in a later cell, where the VIP counts are merged in.)
quantity_by_hospital = meal_log[['hospital', 'quantity']].groupby('hospital')
hospital_meals_rollup = quantity_by_hospital.agg({
    'quantity': [('total_meals_received', 'sum'), ('total_orders_fulfilled', 'count')]
})
hospital_meals_rollup = hospital_meals_rollup.sort_values(
    [('quantity', 'total_meals_received')], ascending=False
).reset_index()
hospital_meals_rollup.columns = ['hospital', 'total_meals_received', 'total_orders_fulfilled']

In [629]:
# Number of fulfilled VIP meal_log rows per hospital (counted via the preserved 'index' column).
vip_counts = vip_orders_df.groupby('hospital')[['index']].count()
vip_orders_rollup = vip_counts.reset_index()
vip_orders_rollup.columns = ['hospital', 'vip_meals_count']
vip_orders_rollup

Unnamed: 0,hospital,vip_meals_count
0,Boston Healthcare for the Homeless,14
1,Boston Medical Center,7
2,Brigham and Women's - Faulkner ED,4
3,Brigham and Women's Hospital - ED,4


In [650]:
# Per-hospital totals (summed quantity and row count), largest total first ...
quantity_by_hospital = meal_log[['hospital', 'quantity']].groupby('hospital')
hospital_totals = quantity_by_hospital.agg({
    'quantity': [('total_meals_received', 'sum'), ('total_orders_fulfilled', 'count')],
})
hospital_meals_rollup = hospital_totals.sort_values(
    [('quantity', 'total_meals_received')], ascending=False
).reset_index()
hospital_meals_rollup.columns = ['hospital', 'total_meals_received', 'total_orders_fulfilled']

# ... with the VIP counts attached; hospitals without VIP orders get 0 rather than NaN.
hospital_meals_rollup = hospital_meals_rollup.merge(vip_orders_rollup, on='hospital', how='left')
hospital_meals_rollup = hospital_meals_rollup.fillna(0)

In [651]:
# Every HP has > 0 orders
# hospital_meals_rollup is grouped from meal_log, so a hospital with no meal_log rows at all
# is simply absent from it and can never show up with a count of 0. Check the request table
# as well: any hospital that requested at least one meal but never appears in meal_log.
hospital_names = hospitals.iloc[:, 1]
has_request = schedule.notnull().any(axis=1)
was_served = hospital_names.isin(meal_log['hospital'])
hospitals_never_served = hospital_names[has_request & ~was_served].tolist()

hospitals_no_orders = hospital_meals_rollup[hospital_meals_rollup['total_orders_fulfilled'] == 0]
test(
    hospitals_no_orders.empty and not hospitals_never_served,
    'Some Hospitals received no orders!',
    hospitals_no_orders,
    hospitals_never_served,
)

## KPIs:
- [x] Meals scheduled for delivery
- [x] Utilization % (number of meals delivered / capacity)
- [x] Meals per Delivery Run
- [x] Total meals and number of orders for each HP
- [x] Total meals and number of orders for each Restaurant
- [x] Equitable meals served: all HPs are within 1 order of each other, excluding VIP orders

In [652]:
# KPI: total number of meals scheduled across every row of the meal log.
meals_scheduled = meal_log.loc[:, 'quantity'].sum()
meals_scheduled

9323.0

In [653]:
# KPI: share of the MEAL_CAPACITY budget that is scheduled, printed as a percentage.
utilization = meals_scheduled / MEAL_CAPACITY
print('{}%'.format(utilization * 100))

93.23%


In [654]:
# Per (meal, restaurant): total quantity and number of meal_log rows, ordered by meal name descending.
meal_restaurant_groups = meal_log.groupby(['meal', 'restaurant'])
grouped_by_meal_restaurant = (
    meal_restaurant_groups
    .agg({'quantity': ['sum', 'count']})
    .sort_values(by='meal', ascending=False)
)
# grouped_by_meal_restaurant.head(10)

In [655]:
# Summary statistics of meals and dropoff points per (meal, restaurant) group.
# CAVEAT: reading a group as one delivery run ASSUMES that a restaurant serves a single
# cluster for a given meal time. Careful!
summary_stats = ['median', 'mean', 'min', 'max']
meals_per_run_agg = grouped_by_meal_restaurant.agg(summary_stats)
meals_per_run_agg.columns = ['Meals per Meal Time', 'Dropoff Points']
meals_per_run_agg

Unnamed: 0,Meals per Meal Time,Dropoff Points
median,178.5,2.5
mean,221.97619,2.690476
min,30.0,1.0
max,716.0,5.0


In [656]:
# KPI: total meals per restaurant, biggest supplier first.
(
    meal_log
    .groupby('restaurant')[['quantity']]
    .sum()
    .sort_values(by='quantity', ascending=False)
)

Unnamed: 0_level_0,quantity
restaurant,Unnamed: 1_level_1
LD,3315.0
PG,2632.0
FC,1568.0
PO,842.0
MM,701.0
CS,265.0


In [657]:
# KPI: per restaurant, summed meal quantity and number of meal_log rows, largest sum first.
restaurant_aggregations = {
    'quantity': [('meal_sum', 'sum'), ('orders_fulfilled', 'count')]
}
(
    meal_log
    .groupby('restaurant')
    .agg(restaurant_aggregations)
    .sort_values([('quantity', 'meal_sum')], ascending=False)
)

Unnamed: 0_level_0,quantity,quantity
Unnamed: 0_level_1,meal_sum,orders_fulfilled
restaurant,Unnamed: 1_level_2,Unnamed: 2_level_2
LD,3315.0,27
PG,2632.0,27
FC,1568.0,27
PO,842.0,16
MM,701.0,13
CS,265.0,3


In [658]:
# Total numbers of meals and orders received for each HP
# NB: this variable is defined above, as it's used in a validation test
hospital_meals_rollup

Unnamed: 0,hospital,total_meals_received,total_orders_fulfilled,vip_meals_count
0,Boston Medical Center,3600.0,7,7.0
1,Carney Hospital - ED,600.0,6,0.0
2,VA Medical Center - West Roxbury,480.0,6,0.0
3,Whittier Street Health Center,450.0,3,0.0
4,Brigham and Women's Hospital - ED,400.0,4,4.0
5,Codman Square Health Center,330.0,3,0.0
6,Massachusetts General Hospital - ED,325.0,5,0.0
7,BIDMC - West Campus,300.0,5,0.0
8,East Boston Neighborhood Health Center,275.0,4,0.0
9,Tufts Medical Center,250.0,5,0.0


## Hospital Equity Check

In [660]:
# Hospital Equity check
# Compare the hospitals with the most and the fewest fulfilled orders (VIP orders included).
orders_fulfilled = hospital_meals_rollup['total_orders_fulfilled']
max_row = hospital_meals_rollup[orders_fulfilled == orders_fulfilled.max()]
min_row = hospital_meals_rollup[orders_fulfilled == orders_fulfilled.min()]

# `equity_stats` was referenced below without being defined anywhere in the notebook, so this
# cell failed with NameError on a fresh kernel. describe() provides the 'max' / 'min' rows the
# prints read from.
# NOTE(review): presumably this is what the missing cell computed — confirm.
equity_stats = hospital_meals_rollup.describe()

order_range = (max_row['total_orders_fulfilled'].values[0] - min_row['total_orders_fulfilled'].values[0])
print('INCLUDING VIP MEALS')
print('RANGE OF ORDERS FOR HOSPITALS:', order_range)
if order_range > 1:
    print('This range should ideally only be 1, once we remove VIP orders!')
print('Note: this is inclusive of VIP orders. Please verify this discrepancy!')
print('------------------------------')
print(
    'MAXIMUM Order for Hospital:', 
    equity_stats.loc['max']['total_orders_fulfilled'], 
    'for Hospital(s):', 
)
# Several hospitals can tie for the maximum / minimum, so list every one of them.
for _, row in max_row.iterrows():
    print('-', row['hospital'], '\tNumber of VIP orders:', row['vip_meals_count'])
print()
print(
    'MINIMUM Order for Hospital:', 
    equity_stats.loc['min']['total_orders_fulfilled'], 
    'for Hospital(s):', 
)
for _, row in min_row.iterrows():
    print('-', row['hospital'], '\tNumber of VIP orders:', row['vip_meals_count'])


INCLUDING VIP MEALS
RANGE OF ORDERS FOR HOSPITALS: 13
This range should ideally only be 1, once we remove VIP orders!
Note: this is inclusive of VIP orders. Please verify this discrepancy!
------------------------------
MAXIMUM Order for Hospital: 14.0 for Hospital(s):
- Boston Healthcare for the Homeless 	Number of VIP orders: 14.0

MINIMUM Order for Hospital: 1.0 for Hospital(s):
- Cambridge Health Alliance - Everett Hospital 	Number of VIP orders: 0.0
- Cambridge Health Alliance - Cambridge Hospital 	Number of VIP orders: 0.0
- Cambridge Health Alliance - Somerville Hospital 	Number of VIP orders: 0.0


In [688]:
print('Removing VIP Meals from the equation (VIP meals collapse to a single meal for each hospital):')
# Every VIP order beyond a hospital's first is discounted, so a hospital's VIP orders count
# as at most one order in the distribution summarised below.
vip_orders_beyond_first = hospital_meals_rollup['vip_meals_count'].transform(
    lambda vip_count: max(0, vip_count - 1)
)
orders_with_vip_collapsed = hospital_meals_rollup['total_orders_fulfilled'] - vip_orders_beyond_first
orders_with_vip_collapsed.describe()

Removing VIP Meals from the equation (VIP meals collapse to a single meal for each hospital):


count    30.000000
mean      2.933333
std       1.552158
min       1.000000
25%       2.000000
50%       3.000000
75%       4.000000
max       6.000000
dtype: float64