In [364]:
import os
import sys
# Resolve the directory two levels above the working directory and make sure
# it is on the import path, so that the project package can be imported below.
module_path = os.path.abspath('../..')
print(module_path)
if sys.path.count(module_path) == 0:
    sys.path.append(module_path)

/Users/andrew/src/otp-scheduler


In [365]:
import pandas as pd
from src import parse

In [397]:
def sort_schedule(df):
    """Return a copy of `df` in canonical row order.

    Rows are ordered by 'zone', then 'priority_rank', then 'code', and the
    index is replaced by a fresh 0..n-1 range so rows can be addressed by
    position afterwards.
    """
    sort_keys = ['zone', 'priority_rank', 'code']
    ordered = df.sort_values(by=sort_keys)
    return ordered.reset_index(drop=True)

In [398]:
# ASSUMPTIONS
# Ceiling on the total number of meals scheduled; used by the budget check and
# as the denominator of the utilization KPI.
MEAL_CAPACITY = 10_000
# Largest number of dropoff points allowed for a restaurant in a single run.
MAX_DROPOFFS_PER_RUN = 4
# Smallest number of meals a restaurant should deliver for an assigned meal time.
MIN_MEALS_PER_RUN = 50

In [399]:
# Load the restaurant sheet and index it by restaurant code, so per-meal values
# can later be looked up as restaurants[<meal column>][<code>].
restaurants = (
    parse
    .parse_from_restaurants_csv('../../examples/boston/20200405/restaurants.csv')
    .set_index('code')
)

In [400]:
# Quick sanity check: (number of rows, number of columns) of the restaurant table.
restaurants.shape

(8, 23)

In [401]:
# Hospital requests in canonical (zone, priority_rank, code) order, so that
# rows can be matched by position against the identically sorted results table.
hospitals = sort_schedule(
    parse.parse_from_hospital_requests_csv('../../examples/boston/20200405/hospitals.csv')
)

In [402]:
# Keep only the columns from position 5 onward; these cells are later read as
# the requested quantity for each meal (presumably the five leading columns are
# hospital metadata — TODO confirm against the parser output).
schedule = hospitals.iloc[:, 5:]

In [403]:
# Scheduler output, sorted the same way as the hospital requests so that row i
# of both tables refers to the same position.
results = sort_schedule(
    parse.parse_from_results_csv('../../examples/boston/20200405/results.csv')
)


In [404]:
# Same positional slice as for the hospital requests: columns from position 5
# onward. Non-null cells are later used as restaurant codes.
results_schedule = results.iloc[:, 5:]
# Column labels of the slice; these become the 'meal' values in the meal log.
results_schedule_columns = results_schedule.columns

In [405]:
# Flatten the wide results grid into one record per filled cell:
# [hospital, zone, meal, restaurant, quantity, restaurant_capacity].
log = []
for i, assigned_row in results_schedule.iterrows():
    # `i` is the row label; the index was reset when sorting, so it is also
    # used as the row position in `hospitals` and `schedule`.
    for j, (meal, val) in enumerate(assigned_row.items()):
        if pd.isnull(val):
            continue  # nothing assigned for this meal
        log.append([
            hospitals.iloc[i, 1],
            hospitals.iloc[i, 4],
            meal,
            val,
            schedule.iloc[i, j],
            restaurants[meal][val],
        ])

In [407]:
# Dense, long-format view of the schedule: one row per delivered order.
meal_log = pd.DataFrame(
    log,
    columns=[
        'hospital',
        'zone',
        'meal',
        'restaurant',
        'quantity',
        'restaurant_capacity',
    ],
)
meal_log.head(10)

Unnamed: 0,hospital,zone,meal,restaurant,quantity,restaurant_capacity
0,BIDMC - West Campus,1,lunch_mon,PO,60.0,200.0
1,BIDMC - West Campus,1,lunch_tues,FC,60.0,300.0
2,BIDMC - West Campus,1,lunch_wed,PG,60.0,600.0
3,BIDMC - West Campus,1,lunch_thurs,PO,60.0,200.0
4,BIDMC - West Campus,1,lunch_fri,PG,60.0,600.0
5,BIDMC - East Campus,1,lunch_mon,PO,30.0,200.0
6,BIDMC - East Campus,1,lunch_tues,FC,30.0,300.0
7,BIDMC - East Campus,1,lunch_wed,PG,30.0,600.0
8,BIDMC - East Campus,1,lunch_thurs,PO,30.0,200.0
9,BIDMC - East Campus,1,lunch_fri,PG,30.0,600.0


## Rules / Validation Checks:
High-level
- [x] Within budget (# meals < capacity)

Restaurants
- [x] Restaurant has capacity for that order
- [x] Restaurant is delivering to one cluster per meal time
- [x] Restaurant is delivering at least 50 meals for a meal time they are assigned
- [x] Restaurant is not delivering to more than 4 dropoffs per meal time
- [x] Restaurant is within their max days per week

Allocation
- [ ] VIP Orders are all fulfilled
- [x] All orders are fully-served (this is pretty much intrinsically assumed, given the format of our data)
- [ ] Every HP has > 0 orders
- [ ] Equitable meals served: all HPs are within 1 order of each other, excluding VIP orders

In [408]:
def test(condition, *print_args):
    if not condition:
        print('TEST FAILURE:', print_args[0])
        for arg in print_args[1:]:
            print(arg)

In [409]:
# The dense meal log must not contain a single null. A null here means some
# value failed to match up with what the spreadsheets were expected to contain.
null_cells = meal_log.isnull().sum().sum()
test(null_cells == 0, 'we have nulls in our dense meal log set!')

In [410]:
# Within budget: the total of all scheduled meals must stay below MEAL_CAPACITY.
total_quantity = meal_log['quantity'].sum()
test(
    total_quantity < MEAL_CAPACITY,
    'we are above meal capacity',
    total_quantity,
    MEAL_CAPACITY,
)

In [411]:
# Restaurant assertions
# Every order must fit within the capacity recorded for its restaurant and meal.
# NOTE(review): each order is compared on its own; orders sharing the same
# (meal, restaurant) pair are not summed before the comparison — confirm that
# this is the intended rule.
under_capacity = meal_log[meal_log['restaurant_capacity'] < meal_log['quantity']]
# Pass the offending rows along so a failure shows which orders are too large,
# as the other checks do.
test(under_capacity.empty, 'Some restaurants are assigned a meal that is over their capacity', under_capacity)

In [412]:
# A restaurant should serve a single zone (cluster) within any one meal time:
# count the distinct zones per (meal, restaurant) and flag anything above one.
restaurant_delivery_zones = (
    meal_log
    .groupby(['meal', 'restaurant'])[['zone']]
    .nunique()
)
serves_several_zones = restaurant_delivery_zones['zone'] > 1
multiple_zones = restaurant_delivery_zones[serves_several_zones]
test(
    multiple_zones.empty,
    'Some restaurants are delivering to more than one zone for a meal!',
    multiple_zones,
)

TEST FAILURE: Some restaurants are delivering to more than one zone for a meal!
                         zone
meal         restaurant      
dinner_fri   PG             2
dinner_mon   FC             2
dinner_sat   MM             2
dinner_sun   PG             2
dinner_thurs LD             2
dinner_tues  FC             2
             LD             2
dinner_wed   CS             2
lunch_fri    FC             2
             PG             2
lunch_mon    FC             2
             LD             2
             PO             2
lunch_sun    LD             2
             MM             2
lunch_thurs  LD             2
lunch_wed    FC             2
             PG             3
             PO             2


In [413]:
# Each (meal, restaurant) pair must add up to at least `MIN_MEALS_PER_RUN` meals.
restaurant_quantity_per_meal = (
    meal_log
    .groupby(['meal', 'restaurant'])[['quantity']]
    .sum()
)
below_minimum = restaurant_quantity_per_meal['quantity'] < MIN_MEALS_PER_RUN
small_orders_per_meal = restaurant_quantity_per_meal[below_minimum]
test(
    small_orders_per_meal.empty,
    'Some restaurants are delivery a low quantity of meals for an order!',
    small_orders_per_meal,
)

TEST FAILURE: Some restaurants are delivery a low quantity of meals for an order!
                      quantity
meal      restaurant          
lunch_fri PO              30.0


In [414]:
# A run is one (meal, zone, restaurant) combination; the number of hospital rows
# in it must not exceed `MAX_DROPOFFS_PER_RUN`.
dropoffs_per_run = (
    meal_log
    .groupby(['meal', 'zone', 'restaurant'])[['hospital']]
    .count()
)
over_dropoff_limit = dropoffs_per_run['hospital'] > MAX_DROPOFFS_PER_RUN
too_many_dropoffs = dropoffs_per_run[over_dropoff_limit]
test(
    too_many_dropoffs.empty,
    'Some restaurants are delivering to too many dropoff points per run!',
    too_many_dropoffs,
)

In [415]:
# Restaurant is within their max days per week

# Collapse meal names into days and count the distinct days
def column_to_day(series):
    """Return how many distinct days appear in a series of meal names.

    The substrings 'lunch', 'breakfast', 'dinner' and '_' are removed from each
    name, in that order; what is left is treated as the day label.
    """
    def strip_meal_words(meal):
        for token in ('lunch', 'breakfast', 'dinner', '_'):
            meal = meal.replace(token, '')
        return meal

    distinct_days = set(series.map(strip_meal_words))
    return len(distinct_days)
    

# One row per restaurant with the number of distinct days it is scheduled on.
days_worked = meal_log.groupby(['restaurant']).agg({'meal': [('Days Worked', column_to_day)]}).reset_index()
days_worked.columns = ['restaurant', 'Days Worked']
# Attach each restaurant's weekly limit; 'code' is the index of `restaurants`.
days_with_capacity = days_worked.merge(restaurants[['max_days_per_week']], left_on='restaurant', right_on='code')
days_over_capacity = days_with_capacity[days_with_capacity['Days Worked'] > days_with_capacity['max_days_per_week']]
# The pass/fail condition must look at this check's own result set, not at the
# result of the dropoff check.
test(days_over_capacity.empty, 'Some restaurants are working more days than requested this week', days_over_capacity)

## KPIS:
- [X] Meals scheduled for delivery
- [x] Utilization % (number of meals delivered / capacity)
- [x] Meals per Delivery Run
- [x] Total meals and number of orders for each HP
- [x] Total meals and number of orders for each Restaurant

In [416]:
# KPI: total number of meals scheduled for delivery across all orders.
meals_scheduled = meal_log.loc[:, 'quantity'].sum()
meals_scheduled

9323.0

In [417]:
# Meal Utilization %: share of MEAL_CAPACITY taken up by the scheduled meals.
utilization = meals_scheduled / MEAL_CAPACITY
# Format as a percentage with two decimals rather than str(x * 100), which can
# expose binary floating-point noise such as 7.000000000000001.
print('{:.2%}'.format(utilization))

93.23%


In [418]:
# Per (meal, restaurant): total meals ('sum') and number of orders ('count'),
# ordered by meal name, descending.
grouped_by_meal_restaurant = (
    meal_log
    .groupby(['meal', 'restaurant'])
    .agg({'quantity': ['sum', 'count']})
    .sort_values(by='meal', ascending=False)
)

In [419]:
# Summary statistics of meals delivered and dropoff points per (meal, restaurant).
# CAVEAT: treating a (meal, restaurant) pair as one run ASSUMES the restaurant
# delivers to a single cluster for that meal time. Careful!
summary_stats = ['median', 'mean', 'min', 'max']
meals_per_run_agg = grouped_by_meal_restaurant.agg(summary_stats)
meals_per_run_agg.columns = ['Meals per Meal Time', 'Dropoff Points']
meals_per_run_agg

Unnamed: 0,Meals per Meal Time,Dropoff Points
median,178.5,2.5
mean,221.97619,2.690476
min,30.0,1.0
max,716.0,5.0


In [420]:
# KPI: total meals delivered by each restaurant, largest first.
(
    meal_log
    .groupby('restaurant')[['quantity']]
    .sum()
    .sort_values(by='quantity', ascending=False)
)

Unnamed: 0_level_0,quantity
restaurant,Unnamed: 1_level_1
LD,3315.0
PG,2632.0
FC,1568.0
PO,842.0
MM,701.0
CS,265.0


In [421]:
# KPI: per restaurant, the total meals delivered and the number of orders
# fulfilled, ordered by total meals, descending.
(
    meal_log
    .groupby('restaurant')
    .agg({
        'quantity': [
            ('meal_sum', 'sum'),
            ('orders_fulfilled', 'count'),
        ],
    })
    .sort_values([('quantity', 'meal_sum')], ascending=False)
)

Unnamed: 0_level_0,quantity,quantity
Unnamed: 0_level_1,meal_sum,orders_fulfilled
restaurant,Unnamed: 1_level_2,Unnamed: 2_level_2
LD,3315.0,27
PG,2632.0,27
FC,1568.0,27
PO,842.0,16
MM,701.0,13
CS,265.0,3


In [422]:
# KPI: per hospital (HP), the total meals received and the number of orders
# fulfilled, ordered by total meals, descending.
(
    meal_log[['hospital', 'quantity']]
    .groupby('hospital')
    .agg({
        'quantity': [
            ('total_meals_received', 'sum'),
            ('total_orders_fulfilled', 'count'),
        ],
    })
    .sort_values([('quantity', 'total_meals_received')], ascending=False)
)

Unnamed: 0_level_0,quantity,quantity
Unnamed: 0_level_1,total_meals_received,total_orders_fulfilled
hospital,Unnamed: 1_level_2,Unnamed: 2_level_2
Boston Medical Center,3600.0,7
Carney Hospital - ED,600.0,6
VA Medical Center - West Roxbury,480.0,6
Whittier Street Health Center,450.0,3
Brigham and Women's Hospital - ED,400.0,4
Codman Square Health Center,330.0,3
Massachusetts General Hospital - ED,325.0,5
BIDMC - West Campus,300.0,5
East Boston Neighborhood Health Center,275.0,4
Tufts Medical Center,250.0,5
