# Imports

In [1]:
import numpy as np
import pandas as pd
from numba import njit
from itertools import product
from ortools.linear_solver import pywraplp
from multiprocessing import Process

In [2]:
print()




# Initialize

## Load Data

In [3]:
data = pd.read_csv('./data/family_data.csv', index_col='family_id')

## Set Static Variables

In [4]:
N_DAYS = 100
N_FAMILIES = 5000
MAX_OCCUPANCY = 300
MIN_OCCUPANCY = 125

FAMILY_SIZE = data.n_people.values
DESIRED     = data.values[:, :-1] - 1

# Cost Function

## Preference Cost

### Preference Cost Function

In [5]:
def get_penalty(n, choice):
    penalty = None
    if choice == 0:
        penalty = 0
    elif choice == 1:
        penalty = 50
    elif choice == 2:
        penalty = 50 + 9 * n
    elif choice == 3:
        penalty = 100 + 9 * n
    elif choice == 4:
        penalty = 200 + 9 * n
    elif choice == 5:
        penalty = 200 + 18 * n
    elif choice == 6:
        penalty = 300 + 18 * n
    elif choice == 7:
        penalty = 300 + 36 * n
    elif choice == 8:
        penalty = 400 + 36 * n
    elif choice == 9:
        penalty = 500 + 36 * n + 199 * n
    else:
        penalty = 500 + 36 * n + 398 * n
    return penalty

In [6]:
def GetPreferenceCostMatrix(data):
    cost_matrix = np.zeros((N_FAMILIES, N_DAYS), dtype=np.int64)
    for i in range(N_FAMILIES):
        desired = data.values[i, :-1]
        cost_matrix[i, :] = get_penalty(FAMILY_SIZE[i], 10)
        for j, day in enumerate(desired):
            cost_matrix[i, day-1] = get_penalty(FAMILY_SIZE[i], j)
    return cost_matrix

### Make Matrix

In [7]:
PCOSTM = GetPreferenceCostMatrix(data)

In [8]:
print(PCOSTM.shape)
PCOSTM[0]

(5000, 100)


array([2236, 2236, 2236, 2236, 2236, 2236, 2236, 2236, 2236,  544, 2236,
         86, 2236, 2236, 2236, 2236, 2236, 2236, 2236, 2236, 2236, 2236,
       2236, 2236, 2236, 2236, 2236, 1440, 2236, 2236, 2236, 2236,  236,
       2236, 2236, 2236, 2236,   50, 2236, 2236, 2236, 2236, 2236, 2236,
       2236, 2236, 2236, 2236, 2236, 2236, 2236,    0, 2236, 2236, 2236,
       2236, 2236, 2236, 2236, 2236, 2236, 2236, 2236,  372, 2236, 2236,
       2236, 2236, 2236, 2236, 2236, 2236, 2236, 2236,  272,  444, 2236,
       2236, 2236, 2236, 2236,  136, 2236, 2236, 2236, 2236, 2236, 2236,
       2236, 2236, 2236, 2236, 2236, 2236, 2236, 2236, 2236, 2236, 2236,
       2236])

### Calculate Preference Cost

In [9]:
@njit(fastmath=True)
def pcost(prediction):
    daily_occupancy = np.zeros(N_DAYS+1, dtype=np.int64)
    penalty = 0
    for (i, p) in enumerate(prediction):
        n = FAMILY_SIZE[i]
        penalty += PCOSTM[i, p]
        daily_occupancy[p] += n
    return penalty, daily_occupancy

## Accounting Cost

In [10]:
def GetAccountingCostMatrix():
    ac = np.zeros((1000, 1000), dtype=np.float64)
    for n in range(ac.shape[0]):
        for n_p1 in range(ac.shape[1]):
            diff = abs(n - n_p1)
            ac[n, n_p1] = max(0, (n - 125) / 400 * n**(0.5 + diff / 50.0))
    return ac

### Make Matrix

In [11]:
ACOSTM = GetAccountingCostMatrix() 

In [12]:
print(ACOSTM.shape)
ACOSTM

(1000, 1000)


array([[0.00000000e+00, 0.00000000e+00, 0.00000000e+00, ...,
        0.00000000e+00, 0.00000000e+00, 0.00000000e+00],
       [0.00000000e+00, 0.00000000e+00, 0.00000000e+00, ...,
        0.00000000e+00, 0.00000000e+00, 0.00000000e+00],
       [0.00000000e+00, 0.00000000e+00, 0.00000000e+00, ...,
        0.00000000e+00, 0.00000000e+00, 0.00000000e+00],
       ...,
       [4.28336940e+61, 3.73088297e+61, 3.24965849e+61, ...,
        6.88341688e+01, 7.90274513e+01, 9.07302023e+01],
       [5.02533626e+61, 4.37706017e+61, 3.81241268e+61, ...,
        7.91593344e+01, 6.89476587e+01, 7.91593344e+01],
       [5.89593436e+61, 5.13524692e+61, 4.47270259e+61, ...,
        9.10367626e+01, 7.92912921e+01, 6.90612103e+01]])

### Calculate Accounting Cost

In [13]:
@njit(fastmath=True)
def acost(daily_occupancy):
    accounting_cost = 0
    n_out_of_range = 0
    daily_occupancy[-1] = daily_occupancy[-2]
    for day in range(N_DAYS):
        n_p1 = daily_occupancy[day + 1]
        n    = daily_occupancy[day]
        n_out_of_range += (n > MAX_OCCUPANCY) or (n < MIN_OCCUPANCY)
        accounting_cost += ACOSTM[n, n_p1]
    return accounting_cost, n_out_of_range

## Total Cost

In [14]:
@njit(fastmath=True)
def cost_function(prediction):
    penalty, daily_occupancy = pcost(prediction)
    accounting_cost, n_out_of_range = acost(daily_occupancy)
    return penalty + accounting_cost + n_out_of_range*100000000

## Evaluation Method

In [15]:
def eval(prediction):
    pc, occ = pcost(prediction)
    ac, _ = acost(occ)
    print('Preferenced Cost : ', pc)
    print('Accounting Cost : {: .2f}'.format(ac))
    print('Total Cost : {: .2f}'.format(pc+ac))
    print('')
    print('Max Occupancy : {} , Min Ocupancy : {}'.format(occ.min(), occ.max()))

# Solver

## Linear Programming Solver

**NOTE :**

<font color='Red'>Modify the accounting penalty to **linear** with binary variables.</font>

### Make Solver

In [16]:
def solveSantaLP():
    
    S = pywraplp.Solver('SolveAssignmentProblem', pywraplp.Solver.CBC_MIXED_INTEGER_PROGRAMMING)
#     S = pywraplp.Solver('SolveAssignmentProblem', pywraplp.Solver.GLOP_LINEAR_PROGRAMMING)    
    GAP = MAX_OCCUPANCY-MIN_OCCUPANCY + 1
    
    print('start pre')
    candidates = [[] for _ in range(N_DAYS)] 
    
    x = {}
    for i in range(N_FAMILIES):
        for j in DESIRED[i, :]:
            candidates[j].append(i)
            x[i, j] = S.BoolVar('x[%i,%i]' % (i, j))
    
    N = {}
    for day in range(N_DAYS):
        for i in range(GAP):
            for j in range(GAP):
                N[day, i+MIN_OCCUPANCY, j+MIN_OCCUPANCY] = S.BoolVar('N[%i,%i,%i]' % (day, i+MIN_OCCUPANCY, j+MIN_OCCUPANCY))
            

    family_presence = [S.Sum([x[i, j] for j in DESIRED[i, :]])
                                                        for i in range(N_FAMILIES)]
    
    linear_constraint = [S.Sum(N[day, i+MIN_OCCUPANCY, j+MIN_OCCUPANCY] for i in range(GAP)
                                                                                                                               for j in range(GAP)) 
                                                                                                                               for day in range(N_DAYS) ]
 
        
    # Objective    
    preference_cost = S.Sum([PCOSTM[i, j] * x[i,j] for i in range(N_FAMILIES)
                                                                            for j in DESIRED[i, :] ])
    
    penalties = []
    for day in range(N_DAYS):
        day_penalties = []
        for i in range(GAP):
            for j in range(GAP):
                daily_occupancy = i + MIN_OCCUPANCY
                p_occupancy = j + MIN_OCCUPANCY
                day_penalties.append((daily_occupancy - 125)/400 * daily_occupancy**(1/2+abs(daily_occupancy - p_occupancy)/50) 
                                         * N[day, daily_occupancy, p_occupancy])
        day_penalty = S.Sum(day_penalties)
        penalties.append(day_penalty)
        
    accounting_penalty = S.Sum(penalties)
    
    total_cost = preference_cost + accounting_penalty

    S.Minimize(total_cost)



    # Constraints
    for day in range(N_DAYS):
        S.Add(linear_constraint[day] == 1)
        S.Add(
            S.Sum([x[i, day] * FAMILY_SIZE[i] for i in candidates[day]]) == 
            S.Sum( [(i + MIN_OCCUPANCY)* N[day, i+MIN_OCCUPANCY, j+MIN_OCCUPANCY] for i in range(GAP)
                                                                                                                                                for j in range(GAP)])
            )
        
    for i in range(N_FAMILIES):
        S.Add(family_presence[i] == 1)

    print('redeay and start')
    print(S.SetNumThreads(4))
    res = S.Solve()

    resdict = {0:'OPTIMAL', 1:'FEASIBLE', 2:'INFEASIBLE', 3:'UNBOUNDED', 
               4:'ABNORMAL', 5:'MODEL_INVALID', 6:'NOT_SOLVED'}

    print('LP solver result:', resdict[res])


    l = [(i, j, x[i, j].solution_value()) for i in range(N_FAMILIES)
                                                      for j in DESIRED[i, :] 
                                                      if x[i, j].solution_value()>0]

    df = pd.DataFrame(l, columns=['family_id', 'day', 'n'])
    return df

### Solve

In [None]:
df_tmp = solveSantaLP()

start pre


### Check

In [None]:
df_tmp.shape

In [20]:
print('--- About df_tmp.n ---')
print('Over 1.0 : ', len(df_tmp[df_tmp.n > 1.0]))
print('Under 0.999 : ', len(df_tmp[df_tmp.n < 0.999]))

--- About df_tmp.n ---
Over 1.0 :  0
Under 0.999 :  110


In [21]:
assigned_tmp_df = df_tmp[df_tmp.n > 0.999].copy()
assigned_tmp_df['family_size'] = FAMILY_SIZE[assigned_tmp_df.family_id]
occupancy = assigned_tmp_df.groupby('day').family_size.sum().values
min_occupancy = np.array([max(0, MIN_OCCUPANCY-o) for o in occupancy])
max_occupancy = np.array([MAX_OCCUPANCY - o for o in occupancy])

unassigned_tmp_df = df_tmp[(df_tmp.n <= 0.999) & (df_tmp.n > (1 - 0.999))]

In [22]:
print('Assigened : ', len(assigned_tmp_df.family_id.unique()))
print(' - Under Min Occupancies : ', len(min_occupancy[min_occupancy != 0]))
print('   ', min_occupancy[min_occupancy != 0])
print(' - Over Max Occupancies : ', len(max_occupancy[max_occupancy < 0]))
print('')
print('Unassigned : ', len(unassigned_tmp_df.family_id.unique()))

Assigened :  4946
 - Under Min Occupancies :  28
    [4 4 7 1 3 4 1 1 6 1 6 1 8 5 3 3 3 1 4 9 3 5 4 2 6 3 2 2]
 - Over Max Occupancies :  0

Unassigned :  54


## Mixed Integer Programming Solver

### Make Solver

In [24]:
def solveSantaIP(families, min_occupancy, max_occupancy):

    S = pywraplp.Solver('SolveAssignmentProblem', pywraplp.Solver.CBC_MIXED_INTEGER_PROGRAMMING)
    
    #S.SetNumThreads(NumThreads) 
    #S.set_time_limit(limit_in_seconds*1000*NumThreads) #cpu time = wall time * N_threads

    n_families = len(families)
    
    x = {}
    candidates = [[] for _ in range(N_DAYS)] #families that can be assigned to each day

    for i in families:
        for j in DESIRED[i, :]:
            candidates[j].append(i)
            x[i, j] = S.BoolVar('x[%i,%i]' % (i, j))

            
    daily_occupancy = [S.Sum([x[i, j] * FAMILY_SIZE[i] for i in candidates[j]])
                                                                                   for j in range(N_DAYS)]

    family_presence = [S.Sum([x[i, j] for j in DESIRED[i, :]])
                                                        for i in families]



    # Objective
    preference_cost = S.Sum([PCOSTM[i, j] * x[i,j] for i in families
                                                                            for j in DESIRED[i, :] ])
    

    S.Minimize(preference_cost)


    # Constraints

    for i in range(n_families):
        S.Add(family_presence[i] == 1)

    for j in range(N_DAYS):
        S.Add(daily_occupancy[j] >= min_occupancy[j])
        S.Add(daily_occupancy[j] <= max_occupancy[j])

    res = S.Solve()
    
    resdict = {0:'OPTIMAL', 1:'FEASIBLE', 2:'INFEASIBLE', 3:'UNBOUNDED', 
               4:'ABNORMAL', 5:'MODEL_INVALID', 6:'NOT_SOLVED'}
    
    print('MIP solver result:', resdict[res])
    
                
    l = [(i, j) for i in families
                 for j in DESIRED[i, :] 
                 if x[i, j].solution_value()>0]


    df = pd.DataFrame(l, columns=['family_id', 'day'])
    return df


### Solve

In [25]:
tmp_rdf = solveSantaIP(unassigned_tmp_df.family_id.unique(), min_occupancy, max_occupancy)

MIP solver result: OPTIMAL


In [26]:
tmp_df2 = pd.concat((assigned_tmp_df[['family_id', 'day']], tmp_rdf)).sort_values('family_id')
tmp_df2['family_size'] = FAMILY_SIZE[tmp_df2.family_id]

occupancy2 = tmp_df2.groupby('day').family_size.sum().values
min_occupancy2 = np.array([max(0, MIN_OCCUPANCY-o) for o in occupancy2])
max_occupancy2 = np.array([MAX_OCCUPANCY - o for o in occupancy2])

### Check

In [27]:
print('Assigened : ', len(tmp_df2.family_id.unique()))
print(' - Under Min Occupancies : ', len(min_occupancy2[min_occupancy2 != 0]))
print(' - Over Max Occupancies : ', len(max_occupancy2[max_occupancy2 < 0]))

Assigened :  5000
 - Under Min Occupancies :  0
 - Over Max Occupancies :  0


In [28]:
eval(tmp_df2.day.values)

Preferenced Cost :  44931
Accounting Cost :  10190711667.51
Total Cost :  10190756598.51

Max Occupancy : 125 , Min Ocupancy : 300
