In [182]:
import pandas as pd
import random
import numpy as np
import time
import copy # for deep copy
import xlwings as xw
import datetime

### Main function

In [887]:
# load file
df = pd.read_excel('test.xlsm')

# define types of duties
TYPES_OF_DUTY = {0:'Test', 1:'CT/MR', 3:'ER', 4:'CR', 5:'VS', 
                 6:'Other6', 7:'Other7', 8:'Other8', 9:'Other9'}  # no type 2

# cleanse the data, determine if there is violation
df, type_to_generate = data_cleansing(df)
violation, df_updated = is_violation(df,type_to_generate)

# if no violation, then start generating list into optimized list
optimized_list = []
if violation == False:
    for duty_type in type_to_generate:
        preliminary_list = preliminary_gen3(df_updated, duty_type, 1000)
        #optimized_list[duty_type] = optimization2(preliminary_list, df_updated, duty_type)
        optimized_list.append([duty_type, optimization2(preliminary_list, df_updated, duty_type)])

if violation == False:
# prepare list for output
    first_choice, other_choice = formatting_for_output(optimized_list)
    export_to_excel(first_choice, other_choice)
    

檢查輸入資料...
檢查輸入資料...OK
正在建立 [type1,CT/MR班] 初步清單...
>----------- 8.5%
=>---------- 16.8%
==>--------- 25.1%
===>-------- 33.4%
====>------- 41.7%
=====>------ 50.0%
[type1,CT/MR班] 初步清單已建立完成
{'21': 0, '22': 0, '23': 0, '24': 0, '25': 0, '26': 0, '27': 0, '11': 0, '12': 0, '13': 0, '14': 0, '15': 0, '16': 0, '17': 0}
{'21': 0, '22': 0, '23': 0, '24': 0, '25': 0, '26': 0, '27': 0, '11': 0, '12': 0, '13': 0, '14': 0, '15': 0, '16': 0, '17': 0}
正在尋找 [type1,CT/MR班] 最佳排班...
已完成 [type1,CT/MR班] 最佳排班排序
正在調整格式資料格式...
正在開啟Excel並輸出排班資料...
儲存完成


In [457]:
def optimization2(preliminary_list, df_updated, duty_type):
    #np.seterr(divide='ignore', over='ignore', under='ignore', invalid='ignore')  # 忽略計算問題
    #np.seterr(all='raise')
    """
    input: preliminary_list, updated df, what type of duty
    output: list_location_std_sorted, after optimization
    """
    # Optimizing the list
    # 1. minimize the total days of QOD in everyone (如果只選標準差多少人不夠)
    # find min() of days  -> 這幾乎是最好的了，因為幾乎<3，所以 2 不需要

    # 2. minimize standard deviation of days of QOD among others
    #np.array([2,2,2,1]).std(ddof=0)
    # 計算個人值班分散程度（標準差） 的標準差，依照順序排列 （大家分散程度要差不多）
    # 在這個情況下，不可能大家同時標準差都很高，導致標準差的標準差值很小

    DAYS = len(preliminary_list[0])  # 這個月有幾天
    df_work = df_updated[df['Type']==duty_type]
    CODE_LIST = df_work['Code'].tolist() # code list, ['31','32']
    
    
    print(f'正在尋找 [type{duty_type},{TYPES_OF_DUTY[duty_type]}班] 最佳排班...')
    
    if duty_type==1:
        num_of_qod_dict = {code:0 for code in CODE_LIST}
        qod = []  # 各組每天的 qod 情況
        #days_of_duty = {code:[] for code in CODE_LIST}
        #for code in CODE_LIST
        for n in range(len(preliminary_list)):
            day_count = 0
            for i in range(DAYS-2):
                #possible_qod = [] # to store possible qod in
                
                for item in preliminary_list[n][i]:  # iterate through duty code in a day
                    if item in preliminary_list[n][i+2]: # if qod happens
                        day_count+=1
                        #possible_qod.append(item)  # add to possible_qod       
            qod.append(day_count)
        # 尋找最少 qod 的組合
        min_qod = min(qod)
        min_qod_index = [index for index,value in enumerate(qod) if value==min_qod]
    
    else:
        qod = []  # 各組 qod 的情況
        for n in range(len(preliminary_list)):
            # search for qod (value of location i == value of location i+2)
            list_temp = [preliminary_list[n][i] for i in range(DAYS-2) if preliminary_list[n][i]==preliminary_list[n][i+2]] 
            qod.append(list_temp)
        qod_pd = pd.DataFrame(qod)

        # 1. 找到 QOD 人次最少的組合
        # num of qods in each candidate
        num_of_qod = []
        for i in range(len(preliminary_list)):
            num_of_qod.append(qod_pd.iloc[i].notnull().sum())  # 非0個數 = qod 個數
        min_qod = min(num_of_qod)
        # create index of candidates with minimal qod days in total
        #eg [7376, 11732, 15383, 18130, 20990, 28528, 28785]
        min_qod_index = [index for index,value in enumerate(num_of_qod) if value==min_qod]
        
        
    # 2. 每個人分布的標準差 之間的標準差 最小化，取三個
    min_qod_list = []  
    for index in min_qod_index:
        min_qod_list.append([index, preliminary_list[index]])  # store index, list with the minimal qods
    
    # 由 CODE_LIST 內容依序提出資訊
    #error = 0
    list_location_std = []
    for i in range(len(min_qod_list)):  # how many items
        list_location = []
        std_value = []
        for code in CODE_LIST:
            list_location = [location for location,item in enumerate(min_qod_list[i][1]) if code in item]
            # min_qod_list = index, list with minimal qod value
            # 如果空白班，會有無法計算的問題，所以加上忽略
            # 填入每個人的 std value, to a list
            std_value.append(np.std(list_location,ddof=0))
            
            #try:
            #    std_value_temp = np.std(list_location,ddof=0)
            #    std_value.append(std_value_temp)
            #except:
            #    std_value.append(1000)
            #    error +=1
            #    print(error, code)
            #    print(list_location)
            #    print(min_qod_list[i])
                
                
        list_location_std.append([min_qod_list[i][1],
                                 min_qod_index[i],
                                 np.std(std_value, ddof=0)]) # form a list, of [list, location, std value]

    # 根據 std value (list_location_std[2]) 來排序
    # sorted_a = sorted(a, key=lambda x: x[1])
    list_location_std_sorted = sorted(list_location_std, key=lambda x:x[2]) 
    # list_location_std[2] is the std value

    
    # 最多取三個
    if len(list_location_std_sorted)>3:
        list_location_std_sorted=list_location_std_sorted[0:3]

    print(f'已完成 [type{duty_type},{TYPES_OF_DUTY[duty_type]}班] 最佳排班排序')
    #list_location_std_sorted.reverse()
    
    return list_location_std_sorted



### exporting data

In [229]:
def export_to_excel(first_choice, other_choice):
    print('正在開啟Excel並輸出排班資料...')
    wb = xw.Book()
    sht = wb.sheets[0]  


    # 輸出到 excel
    sht.range('A1').value = '建議班表'
    sht.range('A2').expand().value = first_choice
    sht.autofit()
    last_row = wb.sheets[0].range('A' + str(wb.sheets[0].cells.last_cell.row)).end('up').row
    sht.range('A'+str(last_row+2)).value = '其他排班建議'
    last_row = wb.sheets[0].range('A' + str(wb.sheets[0].cells.last_cell.row)).end('up').row
    sht.range('A'+str(last_row+1)).expand().value = other_choice

    now = datetime.datetime.now().strftime("%m%d%H%M%S")  # now time
    wb.save(f'排班資料_{now}.xlsx')

    print('儲存完成')


In [183]:
wb.sheets # 有哪些 sheet

Sheets([<Sheet [Book7]Sheet1>])

In [888]:
pd.DataFrame(first_choices_decompose)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,15,16,17,18,19,20,21,22,23,24
0,,1,2,3,4,5,6,7,8,9,...,15,16,17,18,19,20,21,22,23,24
1,CR,35,45,33,35,42,44,41,31,45,...,54,31,43,45,35,43,34,35,31,32
2,ER,27,33,21,22,27,23,21,33,22,...,22,26,21,25,33,32,26,25,22,27
3,CT,25,27,12,24,22,14,11,23,27,...,11,24,12,21,24,11,15,22,27,17
4,MR,16,22,15,21,23,13,16,22,13,...,15,23,15,27,25,26,21,24,13,23


In [495]:
# load file
df = pd.read_excel('test.xlsm')

TYPES_OF_DUTY = {0:'Test', 1:'CT/MR', 3:'ER', 4:'CR', 5:'VS', 
                 6:'Other', 7:'Other', 8:'Other', 9:'Other'}  # no type 2

df, type_to_generate = data_cleansing(df)
violation, df_updated = is_violation(df,type_to_generate)

檢查輸入資料...
檢查輸入資料...OK


### Preparing for output

In [286]:
def formatting_for_output(optimized_list):
    """
    input: optimized list as a list
    output: list of first_choices_decompose, other_choices_decompose
    """
    print('正在調整格式資料格式...')
    output_list = copy.deepcopy(optimized_list)
    for i,item in enumerate(output_list):
        if item[0]!=1:
            output_list[i][0] = TYPES_OF_DUTY[item[0]]

    first_choices = [[item[0],item[1][0][0]] for item in output_list]
    other_choices = []
    for item in output_list:
        if len(item[1])>1:
            for i in range(1, len(item[1])):
                other_choices.append([item[0], item[1][i][0]])
    # 處理 duties 1, eg CT/MR, first item-> CT, second item -> MR


    for index, item in enumerate(first_choices):
        if item[0] == 1:
            CT = ['CT', [ct[0] for ct in item[1]]]
            MR = ['MR', [mr[1] for mr in item[1]]]
            #index_of_type1 = index
            first_choices[index] = MR
            first_choices.insert(index, CT)

    for index, item in enumerate(other_choices):
        if item[0] == 1:
            CT = ['CT', [ct[0] for ct in item[1]]]
            MR = ['MR', [mr[1] for mr in item[1]]]
            #index_of_type1 = index
            other_choices[index] = MR
            other_choices.insert(index, CT)

    first_choices_decompose=[]
    for item in first_choices:
        templist = []
        templist.append(item[0])
        templist.extend(data for data in item[1])
        first_choices_decompose.append(templist)
    other_choices_decompose=[]
    for item in other_choices:
        templist = []
        templist.append(item[0])
        templist.extend(data for data in item[1])
        other_choices_decompose.append(templist)
        
    # 加上日期
    first_choices_decompose.insert(0, ['']+[str(i) for i in range(1,len(first_choices_decompose[0]))])
    other_choices_decompose.insert(0, ['']+[str(i) for i in range(1,len(other_choices_decompose[0]))])

    return first_choices_decompose, other_choices_decompose


In [238]:


# combined

output_list = copy.deepcopy(optimized_list)
for i,item in enumerate(output_list):
    if item[0]!=1:
        output_list[i][0] = TYPES_OF_DUTY[item[0]]
        
first_choices = [[item[0],item[1][0][0]] for item in output_list]
other_choices = []
for item in output_list:
    if len(item[1])>1:
        for i in range(1, len(item[1])):
            other_choices.append([item[0], item[1][i][0]])
# 處理 duties 1, eg CT/MR, first item-> CT, second item -> MR


for index, item in enumerate(first_choices):
    if item[0] == 1:
        CT = ['CT', [ct[0] for ct in item[1]]]
        MR = ['MR', [mr[1] for mr in item[1]]]
        #index_of_type1 = index
        first_choices[index] = MR
        first_choices.insert(index, CT)

for index, item in enumerate(other_choices):
    if item[0] == 1:
        print(item)
        CT = ['CT', [ct[0] for ct in item[1]]]
        MR = ['MR', [mr[1] for mr in item[1]]]
        #index_of_type1 = index
        other_choices[index] = MR
        other_choices.insert(index, CT)
# 整理成 CR XXXX, ER XXXX, MR XXXX, CT XXXX
first_choices_decompose=[]
for item in first_choices:
    templist = []
    templist.append(item[0])
    templist.extend(data for data in item[1])
    first_choices_decompose.append(templist)
# 加上日期
first_choices_decompose.insert(0, ['']+[str(i) for i in range(1,len(first_choices_decompose[0]))])
    
other_choices_decompose=[]
for item in other_choices:
    templist = []
    templist.append(item[0])
    templist.extend(data for data in item[1])
    other_choices_decompose.append(templist)
other_choices_decompose.insert(0, ['']+[str(i) for i in range(1,len(other_choices_decompose[0]))])



[1, [['24', '27'], ['13', '25'], ['12', '15'], ['21', '23'], ['27', '24'], ['12', '13'], ['21', '22'], ['25', '27'], ['11', '17'], ['15', '16'], ['24', '13'], ['23', '26'], ['22', '21'], ['16', '21'], ['11', '15'], ['27', '25'], ['12', '15'], ['26', '17'], ['22', '14'], ['11', '16'], ['21', '12'], ['24', '13'], ['11', '16'], ['14', '27']]]
[1, [['21', '16'], ['22', '14'], ['12', '15'], ['25', '16'], ['23', '13'], ['11', '22'], ['12', '17'], ['23', '21'], ['26', '14'], ['17', '22'], ['15', '13'], ['27', '26'], ['25', '14'], ['16', '21'], ['11', '15'], ['14', '26'], ['12', '15'], ['27', '24'], ['26', '23'], ['17', '13'], ['12', '16'], ['15', '24'], ['14', '26'], ['11', '13']]]


In [214]:
#other_choices_decompose.insert(0, ['']+[str(i) for i in range(1,len(other_choices_decompose[0]))])
pd.DataFrame(other_choices_decompose)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,22,23,24,25,26,27,28,29,30,31
0,,1,2,3,4,5,6,7,8,9,...,22,23,24,25,26,27,28,29,30,31
1,CR,C,B,C,D,C,B,A,B,D,...,D,C,A,D,B,C,A,B,D,A
2,CR,A,C,B,D,C,A,C,B,D,...,D,A,B,C,D,B,C,A,D,B


### Data cleansing

In [239]:
def data_cleansing(df):
    """
    input: df, whole data
    return: df after cleansing, define type_to_generate, sorted
    """
    #df=pd.DataFrame(df)
    #df[df['Unnamed: 2']==4]  # select Unnamed: 2 value ==4 
    # rename columns and index
    df = df.rename(columns={df.columns[0]:'Name',
                            df.columns[1]:'Code',
                            df.columns[2]:'Type',
                            df.columns[3]:'Holiday',
                            df.columns[4]:'Weekday'})
    df = df.rename(index={0:'Weekday_ch',1:'Weekday_num',2:'is_holiday'})
    # unselect unnamned coluns
    unselect_unnamed = [col for col in df if 'Unnamed' not in str(col)]  # 注意 有些還是 date type, 所以用 str
    df = df[unselect_unnamed]  

    # 有哪些班要運算，determine type_to_generate, [0-9, except 2]
    type_to_generate = sorted([int(i) for i in str(df['Name'].loc['is_holiday']) if i in '134567890'], reverse= True)
    df['Name'].loc['is_holiday'] = np.nan  # set value as NaN, not to interfere with count of hollidays


    # 將住院醫師簡碼 (iloc[4] and below)以下 code 轉為 str
    # 使用 loc 賦值不會出現  SettingWithCopyWarning: 
    # A value is trying to be set on a copy of a slice from a DataFrame 
    for i in range(4,len(df.index)):
        df.loc[i]['Code'] = str(df.loc[i]['Code'])

    # 將所有大寫轉為小寫
    lower_text = lambda item: str(item).lower() if type(item) ==str else item
    df.iloc[4:,5:len(df.columns)] = df.iloc[4:,5:len(df.columns)].applymap(lower_text)
        
        
    # rename date index from 1 to date
    # start from 5
    for i in range(5,len(df.columns)):
        df = df.rename(columns={df.columns[i]:str(i-4)}) 
    
    return df, type_to_generate

    ## ==== now the data has been clensed === ##



In [5]:
def is_violation(df, duty_type_array):
    """
    input = dataframe after clensing
    output = if there's no day violation
    print where is the violation
    True -> have violation
    False -> no violation
    """
    
    print('檢查輸入資料...')
    
    # 檢查欲執行項目是否為空白, 如果空白，則 raise assertion
    if duty_type_array == []:
        raise AssertionError('請輸入要執行的班別，再執行程式')
    
        
    # TYPES_OF_DUTY = {1:'CT/MR', 3:'ER', 4:'CR', 5:'VS', 6:'Other', 7:'Other', 8:'Other', 9:'Other', 0:'Test'}
    is_violation = False
    days_in_month = df.loc['Weekday_num'].notnull().sum()
    num_of_holiday = df.loc['is_holiday'].notnull().sum()
    num_of_weekday = days_in_month - num_of_holiday
    
    # iterate through every duty types
    for duty_type in duty_type_array:
        # 檢查每一個要 run 的值班類別班數和是否正確
        # test for holiday
        # Weekday_num 非零的欄位代表當月日數

        # if CT/MR, type1 -> doubles the days of duties
        num_of_holiday_duty = num_of_holiday *2 if duty_type ==1 else num_of_holiday
        num_of_weekday_duty = num_of_weekday *2 if duty_type ==1 else num_of_weekday

        # 假日及平日值班數
        num_of_duties_h = df[df['Type']==duty_type]['Holiday'].sum()
        num_of_duties_w = df[df['Type']==duty_type]['Weekday'].sum()


        # 檢查值班總數是否不足， report error message and violation
        if num_of_duties_h < num_of_holiday_duty:
            print(f'{TYPES_OF_DUTY[duty_type]} 班假日值班總數不足，缺少{num_of_holiday_duty-num_of_duties_h}班')
            is_violation = True
        if num_of_duties_w < num_of_weekday_duty:
            print(f'{TYPES_OF_DUTY[duty_type]} 班平日值班總數不足，缺少{num_of_weekday_duty-num_of_duties_w}班')
            is_violation = True
            
        # 預約值班前後兩天無法值班，避免 qd，並 update 新表，以利接下來亂數產生
        # 4 to len(df.index) 會指到最後一項列
        # 5 to len(df.columns)-1 會指到最後一欄
        # 處理第一欄
        for i in range(4,len(df.index)):
            if df.iloc[i,5] == 1:
                df.iloc[i,6]='x'
        # 處理中間欄
        for i in range(4,len(df.index)):
            for j in range(6,len(df.columns)-1):
                if df.iloc[i,j]==1:
                    df.iloc[i,(j+1)]='x'
                    df.iloc[i,(j-1)]='x'
        # 處理最後一欄
        for i in range(4,len(df.index)):
            if df.iloc[i,len(df.columns)-1] == 1:
                df.iloc[i,len(df.columns)-2]='x'
        
            
        # 檢查是否有某日所有人都無法值班
        # iterate from '1' to 'end'
        for i in range(1,days_in_month+1):
            # any repeated reservation 
            # 已經在 data_cleasing 中間將 大寫轉為小寫了
            # 符合的 duty type 中 5至end處的值，有多少x or X
            # 使用 map 
            #lower_text = lambda item: str(item).lower()
            # num_of_exclude = (df[df['Type']==duty_type].iloc[:,5:][str(i)].map(lower_text)=='x').sum()
            num_of_exclude = (df[df['Type']==duty_type].iloc[:,5:][str(i)]=='x').sum()

            # 該班 R 人數
            num_of_r = len(df[df['Type']==duty_type].index)
            if num_of_exclude >= num_of_r:
                print(f'{TYPES_OF_DUTY[duty_type]} 班{i}號所有人均無法值班')
                is_violation = True       
        
        # 檢查是否有某日有兩個以上的人預約要值班
        # type 1 duty 同時兩個人值班
        if duty_type==1:
            # iterate from '1' to 'end'
            for i in range(1,days_in_month+1):
                # any repeated reservation 
                # 符合的 duty type 中 5至end處的值，是1的有多少個
                num_of_reservation = (df[df['Type']==duty_type].iloc[:,5:][str(i)]==1).sum()
                if num_of_reservation>2:
                    print(f'{TYPES_OF_DUTY[duty_type]} 班{i}號有超過2人預約要值班')
                    is_violation = True
        else:
            # iterate from '1' to 'end'
            for i in range(1,days_in_month+1):
                # any repeated reservation 
                # 符合的 duty type 中 5至end處的值，是1的有多少個
                num_of_reservation = (df[df['Type']==duty_type].iloc[:,5:][str(i)]==1).sum()
                if num_of_reservation>1:
                    print(f'{TYPES_OF_DUTY[duty_type]} 班{i}號有超過1人預約要值班')
                    is_violation = True
                
    if is_violation == False:
        print('檢查輸入資料...OK')
    else: 
        print('請修正以上資料後再執行程式')
        
    return is_violation, df

    

        

In [551]:

violation, df_updated = is_violation(df,type_to_generate)

檢查輸入資料...
31 8 23
CT/MR 班假日值班總數不足，缺少2班
CT/MR 班平日值班總數不足，缺少18班
請修正以上資料後再執行程式


### Construct the list by condition

In [496]:
# preliminary_list = preliminary_gen(df_updated, 3)
start = time.time()
preliminary_list = preliminary_gen2(df_updated, 3, 40000)
print(time.time()-start)

正在建立 [type3,ER班] 初步清單...
>----------- 8.3%
=>---------- 16.7%
==>--------- 25.0%
===>-------- 33.3%
====>------- 41.7%
=====>------ 50.0%
[type3,ER班] 初步清單已建立完成
11.574233770370483


### finding the best solution

In [880]:
# load file
df = pd.read_excel('test.xlsm')

# define types of duties
TYPES_OF_DUTY = {0:'Test', 1:'CT/MR', 3:'ER', 4:'CR', 5:'VS', 
                 6:'Other6', 7:'Other7', 8:'Other8', 9:'Other9'}  # no type 2

# cleanse the data, determine if there is violation
df, type_to_generate = data_cleansing(df)
violation, df_updated = is_violation(df,type_to_generate)

for duty_type in type_to_generate:
    preliminary_list = preliminary_gen3(df_updated, duty_type, 100)

檢查輸入資料...
檢查輸入資料...OK
正在建立 [type1,CT/MR班] 初步清單...
>----------- 10.0%
=>---------- 18.0%
==>--------- 26.0%
===>-------- 34.0%
====>------- 42.0%
=====>------ 50.0%
[type1,CT/MR班] 初步清單已建立完成
{'21': 0, '22': 0, '23': 0, '24': 0, '25': 0, '26': 0, '27': 0, '11': 0, '12': 0, '13': 0, '14': 0, '15': 0, '16': 0, '17': 0}
{'21': 0, '22': 0, '23': 0, '24': 0, '25': 0, '26': 0, '27': 0, '11': 0, '12': 0, '13': 0, '14': 0, '15': 0, '16': 0, '17': 0}


In [881]:
flag = True
for i in range(len(preliminary_list)-1):
    if sorted(preliminary_list[i])!=sorted(preliminary_list[i+1]):
        flag = False
print(flag)

False


In [882]:
flag = True
for ix in preliminary_list:
    if '12' not in ix[2]:
        flag = False
    if '16' not in ix[13]:
        flag = False
    if '11' not in ix[14]:
        flag = False
    if '12' not in ix[16]:
        flag = False
    if '12' not in ix[24]:
        flag = False
print(flag)
    

True


In [883]:
# check for type 1
flag = True

listb = []
for i in range(len(preliminary_list)-1):
    if sorted([itm for item in preliminary_list[i] for itm in item]) != sorted([itm for item in preliminary_list[i+1] for itm in item]):
        flag= False
print(flag)

True


In [885]:
# check for type1
sor = [itm for item in preliminary_list[1] for itm in item]
print(pd.DataFrame(sor).apply(pd.value_counts))
#sor = [itm for item in preliminary_list[13] for itm in item]
#print(pd.DataFrame(sor).apply(pd.value_counts))

    0
11  7
13  7
16  7
15  7
12  7
17  7
14  7
24  2
22  2
21  2
25  2
23  2
26  2
27  1


In [886]:
def preliminary_gen3(df_updated, duty_type, count_start):
    """
    input: df_updated or df
    generate: preliminary_list

    """
    def remove_when_not_available(added_items):
        if type(added_items) == str:
            if TODAY_IS_HOLIDAY == True:
                num_holiday_gen[add_item]-=1
                if num_holiday_gen[add_item] == 0: # if == 0, remove item
                    for holiday in LIST_OF_HOLIDAY:
                        if add_item in available_code_gen[holiday]:
                            #if add_item not in reservation_dict[holiday]: # 不必 check，因為總數可以check
                            available_code_gen[holiday].remove(add_item)
            else:
                num_weekday_gen[add_item]-=1
                if num_weekday_gen[add_item] == 0:
                    for weekday in LIST_OF_WEEKDAY:
                        if add_item in available_code_gen[weekday]:
                            #if add_item not in reservation_dict[weekday]:
                            available_code_gen[weekday].remove(add_item)
        elif type(added_items) == list:
            for itm in added_items:
                if TODAY_IS_HOLIDAY == True:
                    num_holiday_gen[itm]-=1
                    if num_holiday_gen[itm] == 0:   
                        for holiday in LIST_OF_HOLIDAY:
                            if itm in available_code_gen[holiday]:
                                #if itm not in reservation_dict[holiday]:
                                available_code_gen[holiday].remove(itm)
                else:
                    num_weekday_gen[itm]-=1
                    if num_weekday_gen[itm] == 0:
                        for weekday in LIST_OF_WEEKDAY:
                            if itm in available_code_gen[weekday]:
                                #if itm not in reservation_dict[weekday]:
                                available_code_gen[weekday].remove(itm)
            
    #sys_random = random.SystemRandom()
    # TYPES_OF_DUTY = {1:'CT/MR', 3:'ER', 4:'CR', 5:'VS', 6:'Other', 7:'Other', 8:'Other', 9:'Other', 0:'Test'}

    
    print(f'正在建立 [type{duty_type},{TYPES_OF_DUTY[duty_type]}班] 初步清單...')

    # for check of holiday
    IS_HOLIDAY = df_updated.iloc[2,5:].tolist()  # list of holiday 'v' [nan, 'v'...]
    HOLIDAY_CHECK = {}
    for index,item in enumerate(IS_HOLIDAY):
        if str(item).lower() == 'v':
            HOLIDAY_CHECK[str(index+1)] = True
        else:
            HOLIDAY_CHECK[str(index+1)] = False
    LIST_OF_HOLIDAY = [key for key,item in HOLIDAY_CHECK.items() if item==True]
    LIST_OF_WEEKDAY = [key for key,item in HOLIDAY_CHECK.items() if item==False]
    
    DAYS = len(IS_HOLIDAY) # 這個月有幾天, eg 30
    DAY_LIST = [str(i+1) for i in range(DAYS)] # 這個月的號碼 eg ['1'...'28']
    df_work = df_updated[df['Type']==duty_type]

    # 建立 int day, str day 對照表
    # combinding 2 dictionaries: z = {**x, **y}, {1:'1', '1':1}
    DAY_TABLE = {**{(i+1):str(i+1) for i in range(DAYS)}, **{str(i+1):(i+1) for i in range(DAYS)}}

    # 在 duty_type 下，的住院醫師的 code
    CODE_LIST = df_work['Code'].tolist() # code list, ['31','32']

    # 每個住院醫師有幾個假日/平日班
    num_holiday = {}
    num_weekday = {}
    available_code ={}  # dictionary

    # how many holiday/weekday for each resident
    for code in CODE_LIST:
        num_holiday[code] = df_work[df_work['Code']==code]['Holiday'].item()
        num_weekday[code] = df_work[df_work['Code']==code]['Weekday'].item()
    
    # construct available days:
    # process 預約不值班
    # note: 預約值班的前後已在 is_violation 裡面標記 'x'，所以這裡就可以直接從 available code list 裡面去掉該員，不會遺漏
    for day in DAY_LIST:
        available_code[day]= copy.deepcopy(CODE_LIST)  # 一定要使用完整拷貝，不然會變成參照，後面會全部都錯誤
        for code in CODE_LIST:
            if df_work[df_work['Code']==code][day].item()=='x':  # 如果預約不值班 == 'x'，則從 available 中移除
                available_code[day].remove(code)

    # process 如果該員沒有假日班/平日班，則從 avaliable code中間移除
    for code in CODE_LIST:
        if num_holiday[code] == 0:
            for hday in LIST_OF_HOLIDAY:
                if code in available_code[hday]:
                    available_code[hday].remove(code)
        if num_weekday[code] == 0:
            for wday in LIST_OF_WEEKDAY:
                if code in available_code[wday]:
                    available_code[wday].remove(code)
                
                
    # process 預約值班
    reservation_dict = {day:[] for day in DAY_LIST}  
    # 一天1人值班：
    if duty_type !=1:
        for day in DAY_LIST:
            for code in CODE_LIST:
                if df_work[df_work['Code']==code][day].item()==1: # 如果預約值班，則移除其他
                    available_code[day]=[code]
                    reservation_dict[day].append(code)
    else:
    # type1 duty, 一天2人值班，建立 reservation_dict 讓之後程式抓取：
        # reference for reservation numbers in the date
        #'3':2 -> 2 people want duty at 3rd, already cleanse condition>2
        for day in DAY_LIST:
            # reservation_dict[day] = 0  # assign value, 
            for code in CODE_LIST:
                if df_work[df_work['Code']==code][day].item()==1: # 如果預約值班，則增加到 reservation dict
                    reservation_dict[day].append(code)
    
    # 產生 count_start個 符合所有排班規則的 candidate
    preliminary_list = []

    count = count_start
    
    # for progress bar
    total_step = 12  # set 12 intervals
    interval = int(count_start/total_step) 
    progress = [i*interval for i in range(1,total_step+1)]
    
    
    while count >0:  # generate till count = count_start candidates
        stopper = False  # 加速脫離迴圈
        #progress bar
        if progress !=[]:
            if (count_start-count-1)>progress[0]:
                del progress[0]
                prefix = '='*(total_step-len(progress)-1) + '>'
                prefix = "{:-<12}".format(prefix)
                print("{s} {r:0.1%}".format(s=prefix,r=(1-count/count_start)))

        candidate_list = []
        available_code_gen = copy.deepcopy(available_code)  # not alter original list
        num_holiday_gen = copy.deepcopy(num_holiday)
        num_weekday_gen = copy.deepcopy(num_weekday)
        
        for day in DAY_LIST:
        #for day in DAY_LIST_SORTED:
            if stopper == True: # 加速脫離迴圈
                break
            TODAY_IS_HOLIDAY = HOLIDAY_CHECK[day]                
            day_next = str(int(day)+1)  # next day in string
            day_previous = str(int(day)-1)
            
            # 如果今天已經沒有可以用的天數
            for code in CODE_LIST:
            #for code in available_code_gen[day]:
                if TODAY_IS_HOLIDAY == True:
                    #if num_holiday_gen[code] <= 0:
                    if num_holiday_gen[code] == 0:
                        if code in available_code_gen[day]:
                            available_code_gen[day].remove(code)
                    if num_holiday_gen[code] < 0:
                        stopper =True
                        break
                else:
                    #if num_weekday_gen[code] <= 0:
                    if num_weekday_gen[code] == 0:
                        if code in available_code_gen[day]:
                            available_code_gen[day].remove(code)
                    if num_weekday_gen[code] <0:
                        stopper = True
                        break
            if stopper == True: # 加速脫離迴圈
                break            
            
                    
            # type 1 duty
            if duty_type == 1:
                # type 1 要 check reservation dict
                if len(available_code_gen[day]) <2: # not enough item can be choosed
                    stopper ==True
                    #print('stopped', day)
                    break
                else:
                    if len(reservation_dict[day]) ==0:
                        # 如果沒有人預約值班，那亂數選兩個人
                        add_item = random.sample(available_code_gen[day],2)  # sample 2 in type 1 duty
                    elif len(reservation_dict[day]) == 1: 
                        # 如果只有一人預約值班，那先選他，從available code list 中移除掉，之後再亂數
                        add_item = reservation_dict[day] # 先指定 eg. ['31']
                        try:
                            available_code_gen[day].remove(add_item[0]) # remove the first item
                        except:
                            stopper=True
                            break
                        add_item.append(random.choice(available_code_gen[day])) # 一開始有 check len>=2
                    elif len(reservation_dict[day]) == 2:  
                        # 如果兩人預約這天值班，那就都給他們
                        add_item = reservation_dict[day] # add item 即是這兩個
                    candidate_list.append(add_item)  # add ['X','Y'] to candidiate list  
                    
                    remove_when_not_available(add_item)    
                    
                
                    if (DAY_TABLE[day]+1)> DAYS:
                        # 到最後一天的話，就不用移除了
                        break
                    else:
                        # 如果不是最後一天，則依序移除
                        for itm in add_item:
                            if itm in available_code_gen[day_next]:
                                available_code_gen[day_next].remove(itm)
                #if stopper == True:
                #    break
            else:
                # other types ofduty
                if available_code_gen[day] == []: # no item can be choosed
                    #print(day)
                    stopper = True
                    break
                else:
                    add_item = random.choice(available_code_gen[day])   # choice is faster than sample 1[0]
                    candidate_list.append(add_item)
                    remove_when_not_available(add_item)
                    
                    if int(day_next)<=DAYS and add_item in available_code_gen[day_next]:
                        available_code_gen[day_next].remove(add_item)
            if stopper == True:
                break

        
        if len(candidate_list)==DAYS:  # 其實不需要這句，因為都篩選到了最後一天，但速度幾乎無差別
            preliminary_list.append(candidate_list)
            #print(candidate_list)
            count-=1 

    print(f'[type{duty_type},{TYPES_OF_DUTY[duty_type]}班] 初步清單已建立完成')
    print(num_holiday_gen)
    print(num_weekday_gen)
    return preliminary_list
    # create a list of all candidates, return preliminary_list




In [560]:
def preliminary_gen2(df_updated, duty_type, count_start):
    """
    input: df_updated or df
    generate: preliminary_list

    """
    def remove_when_not_available(added_items):
        if type(added_items) == str:
            if TODAY_IS_HOLIDAY == True:
                num_holiday_gen[add_item]-=1
                if num_holiday_gen[add_item] <= 0:  # 可能本來就是0
                    for holiday in LIST_OF_HOLIDAY:
                        if add_item in available_code_gen[holiday]:
                            if add_item not in reservation_dict[holiday]:
                                available_code_gen[holiday].remove(add_item)
            else:
                num_weekday_gen[add_item]-=1
                if num_weekday_gen[add_item] <= 0:
                    for weekday in LIST_OF_WEEKDAY:
                        if add_item in available_code_gen[weekday]:
                            if add_item not in reservation_dict[weekday]:
                                available_code_gen[weekday].remove(add_item)
        elif type(added_items) == list:
            for itm in added_items:
                if TODAY_IS_HOLIDAY == True:
                    num_holiday_gen[itm]-=1
                    if num_holiday_gen[itm] <= 0:  # 可能本來就是0
                        for holiday in LIST_OF_HOLIDAY:
                            if itm in available_code_gen[holiday]:
                                if itm not in reservation_dict[holiday]:
                                    available_code_gen[holiday].remove(itm)
                else:
                    num_weekday_gen[itm]-=1
                    if num_weekday_gen[itm] <= 0:
                        for weekday in LIST_OF_WEEKDAY:
                            if itm in available_code_gen[weekday]:
                                if itm not in reservation_dict[weekday]:
                                    available_code_gen[weekday].remove(itm)
            
    #sys_random = random.SystemRandom()
    # TYPES_OF_DUTY = {1:'CT/MR', 3:'ER', 4:'CR', 5:'VS', 6:'Other', 7:'Other', 8:'Other', 9:'Other', 0:'Test'}

    
    print(f'正在建立 [type{duty_type},{TYPES_OF_DUTY[duty_type]}班] 初步清單...')

    # for check of holiday
    IS_HOLIDAY = df_updated.iloc[2,5:].tolist()  # list of holiday 'v' [nan, 'v'...]
    HOLIDAY_CHECK = {}
    for index,item in enumerate(IS_HOLIDAY):
        if item == 'v':
            HOLIDAY_CHECK[str(index+1)] = True
        else:
            HOLIDAY_CHECK[str(index+1)] = False
    LIST_OF_HOLIDAY = [key for key,item in HOLIDAY_CHECK.items() if item==True]
    LIST_OF_WEEKDAY = [key for key,item in HOLIDAY_CHECK.items() if item==False]
    DAYS = len(IS_HOLIDAY) # 這個月有幾天, eg 30
    DAY_LIST = [str(i+1) for i in range(DAYS)] # 這個月的號碼 eg ['1'...'28']
    df_work = df_updated[df['Type']==duty_type]

    # 建立 int day, str day 對照表
    # combinding 2 dictionaries: z = {**x, **y}, {1:'1', '1':1}
    DAY_TABLE = {**{(i+1):str(i+1) for i in range(DAYS)}, **{str(i+1):(i+1) for i in range(DAYS)}}

    # 在 duty_type 下，的住院醫師的 code
    CODE_LIST = df_work['Code'].tolist() # code list, ['31','32']
    print(CODE_LIST)
    # 每個住院醫師有幾個假日/平日班
    num_holiday = {}
    num_weekday = {}
    available_code ={}  # dictionary

    # how many holiday/weekday for each resident
    for code in CODE_LIST:
        num_holiday[code] = df_work[df_work['Code']==code]['Holiday'].item()
        num_weekday[code] = df_work[df_work['Code']==code]['Weekday'].item()
    print(num_holiday)
    print(num_weekday)
    
    # construct available days:
    # process 預約不值班
    # note: 預約值班的前後已在 is_violation 裡面標記 'x'，所以這裡就可以直接從 available code list 裡面去掉該員，不會遺漏
    for day in DAY_LIST:
        available_code[day]= copy.deepcopy(CODE_LIST)  # 一定要使用完整拷貝，不然會變成參照，後面會全部都錯誤
        for code in CODE_LIST:
            if df_work[df_work['Code']==code][day].item()=='x':  # 如果預約不值班 == 'x'，則從 available 中移除
                available_code[day].remove(code)

    # process 如果該員沒有假日班/平日班，則從 avaliable code中間移除
    for code in CODE_LIST:
        if num_holiday[code] == 0:
            for hday in LIST_OF_HOLIDAY:
                if code in available_code[hday]:
                    available_code[hday].remove(code)
        if num_weekday[code] == 0:
            for wday in LIST_OF_WEEKDAY:
                if code in available_code[wday]:
                    available_code[wday].remove(code)
                
    # process 預約值班
    reservation_dict = {day:[] for day in DAY_LIST}  
    # 一天1人值班：
    if duty_type !=1:
        for day in DAY_LIST:
            for code in CODE_LIST:
                if df_work[df_work['Code']==code][day].item()==1: # 如果預約值班，則移除其他
                    available_code[day]=[code]
                    reservation_dict[day].append(code)
    else:
    # type1 duty, 一天2人值班，建立 reservation_dict 讓之後程式抓取：
        # reference for reservation numbers in the date
        #'3':2 -> 2 people want duty at 3rd, already cleanse condition>2
        for day in DAY_LIST:
            # reservation_dict[day] = 0  # assign value, 
            for code in CODE_LIST:
                if df_work[df_work['Code']==code][day].item()==1: # 如果預約值班，則增加到 reservation dict
                    reservation_dict[day].append(code)
                
    # available code 排序
    #available_code_sorted = sorted(available_code.items(), key = lambda x:len(x[1]))
    #DAY_LIST_SORTED = [item[0] for item in available_code_sorted]
    #print(available_code)
    #print(DAY_LIST_SORTED)


    # 產生 count_start個 符合所有排班規則的 candidate
    preliminary_list = []

    #count_start = 50000
    count = count_start
    
    # for progress bar
    total_step = 12  # set 12 intervals
    interval = int(count_start/total_step) 
    progress = [i*interval for i in range(1,total_step+1)]
    

    while count >0:  # generate till count = count_start candidates
        #progress bar
        if progress !=[]:
            if (count_start-count-1)>progress[0]:
                del progress[0]
                prefix = '='*(total_step-len(progress)-1) + '>'
                prefix = "{:-<12}".format(prefix)
                print("{s} {r:0.1%}".format(s=prefix,r=(1-count/count_start)))

        candidate_list = []
        available_code_gen = copy.deepcopy(available_code)  # not alter original list
        num_holiday_gen = copy.deepcopy(num_holiday)
        num_weekday_gen = copy.deepcopy(num_weekday)
        
        for day in DAY_LIST:
        #for day in DAY_LIST_SORTED:
            TODAY_IS_HOLIDAY = HOLIDAY_CHECK[day]                
            day_next = str(int(day)+1)  # next day in string
            day_previous = str(int(day)-1)
            # type 1 duty
            if duty_type == 1:
                # type 1 要 check reservation dict
                if len(available_code_gen[day]) <2 : # not enough item can be choosed
                    break
                else:
                    if len(reservation_dict[day]) ==0:
                        # 如果沒有人預約值班，那亂數選兩個人
                        add_item = random.sample(available_code_gen[day],2)  # sample 2 in type 1 duty
                    elif len(reservation_dict[day]) == 1: 
                        # 如果只有一人預約值班，那先選他，從available code list 中移除掉，之後再亂數
                        add_item = reservation_dict[day] # 先指定 eg. ['31']
                        available_code_gen[day].remove(add_item[0]) # remove the first item
                        add_item.append(random.choice(available_code_gen[day])) # 一開始有 check len>=2
                    elif len(reservation_dict[day]) == 2:  
                        # 如果兩人預約這天值班，那就都給他們
                        add_item = reservation_dict[day] # add item 即是這兩個
                    candidate_list.append(add_item)  # add ['X','Y'] to candidiate list   
                    remove_when_not_available(add_item)    
                    
                    if (DAY_TABLE[day]+1)> DAYS:
                        # 到最後一天的話，就不用移除了
                        break
                    else:
                        # 如果不是最後一天，則依序移除
                        for itm in add_item:
                            if itm in available_code_gen[day_next]:
                                available_code_gen[day_next].remove(itm)

            else:
                # other types ofduty
                if available_code_gen[day] == []: # no item can be choosed
                    #print(day)
                    break
                else:
                    add_item = random.choice(available_code_gen[day])   # choice is faster than sample 1[0]
                    candidate_list.append(add_item)
                    remove_when_not_available(add_item)
                    
                    ### 要加如果見底，那就之後都不 available 
                    #if TODAY_IS_HOLIDAY == True:
                    #    num_holiday_gen[add_item]-=1
                    #    if num_holiday_gen[add_item] <= 0:  # 可能本來就是0
                    #        for holiday in LIST_OF_HOLIDAY:
                    #            if add_item in available_code_gen[holiday]:
                    #                available_code_gen[holiday].remove(add_item)
                    #else:
                    #    num_weekday_gen[add_item]-=1
                    #    if num_weekday_gen[add_item] <= 0:
                    #        for weekday in LIST_OF_WEEKDAY:
                    #            if add_item in available_code_gen[weekday]:
                    #                available_code_gen[weekday].remove(add_item)
                    
                    if int(day_next)<=DAYS and add_item in available_code_gen[day_next]:
                        available_code_gen[day_next].remove(add_item)
                    #if int(day_previous)>0 and add_item in available_code_gen[day_previous]:
                    #    available_code_gen[day_previous].remove(add_item)
                    # 如果到最後一天，那就結束
                    
                    #if (DAY_TABLE[day]+1)> DAYS:
                    #    break
                    #else:
                    #    # 如果沒有到最後一天，那在隔天移除這次加的項目
                    #    if add_item in available_code_gen[day_next]:
                    #        available_code_gen[day_next].remove(add_item)
        check_violation1 = [value for key,value in num_weekday_gen.items() if value<0]
        check_violation2 = [value for key,value in num_holiday_gen.items() if value<0]
        if len(check_violation2)>0 or len(check_violation1)>0:
            continue
        
        if len(candidate_list)==DAYS:  # 其實不需要這句，因為都篩選到了最後一天，但速度幾乎無差別
            preliminary_list.append(candidate_list)
            #print(candidate_list)
            count-=1 

    print(f'[type{duty_type},{TYPES_OF_DUTY[duty_type]}班] 初步清單已建立完成')
    print(num_holiday_gen)
    print(num_weekday_gen)
    return preliminary_list
    # create a list of all candidates, return preliminary_list



In [7]:
def preliminary_gen(df_updated, duty_type):
    """
    input: df_updated or df
    generate: preliminary_list

    """
    # TYPES_OF_DUTY = {1:'CT/MR', 3:'ER', 4:'CR', 5:'VS', 6:'Other', 7:'Other', 8:'Other', 9:'Other', 0:'Test'}

    
    print(f'正在建立 [type{duty_type},{TYPES_OF_DUTY[duty_type]}班] 初步清單...')

    IS_HOLIDAY = df_updated.iloc[2,5:].tolist()  # list of holiday 'v' [nan, 'v'...]
    DAYS = len(IS_HOLIDAY) # 這個月有幾天, eg 30
    DAY_LIST = [str(i+1) for i in range(DAYS)] # 這個月的號碼 eg ['1'...'28']
    df_work = df_updated[df['Type']==duty_type]

    # combinding 2 dictionaries: z = {**x, **y}, {1:'1', '1':1}
    DAY_TABLE = {**{(i+1):str(i+1) for i in range(DAYS)}, **{str(i+1):(i+1) for i in range(DAYS)}}

    CODE_LIST = df_work['Code'].tolist() # code list, ['31','32']
    num_holiday = {}
    num_weekday = {}
    available_code ={}

    # how many holiday/weekday for each resident
    for code in CODE_LIST:
        num_holiday[code] = df_work[df_work['Code']==code]['Holiday'].item()
        num_weekday[code] = df_work[df_work['Code']==code]['Weekday'].item()

    # construct available days:
    # excluding 預約不值班
    for day in DAY_LIST:
        available_code[day]= CODE_LIST[:]  # 一定要使用完整拷貝，不然會變成參照，後面會全部都錯誤
        for code in CODE_LIST:
            if df_work[df_work['Code']==code][day].item()=='x':  # 如果預約不值班 == 'x'，則從 available 中移除
                available_code[day].remove(code)
    # including 預約值班
    for day in DAY_LIST:
        for code in CODE_LIST:
            if df_work[df_work['Code']==code][day].item()==1: # 如果預約值班，則移除其他
                available_code[day]=[code]
                

    # 產生 count_start個 符合所有排班規則的 candidate
    preliminary_list = []

    count_start = 20000
    count = count_start
    
    # for progress bar
    total_step = 12  # set 12 intervals
    interval = int(count_start/total_step) 
    progress = [i*interval for i in range(1,total_step+1)]
    
    while count >0:  # generate till count = 30000 candidates
        # progress bar
        if progress !=[]:
            if (count_start-count-1)>progress[0]:
                del progress[0]
                prefix = '='*(total_step-len(progress)-1) + '>'
                prefix = "{:-<12}".format(prefix)
                #print("{s} {r:0.1f}%".format(s=prefix,r=100*(1-count/count_start)))
                print("{s} {r:0.1%}".format(s=prefix,r=(1-count/count_start)))
            
        candidate_list = []
        available_code_gen = copy.deepcopy(available_code)  # not alter original list
        for day in DAY_LIST:
            if available_code_gen[day] == []: # no item can be choosed
                # print('break at', day)
                break
            else:
                add_item = random.sample(available_code_gen[day],1)[0]
                candidate_list.append(add_item)
                if (DAY_TABLE[day]+1)> DAYS:
                    break
                try:
                    available_code_gen[DAY_TABLE[DAY_TABLE[day]+1]].remove(add_item)
                except:  # error if at the last day or no the item is no in the next day
                    pass
        if len(candidate_list)==DAYS:  # 如果得出來的，其實不需要這句，因為都篩選到了，但速度幾乎無差別
            preliminary_list.append(candidate_list)
            count-=1 
    
    print(f'[type{duty_type},{TYPES_OF_DUTY[duty_type]}班] 初步清單已建立完成')
    return preliminary_list
    # create a list of all candidates, return preliminary_list


### optimizing the list

### optimize type 1 duty

In [619]:
# load file
df = pd.read_excel('test.xlsm')

TYPES_OF_DUTY = {0:'Test', 1:'CT/MR', 3:'ER', 4:'CR', 5:'VS', 
                 6:'Other', 7:'Other', 8:'Other', 9:'Other'}  # no type 2

df, type_to_generate = data_cleansing(df)
violation, df_updated = is_violation(df,type_to_generate)

檢查輸入資料...
檢查輸入資料...OK


In [613]:


preliminary_list = preliminary_gen2(df_updated, 1, 20000)

正在建立 [type1,CT/MR班] 初步清單...
>----------- 8.3%
=>---------- 16.7%
==>--------- 25.0%
===>-------- 33.3%
====>------- 41.7%
=====>------ 50.0%
[type1,CT/MR班] 初步清單已建立完成


In [614]:
# 8236 
# in method 17.297198057174683
# == method 17.78391122817993  8236
start = time.time()
list_location_std_sorted = optimization2(preliminary_list, df_updated, 1)
print(time.time()-start)

正在尋找 [type1,CT/MR班] 最佳排班...
已完成 [type1,CT/MR班] 最佳排班排序
0.22481298446655273


In [617]:
list_location_std_sorted[0]

[[['13', '14'],
  ['12', '16'],
  ['14', '11'],
  ['15', '13'],
  ['17', '12'],
  ['11', '16'],
  ['13', '14'],
  ['12', '16'],
  ['15', '11'],
  ['14', '13'],
  ['11', '16'],
  ['12', '15'],
  ['17', '14'],
  ['11', '12'],
  ['17', '15'],
  ['13', '16'],
  ['11', '12'],
  ['15', '11'],
  ['16', '14'],
  ['17', '13'],
  ['12', '15'],
  ['16', '14'],
  ['11', '12'],
  ['17', '14'],
  ['13', '16'],
  ['11', '15'],
  ['13', '17'],
  ['16', '14'],
  ['11', '12'],
  ['17', '16'],
  ['11', '15']],
 6513,
 0.42111990576074576]

In [252]:
def optimization_singular(preliminary_list, df_updated, duty_type):
    """
    input: preliminary_list
    output: list_location_std_sorted, after optimization
    """
    # Optimizing the list
    # 1. minimize the total days of QOD in everyone (如果只選標準差多少人不夠)
    # find min() of days  -> 這幾乎是最好的了，因為幾乎<3，所以 2 不需要

    # 2. minimize standard deviation of days of QOD among others
    #np.array([2,2,2,1]).std(ddof=0)
    # 計算個人值班分散程度（標準差） 的標準差，依照順序排列 （大家分散程度要差不多）
    # 在這個情況下，不可能大家同時標準差都很高，導致標準差的標準差值很小


    TYPES_OF_DUTY = {1:'CT/MR', 3:'ER', 4:'CR', 5:'VS', 6:'Other', 7:'Other', 8:'Other', 9:'Other', 0:'Test'}
    DAYS = len(preliminary_list[0])  # 這個月有幾天
    df_work = df_updated[df['Type']==duty_type]
    CODE_LIST = df_work['Code'].tolist() # code list, ['31','32']
    
    
    print(f'正在尋找 [type{duty_type},{TYPES_OF_DUTY[duty_type]}班] 最佳排班...')
    
    

    qod = []  # 各組 qod 的情況
    for n in range(len(preliminary_list)):
        # search for qod (value of location i == value of location i+2)
        list_temp = [preliminary_list[n][i] for i in range(DAYS-2) if preliminary_list[n][i]==preliminary_list[n][i+2]] 
        qod.append(list_temp)
    qod_pd = pd.DataFrame(qod)

    # 1. 找到 QOD 人次最少的組合
    # num of qods in each candidate
    num_of_qod = []
    for i in range(len(preliminary_list)):
        num_of_qod.append(qod_pd.iloc[i].notnull().sum())  # 非0個數 = qod 個數
    min_qod = min(num_of_qod)
    # create index of candidates with minimal qod days in total
    #eg [7376, 11732, 15383, 18130, 20990, 28528, 28785]
    min_qod_index = [index for index,value in enumerate(num_of_qod) if value==min_qod]


    # 2. 每個人分布的標準差 之間的標準差 最小化，取三個
    # 最小 qod 的 candidiate
    min_qod_list = []  
    for index in min_qod_index:
        min_qod_list.append(preliminary_list[index])

    # 由 CODE_LIST 內容依序提出資訊
    list_location_std = []
    for i in range(len(min_qod_list)):
        std_temp = []
        for code in CODE_LIST:
            # location of each code
            temp_list = [location for location,item in enumerate(min_qod_list[i]) if item == code]
            # print(code, temp_list, np.std(temp_list))
            try:
                std_temp.append(np.std(temp_list,ddof=0))
            except:
                for item in temp_list:
                    print(item)
                
        list_location_std.append([min_qod_list[i],
                         min_qod_index[i],
                         np.std(std_temp,ddof=0)])  # the list, location, std value

    # 根據 std value (list_location_std[2]) 來排序
    # sorted_a = sorted(a, key=lambda x: x[1])
    list_location_std_sorted = sorted(list_location_std, key=lambda x:x[2]) # list_location_std[2] is the std value

    # 最多取三個
    if len(list_location_std_sorted)>3:
        list_location_std_sorted=list_location_std_sorted[0:3]

    print(f'已完成 [type{duty_type},{TYPES_OF_DUTY[duty_type]}班] 最佳排班排序')
    
    return list_location_std_sorted


### progress bar

In [193]:
total = 60000
interval = 5000
total_step = 12 # int(total/interval)
progress = [int(total/12*i) for i in range(1,12+1)]
progress

prefix = '-'*(1+total_step-len(progress)) + '>'
prefix = "{:-<12}".format(prefix)

print("{s} {r:0.1f}%".format(s=prefix,r=100*progress[9]/total))



->---------- 83.3%


In [205]:
total = 60000
interval = 5000
total_step = int(total/interval)
progress = [i*interval for i in range(1,total_step+1)]
progress

prefix = '-'*(1+total_step-len(progress)) + '>'

print("{s} {r:0.1%}%".format(s=prefix,r=100*progress[9]/total))

-> 8333.3%%


### Testing for running time

In [282]:
# conclusion: use dict_available {'1':[]} and dict_remaining {'31':3}

# if dict_remaining value ==0 -> remove all item {'31':3}

#0.9814069271087646
dix = {str(i):['31','32','33'] for i in range(1,30)}
start = time.time()
for i in range(100000):
    for value in dix.values():
        try:
            value.remove('32')
        except:
            pass

end = (time.time())
duration = end-start
print(duration)

# 雖然 set比list 快，但是要執行這個項目的機會比較少，所以還是採用 list
#0.2403273582458496
dix = {str(i):['31','32','33'] for i in range(1,30)}
start = time.time()
for i in range(100000):
    for value in dix.values():
        if '32' in value:
            value.remove('32')

end = (time.time())
duration = end-start
print(duration)

# set + remove() is the fastest
# 0.18353843688964844
dix = {str(i):{'31','32','33'} for i in range(1,30)}
start = time.time()
for i in range(100000):
    for value in dix.values():
        if '32' in value:
            value.remove('32')

end = (time.time())
duration = end-start
print(duration)


# set + discard() is not faster than set + if + remove()
# 0.18353843688964844
dix = {str(i):{'31','32','33'} for i in range(1,30)}
start = time.time()
for i in range(100000):
    for value in dix.values():
        value.discard('32')

end = (time.time())
duration = end-start
print(duration)

1.1959936618804932
0.26471686363220215
0.2032618522644043
0.30378127098083496


In [300]:
import time
# conclusion: use dict_available {'1':[]} and dict_remaining {'31':3}

# construct dictionay with list of able: dic_available = {'1':{}} or {'1':[]} -> compare
# so using list is faster in this small sample

# choice(list) is faster than sample(set)
# range(100000):
# set 0.3071775436401367
# list 0.06884527206420898

# append/remove items: set is faster
# range(100000):
# set remove/discard 0.018950462341308594  (remove is slightly faster than discard)
# list remove 0.02590203285217285 
# list remove + try/except 0.02595973014831543

# +/- value: dictionary is the same as list
# dict 0.011967658996582031
# list 0.011968374252319336

#@timing
# 0.1207430362701416
start = (time.time())
dix = {str(i):{'31','32','33','34'} for i in range(1,30)}
# random.choice()
# random.choices()
# random.sample()
for i in range(1000000):
    if '31' in dix['1']:
        pass
    #dix['1'].add('34')
        dix['1'].remove('31')
    #dix['1'].discard('34')
    # random.sample(dix['1'],1)

end = (time.time())
duration = end-start
print(duration)


start = (time.time())
dix = {str(i):['31','32','33','34'] for i in range(1,30)}
# random.choice()
# random.choices()
# random.sample()
for i in range(1000000):
    if '31' in dix['1']:
        pass
    #dix['1'].add('34')
        dix['1'].remove('31')
    #dix['1'].discard('34')
    # random.sample(dix['1'],1)

end = (time.time())
duration = end-start
print(duration)


# 0.1587362289428711
start = (time.time())
dix = {str(i):{'31','32','33','34'} for i in range(1,30)}
for i in range(1000000):
    #dix['1'].add('34')
    #dix['1'].remove('34')
    dix['1'].discard('34')
    #random.choice(dix['1'])

end = (time.time())
duration = end-start
print(duration)



0.12640976905822754
0.14469599723815918
0.14461278915405273


### Generating random list 

In [1313]:
df

Unnamed: 0,Name,Code,Type,Holiday,Weekday,1,2,3,4,5,...,21,22,23,24,25,26,27,28,29,30
Weekday_ch,6,月,,,,一,二,三,四,五,...,日,一,二,三,四,五,六,日,一,二
Weekday_num,,,,,,1,2,3,4,5,...,7,1,2,3,4,5,6,7,1,2
is_holiday,,,,,,,,,,,...,v,,,,,,v,v,,
3,,簡,類,假,平,,,,,,...,,,,,,,,,,
4,李志謙,A,4,2,7,,,,,x,...,,x,,,,x,,,,
5,戴維安,B,4,2,7,,,,,x,...,,x,,,,,,,,
6,吳嘉紘,C,4,1,9,,,,,,...,,x,,,,x,,,,x
7,郭昱,D,4,3,9,,,x,1,x,...,,,,,,,,,x,1
8,李志謙3,31,3,2,7,,,,,,...,,,,,,,,,x,
9,戴維安3,32,3,2,7,,,,,,...,,,,,,,,,x,


In [979]:
IS_HOLIDAY = df.iloc[2,5:] 
#(not np.isnan(IS_HOLIDAY['2']))
len(IS_HOLIDAY)
i=3
IS_HOLIDAY

1     NaN
2     NaN
3     NaN
4     NaN
5     NaN
6       v
7       v
8     NaN
9     NaN
10    NaN
11    NaN
12    NaN
13      v
14      v
15    NaN
16    NaN
17    NaN
18    NaN
19    NaN
20      v
21      v
22    NaN
23    NaN
24    NaN
25    NaN
26    NaN
27      v
28      v
29    NaN
30    NaN
Name: is_holiday, dtype: object

In [7]:
start = (time.time())
dict_r = {}
list_of_r = df_work['Code'].tolist()
for key, value in enumerate(list_of_r):
    dict_r[key]=value
how_many = len(dict_r)-1


for i in range(100000):
    weekday_list_rand = [dict_r[random.randint(0,how_many)] for i in range(20)]
    holiday_list_rand = [dict_r[random.randint(0,how_many)] for i in range(10)]


end = (time.time())
duration = end-start
print(duration)
print(holiday_list_rand)
print(weekday_list_rand)

NameError: name 'df_work' is not defined

In [11]:
start = (time.time())
for i in range(100000):
    random.shuffle(weekday_list)
    weekday_list_rand = weekday_list.copy()
    random.shuffle(holiday_list)
    holiday_list_rand = holiday_list.copy()

end = (time.time())
duration = end-start
print(duration)

2.238849639892578


In [1120]:
start = (time.time())

# 10000 -> 120 second
# if -> 和 try 差不多 time
for i in range(5000):
    random.shuffle(weekday_list)
    weekday_list_rand = weekday_list.copy()
    random.shuffle(holiday_list)
    holiday_list_rand = holiday_list.copy()
    # shuffle is faster than dictionary and random value reference
    # holiday_list_rand = [dict_r[random.randint(0,how_many)] for i in range(10)]
    preliminary_list = []
    for i, day in enumerate(IS_HOLIDAY):
        # if is holiday, pop an item from holiday

        #weekday_list_rand.remove('31')
        # df_work的第 i 天中，誰要on duty(value==1), 回傳'Code'的值
        # if -> 和 try 差不多 time
        if len(df_work[df_work[str(i+1)]==1]['Code'])>0:
            # 如果存在
            code_of_reservation = df_work[df_work[str(i+1)]==1]['Code'].item()
            #print('added',code_of_reservation)
            preliminary_list.append(code_of_reservation)
        else:
            if day == 'v':
                preliminary_list.append(holiday_list_rand.pop())
            else:
                preliminary_list.append(weekday_list_rand.pop())

end = (time.time())
duration = end-start
print(duration)





59.85562300682068


### Construct list by random the validate

In [13]:
# 10 min -> mbp
# 1360.6688895225525 s -> PETMR
import random
# get type of list
duty_type = 3

df_work = df_updated[df['Type']==duty_type]
# 對照 IS_HOLIDAY[0] == IS_HOLIDAY['1']
IS_HOLIDAY = df.iloc[2,5:]  # index from '1' to 總日數
DAYS = len(IS_HOLIDAY) # 這個月有幾天

# create a list of all candidates
holiday_list = []
weekday_list = []

# 假日班有幾個，就加入幾個到假日班list裡面
for code in df_work['Code'].tolist():  # iterate through R in certain type
    num_of_holiday = df[df['Code']==code]['Holiday'].item()  # 該員的假日班有幾個
    for i in range(num_of_holiday):
        holiday_list.append(code)
# 平日班有幾個，就加入幾個到平日班list裡面
for code in df_work['Code'].tolist():
    num_of_weekday = df[df['Code']==code]['Weekday'].item() # 該員的平日班有幾個
    for i in range(num_of_weekday):
        weekday_list.append(code)     

flag = True

start = (time.time())


while flag:
    random.shuffle(weekday_list)
    weekday_list_rand = weekday_list.copy()
    random.shuffle(holiday_list)
    holiday_list_rand = holiday_list.copy()

    # list for out put
    preliminary_list = []
    for i, day in enumerate(IS_HOLIDAY):
        # if is holiday, pop an item from holiday

        #weekday_list_rand.remove('31')
        # df_work的第 i 天中，誰要on duty(value==1 的人存在，>0個，先前已經排除重複登記的情況), 回傳其'Code'的值
        if len(df_work[df_work[str(i+1)]==1]['Code'])>0:
            # 如果存在
            code_of_reservation = df_work[df_work[str(i+1)]==1]['Code'].item()
            #print('added',code_of_reservation)
            preliminary_list.append(code_of_reservation)
        else:
            if day == 'v':
                preliminary_list.append(holiday_list_rand.pop())
            else:
                preliminary_list.append(weekday_list_rand.pop())
    flag = check_validation_after_random(preliminary_list, df_work, DAYS)
            
print(check_validation_after_random(preliminary_list, df_work, DAYS))
# check qd

end = (time.time())
duration = end-start
print(duration)

preliminary_list

False
1360.6688895225525


['33',
 '34',
 '31',
 '33',
 '31',
 '34',
 '31',
 '33',
 '31',
 '34',
 '33',
 '32',
 '34',
 '31',
 '32',
 '34',
 '32',
 '34',
 '31',
 '33',
 '34',
 '33',
 '31',
 '32',
 '31',
 '34',
 '32',
 '34',
 '33',
 '32']

In [9]:
def check_validation_after_random(preliminary_list, df_work, DAYS):
    # 建立一個對照字典，儲存誰無法值班
    dict_x = {}
    list_of_r = df_work['Code'].tolist()
    for code in list_of_r:
        dict_x[code]=[]
        for dates in range(DAYS):
            if df_work[df_work['Code']==code][str(dates+1)].item() == 'x':
                temp = dict_x[code]
                temp.append(dates)
                dict_x[code] = temp

    # check QD:
    for i in range(DAYS-1):
        code = preliminary_list[i]
        if code == preliminary_list[i+1]:
            return True
        #if df_work[df_work['Code']==code][str(i+1)].item()=='x':
        if i in dict_x[code]:
            return True
    if df_work[df_work['Code']==code][str(DAYS)].item()=='x':
        return True
    return False


### 建立一個對照字典，儲存誰無法值班

In [1249]:
# 建立一個對照字典，儲存誰無法值班
dict_x = {}
list_of_r = df_work['Code'].tolist()
for code in list_of_r:
    dict_x[code]=[]
    for dates in range(DAYS):
        if df_work[df_work['Code']==code][str(dates+1)].item() == 'x':
            temp = dict_x[code]
            temp.append(dates)
            dict_x[code] = temp


In [1251]:
dict_x

{'31': [12, 28], '32': [12, 28], '33': [1, 12], '34': [4, 6, 28]}

In [1037]:
# df_work的第X天中，誰要on duty(value==1), 回傳'Code'的值
try:
    print(df_work[df_work['2']==1]['Code'].item())
except:
    pass

### 將 簡碼 (iloc[4] and below)以下 code 轉為 str

In [809]:
df3=df[:]

# 使用 loc 賦值不會出現  SettingWithCopyWarning: 
# A value is trying to be set on a copy of a slice from a DataFrame 
for i in range(4,len(df2.index)):
    df3.loc[i]['Code'] = str(df3.loc[i]['Code'])
    print(type(df3['Code'][i]))
    

<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>


In [802]:
# 將 簡碼 (iloc[4] and below)以下 code 轉為 str
# 因為使用 iloc , 會出現 
# SettingWithCopyWarning： A value is trying to be set on a copy of a slice from a DataFrame 
df_str= df3['Code'].iloc[4:].map(lambda x : str(x))
df3['Code'].iloc[4:] = df_str


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  This is separate from the ipykernel package so we can avoid doing imports until


In [825]:
num_of_holiday = df[df['Code']==code]['Holiday'].item()
num_of_holiday

2

In [822]:
# df[df['Code']==31]['Holiday'].item()
df_work['Code'].item()

ValueError: can only convert an array of size 1 to a Python scalar

### calculation of STD

In [670]:
import random
import time
import numpy as np
start = (time.time())

ls = [random.randint(1,10) for i in range(1000000)]
location = [i for i in range(len(ls)) if ls[i]==1]
# location = [i for i, value in enumerate(ls) if value == 1] 差不多快

print(np.array(location).std())

end = (time.time())
duration = end-start
print(duration)


289331.9754719547
1.3396148681640625


### 將 預約值班前後 變成不值班，避免 QD

In [634]:
# date start from df4:,5:
df.iloc[4:,5:]
df.iloc[4:len(df.index)+4] #== 

df2=df[:]
# 4 to len(df.index) 會指到最後一項列
# 5 to len(df.columns)-1 會指到最後一欄
# 處理第一欄
for i in range(4,len(df2.index)):
    if df2.iloc[i,5] == 1:
        df2.iloc[i,6]='x'
# 處理中間欄
for i in range(4,len(df2.index)):
    for j in range(6,len(df2.columns)-1):
        if df2.iloc[i,j]==1:
            df2.iloc[i,(j+1)]='x'
            df2.iloc[i,(j-1)]='x'
# 處理最後一欄
for i in range(4,len(df2.index)):
    if df2.iloc[i,len(df2.columns)-1] == 1:
        df2.iloc[i,len(df2.columns)-2]='x'

df2

Unnamed: 0,Name,Code,Type,Holiday,Weekday,1,2,3,4,5,...,21,22,23,24,25,26,27,28,29,30
Weekday_ch,6,月,,,,一,二,三,四,五,...,日,一,二,三,四,五,六,日,一,二
Weekday_num,,,,,,1,2,3,4,5,...,7,1,2,3,4,5,6,7,1,2
is_holiday,,,,,,,,,,,...,v,,,,,,v,v,,
3,,簡,類,假,平,,,,,,...,,,,,,,,,,
4,李志謙,A,4,2,7,,,,,x,...,,x,,,,,,,,
5,戴維安,B,4,1,7,,,,,x,...,,x,,,,,,,,
6,吳嘉紘,C,4,1,9,,,,,x,...,,x,,,,x,,,,x
7,郭昱,D,4,3,9,,,x,1,x,...,,x,,,,,,,x,1
8,李志謙3,31,3,2,7,,,,,,...,,,,,,,,,,
9,戴維安3,32,3,1,7,,,,,,...,,,,,,,,,,


In [624]:
len(df.columns)
len(df.index)
df.iloc[:,len(df.columns)-1]

Weekday_ch       二
Weekday_num      2
is_holiday     NaN
3              NaN
4              NaN
5              NaN
6                x
7                1
8              NaN
9              NaN
10               1
11             NaN
12             NaN
13             NaN
14             NaN
15             NaN
Name: 30, dtype: object

### 檢查是否有某日有兩個以上的人預約要值班

In [500]:
TYPES_OF_DUTY = {1:'CT/MR', 3:'ER', 4:'CR'}
days_in_month = df.loc['Weekday_num'].notnull().sum()
duty_type=4
# iterate from '1' to 'end'
for i in range(1,days_in_month+1):
    # any repeated reservation 
    # 符合的 duty type 中 5至end處的值，是1的有多少個
    num_of_reservation = (df[df['Type']==duty_type].iloc[:,5:][str(i)]==1).sum()
    if num_of_reservation>1:
        print(f'{TYPES_OF_DUTY[duty_type]} 班{i}號有超過1人預約要值班')
        is_violation = True
        


CR 班4號有超過1人預約要值班
CR 班5號有超過1人預約要值班


### 檢查是否有某日無人可以值班

In [483]:
num_of_exclude = (df[df['Type']==duty_type].iloc[:,5:][str(20)].map(lambda item: str(item).lower())=='x').sum()

lower_text = lambda item: str(item).lower()
num_of_exclude = (df[df['Type']==duty_type].iloc[:,5:][str(20)].map(lower_text)=='x').sum()
#num_of_exclude
len(df[df['Type']==duty_type].index)

4

In [484]:
TYPES_OF_DUTY = {1:'CT/MR', 3:'ER', 4:'CR'}
days_in_month = df.loc['Weekday_num'].notnull().sum()
duty_type=4
# iterate from '1' to 'end'
for i in range(1,days_in_month+1):
    # any repeated reservation 
    # 符合的 duty type 中 5至end處的值，有多少x or X
    # 使用 map 
    lower_text = lambda item: str(item).lower()
    num_of_exclude = (df[df['Type']==duty_type].iloc[:,5:][str(i)].map(lower_text)=='x').sum()
    
    # 該班 R 人數
    num_of_r = len(df[df['Type']==duty_type].index)
    if num_of_exclude >= num_of_r:
        print(f'{TYPES_OF_DUTY[duty_type]} 班{i}號所有人均預約不值班')
        is_violation = True
        



CR 班10號所有人均預約不值班


In [469]:
num_of_reservation = (df[df['Type']==duty_type].iloc[:,5:][str(i)]==1)
num_of_reservation

4    False
5    False
6    False
7    False
Name: 31, dtype: bool

In [156]:
unselect_unnamed = [str(col) for col in df if 'Unnamed' not in str(col) ]
unselect_unnamed
#df2 = df[unselect_unnamed]

['Name',
 'Code',
 'Type',
 'Holiday',
 'Weekday',
 '1900-01-01 00:00:00',
 '1900-01-02 00:00:00',
 '1900-01-03 00:00:00',
 '1900-01-05 00:00:00',
 '1900-01-06 00:00:00',
 '1900-01-07 00:00:00',
 '1900-01-08 00:00:00',
 '1900-01-09 00:00:00',
 '1900-01-10 00:00:00',
 '1900-01-11 00:00:00',
 '1900-01-12 00:00:00',
 '1900-01-13 00:00:00',
 '1900-01-14 00:00:00',
 '1900-01-15 00:00:00',
 '1900-01-16 00:00:00',
 '1900-01-17 00:00:00',
 '1900-01-18 00:00:00',
 '1900-01-19 00:00:00',
 '1900-01-20 00:00:00',
 '1900-01-21 00:00:00',
 '1900-01-22 00:00:00',
 '1900-01-23 00:00:00',
 '1900-01-24 00:00:00',
 '1900-01-25 00:00:00',
 '1900-01-26 00:00:00',
 '1900-01-27 00:00:00',
 '1900-01-28 00:00:00',
 '1900-01-29 00:00:00',
 '1900-01-30 00:00:00']