In [503]:
import pandas as pd
import random
import numpy as np

### Data clensing

In [1310]:
# load file
df = pd.read_excel('test.xlsm')
#df=pd.DataFrame(df)
#df[df['Unnamed: 2']==4]  # select Unnamed: 2 value ==4 
# rename columns and index
df = df.rename(columns={df.columns[0]:'Name',
                        df.columns[1]:'Code',
                        df.columns[2]:'Type',
                        df.columns[3]:'Holiday',
                        df.columns[4]:'Weekday'})
df = df.rename(index={0:'Weekday_ch',1:'Weekday_num',2:'is_holiday'})
# unselect unnamned coluns
unselect_unnamed = [col for col in df if 'Unnamed' not in str(col)]  # 注意 有些還是 date type, 所以用 str
df = df[unselect_unnamed]  

# 有哪些班要運算，determine type_to_generate
type_to_generate = [int(i) for i in str(df['Name'].loc['is_holiday']) if i in '134']
df['Name'].loc['is_holiday'] = np.nan  # set value as NaN, not to interfere with count of hollidays


# 將住院醫師簡碼 (iloc[4] and below)以下 code 轉為 str
# 使用 loc 賦值不會出現  SettingWithCopyWarning: 
# A value is trying to be set on a copy of a slice from a DataFrame 
for i in range(4,len(df.index)):
    df.loc[i]['Code'] = str(df.loc[i]['Code'])


# rename date index from 1 to date
for i in range(5,len(df.columns)):
    df = df.rename(columns={df.columns[i]:str(i-4)}) 
    

## ==== now the data has been clensed === ##

df

Unnamed: 0,Name,Code,Type,Holiday,Weekday,1,2,3,4,5,...,21,22,23,24,25,26,27,28,29,30
Weekday_ch,6,月,,,,一,二,三,四,五,...,日,一,二,三,四,五,六,日,一,二
Weekday_num,,,,,,1,2,3,4,5,...,7,1,2,3,4,5,6,7,1,2
is_holiday,,,,,,,,,,,...,v,,,,,,v,v,,
3,,簡,類,假,平,,,,,,...,,,,,,,,,,
4,李志謙,A,4,2,7,,,,,x,...,,x,,,,x,,,,
5,戴維安,B,4,2,7,,,,,x,...,,x,,,,,,,,
6,吳嘉紘,C,4,1,9,,,,,,...,,x,,,,x,,,,x
7,郭昱,D,4,3,9,,,,1,,...,,,,,,,,,,1
8,李志謙3,31,3,2,7,,,,,,...,,,,,,,,,x,
9,戴維安3,32,3,2,7,,,,,,...,,,,,,,,,x,


In [1311]:
def is_violation(df, duty_type_array):
    """
    input = dataframe after clensing
    output = if there's no day violation
    print where is the violation
    True -> have violation
    False -> no violation
    """
    # 檢查欲執行項目是否為空白
    if duty_type_array == []:
        raise AssertionError('請輸入要執行的班別')
    
        
    TYPES_OF_DUTY = {1:'CT/MR', 3:'ER', 4:'CR'}
    is_violation = False
    
    # 檢查每一個要 run 的值班類別班數和是否正確
    for duty_type in duty_type_array:
        # test for holiday
        # Weekday_num 非零的欄位代表當月日數
        days_in_month = df.loc['Weekday_num'].notnull().sum()
        num_of_holiday = df.loc['is_holiday'].notnull().sum()
        num_of_weekday = days_in_month - num_of_holiday

        # if CT/MR, type1 -> doubles the days of dutese
        num_of_holiday = num_of_holiday *2 if duty_type ==1 else num_of_holiday
        num_of_weekday = num_of_weekday *2 if duty_type ==1 else num_of_weekday

        # 假日及平日值班數
        num_of_duties_h = df[df['Type']==duty_type]['Holiday'].sum()
        num_of_duties_w = df[df['Type']==duty_type]['Weekday'].sum()


        # 檢查值班總數是否不足， report error message and violation
        if num_of_duties_h < num_of_holiday:
            print(f'{TYPES_OF_DUTY[duty_type]} 班假日值班總數不足，缺少{num_of_holiday-num_of_duties_h}班')
            is_violation = True
        if num_of_duties_w < num_of_weekday:
            print(f'{TYPES_OF_DUTY[duty_type]} 班平日值班總數不足，缺少{num_of_weekday-num_of_duties_w}班')
            is_violation = True
            
        # 預約值班前後兩天無法值班，避免 qd，並 update 新表，以利接下來亂數產生
        # 4 to len(df.index) 會指到最後一項列
        # 5 to len(df.columns)-1 會指到最後一欄
        # 處理第一欄
        for i in range(4,len(df.index)):
            if df.iloc[i,5] == 1:
                df.iloc[i,6]='x'
        # 處理中間欄
        for i in range(4,len(df.index)):
            for j in range(6,len(df.columns)-1):
                if df.iloc[i,j]==1:
                    df.iloc[i,(j+1)]='x'
                    df.iloc[i,(j-1)]='x'
        # 處理最後一欄
        for i in range(4,len(df.index)):
            if df.iloc[i,len(df.columns)-1] == 1:
                df.iloc[i,len(df.columns)-2]='x'
        
            
        # 檢查是否有某日所有人都無法值班
        # iterate from '1' to 'end'
        for i in range(1,days_in_month+1):
            # any repeated reservation 
            # 符合的 duty type 中 5至end處的值，有多少x or X
            # 使用 map 
            lower_text = lambda item: str(item).lower()
            num_of_exclude = (df[df['Type']==duty_type].iloc[:,5:][str(i)].map(lower_text)=='x').sum()

            # 該班 R 人數
            num_of_r = len(df[df['Type']==duty_type].index)
            if num_of_exclude >= num_of_r:
                print(f'{TYPES_OF_DUTY[duty_type]} 班{i}號所有人均無法值班')
                is_violation = True       
        
        # 檢查是否有某日有兩個以上的人預約要值班
        # iterate from '1' to 'end'
        for i in range(1,days_in_month+1):
            # any repeated reservation 
            # 符合的 duty type 中 5至end處的值，是1的有多少個
            num_of_reservation = (df[df['Type']==duty_type].iloc[:,5:][str(i)]==1).sum()
            if num_of_reservation>1:
                print(f'{TYPES_OF_DUTY[duty_type]} 班{i}號有超過1人預約要值班')
                is_violation = True
        
    return is_violation, df

    

        

In [1312]:

violation, df_updated = is_violation(df,type_to_generate)

CT/MR 班假日值班總數不足，缺少9班
CT/MR 班平日值班總數不足，缺少12班
CT/MR 班23號有超過1人預約要值班


#

### Generating random list 

In [1313]:
df

Unnamed: 0,Name,Code,Type,Holiday,Weekday,1,2,3,4,5,...,21,22,23,24,25,26,27,28,29,30
Weekday_ch,6,月,,,,一,二,三,四,五,...,日,一,二,三,四,五,六,日,一,二
Weekday_num,,,,,,1,2,3,4,5,...,7,1,2,3,4,5,6,7,1,2
is_holiday,,,,,,,,,,,...,v,,,,,,v,v,,
3,,簡,類,假,平,,,,,,...,,,,,,,,,,
4,李志謙,A,4,2,7,,,,,x,...,,x,,,,x,,,,
5,戴維安,B,4,2,7,,,,,x,...,,x,,,,,,,,
6,吳嘉紘,C,4,1,9,,,,,,...,,x,,,,x,,,,x
7,郭昱,D,4,3,9,,,x,1,x,...,,,,,,,,,x,1
8,李志謙3,31,3,2,7,,,,,,...,,,,,,,,,x,
9,戴維安3,32,3,2,7,,,,,,...,,,,,,,,,x,


In [979]:
IS_HOLIDAY = df.iloc[2,5:] 
#(not np.isnan(IS_HOLIDAY['2']))
len(IS_HOLIDAY)
i=3
IS_HOLIDAY

1     NaN
2     NaN
3     NaN
4     NaN
5     NaN
6       v
7       v
8     NaN
9     NaN
10    NaN
11    NaN
12    NaN
13      v
14      v
15    NaN
16    NaN
17    NaN
18    NaN
19    NaN
20      v
21      v
22    NaN
23    NaN
24    NaN
25    NaN
26    NaN
27      v
28      v
29    NaN
30    NaN
Name: is_holiday, dtype: object

In [1259]:
dict_r = {}
list_of_r = df_work['Code'].tolist()
for key, value in enumerate(list_of_r):
    dict_r[key]=value
dict_r

{0: '31', 1: '32', 2: '33', 3: '34'}

In [1260]:
len(dict_r)

4

In [1265]:
start = (time.time())
dict_r = {}
list_of_r = df_work['Code'].tolist()
for key, value in enumerate(list_of_r):
    dict_r[key]=value
how_many = len(dict_r)-1


for i in range(100000):
    weekday_list_rand = [dict_r[random.randint(0,how_many)] for i in range(20)]
    holiday_list_rand = [dict_r[random.randint(0,how_many)] for i in range(10)]


end = (time.time())
duration = end-start
print(duration)
print(holiday_list_rand)
print(weekday_list_rand)

3.7287228107452393
['32', '32', '32', '34', '33', '33', '32', '31', '33', '31']
['32', '33', '33', '32', '33', '32', '34', '33', '34', '31', '34', '32', '31', '34', '33', '31', '33', '32', '31', '32']


In [1266]:
start = (time.time())
for i in range(100000):
    random.shuffle(weekday_list)
    weekday_list_rand = weekday_list.copy()
    random.shuffle(holiday_list)
    holiday_list_rand = holiday_list.copy()

end = (time.time())
duration = end-start
print(duration)

2.7245168685913086


In [1120]:
start = (time.time())

# 10000 -> 120 second
# if -> 和 try 差不多 time
for i in range(5000):
    random.shuffle(weekday_list)
    weekday_list_rand = weekday_list.copy()
    random.shuffle(holiday_list)
    holiday_list_rand = holiday_list.copy()
    # shuffle is faster than dictionary and random value reference
    # holiday_list_rand = [dict_r[random.randint(0,how_many)] for i in range(10)]
    final_list = []
    for i, day in enumerate(IS_HOLIDAY):
        # if is holiday, pop an item from holiday

        #weekday_list_rand.remove('31')
        # df_work的第 i 天中，誰要on duty(value==1), 回傳'Code'的值
        # if -> 和 try 差不多 time
        if len(df_work[df_work[str(i+1)]==1]['Code'])>0:
            # 如果存在
            code_of_reservation = df_work[df_work[str(i+1)]==1]['Code'].item()
            #print('added',code_of_reservation)
            final_list.append(code_of_reservation)
        else:
            if day == 'v':
                final_list.append(holiday_list_rand.pop())
            else:
                final_list.append(weekday_list_rand.pop())

end = (time.time())
duration = end-start
print(duration)





59.85562300682068


### 使用 shift() eq
# https://hant-kb.kutu66.com/others/post_12833339

In [1270]:
# 10 min
import random
# get type of list
duty_type = 3

df_work = df_updated[df['Type']==duty_type]
# 對照 IS_HOLIDAY[0] == IS_HOLIDAY['1']
IS_HOLIDAY = df.iloc[2,5:]  # index from '1' to 總日數
DAYS = len(IS_HOLIDAY) # 這個月有幾天

# create a list of all candidates
holiday_list = []
weekday_list = []

# 假日班有幾個，就加入幾個到假日班list裡面
for code in df_work['Code'].tolist():  # iterate through R in certain type
    num_of_holiday = df[df['Code']==code]['Holiday'].item()  # 該員的假日班有幾個
    for i in range(num_of_holiday):
        holiday_list.append(code)
# 平日班有幾個，就加入幾個到平日班list裡面
for code in df_work['Code'].tolist():
    num_of_weekday = df[df['Code']==code]['Weekday'].item() # 該員的平日班有幾個
    for i in range(num_of_weekday):
        weekday_list.append(code)     

flag = True

start = (time.time())


while flag:
    random.shuffle(weekday_list)
    weekday_list_rand = weekday_list.copy()
    random.shuffle(holiday_list)
    holiday_list_rand = holiday_list.copy()

    # list for out put
    final_list = []
    for i, day in enumerate(IS_HOLIDAY):
        # if is holiday, pop an item from holiday

        #weekday_list_rand.remove('31')
        # df_work的第 i 天中，誰要on duty(value==1 的人存在，>0個，先前已經排除重複登記的情況), 回傳其'Code'的值
        if len(df_work[df_work[str(i+1)]==1]['Code'])>0:
            # 如果存在
            code_of_reservation = df_work[df_work[str(i+1)]==1]['Code'].item()
            #print('added',code_of_reservation)
            final_list.append(code_of_reservation)
        else:
            if day == 'v':
                final_list.append(holiday_list_rand.pop())
            else:
                final_list.append(weekday_list_rand.pop())
    flag = check_validation_after_random(final_list, df_work, DAYS)
            
print(check_validation_after_random(final_list, df_work, DAYS))
# check qd

end = (time.time())
duration = end-start
print(duration)

final_list

KeyboardInterrupt: 

In [1271]:
final_list

['33',
 '33',
 '34',
 '33',
 '32',
 '34',
 '31',
 '32',
 '32',
 '33',
 '33',
 '33',
 '32',
 '34']

In [1252]:
def check_validation_after_random(final_list, df_work, DAYS):
    # 建立一個對照字典，儲存誰無法值班
    dict_x = {}
    list_of_r = df_work['Code'].tolist()
    for code in list_of_r:
        dict_x[code]=[]
        for dates in range(DAYS):
            if df_work[df_work['Code']==code][str(dates+1)].item() == 'x':
                temp = dict_x[code]
                temp.append(dates)
                dict_x[code] = temp

    # check QD:
    for i in range(DAYS-1):
        code = final_list[i]
        if code == final_list[i+1]:
            return True
        #if df_work[df_work['Code']==code][str(i+1)].item()=='x':
        if i in dict_x[code]:
            return True
    if df_work[df_work['Code']==code][str(DAYS)].item()=='x':
        return True
    return False


In [1218]:
df_work[df_work['Code']=='31']['1']

8    NaN
Name: 1, dtype: object

In [1249]:
# 建立一個對照字典，儲存誰無法值班
dict_x = {}
list_of_r = df_work['Code'].tolist()
for code in list_of_r:
    dict_x[code]=[]
    for dates in range(DAYS):
        if df_work[df_work['Code']==code][str(dates+1)].item() == 'x':
            temp = dict_x[code]
            temp.append(dates)
            dict_x[code] = temp


In [1251]:
dict_x

{'31': [12, 28], '32': [12, 28], '33': [1, 12], '34': [4, 6, 28]}

In [1037]:
# df_work的第X天中，誰要on duty(value==1), 回傳'Code'的值
try:
    print(df_work[df_work['2']==1]['Code'].item())
except:
    pass

### 將 簡碼 (iloc[4] and below)以下 code 轉為 str

In [809]:
df3=df[:]

# 使用 loc 賦值不會出現  SettingWithCopyWarning: 
# A value is trying to be set on a copy of a slice from a DataFrame 
for i in range(4,len(df2.index)):
    df3.loc[i]['Code'] = str(df3.loc[i]['Code'])
    print(type(df3['Code'][i]))
    

<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>
<class 'str'>


In [802]:
# 將 簡碼 (iloc[4] and below)以下 code 轉為 str
# 因為使用 iloc , 會出現 
# SettingWithCopyWarning： A value is trying to be set on a copy of a slice from a DataFrame 
df_str= df3['Code'].iloc[4:].map(lambda x : str(x))
df3['Code'].iloc[4:] = df_str


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  This is separate from the ipykernel package so we can avoid doing imports until


In [825]:
num_of_holiday = df[df['Code']==code]['Holiday'].item()
num_of_holiday

2

In [822]:
# df[df['Code']==31]['Holiday'].item()
df_work['Code'].item()

ValueError: can only convert an array of size 1 to a Python scalar

### calculation of STD

In [670]:
import random
import time
import numpy as np
start = (time.time())

ls = [random.randint(1,10) for i in range(1000000)]
location = [i for i in range(len(ls)) if ls[i]==1]
# location = [i for i, value in enumerate(ls) if value == 1] 差不多快

print(np.array(location).std())

end = (time.time())
duration = end-start
print(duration)


289331.9754719547
1.3396148681640625


### 將 預約值班前後 變成不值班，避免 QD

In [634]:
# date start from df4:,5:
df.iloc[4:,5:]
df.iloc[4:len(df.index)+4] #== 

df2=df[:]
# 4 to len(df.index) 會指到最後一項列
# 5 to len(df.columns)-1 會指到最後一欄
# 處理第一欄
for i in range(4,len(df2.index)):
    if df2.iloc[i,5] == 1:
        df2.iloc[i,6]='x'
# 處理中間欄
for i in range(4,len(df2.index)):
    for j in range(6,len(df2.columns)-1):
        if df2.iloc[i,j]==1:
            df2.iloc[i,(j+1)]='x'
            df2.iloc[i,(j-1)]='x'
# 處理最後一欄
for i in range(4,len(df2.index)):
    if df2.iloc[i,len(df2.columns)-1] == 1:
        df2.iloc[i,len(df2.columns)-2]='x'

df2

Unnamed: 0,Name,Code,Type,Holiday,Weekday,1,2,3,4,5,...,21,22,23,24,25,26,27,28,29,30
Weekday_ch,6,月,,,,一,二,三,四,五,...,日,一,二,三,四,五,六,日,一,二
Weekday_num,,,,,,1,2,3,4,5,...,7,1,2,3,4,5,6,7,1,2
is_holiday,,,,,,,,,,,...,v,,,,,,v,v,,
3,,簡,類,假,平,,,,,,...,,,,,,,,,,
4,李志謙,A,4,2,7,,,,,x,...,,x,,,,,,,,
5,戴維安,B,4,1,7,,,,,x,...,,x,,,,,,,,
6,吳嘉紘,C,4,1,9,,,,,x,...,,x,,,,x,,,,x
7,郭昱,D,4,3,9,,,x,1,x,...,,x,,,,,,,x,1
8,李志謙3,31,3,2,7,,,,,,...,,,,,,,,,,
9,戴維安3,32,3,1,7,,,,,,...,,,,,,,,,,


In [624]:
len(df.columns)
len(df.index)
df.iloc[:,len(df.columns)-1]

Weekday_ch       二
Weekday_num      2
is_holiday     NaN
3              NaN
4              NaN
5              NaN
6                x
7                1
8              NaN
9              NaN
10               1
11             NaN
12             NaN
13             NaN
14             NaN
15             NaN
Name: 30, dtype: object

### 檢查是否有某日有兩個以上的人預約要值班

In [500]:
TYPES_OF_DUTY = {1:'CT/MR', 3:'ER', 4:'CR'}
days_in_month = df.loc['Weekday_num'].notnull().sum()
duty_type=4
# iterate from '1' to 'end'
for i in range(1,days_in_month+1):
    # any repeated reservation 
    # 符合的 duty type 中 5至end處的值，是1的有多少個
    num_of_reservation = (df[df['Type']==duty_type].iloc[:,5:][str(i)]==1).sum()
    if num_of_reservation>1:
        print(f'{TYPES_OF_DUTY[duty_type]} 班{i}號有超過1人預約要值班')
        is_violation = True
        


CR 班4號有超過1人預約要值班
CR 班5號有超過1人預約要值班


### 檢查是否有某日無人可以值班

In [483]:
num_of_exclude = (df[df['Type']==duty_type].iloc[:,5:][str(20)].map(lambda item: str(item).lower())=='x').sum()

lower_text = lambda item: str(item).lower()
num_of_exclude = (df[df['Type']==duty_type].iloc[:,5:][str(20)].map(lower_text)=='x').sum()
#num_of_exclude
len(df[df['Type']==duty_type].index)

4

In [484]:
TYPES_OF_DUTY = {1:'CT/MR', 3:'ER', 4:'CR'}
days_in_month = df.loc['Weekday_num'].notnull().sum()
duty_type=4
# iterate from '1' to 'end'
for i in range(1,days_in_month+1):
    # any repeated reservation 
    # 符合的 duty type 中 5至end處的值，有多少x or X
    # 使用 map 
    lower_text = lambda item: str(item).lower()
    num_of_exclude = (df[df['Type']==duty_type].iloc[:,5:][str(i)].map(lower_text)=='x').sum()
    
    # 該班 R 人數
    num_of_r = len(df[df['Type']==duty_type].index)
    if num_of_exclude >= num_of_r:
        print(f'{TYPES_OF_DUTY[duty_type]} 班{i}號所有人均預約不值班')
        is_violation = True
        



CR 班10號所有人均預約不值班


In [469]:
num_of_reservation = (df[df['Type']==duty_type].iloc[:,5:][str(i)]==1)
num_of_reservation

4    False
5    False
6    False
7    False
Name: 31, dtype: bool

In [156]:
unselect_unnamed = [str(col) for col in df if 'Unnamed' not in str(col) ]
unselect_unnamed
#df2 = df[unselect_unnamed]

['Name',
 'Code',
 'Type',
 'Holiday',
 'Weekday',
 '1900-01-01 00:00:00',
 '1900-01-02 00:00:00',
 '1900-01-03 00:00:00',
 '1900-01-05 00:00:00',
 '1900-01-06 00:00:00',
 '1900-01-07 00:00:00',
 '1900-01-08 00:00:00',
 '1900-01-09 00:00:00',
 '1900-01-10 00:00:00',
 '1900-01-11 00:00:00',
 '1900-01-12 00:00:00',
 '1900-01-13 00:00:00',
 '1900-01-14 00:00:00',
 '1900-01-15 00:00:00',
 '1900-01-16 00:00:00',
 '1900-01-17 00:00:00',
 '1900-01-18 00:00:00',
 '1900-01-19 00:00:00',
 '1900-01-20 00:00:00',
 '1900-01-21 00:00:00',
 '1900-01-22 00:00:00',
 '1900-01-23 00:00:00',
 '1900-01-24 00:00:00',
 '1900-01-25 00:00:00',
 '1900-01-26 00:00:00',
 '1900-01-27 00:00:00',
 '1900-01-28 00:00:00',
 '1900-01-29 00:00:00',
 '1900-01-30 00:00:00']