In [327]:
import pandas as pd
import random
import numpy as np

### Data cleansing

In [497]:
# Load the raw duty-roster workbook.
df = pd.read_excel('test.xlsm')

# Label the five leading columns and the three header rows.
df = df.rename(
    columns=dict(zip(df.columns[:5], ['Name', 'Code', 'Type', 'Holiday', 'Weekday'])),
    index={0: 'Weekday_ch', 1: 'Weekday_num', 2: 'is_holiday'},
)

# Drop every column whose label still contains 'Unnamed'.
# Some labels are date objects rather than strings, hence the str() cast.
# .copy() makes the result an independent frame so the write below is not
# flagged (or dropped) as an assignment to a slice of the original.
unselect_unnamed = [col for col in df if 'Unnamed' not in str(col)]
df = df[unselect_unnamed].copy()

# The ('is_holiday', 'Name') cell holds the duty types to generate:
# every character of its text that is one of '1', '3', '4' becomes one entry.
type_to_generate = [int(i) for i in str(df.loc['is_holiday', 'Name']) if i in '134']
# Blank that cell so it is not counted as a holiday marker later on.
# A single .loc[row, col] write replaces the chained df['Name'].loc[...] = ...,
# which assigns to an intermediate object and is a no-op under Copy-on-Write.
df.loc['is_holiday', 'Name'] = np.nan

# Relabel everything after the fifth column as day numbers '1', '2', ...
# in one positional pass instead of one DataFrame.rename per column.
df.columns = list(df.columns[:5]) + [str(day) for day in range(1, len(df.columns) - 4)]

## ==== now the data has been cleansed === ##

df

Unnamed: 0,Name,Code,Type,Holiday,Weekday,1,2,3,4,5,...,21,22,23,24,25,26,27,28,29,30
Weekday_ch,6,月,,,,一,二,三,四,五,...,日,一,二,三,四,五,六,日,一,二
Weekday_num,,,,,,1,2,3,4,5,...,7,1,2,3,4,5,6,7,1,2
is_holiday,,,,,,,,,,,...,v,,,,,,v,v,,
3,,簡,類,假,平,,,,,,...,,,,,,,,,,
4,李志謙,A,4,2,7,,,,,1,...,,x,,,,,,,,
5,戴維安,B,4,1,7,,,,1,1,...,,x,,,,,,,,
6,吳嘉紘,C,4,1,9,,,,1,1,...,,x,,,,x,,,,x
7,郭昱,D,4,3,9,,,,1,1,...,,x,,,,,,,,
8,李志謙3,31,3,2,7,,,,,,...,,,,,,,,,,
9,戴維安3,32,3,1,7,,,,,,...,,,,,,,,,,


In [501]:
def is_violation(df, duty_type_array):
    """
    Check a cleansed duty-roster frame for scheduling-rule violations.

    The number of days in the month is the count of non-null cells in the
    'Weekday_num' row; the number of holidays is the count of non-null cells
    in the 'is_holiday' row; the remaining days are weekdays. Duty type 1
    (CT/MR) needs twice as many duties per day as the other types.

    For every duty type in ``duty_type_array`` the following are checked, and
    each failure is printed as soon as it is found:

    1. the summed 'Holiday' quota of that type's rows covers the holiday slots,
    2. the summed 'Weekday' quota of that type's rows covers the weekday slots,
    3. no day is marked 'x' / 'X' by at least as many rows as the type has,
    4. no day is pre-booked (cell value 1) by more than one row.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with index rows 'Weekday_num' and 'is_holiday', columns 'Type',
        'Holiday' and 'Weekday', and, from the sixth column onwards, one
        column per day labelled '1', '2', ...
    duty_type_array : iterable
        Duty type codes to check. Known codes are 1 (CT/MR), 3 (ER), 4 (CR);
        any other code is reported under its raw value.

    Returns
    -------
    bool
        True if at least one violation was found, False otherwise.

    Raises
    ------
    AssertionError
        If ``duty_type_array`` is empty.
    KeyError
        If a day column '1' .. str(days_in_month) is missing from ``df``.
    """
    # Materialise once so that any empty iterable (list, tuple, array,
    # generator) is rejected, not only a literal [].
    duty_types = list(duty_type_array)
    if not duty_types:
        raise AssertionError('請輸入要執行的班別')

    TYPES_OF_DUTY = {1:'CT/MR', 3:'ER', 4:'CR'}
    # Named differently from the function so the function is not shadowed.
    found_violation = False

    # Month-level figures do not depend on the duty type: compute them once.
    days_in_month = df.loc['Weekday_num'].notnull().sum()
    holiday_slots = df.loc['is_holiday'].notnull().sum()
    weekday_slots = days_in_month - holiday_slots
    day_labels = [str(day) for day in range(1, days_in_month + 1)]

    for duty_type in duty_types:
        duty_name = TYPES_OF_DUTY.get(duty_type, duty_type)

        # Rows of this duty type, filtered once instead of once per day.
        staff = df[df['Type'] == duty_type]
        # Day cells only; the first five columns are descriptive.
        staff_days = staff.iloc[:, 5:]
        num_of_r = len(staff.index)

        # Type 1 (CT/MR) doubles the number of duties needed per day.
        slots_per_day = 2 if duty_type == 1 else 1
        num_of_holiday = holiday_slots * slots_per_day
        num_of_weekday = weekday_slots * slots_per_day

        # Total holiday / weekday duties this type's rows are willing to take.
        num_of_duties_h = staff['Holiday'].sum()
        num_of_duties_w = staff['Weekday'].sum()

        # Quota checks: report the shortfall if the totals cannot cover the month.
        if num_of_duties_h < num_of_holiday:
            print(f'{duty_name} 班假日值班總數不足，缺少{num_of_holiday-num_of_duties_h}班')
            found_violation = True
        if num_of_duties_w < num_of_weekday:
            print(f'{duty_name} 班平日值班總數不足，缺少{num_of_weekday-num_of_duties_w}班')
            found_violation = True

        # Days on which every row of this type is marked 'x' / 'X'.
        # With no matching rows 0 >= 0 holds, so every day is reported.
        for day in day_labels:
            num_of_exclude = (staff_days[day].map(lambda item: str(item).lower()) == 'x').sum()
            if num_of_exclude >= num_of_r:
                print(f'{duty_name} 班{day}號所有人均預約不值班')
                found_violation = True

        # Days pre-booked (cell value 1) by more than one row of this type.
        for day in day_labels:
            num_of_reservation = (staff_days[day] == 1).sum()
            if num_of_reservation > 1:
                print(f'{duty_name} 班{day}號有超過1人預約要值班')
                found_violation = True

    return found_violation

    

        

In [502]:

# Run the roster checks for the duty types listed in type_to_generate;
# the cell displays True when at least one violation was printed.
is_violation(df,type_to_generate)

CT/MR 班假日值班總數不足，缺少9班
CT/MR 班平日值班總數不足，缺少12班
CT/MR 班23號有超過1人預約要值班
ER 班假日值班總數不足，缺少1班
ER 班13號所有人均預約不值班
ER 班6號有超過1人預約要值班
CR 班假日值班總數不足，缺少1班
CR 班10號所有人均預約不值班
CR 班22號所有人均預約不值班
CR 班4號有超過1人預約要值班
CR 班5號有超過1人預約要值班


True

In [448]:
# Days in the month = number of filled cells in the 'Weekday_num' header row.
days_in_month = df.loc['Weekday_num'].notna().sum()
days_in_month

31

In [455]:
# How many rows of the current duty type have day '8' pre-booked (cell value 1).
# Uses whatever `duty_type` is currently set in the notebook namespace.
df.loc[df['Type'] == duty_type].iloc[:, 5:]['8'].eq(1).sum()

1

### 檢查是否有某日有兩個以上的人預約要值班

In [500]:
TYPES_OF_DUTY = {1:'CT/MR', 3:'ER', 4:'CR'}
days_in_month = df.loc['Weekday_num'].notnull().sum()
duty_type=4
# Result flag for this cell. It must not be named `is_violation`: assigning to
# that name at notebook scope would replace the function of the same name
# with a bool and break every later call to it.
has_violation = False
# Walk the day columns '1' .. str(days_in_month).
for i in range(1,days_in_month+1):
    # Among the rows of this duty type, count the cells of day i that equal 1
    # (the day columns start at position 5).
    num_of_reservation = (df[df['Type']==duty_type].iloc[:,5:][str(i)]==1).sum()
    if num_of_reservation>1:
        print(f'{TYPES_OF_DUTY[duty_type]} 班{i}號有超過1人預約要值班')
        has_violation = True
        


CR 班4號有超過1人預約要值班
CR 班5號有超過1人預約要值班


### 檢查是否有某日無人可以值班

In [483]:
# Count the rows of the current duty type that marked day '20' with 'x' / 'X'.
# Cells are stringified and lower-cased before the comparison. The count is
# computed once; an identical assignment that was immediately overwritten
# has been removed.
lower_text = lambda item: str(item).lower()
num_of_exclude = (df[df['Type']==duty_type].iloc[:,5:][str(20)].map(lower_text)=='x').sum()
# Displayed value: the number of rows of this duty type.
len(df[df['Type']==duty_type].index)

4

In [484]:
TYPES_OF_DUTY = {1:'CT/MR', 3:'ER', 4:'CR'}
days_in_month = df.loc['Weekday_num'].notnull().sum()
duty_type=4
# Result flag for this cell. It must not be named `is_violation`: assigning to
# that name at notebook scope would replace the function of the same name
# with a bool and break every later call to it.
has_violation = False
# Walk the day columns '1' .. str(days_in_month).
for i in range(1,days_in_month+1):
    # Among the rows of this duty type, count the cells of day i that are
    # 'x' or 'X' (the day columns start at position 5); cells are stringified
    # and lower-cased through map before the comparison.
    lower_text = lambda item: str(item).lower()
    num_of_exclude = (df[df['Type']==duty_type].iloc[:,5:][str(i)].map(lower_text)=='x').sum()

    # Number of rows of this duty type.
    num_of_r = len(df[df['Type']==duty_type].index)
    if num_of_exclude >= num_of_r:
        print(f'{TYPES_OF_DUTY[duty_type]} 班{i}號所有人均預約不值班')
        has_violation = True
        



CR 班10號所有人均預約不值班


In [469]:
# Per-row boolean mask: which rows of the current duty type pre-booked day i.
# NOTE: relies on `duty_type` and the loop index `i` left over from earlier cells.
num_of_reservation = df.loc[df['Type'] == duty_type].iloc[:, 5:][str(i)].eq(1)
num_of_reservation

4    False
5    False
6    False
7    False
Name: 31, dtype: bool

In [156]:
# Column labels rendered as strings, keeping only those without 'Unnamed'.
# Labels are cast first because some of them are not plain strings.
unselect_unnamed = [label for label in map(str, df.columns) if 'Unnamed' not in label]
unselect_unnamed

['Name',
 'Code',
 'Type',
 'Holiday',
 'Weekday',
 '1900-01-01 00:00:00',
 '1900-01-02 00:00:00',
 '1900-01-03 00:00:00',
 '1900-01-05 00:00:00',
 '1900-01-06 00:00:00',
 '1900-01-07 00:00:00',
 '1900-01-08 00:00:00',
 '1900-01-09 00:00:00',
 '1900-01-10 00:00:00',
 '1900-01-11 00:00:00',
 '1900-01-12 00:00:00',
 '1900-01-13 00:00:00',
 '1900-01-14 00:00:00',
 '1900-01-15 00:00:00',
 '1900-01-16 00:00:00',
 '1900-01-17 00:00:00',
 '1900-01-18 00:00:00',
 '1900-01-19 00:00:00',
 '1900-01-20 00:00:00',
 '1900-01-21 00:00:00',
 '1900-01-22 00:00:00',
 '1900-01-23 00:00:00',
 '1900-01-24 00:00:00',
 '1900-01-25 00:00:00',
 '1900-01-26 00:00:00',
 '1900-01-27 00:00:00',
 '1900-01-28 00:00:00',
 '1900-01-29 00:00:00',
 '1900-01-30 00:00:00']