In [49]:
import pandas as pd
import re
import openpyxl 
import numpy as np
from timeit import default_timer as timer

pd.set_option('display.max_columns', 100)
pd.set_option('display.max_rows', 100)

#predefine parameters
filterTransDesc = "HANDLING CHRG"

#name col to remove rows with missing na or to match bs
payoutNameCol = 'Member Name'
merchantNameCol = 'Merchant'
merchantBankNameCol = 'Bank Acc Name'
billPaymentNameCol = "Name"

#col of payout to match amount (payout, merchant and bill payment must have)
poAmountCol = "Amount"
#col of final output as name
poNameCol = "Member Name"
bsName = "BS Name"

#cols to match in bank statement
bsColMatchList = ['Reference 1', 'Reference 2', 'Reference 3', 'Reference 4']
bsAmountCol = "Debit Amount"

#index col to track rows
euIndex = "EU_Index"
copIndex = "COP_Index"
manageCOPIndex = "ManageCOP_Index"
merchantIndex = "Merchant_Index"
billPaymentIndex = "BP_Index"
bsIndex = "BS_Index"

#ruleDict, which file type to use which set of rules
ruleDict = {"PayOut":[1, 2, 3, 4, 6],
           "Merchant":[1, 2, 3, 4],
           "BillPayment":[5]}

In [26]:
#check if required worksheet names are there and return them if so 
def checkWorksheetName(file):
    print("Checking worksheet names \n")
    
    wb = openpyxl.load_workbook(file)
    wsList = wb.sheetnames
    try:
        EU = [i for i in wsList if re.search('EU[^-]', i)][0]
        COP = [i for i in wsList if re.search('COP[^-]', i)][0]
        ManageCOP = [i for i in wsList if re.search('MANAGE COP[^-]', i)][0]
        Merchant = [i for i in wsList if re.search('MERCHANT[^-]', i)][0]
        BillPayment = [i for i in wsList if re.search('BILL PAYMENT[^-]', i)][0]
        
        print("All worksheet names are found")
        return EU, COP, ManageCOP, Merchant, BillPayment
    except Exception as e:
        print("Failed to find worksheet names for all \n")
        print(e)

payoutExcelFile = "Quinton_Payout/PAYOUT - 06.08.2020.xlsx"
# wb = openpyxl.load_workbook(payoutExcelFile)

EU_WsName, COP_WsName, ManageCOP_WsName, Merchant_WsName, BillPayment_WsName = checkWorksheetName(payoutExcelFile)

Checking worksheet names 

All worksheet names are found


In [6]:
#reconciled payout
excelFile = "Quinton_Reconciliation Result/Reconciliation_20200806.xlsx"
wsName = "Reconciled"
dfReconciled = pd.read_excel(excelFile, sheet_name = wsName)

In [27]:
#read file

#read payout files - EU, COP and ManageCOP
dfEU = pd.read_excel(payoutExcelFile, sheet_name = EU_WsName)
#remove empty member name
dfEU = dfEU[pd.notnull(dfEU[payoutNameCol])]
dfEU['DataType'] = "EU" 

dfCOP = pd.read_excel(payoutExcelFile, sheet_name = COP_WsName)
#remove empty member name
dfCOP = dfCOP[pd.notnull(dfCOP[payoutNameCol])]
dfCOP['DataType'] = "COP" 

dfManageCOP = pd.read_excel(payoutExcelFile, sheet_name = ManageCOP_WsName)
#remove empty member name
dfManageCOP = dfManageCOP[pd.notnull(dfManageCOP[payoutNameCol])]
dfManageCOP['DataType'] = "ManageCOP" 

#merchant file
dfMerchant = pd.read_excel(payoutExcelFile, sheet_name = Merchant_WsName)
#remove empty merchant col
dfMerchant = dfMerchant[pd.notnull(dfMerchant[merchantNameCol])]
dfMerchant['DataType'] = "Merchant" 

dfBillPayment = pd.read_excel(payoutExcelFile, sheet_name = BillPayment_WsName)
#remove empty name
dfBillPayment = dfBillPayment[pd.notnull(dfBillPayment[billPaymentNameCol])]
dfBillPayment['DataType'] = "BillPayment" 

In [28]:
#create index
dfEU[euIndex] = np.arange(dfEU.shape[0])
dfCOP[copIndex] = np.arange(dfCOP.shape[0])
dfManageCOP[manageCOPIndex] = np.arange(dfManageCOP.shape[0])
dfMerchant[merchantIndex] = np.arange(dfMerchant.shape[0])
dfBillPayment[billPaymentIndex] = np.arange(dfBillPayment.shape[0])

In [11]:
bsExcelFile = "Quinton_Bank Statement/08.06 3207971832Statement.csv"
# dfBankStatement = pd.read_csv(bsExcelFile, skiprows = 17)

#read and process bank statement file
def processBankStatement(file):
    print("Processing bank statement files \n")
    try:
        #read file
        df = pd.read_csv(bsExcelFile, skiprows = 17)
        #get column names
        bsColList = df.columns
        #clean rows
        cleanRows = []
        for row in df.iterrows():
            #index
            if type(row[0]) == str:
                tempList = [re.sub("=|\"", "", row[0])]
            else:
                tempList = [row[0]]
            #other values
            tempList+=[re.sub("=|\"", "", value) if type(value) == str else value for value in row[1]][:-1]
            cleanRows.append(tempList)
            
        df2 = pd.DataFrame(cleanRows)
        df2.columns = bsColList
        print("Finished processing bank statement")
        
        return df2
    except Exception as e:
        print("Failed to read/process bank statement file \n")
        print(e)

In [12]:
bsColList = dfBankStatement.columns

cleanRows = []
for row in dfBankStatement.iterrows():
    #index
    if type(row[0]) == str:
        tempList = [re.sub("=|\"", "", row[0])]
    else:
        tempList = [row[0]]
    #other values
    tempList+=[re.sub("=|\"", "", value) if type(value) == str else value for value in row[1]][:-1]
    
    cleanRows.append(tempList)
    
dfBankStatement2 = pd.DataFrame(cleanRows)
dfBankStatement2.columns = bsColList

In [13]:
dfBankStatement = processBankStatement(bsExcelFile)

Processing bank statement files 

Finished processing bank statement


In [14]:
dfBankStatement = dfBankStatement.query('`Transaction Description`!= "%s"'%filterTransDesc)
dfBankStatement[bsIndex] = np.arange(dfBankStatement.shape[0])

In [15]:
#check if each col to use/match in bs exists in bs table
def checkBSCols(df, cols):
    checkCol = [i for i in cols if i in df.columns]
    if len(checkCol) != len(cols):
        return False
    else:
        return True

In [16]:
checkBSCols(dfBankStatement, bsColMatchList + [bsAmountCol])

True

In [17]:
colList = [bsIndex] + bsColMatchList + [bsAmountCol]
dfBankStatementFinal = dfBankStatement[colList].copy()

### Exact Match

In [120]:
def exactMatch(df, indexName, nameCol):
    startTime = timer()
    mergeList = []
    indexList = list(df[pd.notnull(df[nameCol])][indexName].values)
    for col in bsColMatchList:
        dfTemp = df[df[indexName].isin(indexList)].copy()
        dfMerged = dfTemp.merge(dfBankStatementFinal, how = 'inner', left_on = [nameCol, poAmountCol],
                                                            right_on = [col, bsAmountCol])
        dfMerged2 = dfMerged[(pd.notnull(dfMerged[nameCol])) & (pd.notnull(dfMerged[col]))].copy()
        dfMerged2.rename(columns = {col: bsName, nameCol : poNameCol}, inplace = True)
        mergeList.append(dfMerged2)
        #update index list

    
    dfMergedFinal = pd.concat(mergeList).copy()[[indexName, poNameCol, poAmountCol, 
                                                 bsIndex, bsName, bsAmountCol, "DataType"]]
#     dfMergedFinal = pd.concat(mergeList).copy()
    dfMergedFinal['MatchCategory'] = "ExactMatch"
    endTime = timer()
    print("Total time: %0.4fs" % (endTime - startTime))
    return dfMergedFinal

In [121]:
#exact matching for payout file
#EU
dfExactMatchEU = exactMatch(dfEU, euIndex, payoutNameCol)
#find no match list
noMatchEUList = set(dfEU[euIndex].values)\
                    .difference(set(dfExactMatchEU[euIndex].values))
dfExactMatchEU.shape

Total time: 0.0770s


(487, 8)

In [122]:
#COP
dfExactMatchCOP = exactMatch(dfCOP, copIndex, payoutNameCol)
#find no match list
noMatchCOPList = set(dfCOP[copIndex].values)\
                    .difference(set(dfExactMatchCOP[copIndex].values))
dfExactMatchCOP.shape

Total time: 0.0820s


(397, 8)

In [123]:
#Manage COP
dfExactMatchManageCOP = exactMatch(dfManageCOP, manageCOPIndex, payoutNameCol)
#find no match list
noMatchManageCOPList = set(dfManageCOP[manageCOPIndex].values)\
                    .difference(set(dfExactMatchManageCOP[manageCOPIndex].values))
dfExactMatchManageCOP.shape

Total time: 0.0829s


(0, 8)

In [124]:
#exact matching for bill payment file
dfExactMatchBP = exactMatch(dfBillPayment, billPaymentIndex, billPaymentNameCol)
#find no match list
noMatchBPList = set(dfBillPayment[billPaymentIndex].values)\
                        .difference(set(dfExactMatchBP[billPaymentIndex].values))
dfExactMatchBP.shape

Total time: 0.0719s


(0, 8)

In [125]:
#exact matching for merchant file
exactMatchMerchantList = []
for colName in [merchantNameCol, merchantBankNameCol]:
    dfExactMatchMerchant = exactMatch(dfMerchant, merchantIndex, colName)
    exactMatchMerchantList.append(dfExactMatchMerchant)

dfExactMatchMerchant = pd.concat(exactMatchMerchantList).copy()
dfExactMatchMerchant.drop_duplicates(subset=[merchantIndex, bsIndex], inplace = True)

#harmonize no match list
noMatchMerchantList = set(dfMerchant[merchantIndex].values)\
                        .difference(set(dfExactMatchMerchant[merchantIndex].values))
dfExactMatchMerchant.shape

Total time: 0.0823s
Total time: 0.0954s


(386, 8)

In [126]:
#combine all exact matches
dfExactMatchFinal = pd.concat([dfExactMatchEU, dfExactMatchCOP, dfExactMatchManageCOP,
                               dfExactMatchBP, dfExactMatchMerchant]).copy()
matchedBSIndex = dfExactMatchFinal[bsIndex].values
dfExactMatchFinal.shape

(1270, 12)

In [127]:
csvFile = "2021-01-24_ExactMatch.csv"
dfExactMatchFinal.to_csv(csvFile, index = False)

In [128]:
#get unmatched list EU for partial matching purpose
noMatchEUlist = set(dfEU[euIndex].values).difference(set(dfExactMatchEU[euIndex].values))
len(noMatchEUlist)

103

In [129]:
#get unmatched list COP for partial matching purpose
noMatchCOPlist = set(dfCOP[copIndex].values).difference(set(dfExactMatchCOP[copIndex].values))
len(noMatchCOPlist)

322

In [130]:
#get unmatched list manage COP for partial matching purpose
noMatchManageCOPlist = set(dfManageCOP[manageCOPIndex].values).difference(set(dfExactMatchManageCOP[manageCOPIndex].values))
len(noMatchManageCOPlist)

14

In [131]:
#get unmatched list BP for partial matching purpose
noMatchBPlist = set(dfBillPayment[billPaymentIndex].values).difference(set(dfExactMatchBP[billPaymentIndex].values))
len(noMatchBPlist)

243

In [132]:
#get unmatched list merchant for partial matching purpose
noMatchMerchantlist = set(dfMerchant[merchantIndex].values).difference(set(dfExactMatchMerchant[merchantIndex].values))
len(noMatchMerchantlist)

276

### partial match

In [185]:
def matchFirst2Terms(name, series):
    ruleName = "matchFirst2Terms"
    #match series with first 1 or 2 terms of names
    if len(name.split(' ')) >= 3:
        match = ' '.join(name.split(' ')[:2])
        resultArray = series.map(lambda x:ruleName if re.search('%s'%match, str(x), flags = re.IGNORECASE)
                                else "False").values
    elif len(name.split(' ')) == 2:
        match = name.split(' ')[0]
        resultArray = series.map(lambda x:ruleName if re.search('%s'%match, re.sub(" ","",str(x)), flags = re.IGNORECASE)
                                else "False").values
    else:
        resultArray = np.full(len(series), "False")

    return resultArray

# def matchTruncated(name, series):
#     ruleName = "matchTruncated"
#     #match series with truncated name if there is 
#     rule1 =  series\
#             .map(lambda x: True if re.search('%s'%re.escape(str(name)), str(x), flags = re.IGNORECASE) 
#                  and str(x) not in ['', 'nan'] else False).values
#     #match name with truncated name in the series if there is
#     rule2 = series\
#                 .map(lambda x: True if re.search('%s'%re.escape(str(x)), str(name), flags = re.IGNORECASE) 
#                      and str(x) not in ['','nan'] else False).values
#     resultArray = (rule1|rule2)
#     resultArray = np.where(resultArray, ruleName, "False")
    
#     return resultArray

#edited - 2021-01-28
def matchTruncated(name, series):
    ruleName = "matchTruncated"
    #match series with truncated name if there is 
    cleanName = re.sub(" binti ", " bt ", str(name), flags = re.IGNORECASE)
    rule1 =  series\
            .map(lambda x: True if re.search('%s'%re.escape(str(name)), str(x), flags = re.IGNORECASE) 
                 and str(x) not in ['', 'nan'] else False).values
    #match name with truncated name in the series if there is
    rule2 = series\
                .map(lambda x: True if re.search('%s'%re.escape(re.sub("ENCIK |COP REFUND", '', 
                                                                re.sub(" binti ", " bt ", str(x), flags = re.I), 
                                                                       flags = re.IGNORECASE).strip()), 
                                                 str(name), flags = re.IGNORECASE) 
                     and re.sub("ENCIK |COP REFUND", '', 
                                re.sub(" binti ", " bt ", str(x), flags = re.I), 
                            flags = re.IGNORECASE).strip() not in ['', 'nan'] else False).values
    resultArray = (rule1|rule2)
    resultArray = np.where(resultArray, ruleName, "False")
    
    return resultArray

def noSpacenoCopRefund(name, series):
    ruleName = "noSpacenoCopRefund"
    #remove cop refund and remove space for BS reference to match wth name
    rule1 = series.map(lambda x:True if re.search('%s'%re.escape(str(name)), 
                            re.sub(' ','',re.sub("COP REFUND", '',str(x))), flags = re.IGNORECASE)
                                else False).values
    rule2 = series.map(lambda x:True if re.search('%s'%re.escape(re.sub(' ','',re.sub("COP REFUND", '',str(x)))), 
                        str(name), flags = re.IGNORECASE) and re.sub(' ','',re.sub("COP REFUND", '',str(x)))!=''
                            else False).values
    resultArray = (rule1|rule2)
    resultArray = np.where(resultArray, ruleName, "False")
    return resultArray

def matchAccountLast4Digit(accountNo, series):
    ruleName = "matchAccountLast4Digit"
    resultArray = series.map(lambda x:ruleName if re.search('%s'%accountNo, str(x), flags = re.IGNORECASE)
                                        and len(accountNo) == 4 else "False").values
    return resultArray

def matchBillTypeRef(billType, series):
    ruleName = "matchBillTypeRef"
    if billType == '':
        resultArray = np.full(len(series), "False")
        reason = ""
    else:
        #if can find billtype - Indah water in reference 
        rule1 =  series\
                .map(lambda x: True if re.search('%s'%billType, str(x), flags = re.IGNORECASE) 
                     and str(x) not in  ['', 'nan'] else False).values
        #if billtype is PAIP and Pengurusan air pahan in reference
        rule2 = series\
                .map(lambda x: True if re.search('%s'%"PENGURUSAN AIR PAHAN", str(x), flags = re.IGNORECASE) 
                     and billType == 'PAIP' else False).values
        #if billtype is PBA and perbadanan bekalan air in reference
        rule3 = series\
                .map(lambda x: True if re.search('%s'%"perbadanan bekalan air", str(x), flags = re.IGNORECASE) 
                     and billType == 'PBA' else False).values
        resultArray = (rule1|rule2|rule3)

        resultArray = np.where(resultArray, ruleName, "False")
    return resultArray

def matchUniqueAmount(amount, series):
    ruleName = "matchUniqueAmount"
    #if only unique match in bs for amount
    resultArray = series.map(lambda x:True if x == amount else False).values
    if sum(resultArray) == 1:
        return np.full(len(series), ruleName)
    else:
        return np.full(len(series), "False")

def partialMatch(df, notMatchIndex, indexCol, nameCol, ruleType):
    matchResult = []
    startTime = timer()
    for index in notMatchIndex:
        #instantiate rule
        dfSubset = df.query('%s == %s'%(indexCol, index))
        if len(dfSubset[nameCol].values) != 0:
            POname = dfSubset[nameCol].values[0]
            POname = re.sub("\u200b", "", POname)
            POamount = dfSubset[poAmountCol].values[0]
            #merchant, other payout
            if "Bank Acc" in dfSubset.columns:
                AccountNo = re.sub(' |-|\.0','', str(dfSubset['Bank Acc'].values[0]))[-4:]
            #bill payment
            elif "Acc Number" in dfSubset.columns:
                AccountNo = re.sub(' |-|\.0','', str(dfSubset['Acc Number'].values[0]))[-4:]
            else:
                AccountNo = ''
            if 'Bill Type' in dfSubset.columns:
                BillType = dfSubset['Bill Type'].values[0]
            else:
                BillType = ''
            for col in ['Reference 1', 'Reference 2', 'Reference 3', 'Reference 4']:
                match1 = matchTruncated(POname, dfBankStatementFinal[col])
                match2 = matchFirst2Terms(POname, dfBankStatementFinal[col])
                match3 = noSpacenoCopRefund(POname, dfBankStatementFinal[col])
                match4 = matchAccountLast4Digit(AccountNo, dfBankStatementFinal[col])
                match5 = matchBillTypeRef(BillType, dfBankStatementFinal[col])
                match6 = matchUniqueAmount(POamount, dfBankStatementFinal[bsAmountCol])
                ruleStr1 = "|".join(['(match%s!="False")'%i for i in ruleDict[ruleType]])
                checkRule = eval(ruleStr1)
                #assign bool statement to checkRule
                dfMatch = dfBankStatementFinal[(dfBankStatementFinal[bsAmountCol] == POamount) & (checkRule)]
                ruleStr1 = '[' + ','.join(['match%s'%i for i in ruleDict[ruleType]]) + ']'
                #assign list to ruleList
                ruleList = eval(ruleStr1)
                if dfMatch.shape[0] == 1:
                    BSname = dfMatch[col].values[0]
                    BSamount = dfMatch[bsAmountCol].values[0]   
                    BSindex = dfMatch[bsIndex].values[0]
                    rule = ','.join([i[BSindex] for i in ruleList if i[BSindex]!="False"])
                    matchResult.append([index, POname, POamount, BSindex, BSname, BSamount, rule])
    #             more than 1 match
                elif dfMatch.shape[0] > 1:
                    for no, name in enumerate(dfMatch[col].values):
                        BSname = name
                        BSamount = dfMatch[bsAmountCol].values[no]
                        BSindex = dfMatch[bsIndex].values[no]
                        rule = ','.join([i[BSindex] for i in ruleList if i[BSindex]!="False"])
                        #nid to remove duplicate after consolidation
                        matchResult.append([index, POname, POamount, BSindex, BSname, BSamount, rule])

    endTime = timer()
    print("Total time: %0.4fs" % (endTime - startTime))
    return matchResult

def constructTable(matchResult, df, indexName, nameCol, dType):
    pandaList = []
    for i in matchResult:
        pandaList.append(i)
    
    dfMatchFinal = pd.DataFrame(pandaList)
    if dfMatchFinal.shape[0]!=0:
        #[index, POname, POamount, BSindex, BSname, BSamount, rule]
        dfMatchFinal.columns = [indexName, poNameCol, poAmountCol, bsIndex, bsName, bsAmountCol, 'Reason']
        dfMatchFinal['MatchCategory'] = "PartialMatch"
        dfMatchFinal['DataType'] = dType
        return dfMatchFinal
    else:
        return None

In [186]:
#edited
matchResultEU = partialMatch(dfEU, noMatchEUlist, 
                             euIndex, payoutNameCol, ruleType = "PayOut")

dfPartialMatchEU = constructTable(matchResultEU, dfEU, euIndex, poNameCol, 'EU')

dfPartialMatchEU.drop_duplicates(euIndex, inplace = True)

dfPartialMatchEU.shape[0]

Total time: 321.0800s


95

In [37]:
matchResultEU = partialMatch(dfEU, noMatchEUlist, 
                             euIndex, payoutNameCol, ruleType = "PayOut")

dfPartialMatchEU = constructTable(matchResultEU, dfEU, euIndex, poNameCol, 'EU')

dfPartialMatchEU.drop_duplicates(euIndex, inplace = True)

dfPartialMatchEU.shape[0]

Total time: 104.1276s


95

In [187]:
#edited
matchResultCOP = partialMatch(dfCOP, noMatchCOPlist, 
                             copIndex, payoutNameCol, ruleType = "PayOut")

dfPartialMatchCOP = constructTable(matchResultCOP, dfCOP, copIndex, poNameCol, 'COP')

dfPartialMatchCOP.drop_duplicates(copIndex, inplace = True)

dfPartialMatchCOP.shape[0]

Total time: 1000.3409s


312

In [38]:
matchResultCOP = partialMatch(dfCOP, noMatchCOPlist, 
                             copIndex, payoutNameCol, ruleType = "PayOut")

dfPartialMatchCOP = constructTable(matchResultCOP, dfCOP, copIndex, poNameCol, 'COP')

dfPartialMatchCOP.drop_duplicates(copIndex, inplace = True)

dfPartialMatchCOP.shape[0]

Total time: 314.8384s


308

In [188]:
#edited
matchResultManageCOP = partialMatch(dfManageCOP, noMatchManageCOPList, 
                                    manageCOPIndex, payoutNameCol, ruleType = "PayOut")

dfPartialMatchManageCOP = constructTable(matchResultManageCOP, dfManageCOP, manageCOPIndex, poNameCol, 'ManageCOP')

dfPartialMatchManageCOP.drop_duplicates(manageCOPIndex, inplace = True)

dfPartialMatchManageCOP.shape[0]

Total time: 44.7978s


14

In [39]:
matchResultManageCOP = partialMatch(dfManageCOP, noMatchCOPlist, 
                                    manageCOPIndex, payoutNameCol, ruleType = "PayOut")

dfPartialMatchManageCOP = constructTable(matchResultManageCOP, dfManageCOP, manageCOPIndex, poNameCol, 'ManageCOP')

dfPartialMatchManageCOP.drop_duplicates(manageCOPIndex, inplace = True)

dfPartialMatchManageCOP.shape[0]

Total time: 9.8473s


9

In [189]:
#edited
matchResultBP = partialMatch(dfBillPayment, noMatchBPlist, 
                             billPaymentIndex, billPaymentNameCol, ruleType = "BillPayment")

dfPartialMatchBP = constructTable(matchResultBP, dfBillPayment, billPaymentIndex, billPaymentNameCol, 'BillPayment')
#ll only keep d first record of multiple matches
dfPartialMatchBP.drop_duplicates(billPaymentIndex, inplace = True)

dfPartialMatchBP.shape[0]

Total time: 822.6159s


17

In [40]:
matchResultBP = partialMatch(dfBillPayment, noMatchBPlist, 
                             billPaymentIndex, billPaymentNameCol, ruleType = "BillPayment")

dfPartialMatchBP = constructTable(matchResultBP, dfBillPayment, billPaymentIndex, billPaymentNameCol, 'BillPayment')
#ll only keep d first record of multiple matches
dfPartialMatchBP.drop_duplicates(billPaymentIndex, inplace = True)

dfPartialMatchBP.shape[0]

Total time: 255.2029s


17

In [190]:
#edited
MerchantList = []
for colName in ['Merchant', 'Bank Acc Name']:
    matchResultMerchant = partialMatch(dfMerchant, noMatchMerchantList, merchantIndex, colName, ruleType = "Merchant")
    dfPartialMatchMerchant = constructTable(matchResultMerchant, dfMerchant, merchantIndex, colName, 'Merchant')
    MerchantList.append(dfPartialMatchMerchant)
    
dfPartialMatchMerchant = pd.concat(MerchantList).copy()
dfPartialMatchMerchant.drop_duplicates(subset=['Merchant_Index', 'BS_Index'], inplace = True)
dfPartialMatchMerchant.shape[0]

Total time: 864.8506s
Total time: 863.6937s


281

In [41]:
MerchantList = []
for colName in ['Merchant', 'Bank Acc Name']:
    matchResultMerchant = partialMatch(dfMerchant, noMatchMerchantList, merchantIndex, colName, ruleType = "Merchant")
    dfPartialMatchMerchant = constructTable(matchResultMerchant, dfMerchant, merchantIndex, colName, 'Merchant')
    MerchantList.append(dfPartialMatchMerchant)
    
dfPartialMatchMerchant = pd.concat(MerchantList).copy()
dfPartialMatchMerchant.drop_duplicates(subset=['Merchant_Index', 'BS_Index'], inplace = True)
dfPartialMatchMerchant.shape[0]

Total time: 264.2156s
Total time: 262.9911s


281

In [191]:
#combine all partial matches - edited
dfPartialMatchFinal = pd.concat([dfPartialMatchEU, dfPartialMatchCOP, dfPartialMatchManageCOP,
                                 dfPartialMatchBP, dfPartialMatchMerchant]).copy()
dfPartialMatchFinal.shape

(719, 13)

In [42]:
#combine all partial matches
dfPartialMatchFinal = pd.concat([dfPartialMatchEU, dfPartialMatchCOP, dfPartialMatchManageCOP,
                                 dfPartialMatchBP, dfPartialMatchMerchant]).copy()

In [43]:
dfPartialMatchFinal.shape

(710, 13)

In [192]:
csvFile = "2021-01-28_PartialMatch.csv"
dfPartialMatchFinal.to_csv(csvFile, index = False)

In [211]:
#merge exact and partial match

dfMerge = pd.concat([dfExactMatchFinal, dfPartialMatchFinal]).copy()
dfMerge.shape

(1989, 13)

### Summary

In [212]:
dfMerge.columns

Index(['EU_Index', 'Member Name', 'Amount', 'BS_Index', 'BS Name',
       'Debit Amount', 'DataType', 'MatchCategory', 'COP_Index',
       'ManageCOP_Index', 'BP_Index', 'Merchant_Index', 'Reason'],
      dtype='object')

In [213]:
reconciledEUIndex = dfMerge[pd.notnull(dfMerge['EU_Index'])]['EU_Index'].values
reconciledCOPIndex = dfMerge[pd.notnull(dfMerge['COP_Index'])]['COP_Index'].values
reconciledManageCOPIndex = dfMerge[pd.notnull(dfMerge['ManageCOP_Index'])]['ManageCOP_Index'].values
reconciledBillPaymentIndex = dfMerge[pd.notnull(dfMerge['BP_Index'])]['BP_Index'].values
reconciledMerchantIndex = dfMerge[pd.notnull(dfMerge['Merchant_Index'])]['Merchant_Index'].values
reconciledBankStatementIndex = dfMerge[pd.notnull(dfMerge['BS_Index'])]['BS_Index'].values

reconciledEU_Sum = dfEU[dfEU['EU_Index'].isin(reconciledEUIndex)]['Amount'].sum()
reconciledCOP_Sum  = dfCOP[dfCOP['COP_Index'].isin(reconciledCOPIndex)]['Amount'].sum()
reconciledManageCOP_Sum  = dfManageCOP[dfManageCOP['ManageCOP_Index'].isin(reconciledManageCOPIndex)]['Amount'].sum()
reconciledBillPayment_Sum  = dfBillPayment[dfBillPayment['BP_Index'].isin(reconciledBillPaymentIndex)]['Amount'].sum()
reconciledMerchant_Sum  = dfMerchant[dfMerchant['Merchant_Index'].isin(reconciledMerchantIndex)]['Amount'].sum()

noReconciledEU_Sum = dfEU[~dfEU['EU_Index'].isin(reconciledEUIndex)]['Amount'].sum()
noReconciledCOP_Sum  = dfCOP[~dfCOP['COP_Index'].isin(reconciledCOPIndex)]['Amount'].sum()
noReconciledManageCOP_Sum  = dfManageCOP[~dfManageCOP['ManageCOP_Index'].isin(reconciledManageCOPIndex)]['Amount'].sum()
noReconciledBillPayment_Sum  = dfBillPayment[~dfBillPayment['BP_Index'].isin(reconciledBillPaymentIndex)]['Amount'].sum()
noReconciledMerchant_Sum  = dfMerchant[~dfMerchant['Merchant_Index'].isin(reconciledMerchantIndex)]['Amount'].sum()
noReconciledBankStatement_Sum = dfBankStatementFinal[~dfBankStatementFinal['BS_Index'].isin(reconciledBankStatementIndex)]['Debit Amount'].sum()

pendingEU_Sum = dfEU[dfEU['Status'] == "Pending"]['Amount'].sum()
pendingCOP_Sum = dfCOP[dfCOP['Status'] == "Pending"]['Amount'].sum()
pendingManageCOP_Sum = dfManageCOP[dfManageCOP['Status'] == "Pending"]['Amount'].sum()
pendingMerchant_Sum = dfMerchant[dfMerchant['Status'] == "Pending"]['Amount'].sum()
billPayment_Sum = dfBillPayment['Amount'].sum()

In [255]:
tableList = []
tableList.append(["Journal", "BS vs Payout (A)", "In BS, not in Payout (B)", "Not in BS (C)"])

tableList.append(['EU', reconciledEU_Sum, "", noReconciledEU_Sum])
tableList.append(['COP', "{:.2f}".format(reconciledCOP_Sum), "", noReconciledCOP_Sum])
tableList.append(['Manage COP', reconciledManageCOP_Sum, "", 
                  noReconciledManageCOP_Sum])
tableList.append(['Bill Payment', reconciledBillPayment_Sum, "", 
                  noReconciledBillPayment_Sum])
tableList.append(['Merchant', reconciledMerchant_Sum, "", 
                  noReconciledMerchant_Sum])
tableList.append(['DIS', "", "", ""])
tableList.append(['QS DIS', "", "", ""])
HandlingSum = 0
tableList.append(['Handling Charges', HandlingSum, "", ""])
tableList.append(['Misc', "", noReconciledBankStatement_Sum, ""])
totalReconciled = reconciledEU_Sum + reconciledCOP_Sum + reconciledManageCOP_Sum\
                  + reconciledBillPayment_Sum + reconciledMerchant_Sum
totalnoReconciled = noReconciledEU_Sum + noReconciledCOP_Sum + \
                    noReconciledManageCOP_Sum + noReconciledBillPayment_Sum + \
                    noReconciledMerchant_Sum
tableList.append(['Total', "{:.2f}".format(totalReconciled), "{:.2f}".format(noReconciledBankStatement_Sum),
                  "{:.2f}".format(totalnoReconciled)])
tableList.append(['Total (A)+(B) / (A)+(C)', 
                  "{:.2f}".format(totalReconciled + noReconciledBankStatement_Sum),
                 "", "{:.2f}".format(totalReconciled + totalnoReconciled)])
pendingTotal = pendingEU_Sum + pendingCOP_Sum + pendingManageCOP_Sum\
                + pendingMerchant_Sum + billPayment_Sum
tableList.append(['As per BS / Payout', "({:.2f})".format(totalReconciled + noReconciledBankStatement_Sum),
                                  "", "({:.2f})".format(pendingTotal)])
tableList.append(['Variance', "", "", totalReconciled + totalnoReconciled - pendingTotal])

In [267]:
summaryTable = pd.DataFrame(tableList)
summaryTable.columns = ["", "Reconciled", "No reconciled", "No reconciled2"]
summaryTable

Unnamed: 0,Unnamed: 1,Reconciled,No reconciled,No reconciled2
0,Journal,BS vs Payout (A),"In BS, not in Payout (B)",Not in BS (C)
1,EU,577492,,5692
2,COP,3132305.00,,30296
3,Manage COP,14785,,0
4,Bill Payment,1818.64,,46633.1
5,Merchant,705141,,5176
6,DIS,,,
7,QS DIS,,,
8,Handling Charges,0,,
9,Misc,,6.1052e+06,


### Validation

In [3]:
csvFile = "2021-01-24_ExactMatch.csv"
dfExactMatchFinal = pd.read_csv(csvFile)

csvFile = "2021-01-26_PartialMatch.csv"
dfPartialMatchFinal = pd.read_csv(csvFile)

In [193]:
dfMerge = pd.concat([dfExactMatchFinal, dfPartialMatchFinal]).copy()

In [194]:
dfMerge.shape

(1989, 13)

In [195]:
dfReconciled.shape

(1920, 16)

In [51]:
dfReconciled.columns

Index(['Country', 'Withdraw Date', 'Username', 'Member Name', 'Withdraw',
       'Combined Name', 'Package', 'Bank', 'Bank Acc', 'Bank-in Slip',
       'Bank-in By', 'Bank-in Date', 'Remarks', 'Status', 'Amount',
       'Remarks.1'],
      dtype='object')

In [196]:
reconcileList = dfReconciled['Member Name']

matchList = dfMerge['Member Name']

userList = dfReconciled['Username']

In [175]:
len(set(matchList))

1723

In [176]:
len(set(reconcileList))

1710

In [197]:
matchMoreList = [i for i in matchList if i not in set(reconcileList) and i not in set(dfReconciled['Username'])]

reconcileMoreList = [i for no, i in enumerate(reconcileList) if i not in set(matchList) and 
                     userList[no] not in set(matchList)]

In [198]:
len(set(matchMoreList))

12

In [199]:
len(set(reconcileMoreList))

3

In [200]:
#reconcile has more
dfReconciledMore = dfReconciled.query("`Member Name` in @reconcileMoreList").iloc[:-2,]
csvFile = "2021-01-28_ReconcileMore_v2.csv"
# dfReconciledMore.to_csv(csvFile, index = False)

In [201]:
dfReconciledMore

Unnamed: 0,Country,Withdraw Date,Username,Member Name,Withdraw,Combined Name,Package,Bank,Bank Acc,Bank-in Slip,Bank-in By,Bank-in Date,Remarks,Status,Amount,Remarks.1
1380,MALAYSIA,2020-08-03,yokeng,Wee Yok Eng @ Wee Eng Eng,BASIC,Wee Yok Eng @ Wee Eng Eng BASIC,BASIC,RHB,21301000083322,,,,,Pending,2600.0,COP_20200803
1872,MALAYSIA,2020-08-03,Helmy448,Mohamad Helmi Mohamad Yusof,BASIC,Mohamad Helmi Mohamad Yusof BASIC,BASIC,CIMB-MALAYSIA,7621375166,,,,,Pending,10000.0,COP_20200803


In [150]:
dfReconciledMore.shape

(3, 16)

In [202]:
#match has more
dfMergeMore = dfMerge.query("`Member Name` in @matchMoreList")
csvFile = "2021-01-28_MatchMore_v2.csv"
dfMergeMore.to_csv(csvFile, index = False)

In [260]:
from styleframe import StyleFrame

In [259]:
!pip install styleframe

Collecting styleframe
  Downloading styleframe-3.0.6-py3-none-any.whl (32 kB)
Collecting colour<0.2,>=0.1.5
  Downloading colour-0.1.5-py2.py3-none-any.whl (23 kB)
Installing collected packages: colour, styleframe
Successfully installed colour-0.1.5 styleframe-3.0.6


In [273]:
excelFile = "2021-01-28_0806Compare.xlsx"
with pd.ExcelWriter(excelFile) as writer:
    summaryTable.to_excel(writer, sheet_name = "Summary")
    dfMerge.to_excel(writer, sheet_name = "MachineMatchResult")
    dfReconciledMore.to_excel(writer, sheet_name = "SisReconcileMore")
    dfMergeMore.to_excel(writer, sheet_name = 'MachineMatchMore')
    dfEU.to_excel(writer, sheet_name = 'EU')
    dfCOP.to_excel(writer, sheet_name = "COP")
    dfManageCOP.to_excel(writer, sheet_name = "ManageCOP")
    dfBillPayment.to_excel(writer, sheet_name = "BillPayment")
    dfMerchant.to_excel(writer, sheet_name = "Merchant")
    dfBankStatementFinal.to_excel(writer, sheet_name = "BankStatement")

In [271]:
#autofit
excelFile = "test.xlsx"
with StyleFrame.ExcelWriter(excelFile) as writer:
    sf = StyleFrame(summaryTable)
#     sf.set_column_width(columns=['aaaaaaaaaaa', 'bbbbbbbbb'],
#                     width=35.3)
    sf.to_excel(excel_writer = writer, 
                best_fit = list(summaryTable.columns), sheet_name = "Summary")

### Debug - 2021-01-28

In [46]:
bsExcelFile = "Quinton_Bank Statement/08.06 3207971832Statement.csv"
# dfBankStatement = pd.read_csv(bsExcelFile, skiprows = 17)

#read and process bank statement file
def processBankStatement(file):
    print("Processing bank statement files \n")
    try:
        #read file
        df = pd.read_csv(bsExcelFile, skiprows = 17)
        #get column names
        bsColList = df.columns
        #clean rows
        cleanRows = []
        for row in df.iterrows():
            #index
            if type(row[0]) == str:
                tempList = [re.sub("=|\"", "", row[0])]
            else:
                tempList = [row[0]]
            #other values
            tempList+=[re.sub("=|\"", "", value) if type(value) == str else value for value in row[1]][:-1]
            cleanRows.append(tempList)
            
        df2 = pd.DataFrame(cleanRows)
        df2.columns = bsColList
        print("Finished processing bank statement")
        
        return df2
    except Exception as e:
        print("Failed to read/process bank statement file \n")
        print(e)

In [47]:
dfBankStatement = processBankStatement(bsExcelFile)
dfBankStatement = dfBankStatement.query('`Transaction Description`!= "%s"'%filterTransDesc)
dfBankStatement[bsIndex] = np.arange(dfBankStatement.shape[0])
colList = [bsIndex] + bsColMatchList + [bsAmountCol]
dfBankStatementFinal = dfBankStatement[colList].copy()

Processing bank statement files 

Finished processing bank statement


In [159]:
def matchFirst2Terms(name, series):
    ruleName = "matchFirst2Terms"
    #match series with first 1 or 2 terms of names
    if len(name.split(' ')) >= 3:
        match = ' '.join(name.split(' ')[:2])
        resultArray = series.map(lambda x:ruleName if re.search('%s'%match, str(x), flags = re.IGNORECASE)
                                else "False").values
    elif len(name.split(' ')) == 2:
        match = name.split(' ')[0]
        resultArray = series.map(lambda x:ruleName if re.search('%s'%match, re.sub(" ","",str(x)), flags = re.IGNORECASE)
                                else "False").values
    else:
        resultArray = np.full(len(series), "False")

    return resultArray

def matchTruncated(name, series):
    ruleName = "matchTruncated"
    #match series with truncated name if there is 
    cleanName = re.sub(" binti ", " bt ", str(name), flags = re.IGNORECASE)
    rule1 =  series\
            .map(lambda x: True if re.search('%s'%re.escape(str(name)), str(x), flags = re.IGNORECASE) 
                 and str(x) not in ['', 'nan'] else False).values
    #match name with truncated name in the series if there is
    rule2 = series\
                .map(lambda x: True if re.search('%s'%re.escape(re.sub("ENCIK |COP REFUND", '', 
                                                                re.sub(" binti ", " bt ", str(x), flags = re.I), 
                                                                       flags = re.IGNORECASE).strip()), 
                                                 str(name), flags = re.IGNORECASE) 
                     and str(x) not in ['', 'nan'] else False).values
    resultArray = (rule1|rule2)
    resultArray = np.where(resultArray, ruleName, "False")
    
    return resultArray

def noSpacenoCopRefund(name, series):
    ruleName = "noSpacenoCopRefund"
    #remove cop refund and remove space for BS reference to match wth name
    rule1 = series.map(lambda x:True if re.search('%s'%re.escape(str(name)), 
                            re.sub(' ','',re.sub("COP REFUND", '',str(x))), flags = re.IGNORECASE)
                                else False).values
    rule2 = series.map(lambda x:True if re.search('%s'%re.escape(re.sub(' ','',re.sub("COP REFUND", '',str(x)))), 
                        str(name), flags = re.IGNORECASE) and re.sub(' ','',re.sub("COP REFUND", '',str(x)))!=''
                            else False).values
    resultArray = (rule1|rule2)
    resultArray = np.where(resultArray, ruleName, "False")
    return resultArray

def matchAccountLast4Digit(accountNo, series):
    ruleName = "matchAccountLast4Digit"
    resultArray = series.map(lambda x:ruleName if re.search('%s'%accountNo, str(x), flags = re.IGNORECASE)
                                        and len(accountNo) == 4 else "False").values
    return resultArray

def matchBillTypeRef(billType, series):
    ruleName = "matchBillTypeRef"
    if billType == '':
        resultArray = np.full(len(series), "False")
        reason = ""
    else:
        #if can find billtype - Indah water in reference 
        rule1 =  series\
                .map(lambda x: True if re.search('%s'%billType, str(x), flags = re.IGNORECASE) 
                     and str(x) not in  ['', 'nan'] else False).values
        #if billtype is PAIP and Pengurusan air pahan in reference
        rule2 = series\
                .map(lambda x: True if re.search('%s'%"PENGURUSAN AIR PAHAN", str(x), flags = re.IGNORECASE) 
                     and billType == 'PAIP' else False).values
        #if billtype is PBA and perbadanan bekalan air in reference
        rule3 = series\
                .map(lambda x: True if re.search('%s'%"perbadanan bekalan a", str(x), flags = re.IGNORECASE) 
                     and billType == 'PBA' else False).values
        resultArray = (rule1|rule2|rule3)

        resultArray = np.where(resultArray, ruleName, "False")
    return resultArray

def matchUniqueAmount(amount, series):
    ruleName = "matchUniqueAmount"
    #if only unique match in bs for amount
    resultArray = series.map(lambda x:True if x == amount else False).values
    if sum(resultArray) == 1:
        return np.full(len(series), ruleName)
    else:
        return np.full(len(series), "False")


#### COP

In [160]:
nameCol = 'Member Name'
indexCol = 'COP_Index'
ruleType = "PayOut"
matchResult = []
startTime = timer()
for index in [301, 404, 464, 617]:
    #instantiate rule
    dfSubset = dfCOP.query('%s == %s'%(indexCol, index))
    if len(dfSubset[nameCol].values) != 0:
        POname = dfSubset[nameCol].values[0]
        POname = re.sub("\u200b", "", POname)
        POamount = dfSubset[poAmountCol].values[0]
        #merchant, other payout
        if "Bank Acc" in dfSubset.columns:
            AccountNo = re.sub(' |-|\.0','', str(dfSubset['Bank Acc'].values[0]))[-4:]
        #bill payment
        elif "Acc Number" in dfSubset.columns:
            AccountNo = re.sub(' |-|\.0','', str(dfSubset['Acc Number'].values[0]))[-4:]
        else:
            AccountNo = ''
        if 'Bill Type' in dfSubset.columns:
            BillType = dfSubset['Bill Type'].values[0]
        else:
            BillType = ''
        for col in ['Reference 1', 'Reference 2', 'Reference 3', 'Reference 4']:
            match1 = matchTruncated(POname, dfBankStatementFinal[col])
            match2 = matchFirst2Terms(POname, dfBankStatementFinal[col])
            match3 = noSpacenoCopRefund(POname, dfBankStatementFinal[col])
            match4 = matchAccountLast4Digit(AccountNo, dfBankStatementFinal[col])
            match5 = matchBillTypeRef(BillType, dfBankStatementFinal[col])
            match6 = matchUniqueAmount(POamount, dfBankStatementFinal[bsAmountCol])
            ruleStr1 = "|".join(['(match%s!="False")'%i for i in ruleDict[ruleType]])
            checkRule = eval(ruleStr1)
            #assign bool statement to checkRule
            dfMatch = dfBankStatementFinal[(dfBankStatementFinal[bsAmountCol] == POamount) & (checkRule)]
            ruleStr1 = '[' + ','.join(['match%s'%i for i in ruleDict[ruleType]]) + ']'
            #assign list to ruleList
            ruleList = eval(ruleStr1)
            if dfMatch.shape[0] == 1:
                BSname = dfMatch[col].values[0]
                BSamount = dfMatch[bsAmountCol].values[0]   
                BSindex = dfMatch[bsIndex].values[0]
                rule = ','.join([i[BSindex] for i in ruleList if i[BSindex]!="False"])
                matchResult.append([index, POname, POamount, BSindex, BSname, BSamount, rule])
#             more than 1 match
            elif dfMatch.shape[0] > 1:
                for no, name in enumerate(dfMatch[col].values):
                    BSname = name
                    BSamount = dfMatch[bsAmountCol].values[no]
                    BSindex = dfMatch[bsIndex].values[no]
                    rule = ','.join([i[BSindex] for i in ruleList if i[BSindex]!="False"])
                    #nid to remove duplicate after consolidation
                    matchResult.append([index, POname, POamount, BSindex, BSname, BSamount, rule])

endTime = timer()
print("Total time: %0.4fs" % (endTime - startTime))

Total time: 12.0741s


In [117]:
matchResult

[[301,
  'Vivian Chan siew yun',
  1360.0,
  4046,
  'CHAN SIEW YUN       COP REFUND',
  1360.0,
  'matchTruncated'],
 [404,
  'Margareta bt kimon',
  1599.0,
  4219,
  'MARGARETA BINTI KIMOCOP REFUND',
  1599.0,
  'matchTruncated'],
 [404,
  'Margareta bt kimon',
  1599.0,
  4213,
  'COP REFUND',
  1599.0,
  'matchTruncated'],
 [464,
  'Mohamad HAIRIZAN bin ismail',
  1400.0,
  4081,
  'ENCIK MOHAMAD HAIRIZCOP REFUND',
  1400.0,
  'matchTruncated'],
 [617,
  'Mohammad Rahmat bin Musa',
  5100.0,
  5480,
  'MOHAMMAD RAHM       COP REFUND',
  5100.0,
  'matchTruncated'],
 [617,
  'Mohammad Rahmat bin Musa',
  5100.0,
  5479,
  'COP REFUND',
  5100.0,
  'matchTruncated']]

In [41]:
dfCOP.query("`Member Name` == 'Vivian Chan siew yun'")

Unnamed: 0,Country,Withdraw Date,Username,Member Name,Withdraw,Package,Bank,Bank Acc,Bank-in Slip,Bank-in By,Bank-in Date,Remarks,Status,Amount,Unnamed: 14,Unnamed: 15,DataType,COP_Index
301,MALAYSIA,44046.0,vivianchan,Vivian Chan siew yun,BASIC,BASIC,CIMB-MALAYSIA,7025759848,,,,,Pending,1360.0,Edit,Reject,COP,301


In [69]:
dfCOP.query("`Member Name` == 'Mohammad Rahmat bin Musa'")

Unnamed: 0,Country,Withdraw Date,Username,Member Name,Withdraw,Package,Bank,Bank Acc,Bank-in Slip,Bank-in By,Bank-in Date,Remarks,Status,Amount,Unnamed: 14,Unnamed: 15,DataType,COP_Index
617,MALAYSIA,44046.0,easymillion,Mohammad Rahmat bin Musa,BASIC,BASIC,RHB,11301000266295,,,,,Pending,5100.0,Edit,Reject,COP,617


In [76]:
dfCOP.query("`Member Name` == 'Mohamad HAIRIZAN bin ismail'")

Unnamed: 0,Country,Withdraw Date,Username,Member Name,Withdraw,Package,Bank,Bank Acc,Bank-in Slip,Bank-in By,Bank-in Date,Remarks,Status,Amount,Unnamed: 14,Unnamed: 15,DataType,COP_Index
464,MALAYSIA,44046.0,Mhairizan,Mohamad HAIRIZAN bin ismail,BASIC,BASIC,Bank Islam,13026020224829,,,,,Pending,1400.0,Edit,Reject,COP,464


In [115]:
dfCOP.query("`Member Name` == 'Margareta bt kimon'")

Unnamed: 0,Country,Withdraw Date,Username,Member Name,Withdraw,Package,Bank,Bank Acc,Bank-in Slip,Bank-in By,Bank-in Date,Remarks,Status,Amount,Unnamed: 14,Unnamed: 15,DataType,COP_Index
404,MALAYSIA,44046.0,Mangga,Margareta bt kimon,BASIC,BASIC,CIMB-MALAYSIA,7629118064,,,,,Pending,1599.0,Edit,Reject,COP,404


In [96]:
dfBankStatementFinal.query("`Reference 1` == 'CHAN SIEW YUN       COP REFUND'")['Reference 1'].values

array(['CHAN SIEW YUN       COP REFUND'], dtype=object)

#### Manage COP

In [161]:
nameCol = 'Member Name'
indexCol = 'ManageCOP_Index'
ruleType = "PayOut"
matchResult = []
startTime = timer()
for index in [0]:
    #instantiate rule
    dfSubset = dfManageCOP.query('%s == %s'%(indexCol, index))
    if len(dfSubset[nameCol].values) != 0:
        POname = dfSubset[nameCol].values[0]
        POname = re.sub("\u200b", "", POname)
        POamount = dfSubset[poAmountCol].values[0]
        #merchant, other payout
        if "Bank Acc" in dfSubset.columns:
            AccountNo = re.sub(' |-|\.0','', str(dfSubset['Bank Acc'].values[0]))[-4:]
        #bill payment
        elif "Acc Number" in dfSubset.columns:
            AccountNo = re.sub(' |-|\.0','', str(dfSubset['Acc Number'].values[0]))[-4:]
        else:
            AccountNo = ''
        if 'Bill Type' in dfSubset.columns:
            BillType = dfSubset['Bill Type'].values[0]
        else:
            BillType = ''
        for col in ['Reference 1', 'Reference 2', 'Reference 3', 'Reference 4']:
            match1 = matchTruncated(POname, dfBankStatementFinal[col])
            match2 = matchFirst2Terms(POname, dfBankStatementFinal[col])
            match3 = noSpacenoCopRefund(POname, dfBankStatementFinal[col])
            match4 = matchAccountLast4Digit(AccountNo, dfBankStatementFinal[col])
            match5 = matchBillTypeRef(BillType, dfBankStatementFinal[col])
            match6 = matchUniqueAmount(POamount, dfBankStatementFinal[bsAmountCol])
            ruleStr1 = "|".join(['(match%s!="False")'%i for i in ruleDict[ruleType]])
            checkRule = eval(ruleStr1)
            #assign bool statement to checkRule
            dfMatch = dfBankStatementFinal[(dfBankStatementFinal[bsAmountCol] == POamount) & (checkRule)]
            ruleStr1 = '[' + ','.join(['match%s'%i for i in ruleDict[ruleType]]) + ']'
            #assign list to ruleList
            ruleList = eval(ruleStr1)
            if dfMatch.shape[0] == 1:
                BSname = dfMatch[col].values[0]
                BSamount = dfMatch[bsAmountCol].values[0]   
                BSindex = dfMatch[bsIndex].values[0]
                rule = ','.join([i[BSindex] for i in ruleList if i[BSindex]!="False"])
                matchResult.append([index, POname, POamount, BSindex, BSname, BSamount, rule])
#             more than 1 match
            elif dfMatch.shape[0] > 1:
                for no, name in enumerate(dfMatch[col].values):
                    BSname = name
                    BSamount = dfMatch[bsAmountCol].values[no]
                    BSindex = dfMatch[bsIndex].values[no]
                    rule = ','.join([i[BSindex] for i in ruleList if i[BSindex]!="False"])
                    #nid to remove duplicate after consolidation
                    matchResult.append([index, POname, POamount, BSindex, BSname, BSamount, rule])

endTime = timer()
print("Total time: %0.4fs" % (endTime - startTime))

Total time: 2.9412s


In [165]:
4 in noMatchCOPlist

False

In [162]:
matchResult

[[0,
  'Chong Su moi',
  1949.0,
  4498,
  'XXXXXX9608          CHONG SU MOI',
  1949.0,
  'matchTruncated,matchFirst2Terms,matchAccountLast4Digit,matchUniqueAmount'],
 [0,
  'Chong Su moi',
  1949.0,
  4498,
  'COP REFUND',
  1949.0,
  'matchTruncated,matchUniqueAmount'],
 [0, 'Chong Su moi', 1949.0, 4498, '', 1949.0, 'matchUniqueAmount'],
 [0, 'Chong Su moi', 1949.0, 4498, '', 1949.0, 'matchUniqueAmount']]

In [80]:
dfManageCOP.query("`Member Name` == 'LIM KIM HOW'")

Unnamed: 0,Country,Withdraw Date,Username,Member Name,Package,Bank,Bank Acc,Bank-in Slip,Bank-in By,Bank-in Date,Remarks,Status,Amount,Unnamed: 13,Unnamed: 14,DataType,ManageCOP_Index
8,MALAYSIA,2020-08-03 00:00:00,KIMHOW0930,LIM KIM HOW,BASIC,MAYBANK BERHAD,161211100000.0,,,,,Pending,485.0,Edit,Reject,ManageCOP,4


In [86]:
dfManageCOP.query("`Member Name` == 'Chong Su moi'")

Unnamed: 0,Country,Withdraw Date,Username,Member Name,Package,Bank,Bank Acc,Bank-in Slip,Bank-in By,Bank-in Date,Remarks,Status,Amount,Unnamed: 13,Unnamed: 14,DataType,ManageCOP_Index
0,MALAYSIA,2020-08-03 00:00:00,viviansumoi,Chong Su moi,PLATINUM,Public Bank Berhad,6324120000.0,,,,,Pending,1949.0,Edit,Reject,ManageCOP,0
