In [2]:
import pandas as pd

# Join the People, Location, and Leader data sets together
with pd.ExcelFile('input/PeopleData.xlsx') as xl:
    df_people = pd.read_excel(xl, sheet_name='People')
    df_leaders = pd.read_excel(xl, sheet_name='Leaders')
    df_location = pd.read_excel(xl, sheet_name='Location')
    df_datedim = pd.read_excel(xl, sheet_name='Date Dim')
    df_goals = pd.read_excel(xl, sheet_name='Goals')

df = df_people.merge(df_leaders, how='left', left_on='Leader 1', right_on='id', suffixes=('_people', '_leader'))
df = df.merge(df_location, how='left', on='Location ID')

# Remove the location id fields, the secondary leader id field
df.drop(columns='Location ID', inplace=True)

# Create last name, first name fields for the agent and the leader
df['Agent Name'] = df['first_name_people'] + ', '+ df['last_name_people']
df['Leader Name'] = df['first_name_leader'] + ', ' + df['last_name_leader']
df = df[['id_people', 'Agent Name', 'Leader Name', 'Leader 1', 'Location']]
df.rename(columns={'id_people':'id'}, inplace=True)

# Limit the dates to just 2021 and join those to the People, Location, Leader step
# Keep the id, agent name, leader 1, leader name, month start date, join, and location field
df_datedim['target_year'] = '2021'
df_datedim = df_datedim[df_datedim['Month Start Date'].dt.year.astype('str') == df_datedim['target_year']]

df['cross_join_key'] = 1
df_datedim['cross_join_key'] = 1

df = df.merge(df_datedim, how='outer', on='cross_join_key')
df.drop(columns=['cross_join_key', 'target_year'], inplace=True)

In [3]:
df.head()

Unnamed: 0,id,Agent Name,Leader Name,Leader 1,Location,Month Start Date
0,4,"Fleur, Garnam","Kylie, Howroyd",1,Margaree,2021-01-01
1,4,"Fleur, Garnam","Kylie, Howroyd",1,Margaree,2021-02-01
2,4,"Fleur, Garnam","Kylie, Howroyd",1,Margaree,2021-03-01
3,4,"Fleur, Garnam","Kylie, Howroyd",1,Margaree,2021-04-01
4,4,"Fleur, Garnam","Kylie, Howroyd",1,Margaree,2021-05-01


In [4]:
# union the worksheets in the input step
# merge the mismatched fields

col = {'Offered':'Calls Offered', 'Not Answered':'Calls Not Answered', 'Answered':'Calls Answered'}

with pd.ExcelFile('input/MetricData2021.xlsx') as xl:
    df_metric = None
    for sheet_name in xl.sheet_names:
        df_temp = pd.read_excel(xl, sheet_name=sheet_name).rename(columns=col)
        df_temp['Month'] = sheet_name
        df_metric = pd.concat([df_metric, df_temp])

# create a month start date
df_metric['Month Start Date'] = pd.to_datetime('2021' + df_metric['Month']+ '01', format='%Y%b%d')
# remove the table names and file paths field
df_metric.drop(columns='Month', inplace=True)

In [5]:
df_metric.head()

Unnamed: 0,AgentID,Calls Offered,Calls Not Answered,Calls Answered,Total Duration,Sentiment,Transfers,Month Start Date
0,1,477,18,459,2385,48,,2021-01-01
1,2,440,9,431,5720,-15,,2021-01-01
2,3,514,1,513,2056,-25,,2021-01-01
3,4,445,2,443,7565,-53,,2021-01-01
4,5,399,3,396,5187,63,,2021-01-01


In [6]:
# join the data with the people - remember we need to show every agent for every month
df = df.merge(df_metric, how='left', left_on=['id', 'Month Start Date'], right_on=['AgentID', 'Month Start Date'])

In [7]:
df.head()

Unnamed: 0,id,Agent Name,Leader Name,Leader 1,Location,Month Start Date,AgentID,Calls Offered,Calls Not Answered,Calls Answered,Total Duration,Sentiment,Transfers
0,4,"Fleur, Garnam","Kylie, Howroyd",1,Margaree,2021-01-01,4.0,445.0,2.0,443.0,7565.0,-53.0,
1,4,"Fleur, Garnam","Kylie, Howroyd",1,Margaree,2021-02-01,4.0,606.0,16.0,590.0,4848.0,97.0,
2,4,"Fleur, Garnam","Kylie, Howroyd",1,Margaree,2021-03-01,4.0,413.0,75.0,338.0,2478.0,23.0,
3,4,"Fleur, Garnam","Kylie, Howroyd",1,Margaree,2021-04-01,4.0,760.0,12.0,748.0,6080.0,21.0,
4,4,"Fleur, Garnam","Kylie, Howroyd",1,Margaree,2021-05-01,4.0,486.0,22.0,464.0,1458.0,-33.0,23.0


In [8]:
# add the goals input to the flow
# clean the goal data to have the goal name & numeric value
# add the goals to the combined people & data step
# be sure that you aren't increasing the row count - the goals should be additional columns

goals_1 = df_goals.iat[0, 0]
goals_2 = df_goals.iat[1, 0]

df[goals_1] = int(goals_1[-1])
df[goals_2] = int(goals_2[-1])

In [9]:
df.head()

Unnamed: 0,id,Agent Name,Leader Name,Leader 1,Location,Month Start Date,AgentID,Calls Offered,Calls Not Answered,Calls Answered,Total Duration,Sentiment,Transfers,Not Answered Percent < 5,Sentiment Score >= 0
0,4,"Fleur, Garnam","Kylie, Howroyd",1,Margaree,2021-01-01,4.0,445.0,2.0,443.0,7565.0,-53.0,,5,0
1,4,"Fleur, Garnam","Kylie, Howroyd",1,Margaree,2021-02-01,4.0,606.0,16.0,590.0,4848.0,97.0,,5,0
2,4,"Fleur, Garnam","Kylie, Howroyd",1,Margaree,2021-03-01,4.0,413.0,75.0,338.0,2478.0,23.0,,5,0
3,4,"Fleur, Garnam","Kylie, Howroyd",1,Margaree,2021-04-01,4.0,760.0,12.0,748.0,6080.0,21.0,,5,0
4,4,"Fleur, Garnam","Kylie, Howroyd",1,Margaree,2021-05-01,4.0,486.0,22.0,464.0,1458.0,-33.0,23.0,5,0


In [10]:
# create a calculation for the percent of offered that weren't answered (for each agent, each month)
df['Not Answered Rate'] = df['Calls Not Answered'] / df['Calls Offered']

# create a calculation for the average duration by agent (for each agent, each month)
df['Agent Avg Duration'] = df['Total Duration'] / df['Calls Answered']

In [11]:
# create a calculation that determines if the sentiment score met the goal
df['Met Sentiment Goal'] = df['Sentiment'] >= df['Sentiment Score >= 0']

# create a calculation that determines if the not answered percent met the goal
df['Met Not Answered Rate'] =  df['Not Answered Rate'] < (df['Not Answered Percent < 5'] / 100)

In [12]:
df.head()

Unnamed: 0,id,Agent Name,Leader Name,Leader 1,Location,Month Start Date,AgentID,Calls Offered,Calls Not Answered,Calls Answered,Total Duration,Sentiment,Transfers,Not Answered Percent < 5,Sentiment Score >= 0,Not Answered Rate,Agent Avg Duration,Met Sentiment Goal,Met Not Answered Rate
0,4,"Fleur, Garnam","Kylie, Howroyd",1,Margaree,2021-01-01,4.0,445.0,2.0,443.0,7565.0,-53.0,,5,0,0.004494,17.076749,False,True
1,4,"Fleur, Garnam","Kylie, Howroyd",1,Margaree,2021-02-01,4.0,606.0,16.0,590.0,4848.0,97.0,,5,0,0.026403,8.216949,True,True
2,4,"Fleur, Garnam","Kylie, Howroyd",1,Margaree,2021-03-01,4.0,413.0,75.0,338.0,2478.0,23.0,,5,0,0.181598,7.331361,True,False
3,4,"Fleur, Garnam","Kylie, Howroyd",1,Margaree,2021-04-01,4.0,760.0,12.0,748.0,6080.0,21.0,,5,0,0.015789,8.128342,True,True
4,4,"Fleur, Garnam","Kylie, Howroyd",1,Margaree,2021-05-01,4.0,486.0,22.0,464.0,1458.0,-33.0,23.0,5,0,0.045267,3.142241,False,True


In [13]:
df = df[['id', 'Agent Name', 'Leader 1', 'Leader Name', 'Month Start Date', 'Location', 'Calls Answered', 'Calls Not Answered',
        'Not Answered Rate', 'Met Not Answered Rate', 'Not Answered Percent < 5', 'Calls Offered', 'Total Duration', 'Agent Avg Duration',
        'Transfers', 'Sentiment', 'Sentiment Score >= 0', 'Met Sentiment Goal']]

In [14]:
df.to_csv('output/output-2022-week07.csv')

In [18]:
output = pd.read_csv('output/output-2022-week07.csv')
output.head()

Unnamed: 0.1,Unnamed: 0,id,Agent Name,Leader 1,Leader Name,Month Start Date,Location,Calls Answered,Calls Not Answered,Not Answered Rate,Met Not Answered Rate,Not Answered Percent < 5,Calls Offered,Total Duration,Agent Avg Duration,Transfers,Sentiment,Sentiment Score >= 0,Met Sentiment Goal
0,0,4,"Fleur, Garnam",1,"Kylie, Howroyd",2021-01-01,Margaree,443.0,2.0,0.004494,True,5,445.0,7565.0,17.076749,,-53.0,0,False
1,1,4,"Fleur, Garnam",1,"Kylie, Howroyd",2021-02-01,Margaree,590.0,16.0,0.026403,True,5,606.0,4848.0,8.216949,,97.0,0,True
2,2,4,"Fleur, Garnam",1,"Kylie, Howroyd",2021-03-01,Margaree,338.0,75.0,0.181598,False,5,413.0,2478.0,7.331361,,23.0,0,True
3,3,4,"Fleur, Garnam",1,"Kylie, Howroyd",2021-04-01,Margaree,748.0,12.0,0.015789,True,5,760.0,6080.0,8.128342,,21.0,0,True
4,4,4,"Fleur, Garnam",1,"Kylie, Howroyd",2021-05-01,Margaree,464.0,22.0,0.045267,True,5,486.0,1458.0,3.142241,23.0,-33.0,0,False
