In [11]:
import numpy as np
import matplotlib as plt
import seaborn as sns
import pandas as pd

In [16]:
df = pd.read_csv("Call_Center_Dataset.csv")
df.head(5)

Unnamed: 0,Call Id,Agent,Date,Time,Topic,Answered (Y/N),Resolved,Speed of answer in seconds,AvgTalkDuration,Satisfaction rating
0,ID0001,Diane,2021-01-01,9:12:58,Contract related,Y,Y,109.0,0:02:23,3.0
1,ID0002,Becky,2021-01-01,9:12:58,Technical Support,Y,N,70.0,0:04:02,3.0
2,ID0003,Stewart,2021-01-01,9:47:31,Contract related,Y,Y,10.0,0:02:11,3.0
3,ID0004,Greg,2021-01-01,9:47:31,Contract related,Y,Y,53.0,0:00:37,2.0
4,ID0005,Becky,2021-01-01,10:00:29,Payment related,Y,Y,95.0,0:01:00,3.0


In [35]:
df = pd.read_csv("Call_Center_Dataset.csv")
df.nunique()

Call Id                       5000
Agent                            8
Date                            90
Time                           375
Topic                            5
Answered (Y/N)                   2
Resolved                         2
Speed of answer in seconds     116
AvgTalkDuration                391
Satisfaction rating              5
dtype: int64

In [36]:
# drop duplicated rows
df = df.drop_duplicates()

In [37]:
# see the number of missing values in each column
df.isnull().sum()

Call Id                         0
Agent                           0
Date                            0
Time                            0
Topic                           0
Answered (Y/N)                  0
Resolved                        0
Speed of answer in seconds    946
AvgTalkDuration               946
Satisfaction rating           946
dtype: int64

In [38]:
#drop missing values
df = df.dropna()

In [39]:
#descriptive statistics
df.describe()

Unnamed: 0,Speed of answer in seconds,Satisfaction rating
count,4054.0,4054.0
mean,67.52072,3.403552
std,33.592872,1.21222
min,10.0,1.0
25%,39.0,3.0
50%,68.0,3.0
75%,97.0,4.0
max,125.0,5.0


In [40]:
# get info of Dataset
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 4054 entries, 0 to 4998
Data columns (total 10 columns):
 #   Column                      Non-Null Count  Dtype  
---  ------                      --------------  -----  
 0   Call Id                     4054 non-null   object 
 1   Agent                       4054 non-null   object 
 2   Date                        4054 non-null   object 
 3   Time                        4054 non-null   object 
 4   Topic                       4054 non-null   object 
 5   Answered (Y/N)              4054 non-null   object 
 6   Resolved                    4054 non-null   object 
 7   Speed of answer in seconds  4054 non-null   float64
 8   AvgTalkDuration             4054 non-null   object 
 9   Satisfaction rating         4054 non-null   float64
dtypes: float64(2), object(8)
memory usage: 348.4+ KB


In [41]:
# Convert object datatype to time dtype
df['AvgTalkDuration'] = pd.to_datetime(df['AvgTalkDuration'], format='%H:%M:%S').dt.time
# use lambda to caculate total_s
df['TotalSeconds'] = df['AvgTalkDuration'].apply(lambda x: x.hour * 3600 + x.minute * 60 + x.second)
df['TotalSeconds']

0       143
1       242
2       131
3        37
4        60
       ... 
4990    386
4995    340
4996    196
4997    109
4998     58
Name: TotalSeconds, Length: 4054, dtype: int64

In [43]:
# retrieve the list of column labels
df.columns

Index(['Call Id', 'Agent', 'Date', 'Time', 'Topic', 'Answered (Y/N)',
       'Resolved', 'Speed of answer in seconds', 'AvgTalkDuration',
       'Satisfaction rating', 'TotalSeconds'],
      dtype='object')

In [44]:
#Total Calls: Count the total number of calls made to the call center.
len(df)

4054

In [45]:
#Total Unique Callers: Count the number of unique callers who have contacted the call center.
df['Call Id'].nunique()

4054

In [46]:
#Total Calls Resolved
len(df[df['Resolved']== 'Y'])

3646

In [47]:
#Call Resolution Rate: Percentage of calls that were successfully resolved.
Call_Resolution_Rate = len(df[df['Resolved']== 'Y'])/len(df)*100
# Round to 2 decimal places
Call_Resolution_Rate = round(Call_Resolution_Rate,2)
# Format as a string with 2 decimal places and %
Call_Resolution_Rate = f"{Call_Resolution_Rate:.2f}%"
Call_Resolution_Rate

'89.94%'

In [48]:
#Average Call Duration (/s): Calculate the average duration of calls.
Average_Call_Duration = df['TotalSeconds'].mean()
Average_Call_Duration = round (Average_Call_Duration,2)
Average_Call_Duration

224.92

In [49]:
#Total Abandoned Calls: Count the number of calls that were abandoned by callers before being answered.

len(df[df['Answered (Y/N)']=='N'])

0

In [50]:
#Abandonment Rate: Percentage of calls that were abandoned.

len(df[df['Answered (Y/N)']=='N'])/len(df)

0.0

In [51]:
#Average Hold Time: Calculate the average time callers spend on hold before their calls are answered.
average_hold_time = df[df['Answered (Y/N)'] == 'Y']['Speed of answer in seconds'].mean()
average_hold_time = round(average_hold_time,2)
average_hold_time

67.52

In [52]:
#Service Level Agreement (SLA) Adherence: Measure the percentage of calls answered within a certain timeframe, e.g., 80% of calls answered within 20 seconds.
#Assume the timeframe threshold is 60
Sla_threshold = 90
SLA_Adherence = len(df[df['Speed of answer in seconds']<=Sla_threshold])/len(df)
SLA_Adherence = round(SLA_Adherence,2)
SLA_Adherence = f"{SLA_Adherence:.2f}%"
SLA_Adherence

'0.70%'

In [53]:
#Caller Satisfaction: Calculate the average caller satisfaction score.
Caller_Satisfaction = df['Satisfaction rating'].mean()
Caller_Satisfaction = round(Caller_Satisfaction,2)
Caller_Satisfaction

3.4