In [1]:
import pandas as pd
import numpy as np
from langchain import PromptTemplate    
from langchain.prompts import ChatPromptTemplate
from langchain.chat_models import ChatOpenAI
from langchain.output_parsers import ResponseSchema
from langchain.output_parsers import StructuredOutputParser

import openai
import tiktoken

import os
import datetime
import random

os.environ["OPENAI_API_KEY"] = "XXXXXX"
openai.api_key = os.getenv("OPENAI_API_KEY")
#OPENAI_MODEL = "gpt-3.5-turbo-16k-0613"
OPENAI_MODEL = "gpt-3.5-turbo"

# General Functions

In [42]:
def num_tokens_from_string(string: str, encoding_name: str) -> int:
    encoding = tiktoken.get_encoding(encoding_name)
    num_tokens = len(encoding.encode(string))
    return num_tokens

# Importing Script Templates

In [6]:
df_script = pd.read_excel('../../data/scripts_call_center_ee.xlsx', sheet_name='general_script1')
df_script

Unnamed: 0,Step,Agent Script,Customer Response Options
0,Greeting,"""Good [morning/afternoon/evening], thank you f...",-
1,Identify Issue,"""Could you please tell me about the issue you'...","""[Describe issue]"""
2,Confirm Account Details,"""May I have your name and account number to pu...","""[Provide name and account number]"""
3,Verify Identity,"""For security, can you confirm your billing ad...","""[Provide verification information]"""
4,Analyze Issue,"""Thank you for that information. Let me review...",-
5,Provide Solution,"""I've looked into your issue, and it appears t...","""Yes/No"""
6,Schedule Technician,"""Would you like me to schedule a technician to...","""[Provide date and time]"""
7,Confirm Resolution,"""I have scheduled the technician for [date/tim...","""No, thank you"" / ""Yes, [describe other issue]"""
8,Closing,"""Thank you for calling [Company Name]. Have a ...","""Thank you, goodbye"""


In [9]:
df_script['Step']

0                   Greeting
1             Identify Issue
2    Confirm Account Details
3            Verify Identity
4              Analyze Issue
5           Provide Solution
6        Schedule Technician
7         Confirm Resolution
8                    Closing
Name: Step, dtype: object

# Importing Data Transcripts Calls

In [4]:
df_transcripts = pd.read_csv('../../data/transcripts_2022.csv')
df_transcripts

Unnamed: 0,Transcript,Date
0,Agent 1: Thank you for calling ElectricVolt. T...,2022-01-01 00:00:00.000000000
1,"Agent 2: Thank you for calling ElectricVolt, m...",2022-01-02 12:33:08.284518828
2,Agent 2: Thank you for calling ElectricVolt. M...,2022-01-04 01:06:16.569037656
3,"Agent 3: Thank you for calling ElectricVolt, m...",2022-01-05 13:39:24.853556485
4,"Agent 1: Thank you for calling ElectricVolt, m...",2022-01-07 02:12:33.138075313
...,...,...
235,"Agent 1: Thank you for calling ElectricVolt, m...",2022-12-24 21:47:26.861924688
236,Agent 1: Thank you for calling ElectricVolt. M...,2022-12-26 10:20:35.146443516
237,[Phone ringing]\n\nAgent 1: Thank you for call...,2022-12-27 22:53:43.430962344
238,Agent 1: Thank you for calling ElectricVolt. M...,2022-12-29 11:26:51.715481172


# Initializing Model

In [7]:
chat_model = ChatOpenAI(model_name = OPENAI_MODEL, temperature=0) #deterministic
chat_model

ChatOpenAI(cache=None, verbose=False, callbacks=None, callback_manager=None, tags=None, metadata=None, client=<class 'openai.api_resources.chat_completion.ChatCompletion'>, model_name='gpt-3.5-turbo-16k-0613', temperature=0.0, model_kwargs={}, openai_api_key='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', openai_api_base='', openai_organization='', openai_proxy='', request_timeout=None, max_retries=6, streaming=False, n=1, max_tokens=None, tiktoken_model_name=None)

# Prompts Templating

In [153]:
prompt_template = ChatPromptTemplate.from_template(
"""
Based in the following transcript for a call center in electrical company:

{transcript}

Provide an score between 1 - 5 if following Script Quality Criteria is fulfilled:

{script_quality_criteria}

If you can't apply a criteria the check is False and the score is 0.

The ouput will be a JSON file with scores for each criteria (step) found in the transcript:

{format_instructions}

""")

In [157]:
#preparing schemas for output in JSON Format
QA01_Greeting_schema = ResponseSchema(name="qa01_greeting_schema",
                             description="""
                             Return an score from 1 to 5 based in the quality of the greeting criteria, 
                             return 0 if the greeting is not correct or not exist

                             The Score will be based in the following Agent Script: %s
                             
                             """
                             % str(df_script['Agent Script'][0]))

QA02_Identify_schema = ResponseSchema(name="qa02_identify_schema",
                             description="""
                             Return an score from 1 to 5 based in the quality  of the identification criteria, 
                             return 0 if the identification is not correct or not exist

                             The Score will be based in the following Agent Script: %s
                             """
                             % str(df_script['Agent Script'][1]))

QA03_confirm_schema = ResponseSchema(name="qa03_confirm_schema",
                             description="""
                             Return an score from 1 to 5 based in the quality of the confirmation criteria,
                             return 0 if the confirmation is not correct or not exist

                             The Score will be based in the following Agent Script: %s
                             """
                             % str(df_script['Agent Script'][2]))      

QA04_verify_schema = ResponseSchema(name="qa04_verify_schema",
                             description="""
                             Return an score from 1 to 5 based in the quality of the verification criteria,
                             return 0 if the verification is not correct or not exist

                             The Score will be based in the following Agent Script: %s
                             """
                             % str(df_script['Agent Script'][3]))      

QA05_analyze_schema = ResponseSchema(name="qa05_analyze_schema",
                             description="""
                             Return an score from 1 to 5 based in the quality of the analysis criteria,
                             return 0 if the analysis is not correct or not exist

                             The Score will be based in the following Agent Script: %s
                             """
                             % str(df_script['Agent Script'][4]))                      

QA06_solution_schema = ResponseSchema(name="qa06_solution_schema",
                             description="""
                             Return an score from 1 to 5 based in the quality of the solution criteria,
                             return 0 if the solution is not correct or not exist

                             The Score will be based in the following Agent Script: %s
                             """
                             % str(df_script['Agent Script'][5]))     

QA07_schedule_schema = ResponseSchema(name="qa07_schedule_schema",
                             description="""
                             Return an score from 1 to 5 based in the quality of the schedule criteria,
                             return 0 if the schedule is not correct or not exist

                             The Score will be based in the following Agent Script: %s
                             """
                             % str(df_script['Agent Script'][6]))       

QA08_resolution_schema = ResponseSchema(name="qa08_resolution_schema",
                             description="""
                             Return an score from 1 to 5 based in the quality of the resolution criteria,
                             return 0 if the resolution is not correct or not exist

                             The Score will be based in the following Agent Script: %s
                             """
                             % str(df_script['Agent Script'][7]))         

QA09_closing_schema = ResponseSchema(name="qa09_closing_schema",
                             description="""
                             Return an score from 1 to 5 based in the quality of the closing criteria,
                             return 0 if the closing is not correct or not exist

                             The Score will be based in the following Agent Script: %s
                             """
                             % str(df_script['Agent Script'][8]))          

response_schemas = [QA01_Greeting_schema, QA02_Identify_schema, QA03_confirm_schema, 
                    QA04_verify_schema, QA05_analyze_schema, QA06_solution_schema, 
                    QA07_schedule_schema, QA08_resolution_schema, QA09_closing_schema]
output_parser = StructuredOutputParser.from_response_schemas(response_schemas)
format_instructions = output_parser.get_format_instructions()

In [180]:
output_score_list = []
transcript_number = 1
tokens_list = []
for transcript in df_transcripts['Transcript']:
    prompt_call = prompt_template.format_messages(transcript=transcript, 
                                              script_quality_criteria=df_script,
                                              format_instructions=format_instructions)
    #Calling LLM
    print("Transcript number: ", transcript_number)

    n_tokens = num_tokens_from_string(str(prompt_call[0]),encoding_name="cl100k_base")
    print("Number of Tokens: ", n_tokens)
    response = chat_model(prompt_call)

    #Parsing LLM output
    output_dict = output_parser.parse(response.content)

    #appending intention and explanation to list
    aux_list = [output_dict.get("qa01_greeting_schema"), output_dict.get("qa02_identify_schema"), output_dict.get("qa03_confirm_schema"),
                output_dict.get("qa04_verify_schema"), output_dict.get("qa05_analyze_schema"), output_dict.get("qa06_solution_schema"),
                output_dict.get("qa07_schedule_schema"), output_dict.get("qa08_resolution_schema"), output_dict.get("qa09_closing_schema")]

    output_score_list.append(aux_list)
    tokens_list.append(n_tokens)

    transcript_number += 1

Transcript number:  1
Number of Tokens:  1767
Transcript number:  2
Number of Tokens:  1764
Transcript number:  3
Number of Tokens:  1813
Transcript number:  4
Number of Tokens:  1857
Transcript number:  5
Number of Tokens:  2044
Transcript number:  6
Number of Tokens:  1719
Transcript number:  7
Number of Tokens:  1863
Transcript number:  8
Number of Tokens:  1704
Transcript number:  9
Number of Tokens:  1796
Transcript number:  10
Number of Tokens:  1800
Transcript number:  11
Number of Tokens:  1832
Transcript number:  12
Number of Tokens:  1754
Transcript number:  13
Number of Tokens:  1705
Transcript number:  14
Number of Tokens:  1880
Transcript number:  15
Number of Tokens:  1807
Transcript number:  16
Number of Tokens:  1699
Transcript number:  17
Number of Tokens:  1902
Transcript number:  18
Number of Tokens:  1851
Transcript number:  19
Number of Tokens:  1879
Transcript number:  20
Number of Tokens:  1889
Transcript number:  21
Number of Tokens:  1923
Transcript number:  22

# Saving QA Transcript

In [192]:
df_score_transcript = pd.DataFrame(output_score_list)
df_score_transcript.columns = "QA_"+df_script['Step']

In [193]:
#append df_score_transcript to df_transcript as additinoal columns
df_transcripts_final_score = df_transcripts.copy()

df_transcripts_final_score = pd.concat([df_transcripts_final_score, df_score_transcript], axis=1)
df_transcripts_final_score

Unnamed: 0,Transcript,Date,QA_Greeting,QA_Identify Issue,QA_Confirm Account Details,QA_Verify Identity,QA_Analyze Issue,QA_Provide Solution,QA_Schedule Technician,QA_Confirm Resolution,QA_Closing
0,Agent 1: Thank you for calling ElectricVolt. T...,2022-01-01 00:00:00.000000000,5,5,4,4,5,5,5,5,5
1,"Agent 2: Thank you for calling ElectricVolt, m...",2022-01-02 12:33:08.284518828,5,5,4,4,5,5,5,5,5
2,Agent 2: Thank you for calling ElectricVolt. M...,2022-01-04 01:06:16.569037656,5,5,5,4,5,5,5,5,5
3,"Agent 3: Thank you for calling ElectricVolt, m...",2022-01-05 13:39:24.853556485,5,5,5,5,5,5,5,5,5
4,"Agent 1: Thank you for calling ElectricVolt, m...",2022-01-07 02:12:33.138075313,5,5,5,5,5,5,5,5,5
...,...,...,...,...,...,...,...,...,...,...,...
235,"Agent 1: Thank you for calling ElectricVolt, m...",2022-12-24 21:47:26.861924688,5,5,5,4,5,5,5,5,5
236,Agent 1: Thank you for calling ElectricVolt. M...,2022-12-26 10:20:35.146443516,5,5,4,4,5,5,5,5,5
237,[Phone ringing]\n\nAgent 1: Thank you for call...,2022-12-27 22:53:43.430962344,5,5,4,4,5,5,5,5,5
238,Agent 1: Thank you for calling ElectricVolt. M...,2022-12-29 11:26:51.715481172,5,5,5,5,5,5,5,5,5


In [195]:
now = datetime.datetime.now()
now = now.strftime("%Y%m%d%H%M%S")

df_transcripts_final_score.to_csv("../../data/interim/transcripts_final_score_{}.csv".format(now), index=False)