In [59]:
from langchain.llms import AzureOpenAI
import openai
from dotenv import load_dotenv
import os
from IPython.display import display, HTML, JSON
import json

In [None]:


load_dotenv()
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY") 
OPENAI_DEPLOYMENT_ENDPOINT = os.getenv("OPENAI_DEPLOYMENT_ENDPOINT")
OPENAI_DEPLOYMENT_NAME = os.getenv("OPENAI_DEPLOYMENT_NAME")
OPENAI_MODEL_NAME = os.getenv("OPENAI_MODEL_NAME")
OPENAI_EMBEDDING_DEPLOYMENT_NAME = os.getenv("OPENAI_EMBEDDING_DEPLOYMENT_NAME")
OPENAI_EMBEDDING_MODEL_NAME = os.getenv("OPENAI_EMBEDDING_MODEL_NAME")
OPENAI_DEPLOYMENT_VERSION = os.getenv("OPENAI_DEPLOYMENT_VERSION")

# Configure OpenAI API
openai.api_type = "azure"
openai.api_version = OPENAI_DEPLOYMENT_VERSION
openai.api_base = OPENAI_DEPLOYMENT_ENDPOINT
openai.api_key = OPENAI_API_KEY

### Model initialization

In [None]:
def init_llm(model="text-davinci-003",
             deployment_name="text-davinci-003", 
             temperature=0,
             max_tokens=3000,
             stop="<|im_end|>", 
             ):
    
    llm = AzureOpenAI(deployment_name=deployment_name,  
                  model=model,
                  temperature=temperature,) 
    return llm

### **Text Summarization**

There are two types of text summarization: **extractive** and **abstractive**. Extractive summarization involves selecting phrases from the source text and concatenating them to make a summary. In contrast, abstractive summarization aims to generate new phrases and sentences that capture the salient information from the source text. 

#### Basic Summarization 

In [None]:
def summarize_text(llm, prompt_prefix, text_file):
    
    with open(text_file, 'r') as file:
        # read the entire file into a string
        data = file.read()

    # concatenate the prompt with the data
    prompt = prompt_prefix + data
    #check the number of tokens in the prompt
    num_tokens = llm.get_num_tokens(prompt)
    print (f"Number of tokens in final prompt is: {num_tokens}")
    return llm(prompt)

In [None]:
llm=init_llm()
prompt_prefix = "Summarize the text below for call center supervisor:\n"

sum = summarize_text(llm, prompt_prefix, "./data/bank-call-center-transcript.txt")
display(HTML(sum))



#### More advanced prompts

In [None]:
llm=init_llm()
prompt_prefix = """ Summarize the text below for call center supervisor.\n 
Did the clerk meet all the items below: \n

1.Greet the Customer Politely and Professionally \n
2.Understand the Customer's Inquiry \n
3.Provide Clear and Detailed Information \n
4.Ask Clarifying Questions \n
5.Discuss the Benefits and Risks \n
6.Explain Available Tools and Resources \n
7.Invite Further Action \n
8.Offer to Assist with Next Steps \n
9.End on a Positive Note \n

"""

sum = summarize_text(llm, prompt_prefix, "./data/bank-call-center-transcript.txt")
display(HTML(sum))

##### Output in tabular format

In [70]:

llm=init_llm()
prompt_prefix = """Summarize the text below for call center supervisor.\n 
Provide the output in HTML format in tabular represenation, one line for each item.\n
The items clerk met mark with green color, the items the clerk didn't meet mark with red color.\n

Did the clerk meet all the items below:\n

1.Greet the Customer Politely and Professionally\n
2.Understand the Customer's Inquiry\n
3.Provide Clear and Detailed Information\n
4.Ask Clarifying Questions\n
5.Discuss the Benefits and Risks\n
6.Explain Available Tools and Resources\n
7.Invite Further Action\n
8.Offer to Assist with Next Steps\n
9.End on a Positive Note\n
"""

res = summarize_text(llm, prompt_prefix, "./data/bank-call-center-transcript.txt")
display(HTML(res))

Number of tokens in final prompt is: 699


Item,Did Clerk Meet?
Greet the Customer Politely and Professionally,Yes
Understand the Customer's Inquiry,Yes
Provide Clear and Detailed Information,Yes
Ask Clarifying Questions,Yes
Discuss the Benefits and Risks,Yes
Explain Available Tools and Resources,


##### Output as JSON

In [69]:
llm=init_llm()
prompt_prefix = """Summarize the text below for call center supervisor.\n 
Provide the output in JSON format with schema as below:\n

{
    "Greet the Customer Politely and Professionally": "Yes/No",
    "Understand the Customer's Inquiry": "Yes/No",
    "Provide Clear and Detailed Information": "Yes/No",
    "Ask Clarifying Questions": "Yes/No",
    "Discuss the Benefits and Risks": "Yes/No",
    "Explain Available Tools and Resources": "Yes/No",
    "Invite Further Action": "Yes/No",
    "Offer to Assist with Next Steps": "Yes/No",
    "End on a Positive Note": "Yes/No"
}

Did the clerk meet all the items below:\n

1.Greet the Customer Politely and Professionally\n
2.Understand the Customer's Inquiry\n
3.Provide Clear and Detailed Information\n
4.Ask Clarifying Questions\n
5.Discuss the Benefits and Risks\n
6.Explain Available Tools and Resources\n
7.Invite Further Action\n
8.Offer to Assist with Next Steps\n
9.End on a Positive Note\n
"""

res = summarize_text(llm, prompt_prefix, "./data/bank-call-center-transcript.txt")
display(JSON(json.loads(res)))


Number of tokens in final prompt is: 807


<IPython.core.display.JSON object>