In [1]:
from langchain.llms import AzureOpenAI
import openai
from dotenv import load_dotenv
import os
from IPython.display import display, HTML, JSON
import json

In [3]:
# Run python-dotenv's loader first so the lookups below can see its values.
load_dotenv()

# Azure OpenAI resource: credentials, endpoint and default deployment
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
OPENAI_DEPLOYMENT_ENDPOINT = os.environ.get("OPENAI_DEPLOYMENT_ENDPOINT")
OPENAI_DEPLOYMENT_NAME = os.environ.get("OPENAI_DEPLOYMENT_NAME")
OPENAI_MODEL_NAME = os.environ.get("OPENAI_MODEL_NAME")
OPENAI_DEPLOYMENT_VERSION = os.environ.get("OPENAI_DEPLOYMENT_VERSION")

# Embedding (ada) deployment
OPENAI_ADA_EMBEDDING_DEPLOYMENT_NAME = os.environ.get("OPENAI_ADA_EMBEDDING_DEPLOYMENT_NAME")
OPENAI_ADA_EMBEDDING_MODEL_NAME = os.environ.get("OPENAI_ADA_EMBEDDING_MODEL_NAME")

# Completion (davinci) deployment, used as the default by init_llm
OPENAI_DAVINCI_DEPLOYMENT_NAME = os.environ.get("OPENAI_DAVINCI_DEPLOYMENT_NAME")
OPENAI_DAVINCI_MODEL_NAME = os.environ.get("OPENAI_DAVINCI_MODEL_NAME")

# Point the openai module at the Azure resource described above.
# Any variable missing from the environment is None at this point.
openai.api_type = "azure"
openai.api_base = OPENAI_DEPLOYMENT_ENDPOINT
openai.api_version = OPENAI_DEPLOYMENT_VERSION
openai.api_key = OPENAI_API_KEY

### Model initialization

In [4]:
def init_llm(model=OPENAI_DAVINCI_MODEL_NAME,
             deployment_name=OPENAI_DAVINCI_DEPLOYMENT_NAME,
             temperature=0,
             max_tokens=3000,
             stop="<|im_end|>",
             ):
    """Build an AzureOpenAI LLM for the given deployment.

    Args:
        model: Model name, passed to AzureOpenAI as ``model``.
        deployment_name: Azure deployment name, passed as ``deployment_name``.
        temperature: Sampling temperature, passed as ``temperature``.
        max_tokens: Upper bound on completion length, passed as ``max_tokens``.
            NOTE(review): the prompt plus this value presumably has to fit in
            the model's context window -- lower it for long prompts.
        stop: Accepted for backward compatibility only; it is not applied to
            the returned LLM. NOTE(review): confirm how callers expect the stop
            sequence to be used before wiring it in.

    Returns:
        The configured AzureOpenAI instance.
    """
    # max_tokens used to be dropped here, so the library default cap applied
    # regardless of what the caller asked for.
    llm = AzureOpenAI(deployment_name=deployment_name,
                      model=model,
                      temperature=temperature,
                      max_tokens=max_tokens)
    return llm

### **Text Summarization**

There are two types of text summarization: **extractive** and **abstractive**. Extractive summarization involves selecting phrases from the source text and concatenating them to make a summary. In contrast, abstractive summarization aims to generate new phrases and sentences that capture the salient information from the source text. 

#### Basic Summarization 

In [5]:
def summarize_text(llm, prompt_prefix, text_file):
    """Send ``prompt_prefix`` followed by the contents of a text file to the LLM.

    Args:
        llm: Object that is callable with a prompt string and exposes
            ``get_num_tokens(text)``.
        prompt_prefix: Instruction text placed in front of the file contents.
        text_file: Path of the UTF-8 text file to summarize.

    Returns:
        Whatever ``llm(prompt)`` returns for the combined prompt.

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    # Decode explicitly as UTF-8 so the result does not depend on the
    # platform's default locale encoding.
    with open(text_file, 'r', encoding='utf-8') as file:
        data = file.read()

    # The final prompt is the instruction followed directly by the document
    prompt = prompt_prefix + data

    # Report the prompt size so an oversized prompt is visible before the call
    num_tokens = llm.get_num_tokens(prompt)
    print(f"Number of tokens in final prompt is: {num_tokens}")
    return llm(prompt)

In [6]:
# uses davinci model
llm = init_llm()
prompt_prefix = "Summarize the text below for a call center supervisor:\n"

# Bound to `summary` rather than `sum`, so the built-in sum() stays usable
# in the rest of the kernel session.
summary = summarize_text(llm, prompt_prefix, "./data/bank-call-center-transcript.txt")
display(HTML(summary))

Number of tokens in final prompt is: 554


#### More advanced prompts

In [8]:
llm = init_llm()

# Checklist-style prompt: the model is asked to assess the clerk against each task
prompt_prefix = """ Prepare a summary for the call center supervisor based on the points mentioned below. 
Please evaluate whether the clerk successfully accomplished the following tasks:

Greeting the customer politely and professionally.
Accurately understanding the customer's inquiry.
Providing clear and detailed information in response.
Asking questions as needed for clarification.
Discussing both benefits and risks with the customer.
Explaining the tools and resources available to the customer.
Inviting the customer to take further action.
Offering assistance for the next steps.
Ending the conversation on a positive note.

"""

# Bound to `summary` rather than `sum`, so the built-in sum() stays usable
# in the rest of the kernel session.
summary = summarize_text(llm, prompt_prefix, "./data/bank-call-center-transcript.txt")
display(HTML(summary))

Number of tokens in final prompt is: 663


##### Output in tabular format

In [9]:
# Ask for the evaluation as a color-coded HTML table and render the reply inline
llm = init_llm()
prompt_prefix = """Please compile a summary of the following information for the attention of the 
call center supervisor.
Present the output in the form of an HTML table, with each item on a separate row.

Assign a color code to each item based on the clerk's performance - 
items that were successfully addressed should be marked in green, 
whereas items that were not met should be highlighted in red.

Here are the evaluation criteria to consider:

Did the clerk greet the customer in a polite and professional manner?
Did the clerk begin the conversation on a negative note?
Did the clerk understand the customer's inquiry?
Did the clerk provide clear and detailed information?
Did the clerk ask questions to clarify the situation?


"""

res = summarize_text(
    llm,
    prompt_prefix,
    "./data/bank-call-center-transcript.txt",
)
# Surrounding whitespace is trimmed from the reply before it is rendered as HTML
display(HTML(res.strip()))

Number of tokens in final prompt is: 695


Evaluation Criteria,Performance
Did the clerk greet the customer in a polite and professional manner?,Yes
Did the clerk begin the conversation on a negative note?,No
Did the clerk understand the customer's inquiry?,Yes
Did the clerk provide clear and detailed information?,Yes
Did the clerk ask questions to clarify the situation?,Yes</


##### Output as JSON

In [10]:
llm = init_llm()
# NOTE(review): the literal below opens with four quote characters, so the
# prompt text itself starts with a '"' and ends with '"' plus a newline.
# It is left unchanged here to keep the prompt identical -- confirm whether
# the surrounding quotes are intended.
prompt_prefix = """"Prepare a summary of the following points for the call center supervisor. 
The output should be presented in JSON format, adhering to the following schema:

{
    "Greet the Customer Politely and Professionally": "Yes/No",
    "Understand the Customer's Inquiry": "Yes/No",
    "Provide Clear and Detailed Information": "Yes/No",
    "Ask Clarifying Questions": "Yes/No",
    "Discuss the Benefits and Risks": "Yes/No",
    "Explain Available Tools and Resources": "Yes/No",
    "Invite Further Action": "Yes/No",
    "Offer to Assist with Next Steps": "Yes/No",
    "End on a Positive Note": "Yes/No"
}

Please evaluate the clerk's performance based on the following points:

Did the clerk greet the customer in a polite and professional manner?
Did the clerk comprehend the customer's inquiry accurately?
Did the clerk provide comprehensive and clear information?
Did the clerk ask relevant questions to clarify the customer's situation?
Did the clerk explain the benefits and potential risks to the customer?
Did the clerk detail the tools and resources available to the customer?
Did the clerk encourage the customer to take further action?
Did the clerk offer assistance with proceeding to the next steps?
Did the clerk end the interaction on a positive and upbeat note?"
"""

res = summarize_text(llm, prompt_prefix, "./data/bank-call-center-transcript.txt")

# The prompt only *asks* for JSON; the reply may still be truncated or wrapped
# in extra text. Show the raw reply instead of failing the cell in that case.
try:
    parsed = json.loads(res)
except json.JSONDecodeError as err:
    print(f"Model response is not valid JSON ({err}); raw response follows:")
    print(res)
else:
    display(JSON(parsed))


Number of tokens in final prompt is: 847


<IPython.core.display.JSON object>

### Summarize large documents
To summarize larger documents, we can split the document into smaller chunks and summarize each chunk separately. We can then combine the summaries of each chunk to get the final summary.
LangChain has a built-in chain for doing that. 



In [11]:
from langchain import OpenAI
from langchain.chains.summarize import load_summarize_chain
from langchain.text_splitter import RecursiveCharacterTextSplitter

In [13]:
from langchain.document_loaders import PyPDFLoader

llm = init_llm()

# Load the PDF and split it; the result is a List[Document]
large_pdf_path = "./data/Large_language_model.pdf"
loader = PyPDFLoader(large_pdf_path)
pages = loader.load_and_split()

# Add up the token count of every chunk's text
total_tokens = 0
for page in pages:
    page_tokens = llm.get_num_tokens(page.page_content)
    total_tokens = total_tokens + page_tokens

# The document has more than 14000 tokens, which is too many for the LLM to
# process in one go. This is why the document is split into smaller chunks.
print(f"Total tokens in the document: {total_tokens}")


Total tokens in the document: 14386


In [14]:
# Build the summarization chain using the map_reduce strategy
summary_chain = load_summarize_chain(
    llm=llm,
    chain_type='map_reduce',
    verbose=False,
)

##### The following chain_types are available: stuff, map_reduce, refine, map-rerank
See here for the details: https://docs.langchain.com/docs/components/chains/index_related_chains

In [19]:
# Running the chain over every page issues many calls in a loop and causes
# RateLimitError: Too Many Requests, so only the first two pages are summarized.
# Full-document run would be: summary_chain.run(pages)
# Bound to `document_summary` rather than `sum`, so the built-in sum() stays usable.
document_summary = summary_chain.run(pages[0:2])

display(HTML(document_summary))

Retrying langchain.llms.openai.completion_with_retry.<locals>._completion_with_retry in 4.0 seconds as it raised RateLimitError: Requests to the Creates a completion for the provided prompt, parameters and chosen model. Operation under Azure OpenAI API version 2023-05-15 have exceeded token rate limit of your current OpenAI S0 pricing tier. Please retry after 45 seconds. Please go here: https://aka.ms/oai/quotaincrease if you would like to further increase the default rate limit..
Retrying langchain.llms.openai.completion_with_retry.<locals>._completion_with_retry in 4.0 seconds as it raised RateLimitError: Requests to the Creates a completion for the provided prompt, parameters and chosen model. Operation under Azure OpenAI API version 2023-05-15 have exceeded token rate limit of your current OpenAI S0 pricing tier. Please retry after 41 seconds. Please go here: https://aka.ms/oai/quotaincrease if you would like to further increase the default rate limit..
Retrying langchain.llms.open

RateLimitError: Requests to the Creates a completion for the provided prompt, parameters and chosen model. Operation under Azure OpenAI API version 2023-05-15 have exceeded token rate limit of your current OpenAI S0 pricing tier. Please retry after 15 seconds. Please go here: https://aka.ms/oai/quotaincrease if you would like to further increase the default rate limit.