Step 1: Configure your working directory  

In [1]:
import os

# Remember the directory the kernel started in, exactly once per kernel session.
# Anchoring the move to this value keeps the cell idempotent: a bare
# os.chdir("..") climbs one more level every time the cell is re-run, which
# silently breaks every relative path used further down the notebook.
if "NOTEBOOK_START_DIR" not in globals():
    NOTEBOOK_START_DIR = os.getcwd()

print(os.getcwd())
# Always land on the parent of the start directory, however often this cell runs.
os.chdir(os.path.join(NOTEBOOK_START_DIR, os.pardir))
print(os.getcwd())

c:\Users\I583373\Downloads
c:\Users\I583373


Step 2: Import packages

In [2]:
import json 

from pydantic import BaseModel, Field 
import openai 
from langchain.llms import AzureOpenAI 
from langchain.prompts import PromptTemplate 
from langchain.chains.summarize import load_summarize_chain 
from langchain.docstore.document import Document 
from langchain.output_parsers import PydanticOutputParser 

ModuleNotFoundError: No module named 'pydantic'

Step 3: Initialize the Azure OpenAI client object from LangChain

In [None]:
# Load the Azure OpenAI credentials from the secrets file; the parsed JSON is
# expected to provide an "openai_api_key" entry, which is read below.
with open("/secrets/openai-secrets-sap-aicoe-exp.json", "r") as secrets_file:
    azure_openai_credentials = json.loads(secrets_file.read())

# Gather every client setting in one mapping, then build the LangChain wrapper.
azure_llm_settings = {
    "openai_api_base": "https://aicoe-smu-kaist-challenge.openai.azure.com/",
    "openai_api_key": azure_openai_credentials["openai_api_key"],
    "openai_api_type": "azure",
    "openai_api_version": "2022-12-01",
    "deployment_name": "text-davinci-003",
    "model": "text-davinci-003",
    "temperature": 0,
}
llm = AzureOpenAI(**azure_llm_settings)

Step 4: Retrieve data from the given text file

In [None]:
def read_data_from_file(file_path: str, encoding: str = "utf-8") -> str:
    """Read a text file and return its entire contents as one string.

    Args:
        file_path: Path of the file to read; a relative path is resolved
            against the current working directory.
        encoding: Text encoding used to decode the file. Defaults to UTF-8 so
            the result does not depend on the platform's locale encoding.

    Returns:
        The decoded contents of the file.

    Raises:
        OSError: If the file cannot be opened (for example, it does not exist).
        UnicodeDecodeError: If the file's bytes are not valid in ``encoding``.
    """
    # An explicit encoding avoids locale-dependent decoding of non-ASCII text.
    with open(file_path, "r", encoding=encoding) as file:
        return file.read()

# Relative path: it is resolved against the current working directory.
file_path = 'Data/Targaryen.txt'
data = read_data_from_file(file_path=file_path)

# Wrap the whole text in one Document and keep it in a list for the chain calls below.
source_document = Document(page_content=data)
documents_to_summarize = [source_document]

Step 5: Use the out-of-the-box summarization chain from LangChain

In [None]:
# Build a "stuff" summarization chain with no custom prompt supplied.
summary_chain = load_summarize_chain(
    llm=llm,
    chain_type="stuff",
    verbose=False,
)

# Run the chain over the prepared documents, then report its result.
summary_text = summary_chain.run(documents_to_summarize)
print(f"Summary output: {summary_text}")

Step 6: Use a custom prompt template and output parsing with the summarization chain

In [None]:
# Schema for the structured summarization result: a single required text field.
# NOTE(review): documented with comments rather than a class docstring, because
# pydantic is believed to copy class docstrings into the generated schema, which
# would alter the parser's format instructions. Confirm before converting.
class OutputSchema(BaseModel):
    # `...` marks the field as required, the same as giving Field no default.
    summary: str = Field(..., description="Summary of the text")

In [None]:
# Parser that turns the raw LLM text into an OutputSchema instance.
parser = PydanticOutputParser(pydantic_object=OutputSchema)

# The prompt is spelled out line by line so that the trailing spaces and blank
# lines, which are part of the text sent to the model, stay visible.
prompt_template_str = (
    "Summarize the following text delimited in triple backticks, in 1 sentence \n"
    "```{text}``` \n"
    "\n"
    "{format_instructions} \n"
    "\n"
    "SUMMARY: \n"
)

# The format instructions are bound up front as a partial variable, so "text"
# is the only declared input variable.
format_instructions = parser.get_format_instructions()
prompt_template = PromptTemplate(
    template=prompt_template_str,
    input_variables=["text"],
    partial_variables={"format_instructions": format_instructions},
)

# Same "stuff" chain type as before, this time driven by the custom prompt.
summary_chain = load_summarize_chain(
    llm=llm,
    chain_type="stuff",
    prompt=prompt_template,
    verbose=False,
)

output = summary_chain.run(documents_to_summarize)
parsed_output = parser.parse(output)
parsed_fields = parsed_output.dict()
print(f"Parsed output: {parsed_fields}")