## Setup for inference

In [1]:
import os

from dotenv import load_dotenv
from langchain_openai import ChatOpenAI

# Load variables from a .env file (python-dotenv), then read the OpenAI key
# from the process environment.
load_dotenv()
OPENAI_API_KEY = os.environ.get('OPENAI_API_KEY')

# Chat model client reused by the later cells of this notebook.
llm = ChatOpenAI(api_key=OPENAI_API_KEY)

# Sanity check: one round trip to the model; the reply is the cell output.
llm.invoke("How tall is the eiffel tower?")

AIMessage(content='The Eiffel Tower is 1,063 feet (324 meters) tall, including antennas.', response_metadata={'token_usage': {'completion_tokens': 20, 'prompt_tokens': 16, 'total_tokens': 36}, 'model_name': 'gpt-3.5-turbo', 'system_fingerprint': 'fp_c2295e73ad', 'finish_reason': 'stop', 'logprobs': None}, id='run-e0631445-8b1a-451d-a2f5-08a52e1fc5c6-0')

### Retrieval Augmented Generation (RAG)

In [2]:
import pandas as pd

# Read the semicolon-delimited question/answer file.
df = pd.read_csv('../data/reinvent_qa.csv', delimiter=';')

# Lift the row and column-width display limits together for this preview only.
with pd.option_context("display.max_rows", None, "display.max_colwidth", None):
    display(df.head())


Unnamed: 0,Question,Answer
0,What city was AWS re:Invent 2022 held in?,Las Vegas
1,When did AWS re:Invent 2022 take place?,"November 28 to December 2, 2022"
2,How many years has AWS re:Invent been running?,11 years
3,How many people attended re:Invent 2022 in person?,"Over 51,000"
4,How many keynotes were featured at re:Invent 2022?,5 keynotes


#### Prompting without retriever


In [4]:


template = """

Human: Answer the question below.
Keep your response as precise as possible and limit it to a few words. 
If you don't know the answer, respond "I don't know".

Here is the question: 
{question}

Assistant:"""


def answer_question_llm(question: str) -> str:
    """Ask the LLM a single question without any retrieved context.

    The question is substituted into the module-level ``template``, the
    rendered prompt is printed so it can be inspected, and the model's reply
    is returned with surrounding whitespace removed.

    Args:
        question: The question text to insert into the prompt.

    Returns:
        The stripped ``content`` of the message returned by ``llm.invoke``.
    """
    # Imported locally because no earlier cell imports PromptTemplate; without
    # this the function raises NameError on a fresh kernel (Restart & Run All).
    from langchain.prompts import PromptTemplate

    prompt_message = PromptTemplate.from_template(template).format(
        question=question
    )
    print(prompt_message)
    answer = llm.invoke(prompt_message)
    return answer.content.strip()


# Smoke test: ask the first question from the dataset with no retrieved context
answer_question_llm("What city was AWS re:Invent 2022 held in?")



Human: Answer the question below.
Keep your response as precise as possible and limit it to a few words. 
If you don't know the answer, respond "I don't know".

Here is the question: 
What city was AWS re:Invent 2022 held in?

Assistant:


"I don't know."

In [5]:
from langchain.prompts import PromptTemplate

template = """

Human: Answer the question below.
Keep your response as precise as possible and limit it to a few words. 
If you don't know the answer, respond "I don't know".

Here is the question: 
{question}

Assistant:"""

def ask_llm(row):
    """Answer the question held in one DataFrame row, without retrieval.

    Args:
        row: A row (as passed by ``DataFrame.apply(..., axis=1)``) that has a
            ``'Question'`` entry.

    Returns:
        The model's reply text with leading/trailing whitespace stripped.
    """
    prompt = PromptTemplate.from_template(template)
    rendered_prompt = prompt.format(question=row['Question'])
    response = llm.invoke(rendered_prompt)
    return response.content.strip()

# Query the LLM once per row and store the replies beside the reference answers.
df["LLM_answer"] = df.apply(ask_llm, axis=1)

# Show the preview with the row and column-width limits lifted.
with pd.option_context("display.max_rows", None, "display.max_colwidth", None):
    display(df.head())

Unnamed: 0,Question,Answer,LLM_answer
0,What city was AWS re:Invent 2022 held in?,Las Vegas,Las Vegas
1,When did AWS re:Invent 2022 take place?,"November 28 to December 2, 2022",I don't know.
2,How many years has AWS re:Invent been running?,11 years,9 years
3,How many people attended re:Invent 2022 in person?,"Over 51,000",I don't know.
4,How many keynotes were featured at re:Invent 2022?,5 keynotes,I don't know.


#### Prompting with retriever

One way to incorporate current knowledge into the model is to supply it with information from related sources. Let's use a LangChain document loader to fetch such a source.

In [6]:
import re
from langchain.document_loaders import UnstructuredURLLoader

# Pages to ingest; this example uses a single blog post.
urls = [
    "https://aws.amazon.com/blogs/security/three-key-security-themes-from-aws-reinvent-2022/",
]

# Build the URL loader and fetch the documents
loader = UnstructuredURLLoader(urls=urls)
data = loader.load()

# Tidy the text for display: runs of 3+ newlines collapse to one newline,
# then runs of 2+ spaces collapse to one space.
data[0].page_content = re.sub(
    " {2,}", " ", re.sub("\n{3,}", "\n", data[0].page_content)
)

# Print an excerpt of the cleaned text followed by an empty line
print(data[0].page_content[214:1200], end="\n\n")


AWS re:Invent returned to Las Vegas, Nevada, November 28 to December 2, 2022. After a virtual event in 2020 and a hybrid 2021 edition, spirits were high as over 51,000 in-person attendees returned to network and learn about the latest AWS innovations.

Now in its 11th year, the conference featured 5 keynotes, 22 leadership sessions, and more than 2,200 breakout sessions and hands-on labs at 6 venues over 5 days.

With well over 100 service and feature announcements—and innumerable best practices shared by AWS executives, customers, and partners—distilling highlights is a challenge. From a security perspective, three key themes emerged.

Turn data into actionable insights

Security teams are always looking for ways to increase visibility into their security posture and uncover patterns to make more informed decisions. However, as AWS Vice President of Data and Machine Learning, Swami Sivasubramanian, pointed out during his keynote, data often exists in silos; it isn’t alw



#### Split documents into chunks

Handling extensive documents can be problematic for RAG due to their potential to exceed the context window's capacity. To manage this, documents are typically divided into smaller segments. This division not only facilitates the retrieval of the most pertinent segments by the retriever but also prevents the need to process the whole document through an LLM at once. In this segment, we utilize the [`RecursiveCharacterTextSplitter`](https://api.python.langchain.com/en/latest/text_splitter/langchain.text_splitter.RecursiveCharacterTextSplitter.html), a standard [text splitter](https://python.langchain.com/docs/modules/data_connection/document_transformers/#text-splitters) tool in LangChain. This splitter operates by taking an array of separators, initially splitting the text using the first separator, and progressing to subsequent separators if the resulting segments are still excessively large.


In [7]:
import random
from langchain.text_splitter import (
    RecursiveCharacterTextSplitter,
)

# Recursive character splitter: the separators are regex patterns tried in the
# order listed (blank line, newline, after ". ", space, empty pattern).
recur_splitter = RecursiveCharacterTextSplitter(
    separators=[r"\n\n", r"\n", r"(?<=\. )", r" ", r""],
    is_separator_regex=True,
    chunk_size=2000,
    chunk_overlap=200,
)

# Split the loaded documents into chunks
data_splits = recur_splitter.split_documents(data)

# Show the text of one chunk picked at random
print(random.choice(data_splits).page_content)

AWS Security Blog

Three key security themes from AWS re:Invent 2022

Anne Grahn and

Paul Hawkins | on

AWS re:Invent, Events, Security, Identity, & Compliance, Thought Leadership |

Permalink |

Comments |

Share
AWS re:Invent returned to Las Vegas, Nevada, November 28 to December 2, 2022. After a virtual event in 2020 and a hybrid 2021 edition, spirits were high as over 51,000 in-person attendees returned to network and learn about the latest AWS innovations.

Now in its 11th year, the conference featured 5 keynotes, 22 leadership sessions, and more than 2,200 breakout sessions and hands-on labs at 6 venues over 5 days.

With well over 100 service and feature announcements—and innumerable best practices shared by AWS executives, customers, and partners—distilling highlights is a challenge. From a security perspective, three key themes emerged.

Turn data into actionable insights

Security teams are always looking for ways to increase visibility into their security posture and uncove

#### Embeddings and vector databases

For RAG to be successful, we need a way of doing a semantic search to **retrieve the documents that contain the most relevant information to be used in the answer generation process**. At this stage, the concept of **embedding** comes into play. This is the transformation of the previously extracted and chunked text into a vector in a high-dimensional space that represents the semantic meaning.

In this example we will use OpenAI's `text-embedding-3-large` model to generate the embeddings.

In [11]:
from langchain_openai import OpenAIEmbeddings

# Embedding model client; `dimensions=1024` requests 1024-element vectors,
# which the smoke test below confirms by printing the vector length.
llm_embeddings = OpenAIEmbeddings(
    model="text-embedding-3-large",
    dimensions=1024,
    api_key=OPENAI_API_KEY,
)

# Smoke test: embed one short sentence and report the vector length plus its
# first 25 components.
text = "This is a test document."
query_result = llm_embeddings.embed_query(text)
print(f"""
    vector size: {len(query_result)}
    query vector: {query_result[:25]} ...
""")



    vector size: 1024
    query vectoir: [-0.019694647271140783, -0.0373372483308096, -0.027477411446023763, 0.07857839199827614, -0.030555480763231713, 0.02947940838592398, -0.03193185622055001, 0.0879377287167644, -0.02294789276413989, 0.02597591280977609, 0.02533777621533371, 0.033858777390447584, -0.021508956648679415, -0.06651635736201304, -0.009622099119529994, 0.05310297376686256, -0.03195687899369087, -0.0016563398507412205, -0.01775521285202776, -0.03228220484684229, 0.022685129431775666, 0.006140500798186626, -0.05665652047722721, 0.06246231048535083, 0.02177171998104364] ...



We also need a place to store the documents' vector representations efficiently, allowing for quick retrieval. For the sake of this example we will use FAISS (Facebook AI Similarity Search). For a real production system you will need a scalable vector search database. See more: https://python.langchain.com/docs/integrations/vectorstores/

In [17]:
from langchain_community.vectorstores import FAISS

# Embed every chunk produced by the RecursiveCharacterTextSplitter and
# index the resulting vectors in a FAISS store
db = FAISS.from_documents(documents=data_splits, embedding=llm_embeddings)

In [20]:
# Run a similarity search against the store and print the first chunk returned
docs = db.search(query="When was AWS Re:Invent?", search_type='similarity')
print(docs[0].page_content)


AWS Security Blog

Three key security themes from AWS re:Invent 2022

Anne Grahn and

Paul Hawkins | on

AWS re:Invent, Events, Security, Identity, & Compliance, Thought Leadership |

Permalink |

Comments |

Share
AWS re:Invent returned to Las Vegas, Nevada, November 28 to December 2, 2022. After a virtual event in 2020 and a hybrid 2021 edition, spirits were high as over 51,000 in-person attendees returned to network and learn about the latest AWS innovations.

Now in its 11th year, the conference featured 5 keynotes, 22 leadership sessions, and more than 2,200 breakout sessions and hands-on labs at 6 venues over 5 days.

With well over 100 service and feature announcements—and innumerable best practices shared by AWS executives, customers, and partners—distilling highlights is a challenge. From a security perspective, three key themes emerged.

Turn data into actionable insights

Security teams are always looking for ways to increase visibility into their security posture and uncove

#### Prompting with retriever

Finally, let's assemble the text generation with the LLM and the retriever. The query to the model is converted into a vector using the embedding model. This query vector represents the semantic meaning of the user's query. To find the documents most relevant to the user's query, we use a process called "vector similarity search". In essence, this process compares the query vector with all the document vectors in the database and finds the ones most similar to the query vector. The similarity between vectors is typically measured using "cosine similarity", which captures the angle between the vectors in a multidimensional space. The documents corresponding to the most similar vectors are then returned as the search results.

https://python.langchain.com/docs/modules/data_connection/retrievers/


In [22]:
import warnings
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate

# Supress warnings
warnings.filterwarnings("ignore")

context_template = """

Human: Answer the question below.
Use the given context to answer the question. 
If you don't know the answer, respond "I don't know".
Keep your response as precise as possible and limit it to a few words. 

Here is the context:
{context}

Here is the question: 
{question}

Assistant:"""

# Wrap the template text in a PromptTemplate for the Q&A chain
context_prompt_template = PromptTemplate.from_template(context_template)

# Retriever over the FAISS vector store: similarity search, k = 3 documents
faiss_retriever = db.as_retriever(
    search_type="similarity",
    search_kwargs={"k": 3},
)

# RetrievalQA chain combining the LLM, the retriever and the custom prompt;
# the retrieved source documents are returned alongside the answer.
rag_chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    chain_type_kwargs={"prompt": context_prompt_template},
    retriever=faiss_retriever,
    return_source_documents=True,
)

# Perform RAG using the RetrievalQA chain with FAISS as retriever.
# The answers are stored under a distinct column name so they cannot be
# confused with the no-retrieval "LLM_answer" column.
# `.invoke` is used for consistency with the `llm.invoke` calls elsewhere in
# this notebook (calling the chain object directly is deprecated in LangChain).
df["LLM_rag_answer"] = df.Question.apply(
    lambda question: rag_chain.invoke({"query": question})["result"].strip()
)

# Show the preview with the row and column-width limits lifted.
with pd.option_context("display.max_rows", None, "display.max_colwidth", None):
    display(df.head())

Unnamed: 0,Question,Answer,LLM_answer,claude_rag_answer
0,What city was AWS re:Invent 2022 held in?,Las Vegas,Las Vegas,Las Vegas
1,When did AWS re:Invent 2022 take place?,"November 28 to December 2, 2022","November 28 to December 2, 2022","November 28 to December 2, 2022"
2,How many years has AWS re:Invent been running?,11 years,11,11 years.
3,How many people attended re:Invent 2022 in person?,"Over 51,000","Over 51,000","Over 51,000"
4,How many keynotes were featured at re:Invent 2022?,5 keynotes,5,5


#### Evaluation

TBD