# OpenAI - RAG - Retrieval-Augmented Generation   
_(Hämtningsförstärkt generering)_

Installera beroenden.

In [None]:
%pip install langchain langchain-openai python-dotenv faiss-cpu PyPDF bs4

Ladda in 'OPENAI_API_KEY' från .env-filen. När API-nyckeln är satt som miljövariabel kommer OpenAI-implementationen att använda den.

In [None]:
from dotenv import load_dotenv

# Load the variables defined in the .env file (here: OPENAI_API_KEY) so that
# the OpenAI client code used in the following cells can pick them up.
load_dotenv()

Ladda in en PDF och ställ en fråga med dokumentet som kontext.

In [None]:

from operator import itemgetter

from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.vectorstores import FAISS
from langchain_core.prompts import ChatPromptTemplate
from langchain_openai import ChatOpenAI, OpenAIEmbeddings


def format_docs(docs):
    """Return the text of the retrieved documents as one context string.

    Without this step the list of Document objects would be inserted into
    the prompt via its Python repr, including metadata and escaped newlines.
    """
    return '\n\n'.join(doc.page_content for doc in docs)


# Load the PDF and split it into overlapping chunks small enough to embed
# and to fit several of them into a single prompt.
pdfloader = PyPDFLoader('../resources/book.pdf')
data = pdfloader.load()

text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
all_splits = text_splitter.split_documents(data)

# Embed every chunk and index the vectors in an in-memory FAISS store.
vectorstore = FAISS.from_documents(
    all_splits, embedding=OpenAIEmbeddings()
)
retriever = vectorstore.as_retriever()

template = '''Answer the question based only on the following context:
{context}

Question: {question}
'''
prompt = ChatPromptTemplate.from_template(template)

chat_model = ChatOpenAI()

# The question is used twice: once to retrieve matching chunks (rendered as
# plain text by format_docs) and once verbatim in the prompt.
chain = (
    {
        'context': itemgetter('question') | retriever | format_docs,
        'question': itemgetter('question'),
    }
    | prompt
    | chat_model
)

response = chain.invoke({'question': 'Who is the villain of the story?'})
print(response.content)

Ladda in en webbsida och ställ en fråga med webbsidan som kontext.

In [None]:
from operator import itemgetter

from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import WebBaseLoader
from langchain_community.vectorstores import FAISS
from langchain_core.prompts import ChatPromptTemplate
from langchain_openai import ChatOpenAI, OpenAIEmbeddings


def format_docs(docs):
    """Return the text of the retrieved documents as one context string.

    Without this step the list of Document objects would be inserted into
    the prompt via its Python repr, including metadata and escaped newlines.
    """
    return '\n\n'.join(doc.page_content for doc in docs)


# Fetch the web page and split it into overlapping chunks small enough to
# embed and to fit several of them into a single prompt.
loader = WebBaseLoader('https://en.wikipedia.org/wiki/Marcus_Wandt')
data = loader.load()

text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
all_splits = text_splitter.split_documents(data)

# Embed every chunk and index the vectors in an in-memory FAISS store.
vectorstore = FAISS.from_documents(
    all_splits, embedding=OpenAIEmbeddings()
)
retriever = vectorstore.as_retriever()

template = '''Answer the question based only on the following context:
{context}

Question: {question}
'''
prompt = ChatPromptTemplate.from_template(template)

chat_model = ChatOpenAI()

# The question is used twice: once to retrieve matching chunks (rendered as
# plain text by format_docs) and once verbatim in the prompt.
chain = (
    {
        'context': itemgetter('question') | retriever | format_docs,
        'question': itemgetter('question'),
    }
    | prompt
    | chat_model
)

response = chain.invoke({'question': 'When is Marcus Wandt born?'})
print(response.content)