In [1]:
import os

from dotenv import load_dotenv

from langchain_openai import OpenAIEmbeddings, ChatOpenAI
from langchain_community.vectorstores import Pinecone
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnableParallel, RunnablePassthrough, RunnableLambda
from pinecone import Pinecone as PineconeClient
import requests


Serverless index from [this dataset](https://huggingface.co/datasets/Cohere/wikipedia-22-12).

In [3]:
# Init
pinecone = PineconeClient(api_key=PINECONE_API_KEY)

embeddings = OpenAIEmbeddings(model="text-embedding-ada-002")
vectorstore = Pinecone.from_existing_index(index_name=PINECONE_INDEX_NAME,
                                           embedding=embeddings)

retriever = vectorstore.as_retriever()

In [4]:
from langchain_core.prompts import ChatPromptTemplate
from langchain_openai import ChatOpenAI

# RAG
model = ChatOpenAI(temperature=0, 
                   model="gpt-4-1106-preview")

prompt = ChatPromptTemplate.from_template("tell me a joke about {topic}")
chain = prompt | model

In [5]:
# Input Schema
# A description of the inputs accepted by a Runnable. 
# This is a Pydantic model dynamically generated from the structure of any Runnable. 
# You can call .schema() on it to obtain a JSONSchema representation.

# The input schema of the chain is the input schema of its first part, the prompt.
chain.input_schema.schema()

{'title': 'PromptInput',
 'type': 'object',
 'properties': {'topic': {'title': 'Topic', 'type': 'string'}}}

In [6]:
# from langchain.chains import create_retrieval_chain
# from langchain.chains.combine_documents import create_stuff_documents_chain

from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough

system_prompt = (
    "You are an assistant for question-answering tasks. "
    "Use the following pieces of retrieved context to answer "
    "the question. If you don't know the answer, say that you "
    "don't know. Use three sentences maximum and keep the "
    "answer concise."
    "\n\n"
    "{context}"
)

prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system_prompt),
        ("human", "{input}"),
    ]
)
# RAG
model = ChatOpenAI(temperature=0, 
                   model="gpt-4-1106-preview")

# Define the chain to directly pass documents without additional formatting
rag_chain_from_docs = (
    RunnablePassthrough.assign(context=lambda x: x["context"])
    | prompt
    | model
    | StrOutputParser()
)

# Retrieve documents based on the input question
retrieve_docs = (lambda x: x["input"]) | retriever

# Chain setup to get documents and use them in the RAG chain
chain = RunnablePassthrough.assign(context=retrieve_docs).assign(
    answer=rag_chain_from_docs
)

# question_answer_chain = create_stuff_documents_chain(model, prompt)
# chain = create_retrieval_chain(retriever, question_answer_chain)

In [7]:
chain.input_schema.schema()

{'title': 'RunnableSequenceInput',
 'type': 'object',
 'properties': {'input': {'title': 'Input', 'type': 'string'}}}

In [44]:
result = chain.invoke({"input": "wie is yannick?"})
result

{'input': 'wie is yannick?',
 'context': [Document(metadata={'filename': 'ticket.pdf', 'filetype': 'application/pdf', 'orig_elements': 'eJy9kMFqwzAQRH9F6FwSeW3Jdk499NJLKKXQQwhhJa0dNbJqHCU0hPx7pbSloR/Q68wbmJnVmZOngULcOMsXjBfGWlloITpbtjUYqExRKaPrzpoWCPgd4wNFtBgx8WfeOU8BB8rh6MyO4my0XcayE0/j1cFx9M5gdO9h/m17DP0Be9onf8Up9Hyd1DEpm3AYNE1Jh6sy/dYD1aRuLQIYW6mSbGs7I0SFthGigYJfUiLSR8zwM3WUs47Y48OCKVUBSCWh0I2RQoDVVNZA7JXYlrSmwN6IaTrSPrrehZ71NKDzlgXEiZ0whLRvlnrvXdjde3ekWfB5ys/KJU5T2nikl9wgVfn7bt3UtSgbgSTaQirTgIRWtkZI3VGlxD+/e3sWsDmD2zFPiV1+oZf1J0z0q10=', 'page_number': 2.0}, page_content='Referentie ID: 6642256521b8c5002dbe372e We hebben je bevestiging gemaild naar yannick.lansink@live.nl\n\n2 / 2'),
  Document(metadata={'filename': 'ticket.pdf', 'filetype': 'application/pdf', 'orig_elements': 'eJzVld1u1DAQhV/FynVZ7Dj+6xWlFQgBFRJbIdRW1cQe77qbOEviVGwr3h2nC9KCitTesOpNFJ0Zj/2dcSbndwU22GJMV8EVh6SgTHHLaC20dYZjyTRV0knQSJ0BoYoDUrSYwEGCnH9X+NBghBanxSnYFabZ2vkpbYqkzfo+Aut1Eyyk0MWXv8INxMUICxxy/LzAuCgus7rOylUc