In [None]:
import vertexai
from google.oauth2 import service_account

# Notebook configuration: replace the placeholders before running.
PROJECT_ID = "<YOUR_PROJECT_ID>"
REGION = "<YOUR_REGION>"
CREDS_PATH = "<PROJECT_API_KEY_PATH>"  # path to the service-account key file

# Load service-account credentials scoped for Google Cloud Platform APIs.
try:
    credentials = service_account.Credentials.from_service_account_file(
        CREDS_PATH, scopes=["https://www.googleapis.com/auth/cloud-platform"],
    )
except Exception as exc:
    # Stop here with the real cause. Merely printing would leave `credentials`
    # undefined and make the vertexai.init call below fail with a NameError.
    raise RuntimeError(
        f"Error while getting service account credentials from {CREDS_PATH!r}!"
    ) from exc

# Initialise the Vertex AI SDK with the same project / region / credentials
# that the later cells pass to the LangChain wrappers.
vertexai.init(credentials=credentials, project=PROJECT_ID, location=REGION)
print("Vertex AI initialized!")

In [None]:
from langchain_community.vectorstores.faiss import FAISS
from langchain_community.vectorstores.chroma import Chroma
from langchain_google_vertexai import VertexAIEmbeddings, ChatVertexAI
from langchain_core.documents import Document
from langchain_experimental.text_splitter import  SemanticChunker
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_core.prompts import PromptTemplate, ChatPromptTemplate
from langchain import hub
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough

import wikipediaapi
import numpy as np

In [None]:
# Shared English-language Wikipedia client used by fetch_wiki_page.
wiki_wiki = wikipediaapi.Wikipedia(
    user_agent='sample-rag/1.0 (vancence.ho@ollion.com)',
    language='en',
)


def fetch_wiki_page(title):
    """Return the text of the Wikipedia page called ``title``.

    Args:
        title: Page title to look up through the module-level ``wiki_wiki`` client.

    Returns:
        The page's ``text`` when ``page.exists()`` is true, otherwise ``None``.
    """
    wiki_page = wiki_wiki.page(title)
    return wiki_page.text if wiki_page.exists() else None


print("Wikipedia API initialized as: wiki_wiki!")

In [None]:
# Wikipedia articles that make up the demo knowledge base.
article_titles = [
    "Python (programming language)",
    "Artificial Intelligence",
    "Machine Learning",
    "Natural Language Processing",
    "Retrieval Augmented Generation",
    "OpenAI",
    "Deep Learning",
]

documents = []
missing_titles = []  # titles for which fetch_wiki_page returned None or empty text

for title in article_titles:
    content = fetch_wiki_page(title)
    if content:
        documents.append(Document(page_content=content, metadata={"title": title, "source": "Wikipedia"}))
    else:
        # Record the miss instead of dropping it silently, so a wrong title
        # is visible before the index is built.
        missing_titles.append(title)

print(f"Number of documents fetched: {len(documents)}")
if missing_titles:
    print(f"Skipped (page missing or empty): {missing_titles}")

# Summarise each document rather than dumping full article text into the output.
for doc in documents:
    print(f"- {doc.metadata['title']}: {len(doc.page_content):,} characters")

In [None]:
# Embedding model used to vectorise the document chunks.
# The region is passed as `location`, matching vertexai.init and ChatVertexAI
# elsewhere in this notebook, so the configured REGION is actually applied.
embeddings = VertexAIEmbeddings(
    model_name="text-embedding-004",
    credentials=credentials,
    project=PROJECT_ID,
    location=REGION,
)

In [None]:
# Split the fetched articles into small overlapping chunks for embedding.
# NOTE(review): chunk_size / chunk_overlap are presumably counted in characters
# (the splitter's default length function) - confirm if retrieval quality matters.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=100,
    chunk_overlap=50,
    add_start_index=True,
)
splits = text_splitter.split_documents(documents)

In [None]:
# Embed every chunk and index it in a Chroma vector store.
chroma_store = Chroma.from_documents(
    documents=splits,
    embedding=embeddings,
)

In [None]:
# Retrieve top 3 similar documents as context for the llm
retriever = chroma_store.as_retriever(
    search_type="similarity",
    search_kwargs={"k": 3},
)

In [None]:
# Chat model that answers questions; it shares project, region and credentials
# with the rest of the notebook.
llm = ChatVertexAI(
    model="gemini-1.5-flash-001",
    project=PROJECT_ID,
    location=REGION,
    credentials=credentials,
    temperature=0,
    max_retries=3,
    max_tokens=None,
    stop=None,
)

print(f"LLM initialized with model: {llm.model_name}!")

In [None]:
# System instructions for the RAG assistant. The retrieved chunks are injected
# through the {context} placeholder.
qa_system_prompt = (
    "You are a helpful chatbot that can provide information about various topics. "
    "You can answer questions, provide explanations, and give examples. "
    "You can also ask questions to clarify the user's intent. "
    "You can also provide links to relevant resources. "
    "Use the following pieces of retrieved context to answer the user's question. "
    "If you don't know the answer, say that you don't know. "
    "Use three sentences maximum and keep the answer simple and concise. "
    "\n\n"
    "Context: {context}"
)

# Two-message chat prompt: system instructions, then the user's question as {input}.
prompt_template = ChatPromptTemplate.from_messages(
    [
        ("system", qa_system_prompt),
        ("user", "{input}"),
    ]
)

In [None]:
def format_docs(docs):
    """Concatenate the ``page_content`` of each document into one string.

    Args:
        docs: Iterable of objects exposing a ``page_content`` string attribute.

    Returns:
        The contents joined with a blank line (``"\\n\\n"``) between documents;
        an empty string when ``docs`` is empty.
    """
    contents = [document.page_content for document in docs]
    return "\n\n".join(contents)

### Utilising built-in chains

- `create_stuff_documents_chain`
- `create_retrieval_chain`

In [None]:
# Chain that fills the retrieved documents into the prompt and calls the LLM.
question_answer_chain = create_stuff_documents_chain(llm=llm, prompt=prompt_template)

# Full RAG pipeline: run the retriever first, then hand its documents to the chain above.
rag_chain = create_retrieval_chain(
    retriever=retriever,
    combine_docs_chain=question_answer_chain,
)

In [None]:
# Ask a sample question and print only the generated answer from the result.
question = "When is Python 2.0 released?"
response = rag_chain.invoke({"input": question})
print(response["answer"])

### Utilising normal chaining (composing runnables with `|`)

In [None]:
# Inputs for the prompt: "context" is the retriever output flattened to text by
# format_docs; "input" is the raw question passed through unchanged.
rag_inputs = {
    "context": retriever | format_docs,
    "input": RunnablePassthrough(),
}

# Same RAG flow as the built-in chain, assembled by hand and parsed to a plain string.
rag_chain2 = rag_inputs | prompt_template | llm | StrOutputParser()

rag_chain2.invoke("when is python 2.0 released?")

### Clean Up

In [None]:
# Delete the collection behind `chroma_store`.
# NOTE(review): the retriever and RAG chains built on this store presumably
# stop working afterwards - re-run the vector-store cell before querying again.
chroma_store.delete_collection()

### Prompt Template Examples

In [None]:
# Standalone PromptTemplate example. It uses its own name so that it does not
# overwrite `prompt_template`, the chat prompt the RAG chains above are built from.
joke_prompt = PromptTemplate.from_template(
    "Tell me a {adjective} joke about {content}."
)
joke_prompt.format(adjective="funny", content="chickens")

In [None]:
# Chat prompt with template variables in both the system and the human message.
prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            "You are a helpful assistant that translates {input_language} to {output_language}.",
        ),
        ("human", "{input}"),
    ]
)

# Pipe the prompt into the chat model and run it once with concrete values.
chain = prompt | llm
output = chain.invoke(
    {
        "input_language": "English",
        "output_language": "Korean",
        "input": "I love programming.",
    }
)

# Show the model's reply as the cell result; previously it was computed but never displayed.
output.content