In [12]:
# Replace the standard sqlite3 module with pysqlite3 for compatibility with
# Chroma. This runs before the imports below so that any later
# `import sqlite3` resolves to the pysqlite3 module registered here.
import sys
import warnings

try:
    __import__('pysqlite3')
except ImportError:
    # pysqlite3 is optional: without it the notebook keeps the interpreter's
    # own sqlite3 instead of failing in the very first cell.
    warnings.warn(
        "pysqlite3 is not installed; falling back to the standard-library sqlite3",
        stacklevel=2,
    )
else:
    # Re-register the freshly imported module under the name `sqlite3`.
    sys.modules['sqlite3'] = sys.modules.pop('pysqlite3')


import langchain
import os
import bs4
from dotenv import load_dotenv
from langchain_community.tools.tavily_search import TavilySearchResults
from langchain_community.document_loaders import WebBaseLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_chroma import Chroma
from langchain_openai import OpenAIEmbeddings
from langchain_google_vertexai import ChatVertexAI
from langchain_core.prompts import PromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.chains import create_history_aware_retriever
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.prompts import MessagesPlaceholder
from langchain_community.chat_message_histories import ChatMessageHistory
from langchain_core.chat_history import BaseChatMessageHistory
from langchain_core.runnables.history import RunnableWithMessageHistory

# Load environment variables up front; the API keys are read from the
# environment with os.getenv in a later cell. Left as the cell's final
# expression, so the notebook displays the call's return value.
load_dotenv()

True

In [None]:
# print("LANGCHAIN_API_KEY", os.getenv("LANGCHAIN_API_KEY"))
# print("GEMINI_API_KEY", os.getenv("GEMINI_API_KEY"))
# print("TAVILY_API_KEY", os.getenv("TAVILY_API_KEY"))

In [2]:
# Turn on LangChain tracing (LANGCHAIN_TRACING_V2) for this session.
os.environ["LANGCHAIN_TRACING_V2"] = "true"

# Location of the Google credentials JSON file. A value that is already in the
# environment (for example one provided through the .env file loaded earlier)
# takes precedence; the hard-coded dev-container path is only a fallback, so
# the notebook no longer overwrites a correctly configured machine.
os.environ.setdefault(
    "GOOGLE_APPLICATION_CREDENTIALS",
    "/workspaces/LLM-agent-with-Gemini/fleet-anagram-244304-7dafcc771b2f.json",
)

# API keys are read from the environment; each is None when the variable is unset.
LANGCHAIN_API_KEY = os.getenv("LANGCHAIN_API_KEY")
GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
TAVILY_API_KEY = os.getenv("TAVILY_API_KEY")
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
# NOTE(review): GOOGLE_API_KEY is filled from GEMINI_API_KEY rather than from a
# GOOGLE_API_KEY variable -- confirm this aliasing is intended.
GOOGLE_API_KEY = os.getenv("GEMINI_API_KEY")

In [None]:
# Tavily web-search tool capped at two results, tried out on a sample query.
search = TavilySearchResults(
    max_results=2,
)
sample_search_query = "what are some of the Julia language rules"
search.invoke(sample_search_query)

In [3]:
# Google style guides that are loaded as source documents.
web_paths = [
    "https://google.github.io/styleguide/pyguide.html",
    "https://google.github.io/styleguide/Rguide.html",
    "https://google.github.io/styleguide/cppguide.html",
    "https://google.github.io/styleguide/go/",
    "https://google.github.io/styleguide/javaguide.html",
]

# Only keep post title, headers, and content from the full HTML.
# bs4_strainer = bs4.SoupStrainer(class_=("post-title", "post-header", "post-content"))

# One loader per URL; the loaded documents are concatenated in `web_paths` order.
docs = [
    page
    for url in web_paths
    for page in WebBaseLoader(
        web_paths=(url,),
        # bs_kwargs={"parse_only": bs4_strainer},
    ).load()
]

In [None]:
# Character count of the first loaded page, as a quick sanity check.
first_page = docs[0]
len(first_page.page_content)

In [4]:

# Splitter settings gathered in one place so they are easy to tune.
splitter_options = {
    "chunk_size": 1000,
    "chunk_overlap": 200,
    "add_start_index": True,
}
text_splitter = RecursiveCharacterTextSplitter(**splitter_options)

# Break every loaded page into chunks.
all_splits = text_splitter.split_documents(docs)

# Number of chunks produced.
len(all_splits)

468

In [None]:
# Peek at the metadata attached to one arbitrary chunk.
sample_chunk = all_splits[10]
sample_chunk.metadata

In [5]:
# Embed every chunk with OpenAI embeddings and index the result in Chroma.
embedding_model = OpenAIEmbeddings()
vectorstore = Chroma.from_documents(
    documents=all_splits,
    embedding=embedding_model,
)

In [6]:
# Similarity-search retriever over the vector store, configured with k=6.
retriever = vectorstore.as_retriever(
    search_type="similarity",
    search_kwargs={"k": 6},
)

# Try the retriever on a sample question and count the hits.
sample_question = "What are the python linting rules"
retrieved_docs = retriever.invoke(sample_question)
len(retrieved_docs)

6

In [7]:
# Show the text of the first retrieved chunk.
top_hit = retrieved_docs[0]
print(top_hit.page_content)

2 Python Language Rules



2.1 Lint
Run pylint over your code using this pylintrc.



2.1.1 Definition
pylint
is a tool for finding bugs and style problems in Python source code. It finds
problems that are typically caught by a compiler for less dynamic languages like
C and C++. Because of the dynamic nature of Python, some
infrequent.



2.1.2 Pros
Catches easy-to-miss errors like typos, using-vars-before-assignment, etc.



2.1.3 Cons
pylint
isn’t perfect. To take advantage of it, sometimes we’ll need to write around it,



2.1.4 Decision
Make sure you run
pylint
on your code.
def do_PUT(self):  # WSGI name, so pylint: disable=invalid-name
  ...


In [8]:
# Chat model shared by the chains built in the following cells.
llm = ChatVertexAI(
    model="gemini-pro",
)

In [9]:
def format_docs(docs):
    """Join the ``page_content`` of every document, separated by a blank line.

    Args:
        docs: Iterable of objects exposing a ``page_content`` string attribute.

    Returns:
        A single string; empty when ``docs`` yields nothing.
    """
    page_texts = [document.page_content for document in docs]
    separator = "\n\n"
    return separator.join(page_texts)


# Prompt text for the single-turn RAG chain; it carries two placeholders,
# {context} and {question}.
template = """Use the following pieces of context to answer the question at the end.
If you don't know the answer, just say that you don't know, don't try to make up an answer.
Be detailed and concise.
Always say "thanks for asking!" at the end of the answer.

{context}

Question: {question}

Helpful Answer:"""
custom_rag_prompt = PromptTemplate.from_template(template)

# Map the incoming question onto the two prompt variables: "context" is the
# retriever output passed through format_docs, "question" is passed through.
rag_inputs = {
    "context": retriever | format_docs,
    "question": RunnablePassthrough(),
}
rag_chain = rag_inputs | custom_rag_prompt | llm | StrOutputParser()

# Stream the answer and print each chunk as it arrives.
demo_question = "What are the python and r language rules respectively?"
for chunk in rag_chain.stream(demo_question):
    print(chunk, end="", flush=True)

The question is about the differences between python and r language rules, respectively.

Here is the answer:

The main difference between the Python and R style guides is that Python prefers BigCamelCase for function names, while R prefers lower_case_with_underscores. This is because Python is a more object-oriented language, while R is more functional.

The other major difference is that Python uses semicolons to terminate statements, while R does not. Semicolons are optional in Python, but they are never used in R. This is because R is a more "scripting" language, while Python is more "compiled".

Finally, Python has a more strict indentation requirement than R. Python code must be indented consistently, while R code can be indented in any way that is consistent with the overall layout of the code.

I hope this helps!

Thanks for asking!

In [13]:
# System instruction for the question-rewriting step: turn a follow-up
# question into a standalone one without answering it.
contextualize_q_system_prompt = " ".join(
    [
        "Given a chat history and the latest user question",
        "which might reference context in the chat history,",
        "formulate a standalone question which can be understood",
        "without the chat history. Do NOT answer the question,",
        "just reformulate it if needed and otherwise return it as is.",
    ]
)

# Message layout for question rewriting: system instruction first, then the
# "chat_history" placeholder, then the new user input.
contextualize_messages = [
    ("system", contextualize_q_system_prompt),
    MessagesPlaceholder("chat_history"),
    ("human", "{input}"),
]
contextualize_q_prompt = ChatPromptTemplate.from_messages(contextualize_messages)

In [14]:
# Retriever wrapper built from the chat model, the base retriever and the
# question-rewriting prompt.
history_aware_retriever = create_history_aware_retriever(
    llm=llm,
    retriever=retriever,
    prompt=contextualize_q_prompt,
)

In [36]:
# System instruction for the answering step; the {context} placeholder sits
# after a blank line at the end.
system_prompt = " ".join(
    [
        "You are an assistant for question-answering tasks.",
        "Use the following pieces of retrieved context to answer",
        "the question. If you don't know the answer, say that you",
        "don't know. Use three sentences maximum and keep the",
        "answer concise.",
    ]
) + "\n\n{context}"
# Message layout for answering: system instruction, then the "chat_history"
# placeholder, then the user input.
qa_messages = [
    ("system", system_prompt),
    MessagesPlaceholder("chat_history"),
    ("human", "{input}"),
]
qa_prompt = ChatPromptTemplate.from_messages(qa_messages)

# Chain that combines the retrieved documents with the prompt and the model.
question_answer_chain = create_stuff_documents_chain(llm=llm, prompt=qa_prompt)

# NOTE: this rebinds `rag_chain`, replacing the single-turn chain of the same
# name that an earlier cell assigned.
rag_chain = create_retrieval_chain(
    retriever=history_aware_retriever,
    combine_docs_chain=question_answer_chain,
)

In [16]:
# In-memory registry of chat histories, keyed by session id.
store = dict()


def get_session_history(session_id: str) -> BaseChatMessageHistory:
    """Return the chat history for ``session_id``, creating it on first use.

    Args:
        session_id: Key identifying the conversation in ``store``.

    Returns:
        The history object stored under ``session_id``; a new
        ``ChatMessageHistory`` is created and stored when the key is absent.
    """
    try:
        return store[session_id]
    except KeyError:
        history = ChatMessageHistory()
        store[session_id] = history
        return history


# Key names handed to the history wrapper: where the user input, the history
# messages and the answer sit in the chain's input/output.
history_keys = {
    "input_messages_key": "input",
    "history_messages_key": "chat_history",
    "output_messages_key": "answer",
}

# Wrap the RAG chain with per-session histories looked up via get_session_history.
conversational_rag_chain = RunnableWithMessageHistory(
    rag_chain,
    get_session_history,
    **history_keys,
)

In [26]:
# First turn of session "abc123"; looking up this id constructs a key
# "abc123" in `store`.
session_config = {"configurable": {"session_id": "abc123"}}
first_turn = conversational_rag_chain.invoke(
    {"input": "What are Python language rules?"},
    config=session_config,
)
first_turn["answer"]


"## Python Language Rules: Your Guide to Writing Clean Code\n\nThe Python language, known for its elegant syntax and intuitive structure, adheres to a set of well-defined rules that guide developers towards crafting clean, consistent, and efficient code. These rules, detailed in the Google Python Style Guide, serve as a roadmap for navigating the intricacies of Python development, ensuring a high standard of code quality and maintainability.\n\n**Key Pillars of Python Language Rules:**\n\n* **Linting:** Utilize the power of `pylint` to uncover potential bugs and style inconsistencies, keeping your code polished and error-free.\n* **Import Management:** Import only the modules you truly need, and maintain a consistent order for your imports, making your code organized and readily understandable.\n* **Packaging Prowess:** Adhere to specific naming conventions for packages and avoid unnecessary imports within classes, keeping your code structure clear and focused.\n* **Exception Handling:

In [18]:
# Follow-up turn in the same session ("abc123"), so the wrapper looks up the
# history stored under that id.
followup_config = {"configurable": {"session_id": "abc123"}}
followup_turn = conversational_rag_chain.invoke(
    {"input": "How is it different from R's?"},
    config=followup_config,
)
followup_turn["answer"]

'The Python language rules are different from the R language rules in a few key ways.\n\nFirst, Python uses snake_case for variable and function names, while R uses camelCase. Second, Python uses tabs for indentation, while R uses spaces. Third, Python uses the `def` keyword to define functions, while R does not. Finally, Python has a more strict set of rules about line length and complexity than R.\n\nThese are just a few of the differences between the Python and R language rules. For more detailed information, you can consult the style guide for each language. '

In [34]:
# Stream the reply to another follow-up question in session "abc123".
for chunk in conversational_rag_chain.stream(
    {"input": "What about rules for R?"},
    config={"configurable": {"session_id": "abc123"}}
):
    # Only chunks that carry a non-empty "answer" piece are printed.
    if answer_chunk := chunk.get("answer"):
        # Emit each fragment exactly as received. Appending a space after
        # every fragment injected stray spaces into the middle of sentences.
        # flush=True makes the text appear as it streams.
        print(answer_chunk, end="", flush=True)

## Demystifying  R Language Rules: A Comprehensive Guide to Clean and Efficient Code

As you continue your exploration of R language rules,  allow me to provide a comprehensive overview to empower you with the knowledge and tools necessary to craft clean, consistent, and efficient R code.  These rules, outlined in the Tidyverse Style Guide and the Google R Style Guide, serve as a valuable roadmap for navigating the intricacies of R development, ensuring your code is not only functional but also elegant and sustainable.

**Core Principles of R Language Rules:**

* **Readability:** Prioritize  code clarity and readability, making it easily understandable for others.
* **Consistency:** Adhere to uniform coding practices, promoting a consistent coding style across projects and teams.
* **Efficiency:** Strive for code optimization, maximizing performance and resource utilization.
* **Data Handling:** Embrace tidy data principles, such as  using well-formatted data frames and avoiding redund