In [1]:
import pandas as pd

file_name = "conference_session_info.csv"

df = pd.read_csv(file_name)

In [2]:
from langchain.document_loaders.csv_loader import CSVLoader

loader = CSVLoader(file_path = file_name)

In [3]:
docs = loader.load()

In [4]:
from langchain.text_splitter import CharacterTextSplitter, RecursiveCharacterTextSplitter

In [5]:
chunk_size = 128
chunk_overlap = 32

c_text_splitter = CharacterTextSplitter(
    chunk_size = chunk_size,
    chunk_overlap = chunk_overlap,
    length_function = len,
    separator = ' '
)

r_text_splitter = RecursiveCharacterTextSplitter(
    chunk_size = chunk_size,
    chunk_overlap = chunk_overlap,
    length_function = len,
    add_start_index = True
)

In [6]:
pages = r_text_splitter.split_documents(docs)

In [7]:
from langchain.vectorstores import Chroma
from langchain.embeddings.openai import OpenAIEmbeddings

In [8]:
from langchain_community.embeddings import OllamaEmbeddings

embedding = OllamaEmbeddings(model="mistral:latest")

persist_directory = 'persist_chroma'

In [9]:
vectordb = Chroma.from_documents(
    documents=pages,
    embedding=embedding,
    persist_directory=persist_directory
)

In [10]:
print(vectordb._collection.count())

465


In [11]:
vectordb.persist()

In [12]:
question = "Which sessions are about augmented reality?"

docs = vectordb.similarity_search(question, k=3)

docs

[Document(page_content='about areas of making that you want to learn about. How do we build and nurture this community?', metadata={'row': 29, 'source': 'conference_session_info.csv', 'start_index': 317}),
 Document(page_content='about areas of making that you want to learn about. How do we build and nurture this community?', metadata={'row': 29, 'source': 'conference_session_info.csv', 'start_index': 317}),
 Document(page_content='about areas of making that you want to learn about. How do we build and nurture this community?', metadata={'row': 29, 'source': 'conference_session_info.csv', 'start_index': 317})]

In [13]:
vectordb = Chroma(
      persist_directory=persist_directory,
      embedding_function=embedding
)

In [14]:
question = "Sessions about robots working on farms"

docs_ss = vectordb.similarity_search(question, k=3)
docs_ss

[Document(page_content='Start Date: 07/27/2020 05:00 PM\nEnd Date: 07/27/2020 06:30 PM\nSession Name: Robots on the Farm', metadata={'row': 17, 'source': 'conference_session_info.csv', 'start_index': 0}),
 Document(page_content='Start Date: 07/27/2020 05:00 PM\nEnd Date: 07/27/2020 06:30 PM\nSession Name: Robots on the Farm', metadata={'row': 17, 'source': 'conference_session_info.csv', 'start_index': 0}),
 Document(page_content='Start Date: 07/27/2020 05:00 PM\nEnd Date: 07/27/2020 06:30 PM\nSession Name: Robots on the Farm', metadata={'row': 17, 'source': 'conference_session_info.csv', 'start_index': 0})]

In [21]:
from langchain.chains import RetrievalQA
from langchain_community.chat_models import ChatOllama

import langchain
langchain.verbose = True

llm_name = "mistral:latest"

llm = ChatOllama(model_name=llm_name, temperature=1)

In [22]:
qa_chain_default = RetrievalQA.from_chain_type(
    llm,
    retriever=vectordb.as_retriever(search_kwargs={"k":3}),
    chain_type="stuff",
    return_source_documents=True
)

In [23]:
question = "Who talks about robot dogs in their session? and what is the session description?"

result = qa_chain_default({"query": question})
result



[1m> Entering new RetrievalQA chain...[0m


[1m> Entering new StuffDocumentsChain chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mSystem: Use the following pieces of context to answer the user's question. 
If you don't know the answer, just say that you don't know, don't try to make up an answer.
----------------
about areas of making that you want to learn about. How do we build and nurture this community?

about areas of making that you want to learn about. How do we build and nurture this community?

issues, and why it works. Plus how the entertainment industry is helping the medical industry in its studies.
Human: Who talks about robot dogs in their session? and what is the session description?[0m


OllamaEndpointNotFoundError: Ollama call failed with status code 404. Maybe your model is not found and you should pull the model with `ollama pull llama2`.

In [19]:
# LangChain supports many other chat models. Here, we're using Ollama
from langchain_community.chat_models import ChatOllama
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate

# supports many more optional parameters. Hover on your `ChatOllama(...)`
# class to view the latest available supported parameters
llm = ChatOllama(model="mistral:latest")
prompt = ChatPromptTemplate.from_template("Tell me a short joke about {topic}")

# using LangChain Expressive Language chain syntax
# learn more about the LCEL on
# https://python.langchain.com/docs/expression_language/why
chain = prompt | llm | StrOutputParser()

# for brevity, response is printed in terminal
# You can use LangServe to deploy your application for
# production
print(chain.invoke({"topic": "Space travel"}))

 Why don't spaceships use turn signals?

Because they're shaped like U-Fleets! (This one is for those who know their space history with a bit of a NASA twist.)
