<a href="https://colab.research.google.com/github/sugarforever/LangChain-Tutorials/blob/main/LangChain_ChatGithub.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install --upgrade langchain deeplake openai tiktoken

In [None]:
import getpass
import os

from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import DeepLake

# Credentials are taken from the environment when they are already set and
# are prompted for (without echo) otherwise. This keeps secrets out of the
# saved notebook and avoids overwriting a configured key with an empty string.
for _secret_name in ('OPENAI_API_KEY', 'ACTIVELOOP_TOKEN'):
    if not os.environ.get(_secret_name):
        os.environ[_secret_name] = getpass.getpass(f'{_secret_name}: ')


In [None]:
# Embedding function shared by the ingestion and query vector-store handles.
# NOTE(review): an empty `disallowed_special` presumably stops the tokenizer
# from rejecting source text that contains special-token strings; confirm
# against the installed LangChain version.
embeddings = OpenAIEmbeddings(
    disallowed_special=(),
)

In [None]:
# Fetch the Chroma repository; git places it in ./chroma, which is the
# directory the loading cell walks.
# NOTE(review): git refuses to clone into an existing non-empty directory, so
# re-running this cell reports an error and leaves the existing checkout as is.
!git clone https://github.com/chroma-core/chroma.git

In [None]:
import os
from langchain.document_loaders import TextLoader

# Walk the cloned repository and load every UTF-8 readable file as documents.
root_dir = './chroma'
docs = []
skipped_files = []  # (path, error text) for every file that could not be loaded
for dirpath, dirnames, filenames in os.walk(root_dir):
    # Prune Git metadata in place so os.walk never descends into it; files
    # under .git (refs, hook samples, config) are not project source and
    # would otherwise be indexed alongside it.
    dirnames[:] = [name for name in dirnames if name != '.git']
    for file in filenames:
        file_path = os.path.join(dirpath, file)
        try:
            loader = TextLoader(file_path, encoding='utf-8')
            docs.extend(loader.load_and_split())
        except Exception as e:
            # Loading stays best-effort (binary / non-UTF-8 files are
            # expected in a repository), but failures are recorded instead
            # of being silently discarded so they can be inspected.
            skipped_files.append((file_path, repr(e)))

print(f"Loaded {len(docs)} document chunks; skipped {len(skipped_files)} unreadable files.")

In [None]:
# Sanity check: number of document chunks collected by the loading cell.
len(docs)

In [None]:
from langchain.text_splitter import CharacterTextSplitter

# Second splitting pass over the loaded documents (they were already split
# once by `load_and_split()` when loaded): cap chunk size at 1000 with no
# overlap between neighbouring chunks.
text_splitter = CharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=0,
)
texts = text_splitter.split_documents(docs)


In [None]:
# Sanity check: number of chunks after the CharacterTextSplitter pass, i.e.
# how many entries are handed to the vector store.
len(texts)

In [None]:
# Create (or open) the Deep Lake dataset under the given account and upload
# the chunks to it.
# NOTE(review): `public=True` presumably makes the dataset readable by anyone,
# and re-running this cell appears to add the same documents again; confirm
# both against the Deep Lake integration docs.
username = "wyang14"
dataset_path = f"hub://{username}/chroma_source"
db = DeepLake(
    dataset_path=dataset_path,
    embedding_function=embeddings,
    public=True,
)
# Kept as the cell's final expression so its return value is still displayed.
db.add_documents(texts)

In [None]:
# Re-open the dataset read-only for querying. The path is derived from
# `username` (set in the ingestion cell) so this cell always reads the dataset
# that was just written; a separately hard-coded owner would keep pointing at
# the old dataset when `username` is changed.
db = DeepLake(
    dataset_path=f"hub://{username}/chroma_source",
    read_only=True,
    embedding_function=embeddings,
)

In [None]:
# Build a retriever on top of the vector store and set its search options in
# one step.
# NOTE(review): presumably this means cosine distance, 100 candidates fetched,
# MMR re-ranking enabled, and 10 chunks returned per query; confirm against
# the Deep Lake vector store documentation.
retriever = db.as_retriever()
retriever.search_kwargs.update({
    'distance_metric': 'cos',
    'fetch_k': 100,
    'maximal_marginal_relevance': True,
    'k': 10,
})


In [None]:
from langchain.chat_models import ChatOpenAI
from langchain.chains import ConversationalRetrievalChain

# Chat model that answers the questions, wired to the retriever through a
# conversational retrieval chain. `qa` is the callable used by every question
# cell that follows.
model = ChatOpenAI(model='gpt-3.5-turbo')
qa = ConversationalRetrievalChain.from_llm(
    model,
    retriever=retriever,
)

In [None]:
# Opening questions, asked in order. Each (question, answer) pair is appended
# to `chat_history` so the next call to the chain receives the earlier turns.
questions = [
    "What does Chroma do?",
    "How to use Chroma?",
]
chat_history = []

for question in questions:
    result = qa({"question": question, "chat_history": chat_history})
    answer = result['answer']
    chat_history.append((question, answer))
    print(f"Question:\n {question} \n")
    print(f"Answer:\n {answer} \n\n")

In [None]:
def ask(question, chat_history):
    """Ask the retrieval chain one question and print the exchange.

    Args:
        question: The question to send to the chain.
        chat_history: List of ``(question, answer)`` tuples from earlier
            turns. It is passed to the chain and then extended in place with
            this turn, so consecutive ``ask`` calls build on one another in
            the same way the scripted question loop does.

    Returns:
        None. The answer is printed rather than returned so that a cell
        ending in ``ask(...)`` does not display it a second time.
    """
    response = qa({"question": question, "chat_history": chat_history})
    answer = response['answer']
    # Record the turn; without this, follow-up questions never see the
    # answers given to the questions asked before them.
    chat_history.append((question, answer))
    print(f"Question:\n {question}\n")
    print(f"Answer:\n {answer}\n")

In [None]:
# Repository-level fact check: which language the project is mainly written in.
ask("What's the main programming language used in Chroma?", chat_history)

In [None]:
# Summarisation question spanning several source files (storage layer).
ask('Summarize the storage part of Chroma', chat_history)

In [None]:
# Open-ended follow-up about a specific dependency mentioned in the code base.
ask('Tell me more about Sentence Transformers', chat_history)

In [None]:
# Code-generation request: usage example for storing embeddings.
ask('Show me some example code on how to use Chroma to store embeddings', chat_history)

In [None]:
# Symbol lookup: name of the class that exposes the query interface.
ask('What is the Python class for Chroma query interface?', chat_history)

In [None]:
# API listing question: public functions of the `Client` class.
ask('Show me the public functions of class Client', chat_history)

In [None]:
# Architecture question: which database backends the project builds on.
ask('What are the underlying databases used by Chroma?', chat_history)

In [None]:
# Symbol lookup: class responsible for the DuckDB backend.
ask('Which class implements the DuckDB support?', chat_history)