In [None]:
!git clone https://github.com/zackproser/portfolio.git

In [None]:
!pip install \
  langchain_community \
  langchain_pinecone \
  langchain_openai \
  unstructured \
  langchain-text-splitters

In [None]:
from langchain_pinecone import PineconeVectorStore
from langchain_openai import OpenAIEmbeddings
from langchain_community.document_loaders import DirectoryLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
import os
import glob

In [None]:
# Point the loader at the cloned checkout; the "**" in the pattern makes the
# search descend into subdirectories, so every .mdx file is picked up.
loader = DirectoryLoader(path="portfolio", glob="**/*.mdx")

In [None]:
# Read every file matched by the loader into a list of documents.
docs = loader.load()

# Stop here with a clear message if nothing was loaded, instead of hitting a
# bare IndexError in the next cell or silently building an empty index.
assert docs, "No .mdx files were loaded from 'portfolio' - did the clone succeed?"

In [None]:
# Display the first loaded document as a quick sanity check of what the
# loader produced.
docs[0]

In [None]:
from google.colab import userdata

# Copy each API key from the Colab secrets store into the process environment
# so the OpenAI and Pinecone clients created later can read it from there.
for secret_name in ('OPENAI_API_KEY', 'PINECONE_API_KEY'):
    os.environ[secret_name] = userdata.get(secret_name)

In [None]:
# Name of the Pinecone index that the chunks are written to and queried from.
index_name = "pinecone-chatbot"

# Embedding model used to turn text into vectors.
embeddings = OpenAIEmbeddings(model="text-embedding-3-small")

# Cut the loaded documents into smaller chunks. The splitter is built with no
# arguments, so chunk size and overlap are whatever the library defaults to.
text_splitter = RecursiveCharacterTextSplitter()
split_docs = text_splitter.split_documents(docs)

In [None]:
# Spot-check one chunk (index 25, chosen arbitrarily) to see what the splitter
# produced. NOTE(review): raises IndexError if fewer than 26 chunks exist.
split_docs[25]

In [None]:
import hashlib
from collections import Counter

# Give every chunk a stable ID derived from its source path and its position
# within that source. Without explicit IDs the store assigns new ones on each
# call, so re-running this cell would upsert the same chunks again and the
# duplicates would crowd the similarity-search results.
_chunks_seen = Counter()
chunk_ids = []
for _chunk in split_docs:
    _source = str(_chunk.metadata.get("source", ""))
    _key = f"{_source}#{_chunks_seen[_source]}"
    chunk_ids.append(hashlib.sha256(_key.encode("utf-8")).hexdigest())
    _chunks_seen[_source] += 1

# Embed the chunks and upsert them into the Pinecone index.
# NOTE(review): presumably the index must already exist with a dimension that
# matches the embedding model - confirm before a first run.
vectorstore = PineconeVectorStore.from_documents(
    split_docs,
    embeddings,
    index_name=index_name,
    ids=chunk_ids,
)

In [None]:
# Question used for the raw retrieval here and reused by the QA chain cell.
query = "What is the programming bug?"

# Fetch the stored chunks that are most similar to the question.
similar_docs = vectorstore.similarity_search(query=query)

In [None]:
# Display the retrieved chunks so their relevance to the query can be judged
# by eye.
similar_docs

In [None]:
from langchain.chains import RetrievalQA
from langchain_openai import ChatOpenAI

# Chat model that writes the final answer; temperature 0 keeps sampling as
# close to deterministic as the API allows.
llm = ChatOpenAI(model="gpt-4o", temperature=0)

# Retrieval-augmented QA: the retriever pulls chunks from the vector store and
# the "stuff" chain type places all of them into a single prompt for the model.
qa = RetrievalQA.from_chain_type(
    retriever=vectorstore.as_retriever(),
    chain_type="stuff",
    llm=llm,
)

# Ask the same question as the similarity search above; the returned mapping
# is displayed as the cell output.
qa.invoke({"query": query})