In [None]:
from dotenv import load_dotenv
# Read settings from the local .env file; a later cell looks up the Pinecone
# credentials with os.getenv, so this has to run first.
load_dotenv(dotenv_path=".env")

In [None]:
from langchain_openai import OpenAIEmbeddings, ChatOpenAI
from langchain_community.chat_models import ChatOllama
from langchain_community.embeddings import OllamaEmbeddings 

# --- Backend selection: keep exactly one pair of assignments active ---------

# OpenAI-backed models (active).
embeddings = OpenAIEmbeddings()
chat_model = ChatOpenAI()

# Ollama-backed models (inactive). To switch, uncomment this pair and comment
# out the OpenAI pair above.
# embeddings = OllamaEmbeddings()
# chat_model = ChatOllama()


In [None]:
# Embed a sample sentence. Only the vector length and a short preview are
# displayed; echoing the whole vector would flood the cell output.
hello_vector = embeddings.embed_query("Hello sot!")
len(hello_vector), hello_vector[:5]

In [None]:
import numpy as np
from numpy.linalg import norm

def cosine_similarity(a, b):
    """Return the cosine similarity of two vectors.

    Args:
        a: Sequence or array of numbers.
        b: Sequence or array of numbers with the same length as ``a``.

    Returns:
        ``dot(a, b) / (norm(a) * norm(b))``. When either vector has zero
        norm the ratio is undefined, so ``0.0`` is returned instead of
        dividing by zero and producing NaN.
    """
    a = np.asarray(a)
    b = np.asarray(b)
    denominator = norm(a) * norm(b)
    # Guard the division: a zero-length vector has no direction to compare.
    if denominator == 0:
        return 0.0
    return np.dot(a, b) / denominator

# First pair: two phrases from the same field (presumably meant to score
# higher than the unrelated pair further down).
vector1 = embeddings.embed_query("artificial intelligence")
vector2 = embeddings.embed_query("machine learning")
similarity_related = cosine_similarity(vector1, vector2)
print(similarity_related)

# Second pair: the first phrase against an unrelated one, for contrast.
vector3 = embeddings.embed_query("toyota corolla")
similarity_unrelated = cosine_similarity(vector1, vector3)
print(similarity_unrelated)

# Using Pinecone Vector Database

In [None]:
import os
from pinecone import Pinecone as PineconeClient
from langchain_community.vectorstores import Pinecone

# Pinecone client built from credentials found in the environment.
# NOTE(review): this client object is not passed to the vector store created
# below, so within this cell it is unused — confirm whether it is still needed.
pinecone_client = PineconeClient(
    api_key=os.environ.get("PINECONE_API_KEY"),
    environment=os.environ.get("PINECONE_ENVIRONMENT"),
)

# Vector store bound to the existing "sot-demo" index, using the embedding
# model chosen earlier in the notebook.
vecdb = Pinecone.from_existing_index(index_name="sot-demo", embedding=embeddings)

In [None]:
from langchain_community.document_loaders import PyPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter

# PDF to be indexed.
file_path = "./1706.03762.pdf"

# Splitter configuration: chunk size 500 with no overlap between chunks.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=0)

# Load the PDF and split it into chunks in a single call.
loader = PyPDFLoader(file_path)
data = loader.load_and_split(text_splitter=text_splitter)

# Show how many chunks were produced.
len(data)

In [None]:
import hashlib

# Give every chunk a deterministic ID derived from its source, position and
# text. Without explicit IDs each run of this cell stores the same chunks
# again under fresh IDs, so re-running the notebook fills the index with
# duplicates; with stable IDs a re-run reuses the same IDs instead.
chunk_ids = [
    hashlib.sha256(
        f"{doc.metadata.get('source', '')}:{position}:{doc.page_content}".encode("utf-8")
    ).hexdigest()
    for position, doc in enumerate(data)
]

inserted_ids = vecdb.add_documents(data, ids=chunk_ids)

# Display the number of stored chunks rather than the full list of IDs.
len(inserted_ids)

In [None]:
from langchain.chains import RetrievalQA
from langchain_core.callbacks import StdOutCallbackHandler

from langchain.globals import set_verbose, set_debug
# Switch on LangChain's global debug and verbose output so the intermediate
# steps of the chain are printed while it runs.
set_debug(True)
set_verbose(True)

# Retrieval-augmented QA chain: the retriever comes from the vector store and
# the chat model produces the answer.
chain = RetrievalQA.from_chain_type(
    llm=chat_model,
    retriever=vecdb.as_retriever(),
    verbose=True,
)

# Callbacks have to be passed through the runnable config. A stray
# `callback_handler=` keyword is not a recognised argument of `invoke` and is
# silently dropped, so the handler would never be attached.
chain.invoke(
    "What is a transformer?",
    config={"callbacks": [StdOutCallbackHandler()]},
)