In [1]:
from dotenv import load_dotenv
# Read the local .env file so later cells can pick settings up from the
# process environment (e.g. the PINECONE_* variables read via os.getenv).
# The call is the cell's last expression, so its boolean result is displayed.
load_dotenv(".env")

True

In [2]:
from langchain_openai import OpenAIEmbeddings, ChatOpenAI
from langchain_community.chat_models import ChatOllama
from langchain_community.embeddings import OllamaEmbeddings 

# Backend that supplies both the chat model and the embedding model.
# Supported values: "openai" (default) or "ollama". Change this one constant
# instead of commenting code in and out.
# NOTE(review): an existing vector index is presumably tied to the embedding
# model used to fill it -- confirm compatibility before switching backends.
LLM_PROVIDER = "openai"

if LLM_PROVIDER == "openai":
    # ----- OpenAI ----- #
    chat_model = ChatOpenAI()
    embeddings = OpenAIEmbeddings()
elif LLM_PROVIDER == "ollama":
    # ----- Ollama ----- #
    chat_model = ChatOllama()
    embeddings = OllamaEmbeddings()
else:
    # Fail loudly on a typo rather than leaving the models undefined.
    raise ValueError(f"Unsupported LLM_PROVIDER: {LLM_PROVIDER!r}")


In [3]:
# Embed one short string; as the cell's last expression, the resulting
# vector (a list of floats) is shown as the cell output.
embeddings.embed_query(text="Hello sot!")

[-0.015847994027185078,
 0.0009191049037337303,
 -0.017371082692209036,
 -0.020220307649311424,
 -0.01355023217530438,
 0.01916990244418255,
 -0.00871836674159544,
 0.0026194490513110906,
 -0.01183675882413629,
 -0.005061642270822813,
 0.04283028844414264,
 -0.0014049175672195427,
 -0.00828507450134752,
 0.008895622573394808,
 0.009105703800685098,
 -0.01717413218190866,
 0.02593188921182869,
 -0.01320885029737298,
 0.03466338667041404,
 0.00012350472398580517,
 -0.0037256574753406595,
 -0.024947132935036507,
 0.003394123169490408,
 -0.0031446518867061716,
 -0.0016954203615367866,
 -0.016622668145364394,
 0.008094687952558239,
 -0.018053846448071835,
 0.015585393657225435,
 -0.02833469214301582,
 0.008403245366321291,
 -0.010142979220146633,
 -0.038287285745695745,
 -0.024763312210403468,
 -0.007753306540288127,
 -0.02616823094177623,
 -0.01167263277800426,
 -0.0008912035271140268,
 0.012394785890869073,
 -0.01918303222984989,
 0.017712464570140436,
 0.0012982357303659806,
 0.008094687

In [4]:
import numpy as np
from numpy.linalg import norm

def cosine_similarity(a, b):
    """Return the cosine similarity of two vectors.

    Computes ``dot(a, b) / (||a|| * ||b||)`` using NumPy.

    Args:
        a: First vector (any 1-D array-like of numbers, e.g. a list of floats).
        b: Second vector, same length as ``a``.

    Returns:
        The cosine of the angle between ``a`` and ``b``. If either vector has
        zero norm the similarity is undefined; ``0.0`` is returned in that
        case instead of dividing by zero and producing ``nan``.
    """
    denominator = norm(a) * norm(b)
    # Guard the division: a zero-length vector has no direction.
    if denominator == 0:
        return 0.0
    return np.dot(a, b) / denominator

# Embed two phrases from the same topic area and print their similarity.
vector1, vector2 = map(
    embeddings.embed_query,
    ("artificial intelligence", "machine learning"),
)
print(cosine_similarity(vector1, vector2))

# Embed a phrase from a different topic and compare it against the first
# vector, as a baseline for the score printed above.
vector3 = embeddings.embed_query("toyota corolla")
print(cosine_similarity(vector1, vector3))

0.8816879297904884
0.7611042394113705


# Using Pinecone Vector Database

In [23]:
import os
from pinecone import Pinecone as PineconeClient
from langchain_community.vectorstores import Pinecone

# Build the Pinecone SDK client from credentials held in the environment.
# NOTE(review): `pinecone_client` is not passed to `from_existing_index`
# below -- confirm whether the vector store needs it or reads the
# environment on its own.
pinecone_client = PineconeClient(
    api_key=os.environ.get("PINECONE_API_KEY"),
    environment=os.environ.get("PINECONE_ENVIRONMENT"),
)

# Attach to the already-created "sot-demo" index, embedding with the
# `embeddings` model configured earlier in the notebook.
vecdb = Pinecone.from_existing_index(index_name="sot-demo", embedding=embeddings)

In [21]:
from langchain_community.document_loaders import PyPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Source PDF, given relative to the notebook's working directory.
file_path = "./1706.03762.pdf"

# Loader bound to the source PDF.
loader = PyPDFLoader(file_path)

# Splitter configured with chunk_size=500 and no overlap between chunks.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=0)

# Load the PDF and split it with the splitter above in a single call.
data = loader.load_and_split(text_splitter)

# Display how many chunks were produced.
len(data)

91

In [15]:
# Upload the split chunks to the vector store; the cell output is the list
# of IDs returned by the call.
# NOTE(review): no IDs are supplied here, so re-running this cell presumably
# inserts the same chunks again -- confirm before re-executing.
vecdb.add_documents(documents=data)

['10e762d6-58ea-44fe-8115-c28915f259c5',
 '0b7d8e0c-49c0-401a-afe3-9ec94f6270ee',
 'dedd968b-f15f-4f9e-8025-1d47669bc6d0',
 '1e5164b3-fa35-4867-bf7b-587ac6125970',
 'd0bb1344-e953-4e2d-8ae3-e77b1705c21e',
 '214eccaa-70ed-40db-9670-5e6d789d5b2a',
 '109871f6-daa4-46da-aa94-34e76887943f',
 '1e4f5551-2bd7-4c63-829b-f39e0dc2907a',
 '0f9e81f3-8ec4-401a-964a-0d40bac7d251',
 '324885f1-e348-4f91-bbdc-0c269ee62358',
 '8cea68da-6ac2-4232-87cc-bdb009b7311c',
 'e31bfd3c-1a4b-4ac2-913b-3cd46690eb2f',
 '05667dfe-33e6-4bac-8b30-2d9a5e5e2c1e',
 '624386e1-edc8-4acc-a157-e26e32b2cbdb',
 'e2c97203-2bbd-413c-8c2c-d455af20307b',
 '7d8642e2-69d2-4df0-81c2-9faecbb23c02',
 '6727e28f-737d-4538-b4ae-1253f2dc01af',
 'ccc53766-a449-4b65-b7f0-b36f9f6c1b5e',
 'b98f4be0-7c1b-4012-82ae-fd24acbba3b3',
 'e300b50b-9087-4775-bd9b-f24fcaa154a2',
 'e49bf539-16f8-47f5-b115-9704ca74bad4',
 '4b1c7e9d-3ce0-41b3-b8dd-2e9714b66182',
 'c49252b7-09f9-4a72-bb78-4d6e28f2d8d1',
 'e37887cc-2b09-44ec-925f-6348cf5a700c',
 '67dd8611-bbbd-

In [33]:
from langchain.chains import RetrievalQA
from langchain_core.callbacks import StdOutCallbackHandler

from langchain.globals import set_verbose, set_debug
# Switch on LangChain's global verbose and debug flags so the chain run
# below prints its intermediate steps into the cell output.
set_verbose(True)
set_debug(True)

# Question-answering chain: retrieve chunks from the vector store, then pass
# them to the chat model. "stuff" is the default chain type, spelled out here.
chain = RetrievalQA.from_chain_type(
    llm=chat_model,
    chain_type="stuff",
    retriever=vecdb.as_retriever(),
    verbose=True,
)

# Run the chain on a single question; the result dict is the cell output.
# Callbacks have to be supplied through the `config` argument of `invoke`:
# an unrecognised keyword such as `callback_handler=` is silently ignored,
# so the handler would never be attached.
chain.invoke(
    "What is a transformer?",
    config={"callbacks": [StdOutCallbackHandler()]},
)

[32;1m[1;3m[chain/start][0m [1m[1:chain:RetrievalQA] Entering Chain run with input:
[0m{
  "query": "What is a transformer?"
}
[32;1m[1;3m[chain/start][0m [1m[1:chain:RetrievalQA > 3:chain:StuffDocumentsChain] Entering Chain run with input:
[0m[inputs]
[32;1m[1;3m[chain/start][0m [1m[1:chain:RetrievalQA > 3:chain:StuffDocumentsChain > 4:chain:LLMChain] Entering Chain run with input:
[0m{
  "question": "What is a transformer?",
  "context": "In this work, we presented the Transformer, the first sequence transduction model based entirely on\nattention, replacing the recurrent layers most commonly used in encoder-decoder architectures with\nmulti-headed self-attention.\nFor translation tasks, the Transformer can be trained significantly faster than architectures based\non recurrent or convolutional layers. On both WMT 2014 English-to-German and WMT 2014\n\naligned RNNs or convolution. In the following sections, we will describe the Transformer, motivate\nself-attention and 

{'query': 'What is a transformer?',
 'result': 'The Transformer is a model architecture that relies entirely on attention mechanisms for sequence transduction tasks, replacing the recurrent layers typically used in encoder-decoder architectures with multi-headed self-attention. It allows for faster training compared to models based on recurrent or convolutional layers and enables more parallelization.'}