In [1]:
from dotenv import load_dotenv
load_dotenv(dotenv_path='.env')

True

In [2]:
from langchain_openai import OpenAIEmbeddings, ChatOpenAI
from langchain_community.chat_models import ChatOllama
from langchain_community.embeddings import OllamaEmbeddings 

# ----- OpenAI ----- #
chat_model = ChatOpenAI()
embeddings = OpenAIEmbeddings()

# ----- Ollama ----- #
# chat_model = ChatOllama()
# embeddings = OllamaEmbeddings()


In [3]:
embeddings.embed_query("Hello sot!")

[-0.015847994027185078,
 0.0009191049037337303,
 -0.017371082692209036,
 -0.020220307649311424,
 -0.01355023217530438,
 0.01916990244418255,
 -0.00871836674159544,
 0.0026194490513110906,
 -0.01183675882413629,
 -0.005061642270822813,
 0.04283028844414264,
 -0.0014049175672195427,
 -0.00828507450134752,
 0.008895622573394808,
 0.009105703800685098,
 -0.01717413218190866,
 0.02593188921182869,
 -0.01320885029737298,
 0.03466338667041404,
 0.00012350472398580517,
 -0.0037256574753406595,
 -0.024947132935036507,
 0.003394123169490408,
 -0.0031446518867061716,
 -0.0016954203615367866,
 -0.016622668145364394,
 0.008094687952558239,
 -0.018053846448071835,
 0.015585393657225435,
 -0.02833469214301582,
 0.008403245366321291,
 -0.010142979220146633,
 -0.038287285745695745,
 -0.024763312210403468,
 -0.007753306540288127,
 -0.02616823094177623,
 -0.01167263277800426,
 -0.0008912035271140268,
 0.012394785890869073,
 -0.01918303222984989,
 0.017712464570140436,
 0.0012982357303659806,
 0.008094687

In [4]:
import numpy as np
from numpy.linalg import norm

def cosine_similarity(a, b):
    return np.dot(a, b) / (norm(a) * norm(b))

vector1 = embeddings.embed_query("artificial intelligence")
vector2 = embeddings.embed_query("machine learning")

print(cosine_similarity(vector1, vector2))

vector3 = embeddings.embed_query("toyota corolla")

print(cosine_similarity(vector1, vector3))

0.8816879297904884
0.7611042394113705


# Using Pinecone Vector Database

In [8]:
import os
from pinecone import Pinecone as Pinecone
from langchain_community.vectorstores import Pinecone

pinecone_client = Pinecone(
   api_key=os.getenv("PINECONE_API_KEY"),
   environment=os.getenv("PINECONE_ENVIRONMENT")
)

vecdb = Pinecone.from_existing_index(
    index_name="sot-demo",
    embedding=embeddings,
)

In [6]:
from langchain_community.document_loaders import PyPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter

file_path = "./papers/1706.03762.pdf"

# Initialize PDF loader
loader = PyPDFLoader(file_path)

# Initialize text splitter
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=500,
    chunk_overlap=100,
)

# Load and split text
data = loader.load_and_split(text_splitter=text_splitter)

len(data)

print(data[0])

page_content='Provided proper attribution is provided, Google hereby grants permission to\nreproduce the tables and figures in this paper solely for use in journalistic or\nscholarly works.\nAttention Is All You Need\nAshish Vaswani∗\nGoogle Brain\navaswani@google.comNoam Shazeer∗\nGoogle Brain\nnoam@google.comNiki Parmar∗\nGoogle Research\nnikip@google.comJakob Uszkoreit∗\nGoogle Research\nusz@google.com\nLlion Jones∗\nGoogle Research\nllion@google.comAidan N. Gomez∗ †\nUniversity of Toronto' metadata={'source': './papers/1706.03762.pdf', 'page': 0}


In [9]:
vecdb.add_documents(data)

['bec15916-24c8-4eb1-8a85-0608970642d8',
 '7301accb-644c-4300-aae2-19056b469990',
 '1d93bbc3-54d9-4782-9064-3a2c5b62d241',
 'a6480977-66cc-4033-ac76-8d80959766f0',
 'ea8ecb28-a0ba-4d66-b6d2-31128a30367f',
 '75036440-b147-420c-9547-d31e802f7cc9',
 '704b9d9c-915b-4872-a848-b3f8955924a9',
 '2c6b9168-a758-4ba4-97f6-19a08d324d98',
 'f5bb1ccd-2369-48e4-b68c-973c0633d2d6',
 '0dbea2fb-f8b2-4f08-ab06-eded51493a43',
 '23630fda-84b8-4c41-8ac7-a2751e29d584',
 '30268865-ea9c-4c88-b908-cd4ffd220fe6',
 '016ce368-2f7a-4e09-8df0-13a485bb2ebe',
 '753cb8aa-4dc5-45f2-b015-c154d8cd3fec',
 'b68e190c-1418-4930-8e06-9b4901197cf6',
 '98e570ff-fd35-4461-af61-ca82d44f41c1',
 '56e245de-89a0-4d37-ad8d-75ace610c5f7',
 '6de43cb4-e6dc-4603-ab4f-9b326f8375ab',
 '0583a451-600d-4290-9b2c-784bb7537304',
 'b74d5a66-4952-4779-a725-71136d99a770',
 'e934fe94-bfa9-4a52-8821-2ab236621caa',
 '0e65090b-4bd6-4c95-bc74-7b98529d70b1',
 'b0fa151e-3b2c-4d27-8f53-a0098bffb6e7',
 'b03a3d05-a661-49e8-b7b5-609a699c8f8c',
 '5656b088-c587-

In [10]:
from langchain.chains import RetrievalQA
from langchain_core.callbacks import StdOutCallbackHandler

from langchain.globals import set_verbose, set_debug
set_debug(True)
set_verbose(True)

chain = RetrievalQA.from_chain_type(
    llm=chat_model,
    retriever=vecdb.as_retriever(),
    verbose=True,
)

chain.invoke("What is a transformer?", callback_handler=StdOutCallbackHandler())

[32;1m[1;3m[chain/start][0m [1m[1:chain:RetrievalQA] Entering Chain run with input:
[0m{
  "query": "What is a transformer?"
}
[32;1m[1;3m[chain/start][0m [1m[1:chain:RetrievalQA > 3:chain:StuffDocumentsChain] Entering Chain run with input:
[0m[inputs]
[32;1m[1;3m[chain/start][0m [1m[1:chain:RetrievalQA > 3:chain:StuffDocumentsChain > 4:chain:LLMChain] Entering Chain run with input:
[0m{
  "question": "What is a transformer?",
  "context": "To the best of our knowledge, however, the Transformer is the first transduction model relying\nentirely on self-attention to compute representations of its input and output without using sequence-\naligned RNNs or convolution. In the following sections, we will describe the Transformer, motivate\nself-attention and discuss its advantages over models such as [17, 18] and [9].\n3 Model Architecture\nMost competitive neural sequence transduction models have an encoder-decoder structure [ 5,2,35].\n\nIn this work we propose the Transform

{'query': 'What is a transformer?',
 'result': 'A transformer is a model architecture for neural sequence transduction that relies entirely on self-attention to compute representations of its input and output without using traditional sequence-aligned RNNs or convolution. The Transformer model allows for more parallelization and has been shown to achieve state-of-the-art results in tasks such as translation, with significantly faster training times compared to models based on recurrent or convolutional layers.'}