In [3]:
from dotenv import load_dotenv
load_dotenv()

True

In [4]:
from langchain_huggingface import HuggingFaceEndpoint, ChatHuggingFace
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.vectorstores import FAISS
from langchain_huggingface import HuggingFaceEmbeddings

  from .autonotebook import tqdm as notebook_tqdm


In [5]:
# LLM 
llm = HuggingFaceEndpoint(
    repo_id="meta-llama/Llama-3.1-8B-Instruct",
    task="text-generation",
    max_new_tokens=200
)

# response = llm.invoke("Why do parrots talk?")
# print(response.content)

chat_model = ChatHuggingFace(llm=llm)
response = chat_model.invoke("Why do parrots talk?")
print(response.content)


Parrots are known for their ability to mimic human speech and other sounds they hear, which is often referred to as "talking." However, parrots don't actually "talk" in the same way that humans do. Instead, they possess a unique ability to mimic and reproduce the sounds they hear, often with remarkable accuracy.

There are several reasons why parrots are able to mimic human speech and other sounds:

1. **Brain structure**: Parrots have a large brain-to-body mass ratio, which allows for advanced cognitive abilities, including learning and memory. Their brain contains a large auditory cortex, which is responsible for processing and storing sound information.
2. **Vocal anatomy**: Parrots have a unique syrinx, which is a vocal organ located at the base of the trachea. The syrinx is made up of two pairs of cartilaginous rings that are covered in muscles and connective tissue. This allows parrots to produce a wide range of sounds, including


In [6]:
# Embedding model
embeddings = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2"
)

Loading weights: 100%|██████████| 103/103 [00:00<00:00, 474.59it/s, Materializing param=pooler.dense.weight]                             
BertModel LOAD REPORT from: sentence-transformers/all-MiniLM-L6-v2
Key                     | Status     |  | 
------------------------+------------+--+-
embeddings.position_ids | UNEXPECTED |  | 

Notes:
- UNEXPECTED	:can be ignored when loading from different task/architecture; not ok if you expect identical arch.


In [7]:
# PDF Load
loader = PyPDFLoader("islr.pdf")
docs = loader.load()

In [8]:
# Splitter
splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000, chunk_overlap=200
)
chunks = splitter.split_documents(docs)

In [None]:
print(chunks[10])

In [13]:
# Vector Store
vector_store = FAISS.from_documents(chunks, embeddings)

In [15]:
# Retriever
retriever = vector_store.as_retriever(
    search_type = "similarity", search_kwargs = {"k":4}
)

In [16]:
query = "Who are the authors of this books?"

In [21]:
result = retriever.invoke(query)
context = [doc.page_content for doc in result]
print(context)

['Springer Texts in Statistics\nSeries Editors:\nG. Casella\nS. Fienberg\nI. Olkin\nFor further volumes:\nhttp://www.springer.com/series/417', 'be obtained from Springer. Permissions for use may be obtained through RightsLink at the Copyright\nClearance Center. Violations are liable to prosecution under the respective Copyright Law.\nThe use of general descriptive names, registered names, trademarks, service marks, etc. in this publi-\ncation does not imply, even in the absence of a speciﬁc statement, that such names are exempt from\nthe relevant protective laws and regulations and therefore free for general use.\nWhile the advice and information in this book are b elieved to be true and accurate at the date of\npublication, neither the authors nor the editors northe publisher can accept any legal responsibility for\nany errors or omissions that may be made. The publisher makes no warranty, express or implied, with\nrespect to the material contained herein.\nPrinted on acid-free paper\

In [22]:
prompt = f"""
You are a helpful AI Assistant.
User Question: {query}
Answer the user question clearly and concisely using this context: {context}
"""

In [23]:
print(prompt)


You are a helpful AI Assistant.
User Question: Who are the authors of this books?
Answer the user question clearly and concisely using this context: ['Springer Texts in Statistics\nSeries Editors:\nG. Casella\nS. Fienberg\nI. Olkin\nFor further volumes:\nhttp://www.springer.com/series/417', 'be obtained from Springer. Permissions for use may be obtained through RightsLink at the Copyright\nClearance Center. Violations are liable to prosecution under the respective Copyright Law.\nThe use of general descriptive names, registered names, trademarks, service marks, etc. in this publi-\ncation does not imply, even in the absence of a speciﬁc statement, that such names are exempt from\nthe relevant protective laws and regulations and therefore free for general use.\nWhile the advice and information in this book are b elieved to be true and accurate at the date of\npublication, neither the authors nor the editors northe publisher can accept any legal responsibility for\nany errors or omissio

In [24]:
response = chat_model.invoke(prompt)
print(response.content)

The authors of this book are:

1. Trevor Hastie
2. Robert Tibshirani
3. Daniela Witten
4. Gareth James
