In [27]:
from langchain_community.document_loaders import UnstructuredPDFLoader
from langchain_community.document_loaders import OnlinePDFLoader
from IPython.display import display as Markdown
from tqdm.autonotebook import tqdm as notebook_tqdm

In [28]:
# Location of the PDF to index, relative to the notebook's working directory.
local_path = '../data/Think_and_Grow_Rich.pdf'

# Local Pdf file uploads
# Fail fast when no path is configured: merely printing a message here would
# leave `data` undefined, and a later cell would die with an unrelated NameError.
if not local_path:
    raise ValueError("Upload a pdf file")

# Parse the PDF into LangChain documents; every later cell reads `data`.
loader = UnstructuredPDFLoader(file_path=local_path)
data = loader.load()

In [None]:
# Preview only the beginning of the extracted text instead of dumping the whole
# first document into the notebook output.
# NOTE: the name `Markdown` imported at the top of this notebook is an alias of
# IPython's `display`, not the Markdown renderer, so the extracted text was never
# rendered as Markdown; a plain print keeps line breaks readable.
PREVIEW_CHARS = 1_000  # raise this to inspect more of the extracted text
print(data[0].page_content[:PREVIEW_CHARS])

In [30]:
# check ollama status
import ollama as Ollama
from ollama import Client
from ollama import chat
from ollama import ChatResponse

EMBEDDING_MODEL = 'nomic-embed-text'

# Bind the client instance to its own name so the imported `Client` class is
# not shadowed by an instance of itself.
ollama_client = Client(
    host='http://localhost:11434',
)

# Pull through the configured client so the download targets the same server
# that is queried below (the module-level `Ollama.pull` goes through the
# library's default client rather than this one).
ollama_client.pull(EMBEDDING_MODEL)  # pulling nomic embeddings

# List the models once, after the pull, and reuse the result for both the
# printout and the availability check.
models = ollama_client.list()
print(models)

# check nomic embeddings are available
if EMBEDDING_MODEL in str(models):
    print("Nomic embeddings found")
else:
    print("Nomic embeddings not found")



models=[Model(model='nomic-embed-text:latest', modified_at=datetime.datetime(2025, 2, 1, 12, 20, 36, 7117, tzinfo=TzInfo(UTC)), digest='0a109f422b47e3a30ba2b10eca18548e944e8a23073ee3f3e947efcf3c45e59f', size=274302450, details=ModelDetails(parent_model='', format='gguf', family='nomic-bert', families=['nomic-bert'], parameter_size='137M', quantization_level='F16')), Model(model='llama3.3:latest', modified_at=datetime.datetime(2025, 1, 27, 13, 14, 5, 308781, tzinfo=TzInfo(UTC)), digest='a6eb4748fd2990ad2952b2335a95a7f952d1a06119a0aa6a2df6cd052a93a3fa', size=42520413916, details=ModelDetails(parent_model='', format='gguf', family='llama', families=['llama'], parameter_size='70.6B', quantization_level='Q4_K_M')), Model(model='deepseek-r1:8b', modified_at=datetime.datetime(2025, 1, 27, 12, 24, 42, 88429, tzinfo=TzInfo(UTC)), digest='28f8fd6cdc677661426adab9338ce3c013d7e69a5bea9e704b364171a5d61a10', size=4920738407, details=ModelDetails(parent_model='', format='gguf', family='llama', famili

Nomic embeddings found


In [None]:
# 1. First clean up any existing ChromaDB installations
%pip uninstall -y chromadb
%pip uninstall -y protobuf

# 2. Install specific versions known to work together
%pip install -q protobuf
%pip install -q chromadb==0.4.22  # Using a stable older version
%pip install -q langchain-ollama

In [32]:
# 3. Set the environment variable
# Sets PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION to "python" for this process.
# NOTE(review): presumably this has to run before protobuf is first imported
# (i.e. before the Chroma import below) to take effect — confirm.
import os

os.environ.update(PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION="python")

In [33]:
from langchain_ollama import OllamaEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import Chroma

In [34]:
# Split and chunk
# Cut the loaded documents into pieces of at most 7500 characters, with a
# 100-character overlap between neighbouring pieces.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_overlap=100,
    chunk_size=7500,
)
chunks = text_splitter.split_documents(data)

In [35]:
# Embed every chunk with the Ollama-served "nomic-embed-text" model and index
# the vectors in a Chroma collection named "local-rag".
# NOTE(review): no persist_directory is passed, so presumably the collection is
# not saved to disk — confirm.
nomic_embeddings = OllamaEmbeddings(model="nomic-embed-text")
vector_db = Chroma.from_documents(
    collection_name="local-rag",
    embedding=nomic_embeddings,
    documents=chunks,
)

### Retrieval

In [36]:
from langchain.prompts import ChatPromptTemplate, PromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_ollama.chat_models import ChatOllama
from langchain_core.runnables import RunnablePassthrough
from langchain.retrievers.multi_query import MultiQueryRetriever

In [37]:
# Chat model used for both query rewriting and answering, served by the Ollama
# instance at localhost:11434.
local_model = "phi4"
llm = ChatOllama(
    base_url="http://localhost:11434",
    model=local_model,
)

In [38]:
# Instruction that asks the LLM to rephrase the user's question five ways, one
# per line; the rewrites are used to query the vector database.
_multi_query_template = """You are an AI language model assistant. Your task is to generate five
    different versions of the given user question to retrieve relevant documents from
    a vector database. By generating multiple perspectives on the user question, your
    goal is to help the user overcome some of the limitations of the distance-based
    similarity search. Provide these alternative questions separated by newlines.
    Original question: {question}"""

QUERY_PROMPT = PromptTemplate(
    template=_multi_query_template,
    input_variables=["question"],
)

In [39]:
# Retriever that has the LLM rewrite the question with QUERY_PROMPT and runs
# the rewrites against the vector store.
retriever = MultiQueryRetriever.from_llm(
    retriever=vector_db.as_retriever(),
    llm=llm,
    prompt=QUERY_PROMPT,
)

# RAG prompt
# Restricts the model to the retrieved context when answering.
template = (
    "Answer the question based ONLY on the following context:\n"
    "{context}\n"
    "Question: {question}\n"
)

prompt = ChatPromptTemplate.from_template(template)

In [40]:
def format_docs(docs):
    """Join the text of retrieved documents into a single context string.

    Without this step the retriever's list of documents is interpolated into
    the prompt as a Python repr (metadata included, newlines escaped), which
    adds noise to the context the model is asked to rely on.

    Args:
        docs: Iterable of documents exposing a ``page_content`` string.

    Returns:
        The documents' ``page_content`` values separated by blank lines.
    """
    return "\n\n".join(doc.page_content for doc in docs)


# RAG pipeline: retrieve and flatten the context, pass the question through
# unchanged, fill the prompt, call the model, and return the reply as a string.
chain = (
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)

In [41]:
# Ask the RAG chain a question; the value of the last expression is shown as
# the cell output.
fate_question = "Why you are the master of your fate?"
chain.invoke(fate_question)

'The idea that "you are the master of your fate" stems from the belief that individuals have significant control over their own lives and destinies through their thoughts, actions, decisions, and attitudes. Here\'s why this perspective holds weight:\n\n1. **Autosuggestion**: The principle of autosuggestion suggests that persistent thoughts can influence behavior and outcomes. By consciously choosing positive thoughts and affirmations, you can guide your actions toward desired goals.\n\n2. **Self-Confidence and Determination**: Building self-confidence empowers individuals to pursue their aspirations with determination. Believing in one\'s ability to achieve a definite purpose is crucial for taking the necessary steps towards success.\n\n3. **Control Over Environment**: While external circumstances are often beyond control, you have the power to shape your environment by choosing where to focus attention and energy. This includes surrounding yourself with supportive people and environme

In [42]:
# Ask the RAG chain what the indexed book covers; the result is shown as the
# cell output.
topic_question = "What is this book about?"
chain.invoke(topic_question)

'The book appears to explore themes related to success, personal development, and the power of ideas in generating wealth. It emphasizes concepts such as:\n\n1. **Imagination and Ideas**: The story of "The Enchanted Kettle" serves as an allegory for how a simple idea can lead to immense success and prosperity. This tale illustrates that it\'s not just tangible assets but also creativity and innovative thinking that drive business growth.\n\n2. **Impact of a Single Idea**: The narrative demonstrates how one small, well-conceived idea can have far-reaching impacts—creating jobs, fostering economic development, and bringing about positive changes in communities and industries.\n\n3. **Economic Prosperity During Hard Times**: The book underscores the resilience of businesses built on strong ideas, as evidenced by the continued success of the "Enchanted Kettle" company even during economic downturns.\n\n4. **Personal Growth and Relationships**: There\'s an element of personal storytelling, 

In [46]:
# Request a short summary from the RAG chain and print it, so line breaks in
# the reply appear as real line breaks rather than escaped "\n".
summary_request = "Summarize whole book into one paragraph less than 100 words"
output = chain.invoke(summary_request)

print(output)

"Think and Grow Rich," by Napoleon Hill, encapsulates a philosophy on success centered around transforming desires into tangible achievements through focused thought and action. The core principle is leveraging auto-suggestion to influence the subconscious mind, empowering individuals to harness their inner potential and align it with Infinite Intelligence for financial prosperity. The book emphasizes specialized knowledge over general knowledge, advocating for practical application toward wealth accumulation. It also highlights persistence, faith, and the belief that mastering one's destiny is possible through disciplined mental training and unwavering dedication to personal goals.

(Note: While I summarized the key points from your provided excerpts, "Think and Grow Rich" has additional elements not covered here due to space constraints.)


In [48]:
# Ask for the title and author in a fixed sentence format; the result is shown
# as the cell output.
title_author_question = "WHats the name of this book and author? give me the answer in this format: 'The name of the book is <name> and the author is <author>'"
chain.invoke(title_author_question)

'The name of the book is "Think and Grow Rich" and the author is Napoleon Hill.'