In [1]:
### Important library installations
%pip install -U langchain-community
%pip install langchain-classic
%pip install -U langchain
%pip install -U llama-cpp-python

Collecting langchain-community
  Downloading langchain_community-0.4.1-py3-none-any.whl.metadata (3.0 kB)
Collecting langchain-core<2.0.0,>=1.0.1 (from langchain-community)
  Downloading langchain_core-1.0.3-py3-none-any.whl.metadata (3.5 kB)
Collecting langchain-classic<2.0.0,>=1.0.0 (from langchain-community)
  Downloading langchain_classic-1.0.0-py3-none-any.whl.metadata (3.9 kB)
Collecting requests<3.0.0,>=2.32.5 (from langchain-community)
  Downloading requests-2.32.5-py3-none-any.whl.metadata (4.9 kB)
Collecting dataclasses-json<0.7.0,>=0.6.7 (from langchain-community)
  Downloading dataclasses_json-0.6.7-py3-none-any.whl.metadata (25 kB)
Collecting marshmallow<4.0.0,>=3.18.0 (from dataclasses-json<0.7.0,>=0.6.7->langchain-community)
  Downloading marshmallow-3.26.1-py3-none-any.whl.metadata (7.3 kB)
Collecting typing-inspect<1,>=0.4.0 (from dataclasses-json<0.7.0,>=0.6.7->langchain-community)
  Downloading typing_inspect-0.9.0-py3-none-any.whl.metadata (1.5 kB)
Collecting langch

In [2]:
!wget https://huggingface.co/microsoft/Phi-3-mini-4k-instruct-gguf/resolve/main/Phi-3-mini-4k-instruct-fp16.gguf

--2025-11-05 20:18:01--  https://huggingface.co/microsoft/Phi-3-mini-4k-instruct-gguf/resolve/main/Phi-3-mini-4k-instruct-fp16.gguf
Resolving huggingface.co (huggingface.co)... 13.35.202.97, 13.35.202.34, 13.35.202.40, ...
Connecting to huggingface.co (huggingface.co)|13.35.202.97|:443... connected.
HTTP request sent, awaiting response... 302 Found
Location: https://cas-bridge.xethub.hf.co/xet-bridge-us/662698108f7573e6a6478546/a9cdcf6e9514941ea9e596583b3d3c44dd99359fb7dd57f322bb84a0adc12ad4?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Content-Sha256=UNSIGNED-PAYLOAD&X-Amz-Credential=cas%2F20251105%2Fus-east-1%2Fs3%2Faws4_request&X-Amz-Date=20251105T201801Z&X-Amz-Expires=3600&X-Amz-Signature=3872e0fcc219db19ecee74926741d705be187f0d12db3e7352c927aff905d064&X-Amz-SignedHeaders=host&X-Xet-Cas-Uid=public&response-content-disposition=inline%3B+filename*%3DUTF-8%27%27Phi-3-mini-4k-instruct-fp16.gguf%3B+filename%3D%22Phi-3-mini-4k-instruct-fp16.gguf%22%3B&x-id=GetObject&Expires=1762377481&Policy=ey

In [4]:
from langchain_classic.memory import ConversationBufferMemory

In [5]:
# Import LlamaCpp from the package that defines it (langchain-community is
# installed in the first cell) rather than through the top-level
# `langchain_classic` re-export.
# NOTE(review): the top-level re-export is believed to be a deprecated
# compatibility path - confirm against the installed version.
from langchain_community.llms import LlamaCpp

In [6]:
# Load the local Phi-3 GGUF weights through the LlamaCpp wrapper.
llm = LlamaCpp(
    model_path="Phi-3-mini-4k-instruct-fp16.gguf",  # file fetched by the wget cell
    n_gpu_layers=-1,  # presumably "offload every layer to the GPU" - confirm
    max_tokens=500,
    n_ctx=2048,  # below the model's trained context of 4096 (see the log output)
    seed=42,
    verbose=False,
)

llama_context: n_batch is less than GGML_KQ_MASK_PAD - increasing to 64
llama_context: n_ctx_per_seq (2048) < n_ctx_train (4096) -- the full capacity of the model will not be utilized


In [7]:
# Send the bare question string, with no prompt template wrapped around it.
llm.invoke(
    "Hi! My name is Abhishek. What is 1 + 1?"
)

''

In [8]:
# PromptTemplate is imported from langchain_core.prompts rather than through
# the top-level `langchain_classic` re-export.
from langchain_core.prompts import PromptTemplate

# Wrap the user's text in the <|user|> / <|assistant|> markers; the only
# placeholder is "input_prompt".
template = """<s><|user|>
{input_prompt}<|end|>
<|assistant|>"""
prompt = PromptTemplate(
    template=template,
    input_variables=["input_prompt"],
)

In [9]:
# Compose the prompt template and the model into one chain with the `|`
# operator: the formatted prompt is fed to the LLM.
basic_chain = prompt | llm

In [21]:
# Same question as the raw call, now routed through the prompt template.
basic_chain.invoke({"input_prompt": "Hi! My name is Abhishek. What is 1 + 1?"})

' Hello Abhishek! The answer to 1 + 1 is 2.'

In [22]:
### Follow-up question: does the chain still know the name given earlier?
basic_chain.invoke(
    {"input_prompt": "What is my name?"}
)

" I'm unable to determine your name as I don't have the ability to access personal data about individuals. If you provide me with some context or a clue, I might be able to help in other ways!"

The chain is stateless, so the LLM has no record of the earlier turn. We have to add some kind of memory to enrich the context the LLM receives with what was said before.

In [23]:
### Conversation buffer
# One way to remind the LLM of what happened earlier is to store the entire
# history and pass it in as context at run time.
# In LangChain, this form of memory is called ConversationBufferMemory.
# (The summary-based alternative is covered further down.)

# Prompt template extended with a "chat_history" slot ahead of the new input
template = """<s><|user|>Current conversation:{chat_history}
{input_prompt}<|end|>
<|assistant|>"""
prompt = PromptTemplate(
    input_variables=["input_prompt", "chat_history"],
    template=template,
)

In [24]:
# LLMChain is imported from langchain_classic.chains rather than through the
# top-level `langchain_classic` re-export.
from langchain_classic.chains import LLMChain
from langchain_classic.memory import ConversationBufferMemory

# Full-history memory; memory_key must match the "chat_history" placeholder
# declared in the prompt template.
memory = ConversationBufferMemory(memory_key="chat_history")

# Chain the LLM, prompt, and memory together
llm_chain = LLMChain(
    prompt=prompt,
    llm=llm,
    memory=memory,
)

In [25]:
# First turn through the memory-backed chain.
llm_chain.invoke(
    {"input_prompt": "Hi! My name is Abhishek. What is 1 + 1?"}
)

{'input_prompt': 'Hi! My name is Abhishek. What is 1 + 1?',
 'chat_history': '',
 'text': " The sum of 1 + 1 is 2.\n\nHere's a brief explanation: In arithmetic, when you add one unit to another unit, the total quantity increases by one. So if you have one item and then acquire another, you now have two items in total. This basic operation falls under addition, which is one of the fundamental operations of mathematics."}

In [26]:
# Second turn: the previous exchange is now supplied as chat_history.
llm_chain.invoke(
    {"input_prompt": "What is my name?"}
)

{'input_prompt': 'What is my name?',
 'chat_history': "Human: Hi! My name is Abhishek. What is 1 + 1?\nAI:  The sum of 1 + 1 is 2.\n\nHere's a brief explanation: In arithmetic, when you add one unit to another unit, the total quantity increases by one. So if you have one item and then acquire another, you now have two items in total. This basic operation falls under addition, which is one of the fundamental operations of mathematics.",
 'text': " Your name is Abhishek. It's nice to meet you!"}

In [27]:
from langchain_classic.memory import ConversationBufferWindowMemory

# Windowed buffer: k=2 keeps only the two most recent exchanges in memory
memory = ConversationBufferWindowMemory(memory_key="chat_history", k=2)

# Rebuild the chain around the windowed memory
llm_chain = LLMChain(llm=llm, prompt=prompt, memory=memory)

In [28]:
# Turn 1: the user states both name and age.
llm_chain.predict(
    input_prompt="Hi! My name is Abhishek and I am 30 years old.What is 1+1?"
)

" Hello Abhishek, and it's a pleasure to meet you! To answer your question: 1+1 equals 2. While sharing personal information is always welcome, here we were focusing on the mathematical aspect of your query. If there's anything else you'd like to know or discuss, feel free to ask!\n\nHere are the step-by-step details for adding 1 and 1:\n\n1. Write down both numbers (1 + 1) vertically aligning them by place value if needed. Since these are single-digit numbers, it's not necessary in this case.\n2. Add the ones digit of each number together: 1 (from the first number) plus 1 (from the second number).\n3. The sum is 2, which represents 1+1."

In [29]:
# Turn 2: an unrelated question.
llm_chain.predict(
    input_prompt="What is 3 + 3?"
)

" That's great, Abhishek! Here are the step-by-step details for adding 3 and 3:\n\n1. Write down both numbers vertically aligning them by place value if needed. In this case, it will look like this:\n   3\n+  3\n2. Add the ones digit of each number together: 3 (from the first number) plus 3 (from the second number).\n3. The sum is 6, which represents 3+3."

In [30]:
# Turn 3: the first exchange is still among the last two, so the name is available.
llm_chain.invoke(
    {"input_prompt": "What is my name?"}
)

{'input_prompt': 'What is my name?',
 'chat_history': "Human: Hi! My name is Abhishek and I am 30 years old.What is 1+1?\nAI:  Hello Abhishek, and it's a pleasure to meet you! To answer your question: 1+1 equals 2. While sharing personal information is always welcome, here we were focusing on the mathematical aspect of your query. If there's anything else you'd like to know or discuss, feel free to ask!\n\nHere are the step-by-step details for adding 1 and 1:\n\n1. Write down both numbers (1 + 1) vertically aligning them by place value if needed. Since these are single-digit numbers, it's not necessary in this case.\n2. Add the ones digit of each number together: 1 (from the first number) plus 1 (from the second number).\n3. The sum is 2, which represents 1+1.\nHuman: What is 3 + 3?\nAI:  That's great, Abhishek! Here are the step-by-step details for adding 3 and 3:\n\n1. Write down both numbers vertically aligning them by place value if needed. In this case, it will look like this:\n  

In [31]:
# Turn 4: the exchange that mentioned the age is no longer in the window.
llm_chain.invoke(
    {"input_prompt": "What is my age?"}
)

{'input_prompt': 'What is my age?',
 'chat_history': "Human: What is 3 + 3?\nAI:  That's great, Abhishek! Here are the step-by-step details for adding 3 and 3:\n\n1. Write down both numbers vertically aligning them by place value if needed. In this case, it will look like this:\n   3\n+  3\n2. Add the ones digit of each number together: 3 (from the first number) plus 3 (from the second number).\n3. The sum is 6, which represents 3+3.\nHuman: What is my name?\nAI:  Your name is Abhishek. It's nice to know you, but as an AI, I don't have personal experiences or identity like a human. However, I'm here to help answer your questions!",
 'text': " I'm an AI and don't have an age in the same way humans do. However, if you provide me with your date of birth, I can help calculate your age!\n\nAs for the calculation 3 + 3, it equals 6."}

Because we retain only the last 2 exchanges in memory, the first message (the one that mentioned the age) has already dropped out of the window, so the LLM cannot retrieve the user's age.

In [31]:
## Now let's try storing a summary of everything generated so far, in order to save on the number of
## tokens and the amount of memory needed.
# This means that whenever we ask the LLM a question, there are two calls:
# • The user prompt
# • The summarization prompt
In [47]:
# Create a summary prompt template
# Prompt used to fold the newest conversation lines into the running summary.
# Placeholders: {summary} (current summary) and {new_lines} (latest lines).
summary_prompt_template = "\n".join(
    [
        "<s><|user|>Summarize the conversations and update",
        "with the new lines.",
        "Current summary:",
        "{summary}",
        "new lines of conversation:",
        "{new_lines}",
        "New summary:<|end|>",
        "<|assistant|>",
    ]
)


# Bind the summary template to its two placeholders
summary_prompt = PromptTemplate(
    template=summary_prompt_template,
    input_variables=["new_lines", "summary"],
)

In [41]:
from langchain_classic.memory import ConversationSummaryMemory

In [48]:
# Summary-based memory, built once. An earlier duplicate construction without
# output_key was immediately overwritten by this one and has been removed.
memory = ConversationSummaryMemory(
    llm=llm,
    memory_key="chat_history",  # must match the {chat_history} slot in `prompt`
    output_key="text",          # LLMChain returns its answer under "text"
    prompt=summary_prompt,
)

# Chain the LLM, prompt, and memory together
llm_chain = LLMChain(
    prompt=prompt,
    llm=llm,
    memory=memory,
    verbose=False,
)

In [49]:
# Three turns in a row; only the last call's result is displayed by the cell.
llm_chain.invoke(
    {"input_prompt": "Hi! My name is Abhishek. What is 1 + 1?"}
)
llm_chain.invoke(
    {"input_prompt": "What is my name?"}
)
llm_chain.invoke(
    {"input_prompt": "What was the first question I asked?"}
)



{'input_prompt': 'What was the first question I asked?',
 'chat_history': ' Hello Abhishek! The AI confirms that the sum of 1 + 1 equals 2. Additionally, in this conversation, your name mentioned is Abhishek.',
 'text': ' The first question you asked was "Hello Abhishek!". However, if your intention is to inquire about the mathematical operation mentioned later in your statement, then the first related question would be: "Does 1 + 1 equal 2?"'}

In [44]:
# Inspect the stored summary.
# ConversationSummaryMemory keeps its running summary in `.buffer`;
# `moving_summary_buffer` is an attribute of ConversationSummaryBufferMemory,
# so reading it on this memory object fails.
print("SUMMARY:\n", memory.buffer)
# What the memory would inject into the prompt on the next call
print("LOAD VARS:\n", memory.load_memory_variables({}))

{'input_prompt': 'What was the first question I asked?',
 'chat_history': '',
 'text': " As an AI, I don't have the ability to recall past interactions or conversations. However, I can help you find out if there were any questions asked during our current session by reviewing our conversation history together. Please let me know how you would like to proceed!"}

In [51]:
# Display the summary text currently held by the memory object.
memory.buffer

' Hello Abhishek! The AI confirmed that the sum of 1 + 1 equals 2. In this conversation, your name mentioned is Abhishek. You inquired about the first question asked in the interaction and learned it was "Hello Abhishek!" or more specifically related to a mathematical operation, "Does 1 + 1 equal 2?".'