#### Retrieval:

In [3]:
from langchain.embeddings.base import Embeddings
import numpy as np

class SentenceTransformerEmbeddings(Embeddings): #We need to inherit from Embeddings for integrating into langchain pipeline
    def __init__(self, model):
        self.model = model

    def embed_documents(self,texts):
        return [self.model.encode(text) for text in texts]

    def embed_query(self, query):
        return self.model.encode(query)

In [4]:
from sentence_transformers import SentenceTransformer

embedding_model = SentenceTransformer('all-mpnet-base-v2')

  from tqdm.autonotebook import tqdm, trange


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.6k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/571 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/438M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/363 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/239 [00:00<?, ?B/s]



1_Pooling/config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

In [5]:
sentence_embeddings = SentenceTransformerEmbeddings(embedding_model)

In [6]:

from langchain_community.vectorstores import FAISS

db = FAISS.load_local("faiss_index", sentence_embeddings, allow_dangerous_deserialization=True)


In [7]:
# Perform a query by using the retriever
retriever = db.as_retriever()  # Convert FAISS into a retriever object
query = "What is phylosophy?"
retrieved_docs = retriever.get_relevant_documents(query)

# Print out the retrieved documents
for doc in retrieved_docs:
    print(doc.page_content)
    print('\n')


systematics, the field of biology that studies the diversity of life and the relationships of living things through time. Today, systematists typically treat “relatedness” solely in terms of recency of common ancestry, but this was not always the case. Pre-Darwinian taxonomists discussed the relationships of various groups and their place in the “natural system”, and while the rise of evolutionary theory allowed that one sense of relatedness was genealogical, it did not eliminate the idea of the broader notion. Debates about the role of phylogeny in classification and taxonomy were widespread (e.g., Huxley [ed.] 1940; Winsor 1995) though they began to take on a new form beginning in the late 1950s as collaborations turned into organized research programs pushing their agendas.  In his analysis of the period, David Hull (1988: ch. 5) titled one of his chapters “Systematists at War” and thus the name “The Systematics Wars” is sometimes used to describe the debates of the period. Hull (19

  retrieved_docs = retriever.get_relevant_documents(query)


#### Question Answering:

In [9]:
from transformers import AutoModelForCausalLM, AutoTokenizer
device = "cuda" # the device to load the model onto

model = AutoModelForCausalLM.from_pretrained(
    "Qwen/CodeQwen1.5-7B-Chat",
    torch_dtype="auto",
    device_map="auto"
)


Downloading shards:   0%|          | 0/4 [00:00<?, ?it/s]

model-00001-of-00004.safetensors:  75%|#######5  | 2.93G/3.89G [00:00<?, ?B/s]

model-00002-of-00004.safetensors:   0%|          | 0.00/3.95G [00:00<?, ?B/s]

model-00003-of-00004.safetensors:   0%|          | 0.00/3.95G [00:00<?, ?B/s]

model-00004-of-00004.safetensors:   0%|          | 0.00/2.71G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/212 [00:00<?, ?B/s]

In [10]:
tokenizer = AutoTokenizer.from_pretrained("Qwen/CodeQwen1.5-7B-Chat")

prompt = "how to enjoy life"
messages = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": prompt}
]

tokenizer_config.json:   0%|          | 0.00/972 [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/4.46M [00:00<?, ?B/s]

In [11]:
from langchain_core.prompts import ChatPromptTemplate

template = ChatPromptTemplate([
     ('system', "You are a helpful assistant."),
    ('user', "{input}")
])

prompt  = template.invoke({'input': 'how to enjoy life?'})

In [24]:
text = tokenizer.apply_chat_template(
    messages,
    tokenize=False,
    add_generation_prompt=True
)
model_inputs = tokenizer([text], return_tensors="pt").to('cuda')

generated_ids = model.generate(
    model_inputs.input_ids,
    max_new_tokens=512
)
generated_ids = [
    output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]

response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]

In [25]:
response

'\n\nThere is no one "right" way to enjoy life, as everyone has different preferences and experiences. However, here are some tips that might help you find what works best for you:\n\n1. Identify your values and goals: Before you start looking for ways to enjoy life, it\'s important to identify what matters most to you. Are you passionate about a certain career or field? Do you enjoy spending time with loved ones? Or do you prioritize personal growth and self-development?\n\n2. Prioritize relationships: If you value spending time with loved ones, it\'s important to make time for them. Set aside time every day or week to spend with friends and family, and consider getting married or starting a family.\n\n3. Cultivate hobbies and interests: Pursuing your passions can be a great way to relax and unwind. Consider taking up a new hobby, such as painting, writing, or photography, or pursuing an interest, such as hiking, yoga, or meditation.\n\n4. Take care of your physical needs: Taking care

#### Add retrieval to the question answering:

In [12]:
from langchain_core.prompts import ChatPromptTemplate 
# chat prompt template is used for running model in conversational format

system_prompt = (
    "You are an assistant for question-answering tasks. "
    "You're name is Dr.Phil"
    "Use the following pieces of retrieved context to answer "
    "the question. If you don't know the answer, say that you "
    "don't know. Use three sentences maximum and keep the "
    "answer concise."
    "\n\n"
    "{context}"
)

In [29]:
from langchain_core.prompts import MessagesPlaceholder

def format_template(system_prompt, context, user_input): #Custom prompt template
    template = [
        {'role':'system', 'content': system_prompt.format(context = context)},
        MessagesPlaceholder(variable_name = 'chat_history'),
        {'role': 'user', 'content': user_input}
    ]
    return template

In [186]:
from transformers import AutoModelForCausalLM, AutoTokenizer
from langchain_core.runnables import Runnable
import torch



class CustomLLMRunnable(Runnable):
    def invoke(self, prompt,config=None, **kwargs):
        session_id = kwargs.get("session_id")
        config = kwargs.get("config")
        messages = []
        
        for input in prompt.messages:
            if isinstance(input, SystemMessage):
                messages.append({'role':'system', 'content':input.content})
            if isinstance(input, HumanMessage):
                messages.append({'role': 'user', 'content': input.content})
        text = tokenizer.apply_chat_template(
            messages,
            tokenize=False,
            add_generation_prompt=True
        )
        
     
        model_inputs = tokenizer([text], return_tensors="pt").to('cuda')
        
        generated_ids = model.generate(
            model_inputs.input_ids,
            max_new_tokens=512
        )
        
        # Slice generated tokens to exclude input
        generated_ids = [
            output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
        ]
        
        # Decode the response and return it
        response = tokenizer.batch_decode(generated_ids,skip_special_tokens=True)[0]
        return response

custom_llm_runnable = CustomLLMRunnable()


In [178]:
from langchain_core.prompts import ChatPromptTemplate

system_prompt = (
    "You are an assistant for question-answering tasks. "
    "You're name is Dr.Phil"
    "Use the following pieces of retrieved context to answer "
    "the question. If you don't know the answer, say that you "
    "don't know. Use three sentences maximum and keep the "
    "answer concise."
    "\n\n"
    "{context}"
)

question = 'what is phylosophy?'
context = retriever.get_relevant_documents(question)
context = [document.page_content for document in context]

qa_prompt = ChatPromptTemplate.from_messages([
     ('system', system_prompt.format(context = context)),
    MessagesPlaceholder(variable_name = 'chat_history'),
        ('user','{user_input}')
])


prompt = qa_prompt.invoke({ 'user_input': 'Is there such a thing as objective morality, or are all moral judgments subjective and culturally influenced?','chat_history':[]})
custom_llm_runnable.invoke(prompt)

'Objective morality refers to moral judgments that are based on a set of principles and laws that have been established by rational thought or scientific investigation. It is not subjective and cannot be influenced by cultural or personal beliefs or values. The ultimate goal of objective morality is to create a fair and consistent system of ethical behavior that can be applied equally by all members of society. This can be achieved through the development of ethical frameworks and ethical theories that provide a set of basic principles that can be used to evaluate and guide moral behavior. Objective morality can be useful in various fields such as medicine, law, and political science, as it helps to ensure that people are following ethical guidelines and making decisions based on consistent principles and values. However, it is important to note that objective morality cannot replace individual moral judgments or beliefs. Ultimately, moral judgments must be subjective and influenced by

#### Add Chat History:

In [175]:
from langchain_community.chat_message_histories import ChatMessageHistory
from langchain_core.chat_history import BaseChatMessageHistory
from langchain_core.runnables.history import RunnableWithMessageHistory

store = {}


def get_session_history(session_id: str) -> BaseChatMessageHistory:
    if session_id not in store:
        store[session_id] = ChatMessageHistory()
    return store[session_id]


conversational_rag_chain = RunnableWithMessageHistory(
    custom_llm_runnable,
    get_session_history,
    input_messages_key="input",
    history_messages_key="chat_history",
    output_messages_key="answer",
)

In [187]:
conversational_rag_chain.invoke(
    {"input": prompt},
    config={
        "configurable": {"session_id": "abc123"}
    },  # constructs a key "abc123" in `store`.
)["answer"]

TypeError: CustomLLMRunnable.invoke() takes 2 positional arguments but 3 were given