In [None]:
# Mount Google Drive at /content/drive so files stored there can be read by later cells.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
import tensorflow as tf

In [None]:
# tf.test.is_gpu_available() is deprecated; list the physical GPU devices instead.
# The expression is True when TensorFlow can see at least one GPU.
len(tf.config.list_physical_devices('GPU')) > 0

***Installing The Necessary Libraries***

In [None]:
!pip install langchain
!pip install transformers
!pip install accelerate
!pip install datasets
!pip install bitsandbytes
!pip install peft
!pip intall pypdf
!pip install faiss-cpu
!pip install sentence-transformers

In [None]:
# Install the Hugging Face Hub client; -q keeps pip's output quiet.
!pip install huggingface-hub -q

***Authenticating HuggingFace Token***

In [None]:
from huggingface_hub import notebook_login

# Prompt for a Hugging Face access token inside the notebook.
# NOTE(review): presumably required because the meta-llama checkpoint loaded later is gated — confirm.
notebook_login()

In [None]:
# Show which accelerate version is currently importable in this kernel.
import accelerate
accelerate.__version__

In [None]:
pip install --upgrade accelerate


In [None]:
# Pin accelerate to 0.21.0, replacing whichever version is currently installed.
# NOTE(review): accelerate was already imported in an earlier cell, so a runtime
# restart may be needed before the pinned version is the one in use — confirm.
!pip install accelerate==0.21.0

***Loading the Quantized Llama 2 Model***

In [None]:
import torch
import transformers

# Hugging Face Hub id of the checkpoint to load.
model_id = "meta-llama/Llama-2-13b-chat-hf"

# 4-bit NF4 quantization with double quantization; compute runs in bfloat16.
bnb_config = transformers.BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type='nf4',
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.bfloat16
)

model_config = transformers.AutoConfig.from_pretrained(
    model_id
)

# trust_remote_code is deliberately left at its default (False): the Llama
# architecture ships with transformers, so there is no need to allow Python
# code from the Hub repository to be downloaded and executed.
model = transformers.AutoModelForCausalLM.from_pretrained(
    model_id,
    config=model_config,
    quantization_config=bnb_config,
    device_map='auto'
)

# The model is only used for inference, so put it in evaluation mode.
model.eval()

tokenizer = transformers.AutoTokenizer.from_pretrained(model_id)

***Setting up the Input pipeline***

In [None]:
import transformers

# Text-generation pipeline built on the model and tokenizer loaded above.
# The prompt is kept in the returned text (return_full_text=True) and
# generation stops after at most 1024 new tokens.
generate_text = transformers.pipeline(
    task="text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=1024,
    return_full_text=True,
)

In [None]:
from langchain.llms import HuggingFacePipeline

# Wrap the transformers pipeline so it can be used as an LLM by LangChain chains.
# NOTE(review): when an already-built pipeline is passed in, model_kwargs may not be
# forwarded to generation — confirm that temperature=0.7 actually takes effect.
llm_13B = HuggingFacePipeline(pipeline=generate_text, model_kwargs={"temperature": 0.7})

***Sample Test***

In [None]:
# Smoke test: send one free-form question straight to the LLM wrapper and print the reply.
sample_question = "what is need of quantization in large language models"
result = llm_13B(sample_question)
print(result)

In [None]:
# Install pypdf.
# NOTE(review): presumably needed as the PDF parsing backend for PyPDFDirectoryLoader — confirm.
!pip install pypdf

In [None]:
from langchain.document_loaders import PyPDFDirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
import os
import sys
from langchain import HuggingFacePipeline, PromptTemplate
from langchain.chains import RetrievalQA

***Loading the World Campus Courses***

In [None]:
# Directory on the mounted Drive that contains the course PDFs.
pdf_dir = '/content/drive/MyDrive/Py_code/CAPSTONE/WorldCampusPDF'

# Read every PDF found in that directory into LangChain documents.
loader = PyPDFDirectoryLoader(pdf_dir)
data = loader.load()

***Splitting the PDFs into Chunks***

In [None]:
# Split the loaded documents into chunks of up to 1000 characters,
# with a 100-character overlap between consecutive chunks.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_overlap=100,
    chunk_size=1000,
)
docs = text_splitter.split_documents(data)

# Report how many chunks were produced.
print(len(docs))

***Embedding the chunks and storing the vectors in FAISS***

In [None]:
from langchain.vectorstores import FAISS

# Sentence-transformers model used to embed each chunk.
embedding_model_name = "sentence-transformers/all-MiniLM-L6-v2"
embeddings = HuggingFaceEmbeddings(model_name=embedding_model_name)

# Embed every chunk and store the resulting vectors in a FAISS vector store.
faiss_db = FAISS.from_documents(docs, embeddings)

***Initialising chain without memory***

In [None]:
# Llama 2 chat-style prompt: a fixed system message, then the retrieved
# context and the user's question inside a single [INST] block.
template = """
<s>[INST] <<SYS>>
Welcome to Pennsylvania State University! As your academic advisor, I'm here to guide you through your educational journey and provide support for a successful academic experience.
Your questions and concerns are not just welcomed; they are integral to your academic success.
Below, you'll find essential information and guidance to help you navigate your academic path effectively.
<</SYS>>

{context}

{question} [/INST]
"""

prompt = PromptTemplate(input_variables=["context", "question"], template=template)

# Retrieve only the single best-matching chunk for each question.
qa_retriever = faiss_db.as_retriever(search_kwargs={"k": 1})

# Stateless question answering: the retrieved chunk is "stuffed" into the prompt,
# and the source documents are returned alongside the answer.
qa_chain = RetrievalQA.from_chain_type(
    llm=llm_13B,
    retriever=qa_retriever,
    chain_type="stuff",
    chain_type_kwargs={"prompt": prompt},
    return_source_documents=True,
)

***Initialising chain with memory***

In [None]:
from langchain.memory import ConversationBufferMemory
from langchain.chains import ConversationalRetrievalChain

# Same Llama 2 chat-style prompt as the memory-less chain: system message,
# retrieved context, then the user's question.
template = """
<s>[INST] <<SYS>>
Welcome to Pennsylvania State University! As your academic advisor, I'm here to guide you through your educational journey and provide support for a successful academic experience.
Your questions and concerns are not just welcomed; they are integral to your academic success.
Below, you'll find essential information and guidance to help you navigate your academic path effectively.
<</SYS>>

{context}

{question} [/INST]
"""

prompt = PromptTemplate(input_variables=["context", "question"], template=template)

# Conversation history is kept as message objects under the 'chat_history' key.
memory = ConversationBufferMemory(
    return_messages=True,
    memory_key='chat_history',
)

# Retrieve only the single best-matching chunk for each question.
chat_retriever = faiss_db.as_retriever(search_kwargs={"k": 1})

# Retrieval chain that carries the conversation history between calls.
conversation_chain = ConversationalRetrievalChain.from_llm(
    llm=llm_13B,
    retriever=chat_retriever,
    memory=memory,
    combine_docs_chain_kwargs={"prompt": prompt},
)

***Output without memory***

In [None]:
# Ask the memory-less chain one question and print the trimmed answer text.
qa_question = "give me information on the master's degree in applied statistics. Also, tell me the application deadlines"
result = qa_chain(qa_question)
qa_answer = result["result"]
print(qa_answer.strip())

***Output with memory***

In [None]:
# Ask the conversational chain the same question and print the trimmed answer text.
chat_question = "give me information on the master's degree in applied statistics. Also, tell me the application deadlines"
result = conversation_chain(chat_question)
chat_answer = result["answer"]
print(chat_answer.strip())

In [None]:
from google.colab.output import eval_js

# Evaluate google.colab.kernel.proxyPort(5000) in the browser and print what it returns.
proxy_address = eval_js('google.colab.kernel.proxyPort(5000)')
print(proxy_address)

***Setting up a Web Server***

In [None]:
from flask import Flask, render_template, request,jsonify

app = Flask(__name__, template_folder = '/content/sample_data/Templates')


@app.route('/', methods=['GET', 'POST'])
def index():
    """Serve the chat page on GET and answer a question on POST.

    A POST request must carry the question in the ``user_text`` form field.

    Returns:
        GET: the rendered ``index2.html`` template.
        POST: JSON ``{"result": <answer>}`` with the trimmed answer from
        ``qa_chain``, or JSON ``{"error": ...}`` with HTTP status 400 when
        ``user_text`` is missing or blank.
    """
    if request.method == 'POST':
        input_text = request.form.get('user_text', '')
        # Reject missing/blank input up front instead of letting the chain
        # fail on it and surface as an unhandled 500.
        if not input_text.strip():
            return jsonify(error='user_text is required'), 400

        result = qa_chain(input_text)
        final_result = result["result"].strip()
        return jsonify(result=final_result)

    return render_template('index2.html')


if __name__ == '__main__':
    app.run()