In [None]:
# Import all the required libraries. Make sure the packages listed in requirements.txt are installed first.
import torch
from auto_gptq import AutoGPTQForCausalLM
from langchain import HuggingFacePipeline, PromptTemplate
from langchain.chains import RetrievalQA
from langchain.document_loaders import PyPDFDirectoryLoader
from langchain.embeddings import HuggingFaceInstructEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma
from pdf2image import convert_from_path
from transformers import AutoTokenizer, TextStreamer, pipeline

In [None]:
# Run on the first CUDA GPU when PyTorch reports one as available; otherwise use the CPU.
if torch.cuda.is_available():
    DEVICE = "cuda:0"
else:
    DEVICE = "cpu"

In [None]:
# Load the resumes from the /data/resume/ directory.
# The directory loader is built from the folder path and load() returns the
# collection kept in all_resume for the later splitting step.
data_loader = PyPDFDirectoryLoader("/data/resume/")
all_resume = data_loader.load()
# Show how many items were loaded (presumably one per PDF page -- TODO confirm).
len(all_resume)

In [None]:
# Sanity check: convert one sample resume PDF to images (dpi=88) and display
# the first element of the result (presumably the first page -- confirm).
sample_resume = convert_from_path("/data/resume/sampleResume.pdf", dpi=88)
sample_resume[0]

In [None]:
# Build the embedding function from the "hkunlp/instructor-large" model on HuggingFace.
# DEVICE is forwarded through model_kwargs so the model uses the GPU when one
# was detected and the CPU otherwise.
embeddings = HuggingFaceInstructEmbeddings(
    model_name="hkunlp/instructor-large", model_kwargs={"device": DEVICE}
)

In [None]:
# Split the loaded resumes (all_resume) into smaller text chunks.
# chunk_size=1024 and chunk_overlap=64 (units are presumably characters -- confirm).
split_text = RecursiveCharacterTextSplitter(chunk_size=1024, chunk_overlap=64)
all_texts = split_text.split_documents(all_resume)
# Show how many chunks were produced.
len(all_texts)

In [None]:
# Let us reates a Chroma database by indexing the embeddings of the input texts
# This will facilitate efficient similarity search and retrieval

%%time
resume_db = Chroma.from_documents(all_texts, embeddings, persist_directory="resume_db")

In [None]:
# Load the tokenizer and the GPTQ-quantized Llama-2 13B chat model from the
# "TheBloke/Llama-2-13B-chat-GPTQ" repository.
# Llama-2's chat model is the LLM used for this experiment.
model_name_or_path = "TheBloke/Llama-2-13B-chat-GPTQ"
# Forwarded as model_basename below (presumably the base name of the weight file -- confirm).
model_basename = "model"

tokenizer = AutoTokenizer.from_pretrained(model_name_or_path, use_fast=True)
# NOTE(review): trust_remote_code=True presumably lets code shipped with the
# model repository run locally -- confirm the repository is trusted.
# The model is placed on DEVICE; quantize_config=None presumably makes the
# loader read the quantization settings from the repository -- confirm.
model = AutoGPTQForCausalLM.from_quantized(
    model_name_or_path,
    revision="gptq-4bit-128g-actorder_True",
    model_basename=model_basename,
    use_safetensors=True,
    trust_remote_code=True,
    inject_fused_attention=False,
    device=DEVICE,
    quantize_config=None,
)

In [None]:
# Default system prompt placed inside the <<SYS>> section of the chat prompt.
# It is defined as SYSTEM_PROMPT because that is the name the template call
# below reads; the previous lower-case-only definition raised a NameError.
# NOTE(review): the literal ends with "., " -- looks like a leftover; confirm before changing.
SYSTEM_PROMPT = "Use the following pieces of context to answer questions about the candidate's Resume., "
# Lower-case alias kept so any cell that still reads `system_prompt` keeps working.
system_prompt = SYSTEM_PROMPT


def generate_prompt(prompt, system_prompt):
    """Wrap a user prompt and a system prompt in the [INST] / <<SYS>> chat layout.

    Args:
        prompt: Text placed after the <<SYS>> block and before the closing [/INST] tag.
        system_prompt: Instruction text placed between <<SYS>> and <</SYS>>.

    Returns:
        The assembled prompt string with leading and trailing whitespace stripped.
    """
    # The pieces reproduce the original multi-line template exactly, including
    # the trailing space at the end of the first three lines.
    return (
        "[INST] <<SYS>> \n"
        f"    {system_prompt} \n"
        "    <</SYS>> \n"
        f"    {prompt} [/INST]\n"
        "    "
    ).strip()


# Prompt template for the QA chain; {context} and {question} stay as literal
# placeholders here and are filled in later by the prompt template object.
template = generate_prompt(
    """
    {context}
    Question: {question}
    """,
    system_prompt=SYSTEM_PROMPT,
)

In [None]:
# Initialize a text streamer object around the tokenizer; it is handed to the
# text-generation pipeline as its `streamer`.
# skip_prompt / skip_special_tokens are both enabled (presumably so only the
# newly generated, human-readable text is emitted -- confirm).
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)

In [None]:
# Initialize the text-generation pipeline from the loaded model and tokenizer with:
#   (i)   at most 1024 newly generated tokens,
#   (ii)  temperature 0,
#   (iii) a top-p threshold of 0.95,
#   (iv)  a repetition penalty of 1.15.
# Generated text is routed through `streamer`.
# NOTE(review): temperature=0 is combined with top_p and no explicit do_sample
# setting -- confirm whether greedy decoding or sampling is intended.
text_pipeline = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=1024,
    temperature=0,
    top_p=0.95,
    repetition_penalty=1.15,
    streamer=streamer,
)

In [None]:
# Wrap the text-generation pipeline in HuggingFacePipeline so it can serve as
# the `llm` of the QA chain. Temperature 0 is passed again through
# model_kwargs, with deterministic text generation as the stated intent.
llm = HuggingFacePipeline(pipeline=text_pipeline, model_kwargs={"temperature": 0})

In [None]:
# Create the prompt template object from `template`; it declares the two
# input variables the template contains: "context" and "question".
prompt = PromptTemplate(template=template, input_variables=["context", "question"])

In [None]:
# Define the retrieval QA chain with the resume database (resume_db) as its store:
#   - llm: the wrapped text-generation pipeline
#   - chain_type="stuff"
#   - retriever: resume_db with search_kwargs k=2 (presumably the two closest chunks per query -- confirm)
#   - return_source_documents=True (presumably adds the retrieved chunks to the result -- confirm)
#   - prompt: the custom prompt template, passed through chain_type_kwargs
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=resume_db.as_retriever(search_kwargs={"k": 2}),
    return_source_documents=True,
    chain_type_kwargs={"prompt": prompt},
)

In [None]:
# Sample questions to ask the QA chain.
# Every entry carries its own trailing comma: adjacent string literals with no
# comma between them are silently concatenated into a single list element,
# which previously merged the last two questions into one.
sample_questions = [
    "What is the candidate's education?",
    "Give me the top 3 skills from the Resume.",
    "What are the key technologies the candidate has worked on?",
    "Of the lot, pick the best Resume that matches the input job description",
]

# Send each sample question through the QA chain in order.
# The value returned by the chain is not stored.
for question in sample_questions:
    qa_chain(question)