# Loading the model locally

In [None]:
!nvidia-smi

In [None]:
!lscpu

In [None]:
!pip install ctransformers

In [None]:
# choose your champion
# Keep exactly one model_id line uncommented.
# NOTE(review): model_id is not read by any cell visible in this notebook
# (the loader cell passes model_path), so confirm whether it is still needed.
#model_id = "TheBloke/Llama-2-7B-GGML"
model_id = "model/TheBloke/Llama-2-7B-chat-GGML"
#model_id = "TheBloke/Llama-2-13B-GGML"
# model_id = "TheBloke/Llama-2-13B-chat-GGML"
# Relative path of the model file handed to
# AutoModelForCausalLM.from_pretrained() in the loader cell.
model_path = "model/llama-2-7b-chat.ggmlv3.q5_0.bin"

In [None]:
from ctransformers import AutoModelForCausalLM

# Generation defaults forwarded to from_pretrained(). Because 'stream' is
# True here, later llm(...) calls stream unless they override it.
config = dict(
    max_new_tokens=256,
    repetition_penalty=1.1,
    temperature=0.1,
    stream=True,
)

# Load the local llama model file using the AVX2 CPU build of the library.
# GPU offload (gpu_layers: 110 for 7b, 130 for 13b per the original note)
# and a custom cache_dir are intentionally left disabled.
llm = AutoModelForCausalLM.from_pretrained(
    model_path,
    model_type="llama",
    lib='avx2',
    **config,
)

In [None]:
# Sample instruction used by the tokenization and generation cells that follow.
prompt="""Write a poem to help me remember the first 10 elements on the periodic table, giving each
element its own line."""

In [None]:
# Tokenize the prompt once; the timed llm.generate() loop later consumes these tokens.
tokens = llm.tokenize(prompt)

In [None]:
tokens

In [None]:
# 'pipeline' execution
# With stream=True the call hands back a generator and no text is produced
# until it is iterated, so consume it and echo each chunk as it arrives.
for text in llm(prompt, stream=True):
    print(text, end="", flush=True)

In [None]:
prompt2 = """who is pm of india?"""
# stream=False overrides the 'stream': True default set in config for this
# one call, so the completion comes back as a single value.
llm(prompt2, stream=False)

In [None]:
import time

# Time token-by-token generation and report throughput.
# perf_counter() is monotonic and high resolution, so the measurement is not
# affected by wall-clock adjustments the way time.time() can be.
start = time.perf_counter()
NUM_TOKENS=0
print('-'*4+'Start Generation'+'-'*4)
for token in llm.generate(tokens):
    # Decode and echo each token as soon as it is produced.
    print(llm.detokenize(token), end='', flush=True)
    NUM_TOKENS+=1
time_generate = time.perf_counter() - start
print('\n')
print('-'*4+'End Generation'+'-'*4)
print(f'Num of generated tokens: {NUM_TOKENS}')
print(f'Time for complete generation: {time_generate}s')
# Both ratios divide by a measured quantity; skip them when nothing was
# generated (or no time elapsed) instead of raising ZeroDivisionError.
if NUM_TOKENS and time_generate > 0:
    print(f'Tokens per secound: {NUM_TOKENS/time_generate}')
    print(f'Time per token: {(time_generate/NUM_TOKENS)*1000}ms')
else:
    print('No tokens were generated; throughput is undefined.')

# Loading the model from OpenAI

In [None]:
# %pip install langchain_openai 

In [1]:
import os

from langchain_openai import ChatOpenAI

# SECURITY: never hard-code the API key in the notebook. A key committed to
# source is exposed to everyone who can read the file and must be revoked.
# Read it from the environment instead; a missing OPENAI_API_KEY raises
# KeyError right here rather than failing later on the first request.
llm = ChatOpenAI(temperature=0, openai_api_key=os.environ["OPENAI_API_KEY"])

# Set up vector DB

In [None]:
# !pip install -q langchain sentence-transformers

In [2]:
from langchain_community.embeddings import HuggingFaceEmbeddings

In [3]:
from torch import cuda

# Use the currently selected CUDA device when one is available; otherwise
# fall back to the CPU.
if cuda.is_available():
    device = f'cuda:{cuda.current_device()}'
else:
    device = 'cpu'

In [4]:
# Embedding model used for the vector store; it is loaded onto the device
# selected in the previous cell.
embeddings = HuggingFaceEmbeddings(
    model_kwargs=dict(device=device),
    model_name='sentence-transformers/all-MiniLM-L6-v2',
)

  from .autonotebook import tqdm as notebook_tqdm


In [None]:
# !pip install -q faiss-cpu
# !pip install -q lark chromadb

In [5]:
from langchain_community.vectorstores import FAISS, Chroma
from langchain.schema import Document

In [6]:
# Attach to the Chroma store kept in the "chroma_db" directory, using the
# embeddings object defined earlier to embed queries.
vectordb = Chroma(persist_directory="chroma_db", embedding_function=embeddings)
# Optional sanity check (disabled): run a scored similarity search and print
# each hit's content, metadata and score.
# results_with_scores = vectordb.similarity_search_with_score("Andrei Tarkovsky")
# for doc, score in results_with_scores:
#     print(f"Content: {doc.page_content}, Metadata: {doc.metadata}, Score: {score}")

# Fetching a job description (JD) for similarity search using HyDE

In [7]:
from langchain.chains import LLMChain, HypotheticalDocumentEmbedder
from langchain.prompts import PromptTemplate
import langchain

In [8]:
# Enable LangChain's global debug flag; the [chain/start] / [llm/start]
# traces in the cell output further down presumably come from this setting.
langchain.debug = True

In [9]:
# HyDE prompt: ask the LLM to write a job description for a given topic.
prompt_template = """Please make a job description based on the job topic
topic: {topic}
"""

# The template has a single placeholder, {topic}.
prompt = PromptTemplate(template=prompt_template, input_variables=["topic"])

# Chain that fills the template and sends it to the configured LLM.
hyDE_llm_chain = LLMChain(prompt=prompt, llm=llm)

In [None]:
# embeddings = HypotheticalDocumentEmbedder(
#     llm_chain=llm_chain,
#     base_embeddings=embeddings
# )

In [10]:
query = "Software engineer"
# The bare string is bound to the chain's only input variable; the debug
# trace below shows it arriving as {"topic": "Software engineer"}.
hyDE_llm_chain.invoke(query)

[32;1m[1;3m[chain/start][0m [1m[1:chain:LLMChain] Entering Chain run with input:
[0m{
  "topic": "Software engineer"
}
[32;1m[1;3m[llm/start][0m [1m[1:chain:LLMChain > 2:llm:ChatOpenAI] Entering LLM run with input:
[0m{
  "prompts": [
    "Human: Please make a job description based on the job topic\ntopic: Software engineer"
  ]
}
[36;1m[1;3m[llm/end][0m [1m[1:chain:LLMChain > 2:llm:ChatOpenAI] [3.68s] Exiting LLM run with output:
[0m{
  "generations": [
    [
      {
        "text": "Job Description: \n\nWe are seeking a highly skilled and experienced Software Engineer to join our dynamic team. The ideal candidate will be responsible for designing, developing, and implementing software solutions to meet the needs of our clients. \n\nKey responsibilities include:\n\n- Collaborating with cross-functional teams to define, design, and ship new features\n- Writing clean, maintainable, and efficient code\n- Troubleshooting and debugging software issues\n- Conducting code revi

{'topic': 'Software engineer',
 'text': "Job Description: \n\nWe are seeking a highly skilled and experienced Software Engineer to join our dynamic team. The ideal candidate will be responsible for designing, developing, and implementing software solutions to meet the needs of our clients. \n\nKey responsibilities include:\n\n- Collaborating with cross-functional teams to define, design, and ship new features\n- Writing clean, maintainable, and efficient code\n- Troubleshooting and debugging software issues\n- Conducting code reviews and providing feedback to team members\n- Keeping up-to-date with the latest industry trends and technologies\n- Participating in the full software development lifecycle, from concept to deployment\n\nQualifications:\n\n- Bachelor's degree in Computer Science or related field\n- Proven work experience as a Software Engineer\n- Strong proficiency in programming languages such as Java, C++, or Python\n- Experience with software development tools and methodol

In [None]:

def create_filter(field_name, value, operator):
    """Build a single metadata condition.

    Args:
        field_name: Metadata key the condition applies to.
        value: Value the key is compared against.
        operator: Comparison operator key, e.g. ``"$eq"``.

    Returns:
        A dict shaped ``{field_name: {operator: value}}``.
    """
    return {field_name: {operator: value}}


def create_and_filter(salary=None, experience_level=None, job_type=None):
    """Combine the supplied criteria into one equality filter.

    Every truthy argument contributes a ``{"<field>": {"$eq": value}}``
    condition, in the order salary, experience_level, job_type.

    Args:
        salary: Required value of the ``salary`` field, or a falsy value to skip.
        experience_level: Required value of the ``experience_level`` field,
            or a falsy value to skip.
        job_type: Required value of the ``job_type`` field, or a falsy value
            to skip.

    Returns:
        ``None`` when no criterion is supplied (meaning "no filter"), the bare
        condition when exactly one is supplied, and ``{"$and": [...]}`` when
        two or more are supplied.
    """
    criteria = (
        ("salary", salary),
        ("experience_level", experience_level),
        ("job_type", job_type),
    )
    conditions = [
        create_filter(field_name, value, "$eq")
        for field_name, value in criteria
        if value
    ]

    if not conditions:
        # The original indexed an empty list here and raised IndexError.
        return None
    if len(conditions) == 1:
        # A lone condition is returned unwrapped, exactly as before.
        return conditions[0]
    return {"$and": conditions}



In [None]:

query = "software engineer who can design websites"
# salary, experience_level, job_type, start_date, skill = None, "ENTRY", None, None, ["Python"]
# NOTE(review): the first assignment below is overwritten immediately by the
# all-None one that follows it, so only the second takes effect.
salary, experience_level, job_type, start_date, skill = None, "ENTRY", "FULL TIME", None, None
salary, experience_level, job_type, start_date, skill = None, None, None, None, None
# salary, experience_level, job_type = None, None, None
# NOTE(review): job_type is passed as the literal "FULL TIME" instead of the
# job_type variable, and salary is not passed at all; confirm this is intended.
# The name `filter` also shadows the Python builtin for the rest of the notebook.
filter = create_and_filter(experience_level=experience_level, job_type="FULL TIME")
print(filter)

# filter = {"experience": "ENTRY"}

# HyDE step: have the LLM write a hypothetical job description for the query,
# then search the vector store with that text rather than with the raw query.
hyde = hyDE_llm_chain.invoke(query)["text"]
print(hyde)
# Top 20 scored matches, restricted by the metadata filter built above.
vectordb.similarity_search_with_score(query=hyde, filter=filter, k=20)

# Self-query retriever

In [None]:
from langchain.chains.query_constructor.base import AttributeInfo
from langchain.retrievers.self_query.base import SelfQueryRetriever

In [None]:
# %pip install lark 

In [None]:

# Metadata fields advertised to the self-query LLM so it can build filters.
# Every field is declared as a string. The names are passed to the LLM as
# written, so they must match the stored metadata keys exactly: the original
# "experience_level " carried a trailing space, which is removed here.
metadata_field_info = [
    AttributeInfo(name=field_name, description=field_description, type="string")
    for field_name, field_description in (
        ("title", "title of the job posted"),
        ("company", "Name of the company hiring"),
        ("company_description", "description of the hiring company"),
        ("location", "location for which the comapy is hiring"),
        (
            "job_type",
            "Type of the job. One of ['FULL TIME', 'PART TIME', 'CONTRACT']",
        ),
        (
            "experience_level",
            "level of experience required of the candidate. One of ['ENTRY', 'INTERMEDIATE', 'EXPERT']",
        ),
        ("requirement", "requirements of the hirer for the posted job"),
        ("skill", "skills required for the posted job"),
        (
            "salary",
            "salary offered for the job in usd. One of ['under $5000', '$5000 - $10000', 'above $10000']",
        ),
    )
]
# Short description of what each stored document's content is; it is passed
# to the retriever (and to the query-constructor prompt in a later cell).
document_content_description = "description about the job"
# Self-query retriever built from the LLM, the Chroma store, the content
# description and the metadata field declarations.
retriever = SelfQueryRetriever.from_llm(
    llm,
    vectordb,
    document_content_description,
    metadata_field_info,
    # enable_limit=True
)

In [None]:
from langchain.chains.query_constructor.base import (
    StructuredQueryOutputParser,
    get_query_constructor_prompt,
)

# Build the query-constructor pipeline by hand so its prompt and parsed
# output can be inspected in the following cells.
# NOTE: this rebinds the notebook-level name `prompt`.
prompt = get_query_constructor_prompt(
    document_content_description,
    metadata_field_info,
)
output_parser = StructuredQueryOutputParser.from_components()
# prompt -> LLM -> parser, composed with the runnable pipe operator.
query_constructor = prompt | llm | output_parser



In [None]:
print(prompt.format(query="dummy question"))

In [None]:
from langchain.retrievers.self_query.chroma import ChromaTranslator

# Let the query constructor parse the question. Only the rewritten query text
# (args.query) is used below; the metadata filter is assembled by hand.
args = query_constructor.invoke(
    {
        "query": "experience as fresher"
    }
)
print(args)

# salary, experience, job_type = None, "ENTRY", None
salary, experience, job_type = None, None, None

# One equality condition per criterion that is actually set.
# NOTE(review): the key "experience" differs from the "experience_level" key
# used by create_and_filter; confirm which one matches the stored metadata.
conditions = [
    {field_name: {"$eq": value}}
    for field_name, value in (
        ("salary", salary),
        ("experience", experience),
        ("job_type", job_type),
    )
    if value
]

# A Chroma where-clause must have exactly one top-level key, so two or more
# conditions have to be wrapped in "$and" instead of sharing one dict.
if len(conditions) > 1:
    filter = {"$and": conditions}
elif conditions:
    filter = conditions[0]
else:
    filter = {}

print(filter)

# Pass None rather than an empty dict when nothing is selected: an empty
# where-clause is rejected by some Chroma versions.
vectordb.similarity_search_with_score(query=args.query, filter=filter or None, k=10)

# Testing the results

In [None]:
# Plain keyword query: the question itself states no metadata constraint, so
# any filter would have to be inferred by the self-query retriever.
retriever.invoke("software")

In [None]:
# %pip install gradio

In [None]:
from operator import eq
import gradio as gr

def fetch_jobs(question, salary=None, experience=None, job_type=None):
    """Return job documents for a question, optionally narrowed by metadata.

    The original built a filter from the criteria and then discarded it, and
    it required all four arguments although the UI supplied only one.

    Args:
        question: Free-text search query.
        salary: Required value of the ``salary`` metadata field; falsy to skip.
        experience: Required value of the ``experience`` metadata field;
            falsy to skip.
        job_type: Required value of the ``job_type`` metadata field; falsy
            to skip.

    Returns:
        The documents from ``retriever.invoke(question)`` when no criterion is
        set (the original behaviour), otherwise the documents from a vector
        search restricted by the explicit criteria.
    """
    # NOTE(review): the key "experience" differs from the "experience_level"
    # key used by create_and_filter; confirm against the stored metadata.
    conditions = [
        {field_name: {"$eq": value}}
        for field_name, value in (
            ("salary", salary),
            ("experience", experience),
            ("job_type", job_type),
        )
        if value
    ]

    if not conditions:
        # Nothing selected: let the self-query retriever handle the question.
        return retriever.invoke(question)

    # A Chroma where-clause needs a single top-level key, so several
    # conditions are combined under "$and".
    where = conditions[0] if len(conditions) == 1 else {"$and": conditions}
    return vectordb.similarity_search(question, filter=where)

outputs = gr.List()

# gr.Interface passes one positional argument per input component, so the
# inputs must line up with fetch_jobs(question, salary, experience, job_type).
# The original wired a single text box to the four-parameter function.
# value=None leaves each dropdown unselected, i.e. that criterion is skipped.
demo = gr.Interface(
    fn=fetch_jobs,
    inputs=[
        gr.Textbox(label="question"),
        gr.Dropdown(
            choices=['under $5000', '$5000 - $10000', 'above $10000'],
            value=None,
            label="salary",
        ),
        gr.Dropdown(
            choices=['ENTRY', 'INTERMEDIATE', 'EXPERT'],
            value=None,
            label="experience",
        ),
        gr.Dropdown(
            choices=['FULL TIME', 'PART TIME', 'CONTRACT'],
            value=None,
            label="job_type",
        ),
    ],
    outputs="text",
)
demo.launch()