# use gradio to make an Agent that answers questions

In [79]:
import gradio as gr
from dotenv import load_dotenv
import os
import glob
import requests
from bs4 import BeautifulSoup

from openai import OpenAI
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain_openai import OpenAIEmbeddings, ChatOpenAI
from langchain.document_loaders import DirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_chroma import Chroma
from langchain.memory import ConversationBufferMemory
from langchain.chains import ConversationalRetrievalChain

In [80]:
# Set the Model and db name
MODEL = "gpt-4o-mini"
db_name = "cv_db"
# Load all the necesssary keys
load_dotenv(override=True)
api_key = os.getenv('OPENAI_API_KEY')
LinkedIN_Username = os.getenv('LinkedIN_Username')
LinkedIN_Password = os.getenv('LinkedIN_Password')

openai = OpenAI()

cvs = glob.glob("cv_base/*")
#print(cvs)

In [None]:
# Use selenium to scrap the LinkedIN page.
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from bs4 import BeautifulSoup
import json, time

# The url to the job dessertation on LinkedIN
url = "https://www.linkedin.com/jobs/view/4226197789/?trackingId=zk%2ByEzpoTFiTsP634Rm9zw%3D%3D&refId=AaSi7zWcRzWrMGUNJ9ylVg%3D%3D&midToken=AQFvwOwdGoBHsg&midSig=2MRozsjx59vrM1&trk=eml-jobs_jymbii_digest-job_card-0-jobcard_body&trkEmail=eml-jobs_jymbii_digest-job_card-0-jobcard_body-null-ajjmth~maxh1xlm~iz-null-null&eid=ajjmth-maxh1xlm-iz&otpToken=MTQwNjE5ZTQxYjJlY2NjNGI1MjQwNGVkNDIxOWVmYjQ4Y2NhZDY0MDlkYWI4ZDYxNzdjMTAxNmE0OTUzNTlmN2Y2ZDZkZmE1NzhlNGJkZTY0M2ZlZjg2NTY1YWE5NjM4MThmYmVmYWVjYTY2ZGRlNjgyYTdiYSwxLDE%3D"

#class_name = "jobs-box__html-content"
class_name = "application-outlet"

driver = webdriver.Chrome()
driver.get("https://www.linkedin.com/login")

# Enter username and password
driver.find_element(By.ID, "username").send_keys(LinkedIN_Username)
driver.find_element(By.ID, "password").send_keys(LinkedIN_Password)
driver.find_element(By.XPATH, "//button[@type='submit']").click()
driver.get(url)

# Wait for job description to load
WebDriverWait(driver, 10).until(
    EC.presence_of_element_located((By.CLASS_NAME, class_name))
)

# Get entire page source
html = driver.page_source
soup = BeautifulSoup(html, "html.parser")

jd = soup.body.get_text(separator="/n", strip=True).split("}")[-1]
parts = jd.split("Details zum Jobangebot")
jd = parts[-1]

In [None]:
# Since LinkedIN is doing something actively avoiding scrapping, there might be some problems..
# print(jd)

In [99]:
# Create a system prompt for deciding wheather there are appropritate candidate for a given job. 
system_prompt = "You are provided with a job advertisement.\
You are able to suggest good candidates for this job. "
system_prompt += "Seperate the candidate(s) in the following groups: \n 1. invite to an interview. \n 2. not invite to an interview. \n"
system_prompt += "Also give brief reasoning for you decision. " 
print(system_prompt)

You are provided with a job advertisement.You are able to suggest good candidates for this job. Seperate the candidate(s) in the following groups: 
 1. invite to an interview. 
 2. not invite to an interview. 
Also give brief reasoning for you decision. 


In [100]:
# Create a user prompt for deciding wheather there are appropritate candidate for a given job. 
def user_prompt_for_cv_check(jd):
    user_prompt = f"Thejob description is as follows:\n {jd}. \n"
    user_prompt += "\nThe description could be either written in English or German. But you should always respond in English. "
    user_prompt += "\n Tell me the candidates you know are appropriate."
    return user_prompt

print(user_prompt_for_cv_check(jd))

Thejob description is as follows:
 /nWeiter zur Suche/nWeiter zum Hauptinhalt/nTastaturkurzbefehle/nWechselmenü schließen/nSuche/nStart/nIhr Netzwerk/nJobs/nNachrichten/n15/n15 neue Mitteilungen/nMitteilungen/nSie/nProdukte/nPremium erneuern: 50 % Rabatt/nProfilbesucher:innen/n22/nImpressions von Beiträgen/n1/nZugang zu exklusiven Tools und Analysen/nPremium erneuern: 50 % Rabatt/nGespeicherte Artikel/nGruppen/nNewsletter/nEvents/nBewegen Sie Ihre Datei hierhin/nZiehen Sie Ihre Dateien hierher/nBeitrag beginnen/nVideo/nFoto/nArtikel schreiben/nFeed-Ansicht auswählen:/nRelevanteste zuerst/nFeed-Updates/nNummer des Feedbeitrags 1/nEd Donner/nEd Donner/n• 1./nPremium • 1./nCo-Founder & CTO at Nebula.io, repeat Co-Founder of AI startups, speaker & advisor on Gen AI and LLM Engineering/nCo-Founder & CTO at Nebula.io, repeat Co-Founder of AI startups, speaker & advisor on Gen AI and LLM Engineering/n5 Std. •/nvor 5 Stunden • Alle Mitglieder und Nicht-Mitglieder von LinkedIn/nHere's something

In [101]:
# User prompt for retrieving name from a CV.
def user_prompt_for_name_retrievement(cv):
    """
    Input:
        cv: str of pdf content
    """
    user_prompt = "You are looking at a CV of a Job applicant \n"
    user_prompt += "\n The contents of this CV is as follows; \
    Please give the name of the applicant. \n\n"
    user_prompt += cv
    return user_prompt

In [102]:
# Retrieve name from a CV.
def retrieve_name(cv_content: str) -> str:
    """
    Given the content of an applicant CV, return the name of the applicant.
    Args:
        cv_content (str): A str of CV.
    Returns:
        str: The name of the applicant.
    """
    system_prompt_name_retrievement = "You are an Assistant analyzes the contents of a CV \
    and provides the name of the CV holder. Ignoring text that might be part of a symbol. \
    Output only the name of the CV holder, nothing else."

    user_prompt = user_prompt_for_name_retrievement(cv_content)

    messages = [
        
        {"role": "system", "content": system_prompt_name_retrievement},
        {"role": "user", "content": user_prompt}
    ]

    response = openai.chat.completions.create(model="gpt-4o-mini", messages=messages)

    return response.choices[0].message.content

In [103]:
# Read in the content of CVs in the folder.
cv_contents = []  # contents from all the cvs in the folder
for cv in cvs:
    loader = PyPDFLoader(cv)
    pages = loader.load() # 2 objects for 2 pages
    applicant_name = retrieve_name(pages[0].page_content)
    for page in pages:
        page.metadata["applicant_name"] = applicant_name
        cv_contents.append(page)

# cv_contents[0].page_content

In [104]:
# Divide texts into chunks.
text_splitter = CharacterTextSplitter(chunk_size=500, chunk_overlap=50)
chunks = text_splitter.split_documents(cv_contents)
len(chunks)
#chunks[1]

4

In [105]:
# Convert chunks into embeddings.
embeddings = OpenAIEmbeddings()
if os.path.exists(db_name):
    Chroma(persist_directory=db_name, embedding_function=embeddings).delete_collection()

vectorstore = Chroma.from_documents(documents=chunks, embedding=embeddings, persist_directory=db_name)

In [113]:
# Initialization
llm = ChatOpenAI(temperature=0.7, model_name=MODEL)

memory = ConversationBufferMemory(memory_key='chat_history', return_messages=True)

retriever = vectorstore.as_retriever(search_kwargs={"k": 3})

conversation_chain = ConversationalRetrievalChain.from_llm(llm=llm, retriever=retriever, memory=memory)

In [114]:
query = "Please tell me whether it is a good candidate for the job and why."
query_1 = user_prompt_for_cv_check(jd)
query_2 = "How many candidates do you have? Tell me their names. "
result = conversation_chain.invoke({"question": query_2})
print(result["answer"])

I have information on two candidates: John Doe and Zeyuan Sun.
