In [None]:
import numpy as np
from langchain_core.language_models import BaseChatModel
from langchain.chat_models import init_chat_model
from langchain.prompts import PromptTemplate

from langchain_core.output_parsers import StrOutputParser

import sys

sys.path.append(".")  # Adjust the path as necessary to import from app.agents
sys.path.append("..")
from app.agents.base import BaseAgent
import app.agents.utils as utils
import app.schema.agent as schema_agent
import os

In [8]:
# SECURITY: a real API key must never be committed in a notebook. The key that
# used to be hard-coded on this line has been exposed and should be revoked
# and rotated in the Google Cloud console.
# The key is now taken from the environment; when it is absent the user is
# prompted once, without echoing the value to the notebook output.
if not os.environ.get("GOOGLE_API_KEY"):
    from getpass import getpass

    os.environ["GOOGLE_API_KEY"] = getpass("Enter GOOGLE_API_KEY: ")

In [9]:
class JDExtractor(BaseAgent):
    """Agent that asks a chat model to sort a job description into categories.

    The pipeline renders the prompt with ``PromptTemplate``, sends it to the
    model, reduces the reply to plain text and hands that text to
    ``utils.extract_json_from_string``.
    """

    _NAME = "job_description_extractor"

    def __init__(self, llm: BaseChatModel):
        """Keep a reference to the model and assemble the extraction chain.

        Args:
            llm: Chat model that will execute the extraction prompt.
        """
        super().__init__(name=self._NAME, llm=llm)
        self.model = llm
        # In a PromptTemplate, doubled braces are literal braces; ``{content}``
        # is the only placeholder that gets substituted.
        self.prompt = """
You are an expert at extracting key information from job descriptions. Your task is to meticulously analyze the provided job description and extract all relevant details, categorizing them under the following specific categories: Experience, Education, Skill, CompanyInfo, OtherRequirement, and OtherInfomation.

Here's how to approach the task:

1.  **Read the job description carefully.** Comprehend all requirements, responsibilities, and contextual information.
2.  **Identify and extract key phrases and sentences** that precisely fit each category.
    *   **Experience:** Focus on the required or preferred years of experience, specific prior job titles, industry exposure, and types of professional tasks or environments sought (e.g., "5+ years of experience in software development," "Proven track record in managing cross-functional teams," "Experience with Agile methodologies").
    *   **Education:** Extract all required or preferred educational qualifications, degrees, certifications, and fields of study (e.g., "Bachelor's degree in Computer Science or a related field," "Master's degree preferred," "AWS Certified Developer certification").
    *   **Skill:** Identify both technical (hard) and interpersonal (soft) skills explicitly mentioned as necessary or beneficial (e.g., "Proficiency in Python, Java, and SQL," "Strong communication and collaboration skills," "Experience with data analysis tools like Tableau," "Problem-solving abilities").
    *   **CompanyInfo:** Extract details about the hiring company itself, its mission, values, culture, industry, size, team structure, or general description (e.g., "Join a fast-paced startup environment," "Our mission is to revolutionize healthcare," "Collaborative and inclusive culture," "Leader in AI-driven solutions").
    *   **OtherRequirement:** Capture any explicit, non-negotiable requirements that don't fit into Experience, Education, or Skill, such as legal eligibility, specific work conditions, travel expectations, security clearances, or specific soft skills framed as mandatory requirements.
    *   **OtherInfomation:** Include any remaining important contextual information, benefits, perks, application process details, EEO statements, or any other descriptive text that provides additional context but isn't a direct requirement (e.g., "Competitive salary and benefits package," "Opportunity for professional growth," "We are an equal opportunity employer," "Flexible work arrangements").

3.  **Output the extracted information** in a JSON format, strictly adhering to the schema below. Each category should contain a list of strings. If a category has no relevant information, the list *must* be empty (`[]`). Do not use `null` for empty lists.

**Data Input:**

You will receive a dictionary with two keys: "id" and "value". The "id" is a unique identifier for the job description, and the "value" contains the job description text.

**Output Format:**

Return a dictionary where the key is the "id" from the input and the value is a dictionary representing the parsed content.
```json
{{
    "xxx": {{
        "Experience": ["list of experience-related strings"],
        "Education": ["list of education-related strings"],
        "Skill": ["list of skill-related strings"],
        "CompanyInfo": ["list of company information strings"],
        "OtherRequirement": ["list of other requirement strings"],
        "OtherInfomation": ["list of other general information strings"]
    }}
}}
```
INPUT: {content}
RESPONSE:
"""
        # Name each stage, then pipe them together left to right.
        template = PromptTemplate(
            input_variables=["content"],
            template=self.prompt,
        )
        to_text = StrOutputParser()
        self.chain = template | self.model | to_text | utils.extract_json_from_string

    def __call__(self, content: list[schema_agent.DataInput]) -> dict:
        """Run the extraction chain on ``content``.

        Args:
            content: Job description input, substituted into the prompt's
                ``{content}`` placeholder.

        Returns:
            The value produced by ``utils.extract_json_from_string`` from the
            model reply; the prompt asks for a dict keyed by input id whose
            values map each category name to a list of strings.
        """
        payload = {"content": content}
        return self.chain.invoke(payload)


In [10]:
class ResumeExtractor(BaseAgent):
    """Agent that asks a chat model to sort resume text into categories.

    The pipeline renders the prompt with ``PromptTemplate``, sends it to the
    model, reduces the reply to plain text and hands that text to
    ``utils.extract_json_from_string``.
    """

    _NAME = "resume_extractor"

    def __init__(self, llm: BaseChatModel):
        """Keep a reference to the model and assemble the extraction chain.

        Args:
            llm: Chat model that will execute the extraction prompt.
        """
        super().__init__(name=self._NAME, llm=llm)
        self.model = llm
        # Prompt notes:
        # * Doubled braces are literal braces for PromptTemplate; ``{content}``
        #   is the only placeholder.
        # * Category names in the instructions use the same spelling as the
        #   keys of the output schema ("Skill", "Project") so the model is not
        #   given two competing names for the same key.
        # * The ```json fence around the schema is closed before ``INPUT:`` so
        #   the input is not presented as part of the example code block.
        self.prompt = """
You are an expert at extracting key information from resumes. Your task is to analyze the provided resume and extract relevant details, categorizing them under the following categories: Experience, Education, Skill, Project, PersonalInformation and Others.

Here's how to approach the task:

1.  **Read the resume carefully.** Understand the candidate's background and qualifications.
2.  **Identify key phrases and sentences** that fall under each category.
    *   **Experience:**  Focus on job titles, company names, dates of employment, and responsibilities. Extract specific accomplishments and quantifiable results whenever possible (e.g., "Software Engineer at Google, 2018-2023, Developed and maintained key features for the Android operating system, resulting in a 15% increase in user engagement.").
    *   **Education:** Extract the degrees earned, institutions attended, dates of attendance, GPA (if provided), and any relevant honors or awards (e.g., "Bachelor of Science in Computer Science, Stanford University, 2014-2018, GPA: 3.9, Summa Cum Laude").
    *   **Skill:** Identify both hard and soft skills mentioned in the resume. Pay attention to technical skills, programming languages, software proficiency, and interpersonal skills (e.g., "Python, Java, C++, Machine Learning, Data Analysis, Communication, Teamwork, Problem-solving").
    *   **Project:** Look for descriptions of personal or academic projects, especially those that demonstrate relevant skills and experience (e.g., "Developed a machine learning model to predict customer churn using Python and scikit-learn," "Designed and implemented a web application using React and Node.js").
    *   **PersonalInformation:** Extract information such as name, contact information (phone number, email address, LinkedIn profile), location, and any other personal details provided in the resume (e.g., "John Doe, johndoe@email.com, (123) 456-7890, San Francisco, CA"). Do not include information about race, religion, gender or age.
    *   **Others:** Include any information that doesn't fit into the above categories but is still important, such as awards, certifications, publications, or volunteer experience.

3.  **Output the extracted information** in a JSON format, following the schema below. Each category should contain a list of strings. If a category has no relevant information, the list should be empty or null.

**Data Input:**

You will receive a list of dictionaries, where each dictionary has two keys: "id" and "value". The "id" is a unique identifier for the resume, and the "value" contains the resume text.
{{
    "id": "xxx",
    "value": "Resume Text"
}}

**Output Format:**

Return a dictionary where the key is the "id" from the input and the value is a ParsedContent object (represented as a dictionary).

```json
{{
    "xxx": {{
        "Experience": ["list of experience-related strings"],
        "Education": ["list of education-related strings"],
        "Skill": ["list of skill-related strings"],
        "Project": ["list of project-related strings"],
        "PersonalInformation": ["list of personal information strings or null"],
        "Others": ["list of other relevant strings or null"]
    }}
}}
```
INPUT: {content}
RESPONSE:
"""

        self.chain = (
            PromptTemplate(
                input_variables=["content"],
                template=self.prompt,
            )
            | self.model
            | StrOutputParser()
            | utils.extract_json_from_string
        )

    def __call__(self, content: list[schema_agent.DataInput]) -> dict:
        """Run the extraction chain on ``content``.

        Args:
            content: Resume input items, substituted into the prompt's
                ``{content}`` placeholder. The prompt describes each item as
                having an "id" and a "value".

        Returns:
            The value produced by ``utils.extract_json_from_string`` from the
            model reply; the prompt asks for a dict keyed by input id whose
            values map each category name to a list of strings.
        """
        return self.chain.invoke({"content": content})


# Ensure your API key is set as an environment variable
# os.environ["GOOGLE_API_KEY"] = "YOUR_API_KEY"
# model = init_chat_model(
#     model="gemma-3n-e2b-it", model_provider="google_genai", temperature=0
# )


# agent = SentenceClassificationAgent(llm=model)
# # Example usage
# content = "Deployed the applications on Web Logic Application Server. Used Java Messaging Services (JMS) and Backend messaging for reliable and asynchronous exchange of important information such as payment status report."
# result = agent(content)
# print(result)  # Should print the classified sentences in JSON format


In [11]:
class EvaluationAgent(BaseAgent):
    """Agent that asks a chat model to score resumes against job descriptions.

    The pipeline renders the prompt with ``PromptTemplate``, sends it to the
    model, reduces the reply to plain text and hands that text to
    ``utils.extract_json_from_string``.
    """

    _NAME = "evaluation_agent"

    def __init__(self, llm: BaseChatModel):
        """Initialise the base agent and assemble the evaluation chain.

        Args:
            llm: Chat model that will execute the evaluation prompt.
        """
        # Initialise BaseAgent the same way the other agents in this file do;
        # without this call any state BaseAgent sets up is missing.
        super().__init__(name=self._NAME, llm=llm)
        self.model = llm
        # Doubled braces are literal braces for PromptTemplate; ``{data}`` is
        # the only placeholder. The example resume lists "Python" so that the
        # example reasoning ("Matching Skills: Python, Java, Communication")
        # is actually supported by the example input.
        self.prompt = """
You are an expert in evaluating job applications and assessing candidate suitability. Your task is to analyze a Job Description (JD) and a Resume and assign a match score (0-10) reflecting the degree of compatibility. Provide a detailed justification for the score, explaining the reasoning behind your assessment. The output should be a JSON formatted list containing the score and the corresponding reasoning.

**Instructions:**

1. **Analyze the Job Description (JD) and Resume:** Carefully read both documents to understand the requirements of the job and the candidate's qualifications.  The JD and Resume are provided in a structured format, with key information already extracted and categorized.
2. **Calculate the Match Score:** Assign a score between 0 and 10, where:
    * **0-2:** Poor or irrelevant resume. The resume's skills and experience are largely unrelated to the job requirements.
    * **3-4:** Weak match. The resume demonstrates some relevant skills or experience, but there are significant gaps.
    * **5-6:** Moderate match. The resume demonstrates a reasonable alignment with the job requirements, but there are notable areas for improvement.
    * **7-8:** Good match. The resume demonstrates a strong alignment with the job requirements, with only minor skill differences or gaps in experience.
    * **9-10:** Strong match. The resume is highly relevant to the job requirements, demonstrating a comprehensive alignment of skills and experience.
3. **Provide a Detailed Justification:** Explain *why* you assigned the score. Specifically, consider the following, referencing the provided structured data:
    * **Matching Skills:** Identify skills present in both the JD's "Skill" list and the Resume's "Skill" list.  Quantify the overlap (e.g., "3 out of 5 required skills are present").
    * **Relevant Experience:** Identify experience entries in the Resume's "Experience" list that align with the requirements described in the JD's "Experience" list.  Focus on matching job titles, responsibilities, and quantifiable achievements.
    * **Gaps in Qualifications:** Identify skills and experience listed in the JD's "Skill" and "Experience" lists that are *not* present in the corresponding lists in the Resume.
    * **Education Alignment:** Compare the education requirements in the JD's "Education" list with the candidate's education listed in the Resume's "Education" list.
    * **Project Relevance:** Assess the relevance of projects listed in the Resume's "Project" list to the requirements or desired experience outlined in the JD.
    * **Overall Fit:** Provide a holistic assessment of how well the candidate's profile aligns with the overall requirements of the role, considering all the above factors.
4. **Maintain a Professional and Objective Tone:** Avoid subjective opinions or personal biases. Focus on factual evidence from the JD and Resume, as presented in the structured data.  Use specific examples from the data to support your reasoning.

**Input:**
[
    {{
        "id": "123",
        "job_description": {{
            "Experience": ["5+ years of software development experience", "Experience with Agile methodologies"],
            "Education": ["Bachelor's degree in Computer Science"],
            "Skill": ["Python", "Java", "Communication", "Problem-solving"],
            "Project": ["Experience with cloud-based applications"],
            "PersonalInformation": null,
            "Others": []
        }},
        "resume": {{
            "Experience": ["Software Engineer at Google, 2018-2023", "Developed and maintained Android applications"],
            "Education": ["Bachelor of Science in Computer Science, Stanford University"],
            "Skill": ["Python", "Java", "Communication", "Teamwork"],
            "Project": ["Developed a mobile application for Android"],
            "PersonalInformation": ["John Doe"],
            "Others": []
        }}
    }}
]

**Output:**

```json
[
  {{
    "id": "123",
    "score": 8,
    "reasoning": "The candidate demonstrates a good match with the job description. Matching Skills: Python, Java, Communication. Relevant Experience: The candidate has 5 years of software development experience at Google, aligning with the requirement of 5+ years. Education Alignment: The candidate holds a BS in Computer Science, fulfilling the education requirement. Gaps in Qualifications: The resume does not explicitly mention experience with Agile methodologies. Overall Fit: The candidate's profile aligns well with the requirements, with a minor gap in Agile experience."
  }}
]
```

Input: {data}
Response: 
"""
        self.chain = (
            PromptTemplate(
                input_variables=["data"],
                template=self.prompt,
            )
            | self.model
            | StrOutputParser()
            | utils.extract_json_from_string
        )

    def __call__(self, data: list[dict]) -> list[dict]:
        """Run the evaluation chain on ``data``.

        Args:
            data: Items to evaluate, substituted into the prompt's ``{data}``
                placeholder. The prompt's example shows each item with the
                keys "id", "job_description" and "resume".

        Returns:
            The value produced by ``utils.extract_json_from_string`` from the
            model reply; the prompt asks for a list of objects with the keys
            "id", "score" and "reasoning".
        """
        return self.chain.invoke({"data": data})