In [1]:
import os, getpass
env_path = '.env'
from dotenv import load_dotenv
import json
from langchain_openai import AzureOpenAIEmbeddings, AzureChatOpenAI
load_dotenv(env_path)
from datetime import datetime
from langchain_core.pydantic_v1 import constr, BaseModel, Field, validator
from langgraph.graph import MessagesState
from langchain_core.messages import HumanMessage, SystemMessage,AIMessage
from langchain_core.prompts.chat import ChatPromptTemplate,MessagesPlaceholder
from langchain.pydantic_v1 import BaseModel, Field
from typing_extensions import TypedDict, Annotated
from langgraph.graph import MessagesState, END
from langgraph.types import Command
from langgraph.checkpoint.memory import MemorySaver
from langchain.tools import StructuredTool
from langgraph.graph import START, StateGraph
from langgraph.prebuilt import tools_condition
from langgraph.prebuilt import ToolNode
from langchain_core.runnables import Runnable, RunnableConfig
from langchain_community.tools import TavilySearchResults
from langgraph.prebuilt import create_react_agent
from typing import Literal, Optional, List, Dict, Any
from langchain_core.tools import tool
import functools
import pandas as pd
import pymupdf4llm
from docx import Document
import win32com.client


# Shared Azure OpenAI chat client. Every connection setting is looked up in
# the process environment; a missing variable is passed through as None.
# NOTE(review): temperature=0.7 makes repeated extractions of the same resume
# differ from run to run -- confirm this is intended for a parsing task.
llm = AzureChatOpenAI(
    azure_endpoint=os.environ.get('AZURE_OPENAI_ENDPOINT'),
    azure_deployment=os.environ.get('AZURE_GPT35_MODEL'),
    openai_api_version=os.environ.get('AZURE_OPENAI_VERSION'),
    api_key=os.environ.get('AZURE_OPENAI_API_KEY'),
    temperature=0.7,
)


For example, replace imports like: `from langchain_core.pydantic_v1 import BaseModel`
with: `from pydantic import BaseModel`
or the v1 compatibility namespace if you are working in a code base that has not been fully upgraded to pydantic 2 yet. 	from pydantic.v1 import BaseModel

  exec(code_obj, self.user_global_ns, self.user_ns)

For example, replace imports like: `from langchain.pydantic_v1 import BaseModel`
with: `from pydantic import BaseModel`
or the v1 compatibility namespace if you are working in a code base that has not been fully upgraded to pydantic 2 yet. 	from pydantic.v1 import BaseModel

  exec(code_obj, self.user_global_ns, self.user_ns)


In [2]:

from typing import List, Optional
from pydantic import BaseModel, Field

class CountryCode(BaseModel):
    # Country identifiers in three code formats; each value may be None.
    # Going by the field names these are presumably the ISO 3166-1 alpha-2,
    # alpha-3 and UN numeric codes -- TODO confirm.
    # Under Pydantic v2 semantics an Optional field without a default is still
    # required: the key must be present, but its value may be null.
    IsoAlpha2: Optional[str]
    IsoAlpha3: Optional[str]
    UNCode: Optional[str]

class ResumeCountry(BaseModel):
    # Country associated with the resume; all three fields are nullable.
    Country: Optional[str]
    # Presumably the resume text that supports the chosen country -- TODO confirm.
    Evidence: Optional[str]
    # NOTE: keep this field without a default value. Python performs the
    # assignment before evaluating the annotation, so `= None` here would make
    # the name `CountryCode` refer to None instead of the CountryCode model.
    CountryCode: Optional[CountryCode]

class Email(BaseModel):
    # One entry of ResumeParserData.Email. Both fields are required and
    # non-nullable.
    EmailAddress: str
    # Integer confidence for this entry; the scale is not defined in this notebook.
    ConfidenceScore: int

class PhoneNumber(BaseModel):
    # One entry of ResumeParserData.PhoneNumber.
    #
    # The system prompt tells the model to answer null for anything it cannot
    # find. Details that are frequently absent from a resume (dialling code,
    # formatted form, line type) are therefore nullable, so a null answer is
    # accepted instead of failing validation. They have no default, so the
    # keys are still required in the schema.
    Number: str
    ISDCode: Optional[str]
    OriginalNumber: str
    FormattedNumber: Optional[str]
    Type: Optional[str]
    # Integer confidence for this entry; the scale is not defined in this notebook.
    ConfidenceScore: int

class Name(BaseModel):
    # Candidate name broken into parts.
    #
    # The system prompt tells the model to answer null for anything it cannot
    # find. A title and a middle name are often missing from a resume, so both
    # are nullable; a null answer is accepted instead of failing validation.
    # They have no default, so the keys are still required in the schema.
    FullName: str
    TitleName: Optional[str]
    FirstName: str
    MiddleName: Optional[str]
    LastName: str
    FormattedName: str
    # Integer confidence for this entry; the scale is not defined in this notebook.
    ConfidenceScore: int

class Address(BaseModel):
    # Location details; every field is nullable. Used for
    # ResumeParserData.Address entries and for Institution.Location.
    City: Optional[str]
    State: Optional[str]
    StateIsoCode: Optional[str]
    Country: Optional[str]
    # NOTE: keep this field without a default value. Python performs the
    # assignment before evaluating the annotation, so `= None` here would make
    # the name `CountryCode` refer to None instead of the CountryCode model.
    CountryCode: Optional[CountryCode]
    FormattedAddress: Optional[str]
    Type: Optional[str]
    # Integer confidence for this entry; the scale is not defined in this notebook.
    ConfidenceScore: Optional[int]

class Degree(BaseModel):
    # Degree details nested under SegregatedQualification.Degree. All fields
    # are required and non-nullable.
    DegreeName: str
    # Presumably a canonical form of DegreeName -- TODO confirm.
    NormalizeDegree: str
    Specialization: List[str]
    # Integer confidence for this entry; the scale is not defined in this notebook.
    ConfidenceScore: int

class Institution(BaseModel):
    # Institution nested under SegregatedQualification.Institution. All fields
    # are required and non-nullable.
    Name: str
    Type: str
    # Reuses the Address model, whose own fields are all nullable.
    Location: Address
    # Integer confidence for this entry; the scale is not defined in this notebook.
    ConfidenceScore: int

class SegregatedQualification(BaseModel):
    # One education entry: the institution, the degree and the study period.
    #
    # NOTE: keep `Institution` and `Degree` without default values. Python
    # performs the assignment before evaluating the annotation, so a default
    # would rebind those names inside the class body and hide the models.
    Institution: Institution
    Degree: Degree
    # The system prompt tells the model to answer null for anything it cannot
    # find, and resumes often omit study dates. The period fields are
    # therefore nullable so a null answer is accepted instead of failing
    # validation; they have no default, so the keys are still required.
    FormattedDegreePeriod: Optional[str]
    StartDate: Optional[str]
    EndDate: Optional[str]

class SegregatedSkill(BaseModel):
    # One skill entry of ResumeParserData.SegregatedSkill.
    Skill: str
    Type: str
    # Nullable: the system prompt forbids inventing data and asks for null when
    # a value is unknown. A plain `int` would force the model either to make up
    # a duration or to fail validation. No default, so the key stays required.
    ExperienceInMonths: Optional[int]
    LastUsed: Optional[str]
    # Presumably the resume text that supports this skill -- TODO confirm.
    Evidence: Optional[str]

class SegregatedExperience(BaseModel):
    # One employment entry of ResumeParserData.SegregatedExperience.
    # Employer, title, description and start date are required and
    # non-nullable; the location parts and the end date are nullable.
    EmployerName: str
    JobTitle: str
    JobDescription: str
    City: Optional[str]
    State: Optional[str]
    Country: Optional[str]
    StartDate: str
    # Nullable, presumably to represent a position that is still held -- TODO confirm.
    EndDate: Optional[str]

class ResumeParserData(BaseModel):
    # Full parsed-resume payload; the single field of FinalOutput.
    #
    # NOTE: several fields share their name with the model they are annotated
    # with (ResumeCountry, Name, Email, PhoneNumber, Address, Segregated*).
    # Keep those fields without default values: Python performs the assignment
    # before evaluating the annotation, so a default would rebind the name
    # inside this class body and hide the model.
    #
    # NOTE(review): ResumeFileName and ParsingDate are not part of the text
    # placed in the prompt, so the model can only guess them; consider filling
    # them in from code instead.
    ResumeFileName: str
    # Untyped dict; its keys are not defined anywhere in this notebook.
    ResumeLanguage: dict
    ParsingDate: str
    ResumeCountry: ResumeCountry
    Name: Name
    DateOfBirth: Optional[str]
    Email: List[Email]
    PhoneNumber: List[PhoneNumber]
    Address: List[Address]
    SegregatedQualification: List[SegregatedQualification]
    SegregatedSkill: List[SegregatedSkill]
    SegregatedExperience: List[SegregatedExperience]
    Certification: Optional[str]
    Summary: Optional[str]

class FinalOutput(BaseModel):
    # Top-level schema handed to llm.with_structured_output; it wraps the
    # parsed resume under a single "ResumeParserData" key.
    # NOTE: keep this field without a default value. Python performs the
    # assignment before evaluating the annotation, so a default would make the
    # name `ResumeParserData` refer to that default instead of the model.
    ResumeParserData: ResumeParserData

In [3]:
# Wrap the chat model so that its reply is parsed against the FinalOutput
# schema (LangChain structured output).
# NOTE: the variable name is a misspelling of "structured_llm"; it is kept
# because a later cell calls it under this exact name.
strctured_llm = llm.with_structured_output(schema=FinalOutput)



In [6]:
def pdf_to_txt_convertor(file_path_name):
    """
    Render a PDF file as Markdown text for the LLM extraction step.

    Args:
        file_path_name: location of the PDF, passed straight to
            ``pymupdf4llm.to_markdown``.

    Returns:
        The Markdown produced by pymupdf4llm, or None when the conversion
        raises (the error is printed, not re-raised).
    """
    try:
        return pymupdf4llm.to_markdown(file_path_name)
    except Exception as err:
        # Best-effort: report the failure and let the caller deal with None.
        print(f'pdf extraction error {file_path_name}:{err}')
        return None
            

In [7]:
def docx_to_txt_convertor(file_path_name):
    """
    Convert a .docx file to plain text for the LLM extraction step.

    Body paragraphs are emitted first, one per line, exactly as before. Text
    held in tables (a common resume layout for skills and work history) is
    then appended, one line per table row with cells joined by ' | ', so it is
    no longer silently dropped. A document without tables yields the same
    output as the paragraph-only version.

    Args:
        file_path_name: location of the .docx file, passed to ``Document``.

    Returns:
        The extracted text, or None when the file cannot be read (the error is
        printed, not re-raised).
    """
    try:
        doc = Document(file_path_name)
        lines = [paragraph.text for paragraph in doc.paragraphs]
        for table in doc.tables:
            for row in table.rows:
                row_cells = []
                for cell in row.cells:
                    cell_text = cell.text.strip()
                    # python-docx repeats a horizontally merged cell once per
                    # spanned column; skip empty cells and adjacent repeats.
                    if cell_text and (not row_cells or row_cells[-1] != cell_text):
                        row_cells.append(cell_text)
                if row_cells:
                    lines.append(' | '.join(row_cells))
        return '\n'.join(lines)
    except Exception as e:
        # Best-effort: report the failure and let the caller deal with None.
        print(f'Docx Conversion Error:{file_path_name}:{e}')
        return None

In [8]:
# NOTE(review): disabled draft of a legacy .doc converter. It is wrapped in a
# string literal, so nothing below is defined or executed; the cell merely
# evaluates to this string. Issues to fix before enabling it:
#   - `txt` is assigned but never returned, so the function would return None
#     even on success;
#   - `word.visile` and `word.Docment.open` look like misspellings (probably
#     `Visible` and `Documents.Open` in the Word COM API -- TODO confirm);
#   - Word is not closed when an exception is raised part-way through.
'''
def doc_to_txt_convertor(file_path_name):
    """
    Converting doc to text.
    Text is provided to LLM for exraction
    """
    try:
        word = win32com.client.Dispatch('word.Application')
        word.visile = False
        doc = word.Docment.open(file_path_name)
        txt = doc.Content.Text
        doc.Close()
        word.Quit()
    except Exception as e:
        print(f'Doc Conversion Error:{file_path_name}:{e}')
        return None
'''

'\ndef doc_to_txt_convertor(file_path_name):\n    """\n    Converting doc to text.\n    Text is provided to LLM for exraction\n    """\n    try:\n        word = win32com.client.Dispatch(\'word.Application\')\n        word.visile = False\n        doc = word.Docment.open(file_path_name)\n        txt = doc.Content.Text\n        doc.Close()\n        word.Quit()\n    except Exception as e:\n        print(f\'Doc Conversion Error:{file_path_name}:{e}\')\n        return None\n'

In [9]:
def extract_txt_from_resume(file_path_name):
    """
    Extract the text of a resume, choosing the converter by file extension.

    The extension is matched case-insensitively ('.pdf' / '.PDF', '.docx' /
    '.DOCX'), and path-like objects such as ``pathlib.Path`` are accepted as
    well as plain strings.

    Args:
        file_path_name: path of the resume file (str or os.PathLike).

    Returns:
        The text returned by the matching converter, or None when the format
        is unsupported or extraction fails (a message is printed either way).
    """
    try:
        # os.fspath leaves str unchanged, unwraps path-like objects and raises
        # TypeError for anything else, which is reported by the handler below.
        normalized_name = os.fspath(file_path_name).lower()
        if normalized_name.endswith('.pdf'):
            return pdf_to_txt_convertor(file_path_name)
        elif normalized_name.endswith('.docx'):
            return docx_to_txt_convertor(file_path_name)
        else:
            print(f'unsuported file format: {file_path_name}')
            return None
    except Exception as e:
        print(f'Resume text extraction error:{e}')
        return None

In [10]:
# Chat prompt for the extraction call: a fixed system instruction followed by
# the resume text supplied through the {text} variable.
prompt_template = ChatPromptTemplate.from_messages([
    (
        "system",
        # Adjacent string literals are concatenated verbatim, so each sentence
        # carries its own trailing space; without it the model receives
        # "resume.If ..." and "null.Do ..." run together.
        "You are specialized agent to provide extracted information from resume. "
        "If the value is not known fill value with null. "
        "Do not make or create or generate any information which is not provided"
    ),
    (
        "human","{text}"
    )
])

In [11]:
# Extract the resume text before building the prompt. extract_txt_from_resume
# returns None on an unsupported format or a failed conversion; formatting that
# into the template would send the model a prompt with no resume content, so
# stop here instead. An empty extraction is rejected for the same reason.
resume_path = "john doe.pdf"
resume_text = extract_txt_from_resume(resume_path)
if not resume_text:
    raise ValueError(f"No text could be extracted from {resume_path!r}")
prompt = prompt_template.invoke({"text": resume_text})

In [12]:
# Send the formatted prompt to the structured-output model. The result is
# expected to be a FinalOutput instance (the schema the model was wrapped
# with); the next cell serialises it.
llm_response = strctured_llm.invoke(prompt)

In [13]:
# Serialise the parsed model to pretty-printed JSON.
# Pydantic v2 renamed .dict() to .model_dump() and deprecated the old name;
# prefer the new method when it exists and fall back for v1-style models.
if hasattr(llm_response, "model_dump"):
    response_dict = llm_response.model_dump()
else:
    response_dict = llm_response.dict()
response_json = json.dumps(response_dict, indent=4)

In [14]:
# print() rather than a bare expression: the JSON string contains newlines,
# which a bare expression would display escaped as "\n" inside a quoted repr.
print(response_json)

{
    "name": "John Doe",
    "contact": "+91-9008198377",
    "email": "john.doe@hotmail.com",
    "dob": null,
    "address": "Mumbai",
    "job_role": null,
    "skills": null,
    "years_of_experience": null,
    "company": null,
    "education": null,
    "education_institute": null,
    "education_year": null,
    "education_degree": null,
    "course_startdate": null,
    "course_enddate": null,
    "certification": null,
    "number_of_certifications": null,
    "awards": null,
    "refernces": null,
    "miscellaneous": null,
    "summary": "John Doe is a highly experienced professional with 19 years of international leadership experience in project and process management. He is skilled in procurement solutions, category management, sourcing, and supplier relationship management. John holds a Master's in Information Management from Jamnalal Bajaj Institute of Management Studies, a Bachelor of Computer Application from Madurai Kamaraj University, and a Diploma in Electronics & 