In [34]:
from langgraph.graph import StateGraph, END
from backend.prompt.prompt import OFFER_EXTRACTION_PROMPT
from typing import TypedDict
from backend.utils.parsers import parse_offer_file
from backend.llm.gemini import gemini_chat

In [35]:
class OfferAnalyzerState(TypedDict):
    file_path: str
    raw_text: str
    offer_components: dict

In [36]:
def extract_offer_text(state: OfferAnalyzerState) -> OfferAnalyzerState:
    file_path = state["file_path"]
    raw_text = parse_offer_file(file_path)
    return {**state, "raw_text": raw_text}

# Step 2: Use Gemini to extract structured components
def analyze_offer_with_llm(state: OfferAnalyzerState) -> OfferAnalyzerState:
    
    response = gemini_chat(
        OFFER_EXTRACTION_PROMPT.format(offer_text=state["raw_text"]),
    )
    
    print("==========", response)
    try:
        structured_data = eval(response) if isinstance(response, str) else response
    except Exception as e:
        structured_data = {"error": str(e)}

    return {**state, "offer_components": structured_data}

In [37]:
builder = StateGraph(OfferAnalyzerState)
builder.add_node("extract_text", extract_offer_text)
builder.add_node("llm_analysis", analyze_offer_with_llm)

builder.set_entry_point("extract_text")
builder.add_edge("extract_text", "llm_analysis")
builder.add_edge("llm_analysis", END)
app = builder.compile()

In [38]:
result = app.invoke({"file_path": "uploads/two.pdf"})
result["offer_components"]



AIMessage(content='```python\njob_offer_data = {\n    "ctc": None,  # Not mentioned in the provided text\n    "bonuses": None,  # Not mentioned in the provided text\n    "job_title": "Software Engineer",\n    "responsibilities": "Worked in a fast-paced startup environment...building scalable web applications by working across both frontend and backend development. Integrated technologies such as Elasticsearch for advanced search capabilities and RabbitMQ for reliable asynchronous communication.",\n    "location": "Coimbatore",\n    "bond_duration": None,  # Not mentioned in the provided text\n    "benefits": None,  # Not mentioned in the provided text\n}\n\nprint(job_offer_data)\n\n```', additional_kwargs={}, response_metadata={'prompt_feedback': {'block_reason': 0, 'safety_ratings': []}, 'finish_reason': 'STOP', 'model_name': 'gemini-1.5-flash', 'safety_ratings': []}, id='run--3af5b95a-26f4-4582-a546-a7114c376af7-0', usage_metadata={'input_tokens': 757, 'output_tokens': 154, 'total_to