In [1]:
import os
import json
import fitz
import re
import requests
import yaml
from dotenv import load_dotenv
from pydantic import BaseModel, Field
from langchain.chat_models import ChatOpenAI
from langchain_groq import ChatGroq
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import PydanticOutputParser

# Load environment
load_dotenv()
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
GROQ_API_KEY = os.getenv("GROQ_API_KEY")
GITHUB_TOKEN = os.getenv("GITHUB_ACCESS_TOKEN")

# Load prompts from YAML
with open("prompts.yaml", "r", encoding="utf-8") as f:
    prompts = yaml.safe_load(f)


In [2]:
llm = None

if OPENAI_API_KEY:
    try:
        llm = ChatOpenAI(openai_api_key=OPENAI_API_KEY, model="gpt-4", temperature=0)
        _ = llm.invoke("Test call")
        print("Using OpenAI GPT-4")
    except Exception as e:
        print(f"OpenAI failed: {e}")
        llm = None

if not llm and GROQ_API_KEY:
    try:
        llm = ChatGroq(groq_api_key=GROQ_API_KEY, model_name="llama-3.3-70b-versatile", temperature=0)
        _ = llm.invoke("Test call")
        print(" Fallback to Groq LLaMA3-8B")
    except Exception as e:
        print(f"Groq also failed: {e}")
        llm = None

if not llm:
    raise RuntimeError("Both OpenAI and Groq initialization failed.")


  llm = ChatOpenAI(openai_api_key=OPENAI_API_KEY, model="gpt-4", temperature=0)


OpenAI failed: Error code: 401 - {'error': {'message': 'Incorrect API key provided: sk-4Qj7v***************************************b5aA. You can find your API key at https://platform.openai.com/account/api-keys.', 'type': 'invalid_request_error', 'param': None, 'code': 'invalid_api_key'}}
 Fallback to Groq LLaMA3-8B


In [3]:
class RepoSummary(BaseModel):
    summary: str
    skills_used: dict = Field(default_factory=dict)

class ResumeAnalysis(BaseModel):
    name: str
    phone_number: str = ""
    gmail_email: str = ""
    linkedin_url: str = ""
    github_url: str = ""
    technical_skills: dict = Field(default_factory=dict)
    experience: dict = Field(default_factory=dict)
    projects: dict = Field(default_factory=dict)
    education: dict = Field(default_factory=dict)
    certifications: dict = Field(default_factory=dict)
    algorithms: list = Field(default_factory=list)
    statistics_concepts: list = Field(default_factory=list)
    soft_skills: list = Field(default_factory=list)

class FullAnalysis(BaseModel):
    resume_analysis: ResumeAnalysis
    github_analysis: list[RepoSummary]


In [4]:
from langchain_core.runnables import RunnableLambda

resume_parser = PydanticOutputParser(pydantic_object=ResumeAnalysis)
repo_parser = PydanticOutputParser(pydantic_object=RepoSummary)

resume_prompt = ChatPromptTemplate.from_messages([
    ("system", prompts["resume"]["system"]),
    ("human", prompts["resume"]["human"])
])
resume_chain = resume_prompt.partial(format_instructions=resume_parser.get_format_instructions()) | llm | resume_parser

repo_prompt = ChatPromptTemplate.from_messages([
    ("system", prompts["github"]["system"]),
    ("human", prompts["github"]["human"])
])
repo_chain = repo_prompt.partial(format_instructions=repo_parser.get_format_instructions()) | llm | repo_parser


In [5]:
def extract_text_and_links(pdf_path):
    doc = fitz.open(pdf_path)
    text = "".join(page.get_text() for page in doc)
    links = [link["uri"] for page in doc for link in page.get_links() if "uri" in link]
    return text, links

def extract_github_username(url):
    match = re.search(r"github\.com/([A-Za-z0-9_-]+)", url)
    return match.group(1) if match else None

def fetch_repos(username):
    r = requests.get(f"https://api.github.com/users/{username}/repos", headers={
        "Authorization": f"token {GITHUB_TOKEN}",
        "Accept": "application/vnd.github.v3+json"
    })
    return r.json() if r.status_code == 200 else []

def fetch_readme(username, repo):
    url = f"https://api.github.com/repos/{username}/{repo}/readme"
    r = requests.get(url, headers={"Authorization": f"token {GITHUB_TOKEN}", "Accept": "application/vnd.github.v3.raw"})
    return r.text if r.status_code == 200 else ""

def fetch_languages(username, repo):
    url = f"https://api.github.com/repos/{username}/{repo}/languages"
    r = requests.get(url, headers={"Authorization": f"token {GITHUB_TOKEN}"})
    return r.json() if r.status_code == 200 else {}


In [6]:
input_dir = "C:/Resume_testing/samples"
output_dir = "C:/Resume_testing/newoutputs"
os.makedirs(output_dir, exist_ok=True)

for file in os.listdir(input_dir):
    if not file.endswith(".pdf"):
        continue

    path = os.path.join(input_dir, file)
    text, links = extract_text_and_links(path)

    print(f"Analyzing: {file}")
    resume_result = resume_chain.invoke({"text": text, "links": str(links)})

    # GitHub Repo Analysis
    username = extract_github_username(resume_result.github_url)
    github_results = []

    if username:
        for repo in fetch_repos(username):
            try:
                readme = fetch_readme(username, repo["name"])
                langs = fetch_languages(username, repo["name"])
                result = repo_chain.invoke({
                    "name": repo["name"],
                    "description": repo.get("description", ""),
                    "languages": langs,
                    "readme": readme[:10000]
                })
                github_results.append(result)
            except Exception as e:
                print(f"Error analyzing {repo['name']}: {e}")

    full_analysis = FullAnalysis(resume_analysis=resume_result, github_analysis=github_results)

    name_slug = resume_result.name.replace(" ", "_").lower() if resume_result.name else file.replace(".pdf", "")
    with open(os.path.join(output_dir, f"{name_slug}_analysis.json"), "w", encoding="utf-8") as f:
        json.dump(full_analysis.dict(), f, indent=2)

    print(f"Saved: {name_slug}_analysis.json")


Analyzing: Abhishek Resume.pdf


C:\Users\Vamshi\AppData\Local\Temp\ipykernel_20900\422123532.py:38: PydanticDeprecatedSince20: The `dict` method is deprecated; use `model_dump` instead. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.11/migration/
  json.dump(full_analysis.dict(), f, indent=2)


Saved: abhishek_kumar_analysis.json
Analyzing: AkshayKumar_resume_Intellentech.pdf
Error analyzing Stock-Price-Prediction-Google: Error code: 429 - {'error': {'message': 'Rate limit reached for model `llama-3.3-70b-versatile` in organization `org_01jndj38r7fyy9dkzwxhgm1gny` service tier `on_demand` on tokens per day (TPD): Limit 100000, Used 100152, Requested 1108. Please try again in 18m9.492999999s. Need more tokens? Upgrade to Dev Tier today at https://console.groq.com/settings/billing', 'type': 'tokens', 'code': 'rate_limit_exceeded'}}
Error analyzing YouTube_Data_Harvesting: Error code: 429 - {'error': {'message': 'Rate limit reached for model `llama-3.3-70b-versatile` in organization `org_01jndj38r7fyy9dkzwxhgm1gny` service tier `on_demand` on tokens per day (TPD): Limit 100000, Used 100151, Requested 1223. Please try again in 19m47.4s. Need more tokens? Upgrade to Dev Tier today at https://console.groq.com/settings/billing', 'type': 'tokens', 'code': 'rate_limit_exceeded'}}
Save

C:\Users\Vamshi\AppData\Local\Temp\ipykernel_20900\422123532.py:38: PydanticDeprecatedSince20: The `dict` method is deprecated; use `model_dump` instead. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.11/migration/
  json.dump(full_analysis.dict(), f, indent=2)


RateLimitError: Error code: 429 - {'error': {'message': 'Rate limit reached for model `llama-3.3-70b-versatile` in organization `org_01jndj38r7fyy9dkzwxhgm1gny` service tier `on_demand` on tokens per day (TPD): Limit 100000, Used 100151, Requested 1912. Please try again in 29m42.625s. Need more tokens? Upgrade to Dev Tier today at https://console.groq.com/settings/billing', 'type': 'tokens', 'code': 'rate_limit_exceeded'}}