In [1]:
"""
Streamlit app: CV Creation using LLMs (minimal, runnable single-file demo)

Features included:
- Upload PDF or DOCX resume -> extract text
- Paste job description
- Call local Ollama (Gemma 3 1B) for: resume extraction JSON, JD parsing, tailoring
- Create a DOCX of the tailored resume and provide download link

Requirements (install):
pip install streamlit pdfplumber python-docx requests pydantic

Notes:
- Assumes Ollama running locally at http://localhost:11434 and model 'gemma-3-1b' available.
- This is a minimal demo for capstone work; replace prompt templates & safety checks in production.

Run: streamlit run streamlit_cv_llm_demo.py
"""

import streamlit as st
import pdfplumber
from docx import Document
import requests
import json
import io
from typing import Dict, Any
from pydantic import BaseModel

OLLAMA_URL = st.secrets.get("OLLAMA_URL", "http://localhost:11434")
MODEL = st.secrets.get("OLLAMA_MODEL", "gemma-3-1b")

# ---------------------- Utilities: extraction ----------------------

def pdf_to_text(file_bytes: bytes) -> str:
    text_pages = []
    with pdfplumber.open(io.BytesIO(file_bytes)) as pdf:
        for p in pdf.pages:
            text_pages.append(p.extract_text() or "")
    return "\n".join(text_pages)


def docx_to_text(file_bytes: bytes) -> str:
    doc = Document(io.BytesIO(file_bytes))
    return "\n".join([p.text for p in doc.paragraphs])


# ---------------------- Ollama minimal client ----------------------

def call_ollama(prompt: str, model: str = MODEL, max_tokens: int = 800) -> str:
    """Call Ollama's text generation API. Returns textual output (string).
    Minimal POST to /api/generate per Ollama HTTP API conventions.
    """
    url = f"{OLLAMA_URL}/api/generate"
    headers = {"Content-Type": "application/json"}
    payload = {
        "model": model,
        "prompt": prompt,
        "max_tokens": max_tokens,
        "temperature": 0.0
    }
    try:
        r = requests.post(url, headers=headers, data=json.dumps(payload), timeout=30)
        r.raise_for_status()
        data = r.json()
        # Ollama's API returns a `choices` list depending on version; handle common shapes
        if "choices" in data and len(data["choices"])>0:
            # many Ollama versions put content in choices[0].content
            c = data["choices"][0]
            if isinstance(c, dict) and "message" in c:
                return c["message"].get("content","")
            return c.get("content", "")
        # fallbacks
        return data.get("text", "")
    except Exception as e:
        return f"ERROR_CALLING_OLLAMA: {e}"


# ---------------------- Prompts & LLM wrappers ----------------------

RESUME_EXTRACTION_PROMPT = """
You are a JSON extractor. Given the text of a resume, extract and output only a valid JSON object with the fields:
name, contact {email, phone}, location, summary, education [ {school, degree, start, end} ],
experience [ {title, company, start, end, bullets} ], skills [list], projects [list], achievements [list].
If a field is missing, return an empty string or empty list.
Resume text:
"""

JD_EXTRACTION_PROMPT = """
You are a job description parser. Given the job description text, extract and output only a valid JSON object with fields:
job_title, seniority, location, required_skills (list), preferred_skills (list), responsibilities (list), keywords (list).
Job description:
"""

TAILOR_PROMPT = """
You are an ATS-aware resume writer. Given resume JSON and job JSON (below), rewrite the resume JSON to align better with the job.
- Emphasize measurable achievements, use active verbs, and include keywords from the job where relevant.
- Do NOT invent facts; if unsure, leave original text.
- Output only the updated resume JSON.

Resume JSON:
{resume_json}

Job JSON:
{job_json}
"""


def extract_resume_json(resume_text: str) -> Dict[str, Any]:
    prompt = RESUME_EXTRACTION_PROMPT + "\n" + resume_text
    out = call_ollama(prompt)
    # try to parse returned JSON safely
    try:
        parsed = json.loads(out)
        return parsed
    except Exception:
        # attempt to extract json substring
        start = out.find('{')
        end = out.rfind('}')
        if start != -1 and end != -1:
            try:
                return json.loads(out[start:end+1])
            except Exception:
                return {"error":"could_not_parse_llm_output","raw":out}
        return {"error":"no_json","raw":out}


def parse_jd(jd_text: str) -> Dict[str, Any]:
    prompt = JD_EXTRACTION_PROMPT + "\n" + jd_text
    out = call_ollama(prompt)
    try:
        return json.loads(out)
    except Exception:
        start = out.find('{')
        end = out.rfind('}')
        if start != -1 and end != -1:
            try:
                return json.loads(out[start:end+1])
            except Exception:
                return {"error":"could_not_parse_jd","raw":out}
        return {"error":"no_json","raw":out}


def tailor_resume(resume_json: Dict[str, Any], job_json: Dict[str, Any]) -> Dict[str, Any]:
    prompt = TAILOR_PROMPT.format(resume_json=json.dumps(resume_json, ensure_ascii=False), job_json=json.dumps(job_json, ensure_ascii=False))
    out = call_ollama(prompt, max_tokens=1200)
    try:
        return json.loads(out)
    except Exception:
        start = out.find('{')
        end = out.rfind('}')
        if start != -1 and end != -1:
            try:
                return json.loads(out[start:end+1])
            except Exception:
                return {"error":"could_not_parse_tailored","raw":out}
        return {"error":"no_json_tailored","raw":out}


# ---------------------- Document generation ----------------------

def create_docx(resume_json: Dict[str, Any]) -> bytes:
    doc = Document()
    name = resume_json.get('name','')
    doc.add_heading(name, level=0)
    contact = resume_json.get('contact', {})
    contact_line = []
    if contact.get('email'): contact_line.append(contact.get('email'))
    if contact.get('phone'): contact_line.append(contact.get('phone'))
    if resume_json.get('location'): contact_line.append(resume_json.get('location'))
    if contact_line:
        doc.add_paragraph(' | '.join(contact_line))
    if resume_json.get('summary'):
        doc.add_heading('Summary', level=1)
        doc.add_paragraph(resume_json.get('summary'))

    if resume_json.get('education'):
        doc.add_heading('Education', level=1)
        for edu in resume_json.get('education'):
            line = f"{edu.get('degree','')} — {edu.get('school','')}"
            dates = f" ({edu.get('start','')} - {edu.get('end','')})" if (edu.get('start') or edu.get('end')) else ''
            p = doc.add_paragraph()
            p.add_run(line).bold = True
            p.add_run(dates)

    if resume_json.get('experience'):
        doc.add_heading('Experience', level=1)
        for exp in resume_json.get('experience'):
            p = doc.add_paragraph()
            title_company = f"{exp.get('title','')} — {exp.get('company','')}"
            p.add_run(title_company).bold = True
            if exp.get('start') or exp.get('end'):
                p.add_run(f" ({exp.get('start','')} - {exp.get('end','')})")
            bullets = exp.get('bullets', []) or []
            for b in bullets:
                doc.add_paragraph(b, style='List Bullet')

    if resume_json.get('skills'):
        doc.add_heading('Skills', level=1)
        doc.add_paragraph(', '.join(resume_json.get('skills')))

    # save to bytes
    f = io.BytesIO()
    doc.save(f)
    f.seek(0)
    return f.read()


# ---------------------- Streamlit UI ----------------------

st.set_page_config(page_title="CV Creation using LLMs — Demo", layout="wide")
st.title("CV Creation using LLMs — Minimal Demo")

col1, col2 = st.columns([1,2])

with col1:
    st.header("1) Upload resume")
    uploaded = st.file_uploader("Upload PDF or DOCX resume", type=["pdf","docx"]) 
    if uploaded:
        raw = uploaded.read()
        if uploaded.type == "application/pdf" or uploaded.name.lower().endswith('.pdf'):
            resume_text = pdf_to_text(raw)
        else:
            resume_text = docx_to_text(raw)
        st.text_area("Extracted resume text (editable)", value=resume_text, height=300, key='res_text')
    else:
        resume_text = st.session_state.get('res_text', '')

    st.header("2) Paste job description")
    jd_text = st.text_area("Job description", height=200)

    run_extract = st.button("Extract Resume -> JSON")
    if run_extract:
        if not resume_text.strip():
            st.error("Please upload a resume first.")
        else:
            with st.spinner("Calling local LLM to extract resume JSON..."):
                res_json = extract_resume_json(resume_text)
                st.session_state['resume_json'] = res_json
                st.success("Extraction complete — check JSON on the right.")

    run_parse_jd = st.button("Parse Job Description -> JSON")
    if run_parse_jd:
        if not jd_text.strip():
            st.error("Please paste job description text.")
        else:
            with st.spinner("Parsing job description with LLM..."):
                job_json = parse_jd(jd_text)
                st.session_state['job_json'] = job_json
                st.success("Job parsed — check JSON on the right.")

    run_tailor = st.button("Tailor Resume to Job")
    if run_tailor:
        if 'resume_json' not in st.session_state:
            st.error("Run resume extraction first.")
        elif 'job_json' not in st.session_state:
            st.error("Parse the job description first.")
        else:
            with st.spinner("Generating tailored resume..."):
                tailored = tailor_resume(st.session_state['resume_json'], st.session_state['job_json'])
                st.session_state['tailored_json'] = tailored
                st.success("Tailoring complete — download on the right.")

with col2:
    st.header("JSON outputs & Download")
    st.subheader("Extracted Resume JSON")
    st.code(json.dumps(st.session_state.get('resume_json', {}), indent=2, ensure_ascii=False))

    st.subheader("Parsed Job JSON")
    st.code(json.dumps(st.session_state.get('job_json', {}), indent=2, ensure_ascii=False))

    st.subheader("Tailored Resume JSON")
    st.code(json.dumps(st.session_state.get('tailored_json', {}), indent=2, ensure_ascii=False))

    if st.session_state.get('tailored_json'):
        st.download_button("Download tailored CV (DOCX)", data=create_docx(st.session_state['tailored_json']), file_name="tailored_cv.docx", mime="application/vnd.openxmlformats-officedocument.wordprocessingml.document")

    st.markdown("---")
    st.markdown("**Tips & next steps:**\n\n- For production, add verification UI so users confirm dates and claims before finalizing.\n- Replace zero-temp LLM calls with controlled prompting and add unit tests for parser outputs.\n- Add ATS-simulated scoring and a lightweight UI to highlight missing keywords.")

st.markdown("---")
st.caption("This demo calls a local Ollama instance. If you get errors referencing Ollama, make sure Ollama is running and the model is pulled. See app header comments for requirements.")


StreamlitSecretNotFoundError: No secrets found. Valid paths for a secrets.toml file or secret directories are: C:\Users\LENOVO\.streamlit\secrets.toml, c:\Users\LENOVO\Documents\1.Personal\0.Projects\Github\IIT-Kharagpur-AI4ICPS-Certificate-Programme\HAAI++\Week-9\.streamlit\secrets.toml