In [1]:
# Use the %pip magic so the package lands in the running kernel's environment; pinned for reproducibility.
%pip install pymupdf==1.26.7

Collecting pymupdf
  Downloading pymupdf-1.26.7-cp310-abi3-macosx_11_0_arm64.whl.metadata (3.4 kB)
Downloading pymupdf-1.26.7-cp310-abi3-macosx_11_0_arm64.whl (22.5 MB)
   ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 22.5/22.5 MB 29.3 MB/s  0:00:00
Installing collected packages: pymupdf
Successfully installed pymupdf-1.26.7

[notice] A new release of pip is available: 25.2 -> 25.3
[notice] To update, run: pip install --upgrade pip


In [14]:
import fitz  # PyMuPDF
import re

def clean_text_for_pdf(text):
    """
    Normalise text so it renders with the standard PDF fonts.

    Typographic dashes, curly quotes and the bullet glyph are mapped to plain
    ASCII stand-ins, after which markdown bold markers ("**") are dropped.

    Args:
        text: The string to sanitise.

    Returns:
        The sanitised string.
    """
    # One-pass table for the single-character substitutions.
    ascii_table = str.maketrans({
        "\u2013": "-",    # en-dash
        "\u2014": "-",    # em-dash
        "\u2018": "'",    # left single quote
        "\u2019": "'",    # right single quote
        "\u201c": '"',    # left double quote
        "\u201d": '"',    # right double quote
        "\u2022": "*",    # bullet
    })
    # Bold markers are stripped last, so asterisks produced from bullets
    # take part in the "**" removal as well.
    return text.translate(ascii_table).replace("**", "")

def parse_resume_robust(raw_text):
    """
    Split markdown-flavoured resume text into header lines and named sections.

    A line opens a new section when, after removing "*" and ":", it matches a
    known header name (case-insensitive), or when it is wrapped in "**" and its
    stripped text is shorter than 40 characters. Lines before the first
    section form the header. Lines reading "Plan" / "Resume" are skipped, and
    parsing stops at the first line containing "important considerations".

    Args:
        raw_text: The full text to parse.

    Returns:
        dict with two keys:
            "header":   list of header lines (bullets and "plan:" lines
                        filtered out),
            "sections": dict mapping section title -> list of its lines, in
                        order of first appearance. A title that appears more
                        than once accumulates into the same list.
    """
    KNOWN_HEADERS = frozenset((
        "summary", "experience", "education", "skills",
        "projects", "languages", "certifications", "technical skills",
    ))

    data = {
        "header": [],
        "sections": {}
    }

    # None means "still in the header"; a sentinel string such as "header"
    # would collide with a real section that happens to carry that title.
    current_section = None

    for line in raw_text.split('\n'):
        line = line.strip()
        if not line:
            continue

        # Marker-free, lower-cased form used for all header checks
        clean_check = line.replace("*", "").replace(":", "").strip().lower()

        # Lines made only of markup ("**", "*", ":") carry no content and must
        # not become a section with an empty name.
        if not clean_check:
            continue

        # Everything from the advice block onwards is not part of the resume
        if "important considerations" in clean_check:
            break

        if clean_check in ("plan", "resume"):
            continue

        is_known_header = clean_check in KNOWN_HEADERS
        is_bold_title = (
            line.startswith("**") and line.endswith("**") and len(clean_check) < 40
        )

        if is_known_header or is_bold_title:
            current_section = line.replace("*", "").replace(":", "").strip()
            # setdefault keeps lines already collected if the title repeats
            data["sections"].setdefault(current_section, [])
            continue

        if current_section is None:
            # Filter out junk lines in header
            if line.startswith("*") or line.startswith("-"):
                continue
            if "plan:" in line.lower():
                continue
            data["header"].append(line)
        else:
            data["sections"][current_section].append(line)

    return data

def create_resume_pdf(parsed_data, output_filename="generated_resume.pdf"):
    """
    Render parsed resume data into a PDF file using PyMuPDF.

    Args:
        parsed_data: dict with
            "header":   list of str; the first entry is written large and bold,
                        the remaining entries are joined with " | " beneath it,
            "sections": dict mapping section title -> list of str lines.
            Missing keys are treated as empty.
        output_filename: Path of the PDF to write.

    Side effects:
        Writes the PDF to output_filename and prints a confirmation line.
        Prints a warning for any text that does not fit even on a fresh page.
    """
    margin_left = 50
    margin_right = 50
    margin_top = 50
    margin_bottom = 50

    font_reg = "Helvetica"
    font_bold = "Helvetica-Bold"

    doc = fitz.open()
    try:
        page = doc.new_page()
        width, height = page.rect.width, page.rect.height
        y_position = margin_top

        def start_new_page():
            """Append a fresh page and move the cursor back to the top margin."""
            nonlocal y_position, page
            page = doc.new_page()
            y_position = margin_top

        def check_page_break(needed_height):
            """Start a new page when fewer than needed_height points remain."""
            if y_position + needed_height > height - margin_bottom:
                start_new_page()

        def place_textbox(text, size, font, indent):
            """Insert text at the current cursor and return insert_textbox's result."""
            # The rect is built here, after any page break, so it always
            # reflects the cursor position on the page being written to.
            rect = fitz.Rect(
                margin_left + indent, y_position,
                width - margin_right, height - margin_bottom,
            )
            try:
                return page.insert_textbox(rect, text, fontsize=size, fontname=font, align=0)
            except Exception:
                # Best-effort fallback: retry with the library's default font.
                return page.insert_textbox(rect, text, fontsize=size, align=0)

        def write_text(text, size, font, indent=0):
            """Write one block of text at the cursor and advance the cursor."""
            nonlocal y_position

            # Clean the text before writing to fix '?' glyphs and '**' markers
            text = clean_text_for_pdf(text)

            check_page_break(20)

            # insert_textbox returns a negative value (and writes nothing)
            # when the text does not fit into the rect; retry on a new page.
            rc = place_textbox(text, size, font, indent)
            if rc < 0 and y_position > margin_top:
                start_new_page()
                rc = place_textbox(text, size, font, indent)
            if rc < 0:
                print(f"WARNING: text does not fit on a page and was skipped: {text[:40]!r}")

            # Rough height estimate: assume an average glyph width of half the
            # font size and a line pitch of 1.4x the font size.
            chars_per_line = (width - margin_left - margin_right - indent) / (size * 0.5)
            lines_count = (len(text) / chars_per_line) + 1
            y_position += lines_count * size * 1.4 + 1

        # --- RENDER HEADER ---
        header = parsed_data.get("header") or []
        if header:
            # Name
            write_text(header[0], 18, font_bold)
            y_position += 1
            # Contact info (skipped when there is nothing besides the name)
            contact_info = " | ".join(header[1:])
            if contact_info:
                write_text(contact_info, 10, font_reg)
            y_position += 5

        # --- RENDER SECTIONS ---
        for section, lines in (parsed_data.get("sections") or {}).items():
            check_page_break(30)

            # Section title with a rule underneath
            write_text(section.upper(), 12, font_bold)
            page.draw_line((margin_left, y_position - 2), (width - margin_right, y_position - 2))
            y_position += 2  # Small space after line

            for line in lines:
                clean_line = line.strip()

                if "|" in clean_line:
                    # Job title / education lines; write_text strips the '**'
                    write_text(clean_line, 10, font_bold)

                elif clean_line.startswith("**"):
                    # Bold sub-headers. Must be tested before the bullet case:
                    # every "**" line also starts with "*".
                    write_text(clean_line, 10, font_bold)

                elif clean_line.startswith("*") or clean_line.startswith("-"):
                    # Bullet points: drop the leading '*', '-' and spaces
                    txt = clean_line.lstrip("*- ").strip()
                    write_text(f"• {txt}", 10, font_reg, indent=10)

                else:
                    # Normal text
                    write_text(clean_line, 10, font_reg)

            y_position += 5  # Space between sections

        doc.save(output_filename)
    finally:
        # Release the document even if rendering or saving fails
        doc.close()

    print(f"PDF Generated: {output_filename}")




# Sample input for parse_resume_robust: markdown-style text made of a "Plan"
# preamble, the resume itself (section titles wrapped in "**"), and a trailing
# "Important Considerations" advice block. The parser skips the "Plan" /
# "Resume" marker lines and stops reading at "Important Considerations".
raw_input_text = """
**Plan:**

*   Highlight Python, scikit-learn, and PyTorch as core technical skills.
*   Emphasize mathematical foundation and its relevance to ML.
*   Showcase contribution to the CV writing project to demonstrate practical application.
*   Mention UCU CS education to establish a solid academic background.

**Resume:**

[Your Name]
[Your City, State] | [Your Phone Number] | [Your Email Address] | [Your LinkedIn Profile URL (Optional)] | [Your GitHub Profile URL (Optional)]

**Summary**

Results-oriented Machine Learning Engineer with one year of experience developing and implementing machine learning solutions using Python, scikit-learn, and PyTorch.  Possessing a strong mathematical foundation and demonstrated ability to apply machine learning techniques to solve real-world problems.  Eager to contribute to innovative projects and further develop expertise in the field.

**Experience**

**Machine Learning Engineer** | [Company Name - If Applicable, Otherwise Remove] | [City, State] | [Month, Year] – [Present]

*   Developed and evaluated machine learning models using Python, scikit-learn, and PyTorch for [briefly mention project type if available, otherwise omit].
*   Leveraged a strong mathematical background to optimize model performance and interpret results effectively.
*   Contributed to a CV writing project, applying machine learning techniques to [specify tasks performed, e.g., automated skill extraction, resume scoring].
*   [Add another bullet if you have more specific accomplishments from the role. Example: Implemented data preprocessing pipelines for improved model accuracy.]

**Education**

**Bachelor of Computer Science** | Ukrainian Catholic University (UCU) | Lviv, Ukraine | [Year of Graduation]

*   Focused on [mention relevant coursework, e.g., algorithms, data structures, linear algebra, probability and statistics].
*   [Optional: Mention relevant projects or thesis topic if impactful]

**Skills**

*   **Programming Languages:** Python
*   **Machine Learning Libraries:** scikit-learn, PyTorch, Pandas, NumPy
*   **Mathematics:** Linear Algebra, Calculus, Probability & Statistics
*   **Data Manipulation:** Data Cleaning, Feature Engineering
*   **Tools:** Git, [Any other relevant tools you used]

**Projects**

*   **CV Writing Project:** Developed and implemented [briefly describe the machine learning techniques used and the results achieved in the CV writing project]. [Link to GitHub repo if available]



**Important Considerations:**

*   **Replace placeholders:** Fill in the bracketed information with your specific details.
*   **Quantify achievements:** Whenever possible, try to quantify your achievements with numbers (e.g., "Improved model accuracy by X%").
*   **Tailor to the job description:**  Carefully review the job description for the specific role you're applying for and adjust the resume accordingly.  Highlight the skills and experiences that are most relevant to the position.
*   **GitHub/Portfolio:**  If you have a GitHub profile or online portfolio showcasing your projects, include the link.
*   **Formatting:** Ensure the resume is well-formatted, easy to read, and ATS-friendly (avoiding tables or complex layouts).  Use clear section headings and consistent formatting throughout.

"""

if __name__ == "__main__":
    # Parse the sample text, report what was recognised, then render the PDF.
    data = parse_resume_robust(raw_input_text)

    print("Detected Header Lines:", len(data["header"]))
    print("Detected Sections:", list(data["sections"]))

    # Only render when at least one section was recognised.
    if data["sections"]:
        create_resume_pdf(data)
    else:
        print("WARNING: No sections found. Check if your text uses standard headers like 'Experience' or 'Summary'.")

Detected Header Lines: 2
Detected Sections: ['Summary', 'Experience', 'Education', 'Skills', 'Projects']
PDF Generated: generated_resume.pdf
