<a href="https://colab.research.google.com/github/vaibhavjain2005/ResumeAiGenModel/blob/main/final_resume.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
"""
T5 Fine-Tuning Script for Resume Generation (Stable Version)
Optimized for Google Colab Free Tier
- FP32 precision for stability
- Gradient accumulation
- Safe checkpoints and NaN protection
"""

import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

import json
import torch

try:
    from transformers import (
        T5Tokenizer,
        T5ForConditionalGeneration,
        Trainer,
        TrainingArguments,
        DataCollatorForSeq2Seq
    )
    from datasets import Dataset
except Exception as e:
    print("\n" + "="*70)
    print("ERROR: Missing libraries. Run this first:")
    print("!pip install --upgrade transformers datasets torch accelerate sentencepiece")
    print("Then restart runtime and re-run this cell.")
    print("="*70)
    raise

print("="*70)
print("T5 Resume Fine-Tuning Script (Stable)")
print("Optimized for Google Colab Free Tier")
print("="*70)

# ------------------------------------------------------------
# GPU Check
# ------------------------------------------------------------
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"\n✓ Using device: {device}")
if device == "cuda":
    print(f"✓ GPU: {torch.cuda.get_device_name(0)}")
    print(f"✓ GPU Memory: {torch.cuda.get_device_properties(0).total_memory / 1e9:.2f} GB")

torch.autograd.set_detect_anomaly(False)

# ------------------------------------------------------------
# CONFIGURATION
# ------------------------------------------------------------
MODEL_NAME = "google/flan-t5-base"       # Stable for Colab Free Tier
OUTPUT_DIR = "./flan-t5-base-resume-finetuned-stable"
BATCH_SIZE = 2
MAX_INPUT_LENGTH = 128
MAX_OUTPUT_LENGTH = 256
LEARNING_RATE = 2e-4  # safer and more stable

print(f"\n⚙️  MODEL: {MODEL_NAME}")
print(f"✓ Max input length: {MAX_INPUT_LENGTH} tokens")
print(f"✓ Max output length: {MAX_OUTPUT_LENGTH} tokens")

# ------------------------------------------------------------
# LOAD DATASETS
# ------------------------------------------------------------
print("\n" + "-"*70)
print("Loading dataset...")
print("-"*70)

with open("train_data.json", "r") as f:
    train_data = json.load(f)
with open("val_data.json", "r") as f:
    val_data = json.load(f)

print(f"✓ Training examples: {len(train_data)}")
print(f"✓ Validation examples: {len(val_data)}")

# ------------------------------------------------------------
# LOAD MODEL & TOKENIZER
# ------------------------------------------------------------
print("\n" + "-"*70)
print("Loading model and tokenizer...")
print("-"*70)

tokenizer = T5Tokenizer.from_pretrained(MODEL_NAME)
model = T5ForConditionalGeneration.from_pretrained(MODEL_NAME)

print(f"✓ Model parameters: {model.num_parameters() / 1e6:.1f}M")

# ------------------------------------------------------------
# TOKENIZATION FUNCTION
# ------------------------------------------------------------
def preprocess_function(examples):
    inputs = examples["input"]
    targets = examples["output"]

    model_inputs = tokenizer(
        inputs,
        max_length=MAX_INPUT_LENGTH,
        truncation=True,
        padding=False
    )

    labels = tokenizer(
        targets,
        max_length=MAX_OUTPUT_LENGTH,
        truncation=True,
        padding=False
    )

    model_inputs["labels"] = labels["input_ids"]
    return model_inputs

train_dataset = Dataset.from_list(train_data)
val_dataset = Dataset.from_list(val_data)

print("\n" + "-"*70)
print("Tokenizing dataset...")
print("-"*70)

train_dataset = train_dataset.map(
    preprocess_function,
    batched=True,
    remove_columns=["input", "output"]
)
val_dataset = val_dataset.map(
    preprocess_function,
    batched=True,
    remove_columns=["input", "output"]
)

print("✓ Tokenization complete")

# ------------------------------------------------------------
# DATA COLLATOR
# ------------------------------------------------------------
data_collator = DataCollatorForSeq2Seq(
    tokenizer=tokenizer,
    model=model,
    padding=True
)

# ------------------------------------------------------------
# TRAINING CONFIGURATION (Safe FP32 Mode)
# ------------------------------------------------------------
print("\n" + "-"*70)
print("Configuring training (FP32 safe mode)...")
print("-"*70)

training_args = TrainingArguments(
    output_dir=OUTPUT_DIR,

    num_train_epochs=8,
    per_device_train_batch_size=BATCH_SIZE,
    per_device_eval_batch_size=BATCH_SIZE * 2,
    gradient_accumulation_steps=8 // BATCH_SIZE,  # effective batch size = 8

    learning_rate=LEARNING_RATE,
    warmup_steps=100,
    weight_decay=0.01,
    max_grad_norm=1.0,

    fp16=False,  # <--- Disabled FP16 to fix NaN loss
    fp16_full_eval=False,

    eval_strategy="steps",
    eval_steps=100,
    save_steps=100,
    save_total_limit=3,

    logging_steps=50,
    logging_dir="./logs",
    logging_first_step=True,

    gradient_checkpointing=True,
    optim="adafactor",

    dataloader_num_workers=0,
    load_best_model_at_end=True,
    metric_for_best_model="eval_loss",
    greater_is_better=False,

    report_to="none",
    push_to_hub=False,
    seed=42,
)

print("Training Configuration:")
print(f"  • Model: {MODEL_NAME}")
print(f"  • Learning rate: {LEARNING_RATE}")
print(f"  • FP16: {training_args.fp16} (disabled for stability)")
print(f"  • Batch size: {training_args.per_device_train_batch_size}")
print(f"  • Gradient accumulation: {training_args.gradient_accumulation_steps}")

# ------------------------------------------------------------
# TRAINING
# ------------------------------------------------------------
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
    tokenizer=tokenizer,
    data_collator=data_collator,
)

print("\n" + "="*70)
print("Starting training (FP32 mode)...")
print("="*70)
print("\nTip: Checkpoints will save every 100 steps.\n")

try:
    trainer.train()
    print("\n" + "="*70)
    print("✓ Training completed successfully!")
    print("="*70)
except KeyboardInterrupt:
    print("\nTraining interrupted by user. Checkpoint saved.")
except Exception as e:
    print("\n⚠ Training stopped due to error:")
    print(e)
    print("You can resume from the last checkpoint by re-running trainer.train().")

# ------------------------------------------------------------
# SAVE MODEL
# ------------------------------------------------------------
print("\n" + "-"*70)
print("Saving final model...")
print("-"*70)

final_dir = f"{OUTPUT_DIR}/final_model"
model.save_pretrained(final_dir)
tokenizer.save_pretrained(final_dir)
print(f"✓ Model saved to: {final_dir}")

# ------------------------------------------------------------
# EVALUATION
# ------------------------------------------------------------
print("\n" + "-"*70)
print("Running final evaluation...")
print("-"*70)

eval_results = trainer.evaluate()
print("\nValidation Results:")
for key, value in eval_results.items():
    print(f"  • {key}: {value:.4f}")

final_loss = eval_results.get('eval_loss', 999)
print("\n" + "="*70)
print("Training Quality Assessment:")
print("="*70)
if final_loss < 0.5:
    print("✓ EXCELLENT: Model trained very well (loss < 0.5)")
elif final_loss < 1.0:
    print("✓ GOOD: Model trained well (loss < 1.0)")
elif final_loss < 1.5:
    print("⚠ FAIR: Somewhat trained (loss < 1.5)")
else:
    print("❌ POOR: Model may need more epochs or tuning.")
print(f"\nFinal validation loss: {final_loss:.4f}")
print("="*70)

# ------------------------------------------------------------
# QUICK TEST INFERENCE
# ------------------------------------------------------------
print("\n" + "-"*70)
print("Quick Test Inference:")
print("-"*70)

test_input = "Rewrite professionally: Built web applications using Python"
inputs = tokenizer(test_input, return_tensors="pt", max_length=MAX_INPUT_LENGTH, truncation=True)
inputs = {k: v.to(device) for k, v in inputs.items()}

with torch.no_grad():
    outputs = model.generate(
        **inputs,
        max_length=MAX_OUTPUT_LENGTH,
        num_beams=4,
        early_stopping=True,
        no_repeat_ngram_size=3
    )

result = tokenizer.decode(outputs[0], skip_special_tokens=True)
print(f"\nInput: {test_input}")
print(f"Output: {result}")

print("\n✓ All done! Your fine-tuned model is ready (FP32 stable mode).")


T5 Resume Fine-Tuning Script (Stable)
Optimized for Google Colab Free Tier

✓ Using device: cuda
✓ GPU: Tesla T4
✓ GPU Memory: 15.83 GB

⚙️  MODEL: google/flan-t5-base
✓ Max input length: 128 tokens
✓ Max output length: 256 tokens

----------------------------------------------------------------------
Loading dataset...
----------------------------------------------------------------------
✓ Training examples: 400
✓ Validation examples: 100

----------------------------------------------------------------------
Loading model and tokenizer...
----------------------------------------------------------------------


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.
You are using the default legacy behaviour of the <class 'transformers.models.t5.tokenization_t5.T5Tokenizer'>. This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thoroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565


✓ Model parameters: 247.6M

----------------------------------------------------------------------
Tokenizing dataset...
----------------------------------------------------------------------


Map:   0%|          | 0/400 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

✓ Tokenization complete

----------------------------------------------------------------------
Configuring training (FP32 safe mode)...
----------------------------------------------------------------------
Training Configuration:
  • Model: google/flan-t5-base
  • Learning rate: 0.0002
  • FP16: False (disabled for stability)
  • Batch size: 2
  • Gradient accumulation: 4


  trainer = Trainer(



Starting training (FP32 mode)...

Tip: Checkpoints will save every 100 steps.



`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...


Step,Training Loss,Validation Loss
100,0.8354,0.210916
200,0.1419,0.047194
300,0.066,0.043868
400,0.0442,0.044552


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].



✓ Training completed successfully!

----------------------------------------------------------------------
Saving final model...
----------------------------------------------------------------------
✓ Model saved to: ./flan-t5-base-resume-finetuned-stable/final_model

----------------------------------------------------------------------
Running final evaluation...
----------------------------------------------------------------------



Validation Results:
  • eval_loss: 0.0439
  • eval_runtime: 1.1090
  • eval_samples_per_second: 90.1750
  • eval_steps_per_second: 22.5440
  • epoch: 8.0000

Training Quality Assessment:
✓ EXCELLENT: Model trained very well (loss < 0.5)

Final validation loss: 0.0439

----------------------------------------------------------------------
Quick Test Inference:
----------------------------------------------------------------------

Input: Rewrite professionally: Built web applications using Python
Output: Developed full-stack web platform leveraging Python and MySQL database that increased user engagement by 45% and reduced load times by 60%

✓ All done! Your fine-tuned model is ready (FP32 stable mode).


In [2]:
"""
Test Script for Fine-Tuned T5 Resume Model

Tests the model on various resume generation tasks.
This script is specifically designed to load a model fine-tuned from
Hugging Face's T5/FLAN-T5 architecture.
"""

import os
# Suppress TensorFlow logging if used, though this script uses PyTorch
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

import torch

try:
    # We use T5 classes as Flan-T5 uses the same architecture and tokenization
    from transformers import T5Tokenizer, T5ForConditionalGeneration
except Exception as e:
    print("\n" + "="*70)
    print("ERROR: Failed to import transformers")
    print("="*70)
    print(f"\nError: {e}\n")
    print("Please ensure the 'transformers' and 'torch' libraries are installed.")
    print("="*70)
    raise

print("="*70)
print("T5/Flan-T5 Resume Model Testing")
print("="*70)

# Configuration
# IMPORTANT: This path MUST point to the directory where your fine-tuned model
# (including pytorch_model.bin and tokenizer_config.json) is saved.
MODEL_PATH = "./flan-t5-base-resume-finetuned-stable/final_model"
MAX_INPUT_LENGTH = 128
MAX_OUTPUT_LENGTH = 256

# Check if model exists
if not os.path.exists(MODEL_PATH) or not os.path.exists(os.path.join(MODEL_PATH, "pytorch_model.bin")):
    print(f"\n❌ Error: Model files not found in the expected directory: {MODEL_PATH}")
    print("\nPlease verify the path or run the training script first.")
    exit(1)

# Load model
print(f"\nLoading model from: {MODEL_PATH}")
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using device: {device}\n")

# Use T5Tokenizer for loading Flan-T5 models
tokenizer = T5Tokenizer.from_pretrained(MODEL_PATH)
model = T5ForConditionalGeneration.from_pretrained(MODEL_PATH)
model.to(device)
model.eval()

print("✓ Model loaded successfully\n")

# Test examples
test_cases = [
    {
        "category": "Professional Summary (Software)",
        "input": "Create professional summary: Software Engineering, 5 years, Python, AWS, Docker"
    },
    {
        "category": "Professional Summary (Data Science)",
        "input": "Create professional summary: Data Science, 7 years, Machine Learning, Python, TensorFlow"
    },
    {
        "category": "Experience Bullet (Development)",
        "input": "Rewrite professionally: Built web applications using Python, Django, PostgreSQL"
    },
    {
        "category": "Experience Bullet (Analysis)",
        "input": "Rewrite professionally: Analyzed data using Python, SQL, Tableau"
    },
    {
        "category": "Experience Bullet (Management)",
        "input": "Rewrite professionally: Managed team using Agile, Scrum"
    },
    {
        "category": "Project Description (Web)",
        "input": "Enhance project: E-commerce Platform using React, Node.js, MongoDB"
    },
    {
        "category": "Project Description (ML)",
        "input": "Enhance project: Machine Learning Model using Python, TensorFlow"
    },
]

print("="*70)
print("Running Test Cases")
print("="*70)

for i, test in enumerate(test_cases, 1):
    print(f"\n{'─'*70}")
    print(f"Test {i}/{len(test_cases)}: {test['category']}")
    print(f"{'─'*70}")
    print(f"Input:\n  {test['input']}")

    # Tokenize input
    inputs = tokenizer(
        test['input'],
        return_tensors="pt",
        max_length=MAX_INPUT_LENGTH,
        truncation=True
    )
    inputs = {k: v.to(device) for k, v in inputs.items()}

    # Generate output
    # Recommended generation parameters for T5-based models:
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_length=MAX_OUTPUT_LENGTH,
            num_beams=4,
            early_stopping=True,
            no_repeat_ngram_size=3,
            length_penalty=1.2
        )

    result = tokenizer.decode(outputs[0], skip_special_tokens=True)
    print(f"\nOutput:\n  {result}")

print("\n" + "="*70)
print("Testing Complete!")
print("="*70)

# Interactive mode
print("\n" + "─"*70)
print("Interactive Mode")
print("─"*70)
print("\nYou can now test your own prompts.")
print("Type 'quit' to exit.\n")

while True:
    user_input = input("Enter your prompt: ").strip()

    if user_input.lower() in ['quit', 'exit', 'q']:
        print("\nGoodbye!")
        break

    if not user_input:
        continue

    inputs = tokenizer(
        user_input,
        return_tensors="pt",
        max_length=MAX_INPUT_LENGTH,
        truncation=True
    )
    inputs = {k: v.to(device) for k, v in inputs.items()}

    try:
        with torch.no_grad():
            outputs = model.generate(
                **inputs,
                max_length=MAX_OUTPUT_LENGTH,
                num_beams=4,
                early_stopping=True,
                no_repeat_ngram_size=3,
                length_penalty=1.2
            )

        result = tokenizer.decode(outputs[0], skip_special_tokens=True)
        print(f"\nGenerated:\n{result}\n")
    except Exception as e:
        print(f"\nAn error occurred during generation: {e}\n")



T5/Flan-T5 Resume Model Testing

❌ Error: Model files not found in the expected directory: ./flan-t5-base-resume-finetuned-stable/final_model

Please verify the path or run the training script first.

Loading model from: ./flan-t5-base-resume-finetuned-stable/final_model
Using device: cuda

✓ Model loaded successfully

Running Test Cases

──────────────────────────────────────────────────────────────────────
Test 1/7: Professional Summary (Software)
──────────────────────────────────────────────────────────────────────
Input:
  Create professional summary: Software Engineering, 5 years, Python, AWS, Docker

Output:
  Results-driven Software Engineering professional with 5+ years of experience leveraging Python and AWS to deliver innovative solutions. Proven track record of improving efficiency and driving business outcomes through technical expertise.

──────────────────────────────────────────────────────────────────────
Test 2/7: Professional Summary (Data Science)
──────────────────

KeyboardInterrupt: Interrupted by user

In [6]:
# STEP 1: Install dependencies (run this in a cell first)

!pip install flask pyngrok transformers torch sentencepiece reportlab -q


[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/2.0 MB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.0/2.0 MB[0m [31m72.4 MB/s[0m eta [36m0:00:00[0m
[?25h

In [7]:

import io
from flask import Flask, request, jsonify, send_file
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
from reportlab.lib.pagesizes import letter
from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
from reportlab.lib.units import inch
from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, Table, TableStyle
from reportlab.lib.enums import TA_LEFT, TA_CENTER
from reportlab.lib import colors
from datetime import datetime
import json

In [12]:
from transformers import T5Tokenizer, T5ForConditionalGeneration

# STEP 2: Load the improved model
print("Loading FLAN-T5-Large model... (better quality, still free)")
model_path = "./flan-t5-base-resume-finetuned-stable/final_model"  # Upgraded from base for better results

# Use T5Tokenizer and T5ForConditionalGeneration directly
tokenizer = T5Tokenizer.from_pretrained(model_path)
model = T5ForConditionalGeneration.from_pretrained(model_path)

print("✅ Model loaded successfully!")




Loading FLAN-T5-Large model... (better quality, still free)
✅ Model loaded successfully!


In [14]:
 #AI Functions for Resume Generation
def extract_keywords_from_job(job_description):
    """Extract important keywords from job description"""
    prompt = f"""Extract key skills and technologies from this job posting. List only the most important ones as comma-separated keywords.

Job Description: {job_description}

Important keywords:"""

    inputs = tokenizer(prompt, return_tensors="pt", max_length=512, truncation=True)
    outputs = model.generate(
        **inputs,
        max_length=100,
        num_beams=4,
        no_repeat_ngram_size=2,
        early_stopping=True
    )
    keywords = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return keywords.strip()


def tailor_resume_experience(experience_text, keywords):
    """Rewrite experience bullet to match job requirements"""
    prompt = f"""Rewrite this work experience bullet point to be professional and impactful. Use action verbs and be specific.

Original: {experience_text}

Improved bullet point:"""

    inputs = tokenizer(prompt, return_tensors="pt", max_length=512, truncation=True)
    outputs = model.generate(
        **inputs,
        max_length=100,
        num_beams=4,
        no_repeat_ngram_size=2,
        early_stopping=True
    )
    improved = tokenizer.decode(outputs[0], skip_special_tokens=True)

    # If the output is too similar to input or just keywords, return improved original
    if len(improved) < 10 or improved == keywords:
        return experience_text
    return improved.strip()


def tailor_project_description(project_desc, project_tech, keywords):
    """Enhance project description to highlight relevant skills"""
    prompt = f"""Rewrite this project description to be more professional and highlight technical achievements.

Project: {project_desc}
Technologies: {project_tech}

Enhanced description:"""

    inputs = tokenizer(prompt, return_tensors="pt", max_length=512, truncation=True)
    outputs = model.generate(
        **inputs,
        max_length=100,
        num_beams=4,
        no_repeat_ngram_size=2,
        early_stopping=True
    )
    improved = tokenizer.decode(outputs[0], skip_special_tokens=True)

    # If output is too short or generic, return original
    if len(improved) < 15:
        return project_desc
    return improved.strip()


def generate_professional_summary(user_data, keywords, job_description):
    """Generate a professional summary tailored to the job"""
    # Get actual experience details
    exp_count = len(user_data.get('experience', []))
    skills = user_data.get('skills', [])[:6]

    # Get first job title if available
    first_role = "Professional"
    if user_data.get('experience') and len(user_data['experience']) > 0:
        first_role = user_data['experience'][0].get('title', 'Professional')

    prompt = f"""Write a professional resume summary for someone with these qualifications:
Role: {first_role}
Years of experience: {exp_count} positions
Skills: {', '.join(skills)}

Write a 2-3 sentence professional summary:"""

    inputs = tokenizer(prompt, return_tensors="pt", max_length=512, truncation=True)
    outputs = model.generate(
        **inputs,
        max_length=120,
        num_beams=4,
        no_repeat_ngram_size=2,
        early_stopping=True,
        length_penalty=1.0
    )
    summary = tokenizer.decode(outputs[0], skip_special_tokens=True)

    # Fallback if AI generates poor summary
    if len(summary) < 30 or "job requirements" in summary.lower():
        summary = f"Results-driven {first_role} with experience in {', '.join(skills[:3])}. Proven track record of delivering high-quality solutions and collaborating with cross-functional teams."

    return summary.strip()



In [15]:
# PDF Generation Functions
def create_professional_resume_pdf(resume_data):
    """Generate a professional, ATS-friendly PDF resume"""
    buffer = io.BytesIO()
    doc = SimpleDocTemplate(buffer, pagesize=letter,
                           rightMargin=0.75*inch, leftMargin=0.75*inch,
                           topMargin=0.75*inch, bottomMargin=0.75*inch)

    story = []
    styles = getSampleStyleSheet()

    # Custom styles
    title_style = ParagraphStyle(
        'CustomTitle',
        parent=styles['Heading1'],
        fontSize=24,
        textColor=colors.HexColor('#2C3E50'),
        spaceAfter=6,
        alignment=TA_CENTER,
        fontName='Helvetica-Bold'
    )

    contact_style = ParagraphStyle(
        'Contact',
        parent=styles['Normal'],
        fontSize=10,
        textColor=colors.HexColor('#34495E'),
        alignment=TA_CENTER,
        spaceAfter=20
    )

    section_header_style = ParagraphStyle(
        'SectionHeader',
        parent=styles['Heading2'],
        fontSize=14,
        textColor=colors.HexColor('#2C3E50'),
        spaceAfter=12,
        spaceBefore=12,
        fontName='Helvetica-Bold',
        borderWidth=1,
        borderColor=colors.HexColor('#3498DB'),
        borderPadding=5,
        backColor=colors.HexColor('#ECF0F1')
    )

    body_style = ParagraphStyle(
        'CustomBody',
        parent=styles['Normal'],
        fontSize=10,
        textColor=colors.HexColor('#2C3E50'),
        spaceAfter=6,
        leading=14
    )

    # Personal Information
    personal = resume_data.get('personal_info', {})
    story.append(Paragraph(personal.get('name', 'Your Name'), title_style))

    contact_info = f"{personal.get('email', '')} | {personal.get('phone', '')} | {personal.get('location', '')}"
    if personal.get('linkedin'):
        contact_info += f" | LinkedIn: {personal.get('linkedin')}"
    story.append(Paragraph(contact_info, contact_style))

    # Professional Summary
    if resume_data.get('professional_summary'):
        story.append(Paragraph("PROFESSIONAL SUMMARY", section_header_style))
        story.append(Paragraph(resume_data['professional_summary'], body_style))
        story.append(Spacer(1, 0.2*inch))

    # Skills
    if resume_data.get('skills'):
        story.append(Paragraph("SKILLS", section_header_style))
        skills_text = " • ".join(resume_data['skills'])
        story.append(Paragraph(skills_text, body_style))
        story.append(Spacer(1, 0.2*inch))

    # Experience
    if resume_data.get('experience'):
        story.append(Paragraph("PROFESSIONAL EXPERIENCE", section_header_style))
        for exp in resume_data['experience']:
            job_title = f"<b>{exp.get('title', 'Position')}</b> | {exp.get('company', 'Company')}"
            story.append(Paragraph(job_title, body_style))

            duration = f"<i>{exp.get('start_date', '')} - {exp.get('end_date', 'Present')} | {exp.get('location', '')}</i>"
            story.append(Paragraph(duration, body_style))
            story.append(Spacer(1, 0.1*inch))

            for bullet in exp.get('bullets', []):
                bullet_text = f"• {bullet}"
                story.append(Paragraph(bullet_text, body_style))

            story.append(Spacer(1, 0.15*inch))

    # Projects
    if resume_data.get('projects'):
        story.append(Paragraph("PROJECTS", section_header_style))
        for project in resume_data['projects']:
            project_title = f"<b>{project.get('name', 'Project')}</b>"
            if project.get('technologies'):
                project_title += f" | <i>{project.get('technologies')}</i>"
            story.append(Paragraph(project_title, body_style))

            if project.get('description'):
                story.append(Paragraph(f"• {project['description']}", body_style))

            story.append(Spacer(1, 0.1*inch))

    # Education
    if resume_data.get('education'):
        story.append(Paragraph("EDUCATION", section_header_style))
        for edu in resume_data['education']:
            edu_text = f"<b>{edu.get('degree', 'Degree')}</b> | {edu.get('institution', 'Institution')}"
            story.append(Paragraph(edu_text, body_style))

            edu_details = f"<i>{edu.get('graduation_date', '')} | GPA: {edu.get('gpa', 'N/A')}</i>"
            story.append(Paragraph(edu_details, body_style))
            story.append(Spacer(1, 0.1*inch))

    # Build PDF
    doc.build(story)
    buffer.seek(0)
    return buffer



In [16]:
# Flask API
app = Flask(__name__)

@app.route("/")
def home():
    return """
    <h1>🚀 Enhanced Resume Generator API with PDF Export</h1>
    <p>API is running! Generate professional resumes with AI.</p>
    <h3>Endpoints:</h3>
    <ul>
        <li><b>POST /api/analyze-job</b> - Extract keywords from job description</li>
        <li><b>POST /api/generate-resume</b> - Generate complete tailored resume (JSON)</li>
        <li><b>POST /api/generate-resume-pdf</b> - Generate and download PDF resume</li>
    </ul>
    <p>Model: FLAN-T5-Large (Improved Quality)</p>
    """


@app.route("/api/analyze-job", methods=["POST"])
def analyze_job():
    """Extract keywords from job description"""
    try:
        data = request.json
        job_description = data.get("job_description", "")

        if not job_description:
            return jsonify({"error": "job_description is required"}), 400

        keywords = extract_keywords_from_job(job_description)

        return jsonify({
            "success": True,
            "keywords": keywords,
            "keywords_list": [k.strip() for k in keywords.split(",") if k.strip()]
        })

    except Exception as e:
        return jsonify({"error": str(e)}), 500


@app.route("/api/generate-resume", methods=["POST"])
def generate_resume():
    """Generate a complete tailored resume (JSON response)"""
    try:
        data = request.json

        job_description = data.get("job_description", "")
        personal_info = data.get("personal_info", {})
        education = data.get("education", [])
        experience = data.get("experience", [])
        projects = data.get("projects", [])
        skills = data.get("skills", [])

        if not job_description:
            return jsonify({"error": "job_description is required"}), 400

        # Extract keywords
        keywords = extract_keywords_from_job(job_description)
        keywords_list = [k.strip() for k in keywords.split(",") if k.strip()]

        # Generate professional summary
        user_background = {
            "experience": experience,
            "skills": skills
        }
        professional_summary = generate_professional_summary(user_background, keywords, job_description)

        # Tailor experience bullets
        tailored_experience = []
        for exp in experience:
            tailored_bullets = []
            for bullet in exp.get("bullets", []):
                # Only tailor if bullet is substantial
                if len(bullet) > 10:
                    improved = tailor_resume_experience(bullet, keywords)
                    tailored_bullets.append(improved)
                else:
                    tailored_bullets.append(bullet)

            tailored_experience.append({
                **exp,
                "bullets": tailored_bullets
            })

        # Tailor project descriptions
        tailored_projects = []
        for project in projects:
            tailored_desc = project.get("description", "")
            if len(tailored_desc) > 15:
                tailored_desc = tailor_project_description(
                    project.get("description", ""),
                    project.get("technologies", ""),
                    keywords
                )

            tailored_projects.append({
                **project,
                "description": tailored_desc
            })

        # Calculate match score
        match_score = 0
        if keywords_list and skills:
            matched = sum(1 for k in keywords_list if any(k.lower() in s.lower() for s in skills))
            match_score = (matched / len(keywords_list)) * 100

        resume_data = {
            "success": True,
            "keywords_extracted": keywords_list,
            "professional_summary": professional_summary,
            "personal_info": personal_info,
            "education": education,
            "experience": tailored_experience,
            "projects": tailored_projects,
            "skills": skills,
            "match_score": round(match_score, 1)
        }

        return jsonify(resume_data)

    except Exception as e:
        return jsonify({"error": str(e)}), 500


@app.route("/api/generate-resume-pdf", methods=["POST"])
def generate_resume_pdf():
    """Generate and download professional PDF resume"""
    try:
        data = request.json

        job_description = data.get("job_description", "")
        personal_info = data.get("personal_info", {})
        education = data.get("education", [])
        experience = data.get("experience", [])
        projects = data.get("projects", [])
        skills = data.get("skills", [])

        if not job_description:
            return jsonify({"error": "job_description is required"}), 400

        # Generate tailored resume content
        keywords = extract_keywords_from_job(job_description)
        user_background = {"experience": experience, "skills": skills}
        professional_summary = generate_professional_summary(user_background, keywords, job_description)

        # Tailor experience
        tailored_experience = []
        for exp in experience:
            tailored_bullets = []
            for bullet in exp.get("bullets", []):
                if len(bullet) > 10:
                    improved = tailor_resume_experience(bullet, keywords)
                    tailored_bullets.append(improved)
                else:
                    tailored_bullets.append(bullet)
            tailored_experience.append({**exp, "bullets": tailored_bullets})

        # Tailor projects
        tailored_projects = []
        for project in projects:
            tailored_desc = project.get("description", "")
            if len(tailored_desc) > 15:
                tailored_desc = tailor_project_description(
                    project.get("description", ""),
                    project.get("technologies", ""),
                    keywords
                )
            tailored_projects.append({**project, "description": tailored_desc})

        # Prepare resume data for PDF
        resume_data = {
            "professional_summary": professional_summary,
            "personal_info": personal_info,
            "education": education,
            "experience": tailored_experience,
            "projects": tailored_projects,
            "skills": skills
        }

        # Generate PDF
        pdf_buffer = create_professional_resume_pdf(resume_data)

        filename = f"Resume_{personal_info.get('name', 'User').replace(' ', '_')}_{datetime.now().strftime('%Y%m%d')}.pdf"

        return send_file(
            pdf_buffer,
            mimetype='application/pdf',
            as_attachment=True,
            download_name=filename
        )

    except Exception as e:
        return jsonify({"error": str(e)}), 500


In [17]:
from pyngrok import ngrok
import threading

# Set your ngrok auth token
ngrok.set_auth_token("34RuNCVR0o60Xl34SO01IPOJy0p_eu3CHpGA8gjg8sSYc1hR")

# Start ngrok tunnel
port = 5000
public_url = ngrok.connect(port).public_url

print("=" * 60)
print("✅ API is LIVE!")
print(f"🌐 Public URL: {public_url}")
print("=" * 60)

# Run Flask app
threading.Thread(target=app.run, kwargs={"port": port}).start()

✅ API is LIVE!
🌐 Public URL: https://machinable-skyla-consortable.ngrok-free.dev


In [20]:
import requests
import json

# Replace with the public URL from the ngrok output
# Example: api_url = "https://machinable-skyla-consortable.ngrok-free.dev/"
api_url = "https://machinable-skyla-consortable.ngrok-free.dev/"

# --- Test /api/analyze-job ---
print("="*50)
print("Testing /api/analyze-job")
print("="*50)

job_description_data = {
    "job_description": "We are looking for a skilled Software Engineer with experience in Python, Django, and AWS to join our team. Responsibilities include developing web applications and contributing to cloud infrastructure."
}

try:
    response = requests.post(f"{api_url}/api/analyze-job", json=job_description_data)
    response.raise_for_status() # Raise an exception for bad status codes
    print("Response Status Code:", response.status_code)
    print("Response JSON:")
    print(json.dumps(response.json(), indent=2))
except requests.exceptions.RequestException as e:
    print(f"Error testing /api/analyze-job: {e}")

print("\n")

# --- Test /api/generate-resume ---
print("="*50)
print("Testing /api/generate-resume (JSON output)")
print("="*50)

resume_input_data = {
    "job_description": "We are looking for a skilled Software Engineer with experience in Python, Django, and AWS to join our team. Responsibilities include developing web applications and contributing to cloud infrastructure.",
    "personal_info": {
        "name": "Jane Doe",
        "email": "jane.doe@example.com",
        "phone": "123-456-7890",
        "location": "San Francisco, CA",
        "linkedin": "linkedin.com/in/janedoe"
    },
    "education": [
        {
            "degree": "Master of Science in Computer Science",
            "institution": "University of California, Berkeley",
            "graduation_date": "May 2022",
            "gpa": "3.9"
        },
        {
            "degree": "Bachelor of Science in Electrical Engineering",
            "institution": "Stanford University",
            "graduation_date": "May 2020",
            "gpa": "3.8"
        }
    ],
    "experience": [
        {
            "title": "Software Engineer",
            "company": "Tech Solutions Inc.",
            "start_date": "June 2022",
            "end_date": "Present",
            "location": "San Francisco, CA",
            "bullets": [
                "Developed and maintained web applications using Python and Django.",
                "Managed cloud infrastructure on AWS.",
                "Collaborated with cross-functional teams."
            ]
        },
         {
            "title": "Intern",
            "company": "Data Analytics Corp.",
            "start_date": "Summer 2021",
            "end_date": "August 2021",
            "location": "San Francisco, CA",
            "bullets": [
                "Assisted in data analysis projects.",
                "Wrote scripts in Python.",
            ]
        }
    ],
    "projects": [
        {
            "name": "E-commerce Platform",
            "technologies": "React, Node.js, MongoDB",
            "description": "Built a full-stack e-commerce platform."
        },
        {
            "name": "Sentiment Analysis Tool",
            "technologies": "Python, TensorFlow",
            "description": "Developed a machine learning model for sentiment analysis."
        }
    ],
    "skills": ["Python", "Django", "AWS", "React", "Node.js", "MongoDB", "TensorFlow", "SQL", "Docker", "Kubernetes"]
}

try:
    response = requests.post(f"{api_url}/api/generate-resume", json=resume_input_data)
    response.raise_for_status() # Raise an exception for bad status codes
    print("Response Status Code:", response.status_code)
    print("Response JSON:")
    print(json.dumps(response.json(), indent=2))
except requests.exceptions.RequestException as e:
    print(f"Error testing /api/generate-resume: {e}")

print("\n")

# --- Test /api/generate-resume-pdf ---
print("="*50)
print("Testing /api/generate-resume-pdf (PDF download - requires manual check)")
print("="*50)

# This endpoint returns a PDF file. We will just check for a successful response.
try:
    response = requests.post(f"{api_url}/api/generate-resume-pdf", json=resume_input_data)
    response.raise_for_status() # Raise an exception for bad status codes
    print("Response Status Code:", response.status_code)
    print("Response Headers (check for Content-Disposition and Content-Type):")
    for header, value in response.headers.items():
        print(f"  {header}: {value}")

    # You can save the PDF content to a file to verify:
    # with open("generated_resume.pdf", "wb") as f:
    #     f.write(response.content)
    # print("\nPDF content received. You can uncomment the lines above to save it.")

except requests.exceptions.RequestException as e:
    print(f"Error testing /api/generate-resume-pdf: {e}")

print("\nTesting complete.")

Testing /api/analyze-job


INFO:werkzeug:127.0.0.1 - - [23/Oct/2025 08:51:07] "POST /api/analyze-job HTTP/1.1" 200 -


Response Status Code: 200
Response JSON:
{
  "keywords": "Engineer, Python, Django, contributing, cloud",
  "keywords_list": [
    "Engineer",
    "Python",
    "Django",
    "contributing",
    "cloud"
  ],
  "success": true
}


Testing /api/generate-resume (JSON output)


INFO:werkzeug:127.0.0.1 - - [23/Oct/2025 08:51:47] "POST /api/generate-resume HTTP/1.1" 200 -


Response Status Code: 200
Response JSON:
{
  "education": [
    {
      "degree": "Master of Science in Computer Science",
      "gpa": "3.9",
      "graduation_date": "May 2022",
      "institution": "University of California, Berkeley"
    },
    {
      "degree": "Bachelor of Science in Electrical Engineering",
      "gpa": "3.8",
      "graduation_date": "May 2020",
      "institution": "Stanford University"
    }
  ],
  "experience": [
    {
      "bullets": [
        "Developed and maintained web applications utilizing Python and Django. Improved bullet point to be professional and impactful.",
        "Enhanced managed cloud infrastructure on AWS. Improved bullet point to be professional and impactful.",
        "Enhanced collaborated with cross-functional teams. Improved collaboration speed and efficiency."
      ],
      "company": "Tech Solutions Inc.",
      "end_date": "Present",
      "location": "San Francisco, CA",
      "start_date": "June 2022",
      "title": "Softwar

INFO:werkzeug:127.0.0.1 - - [23/Oct/2025 08:52:26] "POST /api/generate-resume-pdf HTTP/1.1" 200 -


Response Status Code: 200
PDF content received and saved to Resume_Jane_Doe_20251023.pdf

Testing complete.


In [22]:
import shutil
import os
from google.colab import files

# Define the path to the directory you want to download
directory_to_download = "./flan-t5-base-resume-finetuned-stable"
zip_filename = "flan-t5-base-finetuned-stable.zip"

# Check if the directory exists
if os.path.exists(directory_to_download):
    print(f"Compressing directory: {directory_to_download}")
    # Create a zip archive of the directory
    shutil.make_archive(zip_filename.replace(".zip", ""), 'zip', directory_to_download)
    print(f"✓ Directory compressed to {zip_filename}")

    # Provide a download link for the zip file
    try:
        files.download(zip_filename)
        print(f"\n✓ Download initiated for {zip_filename}")
    except Exception as e:
        print(f"\nError initiating download: {e}")
        print(f"You can manually download the file '{zip_filename}' from the Colab file explorer.")
else:
    print(f"❌ Error: Directory not found at {directory_to_download}")

Compressing directory: ./flan-t5-base-resume-finetuned-stable
✓ Directory compressed to flan-t5-base-finetuned-stable.zip


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>


✓ Download initiated for flan-t5-base-finetuned-stable.zip
