In [None]:
import pandas as pd
from getpass import getpass
from huggingface_hub import login
from transformers import AutoTokenizer, AutoModelForCausalLM, TextIteratorStreamer
import torch
import threading
from tqdm import tqdm
import json
import time
from datetime import datetime

# Both datasets live under the same Hugging Face organisation and share the
# same single-shard parquet layout, so only the repository name differs.
HF_PARQUET_TEMPLATE = "hf://datasets/lang-uk/{repo}/data/train-00000-of-00001.parquet"
SECTION_RULE = "-" * 30

# Job descriptions.
print("Loading descriptions dataset...")
descriptions = pd.read_parquet(
    HF_PARQUET_TEMPLATE.format(repo="recruitment-dataset-job-descriptions-english")
)
print("Descriptions head:", descriptions.head(), SECTION_RULE, sep="\n")

# Candidate profiles (the source of the resume prompts built later on).
print("Loading profiles dataset...")
profile = pd.read_parquet(
    HF_PARQUET_TEMPLATE.format(repo="recruitment-dataset-candidate-profiles-english")
)
print("Profiles head:", profile.head(), SECTION_RULE, sep="\n")

# Authenticate against the Hugging Face Hub. The token is read interactively
# so it never appears in the notebook source. A failed login is reported but
# does not stop the notebook.
print("Logging in to Hugging Face Hub...")
try:
    HF_TOKEN = getpass()
    login(HF_TOKEN)
except Exception as login_error:
    print(f"Login failed: {login_error}")
else:
    print("Login successful.")
print("-" * 30)

# Models included in the benchmark, as (short label, Hub repository id) pairs.
# The label is used in logs and results; the repository id is what gets loaded.
MODELS_TO_TEST = dict([
    ("gemma-2-2b-it", "google/gemma-2-2b-it"),
    ("qwen2-1.5b-instruct", "Qwen/Qwen2-1.5B-Instruct"),
    ("mistral-7b-instruct", "mistralai/Mistral-7B-Instruct-v0.3"),
    ("phi-3-mini-4k-instruct", "microsoft/Phi-3-mini-4k-instruct"),
])


In [None]:

# Pick the compute device once; later cells reuse `device`.
device = "cpu"
if torch.cuda.is_available():
    device = "cuda"

if device != "cuda":
    print("Using CPU - це буде повільно!")
else:
    print(f"Using GPU: {torch.cuda.get_device_name()}")
    total_memory_gib = torch.cuda.get_device_properties(0).total_memory / 1024**3
    print(f"GPU memory: {total_memory_gib:.1f} GB")
    # Allow TF32 for both matmul and cuDNN kernels.
    torch.backends.cuda.matmul.allow_tf32 = True
    torch.backends.cudnn.allow_tf32 = True

print("-" * 30)


In [None]:

def _render_chat_prompt(tokenizer, prompt_text):
    """Wrap ``prompt_text`` in the tokenizer's chat template when one is usable.

    Args:
        tokenizer: Tokenizer object; ``apply_chat_template`` is used if present.
        prompt_text (str): The raw user prompt.

    Returns:
        tuple: ``(text, templated)``. ``templated`` is True when the chat
        template was applied and False when the raw prompt is returned as a
        fallback.
    """
    if hasattr(tokenizer, 'apply_chat_template'):
        try:
            messages = [{"role": "user", "content": prompt_text}]
            templated_text = tokenizer.apply_chat_template(
                messages, tokenize=False, add_generation_prompt=True
            )
            return templated_text, True
        except Exception as e:
            # Best-effort: fall back to the raw prompt, but report it instead
            # of hiding the failure behind a bare ``except``.
            print(f"Chat template not applied, using raw prompt: {e}")
    return prompt_text, False


def generate_resumes_for_model(model_name, model_path, prompts, device,
                               max_prompts=2, max_new_tokens=512):
    """Load one causal LM, generate a resume per prompt, and time each generation.

    Args:
        model_name (str): Short label used in logs and in the result records.
        model_path (str): Identifier passed to ``from_pretrained`` for both the
            model and the tokenizer.
        prompts (list[str]): Prompts to generate from.
        device (str): Device the tokenized inputs are moved to; ``"cuda"``
            additionally triggers a CUDA cache flush during cleanup.
        max_prompts (int | None): Only the first ``max_prompts`` prompts are
            used. ``None`` uses all of them. Defaults to 2.
        max_new_tokens (int): Generation budget per prompt. Defaults to 512.

    Returns:
        dict: On success, a dict with ``model_name``, ``model_path``,
        ``resumes`` (one record per prompt; failed prompts carry an ``error``
        key), ``avg_generation_time``, ``total_time`` and
        ``status == "success"``. If loading or anything outside the per-prompt
        handler fails, a dict with ``resumes == []``, ``error`` and
        ``status == "failed"``.
    """
    import gc  # local import: only needed for the cleanup step below

    print(f"\n{'='*50}")
    print(f"Loading {model_name}...")

    # Bound up front so the cleanup in ``finally`` works even when loading
    # fails half-way (e.g. model loaded but tokenizer download failed).
    model = None
    tokenizer = None

    try:
        # NOTE(review): trust_remote_code=True lets the model repository run
        # its own Python code at load time - confirm every configured
        # repository is trusted.
        model = AutoModelForCausalLM.from_pretrained(
            model_path,
            device_map="auto",
            torch_dtype=torch.bfloat16,
            trust_remote_code=True
        ).eval()

        tokenizer = AutoTokenizer.from_pretrained(model_path)

        # Add a pad_token if one does not exist.
        if tokenizer.pad_token is None:
            tokenizer.pad_token = tokenizer.eos_token

        print(f"Model {model_name} loaded successfully")

        generated_resumes = []
        generation_times = []

        test_prompts = prompts[:max_prompts]

        for i, prompt_text in enumerate(tqdm(test_prompts, desc=f"Generating with {model_name}")):
            start_time = time.time()

            try:
                prompt_with_template, templated = _render_chat_prompt(tokenizer, prompt_text)

                # Templated text is tokenized without extra special tokens so
                # any the template already wrote are not added a second time.
                inputs = tokenizer(
                    prompt_with_template,
                    return_tensors="pt",
                    truncation=True,
                    max_length=2048,
                    add_special_tokens=not templated,
                )
                inputs = {k: v.to(device) for k, v in inputs.items()}
                prompt_length = inputs["input_ids"].shape[-1]

                # NOTE(review): an explicit eos_token_id takes precedence over
                # the model's own generation defaults - confirm intended.
                with torch.no_grad():
                    outputs = model.generate(
                        **inputs,
                        max_new_tokens=max_new_tokens,
                        temperature=0.7,
                        top_p=0.9,
                        do_sample=True,
                        eos_token_id=tokenizer.eos_token_id,
                        pad_token_id=tokenizer.pad_token_id,
                        repetition_penalty=1.1
                    )

                # The returned sequence starts with the prompt tokens; keep
                # only the newly generated tail so the stored resume (and its
                # reported length) does not include the echoed prompt.
                generated_ids = outputs[0][prompt_length:]
                resume_text = tokenizer.decode(generated_ids, skip_special_tokens=True)

                end_time = time.time()
                generation_time = end_time - start_time

                generated_resumes.append({
                    "candidate_id": i,
                    "resume": resume_text,
                    "generation_time_seconds": generation_time,
                    "model": model_name
                })

                generation_times.append(generation_time)

                print(f"\n{model_name} - Candidate {i+1}: {generation_time:.2f}s")
                print(f"Output length: {len(resume_text)} characters")

            except Exception as e:
                # A single failing prompt must not abort the whole model run;
                # it is recorded and excluded from the timing statistics.
                print(f"Error generating for candidate {i+1}: {e}")
                generated_resumes.append({
                    "candidate_id": i,
                    "resume": f"ERROR: {str(e)}",
                    "generation_time_seconds": 0,
                    "model": model_name,
                    "error": str(e)
                })

        return {
            "model_name": model_name,
            "model_path": model_path,
            "resumes": generated_resumes,
            "avg_generation_time": sum(generation_times) / len(generation_times) if generation_times else 0,
            "total_time": sum(generation_times),
            "status": "success"
        }

    except Exception as e:
        print(f"Failed to load {model_name}: {e}")
        return {
            "model_name": model_name,
            "model_path": model_path,
            "resumes": [],
            "error": str(e),
            "status": "failed"
        }

    finally:
        # Release the weights on every exit path, not just on success, so a
        # failed model does not stay in memory while the next one loads.
        model = None
        tokenizer = None
        gc.collect()
        if device == "cuda":
            torch.cuda.empty_cache()



In [None]:

print("Preparing prompts...")

# Prompt template with two positional `{}` slots:
#   1st slot - the candidate details / job target block
#   2nd slot - additional instructions (tone, length, keywords, ...)
prompt_style = """Below is an instruction that describes a task, paired with an input that provides candidate details and a target job.
Write a professional, ATS-friendly resume tailored to the target role.

First, produce a concise 2–4 bullet **Plan** that lists the sections and focus points you will include (e.g., highlight leadership, quantify achievements, include keywords from job description). Do not reveal internal chain-of-thought — keep the plan explicit and short.

Then generate the resume. Use clear section headers (Summary, Experience, Education, Skills, Projects/Certs as applicable). For experience bullets, use the STAR/impact style (situation/task → action → measurable result) and include quantifiable metrics where possible. Tailor language and keywords to the target job.

### Candidate details / Job target:
{}

### Additional instructions (tone, length, must-include keywords, formatting notes):
{}

### Output format:
Plan:
- <short bullet 1>
- <short bullet 2>

Resume:
[Use sections: Summary, Experience (most recent first), Education, Skills, Projects/Certifications, Additional information (optional)]
"""

# Fixed planning hints, one bullet per line.
complex_cot = "\n".join([
    "- Identify key skills from the candidate's past roles.",
    "- Match these skills to the job description keywords.",
    "- Prioritize experiences that show measurable achievements.",
])

def format_prompts(df, extra_instructions="Tone: professional, one-page, include relevant keywords.",
                   missing_text="Not specified"):
    """Build one resume-generation prompt per row of ``df``.

    Args:
        df: Column-indexable table (e.g. a pandas DataFrame) providing the
            columns ``Position``, ``Moreinfo``, ``Looking For``,
            ``Highlights`` and ``Primary Keyword``.
        extra_instructions (str): Text placed in the "additional instructions"
            slot of ``prompt_style``, followed by a newline and ``complex_cot``.
        missing_text (str): Placeholder used for missing cells (``None``, NaN,
            ``pd.NA``) so the prompt never contains a literal "nan"/"None".

    Returns:
        list[str]: The formatted prompts, in row order.
    """
    # (label shown in the prompt, column read from df)
    fields = (
        ("Position", "Position"),
        ("More info", "Moreinfo"),
        ("Looking For", "Looking For"),
        ("Highlights", "Highlights"),
        ("Primary Keyword", "Primary Keyword"),
    )

    def _display(value):
        # Only scalars are checked: pd.isna on a list-like cell would return
        # an array, which is not a usable truth value.
        if pd.api.types.is_scalar(value) and pd.isna(value):
            return missing_text
        return value

    columns = [df[column] for _, column in fields]
    # Identical for every row, so build it once.
    instructions = extra_instructions + "\n" + complex_cot

    prompts = []
    for row in zip(*columns):
        details = "\n".join(
            f"{label}: {_display(value)}"
            for (label, _), value in zip(fields, row)
        )
        prompts.append(prompt_style.format(details, instructions))
    return prompts

# Only the first ten candidate profiles are turned into prompts.
test_profiles = profile.iloc[:10]
prompts = format_prompts(df=test_profiles)
print(f"Prepared {len(prompts)} prompts for testing", "-" * 30, sep="\n")



In [None]:

# Container for the whole run: one block of run metadata plus one result
# entry per benchmarked model (appended in the loop below).
all_results = dict(
    test_config=dict(
        device=device,
        test_date=datetime.now().isoformat(),
        number_of_candidates=2,
        max_new_tokens=512,
        total_profiles_available=len(profile),
        profiles_used=len(test_profiles),
    ),
    model_results=[],
)

print("Starting model comparison benchmark...")

# Models are benchmarked one after another; each call loads the model,
# generates, and returns a result dict with a "status" field.
for model_name, model_path in MODELS_TO_TEST.items():
    result = generate_resumes_for_model(model_name, model_path, prompts, device)
    all_results["model_results"].append(result)

    if result["status"] != "success":
        print(f"❌ {model_name}: Failed - {result.get('error', 'Unknown error')}")
        continue

    print(f"✅ {model_name}: {result['avg_generation_time']:.2f}s avg per resume")



In [None]:

# Timestamped file name so repeated runs never overwrite each other.
run_stamp = datetime.now().strftime('%Y%m%d_%H%M%S')
output_filename = f"resume_generation_benchmark_{run_stamp}.json"

# ensure_ascii=False keeps non-ASCII characters readable in the saved file.
with open(output_filename, 'w', encoding='utf-8') as f:
    json.dump(all_results, f, indent=2, ensure_ascii=False)

print("\n" + "=" * 50)
print(f"Benchmark completed! Results saved to: {output_filename}")


In [None]:

print("\nPERFORMANCE SUMMARY:")
print("=" * 50)

# Split the per-model results by outcome.
successful_models = list(
    filter(lambda entry: entry["status"] == "success", all_results["model_results"])
)
failed_models = list(
    filter(lambda entry: entry["status"] == "failed", all_results["model_results"])
)

if successful_models:
    print("SUCCESSFUL MODELS:")
    for result in successful_models:
        print("  {:25} | {:6.2f}s avg | {:6.2f}s total".format(
            result['model_name'],
            result['avg_generation_time'],
            result['total_time'],
        ))

if failed_models:
    print("\nFAILED MODELS:")
    for result in failed_models:
        print("  {:25} | {}".format(
            result['model_name'],
            result.get('error', 'Unknown error'),
        ))

print(f"\nDevice used: {device}")
if device == "cuda":
    # The value printed is torch.cuda.max_memory_allocated(), converted to GiB.
    print(f"GPU memory allocated: {torch.cuda.max_memory_allocated() / 1024**3:.2f} GB")

print("\nSAMPLE RESULTS:")
print("=" * 50)
for result in successful_models:
    if not result["resumes"]:
        continue

    # Show at most the first 200 characters of the first resume.
    first_resume = result["resumes"][0]["resume"]
    if len(first_resume) > 200:
        preview = first_resume[:200] + "..."
    else:
        preview = first_resume

    print(f"\n{result['model_name']} - First resume preview:")
    print(preview)
    print("-" * 30)