In [20]:
import pandas as pd
import torch

from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    TrainingArguments,
    
)

from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training
from trl import SFTTrainer, SFTConfig
from datasets import Dataset
import evaluate
import nltk




In [2]:

# --- Configuration ---
MODEL_NAME = "google/gemma-2-2b-it"
OUTPUT_DIR = "gemma-2b-finetuned-resume"
# Increase this number for real training. The current value is for a quick demo.
TRAINING_DATA_ROWS = 10
LORA_R = 16
LORA_ALPHA = 32
LORA_DROPOUT = 0.05
# LoRA target modules for Gemma-2 (all linear layers)
TARGET_MODULES = [
    "q_proj", "k_proj", "v_proj", "o_proj",
    "gate_proj", "up_proj", "down_proj"
]

# Prompt template (from your notebook)
prompt_style = """Below is an instruction that describes a task, paired with an input that provides candidate details and a target job.
Write a professional, ATS-friendly resume tailored to the target role.

First, produce a concise 2–4 bullet **Plan** that lists the sections and focus points you will include (e.g., highlight leadership, quantify achievements, include keywords from job description). Do not reveal internal chain-of-thought — keep the plan explicit and short.

Then generate the resume. Use clear section headers (Summary, Experience, Education, Skills, Projects/Certs as applicable). For experience bullets, use the STAR/impact style (situation/task → action → measurable result) and include quantifiable metrics where possible. Tailor language and keywords to the target job.

### Candidate details / Job target:
{}

### Additional instructions (tone, length, must-include keywords, formatting notes):
{}

### Output format:
Plan:
- <short bullet 1>
- <short bullet 2>

Resume:
[Use sections: Summary, Experience (most recent first), Education, Skills, Projects/Certifications, Additional information (optional)]
"""
extra_instructions = "Tone: professional, one-page, include relevant keywords."
complex_cot = (
    "- Identify key skills from the candidate's past roles.\n"
    "- Match these skills to the job description keywords.\n"
    "- Prioritize experiences that show measurable achievements."
)
full_instructions = extra_instructions + "\n" + complex_cot


In [3]:


# --- 3. Data Loading and Preparation ---
print("Loading profiles dataset...")
try:
    profile = pd.read_parquet("hf://datasets/lang-uk/recruitment-dataset-candidate-profiles-english/data/train-00000-of-00001.parquet")
    train_df = profile.head(TRAINING_DATA_ROWS)
except Exception as e:
    print(f"Error loading dataset: {e}. Cannot proceed with fine-tuning without data.")
    raise

data_list = []
print(f"Preparing {len(train_df)} data points for SFT...")
for index, row in train_df.iterrows():
    candidate_details = (
        f"Position: {row['Position']}\n"
        f"More info: {row['Moreinfo']}\n"
        f"Looking For: {row['Looking For']}\n"
        f"Highlights: {row['Highlights']}\n"
        f"Primary Keyword: {row['Primary Keyword']}"
    )
    full_prompt = prompt_style.format(candidate_details, full_instructions)
    ground_truth_response = row['CV'] # Placeholder for the high-quality target resume

    # Format for SFT
    sft_formatted_text = (
        f"{full_prompt}<|endoftext|>"
        f"{ground_truth_response}"
    )

    data_list.append({
        "prompt": full_prompt,
        "response": ground_truth_response,
        "text": sft_formatted_text
    })

train_df_sft = pd.DataFrame(data_list)
train_df_sft.to_csv("sft_training_data_preview.csv", index=False)
print(f"SFT Training Data Preview saved to sft_training_data_preview.csv with {len(train_df_sft)} rows.")

# Convert to HuggingFace Dataset
hf_dataset = Dataset.from_pandas(train_df_sft)


Loading profiles dataset...
Preparing 10 data points for SFT...
SFT Training Data Preview saved to sft_training_data_preview.csv with 10 rows.


In [4]:

# --- 4. Model and LoRA Setup ---
print("\nLoading model and tokenizer with LoRA configuration...")

# 4-bit quantization configuration for memory efficiency
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
    bnb_4bit_use_double_quant=False,
)

# Load the model onto GPU 0 (CUDA)
device_map = {"": 0}
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    quantization_config=bnb_config,
    device_map=device_map,
    torch_dtype=torch.bfloat16
)
model.config.use_cache = False
model.config.pretraining_tp = 1
model = prepare_model_for_kbit_training(model)

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"

# LoRA configuration
peft_config = LoraConfig(
    lora_alpha=LORA_ALPHA,
    lora_dropout=LORA_DROPOUT,
    r=LORA_R,
    bias="none",
    task_type="CAUSAL_LM",
    target_modules=TARGET_MODULES,
)

model = get_peft_model(model, peft_config)
print("Trainable parameters summary:")
model.print_trainable_parameters()



Loading model and tokenizer with LoRA configuration...


`torch_dtype` is deprecated! Use `dtype` instead!
Loading checkpoint shards: 100%|██████████| 2/2 [00:07<00:00,  3.76s/it]


Trainable parameters summary:
trainable params: 20,766,720 || all params: 2,635,108,608 || trainable%: 0.7881


In [5]:

# --- 5. Evaluation Metrics Function (BLEU/ROUGE) ---
rouge_metric = evaluate.load('rouge')
bleu_metric = evaluate.load('bleu')

def compute_metrics(eval_preds):
    predictions = eval_preds.predictions
    label_ids = eval_preds.label_ids

    if isinstance(predictions, tuple):
        predictions = predictions[0]

    # Decode predictions and labels
    decoded_preds = tokenizer.batch_decode(predictions, skip_special_tokens=True)
    label_ids[label_ids == -100] = tokenizer.pad_token_id
    decoded_labels = tokenizer.batch_decode(label_ids, skip_special_tokens=True)

    # Simple cleanup
    decoded_preds = ["\n".join(pred.split('\n')) for pred in decoded_preds]
    decoded_labels = ["\n".join(label.split('\n')) for label in decoded_labels]

    # ROUGE
    rouge_results = rouge_metric.compute(
        predictions=decoded_preds, references=decoded_labels, use_stemmer=True
    )

    # BLEU: requires tokenized lists
    tokenized_preds = [nltk.word_tokenize(pred) for pred in decoded_preds]
    tokenized_labels = [[nltk.word_tokenize(label)] for label in decoded_labels]

    bleu_results = bleu_metric.compute(
        predictions=tokenized_preds, references=tokenized_labels, max_order=4
    )

    # Return combined dictionary of results
    result = {k: round(v, 4) for k, v in rouge_results.items()}
    result['bleu'] = round(bleu_results['bleu'], 4)
    return result


Downloading builder script: 6.14kB [00:00, 284kB/s]
Downloading builder script: 5.94kB [00:00, 12.2MB/s]
Downloading extra modules: 3.34kB [00:00, 8.43MB/s]


In [29]:
# --- 6. Training Arguments and SFTTrainer ---
training_arguments = TrainingArguments(
    output_dir=OUTPUT_DIR,
    num_train_epochs=1,
    per_device_train_batch_size=1,
    gradient_accumulation_steps=4,
    optim="paged_adamw_32bit",
    save_steps=100,
    logging_steps=10,
    learning_rate=2e-4,
    weight_decay=0.001,
    bf16=True,
    max_grad_norm=0.3,
    warmup_ratio=0.03,
    group_by_length=True,
    lr_scheduler_type="constant",
    report_to="none",
)

# 1) tokenize dataset manually
hf_dataset = hf_dataset.map(
    lambda e: tokenizer(e["text"], truncation=True, max_length=2048),
    batched=True
)

# 2) Initialize trainer (minimal style for old TRL)
trainer = SFTTrainer(
    model=model,
    train_dataset=hf_dataset,
    peft_config=peft_config,
    args=training_arguments,
)


Map: 100%|██████████| 10/10 [00:00<00:00, 983.70 examples/s]
Truncating train dataset: 100%|██████████| 10/10 [00:00<00:00, 3229.37 examples/s]


In [30]:

# --- 7. Start Fine-Tuning ---
print("\n--- Starting Fine-Tuning (trainer.train()) on CUDA ---")
# The progress bar will be displayed automatically in the notebook output.
trainer.train()

# --- 8. Save the LoRA adapter (Run after successful training) ---
trainer.model.save_pretrained(OUTPUT_DIR)
tokenizer.save_pretrained(OUTPUT_DIR)

print("\nCode structure complete. Uncomment `trainer.train()` to start the process in your environment.")


--- Starting Fine-Tuning (trainer.train()) on CUDA ---


  return fn(*args, **kwargs)


OutOfMemoryError: CUDA out of memory. Tried to allocate 1.10 GiB. GPU 0 has a total capacity of 5.61 GiB of which 301.62 MiB is free. Including non-PyTorch memory, this process has 4.69 GiB memory in use. Of the allocated memory 3.90 GiB is allocated by PyTorch, and 718.52 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)