In [None]:
import os

os.environ["OPENAI_API_KEY"] = ""

# Signature

In [1]:
import dspy


class JDResumeScoring(dspy.Signature):
    """Given a job description and a resume, predict a matching score (0-10)."""

    jd = dspy.InputField(desc="The job description text")
    resume = dspy.InputField(desc="The candidate's resume text")
    score = dspy.OutputField(desc="The matching score between JD and Resume (0–10)")
    explanation = dspy.OutputField(desc="The explanation of the matching score")

In [None]:
llm = dspy.LM(
    model="openai/gpt-4.1-mini-2025-04-14",
)
dspy.configure(lm=llm)

In [3]:
class JDResumeMatcher(dspy.Module):
    def __init__(self):
        super().__init__()
        self.scorer = dspy.Predict(JDResumeScoring)

    def forward(self, jd, resume):
        return self.scorer(jd=jd, resume=resume)

In [4]:
from sklearn.metrics import mean_absolute_error


def mae_metric(gold, pred, trace=None):
    try:
        pred_score = float(pred.score)
    except Exception:
        return 9999
    return mean_absolute_error([gold.score], [pred_score])


In [None]:
import numpy as np


def accuracy_at_threshold(gold_scores, pred_scores, threshold):
    """
    Calculates the Accuracy@threshold metric.

    The formula is: (1/N) * Σ 1(|Score_GT - Score_Pred| <= threshold)
    where 1() is the indicator function.
    """
    gold_scores = np.array(gold_scores)
    pred_scores = np.array(pred_scores)
    absolute_errors = np.abs(gold_scores - pred_scores)
    within_threshold_count = np.sum(absolute_errors <= threshold)
    return within_threshold_count / len(gold_scores)

In [5]:
import pandas as pd

train_df = pd.read_csv("../../data/train.csv")
val_df = pd.read_csv("../../data/test.csv")

In [6]:
from dspy.teleprompt import MIPROv2

trainset = [
    dspy.Example(
        jd=row["job_description"], resume=row["resume"], score=row["match_score"]
    ).with_inputs("jd", "resume")
    for i, row in train_df.iterrows()
]

valset = [
    dspy.Example(
        jd=row["job_description"], resume=row["resume"], score=row["match_score"]
    ).with_inputs("jd", "resume")
    for i, row in val_df.iterrows()
]
matcher = JDResumeMatcher()
matcher.set_lm(llm)

tuner = MIPROv2(metric=mae_metric)
tuner.compile(matcher, trainset=trainset)


2025/10/11 13:28:49 INFO dspy.teleprompt.mipro_optimizer_v2: 
RUNNING WITH THE FOLLOWING LIGHT AUTO RUN SETTINGS:
num_trials: 10
minibatch: True
num_fewshot_candidates: 6
num_instruct_candidates: 3
valset size: 100

2025/10/11 13:28:49 INFO dspy.teleprompt.mipro_optimizer_v2: 
==> STEP 1: BOOTSTRAP FEWSHOT EXAMPLES <==
2025/10/11 13:28:49 INFO dspy.teleprompt.mipro_optimizer_v2: These will be used as few-shot example candidates for our program and for creating instructions.

2025/10/11 13:28:49 INFO dspy.teleprompt.mipro_optimizer_v2: Bootstrapping N=6 sets of demonstrations...


Bootstrapping set 1/6
Bootstrapping set 2/6
Bootstrapping set 3/6


  0%|          | 5/6000 [00:09<3:18:58,  1.99s/it]


Bootstrapped 4 full traces after 5 examples for up to 1 rounds, amounting to 5 attempts.
Bootstrapping set 4/6


  0%|          | 3/6000 [00:05<3:04:42,  1.85s/it]


Bootstrapped 3 full traces after 3 examples for up to 1 rounds, amounting to 3 attempts.
Bootstrapping set 5/6


  0%|          | 1/6000 [00:01<2:00:07,  1.20s/it]


Bootstrapped 1 full traces after 1 examples for up to 1 rounds, amounting to 1 attempts.
Bootstrapping set 6/6


  0%|          | 2/6000 [00:03<2:32:51,  1.53s/it]
2025/10/11 13:29:09 INFO dspy.teleprompt.mipro_optimizer_v2: 
==> STEP 2: PROPOSE INSTRUCTION CANDIDATES <==
2025/10/11 13:29:09 INFO dspy.teleprompt.mipro_optimizer_v2: We will use the few-shot examples from the previous step, a generated dataset summary, a summary of the program code, and a randomly selected prompting tip to propose instructions.
2025/10/11 13:29:09 INFO dspy.teleprompt.mipro_optimizer_v2: 
Proposing N=3 instructions...



Bootstrapped 2 full traces after 2 examples for up to 1 rounds, amounting to 2 attempts.


2025/10/11 13:29:34 INFO dspy.teleprompt.mipro_optimizer_v2: Proposed Instructions for Predictor 0:

2025/10/11 13:29:34 INFO dspy.teleprompt.mipro_optimizer_v2: 0: Given a job description and a resume, predict a matching score (0-10).

2025/10/11 13:29:34 INFO dspy.teleprompt.mipro_optimizer_v2: 1: Given a job description and a candidate's resume, analyze the degree of alignment between the two by focusing on matching domain-specific and technical skills while disregarding irrelevant or nonsensical filler text. Produce a matching score from 0 to 10 that reflects how well the candidate’s qualifications meet the job requirements. Additionally, provide a detailed explanation highlighting which specific skills and experiences from both documents overlap to support the score, and identify any important job skills missing from the resume that reduce the match. Your explanation should clearly justify the score by emphasizing key matching criteria to help recruiters or systems understand the 

Average Metric: 130.00 / 100 (130.0%): 100%|██████████| 100/100 [00:20<00:00,  4.79it/s]

2025/10/11 13:29:55 INFO dspy.evaluate.evaluate: Average Metric: 130.0 / 100 (130.0%)
2025/10/11 13:29:55 INFO dspy.teleprompt.mipro_optimizer_v2: Default program score: 130.0

2025/10/11 13:29:55 INFO dspy.teleprompt.mipro_optimizer_v2: == Trial 2 / 13 - Minibatch ==



Average Metric: 29.00 / 35 (82.9%): 100%|██████████| 35/35 [00:08<00:00,  4.27it/s]

2025/10/11 13:30:03 INFO dspy.evaluate.evaluate: Average Metric: 29.0 / 35 (82.9%)
2025/10/11 13:30:03 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 82.86 on minibatch of size 35 with parameters ['Predictor 0: Instruction 1', 'Predictor 0: Few-Shot Set 3'].
2025/10/11 13:30:03 INFO dspy.teleprompt.mipro_optimizer_v2: Minibatch scores so far: [82.86]
2025/10/11 13:30:03 INFO dspy.teleprompt.mipro_optimizer_v2: Full eval scores so far: [130.0]
2025/10/11 13:30:03 INFO dspy.teleprompt.mipro_optimizer_v2: Best full score so far: 130.0


2025/10/11 13:30:03 INFO dspy.teleprompt.mipro_optimizer_v2: == Trial 3 / 13 - Minibatch ==



Average Metric: 40.00 / 35 (114.3%): 100%|██████████| 35/35 [00:11<00:00,  3.07it/s]

2025/10/11 13:30:14 INFO dspy.evaluate.evaluate: Average Metric: 40.0 / 35 (114.3%)
2025/10/11 13:30:14 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 114.29 on minibatch of size 35 with parameters ['Predictor 0: Instruction 2', 'Predictor 0: Few-Shot Set 0'].
2025/10/11 13:30:14 INFO dspy.teleprompt.mipro_optimizer_v2: Minibatch scores so far: [82.86, 114.29]
2025/10/11 13:30:14 INFO dspy.teleprompt.mipro_optimizer_v2: Full eval scores so far: [130.0]
2025/10/11 13:30:14 INFO dspy.teleprompt.mipro_optimizer_v2: Best full score so far: 130.0


2025/10/11 13:30:14 INFO dspy.teleprompt.mipro_optimizer_v2: == Trial 4 / 13 - Minibatch ==



Average Metric: 30.00 / 35 (85.7%): 100%|██████████| 35/35 [00:08<00:00,  4.12it/s] 

2025/10/11 13:30:23 INFO dspy.evaluate.evaluate: Average Metric: 30.0 / 35 (85.7%)
2025/10/11 13:30:23 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 85.71 on minibatch of size 35 with parameters ['Predictor 0: Instruction 1', 'Predictor 0: Few-Shot Set 5'].
2025/10/11 13:30:23 INFO dspy.teleprompt.mipro_optimizer_v2: Minibatch scores so far: [82.86, 114.29, 85.71]
2025/10/11 13:30:23 INFO dspy.teleprompt.mipro_optimizer_v2: Full eval scores so far: [130.0]
2025/10/11 13:30:23 INFO dspy.teleprompt.mipro_optimizer_v2: Best full score so far: 130.0


2025/10/11 13:30:23 INFO dspy.teleprompt.mipro_optimizer_v2: == Trial 5 / 13 - Minibatch ==



Average Metric: 34.00 / 35 (97.1%): 100%|██████████| 35/35 [00:09<00:00,  3.87it/s] 

2025/10/11 13:30:32 INFO dspy.evaluate.evaluate: Average Metric: 34.0 / 35 (97.1%)
2025/10/11 13:30:32 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 97.14 on minibatch of size 35 with parameters ['Predictor 0: Instruction 2', 'Predictor 0: Few-Shot Set 2'].
2025/10/11 13:30:32 INFO dspy.teleprompt.mipro_optimizer_v2: Minibatch scores so far: [82.86, 114.29, 85.71, 97.14]
2025/10/11 13:30:32 INFO dspy.teleprompt.mipro_optimizer_v2: Full eval scores so far: [130.0]
2025/10/11 13:30:32 INFO dspy.teleprompt.mipro_optimizer_v2: Best full score so far: 130.0


2025/10/11 13:30:32 INFO dspy.teleprompt.mipro_optimizer_v2: == Trial 6 / 13 - Minibatch ==



Average Metric: 31.00 / 35 (88.6%): 100%|██████████| 35/35 [00:07<00:00,  4.45it/s]

2025/10/11 13:30:40 INFO dspy.evaluate.evaluate: Average Metric: 31.0 / 35 (88.6%)
2025/10/11 13:30:40 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 88.57 on minibatch of size 35 with parameters ['Predictor 0: Instruction 0', 'Predictor 0: Few-Shot Set 5'].
2025/10/11 13:30:40 INFO dspy.teleprompt.mipro_optimizer_v2: Minibatch scores so far: [82.86, 114.29, 85.71, 97.14, 88.57]
2025/10/11 13:30:40 INFO dspy.teleprompt.mipro_optimizer_v2: Full eval scores so far: [130.0]
2025/10/11 13:30:40 INFO dspy.teleprompt.mipro_optimizer_v2: Best full score so far: 130.0


2025/10/11 13:30:40 INFO dspy.teleprompt.mipro_optimizer_v2: ===== Trial 7 / 13 - Full Evaluation =====
2025/10/11 13:30:40 INFO dspy.teleprompt.mipro_optimizer_v2: Doing full eval on next top averaging program (Avg Score: 114.29) from minibatch trials...



Average Metric: 126.00 / 100 (126.0%): 100%|██████████| 100/100 [00:20<00:00,  4.77it/s]

2025/10/11 13:31:01 INFO dspy.evaluate.evaluate: Average Metric: 126.0 / 100 (126.0%)
2025/10/11 13:31:01 INFO dspy.teleprompt.mipro_optimizer_v2: Full eval scores so far: [130.0, 126.0]
2025/10/11 13:31:01 INFO dspy.teleprompt.mipro_optimizer_v2: Best full score so far: 130.0
2025/10/11 13:31:01 INFO dspy.teleprompt.mipro_optimizer_v2: 

2025/10/11 13:31:01 INFO dspy.teleprompt.mipro_optimizer_v2: == Trial 8 / 13 - Minibatch ==



Average Metric: 45.00 / 35 (128.6%): 100%|██████████| 35/35 [00:00<00:00, 433.31it/s]

2025/10/11 13:31:01 INFO dspy.evaluate.evaluate: Average Metric: 45.0 / 35 (128.6%)
2025/10/11 13:31:01 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 128.57 on minibatch of size 35 with parameters ['Predictor 0: Instruction 2', 'Predictor 0: Few-Shot Set 0'].
2025/10/11 13:31:01 INFO dspy.teleprompt.mipro_optimizer_v2: Minibatch scores so far: [82.86, 114.29, 85.71, 97.14, 88.57, 128.57]
2025/10/11 13:31:01 INFO dspy.teleprompt.mipro_optimizer_v2: Full eval scores so far: [130.0, 126.0]
2025/10/11 13:31:01 INFO dspy.teleprompt.mipro_optimizer_v2: Best full score so far: 130.0


2025/10/11 13:31:01 INFO dspy.teleprompt.mipro_optimizer_v2: == Trial 9 / 13 - Minibatch ==



Average Metric: 30.00 / 35 (85.7%): 100%|██████████| 35/35 [00:09<00:00,  3.87it/s] 

2025/10/11 13:31:10 INFO dspy.evaluate.evaluate: Average Metric: 30.0 / 35 (85.7%)
2025/10/11 13:31:10 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 85.71 on minibatch of size 35 with parameters ['Predictor 0: Instruction 2', 'Predictor 0: Few-Shot Set 5'].
2025/10/11 13:31:10 INFO dspy.teleprompt.mipro_optimizer_v2: Minibatch scores so far: [82.86, 114.29, 85.71, 97.14, 88.57, 128.57, 85.71]
2025/10/11 13:31:10 INFO dspy.teleprompt.mipro_optimizer_v2: Full eval scores so far: [130.0, 126.0]
2025/10/11 13:31:10 INFO dspy.teleprompt.mipro_optimizer_v2: Best full score so far: 130.0


2025/10/11 13:31:10 INFO dspy.teleprompt.mipro_optimizer_v2: == Trial 10 / 13 - Minibatch ==



Average Metric: 38.00 / 35 (108.6%): 100%|██████████| 35/35 [00:10<00:00,  3.30it/s]

2025/10/11 13:31:21 INFO dspy.evaluate.evaluate: Average Metric: 38.0 / 35 (108.6%)
2025/10/11 13:31:21 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 108.57 on minibatch of size 35 with parameters ['Predictor 0: Instruction 1', 'Predictor 0: Few-Shot Set 4'].
2025/10/11 13:31:21 INFO dspy.teleprompt.mipro_optimizer_v2: Minibatch scores so far: [82.86, 114.29, 85.71, 97.14, 88.57, 128.57, 85.71, 108.57]
2025/10/11 13:31:21 INFO dspy.teleprompt.mipro_optimizer_v2: Full eval scores so far: [130.0, 126.0]
2025/10/11 13:31:21 INFO dspy.teleprompt.mipro_optimizer_v2: Best full score so far: 130.0


2025/10/11 13:31:21 INFO dspy.teleprompt.mipro_optimizer_v2: == Trial 11 / 13 - Minibatch ==



Average Metric: 52.00 / 35 (148.6%): 100%|██████████| 35/35 [00:00<00:00, 4929.01it/s]

2025/10/11 13:31:21 INFO dspy.evaluate.evaluate: Average Metric: 52.0 / 35 (148.6%)
2025/10/11 13:31:21 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 148.57 on minibatch of size 35 with parameters ['Predictor 0: Instruction 0', 'Predictor 0: Few-Shot Set 0'].
2025/10/11 13:31:21 INFO dspy.teleprompt.mipro_optimizer_v2: Minibatch scores so far: [82.86, 114.29, 85.71, 97.14, 88.57, 128.57, 85.71, 108.57, 148.57]
2025/10/11 13:31:21 INFO dspy.teleprompt.mipro_optimizer_v2: Full eval scores so far: [130.0, 126.0]
2025/10/11 13:31:21 INFO dspy.teleprompt.mipro_optimizer_v2: Best full score so far: 130.0


2025/10/11 13:31:21 INFO dspy.teleprompt.mipro_optimizer_v2: == Trial 12 / 13 - Minibatch ==



Average Metric: 46.00 / 35 (131.4%): 100%|██████████| 35/35 [00:00<00:00, 5090.00it/s]

2025/10/11 13:31:21 INFO dspy.evaluate.evaluate: Average Metric: 46.0 / 35 (131.4%)
2025/10/11 13:31:21 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 131.43 on minibatch of size 35 with parameters ['Predictor 0: Instruction 0', 'Predictor 0: Few-Shot Set 0'].
2025/10/11 13:31:21 INFO dspy.teleprompt.mipro_optimizer_v2: Minibatch scores so far: [82.86, 114.29, 85.71, 97.14, 88.57, 128.57, 85.71, 108.57, 148.57, 131.43]
2025/10/11 13:31:21 INFO dspy.teleprompt.mipro_optimizer_v2: Full eval scores so far: [130.0, 126.0]
2025/10/11 13:31:21 INFO dspy.teleprompt.mipro_optimizer_v2: Best full score so far: 130.0


2025/10/11 13:31:21 INFO dspy.teleprompt.mipro_optimizer_v2: ===== Trial 13 / 13 - Full Evaluation =====
2025/10/11 13:31:21 INFO dspy.teleprompt.mipro_optimizer_v2: Doing full eval on next top averaging program (Avg Score: 140.0) from minibatch trials...



Average Metric: 130.00 / 100 (130.0%): 100%|██████████| 100/100 [00:00<00:00, 1209.60it/s]

2025/10/11 13:31:21 INFO dspy.evaluate.evaluate: Average Metric: 130.0 / 100 (130.0%)
2025/10/11 13:31:21 INFO dspy.teleprompt.mipro_optimizer_v2: Full eval scores so far: [130.0, 126.0, 130.0]
2025/10/11 13:31:21 INFO dspy.teleprompt.mipro_optimizer_v2: Best full score so far: 130.0
2025/10/11 13:31:21 INFO dspy.teleprompt.mipro_optimizer_v2: 

2025/10/11 13:31:21 INFO dspy.teleprompt.mipro_optimizer_v2: Returning best identified program with score 130.0!





scorer = Predict(JDResumeScoring(jd, resume -> score, explanation
    instructions='Given a job description and a resume, predict a matching score (0-10).'
    jd = Field(annotation=str required=True json_schema_extra={'desc': 'The job description text', '__dspy_field_type': 'input', 'prefix': 'Jd:'})
    resume = Field(annotation=str required=True json_schema_extra={'desc': "The candidate's resume text", '__dspy_field_type': 'input', 'prefix': 'Resume:'})
    score = Field(annotation=str required=True json_schema_extra={'desc': 'The matching score between JD and Resume (0–10)', '__dspy_field_type': 'output', 'prefix': 'Score:'})
    explanation = Field(annotation=str required=True json_schema_extra={'desc': 'The explanation of the matching score', '__dspy_field_type': 'output', 'prefix': 'Explanation:'})
))

In [12]:
print("--- Optimized DSPy Prompt ---")
dspy.settings.lm.history[0]


--- Optimized DSPy Prompt ---


{'prompt': None,
 'messages': [{'role': 'system',
   'content': "Your input fields are:\n1. `jd` (str): The job description text\n2. `resume` (str): The candidate's resume text\nYour output fields are:\n1. `score` (str): The matching score between JD and Resume (0–10)\n2. `explanation` (str): The explanation of the matching score\nAll interactions will be structured in the following way, with the appropriate values filled in.\n\n[[ ## jd ## ]]\n{jd}\n\n[[ ## resume ## ]]\n{resume}\n\n[[ ## score ## ]]\n{score}\n\n[[ ## explanation ## ]]\n{explanation}\n\n[[ ## completed ## ]]\nIn adhering to this structure, your objective is: \n        Given a job description and a resume, predict a matching score (0-10)."},
  {'role': 'user',
   'content': 'This is an example of the task, though some input or output fields are not supplied.\n\n[[ ## jd ## ]]\nSoftware Engineer needed with experience in Git, Docker, Java, REST APIs. Hit summer discussion culture measure ever.\n\n[[ ## resume ## ]]\nExp

In [None]:
preds, golds = [], []
for example in valset:
    result = matcher(jd=example.jd, resume=example.resume)
    try:
        preds.append(float(result.score))
        golds.append(float(example.score))
    except Exception:
        continue

val_mae = mean_absolute_error(golds, preds)
val_mae

1.2023333333333333

In [18]:
import json

with open("preds/openai.json", "w") as f:
    json.dump(preds, f)

In [19]:
matcher.save("matcher/openai.json")

In [11]:
llm.history[-1]

{'prompt': None,
 'messages': [{'role': 'system',
   'content': "Your input fields are:\n1. `jd` (str): The job description text\n2. `resume` (str): The candidate's resume text\nYour output fields are:\n1. `score` (str): The matching score between JD and Resume (0–10)\nAll interactions will be structured in the following way, with the appropriate values filled in.\n\n[[ ## jd ## ]]\n{jd}\n\n[[ ## resume ## ]]\n{resume}\n\n[[ ## score ## ]]\n{score}\n\n[[ ## completed ## ]]\nIn adhering to this structure, your objective is: \n        Given a job description and a resume, predict a matching score (0-10)."},
  {'role': 'user',
   'content': '[[ ## jd ## ]]\nProduct Manager needed with experience in Product Roadmap, Scrum, Stakeholder Management, Agile. Score around create under. Must test game quickly woman by sure business. Model along wonder drive present.\n\n[[ ## resume ## ]]\nExperienced professional skilled in relationship, behind, User Stories. Tend although region product dark yea

In [17]:
llm.inspect_history(n=1)





[34m[2025-10-05T15:19:12.269124][0m

[31mSystem message:[0m

Your input fields are:
1. `jd` (str): The job description text
2. `resume` (str): The candidate's resume text
Your output fields are:
1. `score` (str): The matching score between JD and Resume (0–10)
All interactions will be structured in the following way, with the appropriate values filled in.

[[ ## jd ## ]]
{jd}

[[ ## resume ## ]]
{resume}

[[ ## score ## ]]
{score}

[[ ## completed ## ]]
In adhering to this structure, your objective is: 
        Given a job description and a resume, predict a matching score (0-10).


[31mUser message:[0m

[[ ## jd ## ]]
Product Manager needed with experience in Product Roadmap, Scrum, Stakeholder Management, Agile. Score around create under. Must test game quickly woman by sure business. Model along wonder drive present.

[[ ## resume ## ]]
Experienced professional skilled in relationship, behind, User Stories. Tend although region product dark year. Work however tough. Window