# Coding Boost: How to Use Wisent to Make Your Models Better at Coding

Here, we use LiveCodeBench to build contrastive pairs, compute steering vectors from them, and modify the activations of a small model to steer it toward generating better code.

In [1]:
import os
import json

# -----------------------------------------------------------------------------
# NOTEBOOK SETTINGS - every tunable value used by the cells below lives here
# -----------------------------------------------------------------------------

# --- Model ---
MODEL = "meta-llama/Llama-3.2-1B-Instruct"  # HuggingFace model ID

# --- Benchmark ---
TASK = "livecodebench"  # coding benchmark the contrastive pairs come from

# --- Steering vector ---
LAYER = 8                      # layer used for single-layer steering
LAYERS_MULTI = "6,8,10"        # comma-separated layers for the multi-layer run
TRAIT_LABEL = "coding_ability" # label attached to the steering vector
TOKEN_AGGREGATION = "average"  # token activation aggregation mode
NUM_PAIRS = 50                 # how many contrastive pairs to use
NORMALIZE = True               # whether steering vectors are normalized

# --- Steering strength ---
STEERING_STRENGTH = 1.5                    # default strength (0.5-2.0 typical)
STRENGTHS_TO_TEST = [0.5, 1.0, 1.5, 2.0]   # strengths for comparison

# --- Generation ---
MAX_NEW_TOKENS = 512     # cap on generated tokens per solution
TEMPERATURE = 0.2        # lower = more deterministic
NUM_TEST_QUESTIONS = 10  # number of problems to evaluate

# --- Output location ---
OUTPUT_DIR = "./coding_boost_outputs"

# -----------------------------------------------------------------------------
# Make sure the output tree exists (safe to re-run: exist_ok=True)
# -----------------------------------------------------------------------------
os.makedirs(OUTPUT_DIR, exist_ok=True)
for _subdir in ("pairs", "vectors", "responses"):
    os.makedirs(f"{OUTPUT_DIR}/{_subdir}", exist_ok=True)

# Echo the key settings so the run is self-describing.
_summary_rows = [
    ("Model", MODEL),
    ("Task", TASK),
    ("Steering Layer", LAYER),
    ("Trait Label", TRAIT_LABEL),
    ("Num Pairs", NUM_PAIRS),
    ("Steering Strength", STEERING_STRENGTH),
    ("Output", OUTPUT_DIR),
]
print("Configuration:")
for _label, _value in _summary_rows:
    print(f"  {_label}: {_value}")

Configuration:
  Model: meta-llama/Llama-3.2-1B-Instruct
  Task: livecodebench
  Steering Layer: 8
  Trait Label: coding_ability
  Num Pairs: 50
  Steering Strength: 1.5
  Output: ./coding_boost_outputs


LiveCodeBench provides pre-computed code solutions with pass/fail labels. We extract:
- **Positive (correct)**: Code solutions that pass all test cases
- **Negative (incorrect)**: Code solutions that fail test cases

In [None]:
# Extract contrastive pairs from LiveCodeBench
!python -m wisent.core.main generate-pairs-from-task \
    livecodebench \
    --output {OUTPUT_DIR}/pairs/livecodebench_pairs.json \
    --limit 100 \
    --verbose

[32m  .................  .:--++*##%%%%##**+=-:.  .................  
  ..             .:=*%@@@@@@@%%%%%%%@@@@@@%*=:.             ..  
  .           .-*%@@@%#+=-::.........:-=+#%@@@%*=.           .  
  .         -*%@@@#=:.                    .:=*%@@@*-.        .  
  .      .-#@@@*=.                            .-*@@@#-.      .  
  .     :#@@@*:                                  :+%@@#-     .  
  .   .+@@@*:                                      :+@@@+.   .  
  .  .*@@@@%*=:.                                     -%@@#:  .  
  . .#@@#=*%@@@%*-:.                                  .#@@%: .  
  ..*@@%.  .-+#@@@@#+-:.                               .*@@%..  
  .=@@@-       :-+#@@@@%*=:.                            .%@@*.  
  :#@@+           .:-+#@@@@%#+=:.                        -@@@-  
  =@@@:                .-=*%@@@@%#+=:..                  .#@@+  
  +@@@*=:.                 .:-+*%@@@@%#*=-:..             *@@+  
  +@@@@@@#+-..                  .:-=*#@@@@@%#*+--..       +@@+  
  +@@#-+%@@@%:      

In [None]:
# Inspect the contrastive pairs written by the extraction cell
pairs_path = f"{OUTPUT_DIR}/pairs/livecodebench_pairs.json"
with open(pairs_path, 'r') as f:
    pairs_data = json.load(f)

separator = "="*80
print(f"Extracted {pairs_data['num_pairs']} contrastive pairs from {pairs_data['task_name']}")
print("\n" + separator)

# Preview the first two pairs; long fields are cut to keep the output readable.
for example_no, pair in enumerate(pairs_data['pairs'][:2], start=1):
    correct_code = pair['positive_response']['model_response']
    incorrect_code = pair['negative_response']['model_response']

    print(f"\nExample {example_no}:")
    print(f"Problem (truncated): {pair['prompt'][:200]}...")
    print(f"\nCorrect Code (truncated):")
    print(correct_code[:300])
    print(f"\nIncorrect Code (truncated):")
    print(incorrect_code[:300])
    print(separator)

The `generate-vector-from-task` command runs the complete pipeline:
1. Extract contrastive pairs
2. Collect activations from the model
3. Create steering vectors using CAA (Contrastive Activation Addition)

In [None]:
# Generate steering vector using configuration parameters
!python -m wisent.core.main generate-vector-from-task \
    --task {TASK} \
    --trait-label {TRAIT_LABEL} \
    --model {MODEL} \
    --num-pairs {NUM_PAIRS} \
    --layers {LAYER} \
    --token-aggregation {TOKEN_AGGREGATION} \
    --output {OUTPUT_DIR}/vectors/coding_vector.pt \
    --keep-intermediate \
    --intermediate-dir {OUTPUT_DIR}/vectors \
    --normalize \
    --verbose \
    --timing

In [None]:
# Examine the generated steering vector
import torch

# map_location="cpu" lets the file load on a machine without the accelerator
# the vector was saved from; this cell only prints metadata, shape and norm.
# NOTE: torch.load unpickles the file - only load vectors you generated yourself.
vector_data = torch.load(f"{OUTPUT_DIR}/vectors/coding_vector.pt", map_location="cpu")

# Metadata fields are optional in the saved dict, hence .get with a fallback.
print("Steering Vector Info:")
print(f"  Model: {vector_data.get('model', 'N/A')}")
print(f"  Trait: {vector_data.get('trait_label', 'N/A')}")
print(f"  Method: {vector_data.get('method', 'N/A')}")
print(f"  Layer: {vector_data.get('layer', 'N/A')}")

if 'steering_vector' in vector_data:
    sv = vector_data['steering_vector']
    print(f"  Vector shape: {sv.shape}")
    print(f"  Vector norm: {torch.norm(sv).item():.4f}")

Steering can also be applied at several layers. The next cell creates steering vectors for each layer listed in `LAYERS_MULTI`, which can then be combined.

In [None]:
# Generate steering vectors for multiple layers using LAYERS_MULTI config
!python -m wisent.core.main generate-vector-from-task \
    --task {TASK} \
    --trait-label {TRAIT_LABEL} \
    --model {MODEL} \
    --num-pairs {NUM_PAIRS} \
    --layers {LAYERS_MULTI} \
    --token-aggregation {TOKEN_AGGREGATION} \
    --output {OUTPUT_DIR}/vectors/coding_multi_layer.pt \
    --normalize \
    --verbose

Use the `multi-steer` command to apply the coding steering vector during generation.

In [None]:
# Load a test prompt dynamically from LiveCodeBench
from wisent.core.tasks import LiveCodeBenchTask

# Load LiveCodeBench data
lcb_task = LiveCodeBenchTask()
lcb_data = lcb_task.load_data(limit=10)

# Get the first problem as our test prompt
test_problem = lcb_data[0]
TEST_PROMPT = f"Question: {test_problem['question_content']}\n\nWrite a solution:"

print(f"Loaded problem: {test_problem.get('question_title', 'Unknown')}")
print(f"Prompt preview: {TEST_PROMPT[:300]}...")

# Apply steering using configuration parameters
!python -m wisent.core.main multi-steer \
    --vector {OUTPUT_DIR}/vectors/coding_vector.pt:{STEERING_STRENGTH} \
    --model {MODEL} \
    --layer {LAYER} \
    --prompt "{TEST_PROMPT}" \
    --max-new-tokens {MAX_NEW_TOKENS} \
    --verbose

Generate solutions for LiveCodeBench problems to evaluate steering effectiveness.

In [None]:
# Generate baseline responses (without steering) using configuration.
# Runs the wisent `generate-responses` command for MODEL on TASK and writes the
# result to responses/baseline_responses.json, which the comparison and
# evaluation cells read. --use-steering is not passed here.
# NUM_TEST_QUESTIONS, MAX_NEW_TOKENS and TEMPERATURE come from the
# configuration cell.
!python -m wisent.core.main generate-responses \
    {MODEL} \
    --task {TASK} \
    --output {OUTPUT_DIR}/responses/baseline_responses.json \
    --num-questions {NUM_TEST_QUESTIONS} \
    --max-new-tokens {MAX_NEW_TOKENS} \
    --temperature {TEMPERATURE} \
    --verbose

In [None]:
# Generate steered responses using configuration.
# Same command and generation settings as the baseline cell, plus
# --use-steering; output goes to responses/steered_responses.json.
# NOTE(review): no steering vector path, layer, or strength is passed on this
# command line (coding_vector.pt, LAYER and STEERING_STRENGTH are not
# referenced), so it is unclear from this cell which vector --use-steering
# applies - confirm against the wisent CLI before trusting the comparison.
!python -m wisent.core.main generate-responses \
    {MODEL} \
    --task {TASK} \
    --output {OUTPUT_DIR}/responses/steered_responses.json \
    --num-questions {NUM_TEST_QUESTIONS} \
    --max-new-tokens {MAX_NEW_TOKENS} \
    --temperature {TEMPERATURE} \
    --use-steering \
    --verbose

In [None]:
# Compare baseline vs steered responses side by side
with open(f"{OUTPUT_DIR}/responses/baseline_responses.json", 'r') as f:
    baseline = json.load(f)

with open(f"{OUTPUT_DIR}/responses/steered_responses.json", 'r') as f:
    steered = json.load(f)

print("Response Comparison:")
print("="*80)

# The two runs are paired by position. Warn if they differ in length, since
# only the common prefix can be compared.
if len(baseline['responses']) != len(steered['responses']):
    print(
        f"Warning: {len(baseline['responses'])} baseline vs "
        f"{len(steered['responses'])} steered responses; comparing the common prefix only."
    )

# zip() stops at the shorter list, so a shorter steered run can no longer
# raise IndexError; the slice caps the preview at three problems.
paired_responses = list(zip(baseline['responses'], steered['responses']))[:3]

for problem_no, (base_resp, steer_resp) in enumerate(paired_responses, start=1):
    print(f"\nProblem {problem_no}:")
    print(f"Prompt: {base_resp['prompt'][:100]}...")
    print(f"\n--- Baseline Response ---")
    print(base_resp.get('generated_response', 'N/A')[:400])
    print(f"\n--- Steered Response ---")
    print(steer_resp.get('generated_response', 'N/A')[:400])
    print("="*80)

The `evaluate-responses` command can execute generated code in Docker to verify correctness.

In [None]:
# Evaluate baseline responses
!python -m wisent.core.main evaluate-responses \
    --input {OUTPUT_DIR}/responses/baseline_responses.json \
    --output {OUTPUT_DIR}/responses/baseline_evaluation.json \
    --task livecodebench \
    --verbose

In [None]:
# Evaluate steered responses
!python -m wisent.core.main evaluate-responses \
    --input {OUTPUT_DIR}/responses/steered_responses.json \
    --output {OUTPUT_DIR}/responses/steered_evaluation.json \
    --task livecodebench \
    --verbose

In [None]:
# Compare evaluation results
def load_eval_results(path):
    """Read a JSON evaluation file.

    Args:
        path: Location of the JSON file to read.

    Returns:
        The decoded JSON content, or None when the file does not exist.
    """
    try:
        handle = open(path, 'r')
    except FileNotFoundError:
        # A missing file means "no results yet", not an error.
        return None
    with handle:
        return json.load(handle)

baseline_eval = load_eval_results(f"{OUTPUT_DIR}/responses/baseline_evaluation.json")
steered_eval = load_eval_results(f"{OUTPUT_DIR}/responses/steered_evaluation.json")


def _print_eval_summary(label, results):
    """Print the aggregated metrics of one evaluation run.

    Args:
        label: Name shown in the heading, e.g. "Baseline" or "Steered".
        results: Loaded evaluation data, or None/empty when unavailable.
    """
    if not results:
        # Say so explicitly instead of printing nothing, so a missing
        # evaluation file is not mistaken for an empty comparison.
        print(f"\n{label} Model: no evaluation results found")
        return

    # Metrics missing from the file are reported as 0.
    metrics = results.get('aggregated_metrics', {})
    print(f"\n{label} Model:")
    print(f"  Pass Rate: {metrics.get('pass_rate', 0):.2%}")
    print(f"  Total Passed: {metrics.get('total_passed', 0)}")
    print(f"  Total Problems: {metrics.get('total_problems', 0)}")


print("Evaluation Results Comparison:")
print("="*60)

# One reporting routine for both runs keeps the two summaries consistent.
for _label, _results in (("Baseline", baseline_eval), ("Steered", steered_eval)):
    _print_eval_summary(_label, _results)