In [1]:
# Import necessary libraries
import os
from vllm import LLM, SamplingParams
from vllm.steer_vectors.request import SteerVectorRequest

# Configure the runtime through environment variables: VLLM_USE_V1 is set to "0"
# and only CUDA device "0" is made visible.
# NOTE(review): these are assigned after `vllm` has already been imported above;
# confirm that both settings are still honoured when set at this point.
os.environ.update({"VLLM_USE_V1": "0", "CUDA_VISIBLE_DEVICES": "0"})

# Build the generation engine with steer-vector support switched on.
generation_engine_options = dict(
    model="/data/zju-46/shenyl/hf/model/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B/",
    enable_steer_vector=True,
    enforce_eager=True,
    tensor_parallel_size=1,
)
llm = LLM(**generation_engine_options)

# Math word problems used as test inputs.
problems = [
    "Chandra has four bowls.  Each one is a different color (red, blue, yellow, green).  She also has exactly one glass the same color as each bowl.  If she chooses a bowl and a glass from the cupboard, how many pairings are possible?  One such pairing is a blue bowl and a yellow glass.",
    "The distance between two cities on a map is 15 inches. If the scale is 0.25 inches = 3 miles, how many miles apart are the actual cities?",
    "How many prime numbers are between 20 and 30?",
]

# Wrap every problem in the same instruction / turn markers; each prompt ends
# with an opening "<think>" tag so the completion starts inside that tag.
prompt_head = "Please reason step by step, and put your final answer within \\boxed{}.\nUser: "
prompt_tail = "\nAssistant: <think>"
texts = [prompt_head + problem + prompt_tail for problem in problems]

# Run generation once for all prompts and keep only the text of the first
# completion of each request. Special tokens are kept in the returned text.
sampling_config = SamplingParams(
    temperature=0,
    max_tokens=4096,
    skip_special_tokens=False,
)
answers = [
    request_output.outputs[0].text
    for request_output in llm.generate(texts, sampling_config)
]

INFO 07-12 17:11:02 [__init__.py:244] Automatically detected platform cuda.
INFO 07-12 17:11:16 [config.py:841] This model supports multiple tasks: {'classify', 'reward', 'embed', 'generate'}. Defaulting to 'generate'.
INFO 07-12 17:11:16 [config.py:1472] Using max model len 131072
INFO 07-12 17:11:16 [llm_engine.py:232] Initializing a V0 LLM engine (v0.1.dev7499+g2a4b294) with config: model='/data/zju-46/shenyl/hf/model/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B/', speculative_config=None, tokenizer='/data/zju-46/shenyl/hf/model/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B/', skip_tokenizer_init=False, tokenizer_mode=auto, revision=None, override_neuron_config={}, tokenizer_revision=None, trust_remote_code=False, dtype=torch.bfloat16, max_seq_len=131072, download_dir=None, load_format=auto, tensor_parallel_size=1, pipeline_parallel_size=1, disable_custom_all_reduce=False, quantization=None, enforce_eager=True, kv_cache_dtype=auto,  device_config=cuda, decoding_config=DecodingConfig(backen

Loading safetensors checkpoint shards:   0% Completed | 0/1 [00:00<?, ?it/s]


INFO 07-12 17:11:19 [default_loader.py:272] Loading weights took 1.10 seconds
INFO 07-12 17:11:20 [model_runner.py:1255] Model loading took 3.3461 GiB and 1.255827 seconds
INFO 07-12 17:11:24 [worker.py:295] Memory profiling takes 3.95 seconds
INFO 07-12 17:11:24 [worker.py:295] the current vLLM instance can use total_gpu_memory (47.44GiB) x gpu_memory_utilization (0.90) = 42.69GiB
INFO 07-12 17:11:24 [worker.py:295] model weights take 3.35GiB; non_torch_memory takes 0.06GiB; PyTorch activation peak memory takes 8.07GiB; the rest of the memory reserved for KV Cache is 31.22GiB.
INFO 07-12 17:11:24 [executor_base.py:115] # cuda blocks: 73063, # CPU blocks: 9362
INFO 07-12 17:11:24 [executor_base.py:120] Maximum concurrency for 131072 tokens per request: 8.92x
INFO 07-12 17:11:30 [llm_engine.py:430] init engine (profile, create kv cache, warmup model) took 9.88 seconds


Adding requests:   0%|          | 0/3 [00:00<?, ?it/s]

Processed prompts:   0%| | 0/3 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, 

In [2]:
from transformers import AutoTokenizer

# Each QA pair is the full prompt followed by the completion generated for it.
qa_pairs = [prompt + answers[idx] for idx, prompt in enumerate(texts)]

# Tokenizer loaded from the same checkpoint directory as the model.
tokenizer = AutoTokenizer.from_pretrained("/data/zju-46/shenyl/hf/model/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B/")

# Vocabulary spelling of a blank line: "\n\n" is tokenized as "ĊĊ".
# Matching is done on the token *suffix*, so a token that carries other
# characters before the two newlines is matched as well.
target_suffix = "ĊĊ"

# Token strings for every QA pair (special tokens included).
all_tokens_list = [
    tokenizer.tokenize(qa_text, add_special_tokens=True)
    for qa_text in qa_pairs
]

# For every QA pair, the indices of the tokens that end with the blank-line
# suffix. These mark candidate paragraph breaks in the text.
all_newline_positions = []
for qa_tokens in all_tokens_list:
    break_indices = []
    for token_index, token in enumerate(qa_tokens):
        if isinstance(token, str) and token.endswith(target_suffix):
            break_indices.append(token_index)
    all_newline_positions.append(break_indices)


In [3]:
# Keyword lists used to label reasoning segments. Matching is a plain,
# case-insensitive substring test (see classify_segment).
TRANSITION_KEYWORDS = [
    'alternatively',
    'think differently',
    'another way',
    'another approach',
    'another method',
    'another solution',
    'another strategy',
    'another technique',
]

REFLECTION_KEYWORDS = [
    'wait',
    'verify',
    'make sure',
    'hold on',
    'think again',
    "'s correct",
    "'s incorrect",
    'let me check',
    'seems right',
]

def classify_segment(text_segment):
    """
    Label a text segment by the first keyword list it matches.

    The segment is lower-cased and each keyword is tested as a substring, so a
    keyword also matches inside a longer word. Transition keywords are checked
    before Reflection keywords, so a segment containing both is a "Transition".

    Args:
        text_segment: String of text to classify

    Returns:
        "Transition", "Reflection", or "Execution" (the fallback when no
        keyword is found)
    """
    normalized = text_segment.lower()

    # Ordered from highest to lowest priority.
    labelled_keyword_lists = (
        ("Transition", TRANSITION_KEYWORDS),
        ("Reflection", REFLECTION_KEYWORDS),
    )
    for label, keywords in labelled_keyword_lists:
        for keyword in keywords:
            if keyword in normalized:
                return label

    # Nothing matched: the segment counts as "Execution".
    return "Execution"

# Classify the text that follows every paragraph break of every QA pair.
# Result: one list per QA pair, holding one dict per paragraph break.
all_classifications = []

for sample_index, break_positions in enumerate(all_newline_positions):
    sample_tokens = all_tokens_list[sample_index]

    # The segment for a break starts right after the break token and stops just
    # before the next break token; the last segment runs to the end of the
    # token list. With no breaks the pairing below is empty and the sample
    # simply gets an empty result list.
    segment_ends = break_positions[1:] + [len(sample_tokens)]

    sample_classifications = []
    for break_pos, segment_end in zip(break_positions, segment_ends):
        # Turn the token strings of the segment back into plain text.
        segment_ids = tokenizer.convert_tokens_to_ids(
            sample_tokens[break_pos + 1:segment_end]
        )
        segment_text = tokenizer.decode(segment_ids, skip_special_tokens=True).strip()

        # The label is stored against the position of the break token itself,
        # not against the tokens of the segment that follows it.
        sample_classifications.append({
            "position_in_tokens": break_pos,
            "category": classify_segment(segment_text),
        })

    all_classifications.append(sample_classifications)

# Report, for every QA pair, the break-token positions grouped by category.
report_categories = ("Transition", "Reflection", "Execution")

print("--- Summary of classification results for all samples ---")

for pair_number, pair_results in enumerate(all_classifications, start=1):
    print(f"\n--- Analysis of QA Pair {pair_number} ---")

    # One position list per reported category, in reporting order.
    positions_by_category = {name: [] for name in report_categories}

    for entry in pair_results:
        label, token_position = entry["category"], entry["position_in_tokens"]
        # Entries with a category outside the reported set are ignored.
        if label in positions_by_category:
            positions_by_category[label].append(token_position)

    for name in report_categories:
        print(f"{name} positions: {positions_by_category[name]}")

print("\n" + "=" * 40)

--- Summary of classification results for all samples ---

--- Analysis of QA Pair 1 ---
Transition positions: [442, 1139, 1729, 1924, 2114, 2519, 2683, 2925, 3089, 3331, 3495, 3737, 3884, 4031, 4178]
Reflection positions: [240, 681, 800, 850, 1061, 1233, 1282, 1339, 1439, 1656, 1681, 1853, 1964, 2020, 2066, 2154, 2211, 2448, 2559, 2616, 2632, 2761, 2818, 2856, 2965, 3022, 3038, 3167, 3224, 3262, 3371, 3428, 3444, 3573, 3630, 3668, 3777, 3815, 3924, 3962, 4071, 4109]
Execution positions: [154, 329, 542, 724, 918, 986, 1077, 1106, 1355, 1380, 1401, 1494, 1522, 1548, 1570, 1604, 1642, 1763, 1772, 1807, 1842, 1862, 1891, 2196, 2227, 2252, 2273, 2297, 2322, 2348, 2368, 2402, 2437, 2457, 2486, 2601, 2717, 2726, 2803, 2904, 3007, 3123, 3132, 3209, 3310, 3413, 3529, 3538, 3615, 3716, 3863, 4010, 4157]

--- Analysis of QA Pair 2 ---
Transition positions: [287, 789]
Reflection positions: [233, 641, 889]
Execution positions: [124, 182, 219, 344, 358, 366, 384, 406, 419, 436, 448, 496, 510, 524, 

In [4]:
# Import hidden states module to extract model activations
import easysteer.hidden_states as hs

# Build a second engine on the same checkpoint with task="reward"; this mode is
# used here to obtain hidden states instead of generated text.
# NOTE(review): `llm` is rebound, so the generation engine created earlier is
# only dropped implicitly — confirm its GPU memory is actually released.
reward_engine_options = {
    "model": "/data/zju-46/shenyl/hf/model/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B/",
    "task": "reward",  # reward task is what exposes the hidden states
    "tensor_parallel_size": 1,
}
llm = LLM(**reward_engine_options)

# Hidden states for the QA pairs; downstream code indexes the first return value
# as [sample][layer][token position].
all_hidden_states, outputs = hs.get_all_hidden_states(llm, qa_pairs)

INFO 07-12 17:12:36 [config.py:1472] Using max model len 16384
INFO 07-12 17:12:36 [config.py:4615] Only "last" pooling supports chunked prefill and prefix caching; disabling both.
INFO 07-12 17:12:36 [llm_engine.py:232] Initializing a V0 LLM engine (v0.1.dev7499+g2a4b294) with config: model='/data/zju-46/shenyl/hf/model/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B/', speculative_config=None, tokenizer='/data/zju-46/shenyl/hf/model/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B/', skip_tokenizer_init=False, tokenizer_mode=auto, revision=None, override_neuron_config={}, tokenizer_revision=None, trust_remote_code=False, dtype=torch.bfloat16, max_seq_len=16384, download_dir=None, load_format=auto, tensor_parallel_size=1, pipeline_parallel_size=1, disable_custom_all_reduce=False, quantization=None, enforce_eager=False, kv_cache_dtype=auto,  device_config=cuda, decoding_config=DecodingConfig(backend='auto', disable_fallback=False, disable_any_whitespace=False, disable_additional_properties=False, r

Loading safetensors checkpoint shards:   0% Completed | 0/1 [00:00<?, ?it/s]


INFO 07-12 17:12:38 [default_loader.py:272] Loading weights took 0.95 seconds
INFO 07-12 17:12:39 [model_runner.py:1255] Model loading took 2.8793 GiB and 0.999760 seconds


Adding requests:   0%|          | 0/3 [00:00<?, ?it/s]

Processed prompts:   0%| | 0/3 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, 

In [5]:
from easysteer.steer import StatisticalControlVector
import numpy as np

# Step 1: Gather the hidden state at every classified break token
#-------------------------------------------------

# category -> layer index -> list of hidden-state vectors (filled below)
collected_states = {
    label: {} for label in ("Transition", "Reflection", "Execution")
}

# Layer count is taken from the first sample's hidden states.
num_layers = len(all_hidden_states[0])

for sample_idx, sample_results in enumerate(all_classifications):
    for entry in sample_results:
        layers_for_category = collected_states[entry["category"]]
        token_position = entry["position_in_tokens"]

        for layer_idx in range(num_layers):
            # Create the per-layer list on first use.
            layer_bucket = layers_for_category.setdefault(layer_idx, [])

            # Hidden state of the break token at this layer.
            # presumably a torch tensor (it is moved to CPU and cast to float
            # before the NumPy conversion) — TODO confirm
            layer_vector = all_hidden_states[sample_idx][layer_idx][token_position]
            layer_bucket.append(layer_vector.cpu().float().numpy())


# Step 2: Average the collected hidden states per category and layer
#-------------------------------------------------

# category -> layer index -> mean vector
average_vectors = {
    "Transition": {},
    "Reflection": {},
    "Execution": {}
}

# category -> number of vectors that went into each mean (kept for metadata)
vector_counts = {}

for category, layer_data in collected_states.items():
    if layer_data:
        # The count is read from layer 0; every layer receives one vector per
        # classified token in the collection step of this cell.
        vector_counts[category] = len(layer_data.get(0, []))
        print(f"Calculating average for '{category}' using {vector_counts[category]} vectors.")

        # Element-wise mean over all vectors collected for each layer.
        average_vectors[category].update(
            (layer_idx, np.mean(np.array(vectors), axis=0))
            for layer_idx, vectors in layer_data.items()
        )
    else:
        # Nothing was collected for this category; leave its entry empty.
        print(f"Warning: No vectors found for category '{category}'. Skipping.")


# Step 3: Wrap each category's mean vectors and export them as GGUF files
#-------------------------------------------------

# Read the model type from the engine, falling back to a fixed placeholder.
# NOTE(review): confirm that `llm` really exposes `.config`; if it does not,
# the placeholder below is what always ends up in the exported files.
try:
    model_type_str = llm.config.model_type
except (AttributeError, NameError):
    print("Warning: Could not determine model_type from `llm` object. Using a placeholder.")
    model_type_str = "qwen2"  # Default placeholder - adjust as needed for your model

# Only categories that actually have averaged vectors are exported.
exportable_vectors = {
    category: directions
    for category, directions in average_vectors.items()
    if directions
}

for category, directions in exportable_vectors.items():
    control_vector = StatisticalControlVector(
        model_type=model_type_str,
        method="Average",
        directions=directions,
        # Provenance stored alongside the vector.
        metadata={
            "source": "Averaged from classified newline tokens",
            "num_vectors_averaged": vector_counts.get(category, 0),
        },
    )

    # One file per category, named after the lower-cased category.
    output_path = f"{category.lower()}_avg_vector.gguf"
    control_vector.export_gguf(output_path)

Calculating average for 'Transition' using 18 vectors.
Calculating average for 'Reflection' using 47 vectors.
Calculating average for 'Execution' using 106 vectors.
