In [1]:
# Process-level configuration.
# These variables are set BEFORE vllm is imported so that nothing executed at
# import time (platform detection, CUDA setup, env lookups) can observe the
# process without them.
import os

os.environ["VLLM_USE_V1"] = "0"  # run on the V0 engine
os.environ["CUDA_VISIBLE_DEVICES"] = "5"  # expose only GPU index 5 to this process

# Import necessary libraries (kept below the env setup on purpose)
from vllm import LLM, SamplingParams
from vllm.steer_vectors.request import SteerVectorRequest, VectorConfig
from transformers import AutoTokenizer

# Local checkpoint directory of the model to load; edit this for your machine.
MODEL_PATH = "/data/zju-46/shenyl/hf/model/Qwen/Qwen2.5-1.5B-Instruct/"

# Initialize LLM with steering vector capability
llm = LLM(
    model=MODEL_PATH,
    enable_steer_vector=True,  # steering-vector support of this vLLM build
    enforce_eager=True,
    tensor_parallel_size=1,  # single GPU, matching the one visible device
)

INFO 07-17 01:38:00 [__init__.py:244] Automatically detected platform cuda.
INFO 07-17 01:38:14 [config.py:841] This model supports multiple tasks: {'generate', 'classify', 'reward', 'embed'}. Defaulting to 'generate'.
INFO 07-17 01:38:14 [config.py:1472] Using max model len 32768
INFO 07-17 01:38:15 [llm_engine.py:232] Initializing a V0 LLM engine (v0.1.dev7499+g2a4b294) with config: model='/data/zju-46/shenyl/hf/model/Qwen/Qwen2.5-1.5B-Instruct/', speculative_config=None, tokenizer='/data/zju-46/shenyl/hf/model/Qwen/Qwen2.5-1.5B-Instruct/', skip_tokenizer_init=False, tokenizer_mode=auto, revision=None, override_neuron_config={}, tokenizer_revision=None, trust_remote_code=False, dtype=torch.bfloat16, max_seq_len=32768, download_dir=None, load_format=auto, tensor_parallel_size=1, pipeline_parallel_size=1, disable_custom_all_reduce=False, quantization=None, enforce_eager=True, kv_cache_dtype=auto,  device_config=cuda, decoding_config=DecodingConfig(backend='auto', disable_fallback=False

Loading safetensors checkpoint shards:   0% Completed | 0/1 [00:00<?, ?it/s]


INFO 07-17 01:38:18 [default_loader.py:272] Loading weights took 0.93 seconds
INFO 07-17 01:38:18 [model_runner.py:1255] Model loading took 2.8876 GiB and 1.088785 seconds
INFO 07-17 01:38:20 [worker.py:295] Memory profiling takes 1.43 seconds
INFO 07-17 01:38:20 [worker.py:295] the current vLLM instance can use total_gpu_memory (47.44GiB) x gpu_memory_utilization (0.90) = 42.69GiB
INFO 07-17 01:38:20 [worker.py:295] model weights take 2.89GiB; non_torch_memory takes 0.06GiB; PyTorch activation peak memory takes 2.02GiB; the rest of the memory reserved for KV Cache is 37.72GiB.
INFO 07-17 01:38:20 [executor_base.py:115] # cuda blocks: 88295, # CPU blocks: 9362
INFO 07-17 01:38:20 [executor_base.py:120] Maximum concurrency for 32768 tokens per request: 43.11x
INFO 07-17 01:38:25 [llm_engine.py:430] init engine (profile, create kv cache, warmup model) took 6.33 seconds


In [2]:
# Single-turn test prompt with the chat-turn markers (<|im_start|> / <|im_end|>)
# written out by hand; it ends right after the assistant header so the model
# continues with the assistant's reply.
example = "<|im_start|>user\nWhat are some activities our family can do in the city this weekend?<|im_end|>\n<|im_start|>assistant\n"

# Decoding settings for the unsteered reference run: temperature 0, at most
# 128 new tokens, special tokens left in the decoded text.
baseline_params = SamplingParams(temperature=0, max_tokens=128, skip_special_tokens=False)

# Baseline: no steer vector request is attached to this call.
example_answers = llm.generate(example, baseline_params)

# Show the first completion of the first (only) prompt.
print("=====Baseline=====", example_answers[0].outputs[0].text, sep="\n")



Adding requests:   0%|          | 0/1 [00:00<?, ?it/s]

Processed prompts:   0%| | 0/1 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, 

=====Baseline=====
There are many activities you can do in the city this weekend. Here are some ideas:

1. Visit a museum or art gallery. Many cities have museums and galleries that showcase local and international art. You can also check if there are any special exhibitions or events happening.

2. Take a walk or bike ride in a park or nature reserve. Many cities have beautiful parks and nature reserves that offer a peaceful escape from the hustle and bustle of city life.

3. Attend a concert or theater performance. Many cities have theaters and concert halls that host a variety of performances throughout the year.

4. Explore a local market or farmers' market


In [3]:
# Steered generation, "children" style.
# The vector is loaded from style-probe.gguf and applied with a negative scale.
children_steer_options = dict(
    steer_vector_name="children",
    steer_vector_id=1,
    steer_vector_local_path="style-probe.gguf",
    scale=-1.0,
    target_layers=[*range(28)],  # layer indices 0..27 — presumably every decoder layer; confirm
    prefill_trigger_tokens=[-1],  # NOTE(review): -1 presumably means "all tokens" — confirm
    generate_trigger_tokens=[-1],
    normalize=True,
    algorithm="direct",
)
sv_request = SteerVectorRequest(**children_steer_options)

# Same decoding settings as the baseline run so the outputs are comparable.
children_params = SamplingParams(temperature=0, max_tokens=128, skip_special_tokens=False)
output = llm.generate(example, children_params, steer_vector_request=sv_request)

# Show the first completion of the first (only) prompt.
print("=====Children=====", output[0].outputs[0].text, sep="\n")

Adding requests:   0%|          | 0/1 [00:00<?, ?it/s]

Processed prompts:   0%| | 0/1 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, 

=====Children=====
There are many fun activities you can do in the city this weekend. Here are a few ideas:

  1. Visit a museum or zoo: Many cities have museums and zoos that are open to the public. This is a great way to learn about different animals and learn about science and history.
  2. Go to a park: Many cities have beautiful parks that are perfect for a family day out. You can play on the playground, go for a hike, or have a picnic.
  3. Take a walk or bike ride: If you live in a city with a lot of parks or green spaces, you


In [4]:
# Steered generation, "adult" style.
# The vector is loaded from style-probe.gguf and applied with a positive scale.
adult_steer_options = dict(
    steer_vector_name="adult",
    steer_vector_id=2,
    steer_vector_local_path="style-probe.gguf",
    scale=1.5,
    target_layers=[*range(28)],  # layer indices 0..27 — presumably every decoder layer; confirm
    prefill_trigger_tokens=[-1],  # NOTE(review): -1 presumably means "all tokens" — confirm
    generate_trigger_tokens=[-1],
    normalize=True,
    algorithm="direct",
)
sv_request = SteerVectorRequest(**adult_steer_options)

# Same decoding settings as the baseline run so the outputs are comparable.
adult_params = SamplingParams(temperature=0, max_tokens=128, skip_special_tokens=False)
output = llm.generate(example, adult_params, steer_vector_request=sv_request)

# Show the first completion of the first (only) prompt.
print("=====Adult=====", output[0].outputs[0].text, sep="\n")

Adding requests:   0%|          | 0/1 [00:00<?, ?it/s]

Processed prompts:   0%| | 0/1 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, 

=====Adult=====
To create a memorable weekend in the city, consider the following activities tailored for a family setting:

1. **Dine at a Michelin-starred restaurant**: Indulge in a gourmet meal at a restaurant that has earned a Michelin star. This will be a special occasion, perfect for a family dinner.

2. **Private wine tasting**: Enjoy a private wine tasting session at a sommelier-led event. This is a great way to learn about wine and enjoy a unique experience.

3. **City tour with a guide**: Book a city tour with a local guide who can provide insights into the city's history, culture,
