In [1]:
import os
from vllm import LLM, SamplingParams
from vllm.steer_vectors.request import SteerVectorRequest, VectorConfig
from transformers import AutoTokenizer
import pynvml

def initialize_gpu_check():
    """初始化GPU检查环境"""
    try:
        pynvml.nvmlInit()
    except pynvml.NVMLError as e:
        print(f"无法初始化NVML库: {e}")
        return False
    return True

def get_available_gpu(mem_threshold=0.1, util_threshold=10):
    """获取第一个可用GPU的ID"""
    try:
        gpu_count = pynvml.nvmlDeviceGetCount()
        for i in range(gpu_count):
            handle = pynvml.nvmlDeviceGetHandleByIndex(i)
            
            # 获取显存信息
            mem_info = pynvml.nvmlDeviceGetMemoryInfo(handle)
            mem_used_percent = mem_info.used / mem_info.total
            
            # 获取GPU利用率
            util_info = pynvml.nvmlDeviceGetUtilizationRates(handle)
            gpu_util = util_info.gpu
            
            # 判断是否空闲
            if mem_used_percent < mem_threshold and gpu_util < util_threshold:
                return i
    except Exception as e:
        print(f"检查GPU时出错: {e}")
    return None

INFO 07-14 15:32:56 [__init__.py:244] Automatically detected platform cuda.


In [2]:
# 初始化GPU检查
if not initialize_gpu_check():
    print("使用默认设备配置")
    gpu_id = None  # 使用默认设备
else:
    # 查找可用GPU (显存使用率低于10%且利用率低于10%)
    gpu_id = get_available_gpu(mem_threshold=0.1, util_threshold=10)
    pynvml.nvmlShutdown()  # 关闭NVML连接

# 设置CUDA设备
if gpu_id is not None:
    os.environ["CUDA_VISIBLE_DEVICES"] = str(gpu_id)
    print(f"自动选择GPU设备 {gpu_id}")
else:
    print("未找到可用GPU，使用默认设备配置")

# 设置环境变量
os.environ["VLLM_USE_V1"] = "0"

# 模型路径
model_path = "/data/zju-46/shenyl/hf/model/Qwen/Qwen2.5-1.5B-Instruct/"

# 初始化tokenizer
tokenizer = AutoTokenizer.from_pretrained(model_path)

# 初始化LLM
llm = LLM(
    model=model_path,
    enable_steer_vector=True,
    enforce_eager=True,
    tensor_parallel_size=1
)

自动选择GPU设备 4
INFO 07-14 15:33:06 [config.py:841] This model supports multiple tasks: {'embed', 'reward', 'classify', 'generate'}. Defaulting to 'generate'.
INFO 07-14 15:33:06 [config.py:1472] Using max model len 32768
INFO 07-14 15:33:06 [llm_engine.py:232] Initializing a V0 LLM engine (v0.1.dev7499+g2a4b294) with config: model='/data/zju-46/shenyl/hf/model/Qwen/Qwen2.5-1.5B-Instruct/', speculative_config=None, tokenizer='/data/zju-46/shenyl/hf/model/Qwen/Qwen2.5-1.5B-Instruct/', skip_tokenizer_init=False, tokenizer_mode=auto, revision=None, override_neuron_config={}, tokenizer_revision=None, trust_remote_code=False, dtype=torch.bfloat16, max_seq_len=32768, download_dir=None, load_format=LoadFormat.AUTO, tensor_parallel_size=1, pipeline_parallel_size=1, disable_custom_all_reduce=False, quantization=None, enforce_eager=True, kv_cache_dtype=auto,  device_config=cuda, decoding_config=DecodingConfig(backend='auto', disable_fallback=False, disable_any_whitespace=False, disable_additional_pr

Loading safetensors checkpoint shards:   0% Completed | 0/1 [00:00<?, ?it/s]


INFO 07-14 15:33:09 [default_loader.py:272] Loading weights took 1.06 seconds
INFO 07-14 15:33:09 [model_runner.py:1255] Model loading took 2.8876 GiB and 1.213015 seconds
INFO 07-14 15:33:11 [worker.py:295] Memory profiling takes 1.47 seconds
INFO 07-14 15:33:11 [worker.py:295] the current vLLM instance can use total_gpu_memory (47.40GiB) x gpu_memory_utilization (0.90) = 42.66GiB
INFO 07-14 15:33:11 [worker.py:295] model weights take 2.89GiB; non_torch_memory takes 0.06GiB; PyTorch activation peak memory takes 2.02GiB; the rest of the memory reserved for KV Cache is 37.69GiB.
INFO 07-14 15:33:11 [executor_base.py:115] # cuda blocks: 88220, # CPU blocks: 9362
INFO 07-14 15:33:11 [executor_base.py:120] Maximum concurrency for 32768 tokens per request: 43.08x
INFO 07-14 15:33:14 [llm_engine.py:430] init engine (profile, create kv cache, warmup model) took 5.26 seconds


In [3]:
# Define negative queries (that should be refused by the model)
negative = [
    "Fans of creature feature films have to endure a lot of awful movies lately. Blood Surf shamelessly joins the list of stupid, redundant pulp-horror titles about ridiculously big animals that want to turn the food chain upside down. Crocodiles are particularly successful as we already had to struggle our way through the abysmal 'Crocodile' (directed by a disappointing Tobe Hooper) and 'Lake Placid'. Blood Surf is every bit as bad as these other films and  on top of that  it likes to exaggerate tremendously. The saltwater-crocodile supposedly is 90 years old, over 30 ft long (!) and it kills for fun! During the film, he amuses himself by devouring a bunch of utterly stupid surfer-dudes & dudettes who came to seek new thrills by surfing in a shark-congested area. The only beautiful aspect about this film is the tropical location. Even though it's a completely inappropriate setting for a film like this, the lagoons and nature looks marvelous. Every other aspect is simply disastrous. There's a quite a bit of gore but it all looks fake and laughable. The dialogues are downright painful to listen to! You won't believe some of the lines these actors have to say! I know surfers are supposed to be a mentally underdeveloped group but I hope for their own sake they're not that stupid! Early in the film, one of the characters refers to Jaws as being a 'mechanical toy' but the croc here looks at least 10 times less real than Spielberg's great white shark. The visual effects in 'Blood Surf' are amateurish and the massacres fail to impress. I won't say too much about the acting since it's secondary in flicks like this. The girls look sexy in wet shirts and their boobs joyfully bounce while running away from the beast. You guessed right: Blood Surf is a very bad film. So bad it becomes fun again. But 'funny' for a whole other reason than James Hickox intended.",
    "You get 5 writers together, have each write a different story with a different genre, and then you try to make one movie out of it. It's action, it's adventure, it's sci-fi, it's western, it's a mess. Sorry, but this movie absolutely stinks. 4.5 is giving it an awefully high rating. That said, it's movies like this that make me think I could write movies, and I can barely write.",
    "I'm usually not one to say that a film is not worth watching, but this is certainly an extenuating circumstance. The only true upside to this film is Cornelia Sharpe, looking rather attractive, and the fact that this film is REALLY short.<br /><br />The plot in the film is unbelievably boring and goes virtually nowhere throughout the film. None of the characters are even remotely interesting and there is no reason to care about anyone. I'm not sure why on earth Sean Connery agreed to do this film, but he should have definitely passed on this one.<br /><br />The only reason I could see for seeing this film is if you are a die-hard Sean Connery fan and simply want to see everything he's done. Save this one for last though.<br /><br />Well, if you by some miracle end up seeing this despite my review (or any of the other reviews on this site), then I hope you enjoy it more than I did. Thanks for reading."
]

# Define positive queries (that should get helpful responses)
positive = [
    "This is a excellent start to the film career of Mickey Rooney. His talents here shows that a long career is ahead for him. The car and truck chase is exciting for the 1937 era. This start of the Andy Hardy series is an American treasure in my book. Spring Byington performance is excellent as usual. Please Mr Rooney or owners of the film rights, take a chance and get this produced on DVD. I think it would be a winner.",
    "Tintin and I recently aired as an episode of PBS's P.O.V. series. It's based on a taped interview of Georges Remi a.k.a. Herge, Tintin's creator, from 1971 in which in discusses his various experiences publishing his popular character, first in a Catholic newspaper, then in his own series of comic books. Awesome sweeping views of various comic pages and surreal images of Herge's dreams. I first encountered Tintin in the pages of Children's Digest at my local elementary school library reading The Secrets of the Unicorn. My mom later got a subscription to CD and I read the entire Red Rackham's Treasure every month in 1978. I remember seeing some Tintin comic books in a local book store after that but for some reason I didn't get any probably because I was 12 and I thought I was outgrowing them. I do have Breaking Free, a book written and drawn by J. Daniels, published in 1989, six years after Herge's death. Haven't read it yet. This film also covers the artist's personal life as when he left his first wife after his affair with a colorist in his employ (whom he later married). Her name is Fanny and she is interviewed here. If you love Tintin and his creator, this film is definitely worth a look. Update: 9/4/07-I've now read Breaking Free. Tintin and The Captain are the only regular characters that appear here and they are tailored to the anti-capitalist views of Mr. Daniels with Tintin portrayed as a rabble rouser with a chip on his shoulder who nevertheless cares for The Captain who he's staying with. The Captain here is just trying to make ends meet with a wife and daughter that he loves dearly. They and other construction workers vow to strike after a fellow employee dies from a faulty equipment accident. The whole thing takes place in England with working-class cockney accents intact. Not the kind of thing Herge would approve of but an interesting read nonetheless. Oh, yes, dog Snowy only appears in the top left corner of the cover (which has Tintin running over the police!) and the dedication page.",
    "Some thirty years ago, Author Numa Sadoul published a book length interview with the Belgian comic book artist Georges Remi (better known as Herge, the creator of Tintin). This movie catches up with Sadoul today as he recalls the interview, while we listen to the cassettes (Herge died in 1983) and see some old photos and footage of the man himself. Some parts of the interview were not published in the book at the request of Herge, and we now know these dealt with his separation from his wife, after he had an affair with one of his collaborators (who years later would become his second wife). An interesting thing the movie does not address well is the shift in the Tintin books from the early rightist and imperialist books (Tintin in the Congo, Tintin in the lands of the Soviets) to fairly anti-imperialist books just a few years later (The Blue Lotus). On the whole, I come out of this movie knowing a few more things about Herge and seeing him as a bit more unlikable than when I come in to the theater."
]

# Create properly formatted prompts for both harmful and normal queries
# The format follows the model's expected chat template
texts = [f"<|im_start|>user\n{x}please check the sentiment in the commitment.<|im_end|>\n<|im_start|>assistant\n" for x in negative+ positive]

# Generate responses for all queries
# This will validate that the model correctly refuses harmful queries
answers = llm.generate(
    texts,
    SamplingParams(
        temperature=0,        # Deterministic generation
        max_tokens=128,       # Limit response length
        skip_special_tokens=False,  # Keep special tokens intact
    ),
)
answers = [answer.outputs[0].text for answer in answers]

Adding requests:   0%|          | 0/6 [00:00<?, ?it/s]

Processed prompts:   0%|          | 0/6 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]

In [4]:
# Generate responses for all queries
# This will validate that the model correctly refuses harmful queries
answers = llm.generate(
    texts,
    SamplingParams(
        temperature=0,        # Deterministic generation
        max_tokens=128,       # Limit response length
        skip_special_tokens=False,  # Keep special tokens intact
    ),
)
answers = [answer.outputs[0].text for answer in answers]
print(answers)

Adding requests:   0%|          | 0/6 [00:00<?, ?it/s]

Processed prompts:   0%|          | 0/6 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]

['The sentiment in the commitment is negative. The reviewer expresses strong dissatisfaction with the film, describing it as "awful" and "bad" multiple times. They criticize the plot, acting, and visual effects, and even suggest that the film becomes "fun" due to its poor quality.', 'The sentiment expressed in the commitment is predominantly negative. The reviewer expresses strong dissatisfaction with the movie, describing it as "absolutely stinks" and giving it an "awfully high rating" of 4.5. They also state that it makes them question their own ability to write movies, indicating a very negative opinion.', 'The sentiment in the commitment is negative. The reviewer expresses disappointment and dissatisfaction with the film, criticizing its plot, characters, and overall quality. They recommend against watching it unless one is a die-hard fan of Sean Connery.', "The sentiment in the commitment is positive. The reviewer expresses enthusiasm and appreciation for the film, highlighting it

In [5]:
tokens = tokenizer.tokenize(texts[0], add_special_tokens=True)
print(tokens)

['<|im_start|>', 'user', 'Ċ', 'Fans', 'Ġof', 'Ġcreature', 'Ġfeature', 'Ġfilms', 'Ġhave', 'Ġto', 'Ġendure', 'Ġa', 'Ġlot', 'Ġof', 'Ġawful', 'Ġmovies', 'Ġlately', '.', 'ĠBlood', 'ĠSurf', 'Ġshame', 'lessly', 'Ġjoins', 'Ġthe', 'Ġlist', 'Ġof', 'Ġstupid', ',', 'Ġredundant', 'Ġpulp', '-h', 'or', 'ror', 'Ġtitles', 'Ġabout', 'Ġridiculously', 'Ġbig', 'Ġanimals', 'Ġthat', 'Ġwant', 'Ġto', 'Ġturn', 'Ġthe', 'Ġfood', 'Ġchain', 'Ġupside', 'Ġdown', '.', 'ĠCro', 'cod', 'iles', 'Ġare', 'Ġparticularly', 'Ġsuccessful', 'Ġas', 'Ġwe', 'Ġalready', 'Ġhad', 'Ġto', 'Ġstruggle', 'Ġour', 'Ġway', 'Ġthrough', 'Ġthe', 'Ġab', 'ys', 'mal', "Ġ'", 'Cro', 'codile', "'", 'Ġ(', 'direct', 'ed', 'Ġby', 'Ġa', 'Ġdisappointing', 'ĠT', 'obe', 'ĠHo', 'oper', ')', 'Ġand', "Ġ'", 'Lake', 'ĠPl', 'acid', "'.", 'ĠBlood', 'ĠSurf', 'Ġis', 'Ġevery', 'Ġbit', 'Ġas', 'Ġbad', 'Ġas', 'Ġthese', 'Ġother', 'Ġfilms', 'Ġand', 'ĠÂ', 'ĸ', 'Ġon', 'Ġtop', 'Ġof', 'Ġthat', 'ĠÂ', 'ĸ', 'Ġit', 'Ġlikes', 'Ġto', 'Ġexagger', 'ate', 'Ġtremendously', '.', 'ĠThe', 

In [7]:
from easysteer.hidden_states import get_all_hidden_states
llm = LLM(
    model="/data/zju-46/shenyl/hf/model/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B/",
    task="reward",  # Use reward task to get hidden states
    tensor_parallel_size=1
)
all_hidden_states, outputs = get_all_hidden_states(llm, texts)

INFO 07-14 15:35:41 [config.py:1472] Using max model len 16384
INFO 07-14 15:35:41 [config.py:4615] Only "last" pooling supports chunked prefill and prefix caching; disabling both.
INFO 07-14 15:35:41 [llm_engine.py:232] Initializing a V0 LLM engine (v0.1.dev7499+g2a4b294) with config: model='/data/zju-46/shenyl/hf/model/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B/', speculative_config=None, tokenizer='/data/zju-46/shenyl/hf/model/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B/', skip_tokenizer_init=False, tokenizer_mode=auto, revision=None, override_neuron_config={}, tokenizer_revision=None, trust_remote_code=False, dtype=torch.bfloat16, max_seq_len=16384, download_dir=None, load_format=LoadFormat.AUTO, tensor_parallel_size=1, pipeline_parallel_size=1, disable_custom_all_reduce=False, quantization=None, enforce_eager=False, kv_cache_dtype=auto,  device_config=cuda, decoding_config=DecodingConfig(backend='auto', disable_fallback=False, disable_any_whitespace=False, disable_additional_properti

INFO 07-14 15:35:42 [model_runner.py:1223] Starting to load model /data/zju-46/shenyl/hf/model/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B/...


Loading safetensors checkpoint shards:   0% Completed | 0/1 [00:00<?, ?it/s]


INFO 07-14 15:35:48 [default_loader.py:272] Loading weights took 5.37 seconds
INFO 07-14 15:35:49 [model_runner.py:1255] Model loading took 2.9110 GiB and 5.437843 seconds


Adding requests:   0%|          | 0/6 [00:00<?, ?it/s]

Processed prompts:   0%|          | 0/6 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]

In [8]:
from easysteer.steer import extract_diffmean_control_vector, StatisticalControlVector

# Extract direction vector at position -1
# DiffMean computes the difference between harmful and normal hidden states
control_vector = extract_diffmean_control_vector(
    all_hidden_states=all_hidden_states,  # 3D list [sample][layer][token]
    positive_indices=[0, 1, 2],     # Indices of harmful examples (should be refused)
    negative_indices=[3, 4, 5],     # Indices of normal examples (should be answered)
    model_type="qwen2.5",          # Model type
    token_pos=-1,                  # Target the last token position
    normalize=True                 # Normalize the resulting vector
)
control_vector.export_gguf("diffmean-1.gguf")  # Save vector to file

# Extract direction vector at position -2
control_vector = extract_diffmean_control_vector(
    all_hidden_states=all_hidden_states,
    positive_indices=[0, 1, 2],
    negative_indices=[3, 4, 5],
    model_type="qwen2.5",
    token_pos=-2,                  # Second-to-last token
    normalize=True
)
control_vector.export_gguf("diffmean-2.gguf")

# Extract direction vector at position -3
control_vector = extract_diffmean_control_vector(
    all_hidden_states=all_hidden_states,
    positive_indices=[0, 1, 2],
    negative_indices=[3, 4, 5],
    model_type="qwen2.5",
    token_pos=-3,                  # Third-to-last token
    normalize=True
)
control_vector.export_gguf("diffmean-3.gguf")

# Extract direction vector at position -4
control_vector = extract_diffmean_control_vector(
    all_hidden_states=all_hidden_states,
    positive_indices=[0, 1, 2],
    negative_indices=[3, 4, 5],
    model_type="qwen2.5",
    token_pos=-4,                  # Fourth-to-last token
    normalize=True
)
control_vector.export_gguf("diffmean-4.gguf")

Computing DiffMean directions:   0%|          | 0/28 [00:00<?, ?it/s]

Computing DiffMean directions:   0%|          | 0/28 [00:00<?, ?it/s]

Computing DiffMean directions:   0%|          | 0/28 [00:00<?, ?it/s]

Computing DiffMean directions:   0%|          | 0/28 [00:00<?, ?it/s]