In [5]:
from openai import OpenAI
import os
import dotenv
# Load variables from a local .env file (if one exists) so that
# OPENROUTER_API_KEY is available below; without this call the `dotenv`
# import above has no effect. Existing environment variables are kept.
dotenv.load_dotenv()

# OpenAI-compatible client pointed at the OpenRouter endpoint; shared by the
# image-generation and captioning helpers defined later in the notebook.
client = OpenAI(
  base_url="https://openrouter.ai/api/v1",
  api_key=os.environ.get("OPENROUTER_API_KEY"),
)

In [6]:
def generate_image(prompt: str, image_url=None):
    """Request an image from ``google/gemini-2.5-flash-image`` through ``client``.

    Args:
        prompt: Text instruction sent as the first content part.
        image_url: Optional image reference (e.g. a ``data:`` URL returned by
            a previous call) sent as a second ``image_url`` content part.
            When ``None``, only the text part is sent.

    Returns:
        The ``["image_url"]["url"]`` value of the first image attached to the
        first choice's message.

    Raises:
        RuntimeError: If the first choice's message carries no images.
    """
    # Build the content once; the image part is appended only when provided.
    content = [{"type": "text", "text": prompt}]
    if image_url is not None:
        content.append({"type": "image_url", "image_url": {"url": image_url}})

    completion = client.chat.completions.create(
        model="google/gemini-2.5-flash-image",
        messages=[{"role": "user", "content": content}],
    )

    message = completion.choices[0].message
    # Fail with a clear message instead of an AttributeError/IndexError when
    # the response has no image attached.
    images = getattr(message, "images", None)
    if not images:
        raise RuntimeError("model response did not include an image")
    return images[0]["image_url"]["url"]

In [7]:
import base64
from io import BytesIO
from PIL import Image
def _decode_data_url(data_url):
    """Decode a ``<header>,<base64 payload>`` data URL into a PIL image."""
    b64_data = data_url.split(",", 1)[1]
    return Image.open(BytesIO(base64.b64decode(b64_data)))


def gen_images():
    """Generate one happy base image plus sad/angry/fearful edits of it.

    The happy image is generated from text alone; each other emotion is
    produced by sending the happy image back to ``generate_image`` with an
    edit instruction.

    Returns:
        dict mapping ``"happy"``, ``"sad"``, ``"angry"`` and ``"fearful"``
        (in that insertion order) to the decoded PIL images.
    """
    base_url = generate_image(
        "generate an image of a person very happy, face close to camera but the image shows not only face",
        None,
    )
    images = {"happy": _decode_data_url(base_url)}

    # Every edit starts from the same happy image, never from a previous edit.
    for emotion in ("sad", "angry", "fearful"):
        edited_url = generate_image(
            f"keep most of the image unchanged, but make the emotion look very {emotion}",
            base_url,
        )
        images[emotion] = _decode_data_url(edited_url)

    return images

In [8]:
# Single trial run of the pipeline: one generated happy image plus three edits.
images = gen_images()

In [51]:
from multiprocessing import Pool
from tqdm import tqdm

NUM_SAMPLES = 100  # number of independent image sets to generate
NUM_WORKERS = 10   # worker processes issuing generation requests in parallel


def wrapper(_):
    """Pool adapter: ignore the task index and generate one image set."""
    return gen_images()


with Pool(processes=NUM_WORKERS) as pool:
    # imap_unordered yields each result as soon as a worker finishes it, so the
    # progress bar advances per completed set; result order is arbitrary.
    results = list(tqdm(pool.imap_unordered(wrapper, range(NUM_SAMPLES)), total=NUM_SAMPLES))

100%|██████████| 100/100 [08:04<00:00,  4.85s/it]


In [52]:
import pickle

# Persist the generated image sets (the run above took about 8 minutes per
# its progress bar) so they can be reloaded without calling the API again.
with open("generated_images.pkl", "wb") as pkl_file:
    pickle.dump(results, pkl_file)

In [54]:
def encode_image(image: Image.Image) -> str:
    """Serialize ``image`` as PNG and return it as a base64 ``data:`` URL string."""
    with BytesIO() as png_buffer:
        image.save(png_buffer, format="PNG")
        png_bytes = png_buffer.getvalue()
    payload = base64.b64encode(png_bytes).decode("utf-8")
    return "data:image/png;base64," + payload

In [None]:
from openai import OpenAI
def caption(images):
    """Ask ``qwen/qwen3-vl-235b-a22b-instruct`` for one shared caption of an image set.

    Args:
        images: Mapping whose values are PIL images. Every value is encoded
            with ``encode_image`` and sent, in the mapping's iteration order,
            after the text instruction.

    Returns:
        The ``content`` of the first choice's message.
    """
    instruction = {
        "type": "text",
        "text": "Describe all these images with one prompt, and use [emotion] as placeholder for the emotion shown on the person's face.",
    }
    image_parts = [
        {"type": "image_url", "image_url": {"url": encode_image(image)}}
        for image in images.values()
    ]

    completion = client.chat.completions.create(
        # OpenRouter reads routing preferences from a nested "provider" object;
        # passing "order"/"allow_fallbacks" at the top level of the request
        # body does not apply them.
        extra_body={
            "provider": {
                "order": ["parasail/fp8"],
                "allow_fallbacks": False,
            }
        },
        model="qwen/qwen3-vl-235b-a22b-instruct",
        messages=[{"role": "user", "content": [instruction] + image_parts}],
    )
    return completion.choices[0].message.content

In [62]:
# One caption request per generated image set, in the same order as `results`.
prompts = [caption(image_set) for image_set in results]

In [63]:
# Flatten each image set into one row and attach the caption that shares
# its index in `prompts`.
dataset = [
    {
        "happy_image": image_set["happy"],
        "sad_image": image_set["sad"],
        "angry_image": image_set["angry"],
        "fearful_image": image_set["fearful"],
        "prompt": prompts[row_idx],
    }
    for row_idx, image_set in enumerate(results)
]

In [65]:
from datasets import Dataset
# Build a Hugging Face Dataset from the list of row dicts and upload it to the
# "weathon/emotion_bias_dataset" repository on the Hub.
ds = Dataset.from_list(dataset)
ds.push_to_hub("weathon/emotion_bias_dataset")

Uploading the dataset shards:   0%|          | 0/2 [00:00<?, ? shards/s]

Map:   0%|          | 0/50 [00:00<?, ? examples/s]

Creating parquet from Arrow format:   0%|          | 0/3 [00:00<?, ?ba/s]

Processing Files (0 / 0): |          |  0.00B /  0.00B            

New Data Upload: |          |  0.00B /  0.00B            

Map:   0%|          | 0/50 [00:00<?, ? examples/s]

Creating parquet from Arrow format:   0%|          | 0/3 [00:00<?, ?ba/s]

Processing Files (0 / 0): |          |  0.00B /  0.00B            

New Data Upload: |          |  0.00B /  0.00B            

CommitInfo(commit_url='https://huggingface.co/datasets/weathon/emotion_bias_dataset/commit/f92d05e67732cabda12371bd4b3d36192ed2be5c', commit_message='Upload dataset', commit_description='', oid='f92d05e67732cabda12371bd4b3d36192ed2be5c', pr_url=None, repo_url=RepoUrl('https://huggingface.co/datasets/weathon/emotion_bias_dataset', endpoint='https://huggingface.co', repo_type='dataset', repo_id='weathon/emotion_bias_dataset'), pr_revision=None, pr_num=None)

In [66]:
from hpsv3 import HPSv3RewardInferencer

# NOTE(review): this run failed with the ImportError shown in the output
# below; the same initialization is repeated in a later cell whose output
# shows the checkpoint loading.
inferencer = HPSv3RewardInferencer(device='cuda')

ImportError: cannot import name 'VideoInput' from 'transformers.image_utils' (/home/wg25r/miniconda/envs/neg/lib/python3.10/site-packages/transformers/image_utils.py)

In [None]:
import torch
with torch.no_grad():
  # Smoke test: score two local image files against the same "sad" prompt.
  image_paths = ["sad.png", "happy.png"]
  sad_prompt = "A close-up headshot of a man sitting in a coffee shop, expressing sad."
  # Named `demo_prompts` (not `prompts`) so this cell does not overwrite the
  # caption list that the dataset-building cell reads.
  demo_prompts = [sad_prompt] * len(image_paths)

  # Get preference scores
  rewards = inferencer.reward(prompts=demo_prompts, image_paths=image_paths)
  scores = [reward[0].item() for reward in rewards]  # Extract mu values
  print(f"Image scores: {scores}")

In [1]:
from hpsv3 import HPSv3RewardInferencer

# Initialize the HPSv3 reward inferencer on the CUDA device; the scoring
# cells below read it through the global name `inferencer`.
inferencer = HPSv3RewardInferencer(device='cuda')

Flash Attention is not installed. Falling to SDPA.


`Qwen2VLRotaryEmbedding` can now be fully parameterized by passing the model config through the `config` argument. All other arguments will be removed in v4.46


Loading checkpoint shards:   0%|          | 0/5 [00:00<?, ?it/s]

Some weights of Qwen2VLRewardModelBT were not initialized from the model checkpoint at Qwen/Qwen2-VL-7B-Instruct and are newly initialized: ['rm_head.0.bias', 'rm_head.0.weight', 'rm_head.3.bias', 'rm_head.3.weight', 'rm_head.5.bias', 'rm_head.5.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [15]:
import hpsv2
def score_image(sample):
  """Score a row's happy image and emotion images under each emotion prompt.

  For each of sad/angry/fearful, the ``[emotion]`` placeholder in
  ``sample["prompt"]`` is replaced by that emotion, and both the happy image
  and that emotion's image are scored against the filled prompt in a single
  ``inferencer.reward`` call. The resulting dict is stored under
  ``sample["hpsv3"]`` and also returned.
  """
  emotions = ("sad", "angry", "fearful")
  with torch.no_grad():
    batch_prompts = []
    batch_images = []
    for emotion in emotions:
      filled = sample["prompt"].replace("[emotion]", emotion)
      # Same prompt twice: first for the happy image, then the emotion image.
      batch_images += [sample["happy_image"], sample[f"{emotion}_image"]]
      batch_prompts += [filled, filled]

    rewards = inferencer.reward(prompts=batch_prompts, image_paths=batch_images)
    values = [reward[0].item() for reward in rewards]

    result = {}
    sample["hpsv3"] = result
    # Scores are interleaved per emotion as (happy image, emotion image).
    for idx, emotion in enumerate(emotions):
      happy_value = values[2 * idx]
      emotion_value = values[2 * idx + 1]
      result[f"{emotion}_hpsv3"] = emotion_value
      result[f"{emotion}_hpsv3_happy_image"] = happy_value
  return sample["hpsv3"]

In [5]:
from datasets import load_dataset
# Load the train split directly. Without `split`, load_dataset returns a
# DatasetDict, and iterating that yields split names rather than rows, which
# breaks the scoring loop on a fresh Restart & Run All.
ds = load_dataset("weathon/emotion_bias_dataset", split="train")

README.md:   0%|          | 0.00/437 [00:00<?, ?B/s]

data/train-00000-of-00002.parquet:   0%|          | 0.00/286M [00:00<?, ?B/s]

data/train-00001-of-00002.parquet:   0%|          | 0.00/293M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/100 [00:00<?, ? examples/s]

In [17]:
# ds = ds["train"]

In [18]:
import tqdm
import torch
# Score every row of `ds` and collect the dict returned for each one.
returns = []
for sample in tqdm.tqdm(ds):
    returns.append(score_image(sample))

100%|██████████| 100/100 [04:22<00:00,  2.62s/it]


In [20]:
import json
# Write the per-row score dicts collected in `returns` to disk as JSON.
with open("hpsv3_scores.json", "w") as scores_file:
    json.dump(returns, scores_file)