In [None]:
import random
from openai import OpenAI
from pydantic import BaseModel

# Structured-output schema: get_prompt passes this class as `response_format`
# so the model's reply is parsed into the two fields below.
# NOTE: documented with comments rather than a docstring on purpose -- the
# class itself is handed to the API, so its definition is left untouched.
class Prompts(BaseModel):
    # The longer of the two image descriptions the prompt asks for.
    long_description: str
    # The shorter description (the prompt asks for it to be under 50 words).
    short_description: str

# Connection settings for the OpenAI-compatible endpoint served on localhost.
_LOCAL_BASE_URL = "http://127.0.0.1:8000/v1"
# Presumably a placeholder token for the local server -- confirm before
# pointing this notebook at any shared or remote endpoint.
_LOCAL_API_KEY = "token-abc123"

# Shared client used by get_prompt for every chat completion request.
client = OpenAI(api_key=_LOCAL_API_KEY, base_url=_LOCAL_BASE_URL)

def get_prompt(original_prompt, artifacts):
    """Ask the chat model for a long and a short effect-driven image description.

    Args:
        original_prompt: Text interpolated into the user message as the base
            description of the image.
        artifacts: Text interpolated into the user message as the effects the
            picture should show.

    Returns:
        The ``Prompts`` instance parsed from the first completion choice.

    Raises:
        ValueError: If the first choice carries no parsed payload (for
            example when the reply is a refusal), so the failure surfaces here
            instead of as an attribute error on ``None`` in the caller.
    """
    # The instruction text is runtime data for the model; keep it verbatim.
    # The trailing "/no_think" is presumably a model-specific switch -- TODO confirm.
    user_content = (
        f'Write an image description based on {original_prompt}. The picture has effects of {artifacts}. '
        'Specifically, these effects are prioritized over the original subject. Make the effects concrete, for example if the attribute says dark '
        'describe it as a stormy night, and describe the grainy attribute as rough, digitized film grain. You should provide two responses, one long one and the '
        'other one has the entire description must be under 50 words and contain only the image statement. (i.e. no "here it is", "this is the description", etc.) /no_think'
    )
    messages = [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": user_content},
    ]
    response = client.chat.completions.parse(
        model="Qwen/Qwen3-VL-30B-A3B-Instruct",
        messages=messages,
        response_format=Prompts,
        temperature=0.001
    )
    message = response.choices[0].message
    if message.parsed is None:
        # Include the refusal text (when the SDK provides one) to aid debugging.
        refusal = getattr(message, "refusal", None)
        raise ValueError(
            f"Model returned no structured output (refusal: {refusal!r})"
        )
    return message.parsed


In [2]:
from datasets import load_dataset
# Only the first 1000 rows of the train split are loaded.
coco = load_dataset(
    "raniatze/coco_stuff_train2017_captioned",
    split="train[0:1000]",
)

In [3]:
import pandas as pd
import re
from PIL import Image
import random
# Read the rules table and strip stray whitespace from its header names.
df = pd.read_csv("gen_rules.csv")
df.columns = df.columns.str.strip()
# Rows with a blank Dimension inherit the closest value above them.
df['Dimension'] = df['Dimension'].ffill()


def _dimension_key(label):
    """Return the text inside the first parentheses of ``label``, or ``label`` itself if there is none."""
    found = re.search(r'\((.*?)\)', label)
    return found.group(1) if found else label


df['dim_key'] = df['Dimension'].apply(_dimension_key)

# guide[dim_key][score] -> stripped description text for that score.
guide = {
    key: {
        int(score): str(text).strip()
        for score, text in zip(rows['Score'], rows['Description'])
    }
    for key, rows in df.groupby('dim_key')
}

In [4]:
# Comma-separated negative-prompt terms per dimension key. re_prompt joins the
# entries of the selected dimensions with ", ", so values must not end in
# punctuation (a trailing "." would produce "refractions., ..." in the output).
negative_prompts = {
    "symmetry": "symmetrical, high symmetry",
    "object pairing": "serenity, dynamism, harmony, resulting, overall coordination, visual unity, complementary relationships",
    "main object": "big noticeable main object",
    "richness": "many objects and small details, visually full or detailed",
    "background": "beautiful background",
    "clarity": "clear, sharpen, clarify",
    "color brightness": "bright color",
    "color aesthetic": "beautiful, nature, normal colors",
    "lighting distinction": "pronounced lighting, shadows, reflections, refractions",
    "lighting aesthetic": "pronounced lighting, shadows, reflections, refractions",
    "emotion": "happy, joyful, cheerful, warmth, positive emotions",
    "detail refinement": "refined details",
    "detail realism": "photorealistic, authentic"
}

In [5]:
def re_prompt(sample):
    """Rewrite one caption into an effect-driven description plus metadata.

    Draws between one and four dimensions from ``guide``, picks one
    negative-score description for each, and asks ``get_prompt`` to combine
    them with the caption.

    Args:
        sample: Mapping with a ``"text"`` entry holding the original caption.
            It is only read, never modified.

    Returns:
        dict with the keys ``original_prompt``, ``disorted_long_prompt``,
        ``disorted_short_prompt``, ``selected`` (dimension key -> chosen
        score), ``desc`` (the chosen descriptions joined by newlines) and
        ``negative_prompt`` (the matching ``negative_prompts`` entries joined
        by ``", "``).

    Raises:
        ValueError: If no dimension in ``guide`` has a negative score.
        KeyError: If ``sample`` has no ``"text"`` entry or a drawn dimension
            has no entry in ``negative_prompts``.
    """
    original_prompt = sample["text"]

    # Only dimensions that actually offer a negative score can be drawn;
    # otherwise random.choice below would fail on an empty list.
    eligible = [
        key for key, levels in guide.items()
        if any(score < 0 for score in levels)
    ]
    if not eligible:
        raise ValueError("guide has no dimension with a negative score to sample from")

    # Cap the draw at the number of eligible dimensions so random.sample
    # cannot be asked for more items than exist.
    count = random.randint(1, min(4, len(eligible)))
    applied_keys = random.sample(eligible, k=count)

    selected = {}
    descriptions = []
    for key in applied_keys:
        levels = guide[key]
        score = random.choice([level for level in levels if level < 0])
        selected[key] = score
        descriptions.append(levels[score])
    desc = "\n".join(descriptions)

    # Resolve the negative prompt before the model call so a missing key
    # fails fast instead of after a completed request.
    negative_prompt = ", ".join(negative_prompts[key] for key in selected)

    prompt = get_prompt(original_prompt, desc)

    return {
        "original_prompt": original_prompt,
        "disorted_long_prompt": prompt.long_description,
        "disorted_short_prompt": prompt.short_description,
        "selected": selected,
        "desc": desc,
        "negative_prompt": negative_prompt,
    }

In [6]:
# Run the pipeline on the first example only; the returned dict is shown as
# this cell's output.
re_prompt(coco[0])

{'original_prompt': 'A bicycle replica with a clock as the front wheel.',
 'disorted_long_prompt': 'A surreal, grainy digital film image captures a bicycle replica frozen in a void of absolute darkness. The front wheel is a hollow clock face, its hands frozen at midnight, its gears barely visible in the static blur. No light source exists—no glow, no reflection, no depth. The entire scene is rendered in coarse, digitized film grain, as if scanned from a damaged, forgotten reel. The bicycle is so faint, so peripheral, it seems to dissolve into the texture of the image itself, its form barely distinguishable from the noise. There is no center, no focus—only the faintest suggestion of structure in a world without illumination or presence.',
 'disorted_short_prompt': 'A bicycle replica with a clock as its front wheel floats in a grainy, shadowless void—no light, no depth, no focus. The form is faint, digitized, dissolving into static, barely visible, lost in endless dark noise.',
 'selecte

In [7]:
# Apply re_prompt to every example across 10 worker processes; the mapped
# result is bound to `dataset`.
dataset = coco.map(
    re_prompt,
    num_proc=10,
)

  StockPickler.save(self, obj, save_persistent_id)
  StockPickler.save(self, obj, save_persistent_id)


Map (num_proc=10):   0%|          | 0/1000 [00:00<?, ? examples/s]

Process ForkPoolWorker-4:
Process ForkPoolWorker-6:
Process ForkPoolWorker-5:
Process ForkPoolWorker-1:
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Process ForkPoolWorker-10:
  File "/home/wg25r/.local/lib/python3.10/site-packages/multiprocess/process.py", line 314, in _bootstrap
    self.run()
  File "/home/wg25r/.local/lib/python3.10/site-packages/multiprocess/process.py", line 314, in _bootstrap
    self.run()
  File "/home/wg25r/.local/lib/python3.10/site-packages/multiprocess/process.py", line 314, in _bootstrap
    self.run()
Process ForkPoolWorker-7:
  File "/home/wg25r/.local/lib/python3.10/site-packages/multiprocess/process.py", line 314, in _bootstrap
    self.run()
Process ForkPoolWorker-8:
Process ForkPoolWorker-9:
  File "/home/wg25r/.local/lib/python3.10/site-packages/multiprocess/process.py", line 108, in run
    self._target(*self._args, **self._kwargs)
  File "/home/wg25r/.l

TimeoutError: 

In [None]:
# Publish the mapped result. `coco.map(...)` returned the generated columns in
# `dataset`; `coco` itself is still the unmodified source, so pushing it would
# upload none of the generated prompts.
dataset.push_to_hub("weathon/anti_aesthetics_dataset")