In [1]:
import os

# Route Hugging Face traffic through a mirror endpoint. The variable is assigned
# before huggingface_hub is imported below — presumably because the library reads
# HF_ENDPOINT at import time (TODO confirm), so keep this ordering.
os.environ["HF_ENDPOINT"] = "https://hf-mirror.com"

from huggingface_hub import notebook_login
notebook_login()  # enter your HF access token in the widget shown in the notebook

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [None]:
import os, torch, gc
from torch import autocast
from diffusers import DiffusionPipeline, StableDiffusion3Pipeline, BitsAndBytesConfig, DDPMScheduler
from transformers import CLIPTextModel, CLIPTokenizer, T5TokenizerFast
from peft import LoraConfig, get_peft_model

# Use the GPU when one is visible to torch, otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Load the Stable Diffusion 3.5 Medium weights in bfloat16, caching the
# downloaded files under ./models/huggingface.
pipe = StableDiffusion3Pipeline.from_pretrained(
    "stabilityai/stable-diffusion-3.5-medium",
    cache_dir="./models/huggingface",
    torch_dtype=torch.bfloat16,
)
pipe = pipe.to(device)

# Attention slicing is switched on right after loading; the message below
# confirms it to the notebook user.
pipe.enable_attention_slicing()
print("已启用注意力切片。")

In [None]:
import datetime, os, torch, time
import traceback
from torch.amp import autocast
from tqdm.auto import tqdm

# --- Generation settings ---
prompt = "a photo of a bydxt Pavilion in the snow"  # adjust the prompt as needed
guidance_scale = 5
num_inference_steps = 50
height = 512
width = 512
num_images_total = 100  # total number of images to generate
base_seed = 42          # image i is generated with seed base_seed + i

# Root folder for generated datasets. It is defined here so that this cell does
# not depend on whatever `output_dir` another cell left in the kernel: on a fresh
# kernel the name would be undefined, and after the CIFAR-10 export cell it would
# point at the reference-image folder, which must not receive generated images.
output_dir = "./diff_attention"

# Same device rule as the cell that loads `pipe`, so the generator and autocast
# context match the pipeline instead of assuming CUDA is present.
gen_device = "cuda" if torch.cuda.is_available() else "cpu"

# --- Output folder for this run (timestamped to the minute) ---
timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M")
fid_dataset_dir = os.path.join(output_dir, f"fid_dataset_{timestamp}")
os.makedirs(fid_dataset_dir, exist_ok=True)

print(f"开始生成FID数据集，总计图片数: {num_images_total}，保存目录: {fid_dataset_dir}")

images_generated = 0
run_status = "已完成"  # overwritten below if the loop is interrupted or fails
start_time = time.time()

# Progress bar driven manually so that it reflects images actually saved.
progress_bar = tqdm(total=num_images_total, desc="生成FID数据集")

try:
    while images_generated < num_images_total:
        # One image per iteration, each with its own deterministic seed.
        seed = base_seed + images_generated
        generator = torch.Generator(device=gen_device).manual_seed(seed)

        with autocast(device_type=gen_device, dtype=torch.bfloat16):
            image = pipe(
                prompt=prompt,
                guidance_scale=guidance_scale,
                num_inference_steps=num_inference_steps,
                height=height,
                width=width,
                generator=generator,
            ).images[0]

        # Save under a zero-padded sequential file name.
        img_filename = f"fid_image_{images_generated:04d}.png"
        img_path = os.path.join(fid_dataset_dir, img_filename)
        image.save(img_path)

        images_generated += 1
        progress_bar.update(1)

        # Throughput and estimated time remaining, shown next to the bar.
        elapsed = time.time() - start_time
        images_per_sec = images_generated / elapsed
        remaining_time = (num_images_total - images_generated) / images_per_sec if images_per_sec > 0 else 0

        progress_bar.set_postfix({
            "速度": f"{images_per_sec:.2f}张/秒",
            "剩余时间": f"{remaining_time/60:.1f}分钟"
        })

        # Release cached CUDA memory every 10 images (only meaningful on GPU).
        if gen_device == "cuda" and images_generated % 10 == 0:
            torch.cuda.empty_cache()

except KeyboardInterrupt:
    run_status = "被中断"
    print(f"\n生成过程被中断。已生成 {images_generated} 张图片。")
except Exception as e:
    # Keep the best-effort behaviour (summary and info file are still written),
    # but show the full traceback so the failure can actually be diagnosed.
    run_status = f"出错: {e}"
    traceback.print_exc()
    print(f"\n生成过程出错: {str(e)}。已生成 {images_generated} 张图片。")
finally:
    progress_bar.close()

    # Summary; only claims completion when the loop really finished.
    total_time = time.time() - start_time
    if images_generated > 0:
        avg_time_per_image = total_time / images_generated
        if images_generated >= num_images_total:
            print(f"FID数据集生成完成! 共 {images_generated} 张图片")
        else:
            print(f"FID数据集生成未完成 ({run_status})。已保存 {images_generated}/{num_images_total} 张图片")
        print(f"总用时: {total_time:.1f}秒 (平均 {avg_time_per_image:.2f}秒/张)")
    else:
        print(f"未能生成任何图片。总运行时间: {total_time:.1f}秒")
    print(f"保存路径: {fid_dataset_dir}")

    # Record the run parameters next to the images. UTF-8 is requested explicitly
    # because the text is Chinese and the platform default encoding may not be
    # able to represent it.
    with open(os.path.join(fid_dataset_dir, "dataset_info.txt"), "w", encoding="utf-8") as f:
        f.write(f"生成日期: {datetime.datetime.now()}\n")
        f.write(f"提示词: {prompt}\n")
        f.write(f"参数: guidance_scale={guidance_scale}, steps={num_inference_steps}\n")
        f.write(f"尺寸: {width}x{height}\n")
        f.write(f"种子: base_seed={base_seed} (第 i 张图片使用 base_seed + i)\n")
        f.write(f"图片数量: {images_generated}\n")
        f.write(f"状态: {run_status}\n")
        f.write(f"总生成时间: {total_time:.1f}秒\n")

In [None]:
# Download CIFAR-10 and export the training split as individual PNG files.
from datasets import load_dataset
import os
from PIL import Image

# 1. Local folder that receives the exported images.
output_dir = "./cifar10-train"
os.makedirs(output_dir, exist_ok=True)

# 2. Download (or reuse the cached copy of) the CIFAR-10 training split.
ds = load_dataset("cifar10", split="train")

# 3. Write every image to disk under a zero-padded, index-based file name.
#    Entries that are not already PIL images are wrapped with Image.fromarray
#    first. The images are saved as they come; no resizing is done.
for position, record in enumerate(ds):
    picture = record["img"]
    picture = picture if isinstance(picture, Image.Image) else Image.fromarray(picture)
    target_path = os.path.join(output_dir, f"{position:05d}.png")
    picture.save(target_path)

print(f"共保存 {len(ds)} 张训练图像到 {output_dir}")

计算FID

In [3]:
import os
import torch
from cleanfid import fid

os.environ["CLEANFID_CACHE"] = "./fid_cache"

# The two image folders being compared.
output_dir = "./diff_attention"
fid_dataset_dir = os.path.join(output_dir, "fid_raw_dataset_20250531_0042")  # generated images
reference_dataset = "cifar10-train"  # folder of exported CIFAR-10 *training* images

# NOTE(review): the generation cell is configured for 100 images; an FID
# estimated from so few samples is likely unreliable — confirm that this
# comparison is what is intended.

# Options forwarded to clean-fid: "clean" preprocessing mode, GPU when available.
fid_options = {
    "mode": "clean",
    "device": "cuda" if torch.cuda.is_available() else "cpu",
    "batch_size": 32,
    "verbose": True,
}

# Positional order is (generated folder, reference folder).
fid_score = fid.compute_fid(fid_dataset_dir, reference_dataset, **fid_options)

print(f"FID分数: {fid_score:.4f}")

compute FID between two folders
Found 100 images in the folder ./diff_attention/fid_raw_dataset_20250531_0042


FID fid_raw_dataset_20250531_0042 : 100%|██████████| 4/4 [00:08<00:00,  2.17s/it]


Found 50000 images in the folder cifar10-train


FID cifar10-train : 100%|██████████| 1563/1563 [00:55<00:00, 28.14it/s]


FID分数: 306.3826


In [None]:
import os
from datasets import load_dataset
from PIL import Image
import torch
from cleanfid import fid

# Folder with the images generated earlier, and the folder that will receive
# the exported reference images.
gen_dir = "./dreambooth-sd35/fid_dataset_20250530_1824"
ref_dir = "./datasets/tju-pavilion-ref"
os.makedirs(ref_dir, exist_ok=True)

# Pull the reference photos from the Hub (cached under ./datasets).
dataset_id = "zjake/Memorial-Pavilion-of-TJU"
ds = load_dataset(dataset_id, split="train", cache_dir="./datasets")

# Export each sample as an RGB PNG named by its zero-padded index. Samples that
# are not already PIL images are wrapped with Image.fromarray first.
for position, record in enumerate(ds):
    picture = record["image"]
    if not isinstance(picture, Image.Image):
        picture = Image.fromarray(picture)
    rgb_picture = picture.convert("RGB")
    rgb_picture.save(os.path.join(ref_dir, f"{position:05d}.png"))

print(f"共导出 {len(ds)} 张参考图到 {ref_dir}")


# FID between the generated folder and the freshly exported reference folder.
os.environ["CLEANFID_CACHE"] = "./fid_cache"
fid_options = {
    "mode": "clean",
    "device": "cuda" if torch.cuda.is_available() else "cpu",
    "batch_size": 32,
    "verbose": True,
}
fid_score = fid.compute_fid(gen_dir, ref_dir, **fid_options)
print(f"FID 分数: {fid_score:.4f}")

In [None]:
# Stream WikiArt, export reference images of one style, then compute FID.
import os
from datasets import load_dataset
from PIL import Image
import torch
from cleanfid import fid

# --- Configuration ---
style_name     = "Impressionism"                        # style to export
limit          = 200                                     # maximum number of reference images
hf_dataset_id  = "huggan/wikiart"
wikiart_folder = "./datasets/wikiart-impressionism"               # where reference images are saved
gen_folder     = "fid_dataset_LoRA_Impressionism20250530_2031"  # folder with the generated images

os.makedirs(wikiart_folder, exist_ok=True)

# --- 1. Open the dataset once, in streaming mode ---
# The same streaming handle provides both the feature metadata (to translate the
# style name into its integer class id) and the samples themselves, so the
# dataset does not need to be opened a second time.
ds_stream = load_dataset(hf_dataset_id, split="train", streaming=True, cache_dir="./hf_cache")
style_names = ds_stream.features["style"].names
if style_name not in style_names:
    raise ValueError(f"未知的风格 {style_name!r}，可选风格: {style_names}")
style_idx = style_names.index(style_name)

# --- 2. Stream through the samples and export those of the requested style ---
count = 0
for ex in ds_stream:
    if ex["style"] != style_idx:
        continue
    img = ex["image"]
    if not isinstance(img, Image.Image):
        img = Image.open(img)
    # Always normalise to RGB, matching the other reference-export cell, so the
    # saved PNGs have a uniform mode whatever the source image used.
    img = img.convert("RGB")
    fname = f"{count:05d}.png"
    img.save(os.path.join(wikiart_folder, fname))
    count += 1
    if count >= limit:
        break

print(f"已保存 {count} 张“{style_name}”风格的参考图到 {wikiart_folder}")

# Without any reference image the FID below cannot be computed; fail with a
# clear message instead of an error from inside the FID computation.
if count == 0:
    raise RuntimeError(f"没有找到“{style_name}”风格的图像，无法计算 FID")

# --- 3. Compute FID (generated folder first, reference folder second) ---
os.environ['CLEANFID_CACHE'] = "./fid_cache"
fid_score = fid.compute_fid(
    gen_folder,       # generated images
    wikiart_folder,   # reference images
    mode="clean",
    device="cuda" if torch.cuda.is_available() else "cpu",
    batch_size=32,
    verbose=True
)
print(f"FID ({style_name}): {fid_score:.4f}")


CLIP SCORE

In [5]:
import os
import torch
from PIL import Image
from transformers import CLIPProcessor, CLIPModel

# --- 1. Configuration ---
image_dir = "./diff_attention/fid_raw_dataset_20250531_0042"
# Either a single prompt, which is then applied to every image, or exactly one
# prompt per image in the same order as the sorted image file names.
prompt = [
    "a beautiful bottle on a wooden table"
]
device = "cuda" if torch.cuda.is_available() else "cpu"

# --- 2. Load the CLIP model and its processor ---
model_name = "openai/clip-vit-base-patch32"
model     = CLIPModel.from_pretrained(model_name,cache_dir="models/huggingface").to(device).eval()
processor = CLIPProcessor.from_pretrained(model_name,cache_dir="models/huggingface")

# --- 3. Collect the image files ---
# Only common image extensions are picked up; extend the tuple if needed.
image_files = sorted([
    os.path.join(image_dir, fn)
    for fn in os.listdir(image_dir)
    if fn.lower().endswith((".png", ".jpg", ".jpeg"))
])
if not image_files:
    # Fail early with a clear message instead of an obscure torch.cat error later.
    raise FileNotFoundError(f"在 {image_dir} 中未找到图像文件")

# A single prompt is broadcast to all images; a longer list is used as given.
# (Multiplying a multi-prompt list by the image count, as before, could never
# satisfy the length check below.)
if len(prompt) == 1:
    prompts = prompt * len(image_files)
else:
    prompts = list(prompt)
if len(image_files) != len(prompts):
    raise ValueError("图像数量和 prompts 数量必须一致！")

# --- 4. Score in batches ---
batch_size = 16  # adjust to the available GPU memory
all_scores = []

for i in range(0, len(image_files), batch_size):
    # Open each file in a context manager so its handle is closed right away;
    # convert("RGB") returns an independent in-memory image.
    batch_imgs = []
    for f in image_files[i : i+batch_size]:
        with Image.open(f) as opened:
            batch_imgs.append(opened.convert("RGB"))
    batch_text = prompts[i : i+batch_size]

    # Preprocess text and images together and move the tensors to the device.
    inputs = processor(text=batch_text,
                       images=batch_imgs,
                       return_tensors="pt",
                       padding=True).to(device)

    with torch.no_grad():
        outputs    = model(**inputs)
        img_embeds = outputs.image_embeds    # (B, D)
        txt_embeds = outputs.text_embeds     # (B, D)

        # L2-normalise so the dot product below is a cosine similarity.
        img_embeds = img_embeds / img_embeds.norm(dim=-1, keepdim=True)
        txt_embeds = txt_embeds / txt_embeds.norm(dim=-1, keepdim=True)

        # Pairwise (image i with prompt i) cosine similarity.
        scores = (img_embeds * txt_embeds).sum(dim=-1)  # (B,)
        all_scores.append(scores.cpu())

# --- 5. Aggregate and report ---
all_scores = torch.cat(all_scores, dim=0).numpy()
for idx, score in enumerate(all_scores):
    print(f"Image {idx:03d}  CLIPScore: {score:.4f}")

print(f"\n平均 CLIPScore: {all_scores.mean():.4f}")


Image 000  CLIPScore: 0.3128
Image 001  CLIPScore: 0.3155
Image 002  CLIPScore: 0.3162
Image 003  CLIPScore: 0.2746
Image 004  CLIPScore: 0.3092
Image 005  CLIPScore: 0.3100
Image 006  CLIPScore: 0.3203
Image 007  CLIPScore: 0.3103
Image 008  CLIPScore: 0.3042
Image 009  CLIPScore: 0.3315
Image 010  CLIPScore: 0.3284
Image 011  CLIPScore: 0.3010
Image 012  CLIPScore: 0.3175
Image 013  CLIPScore: 0.2872
Image 014  CLIPScore: 0.3243
Image 015  CLIPScore: 0.2938
Image 016  CLIPScore: 0.3304
Image 017  CLIPScore: 0.3180
Image 018  CLIPScore: 0.3190
Image 019  CLIPScore: 0.3026
Image 020  CLIPScore: 0.3180
Image 021  CLIPScore: 0.3144
Image 022  CLIPScore: 0.3036
Image 023  CLIPScore: 0.2914
Image 024  CLIPScore: 0.3338
Image 025  CLIPScore: 0.3120
Image 026  CLIPScore: 0.3119
Image 027  CLIPScore: 0.3433
Image 028  CLIPScore: 0.3196
Image 029  CLIPScore: 0.3197
Image 030  CLIPScore: 0.3065
Image 031  CLIPScore: 0.2991
Image 032  CLIPScore: 0.3345
Image 033  CLIPScore: 0.3229
Image 034  CLI