<a href="https://colab.research.google.com/github/tztechno/cc_archive/blob/main/AvatarArtist1_2D_Domain_Transfer.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# IMPORTANT: RUN THIS CELL IN ORDER TO IMPORT YOUR KAGGLE DATA SOURCES,
# THEN FEEL FREE TO DELETE THIS CELL.
# NOTE: THIS NOTEBOOK ENVIRONMENT DIFFERS FROM KAGGLE'S PYTHON
# ENVIRONMENT SO THERE MAY BE MISSING LIBRARIES USED BY YOUR
# NOTEBOOK.
# Last successful run: 12/24 14:32.
# The output_styled folder was then moved to Google Drive manually.


## **Step1**
# **AvatarArtist1: 2D Domain Transfer**

https://kumapowerliu.github.io/AvatarArtist/

Step1: https://www.kaggle.com/code/stpeteishii/avatarartist1-2d-domain-transfer

Step2: https://www.kaggle.com/code/stpeteishii/avatarartist2-next3d-4d-gan-fine-tuning

Step3: https://www.kaggle.com/code/stpeteishii/avatarartist3-triplane-decomposition

Step4: https://www.kaggle.com/code/stpeteishii/avatarartist4-diffusion-transformer-training

Step5: https://www.kaggle.com/code/stpeteishii/avatarartist5-avatar-generation-inference

In [None]:
# Optional Hugging Face token. It is read from the Colab secret
# "secret_hf_token" when available; otherwise the HF_TOKEN environment
# variable is used (None if neither is set). The token is optional, so a
# missing secret must not abort the notebook.
import os

try:
    from google.colab import userdata
    hf_token0 = userdata.get('secret_hf_token')
except Exception as err:  # not on Colab, secret missing, or access not granted
    print(
        "Note: Colab secret 'secret_hf_token' is unavailable "
        f"({type(err).__name__}); falling back to the HF_TOKEN environment variable."
    )
    hf_token0 = os.environ.get("HF_TOKEN")

In [None]:
!pip install torch torchvision --index-url https://download.pytorch.org/whl/cu118
!pip install diffusers transformers accelerate
!pip install controlnet-aux opencv-python pillow
!pip install mediapipe==0.10.9

In [None]:
# Mount Google Drive so the face-image dataset stored there can be read from
# this Colab runtime.
import os

from google.colab import drive

drive.mount('/content/drive')

# Root folder of the dataset on Drive. Replace "your_folder" with your own
# folder name, or set the PINS_DATASET_DIR environment variable instead.
gd_path = os.environ.get(
    'PINS_DATASET_DIR',
    '/content/drive/MyDrive/your_folder/pins_dataset'
)
if not os.path.isdir(gd_path):
    # os.walk() on a missing folder silently yields nothing, so warn early.
    print(f"Warning: dataset folder not found: {gd_path}")

In [None]:
import shutil
import os
import random

# Only image files are useful downstream; anything else found on Drive
# (metadata, archives, hidden files) would waste one of the sampled slots.
IMAGE_EXTENSIONS = (".jpg", ".jpeg", ".png")
# Fixed seed so that "Restart & Run All" samples the same images every time.
SHUFFLE_SEED = 42

# Sort first: os.walk() order is filesystem-dependent, and a seeded shuffle is
# only reproducible when its input order is stable.
paths = sorted(
    os.path.join(dirname, filename)
    for dirname, _, filenames in os.walk(gd_path)
    for filename in filenames
    if filename.lower().endswith(IMAGE_EXTENSIONS)
)
if not paths:
    raise FileNotFoundError(
        f"No image files found under {gd_path!r}; check the Drive path."
    )
print(paths[0:6])
# A private Random instance keeps the global random state untouched.
random.Random(SHUFFLE_SEED).shuffle(paths)

In [None]:
# Single source of truth for the working folder. Using the same (relative)
# path for creation, copying and listing avoids the case where the folder is
# created in the current directory but files are copied to a different,
# non-existent absolute path (which would overwrite one file repeatedly).
INPUT_IMAGES_DIR = "input_images"
# Number of shuffled images to stage for stylization.
MAX_INPUT_IMAGES = 100

os.makedirs(INPUT_IMAGES_DIR, exist_ok=True)
for path in paths[:MAX_INPUT_IMAGES]:
    shutil.copy(path, INPUT_IMAGES_DIR)

# List what was staged (sorted for stable, comparable output).
for dirname, _, filenames in os.walk(INPUT_IMAGES_DIR):
    for filename in sorted(filenames):
        print(filename)

In [None]:
"""
AvatarArtist: 2D Domain Transfer Script
Converts real-life images into specific styles using
Stable Diffusion + ControlNet + SDEdit.

[IMPORTANT] Hugging Face Token Setup:
Method 1: Environment Variable
  export HF_TOKEN="your_token_here"

Method 2: Command Line
  huggingface-cli login

Method 3: Specify in Code
  artist = AvatarArtist2D(hf_token="your_token_here")
"""

import os
from pathlib import Path
from typing import Optional, List, Tuple

import cv2
import numpy as np
import torch
from PIL import Image
from controlnet_aux import OpenposeDetector, CannyDetector
from diffusers import (
    StableDiffusionControlNetPipeline,
    StableDiffusionControlNetImg2ImgPipeline,
    ControlNetModel,
    DDIMScheduler,
    UniPCMultistepScheduler
)
from diffusers.utils import load_image

# MediaPipe is an optional dependency: record whether it could be imported in
# MEDIAPIPE_AVAILABLE so later code can skip face-mesh setup when it is absent.
try:
    import mediapipe as mp
    MEDIAPIPE_AVAILABLE = True
except ImportError:
    # Import failed; only a warning is printed and execution continues.
    MEDIAPIPE_AVAILABLE = False
    print("Warning: MediaPipe not available. Using ControlNet only for pose detection.")


class AvatarArtist2D:
    """Stylize portrait photos with Stable Diffusion + ControlNet + SDEdit.

    Each input photo is resized to 512x512 and converted into a control image
    (OpenPose skeleton or Canny edges). The photo is then re-rendered by a
    ControlNet image-to-image pipeline: the photo itself is partially noised
    (``noise_strength``) and denoised towards the style prompt, while the
    control image constrains the structure.
    """

    def __init__(
        self,
        model_id: str = "runwayml/stable-diffusion-v1-5",
        controlnet_model: str = "lllyasviel/sd-controlnet-openpose",
        device: str = "cuda",
        dtype: torch.dtype = torch.float16,
        use_canny: bool = False,
        hf_token: Optional[str] = hf_token0
    ):
        """
        Load the ControlNet, the Stable Diffusion pipeline and the detectors.

        Args:
            model_id: Path or ID for the Stable Diffusion model.
                - "runwayml/stable-diffusion-v1-5" (Recommended: No token required)
                - "stabilityai/stable-diffusion-2-1" (May require a token)
                - "stabilityai/stable-diffusion-xl-base-1.0" (SDXL)
            controlnet_model: Path or ID for the ControlNet model. When left at
                its default, a model matching ``model_id`` and ``use_canny`` is
                selected automatically; any other value is used as given.
            device: Computing device to use.
            dtype: Data type. float16 is replaced by float32 on CPU.
            use_canny: Use Canny edge detection (simpler and lightweight).
            hf_token: Hugging Face token (optional). Falls back to the
                HF_TOKEN environment variable when empty.

        Raises:
            RuntimeError: If no ControlNet can be loaded, or if the base model
                needs authentication. Other loading errors propagate unchanged.
        """
        # Half-precision inference is poorly supported on CPU, so use float32 there.
        if str(device).startswith("cpu") and dtype == torch.float16:
            print("Warning: float16 is not usable on CPU. Using float32 instead.")
            dtype = torch.float32

        self.device = device
        self.dtype = dtype
        self.use_canny = use_canny
        self.hf_token = hf_token or os.environ.get("HF_TOKEN")

        print(f"Using Model: {model_id}")
        print("Loading models...")

        # Pick a ControlNet matching the base model, but only when the caller
        # kept the default; an explicitly chosen ControlNet is respected.
        default_controlnet = "lllyasviel/sd-controlnet-openpose"
        if controlnet_model == default_controlnet:
            if "v1-5" in model_id:
                if use_canny:
                    controlnet_model = "lllyasviel/sd-controlnet-canny"
            elif "stable-diffusion-2" in model_id:
                if use_canny:
                    controlnet_model = "thibaud/controlnet-sd21-canny-diffusers"
                else:
                    controlnet_model = "thibaud/controlnet-sd21-openpose-diffusers"

        print(f"ControlNet: {controlnet_model}")

        # Load ControlNet, falling back to the SD 1.5 Canny model on failure.
        try:
            self.controlnet = ControlNetModel.from_pretrained(
                controlnet_model,
                torch_dtype=dtype,
                token=self.hf_token
            )
            print("✓ ControlNet loaded successfully")
        except Exception as e:
            print(f"⚠ Error: Failed to load {controlnet_model}")
            print(f"  Details: {e}")
            print("Attempting fallback to Canny model...")
            try:
                fallback_model = "lllyasviel/sd-controlnet-canny"
                self.controlnet = ControlNetModel.from_pretrained(
                    fallback_model,
                    torch_dtype=dtype,
                    token=self.hf_token
                )
                # The detector must match the ControlNet that was actually loaded.
                self.use_canny = True
                print("✓ Using Canny ControlNet as fallback.")
            except Exception as e2:
                raise RuntimeError(f"Failed to load any ControlNet model: {e2}") from e2

        # Stable Diffusion pipeline setup. The image-to-image variant is
        # required for SDEdit: it starts from the noised input photo instead
        # of pure noise.
        try:
            self.pipe = StableDiffusionControlNetImg2ImgPipeline.from_pretrained(
                model_id,
                controlnet=self.controlnet,
                torch_dtype=dtype,
                safety_checker=None,
                token=self.hf_token
            )
            print("✓ Stable Diffusion loaded successfully")
        except Exception as e:
            error_msg = str(e)
            if "gated" in error_msg.lower() or "token" in error_msg.lower():
                raise RuntimeError(
                    f"\n{'='*60}\n"
                    f"🔐 Authentication Error: This model requires a Hugging Face token.\n"
                    f"\nInstructions:"
                    f"\n1. Get a token at: https://huggingface.co/settings/tokens"
                    f"\n2. Set it using one of these methods:"
                    f"\n   a) export HF_TOKEN='your_token'"
                    f"\n   b) huggingface-cli login"
                    f"\n   c) artist = AvatarArtist2D(hf_token='your_token')"
                    f"\n\nAlternatively, use a model that doesn't require a token:"
                    f"\n   model_id='runwayml/stable-diffusion-v1-5'"
                    f"\n{'='*60}\n"
                ) from e
            raise

        # Scheduler configuration (SDEdit compatible)
        self.pipe.scheduler = DDIMScheduler.from_config(
            self.pipe.scheduler.config
        )

        self.pipe = self.pipe.to(device)
        self.pipe.enable_attention_slicing()

        # Control image processor
        if self.use_canny:
            print("Initializing Canny detector...")
            self.processor = CannyDetector()
        else:
            print("Loading Openpose processor...")
            try:
                self.processor = OpenposeDetector.from_pretrained("lllyasviel/ControlNet")
            except Exception as e:
                print(f"Warning: OpenPose load failed: {e}")
                print("Falling back to Canny...")
                self.processor = CannyDetector()
                self.use_canny = True

        # MediaPipe Face Detection (Optional: for more detailed control)
        self.face_mesh = None
        if MEDIAPIPE_AVAILABLE:
            try:
                mp_face_mesh = mp.solutions.face_mesh
                self.face_mesh = mp_face_mesh.FaceMesh(
                    static_image_mode=True,
                    max_num_faces=1,
                    min_detection_confidence=0.5
                )
                print("MediaPipe face detection enabled.")
            except Exception as e:
                print(f"MediaPipe initialization failed: {e}")
                self.face_mesh = None

        print("Initialization complete!")

    def extract_pose_landmarks(self, image: Image.Image) -> Image.Image:
        """Extract the control image from the input (OpenPose or Canny)."""
        return self.processor(image)

    def extract_face_landmarks(self, image: Image.Image) -> Optional[np.ndarray]:
        """Extract face landmarks using MediaPipe.

        Returns:
            An array with one ``[x, y]`` row (pixel coordinates) per landmark
            of the first detected face, or None when MediaPipe is unavailable,
            no face is found, or detection fails.
        """
        if self.face_mesh is None:
            return None

        try:
            # MediaPipe expects RGB input, which is what PIL provides; converting
            # to BGR (OpenCV order) would swap the red and blue channels.
            image_np = np.array(image.convert("RGB"))
            results = self.face_mesh.process(image_np)

            if results.multi_face_landmarks:
                landmarks = results.multi_face_landmarks[0]
                h, w = image_np.shape[:2]
                # Landmarks are normalized to [0, 1]; scale to pixel coordinates.
                return np.array([
                    [lm.x * w, lm.y * h]
                    for lm in landmarks.landmark
                ])
        except Exception as e:
            print(f"Warning: Face landmark extraction failed: {e}")
        return None

    def apply_sdedit(
        self,
        image: Image.Image,
        prompt: str,
        control_image: Image.Image,
        noise_strength: float = 0.5,
        controlnet_conditioning_scale: float = 1.0,
        guidance_scale: float = 7.5,
        num_inference_steps: int = 50,
        seed: Optional[int] = None
    ) -> Image.Image:
        """Perform domain transfer applying SDEdit logic.

        Args:
            image: Source photo that is partially noised and then denoised.
            prompt: Text prompt describing the target style.
            control_image: ControlNet conditioning image (pose or edges).
            noise_strength: Fraction of the diffusion process applied to
                ``image``, in (0, 1]. Low values stay close to the photo,
                1.0 ignores it almost entirely.
            controlnet_conditioning_scale: Weight of the ControlNet guidance.
            guidance_scale: Classifier-free guidance scale.
            num_inference_steps: Scheduler steps before scaling by
                ``noise_strength``.
            seed: Optional seed for reproducible sampling.

        Returns:
            The stylized image.

        Raises:
            ValueError: If ``noise_strength`` is outside (0, 1].
        """
        if not 0.0 < noise_strength <= 1.0:
            raise ValueError(
                f"noise_strength must be in (0, 1], got {noise_strength}"
            )

        generator = None
        if seed is not None:
            generator = torch.Generator(device=self.device).manual_seed(seed)

        # The photo and its control image must share the same resolution.
        if control_image.size != image.size:
            control_image = control_image.resize(image.size)

        output = self.pipe(
            prompt=prompt,
            image=image,
            control_image=control_image,
            strength=noise_strength,
            num_inference_steps=num_inference_steps,
            guidance_scale=guidance_scale,
            controlnet_conditioning_scale=controlnet_conditioning_scale,
            generator=generator,
        )
        return output.images[0]

    def process_single_image(
        self,
        image_path: str,
        output_path: str,
        style_prompt: str,
        noise_strength: float = 0.5,
        controlnet_strength: float = 1.0,
        guidance_scale: float = 7.5,
        num_steps: int = 50,
        seed: Optional[int] = None
    ) -> bool:
        """Process a single image.

        Loads ``image_path``, resizes it to 512x512, stylizes it and saves the
        result to ``output_path``.

        Returns:
            True on success; False if any step failed (the error is printed so
            that a batch run can continue with the next image).
        """
        try:
            image = load_image(image_path)
            image = image.resize((512, 512))

            print("  Extracting control image...")
            control_image = self.extract_pose_landmarks(image)

            print("  Transforming style...")
            output_image = self.apply_sdedit(
                image=image,
                prompt=style_prompt,
                control_image=control_image,
                noise_strength=noise_strength,
                controlnet_conditioning_scale=controlnet_strength,
                guidance_scale=guidance_scale,
                num_inference_steps=num_steps,
                seed=seed
            )

            output_image.save(output_path)
            print(f"  Saved to: {output_path}")
            return True
        except Exception as e:
            print(f"  Error: {str(e)}")
            return False

    def process_batch(
        self,
        input_dir: str,
        output_dir: str,
        style_prompt: str,
        noise_strength: float = 0.5,
        controlnet_strength: float = 1.0,
        guidance_scale: float = 7.5,
        num_steps: int = 50,
        extensions: List[str] = [".jpg", ".jpeg", ".png"],
        seed: Optional[int] = None
    ):
        """Process all images in a folder.

        Files directly inside ``input_dir`` whose names end with one of
        ``extensions`` (case-insensitive) are processed in sorted order and
        written to ``output_dir`` as ``styled_<original name>``.
        """
        os.makedirs(output_dir, exist_ok=True)
        input_path = Path(input_dir)

        # Match extensions case-insensitively in a single pass: globbing once
        # per case variant misses mixed-case names (".Jpg") and can list a file
        # twice on case-insensitive filesystems. Sorting keeps runs comparable.
        wanted_suffixes = tuple(ext.lower() for ext in extensions)
        candidates = sorted(input_path.iterdir()) if input_path.is_dir() else []
        image_files = [
            candidate for candidate in candidates
            if candidate.is_file() and candidate.name.lower().endswith(wanted_suffixes)
        ]

        print(f"\nProcessing {len(image_files)} images")
        print(f"Style: {style_prompt}")
        print(f"Noise Strength: {noise_strength}")
        print(f"ControlNet Strength: {controlnet_strength}\n")

        success_count = 0
        for i, img_path in enumerate(image_files, 1):
            print(f"[{i}/{len(image_files)}] Processing: {img_path.name}")
            output_path = os.path.join(output_dir, f"styled_{img_path.name}")

            if self.process_single_image(
                str(img_path), output_path, style_prompt,
                noise_strength, controlnet_strength,
                guidance_scale, num_steps, seed
            ):
                success_count += 1

        print(f"\nFinished: Transformed {success_count}/{len(image_files)} images.")


def main():
    """Main execution entry point.

    Builds an AvatarArtist2D with the settings below and stylizes every image
    in INPUT_DIR into OUTPUT_DIR. Initialization errors are reported together
    with troubleshooting hints instead of a traceback.
    """
    INPUT_DIR = "./input_images"
    OUTPUT_DIR = "./output_styled"

    # Model Selection
    # Option 1: SD 1.5 (Recommended - Fast, no token required)
    MODEL_ID = "runwayml/stable-diffusion-v1-5"

    # Option 2: SD 2.1 (Higher quality, may require token)
    # MODEL_ID = "stabilityai/stable-diffusion-2-1"

    # Hugging Face Token (if required)
    # Method 1: Environment variable export HF_TOKEN="your_token"
    # Method 2: Specify directly here
    HF_TOKEN = None  # e.g., "hf_xxxxxxxxxxxxx"

    STYLE_PROMPTS = {
        "pixar": "a 3D render of a face in Pixar animation style, high quality, detailed, professional lighting",
        "anime": "anime style portrait, cel shaded, vibrant colors, expressive eyes, detailed",
        "lego": "LEGO minifigure face, plastic texture, simplified features, toy style",
        "oil_painting": "oil painting portrait, classical style, rich colors, brushstrokes visible",
        "cartoon": "cartoon style portrait, bold lines, vibrant colors, simplified features"
    }

    STYLE = "pixar"
    NOISE_STRENGTH = 0.4 #reduced
    CONTROLNET_STRENGTH = 0.8
    GUIDANCE_SCALE = 7.5
    NUM_STEPS = 50
    SEED = 42
    USE_CANNY = False # True: Canny (Lightweight), False: OpenPose (High Accuracy)

    if STYLE not in STYLE_PROMPTS:
        print(f"Unknown style '{STYLE}'. Available styles: {sorted(STYLE_PROMPTS)}")
        return

    device = "cuda" if torch.cuda.is_available() else "cpu"
    init_kwargs = dict(
        model_id=MODEL_ID,
        device=device,
        # Half precision only on GPU; CPU inference needs float32.
        dtype=torch.float16 if device == "cuda" else torch.float32,
        use_canny=USE_CANNY,
    )
    # Pass the token only when one is set here. Passing hf_token=None would
    # override the constructor's own default token.
    if HF_TOKEN:
        init_kwargs["hf_token"] = HF_TOKEN

    try:
        artist = AvatarArtist2D(**init_kwargs)
    except Exception as e:
        print(f"\n❌ Initialization Error: {e}")
        print("\n💡 Troubleshooting:")
        print("  1. Login to Hugging Face: huggingface-cli login")
        print("  2. Or set environment variable: export HF_TOKEN='your_token'")
        print("  3. Or use a token-free model: MODEL_ID='runwayml/stable-diffusion-v1-5'")
        return

    artist.process_batch(
        input_dir=INPUT_DIR,
        output_dir=OUTPUT_DIR,
        style_prompt=STYLE_PROMPTS[STYLE],
        noise_strength=NOISE_STRENGTH,
        controlnet_strength=CONTROLNET_STRENGTH,
        guidance_scale=GUIDANCE_SCALE,
        num_steps=NUM_STEPS,
        seed=SEED
    )

if __name__ == "__main__":
    main()

In [None]:
import os
import matplotlib.pyplot as plt
from PIL import Image

def show_image(image_dir, rows=2, cols=3):
    """Display the first images of a folder in a grid.

    Args:
        image_dir: Folder to read. Files ending in .png/.jpg/.jpeg
            (case-insensitive) are taken in sorted name order.
        rows: Number of grid rows (default 2).
        cols: Number of grid columns (default 3).

    At most ``rows * cols`` images are shown; unused cells are left blank.
    """
    capacity = rows * cols
    image_paths = [
        os.path.join(image_dir, f)
        for f in sorted(os.listdir(image_dir))
        if f.lower().endswith((".png", ".jpg", ".jpeg"))
    ][:capacity]

    # squeeze=False always yields a 2-D array of axes, so a 1x1 grid also works.
    fig, axes = plt.subplots(rows, cols, figsize=(4 * cols, 4 * rows), squeeze=False)
    axes = axes.flatten()
    for ax, img_path in zip(axes, image_paths):
        # The context manager closes the file as soon as the pixels are drawn.
        with Image.open(img_path) as img:
            ax.imshow(img)
        ax.axis("off")
        ax.set_title(os.path.basename(img_path), fontsize=9)
    # Hide the frames of cells that received no image.
    for ax in axes[len(image_paths):]:
        ax.axis("off")
    plt.tight_layout()
    plt.show()

In [None]:
# Preview the first staged input photos (sorted by file name).
show_image('input_images')

In [None]:
# Preview the first stylized results (sorted by file name).
show_image('output_styled')