# SDXL Dreambooth Training With Kohya_SS SD-Scripts - No-config version
# Run each cell in sequence

In [None]:
# 1. INSTALL DEPENDENCIES
# Downloads the colour helper module, installs the Python packages needed by
# the later cells, then runs the repository's setup.sh.

# colors.py supplies `bcolors`, whose BOLD / GREEN / RED / ENDC attributes are
# used to format status messages in this cell and in later cells.
!wget https://raw.githubusercontent.com/yushan777/kohya_ss_vastai/main/colors.py -q
from colors import bcolors
# NOTE: clear_output is only referenced by the commented-out line below.
from IPython.display import clear_output

print(f"{bcolors.BOLD}{bcolors.GREEN}Installing prerequisites...{bcolors.ENDC}")
# NOTE: the system-package steps below are disabled, so nothing is actually
# installed between the message above and the next one. Uncomment them if
# libgl1 / python3-tk are missing on your machine.
# !sudo apt-get update -y && apt-get install -y -qq libgl1
# !sudo dpkg --configure -a
# !sudo apt update -y && sudo apt install -y -qq python3-tk
# clear_output(wait=True)

print(f"{bcolors.BOLD}{bcolors.GREEN}Installing main dependencies - this will take a few minutes...{bcolors.ENDC}")
# huggingface_hub is used in cells 8 and 11; gdown is used in cells 5 and 6a.
%pip install -U "huggingface_hub[cli]"
%pip install gdown
# setup.sh is referenced by relative path, so it must be in the current
# working directory when this cell runs.
!chmod +x setup.sh
!./setup.sh

print(f"{bcolors.BOLD}{bcolors.GREEN}Finished installing dependencies.{bcolors.ENDC}")



In [None]:
# 2. Configure Accelerate With defaults and bf16 precision
# Non-interactive setup. The bf16 choice here matches the
# --mixed_precision="bf16" flag passed to the training command in cell 9.
!accelerate config default --mixed_precision "bf16"

In [None]:
# 3. VARIABLES
# The following variables can be changed according to your project preferences.
# They are read by the later cells, so re-run those cells after changing any value.
token_word = "ohwx"  # used in the training subfolder name and in the sample prompts (cell 4)
class_word = "person"  # used in folder names and prompts; also selects the pre-made reg set in cell 6b
training_repeats = 40  # numeric prefix of the training subfolder name, e.g. "40_ohwx person"
training_root_dir = "training_images"  # passed to the trainer as --train_data_dir (cell 9)
regularization_root_dir = "reg_images"  # passed to the trainer as --reg_data_dir (cell 9)
models_dir = "training_models"  # folder created in cell 4 for the base model
project_name = "myProject"  # passed to the trainer as --output_name (cell 9)
output_dir = "trained_models"  # passed as --output_dir (cell 9); also used by the LoRA extraction (cell 10)

In [None]:
# 4. CREATE DATASET FOLDERS & MODEL FOLDER

# ================================================================
# your training and reg image subfolders will be named according to what is set in previous cell: 
# so if you stuck with the defaults, they would be for example :
# "training_images/40_ohwx person"
# "reg_images/1_person"
training_dir = f'{training_root_dir}/{training_repeats}_{token_word} {class_word}'
reg_dir = f'{regularization_root_dir}/1_{class_word}'

# back to parent folder
%cd /workspace/kohya_ss

import os
if os.path.exists(training_dir) == False:
  os.makedirs(training_dir)
  print(f'{training_dir} Created.')
else:
  print(f'{training_dir} already exists.')

if os.path.exists(reg_dir) == False:
  os.makedirs(reg_dir)
  print(f'{reg_dir} Created.')
else:
  print(f'{reg_dir} already exists.')

if os.path.exists(models_dir) == False:
  os.makedirs(models_dir)
  print(f'{models_dir} Created.')
else:
  print(f'{models_dir} already exists.')

if os.path.exists(output_dir) == False:
  os.makedirs(output_dir)
  print(f'{output_dir} Created.')
else:
  print(f'{output_dir} already exists.')

# Create prompt file use for samples during training. 
# first 2 prompts are the same but one has CFG=1 and the other CFG=7
# if subject likeness is strong and regular even at CFG=1 then it is an good indicator that it is overfitted
# third prompt is to check how well it responds to styling. 
lines = [
    f"a photo of {token_word} {class_word} --w 1024 --h 1024 --l 7, --s 20 --d 1234567890\n",
    f"a photo of {token_word} {class_word} --w 1024 --h 1024 --l 1, --s 20 --d 1234567890\n",
    f"a portrait of {token_word} {class_word} in the style of Rembrandt --w 1024 --h 1024 --l 6, --s 20 --d 1234567890\n"
]

# Create and write to the text file
with open("prompt.txt", "w") as file:
    file.writelines(lines)

print("File 'prompt.txt' has been created.")

In [None]:
# 5. Download and unzip Training Images to Training Image folder
import shutil
import zipfile

# back to parent folder
%cd /workspace/kohya_ss

# =================================================================
# Use YOUR OWN Google Drive file IDs for your image zips
training_images_file_ID = '1BIixbqMYW5xxxxxxxxxxxxxxxxxxx' 
# =================================================================

# download training images from google drive, rename to train.zip
!gdown '{training_images_file_ID}' -O train.zip

# move train.zip to training images sub folder
shutil.move('train.zip', f'{training_dir}')

# extract zip contents to current folder and delete zip
%cd $training_dir
with zipfile.ZipFile('train.zip', 'r') as train_ref:
    train_ref.extractall()

# delete zip
!rm train.zip

# back to parent folder
%cd /workspace/kohya_ss

## For Regularization Images, you have 2 choices:
1) Download your own regularization images from Google Drive: use Cell 6a. <br/>
or
2) Download a pre-made set from my GitHub repo (only if your class word matches one of the available sets): use Cell 6b. <br/>
Sets available: person_ddim, woman_ddim, man_ddim

In [None]:
# 6a. Download YOUR OWN Reg Images (Google Drive)
import shutil
import zipfile

# back to parent folder
%cd /workspace/kohya_ss

# =================================================================
# Use YOUR OWN Google Drive file IDs for your image zips
reg_images_file_ID = '1CIqOhLBfzxxxxxxxxxxxxxxxxxx'
# =================================================================

# download reg images from google drive, rename to reg.zip
!gdown '{reg_images_file_ID}' -O reg.zip

# move train.zip to reg images sub folder
shutil.move('reg.zip', f'{reg_dir}')

# extract zip contents to current folder and delete zip
%cd $reg_dir
with zipfile.ZipFile('reg.zip', 'r') as reg_ref:
    reg_ref.extractall()

# delete zip
!rm reg.zip

# back to parent folder
%cd /workspace/kohya_ss


In [None]:
# 6b. Download one of my pre-made set of regularization images
!pip install github-clone
import shutil
import os

# back to parent folder
%cd /workspace/kohya_ss

print("This might take a few minutes.")
orig_dir_name = ''
if class_word == 'person':
    !ghclone https://github.com/yushan777/SD-Regularization-Images/tree/main/sdxl_person_ddim_1000
    orig_dir_name = 'sdxl_person_ddim_1000'
elif class_word == 'woman':
    !ghclone https://github.com/yushan777/SD-Regularization-Images/tree/main/sdxl_woman_ddim_1000
    orig_dir_name = 'sdxl_woman_ddim_1000'
elif class_word == 'man':
    !ghclone https://github.com/yushan777/SD-Regularization-Images/tree/main/sdxl_man_ddim_1000
    orig_dir_name = 'sdxl_man_ddim_1000'
    

if len(orig_dir_name) > 0:
    # first make sure reg dir is empty, by emptying it
    for filename in os.listdir(reg_dir):
        file_path = os.path.join(reg_dir, filename)
        try:
            if os.path.isfile(file_path) or os.path.islink(file_path):
                os.unlink(file_path)
            elif os.path.isdir(file_path):
                shutil.rmtree(file_path)
        except Exception as e:
            print('Failed to delete %s. Reason: %s' % (file_path, e))
            

    # now move images to reg folder
    print("Moving images to {reg_dir}")
    file_names = os.listdir(orig_dir_name)
        
    for file_name in file_names:
        shutil.move(os.path.join(orig_dir_name, file_name), reg_dir)

    # back to parent folder
    %cd /workspace/kohya_ss

    print(f"{bcolors.GREEN}Finished downloading regularization images.{bcolors.ENDC}")
else:
    print(f"{bcolors.RED}Your class {class_word} does not match any available pre-made sets. Nothing downloaded.{bcolors.ENDC}")


In [None]:
# 7. Check Dataset Folders
# ====================================================
# remove any non-image files & warn if any additional folders exist
import os
import shutil
from glob import glob

# Extensions kept in the dataset folders. Matching is case-insensitive so that
# files such as "IMG_0001.JPG" or "photo.jpeg" are not deleted by mistake.
IMAGE_EXTENSIONS = ('.jpg', '.jpeg', '.png')


def _clean_image_folder(folder_path, folder_label):
    """Delete non-image files from folder_path and warn about subfolders.

    folder_path  -- the image folder to clean (only its top level is scanned).
    folder_label -- human-readable folder name used in the warning message.

    Files whose name does not end in one of IMAGE_EXTENSIONS are removed.
    Subfolders are left in place and reported, except for the hidden
    .ipynb_checkpoints folder, which is always removed.
    """
    # '*' does not match hidden entries, so dot-files are left untouched here.
    for file_path in glob(folder_path + '/*', recursive=False):
        if file_path.lower().endswith(IMAGE_EXTENSIONS):
            continue
        if os.path.isfile(file_path):
            os.remove(file_path)
        elif os.path.isdir(file_path):
            print(f'{bcolors.BOLD}{bcolors.RED} Unexpected folder: \'{file_path}\' was found in {folder_label} folder path.  Check and remove it.{bcolors.ENDC}')

    # force remove hidden .ipynb_checkpoints folder in images folder.
    checkpoints_dir = f'{folder_path}/.ipynb_checkpoints'
    if os.path.exists(checkpoints_dir):
        shutil.rmtree(checkpoints_dir)


_clean_image_folder(f'{training_dir}', 'training images')
_clean_image_folder(f'{reg_dir}', 'regularization images')

In [None]:
# 8. Download SDXL 1.0 (0.9VAE) base model (the one with 0.9vae baked in instead of 1.0vae)
from huggingface_hub import hf_hub_download

# Specify the repository ID, the filename, and the desired download directory
repo_id = 'stabilityai/stable-diffusion-xl-base-1.0'
filename = 'sd_xl_base_1.0_0.9vae.safetensors'
# Use the models folder configured in cell 3 (and created in cell 4) rather
# than a hard-coded name, so changing models_dir there takes effect here too.
download_directory = models_dir

# Download the file to the models dir, as a regular file (not symlink)
# base_model holds the path of the downloaded file; cells 9 and 10 read it.
base_model = hf_hub_download(repo_id=repo_id, filename=filename, local_dir=download_directory)
print(base_model)


In [None]:
# 9. START TRAINING

max_steps = 2000
learning_rate = 1e-5 
learning_rate_te1 = 1e-7
learning_rate_te2 = 1e-7
save_every_n_steps = 1000

print(base_model)
!accelerate launch --num_cpu_threads_per_process=2 "sd-scripts/sdxl_train.py" \
  --pretrained_model_name_or_path="{base_model}" \
  --train_data_dir="{training_root_dir}" \
  --reg_data_dir="{regularization_root_dir}" \
  --output_dir="{output_dir}" \
  --output_name="{project_name}" \
  --save_model_as="safetensors" \
  --train_batch_size=2 \
  --max_train_steps={max_steps} \
  --save_every_n_steps={save_every_n_steps} \
  --optimizer_type="adafactor" \
  --optimizer_args scale_parameter=False relative_step=False warmup_init=False \
  --xformers \
  --cache_latents \
  --lr_scheduler="constant_with_warmup" \
  --lr_warmup_steps=100 \
  --learning_rate="{learning_rate}" \
  --learning_rate_te1="{learning_rate_te1}" \
  --learning_rate_te2="{learning_rate_te2}" \
  --max_grad_norm=0.0 \
  --train_text_encoder \
  --resolution="1024,1024" \
  --save_precision="bf16" \
  --save_n_epoch_ratio=1 \
  --max_data_loader_n_workers=1 \
  --persistent_data_loader_workers \
  --mixed_precision="bf16" \
  --full_bf16 \
  --logging_dir="logs" \
  --log_prefix="last" \
  --gradient_checkpointing \
  --min_snr_gamma=0 \
  --noise_offset=0.0357 \
  --sample_sampler="euler_a" \
  --sample_prompts="prompt.txt" \
  --sample_every_n_steps=100

# Training finished...
# List all checkpoints
print("Trained models:\n")
!ls trained_models -1

In [None]:
# 10. (Optional) Extract LoRA from checkpoint

trained_model_name='myProject.safetensors' #this must match one of your trained checkpoints
lora_name='xxxxxxxxxxxxx-loRA-64.safetensors' #this can be anything. 

!python3 sd-scripts/networks/extract_lora_from_models.py \
--sdxl \
--model_org='{base_model}' \
--model_tuned='{output_dir}/{trained_model_name}' \
--save_to='{output_dir}/{lora_name}' \
--save_precision='bf16' \
--min_diff=0.001 \
--dim=64 \
--device='cuda'

# more info about arguments at:
# https://github.com/kohya-ss/sd-scripts/blob/95ae56bd22c285ccb2fe5fca96d92f39842bb99b/networks/extract_lora_from_models.py#L211


In [None]:
# 11. (Optional) Upload to Hugging Face 
# you will need an existing repo that you have access rights to and your hugging face access token

!huggingface-cli login --token hf_xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx

%cd output # or a subfolder of output/

from huggingface_hub import HfApi
# Variables
filename = 'xxxxxxxxx.safetensors'
target_subfolder = 'xxx'
target_filename = 'xxxxxxxxx.safetensors'

api = HfApi()
api.upload_file(
    path_or_fileobj=filename,  # Assuming the file is in the current working directory
    path_in_repo=f"{target_subfolder}/{filename}",  # create the path in the repository
    repo_id="username/repo",
    repo_type="model"
    )

In [None]:
# Empty System Trash
# If you have deleted models etc or large downloads, they will still be in the trash
# you will need to run this to release storage space
rm -rf ~/.local/share/Trash/*