In [None]:
# SDXL Dreambooth Training With Kohya_SS SD-Scripts - No-config version
# Run each cell in sequence

In [None]:
# 1. INSTALL DEPENDENCIES


!wget https://raw.githubusercontent.com/yushan777/kohya_ss_vastai/main/colors.py -q
from colors import bcolors
from IPython.display import clear_output
!apt-get update -y && apt-get install -y libgl1
clear_output(wait=True)

print(f"{bcolors.BOLD}{bcolors.GREEN}Installing dependencies - this will take a few minutes...{bcolors.ENDC}")
print(f"{bcolors.BOLD}{bcolors.GREEN}Ignore any warnings about running pip as the 'root'{bcolors.ENDC}")
!pip install torch==2.0.1+cu118 torchvision==0.15.2+cu118 --extra-index-url https://download.pytorch.org/whl/cu118 --root-user-action=ignore # no_verify leave this to specify not checking this a verification stage
!pip install xformers==0.0.20 bitsandbytes==0.35.0 --root-user-action=ignore
!pip install tensorboard==2.12.3 tensorflow==2.12.0 wheel --root-user-action=ignore
!pip install protobuf==3.20.3 --root-user-action=ignore
!pip install tensorrt --root-user-action=ignore
!pip install gdown --root-user-action=ignore
!pip install accelerate==0.19.0 --root-user-action=ignore
!pip install transformers==4.30.2 --root-user-action=ignore
!pip install diffusers[torch]==0.18.2 --root-user-action=ignore
!pip install ftfy==6.1.1 --root-user-action=ignore
!pip install albumentations==1.3.0 --root-user-action=ignore
!pip install opencv-python==4.7.0.68 --root-user-action=ignore
!pip install einops==0.6.0 --root-user-action=ignore
!pip install pytorch-lightning==1.9.0 --root-user-action=ignore
!pip install bitsandbytes==0.35.0 --root-user-action=ignore
!pip install tensorboard==2.12.0 --root-user-action=ignore
!pip install safetensors==0.3.1 --root-user-action=ignore
!pip install altair==4.2.2 --root-user-action=ignore
!pip install easygui==0.98.3 --root-user-action=ignore
!pip install toml==0.10.2 --root-user-action=ignore
!pip install voluptuous==0.13.1 --root-user-action=ignore
!pip install huggingface-hub==0.15.1 --root-user-action=ignore
!pip install invisible-watermark==0.2.0  --root-user-action=ignore
!pip install open-clip-torch==2.20.0 --root-user-action=ignore
!pip install protobuf==3.20.3 --root-user-action=ignore
!pip install -e . --root-user-action=ignore
clear_output(wait=True)
print(f"{bcolors.BOLD}{bcolors.GREEN}Finished installing dependencies.{bcolors.ENDC}")



In [None]:
# 2. VARIABLES
# Project settings - edit these to suit your own project before running the later cells.

# Name given to the trained model, and the folder it is written to
# (passed to the training command as --output_name / --output_dir).
project_name = "myProject"
output_dir = "trained_models"

# Root folders that hold the training and regularization image subfolders.
training_root_dir = "training_images"
regularization_root_dir = "reg_images"

# Pieces of the image subfolder names built in step 3:
#   "<training_repeats>_<token_word> <class_word>"  and  "1_<class_word>"
training_repeats = 40
token_word = "ohwx"
class_word = "person"

In [None]:
# 3. CREATE DATASET FOLDERS

# ================================================================
# your training and reg image subfolders will be named according to what is set in previous cell: 
# so if you stuck with the defaults, they would be for example :
# "training_images/40_ohwx person"
# "reg_images/1_person"
training_dir = f'{training_root_dir}/{training_repeats}_{token_word} {class_word}'
reg_dir = f'{regularization_root_dir}/1_{class_word}'

# back to parent folder
%cd /workspace/sd-scripts

import os
if os.path.exists(training_dir) == False:
  os.makedirs(training_dir)
  print(f'{training_dir} Created.')
else:
  print(f'{training_dir} already exists.')

if os.path.exists(reg_dir) == False:
  os.makedirs(reg_dir)
  print(f'{reg_dir} Created.')
else:
  print(f'{reg_dir} already exists.')


In [None]:
# 4. Download and unzip images to appropriate folders
import shutil
import zipfile

# =================================================================
# Use YOUR OWN Google Drive file IDs for your image zips
training_images_file_ID = '1BIixbqMYW5-eOvTFC_RaZyWmlnBzey-0' 
reg_images_file_ID = '1CIqOhLBfzrl5OkvGgbp2XK_njdFh5bMw'
# =================================================================

# download training images from google drive, rename to train.zip
!gdown '{training_images_file_ID}' -O train.zip

# move train.zip to training images sub folder
shutil.move('train.zip', f'{training_dir}')

# extract zip contents to current folder and delete zip
%cd $training_dir
with zipfile.ZipFile('train.zip', 'r') as train_ref:
    train_ref.extractall()

# delete zip
!rm train.zip

# back to parent folder
%cd /workspace/sd-scripts

# download the reg images from google drive, rename to regs.zip
!gdown '{reg_images_file_ID}' -O regs.zip
# move regs.zip to reg_images folder
shutil.move('regs.zip', f'{reg_dir}')

# extract zip contents to current folder and delete zip
%cd $reg_dir
with zipfile.ZipFile('regs.zip', 'r') as zip_ref:
    zip_ref.extractall()

# delete zip
!rm regs.zip

# back to parent folder
%cd /workspace/sd-scripts

In [None]:
# 5. Check Dataset Folders
# ====================================================
# remove any non-image files & warn if any additional folders exist
import os
import shutil
from glob import glob

# File extensions that count as JPG or PNG images. Compared case-insensitively,
# so 'photo.JPG', 'photo.jpeg' and 'photo.PNG' are kept instead of being deleted.
IMAGE_EXTENSIONS = ('.jpg', '.jpeg', '.png')


def clean_image_folder(folder_path):
    """Delete non-image files from folder_path and warn about any sub-folders.

    Only the direct (non-hidden) entries of folder_path are examined:
      * entries whose name ends in one of IMAGE_EXTENSIONS are left alone,
      * any other regular file is deleted,
      * any other directory is reported with a warning but not removed.
    A hidden '.ipynb_checkpoints' folder inside folder_path is removed outright.
    """
    for file_path in glob(folder_path + '/*', recursive=False):
        if file_path.lower().endswith(IMAGE_EXTENSIONS):
            continue
        if os.path.isfile(file_path):
            os.remove(file_path)
        elif os.path.isdir(file_path):
            # name the folder that was actually scanned, so the warning is also
            # correct for the regularization images
            print(f"{bcolors.BOLD}{bcolors.RED} Unexpected folder: '{file_path}' was found in '{folder_path}'.  Check and remove it.{bcolors.ENDC}")

    # force remove hidden .ipynb_checkpoints folder in images folder.
    # (the '/*' pattern above does not match hidden entries, so it needs its own check)
    checkpoints_dir = f'{folder_path}/.ipynb_checkpoints'
    if os.path.exists(checkpoints_dir):
        shutil.rmtree(checkpoints_dir)


# ====================================================
# clean the training images folder, then the regularization images folder
clean_image_folder(training_dir)
clean_image_folder(reg_dir)

In [None]:
# 6. Configure Accelerate With defaults
# Runs `accelerate config default` with mixed precision set to "bf16", the same
# precision the training command passes via --mixed_precision.
!accelerate config default --mixed_precision "bf16"

In [None]:
# 7. START TRAINING
# Launches ./sdxl_train.py through `accelerate launch`.
# The values in {braces} are filled in from the notebook variables set in step 2.
# Note that --train_data_dir / --reg_data_dir receive the ROOT image folders
# (training_root_dir / regularization_root_dir), not the
# "<repeats>_<token> <class>" subfolders created in step 3.
# Do not insert comment lines between the flags below: every line ends with a
# backslash continuation, so the whole command is a single shell line.

!accelerate launch --num_cpu_threads_per_process=2 "./sdxl_train.py" \
  --pretrained_model_name_or_path="stabilityai/stable-diffusion-xl-base-1.0" \
  --train_data_dir="{training_root_dir}" \
  --reg_data_dir="{regularization_root_dir}" \
  --output_dir="{output_dir}" \
  --output_name="{project_name}" \
  --save_model_as="safetensors" \
  --train_batch_size=2 \
  --max_train_steps=2000 \
  --save_every_n_steps=500 \
  --optimizer_type="adafactor" \
  --optimizer_args scale_parameter=False relative_step=False warmup_init=False \
  --xformers \
  --cache_latents \
  --lr_scheduler="constant_with_warmup" \
  --lr_warmup_steps=100 \
  --learning_rate=1e-5 \
  --max_grad_norm=0.0 \
  --train_text_encoder \
  --resolution="1024,1024" \
  --save_precision="float" \
  --save_n_epoch_ratio=1 \
  --max_data_loader_n_workers=1 \
  --persistent_data_loader_workers \
  --mixed_precision="bf16" \
  --full_bf16 \
  --logging_dir="logs" \
  --log_prefix="last" \
  --gradient_checkpointing
 