In [1]:
################################################################################
# This script follows the procedure for fine-tuning LLaVA.

# Download the pre-trained LLaVA model from Hugging Face before running this notebook.
# All code below is run from inside the "LLaVA" folder.
# The employed LLaVA version is LLaVA-1.5-7B

# Please ensure sufficient GPU memory is available for the fine-tuning
# 6 RTX 4090 GPUs are utilized in our experiments
# If parallel training is used, please ensure the model is merged before evaluation
################################################################################

import os

# Enter the "LLaVA" folder, which the rest of this notebook is run from.
# The guard makes the cell idempotent: re-running it while the kernel is
# already inside "LLaVA" would otherwise raise FileNotFoundError, because
# there is no nested "LLaVA" directory to change into.
if os.path.basename(os.getcwd()) != "LLaVA":
    os.chdir("LLaVA")

In [2]:
from llava.model.builder import load_pretrained_model
from llava.mm_utils import get_model_name_from_path
from llava.eval.run_llava import eval_model

# Path to the pre-trained LLaVA-1.5-7B checkpoint (downloaded beforehand,
# as the notebook header requires).
model_path = "llava-v1.5-7b/"

# Arguments for the loader: no separate base model is supplied, and the
# model name is derived from the checkpoint path.
# NOTE(review): `offload_folder` is presumably where weights that do not fit
# on the device are offloaded -- confirm against load_pretrained_model.
_loader_kwargs = {
    "model_path": model_path,
    "model_base": None,
    "model_name": get_model_name_from_path(model_path),
    "offload_folder": "llava_model",
}

tokenizer, model, image_processor, context_len = load_pretrained_model(**_loader_kwargs)

[2024-07-11 14:31:33,278] [INFO] [real_accelerator.py:203:get_accelerator] Setting ds_accelerator to cuda (auto detect)


/root/miniconda3/compiler_compat/ld: cannot find -laio
collect2: error: ld returned 1 exit status




You are using a model of type llava to instantiate a model of type llava_llama. This is not supported for all configurations of models and can yield errors.


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

  return self.fget.__get__(instance, owner)()


In [4]:
# Configuration values interpolated into the fine-tuning command.

# Launcher invocation and the config file passed via --deepspeed.
DEEPSPEED_SCRIPT = "deepspeed llava/train/train_xformers.py"
DEEPSPEED_JSON = "./scripts/zero3.json"

# Values for --model_name_or_path and --vision_tower.
MODEL_NAME = "llava-v1.5-7b"
VISION_TOWER = "openai/clip-vit-large-patch14-336"

# Values for --data_path and --image_folder.
DATA_PATH = "../similarity_train.json"
IMAGE_FOLDER = "../survey_pairs"

# Value for --output_dir.
OUTPUT_DIR = "sim_llava-v1.5-7b-lora"

In [13]:
import shlex

# Shell command that launches the LoRA fine-tuning run through DeepSpeed.
#
# Each trailing backslash is consumed by Python as a line continuation inside
# the string literal, so the resulting command is a single line.
#
# Every interpolated path/identifier is passed through shlex.quote so that a
# value containing spaces or shell metacharacters reaches the training script
# as one argument. For the values configured in this notebook shlex.quote
# returns the text unchanged, so the generated command is identical.
# DEEPSPEED_SCRIPT is deliberately left unquoted: it holds two words (the
# launcher and the script path) that the shell must split.
finetune_script = f'''
{DEEPSPEED_SCRIPT} \
    --lora_enable True --lora_r 128 --lora_alpha 256 --mm_projector_lr 2e-5 \
    --deepspeed {shlex.quote(DEEPSPEED_JSON)} \
    --model_name_or_path {shlex.quote(MODEL_NAME)} \
    --version v1 \
    --data_path {shlex.quote(DATA_PATH)} \
    --image_folder {shlex.quote(IMAGE_FOLDER)} \
    --vision_tower {shlex.quote(VISION_TOWER)} \
    --mm_projector_type mlp2x_gelu \
    --mm_vision_select_layer -2 \
    --mm_use_im_start_end False \
    --mm_use_im_patch_token False \
    --image_aspect_ratio pad \
    --group_by_modality_length True \
    --bf16 True \
    --output_dir {shlex.quote(OUTPUT_DIR)} \
    --num_train_epochs 2 \
    --per_device_train_batch_size 4 \
    --per_device_eval_batch_size 2 \
    --gradient_accumulation_steps 1 \
    --evaluation_strategy "no" \
    --save_strategy "steps" \
    --save_steps 50000 \
    --save_total_limit 1 \
    --learning_rate 2e-5 \
    --weight_decay 0. \
    --warmup_ratio 0.03 \
    --lr_scheduler_type "cosine" \
    --logging_steps 1 \
    --tf32 True \
    --model_max_length 2048 \
    --gradient_checkpointing True \
    --dataloader_num_workers 4 \
    --lazy_preprocess True
'''

In [14]:
import torch

# Ask PyTorch to release cached GPU memory it is no longer using, before the
# fine-tuning command is launched in the next cell.
# NOTE(review): `model` from the earlier loading cell is still referenced by
# this kernel; memory held by live tensors is not released by empty_cache().
# Confirm whether the model should be deleted before training starts.
torch.cuda.empty_cache()

In [15]:
# Run the assembled fine-tuning command in a shell; the launcher's log output
# is printed below this cell.
!{finetune_script}

[2024-07-11 14:44:55,516] [INFO] [real_accelerator.py:203:get_accelerator] Setting ds_accelerator to cuda (auto detect)
[2024-07-11 14:44:57,705] [INFO] [runner.py:568:main] cmd = /root/miniconda3/bin/python -u -m deepspeed.launcher.launch --world_info=eyJsb2NhbGhvc3QiOiBbMCwgMSwgMiwgM119 --master_addr=127.0.0.1 --master_port=29500 --enable_each_rank_log=None llava/train/train_xformers.py --lora_enable True --lora_r 128 --lora_alpha 256 --mm_projector_lr 2e-5 --deepspeed ./scripts/zero3.json --model_name_or_path llava-v1.5-7b --version v1 --data_path ../similarity_train.json --image_folder ../survey_pairs --vision_tower openai/clip-vit-large-patch14-336 --mm_projector_type mlp2x_gelu --mm_vision_select_layer -2 --mm_use_im_start_end False --mm_use_im_patch_token False --image_aspect_ratio pad --group_by_modality_length True --bf16 True --output_dir sim_llava-v1.5-7b-lora --num_train_epochs 2 --per_device_train_batch_size 4 --per_device_eval_batch_size 2 --gradient_accumulation_steps 1 