From bdc362f27a1f8932a12fe5e16b20f0ff3c20990d Mon Sep 17 00:00:00 2001
From: Chirag Jain <jain.chirag925@gmail.com>
Date: Mon, 29 Apr 2024 11:31:21 +0000
Subject: [PATCH] Fix memory leaks, model merging on GPUs, and missing pad tokens

---
 Dockerfile          | 11 +++++------
 Dockerfile-notebook |  7 +++----
 config-base.yaml    |  9 ++++++---
 finetune.ipynb      |  4 ++--
 mlfoundry_utils.py  |  2 +-
 requirements.txt    |  5 +++--
 train.py            |  9 +++++++--
 7 files changed, 27 insertions(+), 20 deletions(-)

diff --git a/Dockerfile b/Dockerfile
index 262a2f9..12e98ca 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,5 +1,5 @@
-# https://hub.docker.com/layers/winglian/axolotl/main-py3.11-cu121-2.1.2/images/sha256-a794e3d8562d3a9a40296726671480c45951cd6e0ad6e8f359e47e75ccbe22ab?context=explore
-FROM --platform=linux/amd64 winglian/axolotl@sha256:dc46cae262116297d23f2b445deda3d4b9759b7da5b318315665036a0e2c7140
+# https://hub.docker.com/layers/winglian/axolotl/main-20240423-py3.11-cu121-2.2.1/images/sha256-fc2b9d2b1e46d6b7c47c28a65d2c1d2c3ae4f032fafef27ffaf6ec63bf442f44?context=explore
+FROM --platform=linux/amd64 winglian/axolotl@sha256:e0b5b8a94934aaf183932c66ab3ce3ad822e91e19341ade8dbf9eccd9339d799
 USER root
 COPY requirements.txt /tmp/
 RUN pip install -U pip wheel setuptools && \
@@ -7,12 +7,11 @@ RUN pip install -U pip wheel setuptools && \
     pip install --no-cache-dir -U -r /tmp/requirements.txt
 RUN mkdir -p /packages && \
     cd /packages && \
-    git clone https://github.com/OpenAccess-AI-Collective/axolotl && \
+    git clone https://github.com/truefoundry/axolotl && \
     cd axolotl/ && \
-    git checkout 40a88e8c4a2f32b63df0fe2079f7acfe73329273
+    git checkout 7ac62f5fa6b3df526a7d0fed7c711faa20df12b0
 RUN cd /packages/axolotl/ && \
-    MAX_JOBS=1 NVCC_APPEND_FLAGS="--verbose --threads 1" pip install -v -U --no-build-isolation -e .[deepspeed,flash-attn,mamba-ssm,fused-dense-lib] && \
-    pip uninstall -y mlflow tfy-mlflow-client && \
+    MAX_JOBS=1 NVCC_APPEND_FLAGS="--threads 1" pip install -U --no-build-isolation -e .[flash-attn,mamba-ssm,fused-dense-lib] && \
     pip install --no-cache-dir -U -r /tmp/requirements.txt && \
     rm -rf /root/.cache/pip
 WORKDIR /app
diff --git a/Dockerfile-notebook b/Dockerfile-notebook
index 49ce7ef..008c792 100644
--- a/Dockerfile-notebook
+++ b/Dockerfile-notebook
@@ -19,12 +19,11 @@ RUN mkdir -p /packages && \
     chown -R jovyan:users /packages
 USER jovyan
 RUN cd /packages && \
-    git clone https://github.com/OpenAccess-AI-Collective/axolotl && \
+    git clone https://github.com/truefoundry/axolotl && \
     cd axolotl/ && \
-    git checkout 40a88e8c4a2f32b63df0fe2079f7acfe73329273
+    git checkout 7ac62f5fa6b3df526a7d0fed7c711faa20df12b0
 RUN cd /packages/axolotl/ && \
-    MAX_JOBS=1 NVCC_APPEND_FLAGS="--threads 1" pip install -v -U --no-build-isolation -e .[deepspeed,flash-attn,mamba-ssm,fused-dense-lib] && \
-    pip uninstall -y mlflow tfy-mlflow-client && \
+    MAX_JOBS=1 NVCC_APPEND_FLAGS="--threads 1" pip install -U --no-build-isolation -e .[flash-attn,mamba-ssm,fused-dense-lib] && \
     pip install --no-cache-dir -U -r /tmp/llm-finetune/notebook-requirements.txt
 COPY --chown=jovyan:users . /tmp_home/jovyan/llm-finetune/
 ENV JUPYTER_APP_LAUNCHER_PATH=/home/jovyan/llm-finetune/.jp_app_launcher/
diff --git a/config-base.yaml b/config-base.yaml
index 753ecb1..9f0538c 100644
--- a/config-base.yaml
+++ b/config-base.yaml
@@ -54,6 +54,7 @@ dataset_prepared_path: ./outputs/data/last_run_prepared
 ddp_timeout: 21600
 deepspeed: ./deepspeed_configs/3_ds_z2_config.json
 default_system_message: You are a helpful assistant. Please give a long and detailed answer.
+device_map: null
 early_stopping_patience: 10
 eval_sample_packing: False
 eval_steps: 0.1
@@ -62,7 +63,7 @@ flash_attention: True
 flash_attn_cross_entropy: True
 flash_attn_rms_norm: True
 gradient_accumulation_steps: 4
-gradient_checkpointing: True
+gradient_checkpointing: unsloth
 gradient_checkpointing_kwargs:
   use_reentrant: True
 learning_rate: 0.00001
@@ -73,15 +74,16 @@ lora_alpha: 64
 lora_dropout: 0.05
 lora_on_cpu: False
 lora_r: 32
-lora_target_modules: null
 lora_target_linear: True
+lora_target_modules: null
+low_cpu_mem_usage: True
 lr_scheduler: cosine
 max_grad_norm: 1.0
-type_of_model: AutoModelForCausalLM
 num_epochs: 10
 optimizer: adamw_torch_fused
 output_dir: ./outputs
 pad_to_sequence_len: True
+remove_unused_columns: True
 report_to: tensorboard
 resize_token_embeddings_to_32x: False
 sample_packing: True
@@ -94,6 +96,7 @@ strict: False
 tokenizer_type: AutoTokenizer
 train_on_inputs: False
 trust_remote_code: True
+type_of_model: AutoModelForCausalLM
 warmup_ratio: 0.1
 weight_decay: 0.01
 ## Added by TrueFoundry, not native to Axolotl
diff --git a/finetune.ipynb b/finetune.ipynb
index f769633..f2bc361 100644
--- a/finetune.ipynb
+++ b/finetune.ipynb
@@ -28,7 +28,7 @@
     "# This should point to your Truefoundry platform endpoint\n",
     "TRUEFOUNDRY_HOST = os.getenv(\"TFY_HOST\", \"https://<your-org>.truefoundry.cloud\")\n",
     "\n",
-    "import mlfoundry\n",
+    "from truefoundry import ml as mlfoundry\n",
     "mlfoundry.login(TRUEFOUNDRY_HOST)"
    ]
   },
@@ -435,7 +435,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.10.13"
+   "version": "3.11.9"
   }
  },
  "nbformat": 4,
diff --git a/mlfoundry_utils.py b/mlfoundry_utils.py
index 0ab7eaa..88c6f76 100644
--- a/mlfoundry_utils.py
+++ b/mlfoundry_utils.py
@@ -8,11 +8,11 @@
 import string
 from typing import Any, Dict, Optional
 
-import mlfoundry
 import numpy as np
 from huggingface_hub import scan_cache_dir
 from transformers import TrainerCallback
 from transformers.integrations import rewrite_logs
+from truefoundry import ml as mlfoundry
 
 logger = logging.getLogger("axolotl")
 
diff --git a/requirements.txt b/requirements.txt
index c109e2f..e4f2a14 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,6 +1,7 @@
 --extra-index-url https://download.pytorch.org/whl/cu121
-torch==2.1.2+cu121
+torch==2.2.1+cu121
 cloud-files==4.15.2
-mlfoundry==0.10.8rc2
+truefoundry[ml]==0.1.2
 snowflake-connector-python[pandas]==3.7.0
 pyarrow==15.0.0
+deepspeed @ git+https://github.com/truefoundry/DeepSpeed@0866580c316963ddda30ffee44de2c3e21129556
diff --git a/train.py b/train.py
index adb9bfa..1a4f34b 100644
--- a/train.py
+++ b/train.py
@@ -14,6 +14,7 @@
 from axolotl.cli.train import do_cli as axolotl_train_cli
 from axolotl.utils.dict import DictDefault
 from axolotl.utils.distributed import barrier, is_main_process, zero_first
+from axolotl.utils.models import load_tokenizer
 from transformers.utils import is_torch_bf16_gpu_available, is_torch_tf32_available
 
 from checkpoint_utils import cleanup_checkpoints, get_last_checkpoint_for_resume_if_any
@@ -83,6 +84,8 @@ def make_axolotl_config(config_base, kwargs, timestamp=None):
     axolotl_config = os.path.join(cfg.output_dir, "axolotl_config.yaml")
 
     if is_main_process():
+        set_cfg_option_if_auto(cfg, "tokenizer_config", cfg.base_model_config or cfg.base_model)
+
         os.makedirs(cfg.data_dir, exist_ok=True)
         os.makedirs(cfg.output_dir, exist_ok=True)
 
@@ -169,8 +172,10 @@ def make_axolotl_config(config_base, kwargs, timestamp=None):
         # Problem is axolotl tries fixing/adding some tokens by its own.
         # We don't want to override those decisions without understanding the consequences
         set_cfg_option_if_auto(cfg, "special_tokens", {})
+        tokenizer = load_tokenizer(cfg=cfg)
+        if not tokenizer.pad_token:
+            cfg["special_tokens"]["pad_token"] = tokenizer.eos_token
         set_cfg_option_if_auto(cfg, "lora_modules_to_save", [])
-
         logger.info(f"Prepared config: {cfg}")
         # This hack is needed because yaml dump refuses to tread DictDefault as dict
         yaml.add_representer(
@@ -207,7 +212,7 @@ def train_with_truefoundry(config_base: Path = Path("examples/"), **kwargs):
         model_dir = cfg.output_dir
         cleanup_checkpoints(output_dir=cfg.output_dir)
         if cfg.adapter in {"lora", "qlora"}:
-            axolotl_merge_lora_cli(config=axolotl_config)
+            axolotl_merge_lora_cli(config=axolotl_config, deepspeed=None, fsdp=None, device_map="auto")
             model_dir = os.path.join(model_dir, "merged")
             model_parent_dir = os.path.dirname(model_dir)
             # Copy tensorboard logs