From 715e1ac5e3f5db1c5db1df436a69c3ce433b492f Mon Sep 17 00:00:00 2001
From: iMountTai <2506700016@qq.com>
Date: Thu, 10 Aug 2023 21:04:47 +0800
Subject: [PATCH 01/12] update text-generation-webui support
---
README.md | 2 +-
README_EN.md | 2 +-
2 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/README.md b/README.md
index c4a3fa6..0bbe317 100644
--- a/README.md
+++ b/README.md
@@ -131,7 +131,7 @@
| [**🤗Transformers**](https://github.com/huggingface/transformers) | Native transformers inference interface | ✅ | ✅ | ✅ | ✅ | ❌ | ✅ | [link](https://github.com/ymcui/Chinese-LLaMA-Alpaca-2/wiki/inference_with_transformers_zh) |
| [**Colab Demo**](https://colab.research.google.com/drive/1yu0eZ3a66by8Zqm883LLtRQrguBAb9MR?usp=sharing) | Running a Gradio web demo in Colab | ✅ | ✅ | ✅ | ✅ | ❌ | ✅ | [link](https://colab.research.google.com/drive/1yu0eZ3a66by8Zqm883LLtRQrguBAb9MR?usp=sharing) |
| [**OpenAI API Calls**](https://platform.openai.com/docs/api-reference) | A server that implements the OpenAI API | ✅ | ✅ | ✅ | ❌ | ✅ | ✅ | [link](https://github.com/ymcui/Chinese-LLaMA-Alpaca-2/wiki/api_calls_zh) |
-| [**text-generation-webui**](https://github.com/oobabooga/text-generation-webui) | A tool for deploying models as a web UI | ✅ | ✅ | ✅ | ✅ | ❌ | ❌ | [link](https://github.com/ymcui/Chinese-LLaMA-Alpaca-2/wiki/text-generation-webui_zh) |
+| [**text-generation-webui**](https://github.com/oobabooga/text-generation-webui) | A tool for deploying models as a web UI | ✅ | ✅ | ✅ | ✅ | ✅ | ❌ | [link](https://github.com/ymcui/Chinese-LLaMA-Alpaca-2/wiki/text-generation-webui_zh) |
| [**LangChain**](https://github.com/hwchase17/langchain) | LLM application development framework, suitable for secondary development | ✅† | ✅ | ✅† | ❌ | ❌ | ❌ | [link](https://github.com/ymcui/Chinese-LLaMA-Alpaca-2/wiki/langchain_zh) |
| [**privateGPT**](https://github.com/imartinez/privateGPT) | LangChain-based multi-document QA framework | ✅ | ✅ | ✅ | ❌ | ❌ | ❌ | [link](https://github.com/ymcui/Chinese-LLaMA-Alpaca-2/wiki/privategpt_zh) |
diff --git a/README_EN.md b/README_EN.md
index 14f2391..29dea92 100644
--- a/README_EN.md
+++ b/README_EN.md
@@ -125,7 +125,7 @@ The models in this project mainly support the following quantization, inference,
| [**🤗Transformers**](https://github.com/huggingface/transformers) | Native transformers inference interface | ✅ | ✅ | ✅ | ✅ | ❌ | ✅ | [link](https://github.com/ymcui/Chinese-LLaMA-Alpaca-2/wiki/inference_with_transformers_en) |
| [**Colab Demo**](https://colab.research.google.com/drive/1yu0eZ3a66by8Zqm883LLtRQrguBAb9MR?usp=sharing) | Running a Gradio web demo in Colab | ✅ | ✅ | ✅ | ✅ | ❌ | ✅ | [link](https://colab.research.google.com/drive/1yu0eZ3a66by8Zqm883LLtRQrguBAb9MR?usp=sharing) |
| [**OpenAI API Calls**](https://platform.openai.com/docs/api-reference) | A server that implements the OpenAI API | ✅ | ✅ | ✅ | ❌ | ✅ | ✅ | [link](https://github.com/ymcui/Chinese-LLaMA-Alpaca-2/wiki/api_calls_en) |
-| [**text-generation-webui**](https://github.com/oobabooga/text-generation-webui) | A tool for deploying models as a web UI | ✅ | ✅ | ✅ | ✅ | ❌ | ❌ | [link](https://github.com/ymcui/Chinese-LLaMA-Alpaca-2/wiki/text-generation-webui_en) |
+| [**text-generation-webui**](https://github.com/oobabooga/text-generation-webui) | A tool for deploying models as a web UI | ✅ | ✅ | ✅ | ✅ | ✅ | ❌ | [link](https://github.com/ymcui/Chinese-LLaMA-Alpaca-2/wiki/text-generation-webui_en) |
| [**LangChain**](https://github.com/hwchase17/langchain) | LLM application development framework, suitable for secondary development | ✅† | ✅ | ✅† | ❌ | ❌ | ❌ | [link](https://github.com/ymcui/Chinese-LLaMA-Alpaca-2/wiki/langchain_en) |
| [**privateGPT**](https://github.com/imartinez/privateGPT) | LangChain-based multi-document QA framework | ✅ | ✅ | ✅ | ❌ | ❌ | ❌ | [link](https://github.com/ymcui/Chinese-LLaMA-Alpaca-2/wiki/privategpt_en) |
From e9a29f0356f60848029ca2bd8e8f2d6ffa9fccc2 Mon Sep 17 00:00:00 2001
From: iMountTai <2506700016@qq.com>
Date: Thu, 17 Aug 2023 11:38:35 +0800
Subject: [PATCH 02/12] fix data cache_path
---
scripts/training/build_dataset.py | 2 +-
scripts/training/run_clm_pt_with_peft.py | 4 +++-
scripts/training/run_clm_sft_with_peft.py | 4 +++-
3 files changed, 7 insertions(+), 3 deletions(-)
diff --git a/scripts/training/build_dataset.py b/scripts/training/build_dataset.py
index 953a6d2..9fd1fdb 100644
--- a/scripts/training/build_dataset.py
+++ b/scripts/training/build_dataset.py
@@ -62,7 +62,7 @@ def tokenization(examples):
if data_cache_dir is None:
data_cache_dir = str(os.path.dirname(file))
- cache_path = os.path.join(data_cache_dir,os.path.basename(file).split('.')[0])
+ cache_path = os.path.join(data_cache_dir,os.path.basename(file).split('.')[0]+f"_{max_seq_length}")
os.makedirs(cache_path, exist_ok=True)
try:
processed_dataset = datasets.load_from_disk(cache_path)
diff --git a/scripts/training/run_clm_pt_with_peft.py b/scripts/training/run_clm_pt_with_peft.py
index 96f403c..7c18d64 100644
--- a/scripts/training/run_clm_pt_with_peft.py
+++ b/scripts/training/run_clm_pt_with_peft.py
@@ -528,6 +528,7 @@ def group_texts(examples):
if model_args.torch_dtype in ["auto", None]
else getattr(torch, model_args.torch_dtype)
)
+ device_map = {"":int(os.environ.get("LOCAL_RANK") or 0)}
model = LlamaForCausalLM.from_pretrained(
model_args.model_name_or_path,
from_tf=bool(".ckpt" in model_args.model_name_or_path),
@@ -536,7 +537,8 @@ def group_texts(examples):
revision=model_args.model_revision,
use_auth_token=True if model_args.use_auth_token else None,
torch_dtype=torch_dtype,
- low_cpu_mem_usage=True
+ low_cpu_mem_usage=True,
+ device_map=device_map
)
else:
model = AutoModelForCausalLM.from_config(config)
diff --git a/scripts/training/run_clm_sft_with_peft.py b/scripts/training/run_clm_sft_with_peft.py
index 4daf208..f0fbd62 100644
--- a/scripts/training/run_clm_sft_with_peft.py
+++ b/scripts/training/run_clm_sft_with_peft.py
@@ -337,6 +337,7 @@ def main():
if model_args.torch_dtype in ["auto", None]
else getattr(torch, model_args.torch_dtype)
)
+ device_map = {"":int(os.environ.get("LOCAL_RANK") or 0)}
model = LlamaForCausalLM.from_pretrained(
model_args.model_name_or_path,
from_tf=bool(".ckpt" in model_args.model_name_or_path),
@@ -345,7 +346,8 @@ def main():
revision=model_args.model_revision,
use_auth_token=True if model_args.use_auth_token else None,
torch_dtype=torch_dtype,
- low_cpu_mem_usage=True
+ low_cpu_mem_usage=True,
+ device_map=device_map
)
else:
model = AutoModelForCausalLM.from_config(config)
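Embedding the sequence length in the cache directory name keeps datasets tokenized at different lengths in separate caches, so a cache built for one max_seq_length is never silently reused for another. Below is a minimal sketch of the load-or-rebuild pattern, assuming the Hugging Face datasets library; `load_or_build` and the caller-supplied `tokenization` function are hypothetical stand-ins for the script's own helpers:

```python
import os
import datasets
from datasets import load_dataset

def load_or_build(file, data_cache_dir, max_seq_length, tokenization):
    # Key the cache directory by max_seq_length so caches built for
    # different sequence lengths never shadow one another.
    if data_cache_dir is None:
        data_cache_dir = str(os.path.dirname(file))
    stem = os.path.basename(file).split('.')[0]
    cache_path = os.path.join(data_cache_dir, f"{stem}_{max_seq_length}")
    os.makedirs(cache_path, exist_ok=True)
    try:
        # Fast path: a dataset already processed at this length.
        return datasets.load_from_disk(cache_path)
    except Exception:
        raw = load_dataset("json", data_files=file)
        processed = raw.map(tokenization, batched=True,
                            remove_columns=raw["train"].column_names)
        processed.save_to_disk(cache_path)
        return processed
```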
From fa7707b2ad1861ce83b1a0254104d30f9e81fa8c Mon Sep 17 00:00:00 2001
From: iMountTai <2506700016@qq.com>
Date: Thu, 17 Aug 2023 12:43:19 +0800
Subject: [PATCH 03/12] add device_map for training
---
README.md | 1 +
README_EN.md | 1 +
scripts/training/run_clm_pt_with_peft.py | 2 +-
scripts/training/run_clm_sft_with_peft.py | 3 +--
4 files changed, 4 insertions(+), 3 deletions(-)
diff --git a/README.md b/README.md
index 8f1adcb..a76f21c 100644
--- a/README.md
+++ b/README.md
@@ -249,6 +249,7 @@
Question 4: Why not perform full pre-training but use LoRA instead?
Question 5: Does Llama-2 series support tools that support the first-gen LLaMA?
Question 6: Is Chinese-Alpaca-2 trained from Llama-2-Chat?
+Question 7: Why does fine-tuning chinese-alpaca-2-7b with 24GB of VRAM lead to an OOM error?
```
diff --git a/README_EN.md b/README_EN.md
index 930faf8..4ac2c90 100644
--- a/README_EN.md
+++ b/README_EN.md
@@ -232,6 +232,7 @@ Question 3: Do you accept third-party Pull Requests?
Question 4: Why not perform full pre-training but use LoRA instead?
Question 5: Does Llama-2 series support tools that support the first-gen LLaMA?
Question 6: Is Chinese-Alpaca-2 trained from Llama-2-Chat?
+Question 7: Why does fine-tuning chinese-alpaca-2-7b with 24GB of VRAM lead to an OOM error?
```
For specific questions and answers, please refer to the project >>> [📚 GitHub Wiki](https://github.com/ymcui/Chinese-LLaMA-Alpaca-2/wiki/faq_en)
diff --git a/scripts/training/run_clm_pt_with_peft.py b/scripts/training/run_clm_pt_with_peft.py
index 7c18d64..68f9032 100644
--- a/scripts/training/run_clm_pt_with_peft.py
+++ b/scripts/training/run_clm_pt_with_peft.py
@@ -557,7 +557,7 @@ def group_texts(examples):
if training_args.peft_path is not None:
logger.info("Peft from pre-trained model")
- model = PeftModel.from_pretrained(model, training_args.peft_path)
+ model = PeftModel.from_pretrained(model, training_args.peft_path, device_map=device_map)
else:
logger.info("Init new peft model")
target_modules = training_args.trainable.split(',')
diff --git a/scripts/training/run_clm_sft_with_peft.py b/scripts/training/run_clm_sft_with_peft.py
index f0fbd62..b6524fb 100644
--- a/scripts/training/run_clm_sft_with_peft.py
+++ b/scripts/training/run_clm_sft_with_peft.py
@@ -51,7 +51,6 @@
from peft import LoraConfig, TaskType, get_peft_model, PeftModel, get_peft_model_state_dict
from transformers.trainer_utils import PREFIX_CHECKPOINT_DIR
-IGNORE_INDEX = -100
require_version("datasets>=1.8.0", "To fix: pip install -r examples/pytorch/language-modeling/requirements.txt")
@@ -363,7 +362,7 @@ def main():
if training_args.peft_path is not None:
logger.info("Peft from pre-trained model")
- model = PeftModel.from_pretrained(model, training_args.peft_path)
+ model = PeftModel.from_pretrained(model, training_args.peft_path, device_map=device_map)
else:
logger.info("Init new peft model")
target_modules = training_args.trainable.split(',')
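Mapping the whole model (the empty-string key) to the process's LOCAL_RANK makes each distributed worker load its replica straight onto its own GPU, and passing the same map when attaching the LoRA adapter keeps the adapter weights co-located with the base model. A minimal sketch, assuming transformers and peft are installed and the placeholder paths stand in for real checkpoints:

```python
import os
import torch
from transformers import LlamaForCausalLM
from peft import PeftModel

# torchrun exports LOCAL_RANK per worker; single-process runs fall back
# to GPU 0. The "" key applies the mapping to the entire model.
device_map = {"": int(os.environ.get("LOCAL_RANK") or 0)}

model = LlamaForCausalLM.from_pretrained(
    "path/to/hf/llama-2/dir",            # placeholder, as in the scripts
    torch_dtype=torch.float16,
    low_cpu_mem_usage=True,
    device_map=device_map,
)
# Load the LoRA weights onto the same device as the base model.
model = PeftModel.from_pretrained(
    model, "path/to/peft/model/dir", device_map=device_map
)
```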
From e3731fb67a35c10efc17c0b9d8b29bb45a15043c Mon Sep 17 00:00:00 2001
From: Xin Yao <35353688+iMountTai@users.noreply.github.com>
Date: Thu, 17 Aug 2023 13:01:56 +0800
Subject: [PATCH 04/12] Update README.md
---
README.md | 3 ---
1 file changed, 3 deletions(-)
diff --git a/README.md b/README.md
index a76f21c..8444263 100644
--- a/README.md
+++ b/README.md
@@ -144,9 +144,6 @@
| [**🤗Transformers**](https://github.com/huggingface/transformers) | Native transformers inference interface | ✅ | ✅ | ✅ | ✅ | ❌ | ✅ | [link](https://github.com/ymcui/Chinese-LLaMA-Alpaca-2/wiki/inference_with_transformers_zh) |
| [**Colab Demo**](https://colab.research.google.com/drive/1yu0eZ3a66by8Zqm883LLtRQrguBAb9MR?usp=sharing) | Running a Gradio web demo in Colab | ✅ | ✅ | ✅ | ✅ | ❌ | ✅ | [link](https://colab.research.google.com/drive/1yu0eZ3a66by8Zqm883LLtRQrguBAb9MR?usp=sharing) |
| [**OpenAI API Calls**](https://platform.openai.com/docs/api-reference) | A server that implements the OpenAI API | ✅ | ✅ | ✅ | ❌ | ✅ | ✅ | [link](https://github.com/ymcui/Chinese-LLaMA-Alpaca-2/wiki/api_calls_zh) |
-<<<<<<< HEAD
-| [**text-generation-webui**](https://github.com/oobabooga/text-generation-webui) | A tool for deploying models as a web UI | ✅ | ✅ | ✅ | ✅ | ✅ | ❌ | [link](https://github.com/ymcui/Chinese-LLaMA-Alpaca-2/wiki/text-generation-webui_zh) |
-=======
| [**text-generation-webui**](https://github.com/oobabooga/text-generation-webui) | A tool for deploying models as a web UI | ✅ | ✅ | ✅ | ✅ | ✅† | ❌ | [link](https://github.com/ymcui/Chinese-LLaMA-Alpaca-2/wiki/text-generation-webui_zh) |
>>>>>>> fced4ce77289f8104e7c434e70ad56540b854dcf
| [**LangChain**](https://github.com/hwchase17/langchain) | LLM application development framework, suitable for secondary development | ✅† | ✅ | ✅† | ❌ | ❌ | ❌ | [link](https://github.com/ymcui/Chinese-LLaMA-Alpaca-2/wiki/langchain_zh) |
From 94d00603842900d992efdec27f55f470d739bbf3 Mon Sep 17 00:00:00 2001
From: Xin Yao <35353688+iMountTai@users.noreply.github.com>
Date: Thu, 17 Aug 2023 13:02:37 +0800
Subject: [PATCH 05/12] Update README.md
---
README.md | 1 -
1 file changed, 1 deletion(-)
diff --git a/README.md b/README.md
index 8444263..7b7b083 100644
--- a/README.md
+++ b/README.md
@@ -145,7 +145,6 @@
| [**Colab Demo**](https://colab.research.google.com/drive/1yu0eZ3a66by8Zqm883LLtRQrguBAb9MR?usp=sharing) | Running a Gradio web demo in Colab | ✅ | ✅ | ✅ | ✅ | ❌ | ✅ | [link](https://colab.research.google.com/drive/1yu0eZ3a66by8Zqm883LLtRQrguBAb9MR?usp=sharing) |
| [**OpenAI API Calls**](https://platform.openai.com/docs/api-reference) | A server that implements the OpenAI API | ✅ | ✅ | ✅ | ❌ | ✅ | ✅ | [link](https://github.com/ymcui/Chinese-LLaMA-Alpaca-2/wiki/api_calls_zh) |
| [**text-generation-webui**](https://github.com/oobabooga/text-generation-webui) | A tool for deploying models as a web UI | ✅ | ✅ | ✅ | ✅ | ✅† | ❌ | [link](https://github.com/ymcui/Chinese-LLaMA-Alpaca-2/wiki/text-generation-webui_zh) |
->>>>>>> fced4ce77289f8104e7c434e70ad56540b854dcf
| [**LangChain**](https://github.com/hwchase17/langchain) | LLM application development framework, suitable for secondary development | ✅† | ✅ | ✅† | ❌ | ❌ | ❌ | [link](https://github.com/ymcui/Chinese-LLaMA-Alpaca-2/wiki/langchain_zh) |
| [**privateGPT**](https://github.com/imartinez/privateGPT) | LangChain-based multi-document QA framework | ✅ | ✅ | ✅ | ❌ | ❌ | ❌ | [link](https://github.com/ymcui/Chinese-LLaMA-Alpaca-2/wiki/privategpt_zh) |
From 1d668b7484c0417c3799f95100edf89966bd423e Mon Sep 17 00:00:00 2001
From: Xin Yao <35353688+iMountTai@users.noreply.github.com>
Date: Thu, 17 Aug 2023 13:03:21 +0800
Subject: [PATCH 06/12] Update README_EN.md
---
README_EN.md | 4 ----
1 file changed, 4 deletions(-)
diff --git a/README_EN.md b/README_EN.md
index 4ac2c90..51cee13 100644
--- a/README_EN.md
+++ b/README_EN.md
@@ -138,11 +138,7 @@ The models in this project mainly support the following quantization, inference,
| [**🤗Transformers**](https://github.com/huggingface/transformers) | Native transformers inference interface | ✅ | ✅ | ✅ | ✅ | ❌ | ✅ | [link](https://github.com/ymcui/Chinese-LLaMA-Alpaca-2/wiki/inference_with_transformers_en) |
| [**Colab Demo**](https://colab.research.google.com/drive/1yu0eZ3a66by8Zqm883LLtRQrguBAb9MR?usp=sharing) | Running a Gradio web demo in Colab | ✅ | ✅ | ✅ | ✅ | ❌ | ✅ | [link](https://colab.research.google.com/drive/1yu0eZ3a66by8Zqm883LLtRQrguBAb9MR?usp=sharing) |
| [**OpenAI API Calls**](https://platform.openai.com/docs/api-reference) | A server that implements the OpenAI API | ✅ | ✅ | ✅ | ❌ | ✅ | ✅ | [link](https://github.com/ymcui/Chinese-LLaMA-Alpaca-2/wiki/api_calls_en) |
-<<<<<<< HEAD
-| [**text-generation-webui**](https://github.com/oobabooga/text-generation-webui) | A tool for deploying models as a web UI | ✅ | ✅ | ✅ | ✅ | ✅ | ❌ | [link](https://github.com/ymcui/Chinese-LLaMA-Alpaca-2/wiki/text-generation-webui_en) |
-=======
| [**text-generation-webui**](https://github.com/oobabooga/text-generation-webui) | A tool for deploying models as a web UI | ✅ | ✅ | ✅ | ✅ | ✅† | ❌ | [link](https://github.com/ymcui/Chinese-LLaMA-Alpaca-2/wiki/text-generation-webui_en) |
->>>>>>> fced4ce77289f8104e7c434e70ad56540b854dcf
| [**LangChain**](https://github.com/hwchase17/langchain) | LLM application development framework, suitable for secondary development | ✅† | ✅ | ✅† | ❌ | ❌ | ❌ | [link](https://github.com/ymcui/Chinese-LLaMA-Alpaca-2/wiki/langchain_en) |
| [**privateGPT**](https://github.com/imartinez/privateGPT) | LangChain-based multi-document QA framework | ✅ | ✅ | ✅ | ❌ | ❌ | ❌ | [link](https://github.com/ymcui/Chinese-LLaMA-Alpaca-2/wiki/privategpt_en) |
From 8ef6dd8dc6cd673d503cf5b4b75a90a4616ccca3 Mon Sep 17 00:00:00 2001
From: iMountTai <2506700016@qq.com>
Date: Thu, 17 Aug 2023 14:11:36 +0800
Subject: [PATCH 07/12] delete unused params
---
scripts/training/run_clm_pt_with_peft.py | 44 ++++++++++-------------
scripts/training/run_clm_sft_with_peft.py | 41 ++++++++++-----------
scripts/training/run_pt.sh | 6 ++--
scripts/training/run_sft.sh | 4 +--
4 files changed, 41 insertions(+), 54 deletions(-)
diff --git a/scripts/training/run_clm_pt_with_peft.py b/scripts/training/run_clm_pt_with_peft.py
index 68f9032..3ca05c3 100644
--- a/scripts/training/run_clm_pt_with_peft.py
+++ b/scripts/training/run_clm_pt_with_peft.py
@@ -467,13 +467,13 @@ def group_texts(examples):
for idx, file in enumerate(files):
data_file = os.path.join(path, file)
filename = ''.join(file.split(".")[:-1])
- cache_path = os.path.join(data_args.data_cache_dir, filename)
+ cache_path = os.path.join(data_args.data_cache_dir, filename+f"_{block_size}")
os.makedirs(cache_path, exist_ok=True)
try:
processed_dataset = datasets.load_from_disk(cache_path, keep_in_memory=False)
logger.info(f'training datasets-{filename} has been loaded from disk')
except Exception:
- cache_dir = os.path.join(data_args.data_cache_dir, filename+"_text")
+ cache_dir = os.path.join(data_args.data_cache_dir, filename+f"_text_{block_size}")
os.makedirs(cache_dir, exist_ok=True)
raw_dataset = load_dataset("text", data_files=data_file, cache_dir=cache_dir, keep_in_memory=False)
logger.info(f"{file} has been loaded")
@@ -503,7 +503,6 @@ def group_texts(examples):
else:
assert lm_datasets.features.type == processed_dataset["train"].features.type
lm_datasets = concatenate_datasets([lm_datasets, processed_dataset["train"]])
-
lm_datasets = lm_datasets.train_test_split(test_size = data_args.validation_split_percentage)
if training_args.do_train:
@@ -522,28 +521,23 @@ def group_texts(examples):
logger.info(f"Num eval_samples {len(eval_dataset)}")
logger.info("Evaluation example:")
logger.info(tokenizer.decode(eval_dataset[0]['input_ids']))
- if model_args.model_name_or_path:
- torch_dtype = (
- model_args.torch_dtype
- if model_args.torch_dtype in ["auto", None]
- else getattr(torch, model_args.torch_dtype)
- )
- device_map = {"":int(os.environ.get("LOCAL_RANK") or 0)}
- model = LlamaForCausalLM.from_pretrained(
- model_args.model_name_or_path,
- from_tf=bool(".ckpt" in model_args.model_name_or_path),
- config=config,
- cache_dir=model_args.cache_dir,
- revision=model_args.model_revision,
- use_auth_token=True if model_args.use_auth_token else None,
- torch_dtype=torch_dtype,
- low_cpu_mem_usage=True,
- device_map=device_map
- )
- else:
- model = AutoModelForCausalLM.from_config(config)
- n_params = sum({p.data_ptr(): p.numel() for p in model.parameters()}.values())
- logger.info(f"Training new model from scratch - Total size={n_params/2**20:.2f}M params")
+ torch_dtype = (
+ model_args.torch_dtype
+ if model_args.torch_dtype in ["auto", None]
+ else getattr(torch, model_args.torch_dtype)
+ )
+ device_map = {"":int(os.environ.get("LOCAL_RANK") or 0)}
+ model = LlamaForCausalLM.from_pretrained(
+ model_args.model_name_or_path,
+ from_tf=bool(".ckpt" in model_args.model_name_or_path),
+ config=config,
+ cache_dir=model_args.cache_dir,
+ revision=model_args.model_revision,
+ use_auth_token=True if model_args.use_auth_token else None,
+ torch_dtype=torch_dtype,
+ low_cpu_mem_usage=True,
+ device_map=device_map
+ )
model_vocab_size = model.get_output_embeddings().weight.size(0)
tokenizer_vocab_size = len(tokenizer)
diff --git a/scripts/training/run_clm_sft_with_peft.py b/scripts/training/run_clm_sft_with_peft.py
index b6524fb..d71c411 100644
--- a/scripts/training/run_clm_sft_with_peft.py
+++ b/scripts/training/run_clm_sft_with_peft.py
@@ -294,7 +294,7 @@ def main():
"You can do it from another script, save it, and load it from here, using --tokenizer_name."
)
- if (len(tokenizer))!=55296:
+ if (len(tokenizer)) != 55296:
raise ValueError(f"The vocab size of the tokenizer should be 55296, but found {len(tokenizer)}.\n"
"Please use Chinese-LLaMA-2 tokenizer.")
@@ -330,28 +330,23 @@ def main():
logger.info("Evaluation example:")
logger.info(tokenizer.decode(eval_dataset[0]['input_ids']))
- if model_args.model_name_or_path:
- torch_dtype = (
- model_args.torch_dtype
- if model_args.torch_dtype in ["auto", None]
- else getattr(torch, model_args.torch_dtype)
- )
- device_map = {"":int(os.environ.get("LOCAL_RANK") or 0)}
- model = LlamaForCausalLM.from_pretrained(
- model_args.model_name_or_path,
- from_tf=bool(".ckpt" in model_args.model_name_or_path),
- config=config,
- cache_dir=model_args.cache_dir,
- revision=model_args.model_revision,
- use_auth_token=True if model_args.use_auth_token else None,
- torch_dtype=torch_dtype,
- low_cpu_mem_usage=True,
- device_map=device_map
- )
- else:
- model = AutoModelForCausalLM.from_config(config)
- n_params = sum({p.data_ptr(): p.numel() for p in model.parameters()}.values())
- logger.info(f"Training new model from scratch - Total size={n_params/2**20:.2f}M params")
+ torch_dtype = (
+ model_args.torch_dtype
+ if model_args.torch_dtype in ["auto", None]
+ else getattr(torch, model_args.torch_dtype)
+ )
+ device_map = {"":int(os.environ.get("LOCAL_RANK") or 0)}
+ model = LlamaForCausalLM.from_pretrained(
+ model_args.model_name_or_path,
+ from_tf=bool(".ckpt" in model_args.model_name_or_path),
+ config=config,
+ cache_dir=model_args.cache_dir,
+ revision=model_args.model_revision,
+ use_auth_token=True if model_args.use_auth_token else None,
+ torch_dtype=torch_dtype,
+ low_cpu_mem_usage=True,
+ device_map=device_map
+ )
model_vocab_size = model.get_input_embeddings().weight.shape[0]
logger.info(f"Model vocab size: {model_vocab_size}")
diff --git a/scripts/training/run_pt.sh b/scripts/training/run_pt.sh
index b409eac..56fc540 100644
--- a/scripts/training/run_pt.sh
+++ b/scripts/training/run_pt.sh
@@ -10,8 +10,8 @@ chinese_tokenizer_path=path/to/chinese/llama-2/tokenizer/dir
dataset_dir=path/to/pt/data/dir
data_cache=temp_data_cache_dir
per_device_train_batch_size=1
-per_device_eval_batch_size=1
gradient_accumulation_steps=8
+block_size=512
output_dir=output_dir
deepspeed_config_file=ds_zero2_no_offload.json
@@ -22,9 +22,7 @@ torchrun --nnodes 1 --nproc_per_node 1 run_clm_pt_with_peft.py \
--tokenizer_name_or_path ${chinese_tokenizer_path} \
--dataset_dir ${dataset_dir} \
--data_cache_dir ${data_cache} \
- --validation_split_percentage 0.001 \
--per_device_train_batch_size ${per_device_train_batch_size} \
- --per_device_eval_batch_size ${per_device_eval_batch_size} \
--do_train \
--seed $RANDOM \
--fp16 \
@@ -40,7 +38,7 @@ torchrun --nnodes 1 --nproc_per_node 1 run_clm_pt_with_peft.py \
--save_steps 200 \
--gradient_accumulation_steps ${gradient_accumulation_steps} \
--preprocessing_num_workers 8 \
- --block_size 1024 \
+ --block_size ${block_size} \
--output_dir ${output_dir} \
--overwrite_output_dir \
--ddp_timeout 30000 \
diff --git a/scripts/training/run_sft.sh b/scripts/training/run_sft.sh
index 0c31a8b..73a5ead 100644
--- a/scripts/training/run_sft.sh
+++ b/scripts/training/run_sft.sh
@@ -11,6 +11,7 @@ dataset_dir=path/to/sft/data/dir
per_device_train_batch_size=1
per_device_eval_batch_size=1
gradient_accumulation_steps=8
+max_seq_length=512
output_dir=output_dir
peft_model=path/to/peft/model/dir
validation_file=validation_file_name
@@ -22,7 +23,6 @@ torchrun --nnodes 1 --nproc_per_node 1 run_clm_sft_with_peft.py \
--model_name_or_path ${pretrained_model} \
--tokenizer_name_or_path ${chinese_tokenizer_path} \
--dataset_dir ${dataset_dir} \
- --validation_split_percentage 0.001 \
--per_device_train_batch_size ${per_device_train_batch_size} \
--per_device_eval_batch_size ${per_device_eval_batch_size} \
--do_train \
@@ -43,7 +43,7 @@ torchrun --nnodes 1 --nproc_per_node 1 run_clm_sft_with_peft.py \
--save_steps 200 \
--gradient_accumulation_steps ${gradient_accumulation_steps} \
--preprocessing_num_workers 8 \
- --max_seq_length 1024 \
+ --max_seq_length ${max_seq_length} \
--output_dir ${output_dir} \
--overwrite_output_dir \
--ddp_timeout 30000 \
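With the train-from-scratch branch removed, loading a pretrained checkpoint is the only path, and the next thing both scripts do is reconcile the model's embedding size with the tokenizer; the Chinese-LLaMA-2 tokenizer is expected to contain exactly 55296 tokens. A minimal sketch of that sanity check, assuming `model` and `tokenizer` are already loaded; `check_vocab` is a hypothetical wrapper around the scripts' inline checks:

```python
def check_vocab(model, tokenizer, expected=55296):
    # The Chinese-LLaMA-2 tokenizer extends the original LLaMA vocabulary
    # to 55296 tokens; anything else means the wrong tokenizer was passed.
    tokenizer_vocab_size = len(tokenizer)
    if tokenizer_vocab_size != expected:
        raise ValueError(
            f"The vocab size of the tokenizer should be {expected}, but found "
            f"{tokenizer_vocab_size}. Please use the Chinese-LLaMA-2 tokenizer."
        )
    # The checkpoint's output embedding must already be resized to match.
    model_vocab_size = model.get_output_embeddings().weight.size(0)
    if model_vocab_size != tokenizer_vocab_size:
        raise ValueError(
            f"Model vocab size ({model_vocab_size}) does not match the "
            f"tokenizer ({tokenizer_vocab_size}); resize the embeddings first."
        )
```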
From 796bb874f7f312b474aef7c54204ce1364e0a2a8 Mon Sep 17 00:00:00 2001
From: iMountTai <2506700016@qq.com>
Date: Thu, 17 Aug 2023 14:20:27 +0800
Subject: [PATCH 08/12] add use_cache=False
---
scripts/training/run_clm_pt_with_peft.py | 1 +
scripts/training/run_clm_sft_with_peft.py | 1 +
2 files changed, 2 insertions(+)
diff --git a/scripts/training/run_clm_pt_with_peft.py b/scripts/training/run_clm_pt_with_peft.py
index 3ca05c3..cd36b7a 100644
--- a/scripts/training/run_clm_pt_with_peft.py
+++ b/scripts/training/run_clm_pt_with_peft.py
@@ -538,6 +538,7 @@ def group_texts(examples):
low_cpu_mem_usage=True,
device_map=device_map
)
+ model.config.use_cache = False
model_vocab_size = model.get_output_embeddings().weight.size(0)
tokenizer_vocab_size = len(tokenizer)
diff --git a/scripts/training/run_clm_sft_with_peft.py b/scripts/training/run_clm_sft_with_peft.py
index d71c411..fea0879 100644
--- a/scripts/training/run_clm_sft_with_peft.py
+++ b/scripts/training/run_clm_sft_with_peft.py
@@ -347,6 +347,7 @@ def main():
low_cpu_mem_usage=True,
device_map=device_map
)
+ model.config.use_cache = False
model_vocab_size = model.get_input_embeddings().weight.shape[0]
logger.info(f"Model vocab size: {model_vocab_size}")
From e16d943078a57838d3b3548b274515bafe7c5468 Mon Sep 17 00:00:00 2001
From: Xin Yao <35353688+iMountTai@users.noreply.github.com>
Date: Thu, 17 Aug 2023 14:45:48 +0800
Subject: [PATCH 09/12] Update run_pt.sh
---
scripts/training/run_pt.sh | 1 +
1 file changed, 1 insertion(+)
diff --git a/scripts/training/run_pt.sh b/scripts/training/run_pt.sh
index 56fc540..40813a3 100644
--- a/scripts/training/run_pt.sh
+++ b/scripts/training/run_pt.sh
@@ -22,6 +22,7 @@ torchrun --nnodes 1 --nproc_per_node 1 run_clm_pt_with_peft.py \
--tokenizer_name_or_path ${chinese_tokenizer_path} \
--dataset_dir ${dataset_dir} \
--data_cache_dir ${data_cache} \
+ --validation_split_percentage 0.001 \
--per_device_train_batch_size ${per_device_train_batch_size} \
--do_train \
--seed $RANDOM \
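Restoring --validation_split_percentage matters because the pre-training script has no separate validation file: it carves the eval set out of the training data with train_test_split, as shown in the run_clm_pt_with_peft.py context earlier. A minimal sketch with a toy in-memory dataset:

```python
from datasets import Dataset

# Toy stand-in for the concatenated lm_datasets built by the script.
lm_datasets = Dataset.from_dict(
    {"input_ids": [[i, i + 1, i + 2] for i in range(1000)]}
)

# 0.001 reserves one example in a thousand for evaluation, matching the
# --validation_split_percentage value passed above.
split = lm_datasets.train_test_split(test_size=0.001)
train_dataset, eval_dataset = split["train"], split["test"]
print(len(train_dataset), len(eval_dataset))  # 999 1
```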
From 74b701015bf954c3d5ade6e819807054605e40e3 Mon Sep 17 00:00:00 2001
From: iMountTai <2506700016@qq.com>
Date: Fri, 18 Aug 2023 11:48:22 +0800
Subject: [PATCH 10/12] delete modules_to_save
---
scripts/training/run_pt.sh | 10 +++-------
scripts/training/run_sft.sh | 12 +++---------
2 files changed, 6 insertions(+), 16 deletions(-)
diff --git a/scripts/training/run_pt.sh b/scripts/training/run_pt.sh
index 56fc540..7c4a7dd 100644
--- a/scripts/training/run_pt.sh
+++ b/scripts/training/run_pt.sh
@@ -2,11 +2,10 @@ lr=2e-4
lora_rank=64
lora_alpha=128
lora_trainable="q_proj,v_proj,k_proj,o_proj,gate_proj,down_proj,up_proj"
-modules_to_save="embed_tokens,lm_head"
lora_dropout=0.05
-pretrained_model=path/to/hf/llama-2/dir
-chinese_tokenizer_path=path/to/chinese/llama-2/tokenizer/dir
+pretrained_model=path/to/hf/chinese-llama-2/dir
+chinese_tokenizer_path=path/to/chinese/chinese-llama-2/tokenizer/dir
dataset_dir=path/to/pt/data/dir
data_cache=temp_data_cache_dir
per_device_train_batch_size=1
@@ -46,8 +45,5 @@ torchrun --nnodes 1 --nproc_per_node 1 run_clm_pt_with_peft.py \
--lora_rank ${lora_rank} \
--lora_alpha ${lora_alpha} \
--trainable ${lora_trainable} \
- --modules_to_save ${modules_to_save} \
--lora_dropout ${lora_dropout} \
- --torch_dtype float16 \
- --gradient_checkpointing \
- --ddp_find_unused_parameters False
+ --torch_dtype float16
diff --git a/scripts/training/run_sft.sh b/scripts/training/run_sft.sh
index 73a5ead..c180a42 100644
--- a/scripts/training/run_sft.sh
+++ b/scripts/training/run_sft.sh
@@ -2,18 +2,16 @@ lr=1e-4
lora_rank=64
lora_alpha=128
lora_trainable="q_proj,v_proj,k_proj,o_proj,gate_proj,down_proj,up_proj"
-modules_to_save="embed_tokens,lm_head"
lora_dropout=0.05
-pretrained_model=path/to/hf/llama-2/or/merged/llama-2/dir/or/model_id
-chinese_tokenizer_path=path/to/chinese/llama-2/tokenizer/dir
+pretrained_model=path/to/hf/chinese-alpaca-2/dir/or/model_id
+chinese_tokenizer_path=path/to/chinese/chinese-alpaca-2/tokenizer/dir
dataset_dir=path/to/sft/data/dir
per_device_train_batch_size=1
per_device_eval_batch_size=1
gradient_accumulation_steps=8
max_seq_length=512
output_dir=output_dir
-peft_model=path/to/peft/model/dir
validation_file=validation_file_name
deepspeed_config_file=ds_zero2_no_offload.json
@@ -51,10 +49,6 @@ torchrun --nnodes 1 --nproc_per_node 1 run_clm_sft_with_peft.py \
--lora_rank ${lora_rank} \
--lora_alpha ${lora_alpha} \
--trainable ${lora_trainable} \
- --modules_to_save ${modules_to_save} \
--lora_dropout ${lora_dropout} \
--torch_dtype float16 \
- --validation_file ${validation_file} \
- --peft_path ${peft_model} \
- --gradient_checkpointing \
- --ddp_find_unused_parameters False
+ --validation_file ${validation_file}
From d6ba233386d6c1b44d0b903c55cc4e7ffbb7c5ca Mon Sep 17 00:00:00 2001
From: iMountTai <2506700016@qq.com>
Date: Wed, 23 Aug 2023 00:37:57 +0800
Subject: [PATCH 11/12] add some suggestions for training
---
scripts/training/run_pt.sh | 6 +++++-
scripts/training/run_sft.sh | 8 ++++++--
2 files changed, 11 insertions(+), 3 deletions(-)
diff --git a/scripts/training/run_pt.sh b/scripts/training/run_pt.sh
index 8852ea4..e103ab5 100644
--- a/scripts/training/run_pt.sh
+++ b/scripts/training/run_pt.sh
@@ -1,10 +1,13 @@
+# 运行脚本前请仔细阅读wiki(https://github.com/ymcui/Chinese-LLaMA-Alpaca-2/wiki/pt_scripts_zh)
+# Read the wiki(https://github.com/ymcui/Chinese-LLaMA-Alpaca-2/wiki/pt_scripts_zh) carefully before running the script
lr=2e-4
lora_rank=64
lora_alpha=128
lora_trainable="q_proj,v_proj,k_proj,o_proj,gate_proj,down_proj,up_proj"
+modules_to_save="embed_tokens,lm_head"
lora_dropout=0.05
-pretrained_model=path/to/hf/chinese-llama-2/dir
+pretrained_model=path/to/hf/llama-2/dir
chinese_tokenizer_path=path/to/chinese/chinese-llama-2/tokenizer/dir
dataset_dir=path/to/pt/data/dir
data_cache=temp_data_cache_dir
@@ -47,4 +50,5 @@ torchrun --nnodes 1 --nproc_per_node 1 run_clm_pt_with_peft.py \
--lora_alpha ${lora_alpha} \
--trainable ${lora_trainable} \
--lora_dropout ${lora_dropout} \
+ --modules_to_save ${modules_to_save} \
--torch_dtype float16
diff --git a/scripts/training/run_sft.sh b/scripts/training/run_sft.sh
index c180a42..a74986d 100644
--- a/scripts/training/run_sft.sh
+++ b/scripts/training/run_sft.sh
@@ -1,11 +1,14 @@
+# 运行脚本前请仔细阅读wiki(https://github.com/ymcui/Chinese-LLaMA-Alpaca-2/wiki/sft_scripts_zh)
+# Read the wiki(https://github.com/ymcui/Chinese-LLaMA-Alpaca-2/wiki/sft_scripts_zh) carefully before running the script
lr=1e-4
lora_rank=64
lora_alpha=128
lora_trainable="q_proj,v_proj,k_proj,o_proj,gate_proj,down_proj,up_proj"
+modules_to_save="embed_tokens,lm_head"
lora_dropout=0.05
-pretrained_model=path/to/hf/chinese-alpaca-2/dir/or/model_id
-chinese_tokenizer_path=path/to/chinese/chinese-alpaca-2/tokenizer/dir
+pretrained_model=path/to/hf/llama-2/or/chinese-llama-2/dir/or/model_id
+chinese_tokenizer_path=path/to/chinese-llama-2/tokenizer/dir
dataset_dir=path/to/sft/data/dir
per_device_train_batch_size=1
per_device_eval_batch_size=1
@@ -50,5 +53,6 @@ torchrun --nnodes 1 --nproc_per_node 1 run_clm_sft_with_peft.py \
--lora_alpha ${lora_alpha} \
--trainable ${lora_trainable} \
--lora_dropout ${lora_dropout} \
+ --modules_to_save ${modules_to_save} \
--torch_dtype float16 \
--validation_file ${validation_file}
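Because the Chinese models extend the original LLaMA vocabulary, the resized input and output embeddings must be trained alongside the LoRA adapters; that is what modules_to_save does: the listed modules are fully fine-tuned and saved rather than low-rank-adapted. A minimal sketch of the corresponding LoraConfig, using the hyperparameters from the script above:

```python
from peft import LoraConfig, TaskType

lora_config = LoraConfig(
    task_type=TaskType.CAUSAL_LM,
    r=64,                 # lora_rank
    lora_alpha=128,
    lora_dropout=0.05,
    target_modules=["q_proj", "v_proj", "k_proj", "o_proj",
                    "gate_proj", "down_proj", "up_proj"],
    # Fully train (and save) the resized embedding matrices so the added
    # Chinese tokens learn useful representations; everything else stays
    # frozen apart from the LoRA adapters.
    modules_to_save=["embed_tokens", "lm_head"],
)
# Applied to a loaded model (sketch):
#   from peft import get_peft_model
#   model = get_peft_model(model, lora_config)
```

Training the full embedding matrices costs substantially more memory than LoRA alone, which is consistent with the 24GB OOM question added to the FAQ above.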
From 7ff746c94f0adcc57e019a47c1cf554ace141182 Mon Sep 17 00:00:00 2001
From: Xin Yao <35353688+iMountTai@users.noreply.github.com>
Date: Wed, 23 Aug 2023 00:47:44 +0800
Subject: [PATCH 12/12] Update run_pt.sh
---
scripts/training/run_pt.sh | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/scripts/training/run_pt.sh b/scripts/training/run_pt.sh
index e103ab5..663a326 100644
--- a/scripts/training/run_pt.sh
+++ b/scripts/training/run_pt.sh
@@ -8,7 +8,7 @@ modules_to_save="embed_tokens,lm_head"
lora_dropout=0.05
pretrained_model=path/to/hf/llama-2/dir
-chinese_tokenizer_path=path/to/chinese/chinese-llama-2/tokenizer/dir
+chinese_tokenizer_path=path/to/chinese-llama-2/tokenizer/dir
dataset_dir=path/to/pt/data/dir
data_cache=temp_data_cache_dir
per_device_train_batch_size=1