From 715e1ac5e3f5db1c5db1df436a69c3ce433b492f Mon Sep 17 00:00:00 2001
From: iMountTai <2506700016@qq.com>
Date: Thu, 10 Aug 2023 21:04:47 +0800
Subject: [PATCH 01/12] update text-generation-webui support

---
 README.md    | 2 +-
 README_EN.md | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/README.md b/README.md
index c4a3fa6..0bbe317 100644
--- a/README.md
+++ b/README.md
@@ -131,7 +131,7 @@
 | [**🤗Transformers**](https://github.com/huggingface/transformers) | 原生transformers推理接口 | ✅ | ✅ | ✅ | ✅ | ❌ | ✅ | [link](https://github.com/ymcui/Chinese-LLaMA-Alpaca-2/wiki/inference_with_transformers_zh) |
 | [**Colab Demo**](https://colab.research.google.com/drive/1yu0eZ3a66by8Zqm883LLtRQrguBAb9MR?usp=sharing) | 在Colab中启动交互界面 | ✅ | ✅ | ✅ | ✅ | ❌ | ✅ | [link](https://colab.research.google.com/drive/1yu0eZ3a66by8Zqm883LLtRQrguBAb9MR?usp=sharing) |
 | [**仿OpenAI API调用**](https://platform.openai.com/docs/api-reference) | 仿OpenAI API接口的服务器Demo | ✅ | ✅ | ✅ | ❌ | ✅ | ✅ | [link](https://github.com/ymcui/Chinese-LLaMA-Alpaca-2/wiki/api_calls_zh) |
-| [**text-generation-webui**](https://github.com/oobabooga/text-generation-webui) | 前端Web UI界面的部署方式 | ✅ | ✅ | ✅ | ✅ | ❌ | ❌ | [link](https://github.com/ymcui/Chinese-LLaMA-Alpaca-2/wiki/text-generation-webui_zh) |
+| [**text-generation-webui**](https://github.com/oobabooga/text-generation-webui) | 前端Web UI界面的部署方式 | ✅ | ✅ | ✅ | ✅ | ✅ | ❌ | [link](https://github.com/ymcui/Chinese-LLaMA-Alpaca-2/wiki/text-generation-webui_zh) |
 | [**LangChain**](https://github.com/hwchase17/langchain) | 适合二次开发的大模型应用开源框架 | ✅ | ✅ | ✅ | ❌ | ❌ | ❌ | [link](https://github.com/ymcui/Chinese-LLaMA-Alpaca-2/wiki/langchain_zh) |
 | [**privateGPT**](https://github.com/imartinez/privateGPT) | 基于LangChain的多文档本地问答框架 | ✅ | ✅ | ✅ | ❌ | ❌ | ❌ | [link](https://github.com/ymcui/Chinese-LLaMA-Alpaca-2/wiki/privategpt_zh) |
diff --git a/README_EN.md b/README_EN.md
index 14f2391..29dea92 100644
--- a/README_EN.md
+++ b/README_EN.md
@@ -125,7 +125,7 @@ The models in this project mainly support the following quantization, inference,
 | [**🤗Transformers**](https://github.com/huggingface/transformers) | Native transformers inference interface | ✅ | ✅ | ✅ | ✅ | ❌ | ✅ | [link](https://github.com/ymcui/Chinese-LLaMA-Alpaca-2/wiki/inference_with_transformers_en) |
 | [**Colab Demo**](https://colab.research.google.com/drive/1yu0eZ3a66by8Zqm883LLtRQrguBAb9MR?usp=sharing) | Running a Gradio web demo in Colab | ✅ | ✅ | ✅ | ✅ | ❌ | ✅ | [link](https://colab.research.google.com/drive/1yu0eZ3a66by8Zqm883LLtRQrguBAb9MR?usp=sharing) |
 | [**OpenAI API Calls**](https://platform.openai.com/docs/api-reference) | A server that implements OpenAI API | ✅ | ✅ | ✅ | ❌ | ✅ | ✅ | [link](https://github.com/ymcui/Chinese-LLaMA-Alpaca-2/wiki/api_calls_en) |
-| [**text-generation-webui**](https://github.com/oobabooga/text-generation-webui) | A tool for deploying model as a web UI | ✅ | ✅ | ✅ | ✅ | ❌ | ❌ | [link](https://github.com/ymcui/Chinese-LLaMA-Alpaca-2/wiki/text-generation-webui_en) |
+| [**text-generation-webui**](https://github.com/oobabooga/text-generation-webui) | A tool for deploying model as a web UI | ✅ | ✅ | ✅ | ✅ | ✅ | ❌ | [link](https://github.com/ymcui/Chinese-LLaMA-Alpaca-2/wiki/text-generation-webui_en) |
 | [**LangChain**](https://github.com/hwchase17/langchain) | LLM application development framework, suitable for secondary development | ✅ | ✅ | ✅ | ❌ | ❌ | ❌ | [link](https://github.com/ymcui/Chinese-LLaMA-Alpaca-2/wiki/langchain_en) |
 | [**privateGPT**](https://github.com/imartinez/privateGPT) | LangChain-based multi-document QA framework | ✅ | ✅ | ✅ | ❌ | ❌ | ❌ | [link](https://github.com/ymcui/Chinese-LLaMA-Alpaca-2/wiki/privategpt_en) |
From e9a29f0356f60848029ca2bd8e8f2d6ffa9fccc2 Mon Sep 17 00:00:00 2001
From: iMountTai <2506700016@qq.com>
Date: Thu, 17 Aug 2023 11:38:35 +0800
Subject: [PATCH 02/12] fix data cache_path

---
 scripts/training/build_dataset.py         | 2 +-
 scripts/training/run_clm_pt_with_peft.py  | 4 +++-
 scripts/training/run_clm_sft_with_peft.py | 4 +++-
 3 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/scripts/training/build_dataset.py b/scripts/training/build_dataset.py
index 953a6d2..9fd1fdb 100644
--- a/scripts/training/build_dataset.py
+++ b/scripts/training/build_dataset.py
@@ -62,7 +62,7 @@ def tokenization(examples):
         if data_cache_dir is None:
             data_cache_dir = str(os.path.dirname(file))
-        cache_path = os.path.join(data_cache_dir,os.path.basename(file).split('.')[0])
+        cache_path = os.path.join(data_cache_dir,os.path.basename(file).split('.')[0]+f"_{max_seq_length}")
         os.makedirs(cache_path, exist_ok=True)
         try:
             processed_dataset = datasets.load_from_disk(cache_path)
diff --git a/scripts/training/run_clm_pt_with_peft.py b/scripts/training/run_clm_pt_with_peft.py
index 96f403c..7c18d64 100644
--- a/scripts/training/run_clm_pt_with_peft.py
+++ b/scripts/training/run_clm_pt_with_peft.py
@@ -528,6 +528,7 @@ def group_texts(examples):
             if model_args.torch_dtype in ["auto", None]
             else getattr(torch, model_args.torch_dtype)
         )
+        device_map = {"":int(os.environ.get("LOCAL_RANK") or 0)}
         model = LlamaForCausalLM.from_pretrained(
             model_args.model_name_or_path,
             from_tf=bool(".ckpt" in model_args.model_name_or_path),
@@ -536,7 +537,8 @@ def group_texts(examples):
             revision=model_args.model_revision,
             use_auth_token=True if model_args.use_auth_token else None,
             torch_dtype=torch_dtype,
-            low_cpu_mem_usage=True
+            low_cpu_mem_usage=True,
+            device_map=device_map
         )
     else:
         model = AutoModelForCausalLM.from_config(config)
diff --git a/scripts/training/run_clm_sft_with_peft.py b/scripts/training/run_clm_sft_with_peft.py
index 4daf208..f0fbd62 100644
--- a/scripts/training/run_clm_sft_with_peft.py
+++ b/scripts/training/run_clm_sft_with_peft.py
@@ -337,6 +337,7 @@ def main():
             if model_args.torch_dtype in ["auto", None]
             else getattr(torch, model_args.torch_dtype)
         )
+        device_map = {"":int(os.environ.get("LOCAL_RANK") or 0)}
         model = LlamaForCausalLM.from_pretrained(
             model_args.model_name_or_path,
             from_tf=bool(".ckpt" in model_args.model_name_or_path),
@@ -345,7 +346,8 @@ def main():
             revision=model_args.model_revision,
             use_auth_token=True if model_args.use_auth_token else None,
             torch_dtype=torch_dtype,
-            low_cpu_mem_usage=True
+            low_cpu_mem_usage=True,
+            device_map=device_map
         )
     else:
         model = AutoModelForCausalLM.from_config(config)
From fa7707b2ad1861ce83b1a0254104d30f9e81fa8c Mon Sep 17 00:00:00 2001
From: iMountTai <2506700016@qq.com>
Date: Thu, 17 Aug 2023 12:43:19 +0800
Subject: [PATCH 03/12] add device_map for training

---
 README.md                                 | 1 +
 README_EN.md                              | 1 +
 scripts/training/run_clm_pt_with_peft.py  | 2 +-
 scripts/training/run_clm_sft_with_peft.py | 3 +--
 4 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/README.md b/README.md
index 8f1adcb..a76f21c 100644
--- a/README.md
+++ b/README.md
@@ -249,6 +249,7 @@
 问题4:为什么不对模型做全量预训练而是用LoRA?
 问题5:二代模型支不支持某些支持一代LLaMA的工具?
 问题6:Chinese-Alpaca-2是Llama-2-Chat训练得到的吗?
+问题7:为什么24G显存微调chinese-alpaca-2-7b OOM?
 ```
diff --git a/README_EN.md b/README_EN.md
index 930faf8..4ac2c90 100644
--- a/README_EN.md
+++ b/README_EN.md
@@ -232,6 +232,7 @@ Question 3: Do you accept third-party Pull Requests?
 Question 4: Why not perform full pre-training but use LoRA instead?
 Question 5: Does Llama-2 series support tools that support the first-gen LLaMA?
 Question 6: Is Chinese-Alpaca-2 trained from Llama-2-Chat?
+Question 7: Why does training with 24GB VRAM lead to an OOM error when fine-tuning chinese-alpaca-2-7b?
 ```
 
 For specific questions and answers, please refer to the project >>> [📚 GitHub Wiki](https://github.com/ymcui/Chinese-LLaMA-Alpaca-2/wiki/faq_en)
diff --git a/scripts/training/run_clm_pt_with_peft.py b/scripts/training/run_clm_pt_with_peft.py
index 7c18d64..68f9032 100644
--- a/scripts/training/run_clm_pt_with_peft.py
+++ b/scripts/training/run_clm_pt_with_peft.py
@@ -557,7 +557,7 @@ def group_texts(examples):
 
     if training_args.peft_path is not None:
         logger.info("Peft from pre-trained model")
-        model = PeftModel.from_pretrained(model, training_args.peft_path)
+        model = PeftModel.from_pretrained(model, training_args.peft_path, device_map=device_map)
     else:
         logger.info("Init new peft model")
         target_modules = training_args.trainable.split(',')
diff --git a/scripts/training/run_clm_sft_with_peft.py b/scripts/training/run_clm_sft_with_peft.py
index f0fbd62..b6524fb 100644
--- a/scripts/training/run_clm_sft_with_peft.py
+++ b/scripts/training/run_clm_sft_with_peft.py
@@ -51,7 +51,6 @@
 from peft import LoraConfig, TaskType, get_peft_model, PeftModel, get_peft_model_state_dict
 from transformers.trainer_utils import PREFIX_CHECKPOINT_DIR
 
-IGNORE_INDEX = -100
 
 require_version("datasets>=1.8.0", "To fix: pip install -r examples/pytorch/language-modeling/requirements.txt")
 
@@ -363,7 +362,7 @@ def main():
 
     if training_args.peft_path is not None:
         logger.info("Peft from pre-trained model")
-        model = PeftModel.from_pretrained(model, training_args.peft_path)
+        model = PeftModel.from_pretrained(model, training_args.peft_path, device_map=device_map)
     else:
         logger.info("Init new peft model")
         target_modules = training_args.trainable.split(',')
From e3731fb67a35c10efc17c0b9d8b29bb45a15043c Mon Sep 17 00:00:00 2001
From: Xin Yao <35353688+iMountTai@users.noreply.github.com>
Date: Thu, 17 Aug 2023 13:01:56 +0800
Subject: [PATCH 04/12] Update README.md

---
 README.md | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/README.md b/README.md
index a76f21c..8444263 100644
--- a/README.md
+++ b/README.md
@@ -144,9 +144,6 @@
 | [**🤗Transformers**](https://github.com/huggingface/transformers) | 原生transformers推理接口 | ✅ | ✅ | ✅ | ✅ | ❌ | ✅ | [link](https://github.com/ymcui/Chinese-LLaMA-Alpaca-2/wiki/inference_with_transformers_zh) |
 | [**Colab Demo**](https://colab.research.google.com/drive/1yu0eZ3a66by8Zqm883LLtRQrguBAb9MR?usp=sharing) | 在Colab中启动交互界面 | ✅ | ✅ | ✅ | ✅ | ❌ | ✅ | [link](https://colab.research.google.com/drive/1yu0eZ3a66by8Zqm883LLtRQrguBAb9MR?usp=sharing) |
 | [**仿OpenAI API调用**](https://platform.openai.com/docs/api-reference) | 仿OpenAI API接口的服务器Demo | ✅ | ✅ | ✅ | ❌ | ✅ | ✅ | [link](https://github.com/ymcui/Chinese-LLaMA-Alpaca-2/wiki/api_calls_zh) |
-<<<<<<< HEAD
-| [**text-generation-webui**](https://github.com/oobabooga/text-generation-webui) | 前端Web UI界面的部署方式 | ✅ | ✅ | ✅ | ✅ | ✅ | ❌ | [link](https://github.com/ymcui/Chinese-LLaMA-Alpaca-2/wiki/text-generation-webui_zh) |
-=======
 | [**text-generation-webui**](https://github.com/oobabooga/text-generation-webui) | 前端Web UI界面的部署方式 | ✅ | ✅ | ✅ | ✅ | ✅ | ❌ | [link](https://github.com/ymcui/Chinese-LLaMA-Alpaca-2/wiki/text-generation-webui_zh) |
 >>>>>>> fced4ce77289f8104e7c434e70ad56540b854dcf
 | [**LangChain**](https://github.com/hwchase17/langchain) | 适合二次开发的大模型应用开源框架 | ✅ | ✅ | ✅ | ❌ | ❌ | ❌ | [link](https://github.com/ymcui/Chinese-LLaMA-Alpaca-2/wiki/langchain_zh) |
From 94d00603842900d992efdec27f55f470d739bbf3 Mon Sep 17 00:00:00 2001
From: Xin Yao <35353688+iMountTai@users.noreply.github.com>
Date: Thu, 17 Aug 2023 13:02:37 +0800
Subject: [PATCH 05/12] Update README.md

---
 README.md | 1 -
 1 file changed, 1 deletion(-)

diff --git a/README.md b/README.md
index 8444263..7b7b083 100644
--- a/README.md
+++ b/README.md
@@ -145,7 +145,6 @@
 | [**Colab Demo**](https://colab.research.google.com/drive/1yu0eZ3a66by8Zqm883LLtRQrguBAb9MR?usp=sharing) | 在Colab中启动交互界面 | ✅ | ✅ | ✅ | ✅ | ❌ | ✅ | [link](https://colab.research.google.com/drive/1yu0eZ3a66by8Zqm883LLtRQrguBAb9MR?usp=sharing) |
 | [**仿OpenAI API调用**](https://platform.openai.com/docs/api-reference) | 仿OpenAI API接口的服务器Demo | ✅ | ✅ | ✅ | ❌ | ✅ | ✅ | [link](https://github.com/ymcui/Chinese-LLaMA-Alpaca-2/wiki/api_calls_zh) |
 | [**text-generation-webui**](https://github.com/oobabooga/text-generation-webui) | 前端Web UI界面的部署方式 | ✅ | ✅ | ✅ | ✅ | ✅ | ❌ | [link](https://github.com/ymcui/Chinese-LLaMA-Alpaca-2/wiki/text-generation-webui_zh) |
->>>>>>> fced4ce77289f8104e7c434e70ad56540b854dcf
 | [**LangChain**](https://github.com/hwchase17/langchain) | 适合二次开发的大模型应用开源框架 | ✅ | ✅ | ✅ | ❌ | ❌ | ❌ | [link](https://github.com/ymcui/Chinese-LLaMA-Alpaca-2/wiki/langchain_zh) |
 | [**privateGPT**](https://github.com/imartinez/privateGPT) | 基于LangChain的多文档本地问答框架 | ✅ | ✅ | ✅ | ❌ | ❌ | ❌ | [link](https://github.com/ymcui/Chinese-LLaMA-Alpaca-2/wiki/privategpt_zh) |
From 1d668b7484c0417c3799f95100edf89966bd423e Mon Sep 17 00:00:00 2001
From: Xin Yao <35353688+iMountTai@users.noreply.github.com>
Date: Thu, 17 Aug 2023 13:03:21 +0800
Subject: [PATCH 06/12] Update README_EN.md

---
 README_EN.md | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/README_EN.md b/README_EN.md
index 4ac2c90..51cee13 100644
--- a/README_EN.md
+++ b/README_EN.md
@@ -138,11 +138,7 @@ The models in this project mainly support the following quantization, inference,
 | [**🤗Transformers**](https://github.com/huggingface/transformers) | Native transformers inference interface | ✅ | ✅ | ✅ | ✅ | ❌ | ✅ | [link](https://github.com/ymcui/Chinese-LLaMA-Alpaca-2/wiki/inference_with_transformers_en) |
 | [**Colab Demo**](https://colab.research.google.com/drive/1yu0eZ3a66by8Zqm883LLtRQrguBAb9MR?usp=sharing) | Running a Gradio web demo in Colab | ✅ | ✅ | ✅ | ✅ | ❌ | ✅ | [link](https://colab.research.google.com/drive/1yu0eZ3a66by8Zqm883LLtRQrguBAb9MR?usp=sharing) |
 | [**OpenAI API Calls**](https://platform.openai.com/docs/api-reference) | A server that implements OpenAI API | ✅ | ✅ | ✅ | ❌ | ✅ | ✅ | [link](https://github.com/ymcui/Chinese-LLaMA-Alpaca-2/wiki/api_calls_en) |
-<<<<<<< HEAD
-| [**text-generation-webui**](https://github.com/oobabooga/text-generation-webui) | A tool for deploying model as a web UI | ✅ | ✅ | ✅ | ✅ | ✅ | ❌ | [link](https://github.com/ymcui/Chinese-LLaMA-Alpaca-2/wiki/text-generation-webui_en) |
-=======
 | [**text-generation-webui**](https://github.com/oobabooga/text-generation-webui) | A tool for deploying model as a web UI | ✅ | ✅ | ✅ | ✅ | ✅ | ❌ | [link](https://github.com/ymcui/Chinese-LLaMA-Alpaca-2/wiki/text-generation-webui_en) |
->>>>>>> fced4ce77289f8104e7c434e70ad56540b854dcf
 | [**LangChain**](https://github.com/hwchase17/langchain) | LLM application development framework, suitable for secondary development | ✅ | ✅ | ✅ | ❌ | ❌ | ❌ | [link](https://github.com/ymcui/Chinese-LLaMA-Alpaca-2/wiki/langchain_en) |
 | [**privateGPT**](https://github.com/imartinez/privateGPT) | LangChain-based multi-document QA framework | ✅ | ✅ | ✅ | ❌ | ❌ | ❌ | [link](https://github.com/ymcui/Chinese-LLaMA-Alpaca-2/wiki/privategpt_en) |
From 8ef6dd8dc6cd673d503cf5b4b75a90a4616ccca3 Mon Sep 17 00:00:00 2001
From: iMountTai <2506700016@qq.com>
Date: Thu, 17 Aug 2023 14:11:36 +0800
Subject: [PATCH 07/12] delete unused params

---
 scripts/training/run_clm_pt_with_peft.py  | 44 ++++++++++-------------
 scripts/training/run_clm_sft_with_peft.py | 41 ++++++++++-----------
 scripts/training/run_pt.sh                |  6 ++--
 scripts/training/run_sft.sh               |  4 +--
 4 files changed, 41 insertions(+), 54 deletions(-)

diff --git a/scripts/training/run_clm_pt_with_peft.py b/scripts/training/run_clm_pt_with_peft.py
index 68f9032..3ca05c3 100644
--- a/scripts/training/run_clm_pt_with_peft.py
+++ b/scripts/training/run_clm_pt_with_peft.py
@@ -467,13 +467,13 @@ def group_texts(examples):
         for idx, file in enumerate(files):
             data_file = os.path.join(path, file)
             filename = ''.join(file.split(".")[:-1])
-            cache_path = os.path.join(data_args.data_cache_dir, filename)
+            cache_path = os.path.join(data_args.data_cache_dir, filename+f"_{block_size}")
             os.makedirs(cache_path, exist_ok=True)
             try:
                 processed_dataset = datasets.load_from_disk(cache_path, keep_in_memory=False)
                 logger.info(f'training datasets-{filename} has been loaded from disk')
             except Exception:
-                cache_dir = os.path.join(data_args.data_cache_dir, filename+"_text")
+                cache_dir = os.path.join(data_args.data_cache_dir, filename+f"_text_{block_size}")
                 os.makedirs(cache_dir, exist_ok=True)
                 raw_dataset = load_dataset("text", data_files=data_file, cache_dir=cache_dir, keep_in_memory=False)
                 logger.info(f"{file} has been loaded")
@@ -503,7 +503,6 @@ def group_texts(examples):
             else:
                 assert lm_datasets.features.type == processed_dataset["train"].features.type
                 lm_datasets = concatenate_datasets([lm_datasets, processed_dataset["train"]])
-
     lm_datasets = lm_datasets.train_test_split(test_size = data_args.validation_split_percentage)
 
     if training_args.do_train:
@@ -522,28 +521,23 @@ def group_texts(examples):
         logger.info(f"Num eval_samples {len(eval_dataset)}")
         logger.info("Evaluation example:")
         logger.info(tokenizer.decode(eval_dataset[0]['input_ids']))
-    if model_args.model_name_or_path:
-        torch_dtype = (
-            model_args.torch_dtype
-            if model_args.torch_dtype in ["auto", None]
-            else getattr(torch, model_args.torch_dtype)
-        )
-        device_map = {"":int(os.environ.get("LOCAL_RANK") or 0)}
-        model = LlamaForCausalLM.from_pretrained(
-            model_args.model_name_or_path,
-            from_tf=bool(".ckpt" in model_args.model_name_or_path),
-            config=config,
-            cache_dir=model_args.cache_dir,
-            revision=model_args.model_revision,
-            use_auth_token=True if model_args.use_auth_token else None,
-            torch_dtype=torch_dtype,
-            low_cpu_mem_usage=True,
-            device_map=device_map
-        )
-    else:
-        model = AutoModelForCausalLM.from_config(config)
-        n_params = sum({p.data_ptr(): p.numel() for p in model.parameters()}.values())
-        logger.info(f"Training new model from scratch - Total size={n_params/2**20:.2f}M params")
+    torch_dtype = (
+        model_args.torch_dtype
+        if model_args.torch_dtype in ["auto", None]
+        else getattr(torch, model_args.torch_dtype)
+    )
+    device_map = {"":int(os.environ.get("LOCAL_RANK") or 0)}
+    model = LlamaForCausalLM.from_pretrained(
+        model_args.model_name_or_path,
from_tf=bool(".ckpt" in model_args.model_name_or_path), + config=config, + cache_dir=model_args.cache_dir, + revision=model_args.model_revision, + use_auth_token=True if model_args.use_auth_token else None, + torch_dtype=torch_dtype, + low_cpu_mem_usage=True, + device_map=device_map + ) model_vocab_size = model.get_output_embeddings().weight.size(0) tokenizer_vocab_size = len(tokenizer) diff --git a/scripts/training/run_clm_sft_with_peft.py b/scripts/training/run_clm_sft_with_peft.py index b6524fb..d71c411 100644 --- a/scripts/training/run_clm_sft_with_peft.py +++ b/scripts/training/run_clm_sft_with_peft.py @@ -294,7 +294,7 @@ def main(): "You can do it from another script, save it, and load it from here, using --tokenizer_name." ) - if (len(tokenizer))!=55296: + if (len(tokenizer)) != 55296: raise ValueError(f"The vocab size of the tokenizer should be 55296, but found {len(tokenizer)}.\n" "Please use Chinese-LLaMA-2 tokenizer.") @@ -330,28 +330,23 @@ def main(): logger.info("Evaluation example:") logger.info(tokenizer.decode(eval_dataset[0]['input_ids'])) - if model_args.model_name_or_path: - torch_dtype = ( - model_args.torch_dtype - if model_args.torch_dtype in ["auto", None] - else getattr(torch, model_args.torch_dtype) - ) - device_map = {"":int(os.environ.get("LOCAL_RANK") or 0)} - model = LlamaForCausalLM.from_pretrained( - model_args.model_name_or_path, - from_tf=bool(".ckpt" in model_args.model_name_or_path), - config=config, - cache_dir=model_args.cache_dir, - revision=model_args.model_revision, - use_auth_token=True if model_args.use_auth_token else None, - torch_dtype=torch_dtype, - low_cpu_mem_usage=True, - device_map=device_map - ) - else: - model = AutoModelForCausalLM.from_config(config) - n_params = sum({p.data_ptr(): p.numel() for p in model.parameters()}.values()) - logger.info(f"Training new model from scratch - Total size={n_params/2**20:.2f}M params") + torch_dtype = ( + model_args.torch_dtype + if model_args.torch_dtype in ["auto", None] + else getattr(torch, model_args.torch_dtype) + ) + device_map = {"":int(os.environ.get("LOCAL_RANK") or 0)} + model = LlamaForCausalLM.from_pretrained( + model_args.model_name_or_path, + from_tf=bool(".ckpt" in model_args.model_name_or_path), + config=config, + cache_dir=model_args.cache_dir, + revision=model_args.model_revision, + use_auth_token=True if model_args.use_auth_token else None, + torch_dtype=torch_dtype, + low_cpu_mem_usage=True, + device_map=device_map + ) model_vocab_size = model.get_input_embeddings().weight.shape[0] logger.info(f"Model vocab size: {model_vocab_size}") diff --git a/scripts/training/run_pt.sh b/scripts/training/run_pt.sh index b409eac..56fc540 100644 --- a/scripts/training/run_pt.sh +++ b/scripts/training/run_pt.sh @@ -10,8 +10,8 @@ chinese_tokenizer_path=path/to/chinese/llama-2/tokenizer/dir dataset_dir=path/to/pt/data/dir data_cache=temp_data_cache_dir per_device_train_batch_size=1 -per_device_eval_batch_size=1 gradient_accumulation_steps=8 +block_size=512 output_dir=output_dir deepspeed_config_file=ds_zero2_no_offload.json @@ -22,9 +22,7 @@ torchrun --nnodes 1 --nproc_per_node 1 run_clm_pt_with_peft.py \ --tokenizer_name_or_path ${chinese_tokenizer_path} \ --dataset_dir ${dataset_dir} \ --data_cache_dir ${data_cache} \ - --validation_split_percentage 0.001 \ --per_device_train_batch_size ${per_device_train_batch_size} \ - --per_device_eval_batch_size ${per_device_eval_batch_size} \ --do_train \ --seed $RANDOM \ --fp16 \ @@ -40,7 +38,7 @@ torchrun --nnodes 1 --nproc_per_node 1 
     --save_steps 200 \
     --gradient_accumulation_steps ${gradient_accumulation_steps} \
     --preprocessing_num_workers 8 \
-    --block_size 1024 \
+    --block_size ${block_size} \
     --output_dir ${output_dir} \
     --overwrite_output_dir \
     --ddp_timeout 30000 \
diff --git a/scripts/training/run_sft.sh b/scripts/training/run_sft.sh
index 0c31a8b..73a5ead 100644
--- a/scripts/training/run_sft.sh
+++ b/scripts/training/run_sft.sh
@@ -11,6 +11,7 @@ dataset_dir=path/to/sft/data/dir
 per_device_train_batch_size=1
 per_device_eval_batch_size=1
 gradient_accumulation_steps=8
+max_seq_length=512
 output_dir=output_dir
 peft_model=path/to/peft/model/dir
 validation_file=validation_file_name
@@ -22,7 +23,6 @@ torchrun --nnodes 1 --nproc_per_node 1 run_clm_sft_with_peft.py \
     --model_name_or_path ${pretrained_model} \
     --tokenizer_name_or_path ${chinese_tokenizer_path} \
     --dataset_dir ${dataset_dir} \
-    --validation_split_percentage 0.001 \
     --per_device_train_batch_size ${per_device_train_batch_size} \
     --per_device_eval_batch_size ${per_device_eval_batch_size} \
     --do_train \
@@ -43,7 +43,7 @@ torchrun --nnodes 1 --nproc_per_node 1 run_clm_sft_with_peft.py \
     --save_steps 200 \
     --gradient_accumulation_steps ${gradient_accumulation_steps} \
     --preprocessing_num_workers 8 \
-    --max_seq_length 1024 \
+    --max_seq_length ${max_seq_length} \
     --output_dir ${output_dir} \
     --overwrite_output_dir \
     --ddp_timeout 30000 \
From 796bb874f7f312b474aef7c54204ce1364e0a2a8 Mon Sep 17 00:00:00 2001
From: iMountTai <2506700016@qq.com>
Date: Thu, 17 Aug 2023 14:20:27 +0800
Subject: [PATCH 08/12] add use_cache=False

---
 scripts/training/run_clm_pt_with_peft.py  | 1 +
 scripts/training/run_clm_sft_with_peft.py | 1 +
 2 files changed, 2 insertions(+)

diff --git a/scripts/training/run_clm_pt_with_peft.py b/scripts/training/run_clm_pt_with_peft.py
index 3ca05c3..cd36b7a 100644
--- a/scripts/training/run_clm_pt_with_peft.py
+++ b/scripts/training/run_clm_pt_with_peft.py
@@ -538,6 +538,7 @@ def group_texts(examples):
         low_cpu_mem_usage=True,
         device_map=device_map
     )
+    model.config.use_cache = False
 
     model_vocab_size = model.get_output_embeddings().weight.size(0)
     tokenizer_vocab_size = len(tokenizer)
diff --git a/scripts/training/run_clm_sft_with_peft.py b/scripts/training/run_clm_sft_with_peft.py
index d71c411..fea0879 100644
--- a/scripts/training/run_clm_sft_with_peft.py
+++ b/scripts/training/run_clm_sft_with_peft.py
@@ -347,6 +347,7 @@ def main():
         low_cpu_mem_usage=True,
         device_map=device_map
     )
+    model.config.use_cache = False
 
     model_vocab_size = model.get_input_embeddings().weight.shape[0]
     logger.info(f"Model vocab size: {model_vocab_size}")
From e16d943078a57838d3b3548b274515bafe7c5468 Mon Sep 17 00:00:00 2001
From: Xin Yao <35353688+iMountTai@users.noreply.github.com>
Date: Thu, 17 Aug 2023 14:45:48 +0800
Subject: [PATCH 09/12] Update run_pt.sh

---
 scripts/training/run_pt.sh | 1 +
 1 file changed, 1 insertion(+)

diff --git a/scripts/training/run_pt.sh b/scripts/training/run_pt.sh
index 56fc540..40813a3 100644
--- a/scripts/training/run_pt.sh
+++ b/scripts/training/run_pt.sh
@@ -22,6 +22,7 @@ torchrun --nnodes 1 --nproc_per_node 1 run_clm_pt_with_peft.py \
     --tokenizer_name_or_path ${chinese_tokenizer_path} \
     --dataset_dir ${dataset_dir} \
     --data_cache_dir ${data_cache} \
+    --validation_split_percentage 0.001 \
     --per_device_train_batch_size ${per_device_train_batch_size} \
     --do_train \
     --seed $RANDOM \
From 74b701015bf954c3d5ade6e819807054605e40e3 Mon Sep 17 00:00:00 2001
From: iMountTai <2506700016@qq.com>
Date: Fri, 18 Aug 2023 11:48:22 +0800
Subject: [PATCH 10/12] delete modules_to_save

---
 scripts/training/run_pt.sh  | 10 +++-------
 scripts/training/run_sft.sh | 12 +++---------
 2 files changed, 6 insertions(+), 16 deletions(-)

diff --git a/scripts/training/run_pt.sh b/scripts/training/run_pt.sh
index 56fc540..7c4a7dd 100644
--- a/scripts/training/run_pt.sh
+++ b/scripts/training/run_pt.sh
@@ -2,11 +2,10 @@ lr=2e-4
 lora_rank=64
 lora_alpha=128
 lora_trainable="q_proj,v_proj,k_proj,o_proj,gate_proj,down_proj,up_proj"
-modules_to_save="embed_tokens,lm_head"
 lora_dropout=0.05
 
-pretrained_model=path/to/hf/llama-2/dir
-chinese_tokenizer_path=path/to/chinese/llama-2/tokenizer/dir
+pretrained_model=path/to/hf/chinese-llama-2/dir
+chinese_tokenizer_path=path/to/chinese/chinese-llama-2/tokenizer/dir
 dataset_dir=path/to/pt/data/dir
 data_cache=temp_data_cache_dir
 per_device_train_batch_size=1
@@ -46,8 +45,5 @@ torchrun --nnodes 1 --nproc_per_node 1 run_clm_pt_with_peft.py \
     --lora_rank ${lora_rank} \
     --lora_alpha ${lora_alpha} \
     --trainable ${lora_trainable} \
-    --modules_to_save ${modules_to_save} \
     --lora_dropout ${lora_dropout} \
-    --torch_dtype float16 \
-    --gradient_checkpointing \
-    --ddp_find_unused_parameters False
+    --torch_dtype float16
diff --git a/scripts/training/run_sft.sh b/scripts/training/run_sft.sh
index 73a5ead..c180a42 100644
--- a/scripts/training/run_sft.sh
+++ b/scripts/training/run_sft.sh
@@ -2,18 +2,16 @@ lr=1e-4
 lora_rank=64
 lora_alpha=128
 lora_trainable="q_proj,v_proj,k_proj,o_proj,gate_proj,down_proj,up_proj"
-modules_to_save="embed_tokens,lm_head"
 lora_dropout=0.05
 
-pretrained_model=path/to/hf/llama-2/or/merged/llama-2/dir/or/model_id
-chinese_tokenizer_path=path/to/chinese/llama-2/tokenizer/dir
+pretrained_model=path/to/hf/chinese-alpaca-2/dir/or/model_id
+chinese_tokenizer_path=path/to/chinese/chinese-alpaca-2/tokenizer/dir
 dataset_dir=path/to/sft/data/dir
 per_device_train_batch_size=1
 per_device_eval_batch_size=1
 gradient_accumulation_steps=8
 max_seq_length=512
 output_dir=output_dir
-peft_model=path/to/peft/model/dir
 validation_file=validation_file_name
 
 deepspeed_config_file=ds_zero2_no_offload.json
@@ -51,10 +49,6 @@ torchrun --nnodes 1 --nproc_per_node 1 run_clm_sft_with_peft.py \
     --lora_rank ${lora_rank} \
     --lora_alpha ${lora_alpha} \
     --trainable ${lora_trainable} \
-    --modules_to_save ${modules_to_save} \
     --lora_dropout ${lora_dropout} \
     --torch_dtype float16 \
-    --validation_file ${validation_file} \
-    --peft_path ${peft_model} \
-    --gradient_checkpointing \
-    --ddp_find_unused_parameters False
+    --validation_file ${validation_file}
From d6ba233386d6c1b44d0b903c55cc4e7ffbb7c5ca Mon Sep 17 00:00:00 2001
From: iMountTai <2506700016@qq.com>
Date: Wed, 23 Aug 2023 00:37:57 +0800
Subject: [PATCH 11/12] add some suggestions for training

---
 scripts/training/run_pt.sh  | 6 +++++-
 scripts/training/run_sft.sh | 8 ++++++--
 2 files changed, 11 insertions(+), 3 deletions(-)

diff --git a/scripts/training/run_pt.sh b/scripts/training/run_pt.sh
index 8852ea4..e103ab5 100644
--- a/scripts/training/run_pt.sh
+++ b/scripts/training/run_pt.sh
@@ -1,10 +1,13 @@
+# 运行脚本前请仔细阅读wiki(https://github.com/ymcui/Chinese-LLaMA-Alpaca-2/wiki/pt_scripts_zh)
+# Read the wiki(https://github.com/ymcui/Chinese-LLaMA-Alpaca-2/wiki/pt_scripts_zh) carefully before running the script
 lr=2e-4
 lora_rank=64
 lora_alpha=128
 lora_trainable="q_proj,v_proj,k_proj,o_proj,gate_proj,down_proj,up_proj"
+modules_to_save="embed_tokens,lm_head"
 lora_dropout=0.05
 
-pretrained_model=path/to/hf/chinese-llama-2/dir
+pretrained_model=path/to/hf/llama-2/dir
 chinese_tokenizer_path=path/to/chinese/chinese-llama-2/tokenizer/dir
 dataset_dir=path/to/pt/data/dir
 data_cache=temp_data_cache_dir
 per_device_train_batch_size=1
@@ -47,4 +50,5 @@ torchrun --nnodes 1 --nproc_per_node 1 run_clm_pt_with_peft.py \
     --lora_alpha ${lora_alpha} \
     --trainable ${lora_trainable} \
     --lora_dropout ${lora_dropout} \
+    --modules_to_save ${modules_to_save} \
     --torch_dtype float16
diff --git a/scripts/training/run_sft.sh b/scripts/training/run_sft.sh
index c180a42..a74986d 100644
--- a/scripts/training/run_sft.sh
+++ b/scripts/training/run_sft.sh
@@ -1,11 +1,14 @@
+# 运行脚本前请仔细阅读wiki(https://github.com/ymcui/Chinese-LLaMA-Alpaca-2/wiki/sft_scripts_zh)
+# Read the wiki(https://github.com/ymcui/Chinese-LLaMA-Alpaca-2/wiki/sft_scripts_zh) carefully before running the script
 lr=1e-4
 lora_rank=64
 lora_alpha=128
 lora_trainable="q_proj,v_proj,k_proj,o_proj,gate_proj,down_proj,up_proj"
+modules_to_save="embed_tokens,lm_head"
 lora_dropout=0.05
 
-pretrained_model=path/to/hf/chinese-alpaca-2/dir/or/model_id
-chinese_tokenizer_path=path/to/chinese/chinese-alpaca-2/tokenizer/dir
+pretrained_model=path/to/hf/llama-2/or/chinese-llama-2/dir/or/model_id
+chinese_tokenizer_path=path/to/chinese-llama-2/tokenizer/dir
 dataset_dir=path/to/sft/data/dir
 per_device_train_batch_size=1
 per_device_eval_batch_size=1
@@ -50,5 +53,6 @@ torchrun --nnodes 1 --nproc_per_node 1 run_clm_sft_with_peft.py \
     --lora_alpha ${lora_alpha} \
     --trainable ${lora_trainable} \
     --lora_dropout ${lora_dropout} \
+    --modules_to_save ${modules_to_save} \
     --torch_dtype float16 \
     --validation_file ${validation_file}
From 7ff746c94f0adcc57e019a47c1cf554ace141182 Mon Sep 17 00:00:00 2001
From: Xin Yao <35353688+iMountTai@users.noreply.github.com>
Date: Wed, 23 Aug 2023 00:47:44 +0800
Subject: [PATCH 12/12] Update run_pt.sh

---
 scripts/training/run_pt.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scripts/training/run_pt.sh b/scripts/training/run_pt.sh
index e103ab5..663a326 100644
--- a/scripts/training/run_pt.sh
+++ b/scripts/training/run_pt.sh
@@ -8,7 +8,7 @@ modules_to_save="embed_tokens,lm_head"
 lora_dropout=0.05
 
 pretrained_model=path/to/hf/llama-2/dir
-chinese_tokenizer_path=path/to/chinese/chinese-llama-2/tokenizer/dir
+chinese_tokenizer_path=path/to/chinese-llama-2/tokenizer/dir
 dataset_dir=path/to/pt/data/dir
 data_cache=temp_data_cache_dir
 per_device_train_batch_size=1
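
Notes on the recurring techniques in this series (sketches for orientation; any name flagged as a placeholder below is not taken from the patches):

PATCH 02 and PATCH 07 key the preprocessed-dataset cache directories by sequence length (`_{max_seq_length}` / `_{block_size}`), so runs with different lengths no longer collide on the same tokenized cache. The same patches, plus PATCH 03, load the model with `device_map={"": LOCAL_RANK}`, so each `torchrun` worker materializes the weights on its own GPU instead of every rank defaulting to GPU 0; PATCH 08 then disables the KV cache, which only matters for generation, not training. A minimal standalone sketch of that loading pattern (the model id is a placeholder):

```python
import os

import torch
from transformers import AutoModelForCausalLM

# torchrun exports LOCAL_RANK for every worker; fall back to 0 for single-GPU runs.
local_rank = int(os.environ.get("LOCAL_RANK") or 0)

model = AutoModelForCausalLM.from_pretrained(
    "meta-llama/Llama-2-7b-hf",   # placeholder model id, not from the patches
    torch_dtype=torch.float16,
    low_cpu_mem_usage=True,       # stream weights instead of building a full fp32 copy first
    device_map={"": local_rank},  # "" maps the entire module tree to this rank's GPU
)
model.config.use_cache = False    # KV cache is for generation; drop it during training
```

PATCH 10 removes `modules_to_save` from the launch scripts and PATCH 11 restores it: the Chinese tokenizer extends the vocabulary to 55296 (the size checked in PATCH 07), so the resized `embed_tokens` and `lm_head` must be trained in full alongside the LoRA adapters rather than left frozen. A sketch of the peft configuration those script flags correspond to, assuming the hyperparameters from run_pt.sh:

```python
from peft import LoraConfig, TaskType

lora_config = LoraConfig(
    task_type=TaskType.CAUSAL_LM,
    r=64,              # lora_rank
    lora_alpha=128,
    lora_dropout=0.05,
    target_modules=["q_proj", "v_proj", "k_proj", "o_proj",
                    "gate_proj", "down_proj", "up_proj"],
    # Fully train the resized embedding and output head so the newly added
    # Chinese tokens are actually learned, not merely low-rank-adapted.
    modules_to_save=["embed_tokens", "lm_head"],
)
```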