From 66cf6d5cb57d40f3efaa8799cffad031fc7933d3 Mon Sep 17 00:00:00 2001 From: Phodal Huang Date: Sun, 31 Dec 2023 10:51:16 +0800 Subject: [PATCH] docs: update for samples --- README.md | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 73d0bcc..f7e7f71 100644 --- a/README.md +++ b/README.md @@ -851,7 +851,7 @@ if __name__ == "__main__": ![Finetune Model Choice](images/finetune-model-choice.jpg) -#### 数据集信息 +#### 数据集示例 1 由 Unit Eval + OSS Instruct 数据集构建而来: @@ -863,6 +863,21 @@ if __name__ == "__main__": 测试视频:[开源 AI 辅助编程方案:Unit Mesh 端到端打通 v0.0.1 版本](https://www.bilibili.com/video/BV1si4y1h7Vw/) +在 Unit Eval [0.3.2](https://github.com/unit-mesh/unit-eval/releases/tag/v0.3.2) 版本里 + +数据集组成如下: + +```python +# Merge and shuffle records from different files +merge_jsonl( + output_file=merged_file, + input_files=[oss_instruction, 'code_bugfix_cleaned_5K.json', 'codeGPT_CN_cleaned_20K.json', + 'code_summarization_CN_cleaned_10K.json', 'java-code-completion.jsonl', 'java-test-gen.jsonl', + 'kotlin-completion-11929.jsonl', 'kotlin-java-comments-3715.jsonl'], + lines_per_file=[4000, 4000, 15000, 8000, 5000, 3000, 3000, 2000 ] +) +``` + #### 参数示例: ```bash