In [1]:
import os
os.environ['CUDA_VISIBLE_DEVICES'] = '0'

from swift.llm import (
    get_model_tokenizer, get_template, inference, ModelType,
    get_default_template_type, inference_stream
)
from swift.utils import seed_everything
import torch

model_type = ModelType.qwen_1_8b_chat
template_type = get_default_template_type(model_type)
print(f'template_type: {template_type}')  # template_type: qwen


kwargs = {}
# kwargs['use_flash_attn'] = True  # 使用flash_attn

model, tokenizer = get_model_tokenizer(model_type, torch.float16,
                                       model_kwargs={'device_map': 'auto'}, **kwargs)
# 修改max_new_tokens
model.generation_config.max_new_tokens = 128

template = get_template(template_type, tokenizer)
seed_everything(42)
query = '介绍你自己'
response, history = inference(model, template, query)
print(f'query: {query}')
print(f'response: {response}')

2024-07-23 15:39:26.377384: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-07-23 15:39:26.389224: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-07-23 15:39:26.403322: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-07-23 15:39:26.407565: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1452] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-07-23 15:39:26.418226: I tensorflow/core/platform/cpu_feature_guar

template_type: qwen


[INFO:modelscope] Use user-specified model revision: master
Downloading: 100%|██████████| 8.21k/8.21k [00:00<00:00, 15.3kB/s]
Downloading: 100%|██████████| 50.8k/50.8k [00:00<00:00, 99.7kB/s]
Downloading: 100%|██████████| 910/910 [00:00<00:00, 1.77kB/s]
Downloading: 100%|██████████| 77.0/77.0 [00:00<00:00, 130B/s]
Downloading: 100%|██████████| 2.29k/2.29k [00:00<00:00, 3.78kB/s]
Downloading: 100%|██████████| 1.88k/1.88k [00:00<00:00, 2.65kB/s]
Downloading: 100%|██████████| 249/249 [00:00<00:00, 403B/s]
Downloading: 100%|██████████| 7.11k/7.11k [00:00<00:00, 14.6kB/s]
Downloading: 100%|██████████| 80.8k/80.8k [00:00<00:00, 126kB/s]
Downloading: 100%|██████████| 1.90G/1.90G [00:05<00:00, 356MB/s] 
Downloading: 100%|██████████| 1.52G/1.52G [00:06<00:00, 270MB/s] 
Downloading: 100%|██████████| 14.4k/14.4k [00:00<00:00, 23.9kB/s]
Downloading: 100%|██████████| 54.3k/54.3k [00:00<00:00, 106kB/s]
Downloading: 100%|██████████| 15.0k/15.0k [00:00<00:00, 26.8kB/s]
Downloading: 100%|██████████| 2.

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

[INFO:swift] model.max_model_len: 8192
[INFO:swift] Global seed set to 42


query: 介绍你自己
response: 我是来自阿里云的大规模语言模型，我叫通义千问。我的目标是为用户提供最好的服务体验。我可以回答各种问题、提供定义、解释和建议，还能表达观点、撰写代码以及开发算法。如果你有任何问题或需要帮助，请随时告诉我，我会尽力为你提供支持。


In [11]:
from typing import Optional, Tuple

from datasets import Dataset as HfDataset
from modelscope import MsDataset

from swift.llm import get_dataset, register_dataset, get_dataset_from_repo
from swift.utils import get_logger

logger = get_logger()


class CustomDatasetName:
    kenan = 'kenan'

def _preprocess_stsb(dataset: HfDataset) -> HfDataset:
    prompt = """Task: Based on the given two sentences, provide a similarity score between 0.0 and 5.0.
Sentence 1: {text1}
Sentence 2: {text2}
Similarity score: """
    query = []
    response = []
    for d in dataset:
        query.append(prompt.format(text1=d['text1'], text2=d['text2']))
        response.append(f"{d['label']:.1f}")
    return HfDataset.from_dict({'query': query, 'response': response})


register_dataset(CustomDatasetName.kenan, 'swift/kenan.json', None, _preprocess_stsb, get_dataset_from_repo)


if __name__ == '__main__':
    # test dataset
    train_dataset, val_dataset = get_dataset([CustomDatasetName.kenan],
                                             check_dataset_strategy='warning')
    print(f'train_dataset: {train_dataset}')
    print(f'val_dataset: {val_dataset}')


ValueError: The `kenan` has already been registered in the DATASET_MAPPING.

In [16]:
import os
os.environ['CUDA_VISIBLE_DEVICES'] = '0'

from swift.llm import DatasetName, ModelType, SftArguments, sft_main

sft_args = SftArguments(
    model_type=ModelType.qwen_1_8b_chat,
    dataset=[f'{DatasetName.alpaca_zh}#500',
             f'kenan.json#500'],
    logging_steps=5,
    max_length=2048,
    learning_rate=1e-4,
    output_dir='output',
    lora_target_modules=['ALL']
    )
output = sft_main(sft_args)
best_model_checkpoint = output['best_model_checkpoint']
print(f'best_model_checkpoint: {best_model_checkpoint}')

[INFO:swift] Setting template_type: qwen
[INFO:swift] Setting args.lazy_tokenize: False
[INFO:swift] Setting args.dataloader_num_workers: 1
[INFO:swift] output_dir: /mnt/workspace/swift/output/qwen-1_8b-chat/v2-20240723-155740
[INFO:swift] Start time of running main: 2024-07-23 15:57:40.740586
[INFO:swift] args: SftArguments(model_type='qwen-1_8b-chat', model_id_or_path='qwen/Qwen-1_8B-Chat', model_revision='master', sft_type='lora', freeze_parameters=0.0, additional_trainable_parameters=[], tuner_backend='peft', template_type='qwen', output_dir='/mnt/workspace/swift/output/qwen-1_8b-chat/v2-20240723-155740', add_output_dir_suffix=True, ddp_backend=None, ddp_find_unused_parameters=None, ddp_broadcast_buffers=None, seed=42, resume_from_checkpoint=None, resume_only_model=False, ignore_data_skip=False, dtype='bf16', packing=False, dataset=['alpaca-zh#500', 'kenan.json#500'], val_dataset=[], dataset_seed=42, dataset_test_ratio=0.01, use_loss_scale=False, loss_scale_config_path='/mnt/worksp

device_count: 1
rank: -1, local_rank: -1, world_size: 1, local_world_size: 1


[INFO:modelscope] Use user-specified model revision: master
[INFO:swift] Loading the model using model_dir: /mnt/workspace/.cache/modelscope/hub/qwen/Qwen-1_8B-Chat
[INFO:swift] model_kwargs: {'low_cpu_mem_usage': True, 'device_map': 'cuda:0'}
Try importing flash-attention for faster inference...


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

[INFO:swift] model.max_model_len: 8192
[INFO:swift] model_config: QWenConfig {
  "_name_or_path": "/mnt/workspace/.cache/modelscope/hub/qwen/Qwen-1_8B-Chat",
  "architectures": [
    "QWenLMHeadModel"
  ],
  "attn_dropout_prob": 0.0,
  "auto_map": {
    "AutoConfig": "configuration_qwen.QWenConfig",
    "AutoModelForCausalLM": "modeling_qwen.QWenLMHeadModel"
  },
  "bf16": true,
  "emb_dropout_prob": 0.0,
  "fp16": false,
  "fp32": false,
  "hidden_size": 2048,
  "initializer_range": 0.02,
  "intermediate_size": 11008,
  "kv_channels": 128,
  "layer_norm_epsilon": 1e-06,
  "max_position_embeddings": 8192,
  "model_type": "qwen",
  "no_bias": true,
  "num_attention_heads": 16,
  "num_hidden_layers": 24,
  "onnx_safe": null,
  "rotary_emb_base": 10000,
  "rotary_pct": 1.0,
  "scale_attn_weights": true,
  "seq_length": 8192,
  "softmax_in_fp32": false,
  "tie_word_embeddings": false,
  "tokenizer_class": "QWenTokenizer",
  "transformers_version": "4.42.4",
  "use_cache": true,
  "use_cach

Downloading readme:   0%|          | 0.00/1.23k [00:00<?, ?B/s]

[INFO:modelscope] storing https://www.modelscope.cn/api/v1/datasets/AI-ModelScope/alpaca-gpt4-data-zh/repo?Source=SDK&Revision=master&FilePath=README.md&View=False in cache at /mnt/workspace/.cache/modelscope/hub/datasets/0e5068922103f4f9417b7739909a0715b01750431f2038e5f4e9d7dcdd6cdcaa
[INFO:modelscope] creating metadata file for /mnt/workspace/.cache/modelscope/hub/datasets/0e5068922103f4f9417b7739909a0715b01750431f2038e5f4e9d7dcdd6cdcaa
[INFO:modelscope] Downloading to /mnt/workspace/.cache/modelscope/hub/datasets/downloads/ee3959cc16ee530c43270b123e2d8694a153a70d1b9a10d1e697df701b3fd791.incomplete


Downloading data: 0.00B [00:00, ?B/s]

[INFO:modelscope] storing https://www.modelscope.cn/api/v1/datasets/AI-ModelScope/alpaca-gpt4-data-zh/repo?Source=SDK&Revision=master&FilePath=train.csv in cache at /mnt/workspace/.cache/modelscope/hub/datasets/downloads/ee3959cc16ee530c43270b123e2d8694a153a70d1b9a10d1e697df701b3fd791
[INFO:modelscope] creating metadata file for /mnt/workspace/.cache/modelscope/hub/datasets/downloads/ee3959cc16ee530c43270b123e2d8694a153a70d1b9a10d1e697df701b3fd791


Generating train split: 0 examples [00:00, ? examples/s]

[INFO:modelscope] Context manager of ms-dataset exited.


Map:   0%|          | 0/495 [00:00<?, ? examples/s]

Filter:   0%|          | 0/495 [00:00<?, ? examples/s]

Map:   0%|          | 0/5 [00:00<?, ? examples/s]

Filter:   0%|          | 0/5 [00:00<?, ? examples/s]

Map:   0%|          | 0/80 [00:00<?, ? examples/s]

Filter:   0%|          | 0/80 [00:00<?, ? examples/s]

[INFO:swift] train_dataset: Dataset({
    features: ['query', 'response'],
    num_rows: 994
})
[INFO:swift] val_dataset: Dataset({
    features: ['query', 'response'],
    num_rows: 6
})
[INFO:swift] system: You are a helpful assistant.
[INFO:swift] args.lazy_tokenize: False
[INFO:swift] Using num_proc: 1


  0%|          | 0/994 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

[INFO:swift] [INPUT_IDS] [151644, 8948, 198, 2610, 525, 264, 10950, 17847, 13, 151645, 198, 151644, 872, 198, 101042, 87752, 20074, 62926, 101987, 103929, 106073, 8997, 32664, 16, 15, 99605, 103993, 101923, 3837, 56007, 99650, 100399, 104602, 99998, 100019, 1773, 22, 99605, 36587, 100399, 104602, 3837, 18, 99605, 36587, 116889, 1773, 151645, 198, 151644, 77091, 198, 106073, 5122, 100345, 101923, 59151, 3837, 16, 15, 99605, 105656, 22, 15, 4, 100623, 99729, 100399, 104602, 3837, 118233, 102250, 101043, 18, 15, 4, 100623, 99729, 116889, 1773, 43288, 102406, 104596, 101923, 110241, 15946, 3837, 100399, 104602, 100623, 56006, 116889, 100623, 42140, 1773, 100169, 17447, 3837, 104602, 33108, 100019, 99250, 102212, 100140, 20412, 101441, 100218, 106099, 3837, 102527, 73670, 83751, 63789, 96050, 99487, 101923, 110241, 15946, 3837, 104602, 9370, 103215, 100069, 105688, 100019, 1773, 151645]
[INFO:swift] [INPUT] <|im_start|>system
You are a helpful assistant.<|im_end|>
<|im_start|>user
分析以下数据并展示

[2024-07-23 15:57:54,451] [INFO] [real_accelerator.py:203:get_accelerator] Setting ds_accelerator to cuda (auto detect)


df: /root/.triton/autotune: 没有那个文件或目录


Train:   0%|          | 0/62 [00:00<?, ?it/s]

{'loss': 2.36874175, 'acc': 0.47317716, 'grad_norm': 3.203125, 'learning_rate': 2.5e-05, 'memory(GiB)': 11.46, 'train_speed(iter/s)': 0.252451, 'epoch': 0.02, 'global_step/max_steps': '1/62', 'percentage': '1.61%', 'elapsed_time': '3s', 'remaining_time': '3m 10s'}
{'loss': 2.1540308, 'acc': 0.52516752, 'grad_norm': 2.03125, 'learning_rate': 9.993e-05, 'memory(GiB)': 11.86, 'train_speed(iter/s)': 0.313256, 'epoch': 0.08, 'global_step/max_steps': '5/62', 'percentage': '8.06%', 'elapsed_time': '15s', 'remaining_time': '2m 52s'}
{'loss': 1.86708889, 'acc': 0.58053837, 'grad_norm': 1.5390625, 'learning_rate': 9.738e-05, 'memory(GiB)': 12.72, 'train_speed(iter/s)': 0.328026, 'epoch': 0.16, 'global_step/max_steps': '10/62', 'percentage': '16.13%', 'elapsed_time': '29s', 'remaining_time': '2m 34s'}
{'loss': 1.61841278, 'acc': 0.59912338, 'grad_norm': 1.6328125, 'learning_rate': 9.138e-05, 'memory(GiB)': 13.32, 'train_speed(iter/s)': 0.333229, 'epoch': 0.24, 'global_step/max_steps': '15/62', 'p

Val:   0%|          | 0/6 [00:00<?, ?it/s]

[INFO:swift] Saving model checkpoint to /mnt/workspace/swift/output/qwen-1_8b-chat/v2-20240723-155740/checkpoint-50


{'eval_loss': 1.40224171, 'eval_acc': 0.62159215, 'eval_runtime': 0.3738, 'eval_samples_per_second': 16.051, 'eval_steps_per_second': 16.051, 'epoch': 0.8, 'global_step/max_steps': '50/62', 'percentage': '80.65%', 'elapsed_time': '2m 27s', 'remaining_time': '35s'}
{'loss': 1.20385685, 'acc': 0.70072217, 'grad_norm': 1.359375, 'learning_rate': 3.55e-06, 'memory(GiB)': 13.32, 'train_speed(iter/s)': 0.337627, 'epoch': 0.89, 'global_step/max_steps': '55/62', 'percentage': '88.71%', 'elapsed_time': '2m 42s', 'remaining_time': '20s'}
{'loss': 1.30000114, 'acc': 0.67061954, 'grad_norm': 1.4765625, 'learning_rate': 2.9e-07, 'memory(GiB)': 13.32, 'train_speed(iter/s)': 0.337736, 'epoch': 0.97, 'global_step/max_steps': '60/62', 'percentage': '96.77%', 'elapsed_time': '2m 56s', 'remaining_time': '5s'}



Val:   0%|          | 0/6 [00:00<?, ?it/s]

[INFO:swift] Saving model checkpoint to /mnt/workspace/swift/output/qwen-1_8b-chat/v2-20240723-155740/checkpoint-62
[INFO:swift] last_model_checkpoint: /mnt/workspace/swift/output/qwen-1_8b-chat/v2-20240723-155740/checkpoint-62
[INFO:swift] best_model_checkpoint: /mnt/workspace/swift/output/qwen-1_8b-chat/v2-20240723-155740/checkpoint-50
[INFO:swift] images_dir: /mnt/workspace/swift/output/qwen-1_8b-chat/v2-20240723-155740/images


{'eval_loss': 1.40483391, 'eval_acc': 0.62377317, 'eval_runtime': 0.3784, 'eval_samples_per_second': 15.855, 'eval_steps_per_second': 15.855, 'epoch': 1.0, 'global_step/max_steps': '62/62', 'percentage': '100.00%', 'elapsed_time': '3m 3s', 'remaining_time': '0s'}
{'train_runtime': 183.4146, 'train_samples_per_second': 5.419, 'train_steps_per_second': 0.338, 'train_loss': 1.44525413, 'epoch': 1.0, 'global_step/max_steps': '62/62', 'percentage': '100.00%', 'elapsed_time': '3m 3s', 'remaining_time': '0s'}


[INFO:swift] End time of running main: 2024-07-23 16:01:00.260616


best_model_checkpoint: /mnt/workspace/swift/output/qwen-1_8b-chat/v2-20240723-155740/checkpoint-50


In [17]:
import os
os.environ['CUDA_VISIBLE_DEVICES'] = '0'

from swift.llm import (
    get_model_tokenizer, get_template, inference, ModelType, get_default_template_type,
)
from swift.utils import seed_everything
from swift.tuners import Swift

seed_everything(42)

ckpt_dir = 'output/qwen-1_8b-chat/v2-20240723-155740/checkpoint-62'
model_type=ModelType.qwen_1_8b_chat

template_type = get_default_template_type(model_type)

model, tokenizer = get_model_tokenizer(model_type, model_kwargs={'device_map': 'auto'})
model.generation_config.max_new_tokens = 128

model = Swift.from_pretrained(model, ckpt_dir, inference_mode=True)
template = get_template(template_type, tokenizer)

query = '介绍你自己'
response, history = inference(model, template, query)
print(f'response: {response}')
print(f'history: {history}')

[INFO:swift] Global seed set to 42
[INFO:swift] Downloading the model from ModelScope Hub, model_id: qwen/Qwen-1_8B-Chat
[INFO:modelscope] Use user-specified model revision: master
[INFO:swift] Loading the model using model_dir: /mnt/workspace/.cache/modelscope/hub/qwen/Qwen-1_8B-Chat
[INFO:swift] Setting torch_dtype: torch.float16
[INFO:swift] model_kwargs: {'device_map': 'auto'}
Try importing flash-attention for faster inference...


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

[INFO:swift] model.max_model_len: 8192


response: 您好，我是 柯南，一个由 中科大 开发的人工智能助手。我可以回答各种问题、提供信息和执行任务，以帮助用户解决问题并满足他们的需求。
history: [['介绍你自己', '您好，我是 柯南，一个由 中科大 开发的人工智能助手。我可以回答各种问题、提供信息和执行任务，以帮助用户解决问题并满足他们的需求。']]
