In [None]:
!pip install git+https://github.com/OpenLMLab/collie.git

In [None]:
# Log in to the Hugging Face Hub from inside the notebook.
# NOTE(review): presumably needed so the checkpoint/tokenizer downloads in the
# later cells are authenticated — confirm whether the model actually requires it.
from huggingface_hub import notebook_login

notebook_login()

In [None]:
from transformers import AutoTokenizer

from collie.config import CollieConfig
from collie.data import CollieDatasetForTraining
from collie.data import CollieDataLoader
from collie.optim.lomo import Lomo
from collie.controller.trainer import Trainer
from collie.controller.evaluator import EvaluatorForPerplexity, EvaluatorForGeneration
from collie.models import InternLMForCausalLM
from collie.models.moss_moon import Moss003MoonForCausalLM
from collie.utils.monitor import StepTimeMonitor, TGSMonitor, MemoryMonitor, LossMonitor, EvalMonitor
from collie.metrics import DecodeMetric, PPLMetric
from collie.module import GPTLMLoss
from collie.utils.data_provider import GradioProvider

In [None]:
config = CollieConfig.from_pretrained(pretrained_model, trust_remote_code=True)
# Note that tp_size * dp_size * pp_size = the number of GPUs
# Tensor Parallel
config.tp_size = 2
# Data Parallel
config.dp_size = 1
# Pipeline Parallel
config.pp_size = 1
# the number of training epochs
config.train_epochs = 1
# eval per {100} steps
config.eval_per_n_steps = 100
# eval per {1} epoch
config.eval_per_n_epochs = 1
# The batch_size for each GPU is set to {16}
config.train_micro_batch_size = 16
# The batch_size for each eval is {1}
config.eval_batch_size = 1
# DeepSpeed Configuration
config.ds_config = {
        "fp16": {
            "enabled": True
        },
        "zero_allow_untested_optimizer": True,
        "zero_force_ds_cpu_optimizer": False,
        "zero_optimization": {
            "stage": 3,
            "offload_optimizer": {
                "device": "cpu",
                "pin_memory": False
            }
        },
        "monitor_config": {
            "enabled": True,
            "tag": "adan",
            "csv_monitor": {
                "enabled": True,
                "output_path": "./ds_logs/"
            }
        }
}

In [None]:
pretrained_model = "internlm/internlm-7b"

In [None]:
# Tokenizer for the same checkpoint as the model; `trust_remote_code=True`
# permits the tokenizer code shipped with the checkpoint to be executed.
tokenizer = AutoTokenizer.from_pretrained(
    pretrained_model,
    trust_remote_code=True,
)

In [None]:
# Toy corpus: the same prompt/completion pair repeated 10000 times, each as
# its own dict so no two samples share an object.
train_dataset = [
    dict(
        input='Collie is a python package for ',
        output='finetuning large language models.',
    )
    for _ in range(10000)
]
# Wrap the raw samples together with the tokenizer for training.
train_dataset = CollieDatasetForTraining(train_dataset, tokenizer)
# Evaluation reuses the first 32 training samples (demo only, not a held-out set).
eval_dataset = train_dataset[:32]

In [None]:
# Load the checkpoint through CoLLiE's InternLM implementation.
# `AutoModelForCausalLM` was never imported in this notebook (NameError on a
# fresh kernel); the CoLLiE model class matches the "internlm/internlm-7b"
# checkpoint and is the kind of model that takes a CollieConfig.
model = InternLMForCausalLM.from_pretrained(pretrained_model, config=config)

In [None]:
# LOMO optimizer over the model, with learning rate 0.001 and
# `clip_grad_norm` set to 5.0.
optimizer = Lomo(model, lr=0.001, clip_grad_norm=5.0)

In [None]:
# One monitor of each kind, all built from the shared config, in this order:
#   StepTimeMonitor - time used per step
#   TGSMonitor      - tokens generated per gpu per second
#   MemoryMonitor   - memory used
#   LossMonitor     - loss
#   EvalMonitor     - evaluation results
monitors = [
    monitor_cls(config)
    for monitor_cls in (
        StepTimeMonitor,
        TGSMonitor,
        MemoryMonitor,
        LossMonitor,
        EvalMonitor,
    )
]

In [None]:
# Perplexity evaluator: reports the 'ppl' metric on the evaluation subset.
# Each evaluator gets its own EvalMonitor instance.
evaluator_ppl = EvaluatorForPerplexity(
    model=model,
    config=config,
    dataset=eval_dataset,
    monitors=[EvalMonitor(config)],
    metrics={'ppl': PPLMetric()},
)

# Generation evaluator: reports the 'decode' metric; unlike the perplexity
# evaluator it is also given the tokenizer.
evaluator_decode = EvaluatorForGeneration(
    model=model,
    config=config,
    tokenizer=tokenizer,
    dataset=eval_dataset,
    monitors=[EvalMonitor(config)],
    metrics={'decode': DecodeMetric()},
)

In [None]:
# Assemble the trainer from the model, config, optimizer, dataset, monitors
# and both evaluators.
# NOTE(review): -100 is presumably the label index GPTLMLoss ignores — confirm.
trainer = Trainer(
    model=model,
    config=config,
    loss_fn=GPTLMLoss(-100),
    optimizer=optimizer,
    train_dataset=train_dataset,
    monitors=monitors,
    evaluators=[
        evaluator_ppl,
        evaluator_decode,
    ],
)

# Start training / evaluation
trainer.train()