In [1]:
"""
requirements:
sentencepiece
transformers
deepspeed
accelerate
"""

'\nrequirements:\nsentencepiece\ntransformers\ndeepspeed\naccelerate\n'

In [2]:
NUM_WORKERS = 16
BATCH_SIZE_PER_WORKER = 8
MODEL_NAME = "lmsys/vicuna-7b-v1.3"

RELATION_TEMPLATE = {
    0: "Cause-Effect({e1},{e2})",
    1: "Cause-Effect({e2},{e1})",
    2: "Component-Whole({e1},{e2})",
    3: "Component-Whole({e2},{e1})",
    4: "Content-Container({e1},{e2})",
    5: "Content-Container({e2},{e1})",
    6: "Entity-Destination({e1},{e2})",
    7: "Entity-Destination({e2},{e1})",
    8: "Entity-Origin({e1},{e2})",
    9: "Entity-Origin({e2},{e1})",
    10: "Instrument-Agency({e1},{e2})",
    11: "Instrument-Agency({e2},{e1})",
    12: "Member-Collection({e1},{e2})",
    13: "Member-Collection({e2},{e1})",
    14: "Message-Topic({e1},{e2})",
    15: "Message-Topic({e2},{e1})",
    16: "Product-Producer({e1},{e2})",
    17: "Product-Producer({e2},{e1})",
    18: "Unknown({e1},{e2})",
}

PROMPT_TEMPLATE = "\"{sentence}\"\n. From the above sentence, the relationship between entity e1 and e2 is: {relation}"


In [3]:
from transformers import AutoConfig, AutoTokenizer

vicuna_config = AutoConfig.from_pretrained(MODEL_NAME)
vicuna_tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, padding_side="left", use_fast=False)

In [4]:
import re
import ray
import json
from datasets import load_dataset
from transformers import AutoTokenizer
from ray.data.preprocessors import BatchMapper, Chain

hf_dataset = load_dataset("sem_eval_2010_task_8")
ray_dataset = ray.data.from_huggingface(hf_dataset["train"])


def fill_prompt(batch):
    # Format train data
    batch["e1"] = batch["sentence"].apply(
        lambda x: re.search(r"<e1>(.*?)</e1>", x).group(1)
    )
    batch["e2"] = batch["sentence"].apply(
        lambda x: re.search(r"<e2>(.*?)</e2>", x).group(1)
    )
    batch["input_sentence"] = batch.apply(
        lambda row: PROMPT_TEMPLATE.format(
            sentence=row["sentence"],
            relation=RELATION_TEMPLATE[row["relation"]].format(
                e1="e1", #row["e1"],
                e2="e2"#row["e2"]
            ),
        ),
        axis=1,
    )
    return batch[["input_sentence"]]


def tokenize(batch):
    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, padding_side="left", use_fast=False)
    tokenizer.pad_token = tokenizer.eos_token
    ret = tokenizer(
        list(batch["input_sentence"]),
        truncation=True,
        max_length=128,
        padding="max_length",
        return_tensors="np",
    )
    ret["labels"] = ret["input_ids"].copy()
    return dict(ret)


prompt_mapper = BatchMapper(fill_prompt, batch_format="pandas")
tokenize_mapper = BatchMapper(tokenize, batch_format="pandas")
preprocessor = Chain(prompt_mapper, tokenize_mapper)


Found cached dataset sem_eval_2010_task_8 (/home/ray/.cache/huggingface/datasets/sem_eval_2010_task_8/default/1.0.0/8545d1995bbbade386acf5c4e2bef5589d8387ae0a93356407dfb54cdb234416)


  0%|          | 0/2 [00:00<?, ?it/s]

Snapshotting files: 100%|██████████| 46/46 [00:00<00:00, 8300.19file/s]
2023-06-30 15:15:51,482	INFO worker.py:1426 -- Connecting to existing Ray cluster at address: 10.0.59.66:6379...
2023-06-30 15:15:51,489	INFO worker.py:1607 -- Connected to Ray cluster. View the dashboard at [1m[32mhttps://session-vzyh3916u4zwmf1es6fazmbrgm.i.anyscaleuserdata-staging.com [39m[22m
2023-06-30 15:15:51,493	INFO packaging.py:346 -- Pushing file package 'gcs://_ray_pkg_e0ff1e773cbc0c5bc85a192c42b8a628.zip' (0.57MiB) to Ray cluster...
2023-06-30 15:15:51,494	INFO packaging.py:359 -- Successfully pushed file package 'gcs://_ray_pkg_e0ff1e773cbc0c5bc85a192c42b8a628.zip'.

Learn more here: https://docs.ray.io/en/master/data/faq.html#migrating-to-strict-mode[0m


In [5]:
import torch
import transformers
import pytorch_lightning as pl
from transformers import AutoTokenizer, AutoModelForCausalLM
from deepspeed.ops.adam import DeepSpeedCPUAdam


class ZeRO3Config:
    def __init__(self, pl_module):
        self.config = pl_module.trainer.strategy.config

    def __call__(self, *args, **kwargs):
        return self

    def is_zero3(self) -> bool:
        return True


def enable_transformers_pretrained_deepspeed_sharding(
    pl_module: "pl.LightningModule",
) -> None:
    transformers.deepspeed._hf_deepspeed_config_weak_ref = ZeRO3Config(pl_module)


class Vicuna13BModel(pl.LightningModule):
    def __init__(self, inference=False):
        super().__init__()
        torch.backends.cuda.matmul.allow_tf32 = True
        self.tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, padding_side="left", use_fast=False)
        if inference:
            with init_empty_weights():
                self.model_config = AutoConfig.from_pretrained(MODEL_NAME)
                self.model = AutoModelForCausalLM.from_config(self.model_config)
            self.model.tie_weights()

    def setup(self, stage) -> None:
        if not hasattr(self, "model"):
            enable_transformers_pretrained_deepspeed_sharding(self)
            self.model = AutoModelForCausalLM.from_pretrained(
                MODEL_NAME, trust_remote_code=True
            )
        if self.global_rank == 0:
            print("DeepSpeed Configs: ", self.trainer.strategy.config)
            print("Model Archetecture: ", self.model)

    def forward(self, batch):
        outputs = self.model(
            batch["input_ids"],
            attention_mask=batch["attention_mask"],
            labels=batch["labels"],
        )
        return outputs.loss

    def training_step(self, batch, batch_idx):
        # torch.cuda.empty_cache()
        loss = self.forward(batch)
        self.log("train_loss", loss, prog_bar=True, on_step=True, sync_dist=True)
        return loss

    def configure_optimizers(self):
        return DeepSpeedCPUAdam(self.parameters(), lr=2e-5)


[2023-06-30 15:15:55,034] [INFO] [real_accelerator.py:110:get_accelerator] Setting ds_accelerator to cuda (auto detect)


In [6]:
from pytorch_lightning.callbacks import TQDMProgressBar


# Create a customized progress bar for LightningTrainer
class VicunaProgressBar(TQDMProgressBar):
    def __init__(self, num_iters_per_epoch, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.num_iters_per_epoch = num_iters_per_epoch

    def on_train_epoch_start(self, trainer, *_):
        super().on_train_epoch_start(trainer, *_)
        self.train_progress_bar.reset(self.num_iters_per_epoch)


total_batches = ray_dataset.count()
num_iters_per_epoch = total_batches // (NUM_WORKERS * BATCH_SIZE_PER_WORKER)
progress_bar = VicunaProgressBar(num_iters_per_epoch)


In [7]:
from ray.train.lightning import LightningTrainer, LightningConfigBuilder
from transformers import AutoConfig

config = AutoConfig.from_pretrained(MODEL_NAME)
HIDDEN_SIZE = config.hidden_size

# We are using default values from huggingface
deepspeed_configs = {
    "zero_allow_untested_optimizer": True,
    "bf16": {"enabled": True},
    "zero_optimization": {
        "stage": 3,
        "offload_optimizer": {"device": "cpu", "pin_memory": True},
        "overlap_comm": True,
        "contiguous_gradients": True,
        "reduce_bucket_size": HIDDEN_SIZE * HIDDEN_SIZE,
        "stage3_prefetch_bucket_size": 0.9 * HIDDEN_SIZE * HIDDEN_SIZE,
        "stage3_param_persistence_threshold": 10 * HIDDEN_SIZE,
    },
}

lightning_config = (
    LightningConfigBuilder()
    .module(cls=Vicuna13BModel)
    .trainer(
        max_epochs=1,
        accelerator="gpu",
        precision="bf16-mixed",
        callbacks=[progress_bar],
        accumulate_grad_batches=2,
        limit_val_batches=1,
        num_sanity_val_steps=0,
    )
    .strategy(name="deepspeed", config=deepspeed_configs)
    .checkpointing(save_top_k=0, save_weights_only=True, save_last=True)
    .build()
)


In [8]:
from ray.air.config import CheckpointConfig, RunConfig, ScalingConfig

trainer = LightningTrainer(
    lightning_config=lightning_config,
    run_config=RunConfig(
        name="vicuna-13b-relation-extraction",
        storage_path="s3://anyscale-staging-data-cld-kvedzwag2qa8i5bjxuevf5i7/yunxuanx-test/vicuna-13b-test",
        checkpoint_config=CheckpointConfig(
            num_to_keep=1,
            _checkpoint_keep_all_ranks=True,
            _checkpoint_upload_from_workers=True,
        ),
    ),
    scaling_config=ScalingConfig(
        num_workers=NUM_WORKERS,
        use_gpu=True,
        resources_per_worker={"CPU": 15, "GPU": 1},
    ),
    datasets={"train": ray_dataset},
    datasets_iter_config={"batch_size": BATCH_SIZE_PER_WORKER},
    preprocessor=preprocessor,
)




In [9]:
result = trainer.fit()


0,1
Current time:,2023-06-30 15:36:40
Running for:,00:20:44.24
Memory:,16.0/249.1 GiB

Trial name,status,loc,iter,total time (s),train_loss,epoch,step
LightningTrainer_af77e_00000,TERMINATED,10.0.59.66:146292,1,1215.48,0.800781,0,31


[2m[36m(pid=146292)[0m [2023-06-30 15:16:02,052] [INFO] [real_accelerator.py:110:get_accelerator] Setting ds_accelerator to cuda (auto detect)


[2m[36m(LightningTrainer pid=146292)[0m The `preprocessor` arg to Trainer is deprecated. Apply preprocessor transformations ahead of time by calling `preprocessor.transform(ds)`. Support for the preprocessor arg will be dropped in a future release.
[2m[36m(LightningTrainer pid=146292)[0m [33mImportant: Ray Data requires schemas for all datasets in Ray 2.5. This means that standalone Python objects are no longer supported. In addition, the default batch format is fixed to NumPy. To revert to legacy behavior temporarily, set the environment variable RAY_DATA_STRICT_MODE=0 on all cluster processes.
[2m[36m(LightningTrainer pid=146292)[0m 
[2m[36m(LightningTrainer pid=146292)[0m Learn more here: https://docs.ray.io/en/master/data/faq.html#migrating-to-strict-mode[0m
[2m[36m(LightningTrainer pid=146292)[0m Starting distributed worker processes: ['146456 (10.0.59.66)', '77983 (10.0.42.136)', '77725 (10.0.12.187)', '78274 (10.0.42.159)', '78357 (10.0.16.61)', '77971 (10.0.39.

(pid=146292) - RandomizeBlockOrder 1:   0%|          | 0/1 [00:00<?, ?it/s]

(pid=146292) Running 0:   0%|          | 0/1 [00:00<?, ?it/s]

[2m[36m(RayTrainWorker pid=77983, ip=10.0.42.136)[0m [2023-06-30 15:16:15,404] [INFO] [real_accelerator.py:110:get_accelerator] Setting ds_accelerator to cuda (auto detect)


[2m[36m(RayTrainWorker pid=146456)[0m GPU available: True (cuda), used: True
[2m[36m(RayTrainWorker pid=146456)[0m TPU available: False, using: 0 TPU cores
[2m[36m(RayTrainWorker pid=146456)[0m IPU available: False, using: 0 IPUs
[2m[36m(RayTrainWorker pid=146456)[0m HPU available: False, using: 0 HPUs
[2m[36m(RayTrainWorker pid=146456)[0m `Trainer(limit_val_batches=1)` was configured so 1 batch will be used.
[2m[36m(RayTrainWorker pid=77844, ip=10.0.24.253)[0m initializing deepspeed distributed: GLOBAL_RANK: 10, MEMBER: 11/16
[2m[36m(RayTrainWorker pid=77844, ip=10.0.24.253)[0m Missing logger folder: /home/ray/ray_results/vicuna-13b-relation-extraction/LightningTrainer_af77e_00000_0_2023-06-30_15-15-56/rank_all/lightning_logs




Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]
[2m[36m(RayTrainWorker pid=77725, ip=10.0.12.187)[0m initializing deepspeed distributed: GLOBAL_RANK: 2, MEMBER: 3/16[32m [repeated 15x across cluster] (Ray deduplicates logs by default. Set RAY_DEDUP_LOGS=0 to disable log deduplication, or see https://docs.ray.io/en/master/ray-observability/ray-logging.html#log-deduplication for more options.)[0m
[2m[36m(RayTrainWorker pid=78093, ip=10.0.54.109)[0m Missing logger folder: /home/ray/ray_results/vicuna-13b-relation-extraction/LightningTrainer_af77e_00000_0_2023-06-30_15-15-56/rank_all/lightning_logs[32m [repeated 15x across cluster][0m
Loading checkpoint shards:  50%|█████     | 1/2 [00:22<00:22, 22.40s/it]
Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s][32m [repeated 15x across cluster][0m
Loading checkpoint shards: 100%|██████████| 2/2 [00:28<00:00, 14.19s/it]
Loading checkpoint shards:  50%|█████     | 1/2 [00:26<00:26, 26.31s/it][32m [repea

[2m[36m(RayTrainWorker pid=146456)[0m DeepSpeed Configs:  {'zero_allow_untested_optimizer': True, 'bf16': {'enabled': True}, 'zero_optimization': {'stage': 3, 'offload_optimizer': {'device': 'cpu', 'pin_memory': True}, 'overlap_comm': True, 'contiguous_gradients': True, 'reduce_bucket_size': 16777216, 'stage3_prefetch_bucket_size': 15099494.4, 'stage3_param_persistence_threshold': 40960}, 'gradient_accumulation_steps': 2, 'train_micro_batch_size_per_gpu': 1, 'gradient_clipping': 0.0}
[2m[36m(RayTrainWorker pid=146456)[0m Model Archetecture:  LlamaForCausalLM(
[2m[36m(RayTrainWorker pid=146456)[0m   (model): LlamaModel(
[2m[36m(RayTrainWorker pid=146456)[0m     (embed_tokens): Embedding(32000, 4096, padding_idx=0)
[2m[36m(RayTrainWorker pid=146456)[0m     (layers): ModuleList(
[2m[36m(RayTrainWorker pid=146456)[0m       (0-31): 32 x LlamaDecoderLayer(
[2m[36m(RayTrainWorker pid=146456)[0m         (self_attn): LlamaAttention(
[2m[36m(RayTrainWorker pid=146456)[0m 

[2m[36m(RayTrainWorker pid=77844, ip=10.0.24.253)[0m Using /home/ray/.cache/torch_extensions/py310_cu118 as PyTorch extensions root...
Loading checkpoint shards: 100%|██████████| 2/2 [00:32<00:00, 16.15s/it][32m [repeated 15x across cluster][0m
[2m[36m(RayTrainWorker pid=79012, ip=10.0.33.57)[0m Detected CUDA files, patching ldflags
[2m[36m(RayTrainWorker pid=79012, ip=10.0.33.57)[0m Emitting ninja build file /home/ray/.cache/torch_extensions/py310_cu118/cpu_adam/build.ninja...
[2m[36m(RayTrainWorker pid=79012, ip=10.0.33.57)[0m Building extension module cpu_adam...
[2m[36m(RayTrainWorker pid=79012, ip=10.0.33.57)[0m Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N)
[2m[36m(RayTrainWorker pid=77844, ip=10.0.24.253)[0m Loading extension module cpu_adam...


[2m[36m(RayTrainWorker pid=77844, ip=10.0.24.253)[0m ninja: no work to do.
[2m[36m(RayTrainWorker pid=77844, ip=10.0.24.253)[0m Time to load cpu_adam op: 2.334331512451172 seconds


[2m[36m(RayTrainWorker pid=78320, ip=10.0.24.147)[0m LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0][32m [repeated 15x across cluster][0m
[2m[36m(RayTrainWorker pid=78274, ip=10.0.42.159)[0m Building extension module utils...
[2m[36m(RayTrainWorker pid=77674, ip=10.0.50.184)[0m Loading extension module utils...


[2m[36m(RayTrainWorker pid=77674, ip=10.0.50.184)[0m Time to load utils op: 0.07638359069824219 seconds
[2m[36m(RayTrainWorker pid=146456)[0m Parameter Offload: Total persistent parameters: 266240 in 65 params


[2m[36m(RayTrainWorker pid=77725, ip=10.0.12.187)[0m No modifications detected for re-loaded extension module utils, skipping build step...
[2m[36m(RayTrainWorker pid=77725, ip=10.0.12.187)[0m Using /home/ray/.cache/torch_extensions/py310_cu118 as PyTorch extensions root...[32m [repeated 32x across cluster][0m
[2m[36m(RayTrainWorker pid=78843, ip=10.0.34.160)[0m Detected CUDA files, patching ldflags[32m [repeated 15x across cluster][0m
[2m[36m(RayTrainWorker pid=146456)[0m Emitting ninja build file /home/ray/.cache/torch_extensions/py310_cu118/utils/build.ninja...[32m [repeated 31x across cluster][0m
[2m[36m(RayTrainWorker pid=78843, ip=10.0.34.160)[0m Building extension module cpu_adam...[32m [repeated 15x across cluster][0m
[2m[36m(RayTrainWorker pid=146456)[0m Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N)[32m [repeated 31x across cluster][0m
[2m[36m(RayTrainWorker pid=78843, ip=10.0.34.

[2m[36m(RayTrainWorker pid=146456)[0m ninja: no work to do.[32m [repeated 31x across cluster][0m
[2m[36m(RayTrainWorker pid=78843, ip=10.0.34.160)[0m Time to load cpu_adam op: 2.3675966262817383 seconds[32m [repeated 15x across cluster][0m
[2m[36m(RayTrainWorker pid=77725, ip=10.0.12.187)[0m Time to load utils op: 0.0006890296936035156 seconds[32m [repeated 16x across cluster][0m


[2m[36m(RayTrainWorker pid=146456)[0m 
[2m[36m(RayTrainWorker pid=146456)[0m   | Name  | Type             | Params | Params per Device
[2m[36m(RayTrainWorker pid=146456)[0m ---------------------------------------------------------------
[2m[36m(RayTrainWorker pid=146456)[0m 0 | model | LlamaForCausalLM | 6.7 B  | 421 M            
[2m[36m(RayTrainWorker pid=146456)[0m ---------------------------------------------------------------
[2m[36m(RayTrainWorker pid=146456)[0m 6.7 B     Trainable params
[2m[36m(RayTrainWorker pid=146456)[0m 0         Non-trainable params
[2m[36m(RayTrainWorker pid=146456)[0m 6.7 B     Total params
[2m[36m(RayTrainWorker pid=146456)[0m 26,953.662Total estimated model params size (MB)


Epoch 0:   0%|          | 0/62 [00:00<?, ?it/s]


[2m[36m(RayTrainWorker pid=146456)[0m   rank_zero_warn(


Epoch 0:   2%|▏         | 1/62 [00:20<20:21, 20.03s/it, v_num=0, train_loss=6.250]
[2m[36m(RayTrainWorker pid=146456)[0m Time to load utils op: 0.0003604888916015625 seconds[32m [repeated 15x across cluster][0m
Epoch 0:   3%|▎         | 2/62 [00:39<19:58, 19.97s/it, v_num=0, train_loss=6.280]
Epoch 0:   5%|▍         | 3/62 [00:55<18:14, 18.55s/it, v_num=0, train_loss=5.120]
Epoch 0:   6%|▋         | 4/62 [01:15<18:19, 18.96s/it, v_num=0, train_loss=5.060]
Epoch 0:   8%|▊         | 5/62 [01:32<17:39, 18.58s/it, v_num=0, train_loss=2.670]
Epoch 0:  10%|▉         | 6/62 [01:53<17:38, 18.90s/it, v_num=0, train_loss=2.700]
Epoch 0:  11%|█▏        | 7/62 [02:10<17:05, 18.65s/it, v_num=0, train_loss=1.690]
Epoch 0:  13%|█▎        | 8/62 [02:29<16:49, 18.69s/it, v_num=0, train_loss=1.730]
Epoch 0:  15%|█▍        | 9/62 [02:45<16:14, 18.38s/it, v_num=0, train_loss=1.660]
Epoch 0:  16%|█▌        | 10/62 [03:05<16:02, 18.51s/it, v_num=0, train_loss=1.620]
Epoch 0:  18%|█▊        | 11/62 [03:

[2m[36m(RayTrainWorker pid=146456)[0m No modifications detected for re-loaded extension module utils, skipping build step...[32m [repeated 15x across cluster][0m
[2m[36m(RayTrainWorker pid=146456)[0m Using /home/ray/.cache/torch_extensions/py310_cu118 as PyTorch extensions root...[32m [repeated 15x across cluster][0m
[2m[36m(RayTrainWorker pid=146456)[0m Loading extension module utils...[32m [repeated 15x across cluster][0m


Epoch 0: : 63it [18:36, 17.72s/it, v_num=0, train_loss=0.801]                      


[2m[36m(RayTrainWorker pid=146456)[0m Uploading checkpoint files from worker rank 0 to cloud URI s3://anyscale-staging-data-cld-kvedzwag2qa8i5bjxuevf5i7/yunxuanx-test/vicuna-13b-test/vicuna-13b-relation-extraction/LightningTrainer_af77e_00000_0_2023-06-30_15-15-56/checkpoint_000000.
[2m[36m(RayTrainWorker pid=78070, ip=10.0.32.230)[0m Done uploading checkpoint files.
[2m[36m(RayTrainWorker pid=78070, ip=10.0.32.230)[0m Uploading checkpoint files from worker rank 12 to cloud URI s3://anyscale-staging-data-cld-kvedzwag2qa8i5bjxuevf5i7/yunxuanx-test/vicuna-13b-test/vicuna-13b-relation-extraction/LightningTrainer_af77e_00000_0_2023-06-30_15-15-56/checkpoint_000000.[32m [repeated 15x across cluster][0m
[2m[36m(RayTrainWorker pid=78053, ip=10.0.42.191)[0m Done uploading checkpoint files.[32m [repeated 15x across cluster][0m


Epoch 0: : 63it [19:04, 18.17s/it, v_num=0, train_loss=0.801]


[2m[36m(RayTrainWorker pid=146456)[0m `Trainer.fit` stopped: `max_epochs=1` reached.
[2m[36m(LightningTrainer pid=146292)[0m Uploading trial artifacts took 16.813 s, which may be a performance bottleneck. Consider saving fewer/smaller artifacts to the trial log directory, or disable artifact syncing with `SyncConfig(sync_artifacts=False)`.
2023-06-30 15:36:40,912	INFO tune.py:1148 -- Total run time: 1244.32 seconds (1243.92 seconds for the tuning loop).


In [19]:
result

Result(
  metrics={'_report_on': 'train_epoch_end', 'train_loss': 0.80078125, 'epoch': 0, 'step': 31, 'should_checkpoint': True, 'done': True, 'trial_id': 'af77e_00000', 'experiment_tag': '0'},
  path='s3://anyscale-staging-data-cld-kvedzwag2qa8i5bjxuevf5i7/yunxuanx-test/vicuna-13b-test/vicuna-13b-relation-extraction/LightningTrainer_af77e_00000_0_2023-06-30_15-15-56',
  checkpoint=LightningCheckpoint(uri=s3://anyscale-staging-data-cld-kvedzwag2qa8i5bjxuevf5i7/yunxuanx-test/vicuna-13b-test/vicuna-13b-relation-extraction/LightningTrainer_af77e_00000_0_2023-06-30_15-15-56/checkpoint_000000)
)

In [11]:
!awsv2 configure set s3.max_concurrent_requests 32
!awsv2 configure set default.s3.preferred_transfer_client crt
!awsv2 configure set default.s3.target_bandwidth 100Gb/s
!awsv2 configure set default.s3.multipart_chunksize 8MB

In [12]:
import os

os.system(f"awsv2 s3 sync {result.checkpoint.uri} /mnt/local_storage/checkpoint")

download: s3://anyscale-staging-data-cld-kvedzwag2qa8i5bjxuevf5i7/yunxuanx-test/vicuna-13b-test/vicuna-13b-relation-extraction/LightningTrainer_af77e_00000_0_2023-06-30_15-15-56/checkpoint_000000/.RANK_0.files to ../../../mnt/local_storage/checkpoint/.RANK_0.files
download: s3://anyscale-staging-data-cld-kvedzwag2qa8i5bjxuevf5i7/yunxuanx-test/vicuna-13b-test/vicuna-13b-relation-extraction/LightningTrainer_af77e_00000_0_2023-06-30_15-15-56/checkpoint_000000/.RANK_4.files to ../../../mnt/local_storage/checkpoint/.RANK_4.files
download: s3://anyscale-staging-data-cld-kvedzwag2qa8i5bjxuevf5i7/yunxuanx-test/vicuna-13b-test/vicuna-13b-relation-extraction/LightningTrainer_af77e_00000_0_2023-06-30_15-15-56/checkpoint_000000/.tune_metadata to ../../../mnt/local_storage/checkpoint/.tune_metadata
download: s3://anyscale-staging-data-cld-kvedzwag2qa8i5bjxuevf5i7/yunxuanx-test/vicuna-13b-test/vicuna-13b-relation-extraction/LightningTrainer_af77e_00000_0_2023-06-30_15-15-56/checkpoint_000000/_metada

0

In [13]:
import torch
import ray
import pytorch_lightning as pl
from transformers import AutoConfig, AutoTokenizer, AutoModelForCausalLM
from accelerate import (
    init_empty_weights,
    infer_auto_device_map,
    load_checkpoint_and_dispatch,
)


In [14]:
from deepspeed.utils.zero_to_fp32 import get_fp32_state_dict_from_zero_checkpoint


def extract_fp32_ckpt_from_zero(zero_ckpt_dir):
    state_dict = get_fp32_state_dict_from_zero_checkpoint(zero_ckpt_dir)
    vicuna_state_dict = {
        k.replace("_forward_module.model.", ""): v for k, v in state_dict.items()
    }
    torch.save(vicuna_state_dict, os.path.join(zero_ckpt_dir, "full_model.pt"))


full_model_ckpt_path = "/mnt/local_storage/checkpoint/model/full_model.pt"
extract_fp32_ckpt_from_zero("/mnt/local_storage/checkpoint/model")


Processing zero checkpoint '/mnt/local_storage/checkpoint/model/checkpoint'
Detected checkpoint of type zero stage 3, world_size: 16
Parsing checkpoint created by deepspeed==0.9.4
Reconstructed Trainable fp32 state dict with 291 params 6738415616 elements


In [15]:
# Initialize a model on meta device
with init_empty_weights():
    config = AutoConfig.from_pretrained(MODEL_NAME)
    meta_model = AutoModelForCausalLM.from_config(config)
meta_model.tie_weights()

# Define the device mapping
device_map = infer_auto_device_map(
    meta_model,
    max_memory={0: "15GB", "cpu": "60GB"},
    no_split_module_classes=["LlamaDecoderLayer"],
)

# Load the model parameters
model = load_checkpoint_and_dispatch(
    meta_model,
    checkpoint=full_model_ckpt_path,
    device_map=device_map,
)


The model weights are not tied. Please use the `tie_weights` method before using the `infer_auto_device` function.
The model weights are not tied. Please use the `tie_weights` method before using the `infer_auto_device` function.


In [16]:
from transformers import pipeline

generator = pipeline(
    "text-generation",
    model=model,
    device_map=device_map,
    tokenizer=AutoTokenizer.from_pretrained(MODEL_NAME, padding_side="left", use_fast=False),
)


Xformers is not installed correctly. If you want to use memory_efficient_attention to accelerate training use the following command to install Xformers
pip install xformers.


In [21]:
for i in range(10):
    testcase = hf_dataset["test"][i]
    prompt = PROMPT_TEMPLATE.format(sentence=testcase["sentence"], relation="")
    output = generator(prompt, max_new_tokens=20, do_sample=False)

    print("Answer:", RELATION_TEMPLATE[testcase["relation"]])
    print("Model Output:", output)


Answer: Message-Topic({e1},{e2})
Model Output: [{'generated_text': 'The most common <e1>audits</e1> were about <e2>waste</e2> and recycling.\nIn the above sentence, the relationship between the two tagged entities is: 1. Entity-Origin(e1,e2)'}]
Answer: Product-Producer({e2},{e1})
Model Output: [{'generated_text': 'The <e1>company</e1> fabricates plastic <e2>chairs</e2>.\nIn the above sentence, the relationship between the two tagged entities is: 1. Unknown(e1,e2)'}]
Answer: Instrument-Agency({e2},{e1})
Model Output: [{'generated_text': 'The school <e1>master</e1> teaches the lesson with a <e2>stick</e2>.\nIn the above sentence, the relationship between the two tagged entities is: 1. Entity-Origin(e1,e2)'}]
Answer: Entity-Destination({e1},{e2})
Model Output: [{'generated_text': 'The suspect dumped the dead <e1>body</e1> into a local <e2>reservoir</e2>.\nIn the above sentence, the relationship between the two tagged entities is: 1. Unknown(e1,e2)'}]
Answer: Cause-Effect({e2},{e1})
Model 



Answer: Component-Whole({e1},{e2})
Model Output: [{'generated_text': 'The disgusting scene was retaliation against her brother Philip who rents the <e1>room</e1> inside this apartment <e2>house</e2> on Lombard street.\nIn the above sentence, the relationship between the two tagged entities is: 1. Unknown(e1,e2)'}]
Answer: Message-Topic({e1},{e2})
Model Output: [{'generated_text': 'This <e1>thesis</e1> defines the <e2>clinical characteristics</e2> of amyloid disease.\nIn the above sentence, the relationship between the two tagged entities is: 1. Entity-Origin(e1,e2)'}]


In [18]:
print("Testcase:", testcase)
print("Output:", output)

Testcase: {'sentence': 'The most common <e1>audits</e1> were about <e2>waste</e2> and recycling.', 'relation': 14}
Output: [{'generated_text': "This is my first time using the <e1>saw</e1> and I'm not sure"}]
