# BERT PRETRAINING

In [1]:
%%capture
# !/usr/bin/python
# Copyright 2022 VMware, Inc.
# SPDX-License-Identifier: Apache-2.0

In [2]:
# imports for setting seed 
import torch 
import random
import numpy as np

# required to launch training with accelerate in a jupyter notebook
from accelerate import notebook_launcher

# import the config, training function from the BERT Pretraining library
from bert_pretraining.bert_pretraining import run_pretraining, Pretraining_Config

### Fix the seed for reproducability

In [3]:
#set seed value for reproducability

SEED_VALUE = 0
random.seed(SEED_VALUE)
torch.manual_seed(SEED_VALUE)
np.random.seed(SEED_VALUE)

### Customize the Config parameters

default parameters:

	model_name:	CUSTOM_BERT
	is_base:	True
	max_seq_length:	128
	max_predictions_per_seq:	20
	num_train_steps:	10000
	num_warmup_steps:	10
	learning_rate:	1e-05
	train_batch_size:	258
	save_intermediate_checkpoints:	True
	save_intermediate_checkpoints_steps:	25000
	eval_batch_size:	258
	max_eval_steps:	10000
	eval_point:	10000
	split_ratio:	None
	init_checkpoint:	
	input_file:	./input/demo_MSL128.tfrecord
	eval_file:	./input/demo_MSL128.tfrecord
	log_csv:	./eval_results.csv
	output_dir:	./ckpts
	num_gpu:	3


In [None]:
pretraining_config = Pretraining_Config()

# Modify your parameters 
# The parameters listed are for a demo run
pretraining_config.model_name = 'DEMOBERT'
pretraining_config.train_batch_size = 8
pretraining_config.eval_batfch_size = 8
pretraining_config.num_train_steps = 500
pretraining_config.num_warmup_steps = 8
pretraining_config.max_eval_steps = 100
pretraining_config.num_gpu = 2


### Pretrain your model

Expect the warning :

`Some weights of BertForPreTraining were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['cls.predictions.decoder.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.`

It's telling us to train it on a downstream task before using it for inference but we dont need to worry about it now as we are just pretraining!


In [5]:
notebook_launcher(run_pretraining, [pretraining_config], num_processes=pretraining_config.num_gpu)

Launching training on 2 GPUs.
PRETRAINING_CONFIG PARAMS:
	model_name:	DEMOBERT
	is_base:	True
	max_seq_length:	128
	max_predictions_per_seq:	20
	num_train_steps:	500
	num_warmup_steps:	8
	learning_rate:	1e-05
	train_batch_size:	8
	save_intermediate_checkpoints:	True
	save_intermediate_checkpoints_steps:	25000
	eval_batch_size:	258
	max_eval_steps:	100
	eval_point:	10000
	split_ratio:	None
	init_checkpoint:	
	input_file:	./input/demo_MSL128.tfrecord
	eval_file:	./input/demo_MSL128.tfrecord
	log_csv:	./eval_results.csv
	output_dir:	./ckpts
	num_gpu:	2
	
DEVICES IN USE:cuda:1

cuda:0


Some weights of BertForPreTraining were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['cls.predictions.decoder.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


DATASET PREPARED


Some weights of BertForPreTraining were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['cls.predictions.decoder.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


_TRAINING BERT MODEL_
DEMOBERT


TRAINING:	:   0%|          | 0/508 [00:00<?, ?it/s]



EVALUATING:	:   0%|          | 0/100 [00:00<?, ?it/s]



EVALUATING:	:   0%|          | 0/100 [00:00<?, ?it/s]



SAVING THE TRAINING LOG CSV
PRETRAINING TIME:	174.1887822151184
