In [None]:
import tensorflow as tf
import os

In [None]:
# Connect this Colab runtime to its TPU workers and initialise the TPU system.
# The worker address comes from the COLAB_TPU_ADDR environment variable; a
# missing variable raises KeyError here.
resolver = tf.distribute.cluster_resolver.TPUClusterResolver(
    tpu=f"grpc://{os.environ['COLAB_TPU_ADDR']}"
)
tf.config.experimental_connect_to_cluster(resolver)
tf.tpu.experimental.initialize_tpu_system(resolver)

In [None]:
# Build a TensorFlow distribution strategy on top of the TPU resolver.
# NOTE(review): `strategy` is not referenced by any later cell visible in this
# notebook (training goes through the ratsnlp trainer) — confirm it is needed.
strategy = tf.distribute.TPUStrategy(resolver)

In [None]:
# Install the ratsnlp package (provides the `nlpbook` helpers used below).
# NOTE(review): the version is unpinned, so results may vary between runs.
!pip install ratsnlp

In [None]:
# Mount Google Drive at /gdrive; the checkpoint directory configured in the
# training arguments lives under this mount point.
from google.colab import drive
drive.mount('/gdrive', force_remount = True)

In [None]:
from torch.cuda import is_available
import torch
from ratsnlp.nlpbook.qa import QATrainArguments

# Fine-tuning configuration for KcBERT on KorQuAD v1.
# Checkpoints are written below the Google Drive mount at /gdrive.
args = QATrainArguments(
    pretrained_model_name="beomi/kcbert-base",
    downstream_corpus_name="korquad-v1",
    downstream_model_dir="/gdrive/My Drive/nlpbook/checkpoint-qa",
    max_seq_length=128,
    max_query_length=32,
    doc_stride=64,
    # With CUDA available: batch size 32 and no TPU cores.
    # Without CUDA: batch size 4 and 8 TPU cores.
    batch_size=32 if is_available() else 4,
    learning_rate=5e-5,
    epochs=3,
    tpu_cores=0 if is_available() else 8,
    seed=7,
)

In [None]:
# Seed the run from the training arguments (args.seed is 7 above).
# Presumably seeds the random generators used in training — verify against ratsnlp.
from ratsnlp import nlpbook
nlpbook.set_seed(args)

In [None]:
# Configure logging for the run from the training arguments.
nlpbook.set_logger(args)

In [None]:
# Fetch the downstream corpus named in args (downstream_corpus_name="korquad-v1").
nlpbook.download_downstream_dataset(args)

In [None]:
from transformers import BertTokenizer

# Load the tokenizer that belongs to the pretrained checkpoint; input text is
# not lower-cased.
tokenizer = BertTokenizer.from_pretrained(args.pretrained_model_name, do_lower_case=False)

In [None]:
from ratsnlp.nlpbook.qa import KorQuADV1Corpus, QADataset

# The corpus object is shared by the training and validation datasets.
corpus = KorQuADV1Corpus()

# Training split, built with the tokenizer and arguments defined above.
train_dataset = QADataset(args=args, corpus=corpus, tokenizer=tokenizer, mode="train")

In [None]:
from torch.utils.data import DataLoader, RandomSampler

# Training batches: examples are drawn in random order without replacement,
# and the final (possibly smaller) batch is kept.
train_dataloader = DataLoader(
    dataset=train_dataset,
    sampler=RandomSampler(train_dataset, replacement=False),
    batch_size=args.batch_size,
    num_workers=args.cpu_workers,
    collate_fn=nlpbook.data_collator,
    drop_last=False,
)

In [None]:
from torch.utils.data import SequentialSampler

# Validation split, built from the same corpus and tokenizer as the training split.
val_dataset = QADataset(args=args, corpus=corpus, tokenizer=tokenizer, mode="val")

# Validation batches are read in dataset order; the final partial batch is kept.
val_dataloader = DataLoader(
    dataset=val_dataset,
    sampler=SequentialSampler(val_dataset),
    batch_size=args.batch_size,
    num_workers=args.cpu_workers,
    collate_fn=nlpbook.data_collator,
    drop_last=False,
)

In [None]:
from transformers import BertConfig, BertForQuestionAnswering

# Configuration of the pretrained checkpoint named in args.
pretrained_model_config = BertConfig.from_pretrained(args.pretrained_model_name)

# Question-answering model initialised from the pretrained checkpoint.
model = BertForQuestionAnswering.from_pretrained(
    args.pretrained_model_name, config=pretrained_model_config
)

In [None]:
# Wrap the model and the training arguments in the ratsnlp QA task object,
# which is what gets handed to trainer.fit() below.
from ratsnlp.nlpbook.qa import QATask
task = QATask(model, args)

In [None]:
# NOTE(review): this cell repeats the TPU connection/initialisation already run
# near the top of the notebook, and rebinds `resolver`. Confirm whether a second
# initialisation is intended before training; otherwise this cell is redundant.
resolver = tf.distribute.cluster_resolver.TPUClusterResolver(tpu='grpc://' + os.environ['COLAB_TPU_ADDR'])
tf.config.experimental_connect_to_cluster(resolver)
tf.tpu.experimental.initialize_tpu_system(resolver)

In [None]:
# NOTE(review): duplicate of the earlier `strategy` cell; `strategy` is not
# referenced by any later cell visible in this notebook — confirm it is needed.
strategy = tf.distribute.TPUStrategy(resolver)

In [None]:
# Build the trainer from the training arguments.
# Presumably a PyTorch Lightning Trainer configured from args — verify against ratsnlp.
trainer = nlpbook.get_trainer(args)

In [None]:
# Run fine-tuning: train on train_dataloader and validate on val_dataloader.
trainer.fit(
    task,
    train_dataloader = train_dataloader,
    val_dataloaders = val_dataloader,
)

In [None]:
# Inference part of the notebook starts here.
# NOTE(review): ratsnlp is already installed by an earlier cell; this repeat is
# only needed when the inference cells are run in a fresh runtime.
!pip install ratsnlp

In [None]:
# Mount Google Drive at /gdrive so the fine-tuned checkpoint can be read.
# NOTE(review): repeats the earlier mount cell.
from google.colab import drive
drive.mount('/gdrive', force_remount = True)

In [None]:
from ratsnlp.nlpbook.qa import QADeployArguments

# Inference configuration. This must be QADeployArguments (the class imported
# above), not QATrainArguments: the checkpoint-loading cell below reads
# `args.downstream_model_checkpoint_path`, which is a deploy-arguments attribute,
# and QATrainArguments is not imported in a fresh inference session.
args = QADeployArguments(
    pretrained_model_name="beomi/kcbert-base",
    downstream_model_dir="/gdrive/My Drive/nlpbook/checkpoint-qa",
    max_seq_length=128,
    max_query_length=32,
)

In [None]:
from transformers import BertTokenizer

# Tokenizer for inference, loaded from the same pretrained checkpoint name;
# input text is not lower-cased.
tokenizer = BertTokenizer.from_pretrained(args.pretrained_model_name, do_lower_case=False)

In [None]:
import torch

# Read the fine-tuned checkpoint from disk, mapping all tensors to the CPU.
# torch.load unpickles the file, so only load checkpoints you trust.
cpu_device = torch.device("cpu")
fine_tuned_model_ckpt = torch.load(args.downstream_model_checkpoint_path, map_location=cpu_device)

In [None]:
# Fixed import: the class is `BertConfig`; the original `Bertconfig` spelling
# raises ImportError.
from transformers import BertConfig

# Configuration of the pretrained checkpoint, used to build the model skeleton.
pretrained_model_config = BertConfig.from_pretrained(
    args.pretrained_model_name,
)

In [None]:
# Build the QA model from the configuration only (no pretrained weights are
# loaded here); the fine-tuned weights are loaded from the checkpoint next.
from transformers import BertForQuestionAnswering
model = BertForQuestionAnswering(pretrained_model_config)

In [None]:
# Copy the fine-tuned weights into the model. The checkpoint's parameter names
# carry a "model." segment that the bare BertForQuestionAnswering does not use,
# so it is removed from each key first. Note that str.replace drops every
# occurrence of "model." in a key, not only a leading one.
# (The original statement was split in the middle of `fine_tuned_model_ckpt`,
# which is a SyntaxError.)
model.load_state_dict(
    {
        k.replace("model.", ""): v
        for k, v in fine_tuned_model_ckpt['state_dict'].items()
    }
)

In [None]:
# Put the model in evaluation mode (e.g. dropout disabled) for inference.
model.eval()