In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/bert-base-uncased/config.json
/kaggle/input/bert-base-uncased/pytorch_model.bin
/kaggle/input/bert-base-uncased/vocab.txt
/kaggle/input/google-quest-challenge/sample_submission.csv
/kaggle/input/google-quest-challenge/train.csv
/kaggle/input/google-quest-challenge/test.csv


In [2]:
!curl https://raw.githubusercontent.com/pytorch/xla/master/contrib/scripts/env-setup.py -o pytorch-xla-env-setup.py
!python pytorch-xla-env-setup.py --version nightly --apt-packages libomp5 libopenblas-dev

  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100  5116  100  5116    0     0  17950      0 --:--:-- --:--:-- --:--:-- 17950
Updating... This may take around 2 minutes.
Updating TPU runtime to pytorch-nightly ...
Found existing installation: torch 1.5.0
Uninstalling torch-1.5.0:
  Successfully uninstalled torch-1.5.0
Found existing installation: torchvision 0.6.0a0+35d732a
Uninstalling torchvision-0.6.0a0+35d732a:
Done updating TPU runtime
  Successfully uninstalled torchvision-0.6.0a0+35d732a
Copying gs://tpu-pytorch/wheels/torch-nightly-cp37-cp37m-linux_x86_64.whl...
\ [1 files][121.5 MiB/121.5 MiB]                                                
Operation completed over 1 objects/121.5 MiB.                                    
Copying gs://tpu-pytorch/wheels/torch_xla-nightly-cp37-cp37m-linux_x86_64.whl...
\ [1 files][128.0 MiB/128.0 MiB]                                    

In [20]:
import transformers
from transformers import AdamW, get_linear_schedule_with_warmup

import torch
import torch.nn as nn 

import pandas as pd
import numpy as np
from sklearn import model_selection

import torch_xla.core.xla_model as xm #using TPUs
import torch_xla.distributed.xla_multiprocessing as xmp
import torch_xla.distributed.parallel_loader as pl
from scipy import stats

import pandas as pd
import numpy as np

import warnings
warnings.filterwarnings('ignore')


In [5]:
class BERTModel(nn.Module):
    def __init__(self, bert_path):
        super(BERTModel, self).__init__()

        self.bert_path = bert_path
        self.bert = transformers.BertModel.from_pretrained(self.bert_path)
        self.bert_drop = nn.Dropout(0.3)
        self.out = nn.Linear(768, 30)

    def forward(self, ids, attention_mask , token_type_ids):
        _, output2 = self.bert(ids, attention_mask, token_type_ids)
        dropout = self.bert_drop(output2)
        output = self.out(dropout)
        return output 

In [42]:
class BERTdataset():
    def __init__(self, qtitle, qbody, answer, targets, tokenizer, max_len):
        super(BERTdataset, self).__init__()
        self.qtitle = qtitle
        self.qbody = qbody
        self.answer = answer
        self.targets = targets
        self.tokenizer = tokenizer
        self.max_len = max_len
    
    def __len__(self):
        return len(self.answer)
    
    def __getitem__(self, idx):
        title = str(self.qtitle[idx])
        body = str(self.qbody[idx])
        answer = str(self.answer[idx])

        input = self.tokenizer.encode_plus(
            f"{title} {body}",
            answer,
            add_special_tokens=True,
            max_len= self.max_len 
        )

        ids = input["input_ids"][0:511]
        mask = input["attention_mask"][0:511]
        token_type_ids = input["token_type_ids"][0:511]

        padding = int(self.max_len - len(ids))
        
        padded_ids = ids + ([0] * padding)
        padded_mask = mask + ([0] * padding)
        padded_token = token_type_ids + ([0] * padding)

        return {
            "ids" : torch.tensor(padded_ids, dtype=torch.long) ,
            "mask" : torch.tensor(padded_mask, dtype=torch.long),
            "token" : torch.tensor(padded_token, dtype=torch.long),
            "targets" : torch.tensor(self.targets[idx,:][0:513], dtype= torch.float)
        }

In [46]:
def loss_fn(outputs, targets):
    return nn.BCEWithLogitsLoss()(outputs, targets)


def train_fn(model, dataloader, optimizer, device, scheduler=None):
    model.train()
    for batch_id, data in enumerate(dataloader):
        ids = (data["ids"]).to(device, dtype=torch.long)
        mask = (data["mask"]).to(device, dtype=torch.long)
        token = (data["token"]).to(device, dtype=torch.long)
        target = (data["targets"]).to(device, dtype=torch.float)

        optimizer.zero_grad()
        output = model(ids=ids, attention_mask=mask, token_type_ids=token)
        loss = loss_fn(output, targets=target)
        loss.backward()
        xm.optimizer_step(optimizer)

        if scheduler is not None:
            scheduler.step()
        
        if batch_id % 10 == 0:
            xm.master_print(f"batch = {batch_id}, loss = {loss}")


def eval_fn(model, dataloader, device):
    model.eval()
    targets=[]
    outputs=[]
    for batch_id, data in enumerate(dataloader):
        ids = (data["ids"]).to(device, dtype=torch.long)
        mask = (data["mask"]).to(device, dtype=torch.long)
        token = (data["token"]).to(device, dtype=torch.long)
        target = (data["targets"]).to(device, dtype=torch.float)

        output = model(ids=ids, attention_mask=mask, token_type_ids=token)
        loss = loss_fn(output, targets=target)

        targets.append(target.cpu().detach().numpy())
        outputs.append(output.cpu().detach().numpy())

        target = np.vstack(targets)
        output =  np.vstack(outputs)

        return output, target

In [30]:
def training(index):
    TRAIN_BS = 16
    MAX_LEN = 512
    TEST_BS = 8
    EPOCHS = 20
    DEVICE = xm.xla_device()

    df = pd.read_csv("../input/google-quest-challenge/train.csv").fillna("none")
    train_df, valid_df = model_selection.train_test_split(df, test_size=0.1, random_state=45)
    
    train_df.reset_index(drop=True)
    valid_df.reset_index(drop=True)

    sample = pd.read_csv("../input/google-quest-challenge/sample_submission.csv")
    columns = list(sample.drop("qa_id", axis=1).columns)

    train_targets = train_df[columns].values
    valid_targets = valid_df[columns].values

    tokenizer = transformers.BertTokenizer.from_pretrained("../input/bert-base-uncased")

    train_dataset = BERTdataset(
        qtitle = train_df.question_title.values,
        qbody = train_df.question_body.values,
        answer = train_df.answer.values,
        targets = train_targets,
        tokenizer= tokenizer,
        max_len= MAX_LEN
    )

    train_sampler = torch.utils.data.DistributedSampler(
        train_dataset,
        num_replicas = xm.xrt_world_size(), #gets the number of devices
        rank = xm.get_ordinal(),
        shuffle=True
    )

    train_dataloader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size = TRAIN_BS,
        sampler = train_sampler, #replace shuffle with sampler for multiprocessing on TPU
    )

    valid_dataset = BERTdataset(
        qtitle = valid_df.question_title.values,
        qbody = valid_df.question_body.values,
        answer = valid_df.answer.values,
        targets = valid_targets,
        tokenizer= tokenizer,
        max_len= MAX_LEN
    )

    valid_sampler = torch.utils.data.DistributedSampler(
        valid_dataset,
        num_replicas = xm.xrt_world_size(),    #gets the number of devices
        rank = xm.get_ordinal(),
    )

    valid_dataloader = torch.utils.data.DataLoader(
        valid_dataset,
        batch_size = TEST_BS,
        sampler= valid_sampler    #replace shuffle with sampler for multiprocessing on TPU
    )

    model = BERTModel("../input/bert-base-uncased")
    model.to(DEVICE)

    optimizer = AdamW(model.parameters(), lr = (3e-5 * xm.xrt_world_size())) 
    num_training_steps = int((len(train_dataset)/TRAIN_BS/xm.xrt_world_size()) * EPOCHS)
    scheduler = get_linear_schedule_with_warmup(optimizer,
                                                num_warmup_steps = 0,
                                                num_training_steps = num_training_steps)
    
    for epoch in range(EPOCHS):
        train_pl = pl.ParallelLoader(train_dataloader, [DEVICE])
        valid_pl = pl.ParallelLoader(valid_dataloader, [DEVICE])

        train_fn(model,train_pl.per_device_loader(DEVICE),optimizer,device=DEVICE,scheduler=scheduler)
        
        output, target = eval_fn(model, valid_pl.per_device_loader(DEVICE),device=DEVICE)

        spear=[]
        for i in range(target.shape[1]):
            p1 = list(target[:,i])
            p2 = list(output[:,i])
            coef, _ = np.nan_to_num(stats.spearmanr(p1, p2))
            spear.append(coef)
        spear = np.mean(spear)
        xm.master_print(f"epoch = {epoch}, spearman rank = {spear}")
        xm.save(model.state_dict(), "./model.bin")


In [47]:
if __name__ == "__main__":
    xmp.spawn(training, nprocs=8, start_method='fork')

batch = 0, loss = 0.7099218964576721
batch = 10, loss = 0.4540395140647888
batch = 20, loss = 0.44330894947052
batch = 30, loss = 0.41578179597854614
batch = 40, loss = 0.3951038420200348


Exception in device=TPU:2: can't convert xla:0 device type tensor to numpy. Use Tensor.cpu() to copy the tensor to host memory first.
Exception in device=TPU:1: can't convert xla:0 device type tensor to numpy. Use Tensor.cpu() to copy the tensor to host memory first.
Traceback (most recent call last):
  File "/opt/conda/lib/python3.7/site-packages/torch_xla/distributed/xla_multiprocessing.py", line 329, in _mp_start_fn
    _start_fn(index, pf_cfg, fn, args)
  File "/opt/conda/lib/python3.7/site-packages/torch_xla/distributed/xla_multiprocessing.py", line 323, in _start_fn
    fn(gindex, *args)
  File "<ipython-input-30-e3f4c26702b5>", line 80, in training
    output, target = eval_fn(model, valid_pl.per_device_loader(DEVICE),device=DEVICE)
Traceback (most recent call last):
  File "<ipython-input-46-fef913b7c254>", line 43, in eval_fn
    output =  np.vstack(output)
  File "/opt/conda/lib/python3.7/site-packages/torch_xla/distributed/xla_multiprocessing.py", line 329, in _mp_start_fn
 

  File "<ipython-input-46-fef913b7c254>", line 43, in eval_fn
    output =  np.vstack(output)
  File "<ipython-input-46-fef913b7c254>", line 43, in eval_fn
    output =  np.vstack(output)
  File "<__array_function__ internals>", line 6, in atleast_2d
  File "<__array_function__ internals>", line 6, in vstack
  File "/opt/conda/lib/python3.7/site-packages/numpy/core/shape_base.py", line 123, in atleast_2d
    ary = asanyarray(ary)
  File "<__array_function__ internals>", line 6, in atleast_2d
  File "<__array_function__ internals>", line 6, in atleast_2d
  File "/opt/conda/lib/python3.7/site-packages/numpy/core/shape_base.py", line 123, in atleast_2d
    ary = asanyarray(ary)
  File "/opt/conda/lib/python3.7/site-packages/numpy/core/shape_base.py", line 280, in vstack
    arrs = atleast_2d(*tup)
  File "/opt/conda/lib/python3.7/site-packages/numpy/core/_asarray.py", line 138, in asanyarray
    return array(a, dtype, copy=False, order=order, subok=True)
  File "<__array_function__ intern

ProcessExitedException: process 2 terminated with exit code 17

In [None]:
def _mp_fn(index, flags):
    torch.set_default_tensor_type('torch.FloatTensor')
    a = training(index)

In [None]:
if __name__ == "__main__":
    FLAGS={}
    xmp.spawn(_mp_fn, args=(FLAGS,), nprocs=8, start_method='fork')