In [1]:
!pip install cloud-tpu-client==0.10 https://storage.googleapis.com/tpu-pytorch/wheels/torch_xla-1.8-cp37-cp37m-linux_x86_64.whl
!pip install -q torch==1.8 torchvision torchtext
!pip install -q pytorch-lightning==1.4.9 torchmetrics
!pip install transformers

Collecting torch-xla==1.8
  Downloading https://storage.googleapis.com/tpu-pytorch/wheels/torch_xla-1.8-cp37-cp37m-linux_x86_64.whl (144.6 MB)
[K     |████████████████████████████████| 144.6 MB 66 kB/s 
[?25hCollecting cloud-tpu-client==0.10
  Downloading cloud_tpu_client-0.10-py3-none-any.whl (7.4 kB)
Collecting google-api-python-client==1.8.0
  Downloading google_api_python_client-1.8.0-py3-none-any.whl (57 kB)
[K     |████████████████████████████████| 57 kB 2.8 MB/s 
Installing collected packages: google-api-python-client, torch-xla, cloud-tpu-client
  Attempting uninstall: google-api-python-client
    Found existing installation: google-api-python-client 1.12.8
    Uninstalling google-api-python-client-1.12.8:
      Successfully uninstalled google-api-python-client-1.12.8
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
earthengine-api 0.1.290 requir

In [2]:
import pathlib
import json
import glob
import os
import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import DataLoader
from torch.utils.data import Dataset
from transformers import AdamW
from transformers import BertConfig, BertForMaskedLM, BertModel, BertForSequenceClassification
import pytorch_lightning as pl
from pytorch_lightning import loggers as pl_loggers
from pytorch_lightning.callbacks import ModelCheckpoint




In [3]:

# TRAIN_LOG_DIRECTORY='drive/MyDrive/dl/wahaha/log'
TRAIN_LOG_DIRECTORY='drive/MyDrive/dl/wahaha/log2'
TRAIN_MLM_MODEL_DIR = 'drive/MyDrive/dl/wahaha/model'
TRAIN_PRETRAIN_MODEL_BIN = 'drive/MyDrive/dl/wahaha/model/pretrain_mlm/pytorch_model.bin'

TRAIN_FILE='drive/MyDrive/dl/wahaha/train/train_feature.txt'
TEST_FILE='drive/MyDrive/dl/wahaha/train/test_feature.txt'
MINI_TEST_FILE='drive/MyDrive/dl/wahaha/train/mini_test_feature_100.txt'
# TRAIN_DATA_DIRECTORY='drive/MyDrive/dl/kanachan/train'
# TEST_DATA_DIRECTORY='drive/MyDrive/dl/kanachan/test'

TRAIN_POSSIBLE_LABEL_COUNT = 219 # label count

TRAIN_TOKEN_CLS     = 0
TRAIN_TOKEN_SEP     = 1
TRAIN_TOKEN_PADDING = 2 + 382 + 648 + 219 # 2(CLS,SEP)+382(sparse)+648(progression)+219(possible)
TRAIN_TOKEN_MASK    = TRAIN_TOKEN_PADDING + 1

TRAIN_TOKEN_VOCAB_COUNT = TRAIN_TOKEN_PADDING + 2 # 2(PADDING, MASK)
TRAIN_MAX_TOKEN_LENGTH = 2 + 139 + 1 # 26(sparse)+81(progression)+32(possible)
TRAIN_ID_LIST = list(range(2, TRAIN_TOKEN_PADDING))

TRAIN_HIDDEN_SIZE = 768

In [14]:
# https://github.com/PyTorchLightning/pytorch-lightning/issues/2534#issuecomment-674582085
class CheckpointEveryNSteps(pl.Callback):
    """
    Save a checkpoint every N steps, instead of Lightning's default that checkpoints
    based on validation loss.
    """

    def __init__(
        self,
        save_step_frequency,
        prefix="N-Step-Checkpoint",
        use_modelcheckpoint_filename=False,
    ):
        """
        Args:
            save_step_frequency: how often to save in steps
            prefix: add a prefix to the name, only used if
                use_modelcheckpoint_filename=False
            use_modelcheckpoint_filename: just use the ModelCheckpoint callback's
                default filename, don't use ours.
        """
        self.save_step_frequency = save_step_frequency
        self.prefix = prefix
        self.use_modelcheckpoint_filename = use_modelcheckpoint_filename
    def save_checkpoint(self) :
        if self.use_modelcheckpoint_filename:
            filename = trainer.checkpoint_callback.filename
        else:
            #filename = f"{self.prefix}_{epoch}_{global_step}.ckpt"
            filename = f"{self.prefix}.ckpt"
        ckpt_path = os.path.join(TRAIN_LOG_DIRECTORY, filename)
        trainer.save_checkpoint(ckpt_path)

    def on_batch_end(self, trainer: pl.Trainer, _):
        """ Check if we should save a checkpoint after every train batch """
        # epoch = trainer.current_epoch
        global_step = trainer.global_step
        if global_step % self.save_step_frequency == 0:
            self.save_checkpoint()

    def on_epoch_end(self, trainer, _ = None) :
        self.save_checkpoint()
class TrainDataset_old :
    """
     Read train dataset from text file 
    """
    def __init__(self, filename) :
        self.filename = filename
        with open(filename, 'r') as file :
            print("[TrainDataSet]--- start setup ---", filename)
            self.content = file.readlines()
            print("[TrainDataSet]--- loading is completed ---", filename)
    def __len__(self) :
        return len(self.content)
    def __getitem__(self, idx) :
        line = self.content[idx]
        inputs, label = line.split(',')
        inputs = inputs.split(' ')
        inputs = [int(n) for n in inputs]
        label = int(label)
        return inputs, label    
class TrainDataset :
    """
     Read train dataset from text file 
    """
    def __init__(self, filename) :
        self.filename = filename
        with open(filename, 'r') as file :
            print("[TrainDataSet]--- start setup ---", filename)
            self.content = file.readlines()
            print("[TrainDataSet]--- loading is completed ---", filename)
    def __len__(self) :
        return len(self.content)
    def __getitem__(self, idx) :
        line = self.content[idx]
        inputs, label = line.split(',')
        inputs = inputs.split(' ')
        inputs = [int(n) for n in inputs]
        label = int(label)
        return inputs, label

## mlm

In [5]:
TRAIN_MLM_CONFIG = {
    'vocab_size': TRAIN_TOKEN_VOCAB_COUNT,  # MASK_TOKEN_ID, MASK, CLS, SEP
    'hidden_size': 768,
    'num_hidden_layers': 12,
    'num_attention_heads': 12,
    'intermediate_size': 3072,  # hidden_size * 4が目安
    'hidden_act': 'gelu',
    'hidden_dropout_prob': 0.1,
    'attention_probs_dropout_prob': 0.1,
    'max_position_embeddings': 512,  # 95(=81(マス目)+7(先手持駒)+7(後手持駒))でいいかも
    'type_vocab_size': 1,  # 対の文章を入れない。つまりtoken_type_embeddingsは完全に無駄になっている。
    'initializer_range': 0.02,
}

TRAIN_MLM_MODEL_CONFIG = { 
    'checkpoint_callback' : False, 
    #'gpus' : -1, # 環境にあわせて変更
    #'tpu_core' : 8,
    #'val_check_interval' : 5000, # validationおよびcheckpointの間隔step数
    #'max_epochs' : 5,
    'model_dir' : None, # インポートするモデル
}
TRAIN_MLM_TRAINER_CONFIG = {
    'checkpoint_callback' : False, 
    #'gpus' : -1, # 環境にあわせて変更
    'tpu_cores' : 8,
    'val_check_interval' : 5000, # validationおよびcheckpointの間隔step数
    'max_epochs' : 5
}
TRAIN_MLM_LOADER_CONFIG = {
    'batch_size' : 64,
    'num_workers' : 32,
    'shuffle' : True,
    'pin_memory' : False,
    'drop_last' : True
}
VAL_MLM_LOADER_CONFIG = {
    'batch_size' : 64,
    'num_workers' : 8,
    'pin_memory' : False,
    'drop_last' : False
}


In [6]:
config = BertConfig.from_dict(TRAIN_MLM_CONFIG)
class MLMModule(pl.LightningModule):
    def __init__(self, hparams):
        super().__init__()
        self.save_hyperparameters(hparams)
        self.model = BertForMaskedLM(config)

    def forward(self, batch):
        input_ids = batch['input_ids']
        labels = batch['labels']
        attention_mask = batch['attention_mask']
        return self.model(input_ids=input_ids, labels=labels, attention_mask = attention_mask)

    def training_step(self, batch, batch_idx):
        outputs = self(batch)
        loss = outputs[0]
        self.log('loss', loss)
        return loss

    def validation_step(self, batch, batch_idx):
        outputs = self(batch)
        loss = outputs[0].detach().cpu().numpy()
        return {'loss': loss}

    def validation_epoch_end(self, outputs):
        val_loss = np.mean([out['loss'] for out in outputs])
        self.log('val_loss', val_loss)

    def configure_optimizers(self):
        return AdamW(self.parameters(), lr=5e-5)

    def test_step(self, batch, batch_idx):
        return self.validation_step(batch, batch_idx)
        
    def test_epoch_end(self, outputs):
        self.validation_epoch_end(outputs)

class MLMDataset(Dataset):
    def __init__(self, filename):
        self.content = TrainDataset(filename)

    def __len__(self):
        return len(self.content)

    def __getitem__(self, idx):
        inputs, _ = self.content[idx]
        inputs = np.array(inputs)
        labels = inputs.copy()

        # 予想対象
        masked_indices = np.random.random(labels.shape) < 0.15
        labels[~masked_indices] = -100

        # 80%はマスクトークンに
        indices_replaced = (np.random.random(labels.shape) < 0.8) & masked_indices
        inputs[indices_replaced] = 0 # [MASK_ID]

        # 10%はランダムに置き換え
        indices_random = (np.random.random(labels.shape) < 0.5) & masked_indices & ~indices_replaced
        random_words = np.random.choice(TRAIN_ID_LIST, labels.shape)
        inputs[indices_random] = random_words[indices_random]
        
        # attention mask
        pad_length = TRAIN_MAX_TOKEN_LENGTH - len(inputs)
        attention_mask = np.array([1] * len(inputs) + [0] * pad_length)
        
        # padding
        inputs = np.pad(inputs,(0, pad_length), constant_values = 0) # [PAD]
        labels = np.pad(labels,(0, pad_length), constant_values = -100)

        ret_dict = {'input_ids': torch.tensor(inputs, dtype=torch.long),
                    'labels': torch.tensor(labels, dtype=torch.long),
                    'attention_mask' : torch.tensor(attention_mask, dtype=torch.float)
                   }
        return ret_dict

class MLMDataModule(pl.LightningDataModule):
    def __init__(self):
        super().__init__()

    def setup(self, stage=None) :
        self.train_dataset = MLMDataset(TRAIN_FILE)
        self.val_dataset = MLMDataset(TEST_FILE)

    def train_dataloader(self):
        return DataLoader(self.train_dataset, **TRAIN_MLM_LOADER_CONFIG)

    def val_dataloader(self):
        return DataLoader(self.val_dataset, **VAL_MLM_LOADER_CONFIG)


In [None]:
hparams = TRAIN_MLM_MODEL_CONFIG
#trainer = pl.Trainer(gpus = hparams["gpus"], enable_checkpointing=hparams["enable_checkpointing"], max_epochs = hparams["max_epochs"], val_check_interval = hparams["val_check_interval"])
#trainer = pl.Trainer(tpu_cores=8, checkpoint_callback=hparams["enable_checkpointing"], max_epochs = hparams["max_epochs"], val_check_interval = hparams["val_check_interval"])
trainer = pl.Trainer(**TRAIN_MLM_TRAINER_CONFIG)

# trainer.logger = pl_loggers.TensorBoardLogger(save_dir=TRAIN_LOG_DIRECTORY, name="mlm", default_hp_metric=False)
# trainer.callbacks.append(CheckpointEveryNSteps(save_step_frequency = 5000, prefix="chk_mlm"))
### trainer.callbacks.append(ModelCheckpoint(filename='', monitor='val_loss', save_top_k=1, save_last=True))

model = MLMModule(TRAIN_MLM_MODEL_CONFIG)
#model = MLMModule.load_from_checkpoint(TRAIN_LOG_DIRECTORY + '/chk_mlm_220108a.ckpt')

data = MLMDataModule()
trainer.fit(model, datamodule=data)

result = trainer.test()

In [40]:
from tempfile import mkdtemp
TRAIN_DATA_MAX_LEN_TOKEN = 112

class TrainDataset :
    """
     Read train dataset from text file 
    """
    def __init__(self, filename) :
        print("[TrainDatasetMmap]--- start setup ---", filename)

        with open(filename, 'r') as file :
            content = file.readlines()
            
            temp_filename = os.path.join(mkdtemp(), 'train_data.dat')
            print("tempfile:", temp_filename)
            mmap_shape = (len(content), TRAIN_DATA_MAX_LEN_TOKEN + 2) # label, len(inputs)分を加算
            print(mmap_shape)
            
            blob_content = np.empty(mmap_shape, dtype = "int32")
            print(blob_content)

            for idx, line in enumerate(content) :
                inputs, label = line.split(',')
                inputs = inputs.split(' ')
                inputs = [int(n) for n in inputs]
                label = int(label)

                # 最適化 : inputs, labelを一次元配列にエンコードする
                # [label, len(inputs), inputs[0], ...]
                
                pad_length = TRAIN_DATA_MAX_LEN_TOKEN - len(inputs)
                # 長さをそろえるためpaddingを付与
                inputs = np.pad(inputs,(0, pad_length), constant_values = 0)
                data = np.concatenate([ [label, len(inputs)], inputs])
                blob_content[idx] = data

            fp = np.memmap(temp_filename, dtype='uint32', mode='w+', shape = mmap_shape)
            fp[:] = blob_content[:]
            del fp
            
        self.mmap_content = np.memmap(temp_filename, dtype='uint32', mode='r', shape = mmap_shape)
        del content # free

        print("[TrainDatasetMmap]--- loading is completed ---", filename)

    def __len__(self) :
        return self.mmap_content.shape[0]  # len(self.mmap_content)
    def __getitem__(self, idx) :
        data = self.mmap_content[idx]
        label = data[0]
        length = data[1]
        inputs = data[2:length]
        return (inputs, label)

# dataset = MLMDataset('../mini_sample.txt')
dataset = TrainDataset_old('../mini_sample.txt')
print(f"load data len={len(dataset)}")
print(f"output[100] = {dataset[100]}")
dataset2 = TrainDataset('../mini_sample.txt')
print(f"load data len={len(dataset2)}")
print(f"output[100] = {dataset2[100]}")


[TrainDataSet]--- start setup --- ../mini_sample.txt
[TrainDataSet]--- loading is completed --- ../mini_sample.txt
load data len=120
output[100] = ([1, 4, 6, 9, 13, 14, 18, 54, 79, 241, 303, 440, 444, 460, 461, 462, 472, 476, 480, 508, 509, 573, 1003, 801, 1013, 788, 583, 801, 1005, 575, 800, 1015, 620, 804, 1036, 609, 804, 1038, 898, 805, 1031, 782, 606, 824, 1022, 601, 1003, 844, 1062, 640, 858, 1055, 612, 854, 1076, 646, 855, 1069, 595, 814, 1042, 590, 854, 1014, 645, 845, 1029, 648, 596, 886, 813, 2], 216)
[TrainDatasetMmap]--- start setup --- ../mini_sample.txt
tempfile: C:\Users\user\AppData\Local\Temp\tmp7zla150i\train_data.dat
(120, 114)
[[1461385040        494 1463220064 ...        494 1184409008        494]
 [1184409008        494 1189366896 ...        494 1191626800        494]
 [1184144176        494 1184312368 ...        494 1191626800        494]
 ...
 [1183803632        494          3 ...          0 1183803632        494]
 [         3          0 1000000000 ...        494

In [None]:
# TPU学習結果テスト
hparams = TRAIN_MLM_MODEL_CONFIG
trainer = pl.Trainer(**TRAIN_MLM_TRAINER_CONFIG)

#model = MLMModule(TRAIN_MLM_MODEL_CONFIG)
model = MLMModule.load_from_checkpoint(TRAIN_LOG_DIRECTORY + '/chk_mlm_220108a.ckpt')
ckpt_path = TRAIN_LOG_DIRECTORY + '/chk_mlm.ckpt'
model = MLMModule.load_from_checkpoint(ckpt_path)

# data = MLMDataModule()
# Test Dataset
dataset = MLMDataset(TEST_FILE)
dataloader = DataLoader(dataset, **VAL_MLM_LOADER_CONFIG)
# start test
result = trainer.test(model = model, dataloaders = dataloader, ckpt_path =ckpt_path)
print(result)


GPU available: False, used: False
TPU available: True, using: 8 TPU cores
IPU available: False, using: 0 IPUs


[TrainDataSet]--- start setup --- drive/MyDrive/dl/wahaha/train/test_feature.txt
[TrainDataSet]--- loading is completed --- drive/MyDrive/dl/wahaha/train/test_feature.txt


Layer count: [Before: 202 After: 204]
  f"The model layers do not match after moving to the target device."


Testing: 0it [00:00, ?it/s]

--------------------------------------------------------------------------------
DATALOADER:0 TEST RESULTS
{'val_loss': 1.284466028213501}
--------------------------------------------------------------------------------


  rank_zero_warn("cleaning up tpu spawn environment...")


[{'val_loss': 1.284466028213501}]


In [None]:
# GPU学習結果テスト
hparams = TRAIN_MLM_MODEL_CONFIG
trainer = pl.Trainer(**TRAIN_MLM_TRAINER_CONFIG)

mlm_dirconfig = { 
    'model_dir' : TRAIN_MLM_MODEL_DIR + "/mlm_220108a.bin", # インポートするモデル
}
model = MLMModule(mlm_dirconfig)

# Test Dataset
dataset = MLMDataset(TEST_FILE)
dataloader = DataLoader(dataset, **VAL_MLM_LOADER_CONFIG)
# start test
result = trainer.test(model = model, dataloaders = dataloader)
print(result)

GPU available: False, used: False
TPU available: True, using: 8 TPU cores
IPU available: False, using: 0 IPUs


[TrainDataSet]--- start setup --- drive/MyDrive/dl/wahaha/train/test_feature.txt
[TrainDataSet]--- loading is completed --- drive/MyDrive/dl/wahaha/train/test_feature.txt


Layer count: [Before: 202 After: 204]
  f"The model layers do not match after moving to the target device."


Testing: 0it [00:00, ?it/s]

--------------------------------------------------------------------------------
DATALOADER:0 TEST RESULTS
{'val_loss': 1.1452120542526245}
--------------------------------------------------------------------------------


  rank_zero_warn("cleaning up tpu spawn environment...")


[{'val_loss': 1.1452120542526245}]


## Classification

In [14]:
#!echo "start" > start_test.txt

TRAIN_CLASSIFICATION_CONFIG = {
    'vocab_size': TRAIN_TOKEN_VOCAB_COUNT,  # MASK_TOKEN_ID, MASK, CLS, SEP
    'hidden_size': TRAIN_HIDDEN_SIZE,
    'num_hidden_layers': 12,
    'num_attention_heads': 12,
    'intermediate_size': 3072,  # hidden_size * 4が目安
    'hidden_act': 'gelu',
    'hidden_dropout_prob': 0.1,
    'attention_probs_dropout_prob': 0.1,
    'max_position_embeddings': 512,  # 95(=81(マス目)+7(先手持駒)+7(後手持駒))でいいかも
    'type_vocab_size': 1,  # 対の文章を入れない。つまりtoken_type_embeddingsは完全に無駄になっている。
    'initializer_range': 0.02,
    'num_labels' : TRAIN_POSSIBLE_LABEL_COUNT, 
}
TRAIN_CLASSIFICATION_MODEL_CONFIG = {
    'checkpoint_callback' : False, 
    # 'gpus' : 1, # 環境にあわせて変更
    'tpu_cores' : 8,
    'val_check_interval' : 5000, # validationおよびcheckpointの間隔step数
    'max_epochs' : 3,
    #'model_dir' : TRAIN_MLM_MODEL_DIR, # インポートするモデル
    # 'enable_checkpointing' : False, 
}
TEST_CLASSIFICATION_MODEL_CONFIG = {
    # 'gpus' : 1, # 環境にあわせて変更
    'tpu_cores' : 1,
}
TRAIN_CLASSIFICATION_LOADER_CONFIG = {
    'batch_size' : 64,
    'num_workers' : 32,
    'shuffle' : True,
    'pin_memory' : True,
    'drop_last' : True
}
VAL_CLASSIFICATION_LOADER_CONFIG = {
    'batch_size' : 64,
    'num_workers' : 8,
    'pin_memory' : True,
    'drop_last' : False
}
TEST_CLASSIFICATION_LOADER_CONFIG = {
    'batch_size' : 64,
    'num_workers' : 8,
    'drop_last' : False
}
BERT_CLASSIFICATION_CONFIG = BertConfig.from_dict(TRAIN_CLASSIFICATION_CONFIG)

class BertClassification(nn.Module) :
    def __init__(self, model_dir):
        super().__init__()
        if model_dir is None :
            self.bert = BertForSequenceClassification(BERT_CLASSIFICATION_CONFIG)
        else :
            model_bin = model_dir
            self.bert = BertForSequenceClassification.from_pretrained(model_bin, config = BERT_CLASSIFICATION_CONFIG)

    def forward(self, input_ids, labels, attention_mask):
        outputs = self.bert(input_ids, labels = labels, attention_mask = attention_mask)
        loss = outputs[0]
        logit = outputs[1]
        return {'loss' : loss, 'logit' : logit}

class BertClassificationModule(pl.LightningModule):
    def __init__(self, hparams):
        super().__init__()
        model_dir = hparams["model_dir"]
        self.save_hyperparameters(hparams)
        self.model = BertClassification(model_dir)

    def forward(self, input_ids, labels, attention_mask):
        return self.model(input_ids=input_ids, labels=labels, attention_mask = attention_mask)

    def training_step(self, batch, batch_idx):
        input_ids = batch.pop('input_ids')
        labels = batch.pop('labels')
        attention_mask = batch.pop('attention_mask')
        outputs = self(input_ids, labels, attention_mask)
        loss = outputs['loss']
        return loss

    def validation_step(self, batch, batch_idx):
        input_ids = batch.pop('input_ids')
        labels = batch.pop('labels')
        attention_mask = batch.pop('attention_mask')
        outputs = self(input_ids, labels, attention_mask)
        loss = outputs['loss']
        return loss

    def validation_epoch_end(self, outputs):
        val_loss = np.mean([out.to('cpu').detach().numpy() for out in outputs])
        self.log('val_loss', val_loss)

    def configure_optimizers(self):
        return AdamW(self.parameters(), lr=5e-5)
    
    def test_step(self, batch, batch_idx): #testの実行；最終的なlossとaccの確認
        input_ids = batch.pop('input_ids')
        labels = batch.pop('labels')
        attention_mask = batch.pop('attention_mask')
        outputs = self(input_ids, labels, attention_mask)
        loss = outputs['loss']
        logit = outputs['logit']        
        prediction = torch.argmax(logit, dim=1)
        
        #print("logit:", logit, ",label:", labels)

        return {'loss' : loss, 'pred' : prediction, 'label' : labels}

    def test_epoch_end(self, outputs):
        epoch_preds = torch.cat([x['pred'] for x in outputs])
        epoch_labels = torch.cat([x['label'] for x in outputs])
        epoch_labels = torch.flatten(epoch_labels)
        epoch_loss = np.mean([x['loss'] for x in outputs])
        self.log("test_loss", epoch_loss)

        # accuracy計算
        num_correct = (epoch_preds == epoch_labels).sum().item()
        epoch_accuracy = num_correct / len(epoch_labels)
        self.log(f"test_accuracy", epoch_accuracy)

class BertClassificationDataset(Dataset):
    def __init__(self, filename):
        self.train_dataset = TrainDataset(filename)

    def __len__(self):
        return len(self.train_dataset)

    def __getitem__(self, idx):
        inputs, label = self.train_dataset[idx]
        inputs = [TRAIN_TOKEN_CLS] + inputs + [TRAIN_TOKEN_SEP]
        
        inputs = np.array(inputs)
        label = [label]
        
        # attention mask
        pad_length = TRAIN_MAX_TOKEN_LENGTH - len(inputs)
        attention_mask = np.array([1] * len(inputs) + [0] * pad_length)
        
        # padding
        inputs = np.pad(inputs,(0, pad_length), constant_values = TRAIN_TOKEN_PADDING)

        ret_dict = {'input_ids': torch.tensor(inputs, dtype=torch.long),
                    'labels': torch.tensor(label, dtype=torch.long),
                    'attention_mask' : torch.tensor(attention_mask, dtype = torch.float)
                    }
        return ret_dict

class BertClassificationDataModule(pl.LightningDataModule):
    def __init__(self):
        super().__init__()

    def setup(self, stage=None) :
        self.train_dataset = BertClassificationDataset(TRAIN_FILE)
        self.val_dataset = BertClassificationDataset(TEST_FILE)

    def train_dataloader(self):
        return DataLoader(self.train_dataset, **TRAIN_CLASSIFICATION_LOADER_CONFIG)

    def val_dataloader(self):
        return DataLoader(self.val_dataset, **VAL_CLASSIFICATION_LOADER_CONFIG)
    
    def test_dataloader(self) : 
        # wip
        return DataLoader(self.val_dataset, **TEST_CLASSIFICATION_LOADER_CONFIG)


In [None]:
# Train

trainer = pl.Trainer(**TRAIN_CLASSIFICATION_MODEL_CONFIG)
trainer.logger = pl_loggers.TensorBoardLogger(save_dir=TRAIN_LOG_DIRECTORY, name="wahaha_classification", default_hp_metric=False)
trainer.callbacks.append(CheckpointEveryNSteps(save_step_frequency = 5000, prefix="chk_class"))

# Model
mlm_dirconfig = { 
    'model_dir' : TRAIN_MLM_MODEL_DIR + "/mlm_220108a.bin", # インポートするモデル
}
model = BertClassificationModule(mlm_dirconfig) 
# model = BertClassificationModule.load_from_checkpoint(TRAIN_LOG_DIRECTORY + '/N-Step-Checkpoint.ckpt')

data = BertClassificationDataModule()
trainer.fit(model, datamodule=data)


GPU available: False, used: False
TPU available: True, using: 8 TPU cores
IPU available: False, using: 0 IPUs
Some weights of the model checkpoint at drive/MyDrive/dl/wahaha/model/mlm_220108a.bin were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassifi

[TrainDataSet]--- start setup --- drive/MyDrive/dl/wahaha/train/train_feature.txt
[TrainDataSet]--- loading is completed --- drive/MyDrive/dl/wahaha/train/train_feature.txt
[TrainDataSet]--- start setup --- drive/MyDrive/dl/wahaha/train/test_feature.txt
[TrainDataSet]--- loading is completed --- drive/MyDrive/dl/wahaha/train/test_feature.txt



  | Name  | Type               | Params
---------------------------------------------
0 | model | BertClassification | 87.2 M
---------------------------------------------
87.2 M    Trainable params
0         Non-trainable params
87.2 M    Total params
348.685   Total estimated model params size (MB)


Validation sanity check: 0it [00:00, ?it/s]

Training: -1it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

  rank_zero_warn("cleaning up tpu spawn environment...")


In [13]:
# TEST
hparams = TEST_CLASSIFICATION_MODEL_CONFIG
trainer = pl.Trainer(**hparams)
# trainer.logger = pl_loggers.TensorBoardLogger(save_dir=TRAIN_LOG_DIRECTORY, name="wahaha_classification", default_hp_metric=False)

chkpoint = TRAIN_LOG_DIRECTORY + '/chk_class_220109a.ckpt'
model = BertClassificationModule.load_from_checkpoint(chkpoint)
#data = BertClassificationDataModule()
# Test Dataset
dataset = BertClassificationDataset(TEST_FILE)
dataloader = DataLoader(dataset, **TEST_CLASSIFICATION_LOADER_CONFIG)
# start test
result = trainer.test(model = model, dataloaders = dataloader, ckpt_path = chkpoint)
print(result)

GPU available: False, used: False
TPU available: True, using: 1 TPU cores
IPU available: False, using: 0 IPUs
Some weights of the model checkpoint at drive/MyDrive/dl/wahaha/model/mlm_220108a.bin were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassifi

[TrainDataSet]--- start setup --- drive/MyDrive/dl/wahaha/train/test_feature.txt
[TrainDataSet]--- loading is completed --- drive/MyDrive/dl/wahaha/train/test_feature.txt


Testing: 0it [00:00, ?it/s]

KeyboardInterrupt: ignored