In [31]:
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from transformers import BertModel, BertTokenizer, BertConfig
from pathlib2 import Path
from torch.autograd import Variable
from tqdm import tqdm
import torch.nn.functional as F

In [32]:
okenizer = BertTokenizer.from_pretrained('bert-base-chinese')

def collate_fn(batch):
    batched_data = []
    batched_targets = []
    paths = []
    
    for text, targets, path in batch:
        paths.append(path)
        
        tensor_targets = torch.LongTensor(targets)
        batched_targets.append(tensor_targets)
        
        bert_input = tokenizer.batch_encode_plus(text, pad_to_max_length=True, return_tensors='pt')
        batched_data.append(bert_input)
        
    return batched_data, batched_targets, paths
    

In [33]:
class DRCDdataset(Dataset):
    def __init__(self, data_path):
        super().__init__()
        self.files = list(Path(data_path).glob('*.txt'))
    
    def __getitem__(self, index):
        path = self.files[index]
        
        return self.read_DRCD_file(path)
    
    def __len__(self):
        return len(self.files)
    
    def read_DRCD_file(self, path):
        seperator = '=========='
        with Path(path).open('r', encoding='utf-8') as f:
            raw_text = f.read()
        paragraphs = [p for p in raw_text.strip().split(seperator) if len(p)>2]
        
        targets = []
        text = []
        for paragraph in paragraphs:
            sentences = [s for s in paragraph.split('\n') if len(s.split()) > 0]
            sentences_targets = [0 for s in sentences[:-1]]
            sentences_targets.append(1)
            targets.extend(sentences_targets)
        
        
            for sentence in sentences:
                text.append(sentence)
        
        return text, targets, path

In [34]:
class Model(nn.Module):
    def __init__(self, hidden_dim, hidden_layer, batch_size):
        super().__init__()
        
        self.config = BertConfig.from_pretrained('bert-base-chinese', output_hidden_states=True)
        self.bert = BertModel.from_pretrained('bert-base-chinese', config=self.config)
        self.hidden_dim = hidden_dim
        self.hidden_layer = hidden_layer
        self.batch_size = batch_size
        self.criterion = nn.CrossEntropyLoss()
        
        
        self.lstm = nn.LSTM(768, hidden_dim, hidden_layer, bidirectional=True)
        self.linear = nn.Linear(hidden_dim * 2,2)
        
    def pad_document(self, d, max_document_length):
        d_length = d.size()[0]
        v = d.unsqueeze(0).unsqueeze(0)
        padded = F.pad(v, (0,0,0, max_document_length - d_length ))  # (1, 1, max_length, 768)
        shape = padded.size()
        return padded.view(shape[2], 1, shape[3])  # (max_length, 1, 768)
    
    def forward(self, batch):
        
        batched_cls_lhs = []
        doc_len = []
        for x in batch:
            _, cls_lhs, _ = self.bert(x['input_ids'], x['attention_mask'], return_dict=False)
            doc_len.append(cls_lhs.shape[0])
            batched_cls_lhs.append(cls_lhs)
        max_doc_len = max(doc_len)
        padded_doc = [self.pad_document(d,max_doc_len) for d in batched_cls_lhs]
        docs_tensor = torch.cat(padded_doc, 1)
        
        x, _ = self.lstm(docs_tensor)
        x = self.linear(x)

        return x

In [35]:
train_dataset = DRCDdataset(data_path=r'C:\Users\vince_wang\research\evaluate\8.24\DRCD\train')
train_dl = DataLoader(train_dataset, batch_size=3, collate_fn=collate_fn, shuffle=True)
val_dl = DataLoader(train_dataset, batch_size=5, collate_fn=collate_fn, shuffle=True)
model = Model(300,1,1)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

Some weights of the model checkpoint at bert-base-chinese were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [36]:
from transformers import BertModel, BertTokenizer, BertConfig
tokenizer = BertTokenizer.from_pretrained('bert-base-chinese')


In [37]:
from lstm import DRCDdataset

train_dataset = DRCDdataset(data_path=r'C:\Users\vince_wang\research\evaluate\8.24\DRCD\train')
train_dl = DataLoader(train_dataset, batch_size=3, collate_fn=collate_fn, shuffle=True)

stored = []
for i, (data, targets, path) in enumerate(val_dl):
    stored.append(data)
    break
tensor = torch.cat(stored, 1)

TypeError: expected Tensor as element 0 in argument 0, but got list

In [None]:
tensored_targets = torch.zeros(6).long()
tensored_targets[torch.LongTensor([1,3])] = 1
tensored_targets

tensor([0, 1, 0, 1, 0, 0])

In [None]:
t = torch.rand(10,5,300)
b = t[0:5,1,:]
print(b.shape)

torch.Size([5, 300])


In [None]:
torch.zeros(3,4).cuda()

AssertionError: Torch not compiled with CUDA enabled

In [None]:
import torch
a = torch.tensor([[1,2],[2,3],[4,5]])
a = a.numpy()
(a[0:3])[:,1]

array([2, 3, 5], dtype=int64)

In [None]:
a = torch.tensor([1,2])
b = [a,a,a]
torch.cat(b,0)

tensor([1, 2, 1, 2, 1, 2])

In [6]:
import torch
from torch.utils.data import Dataset, DataLoader
from lstm import DRCDdataset, Model, collate_fn
from transformers import BertModel, BertTokenizer, BertConfig
from pathlib import Path
from torch.autograd import Variable
import utils
from tqdm import tqdm

utils.read_config_file('config.json')
utils.config.update({'cuda':False})

tokenizer = BertTokenizer.from_pretrained('bert-base-chinese')

dataset_path = Path(utils.config['DRCDdataset'])
train_dataset = DRCDdataset(dataset_path / 'test')
train_dl = DataLoader(train_dataset, batch_size=1, collate_fn=collate_fn, shuffle=True)
model = Model(300, 1, 1)

Some weights of the model checkpoint at bert-base-chinese were not used when initializing BertModel: ['cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [8]:
total_loss = float(0)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
with tqdm(desc='Training', total=len(train_dl)) as pbar:
    for data,target,_ in train_dl:
        model.zero_grad()
        output = model(data)
        target_var = Variable(torch.cat(target, 0), requires_grad=False)
        loss = model.criterion(output, target_var)
        loss.backward()

        optimizer.step()
        total_loss += loss.item()
total_loss = total_loss / len(train_dl)
print(total_loss)

Training:   0%|          | 0/2 [00:35<?, ?it/s]

0.7525798976421356





In [14]:
import torch.nn as nn
import torch.nn.functional as F
def pad_document(d, max_document_length):
    d_length = d.size()[0]
    v = d.unsqueeze(0).unsqueeze(0)
    # 進行 padding 
    padded = F.pad(v, (0,0,0, max_document_length - d_length ))  # (1, 1, max_length, 768)
    shape = padded.size()
    return padded.view(shape[2], 1, shape[3])  # (max_length, 1, 768)

def forward(batch):
    config = BertConfig.from_pretrained('bert-base-chinese', output_hidden_states=True)
    bert = BertModel.from_pretrained('bert-base-chinese', config=config)
    hidden_dim = 300
    hidden_layer = 1
    batch_size = 1
    criterion = nn.CrossEntropyLoss()
    
    lstm = nn.LSTM(768, hidden_dim, hidden_layer, bidirectional=True)
    linear = nn.Linear(hidden_dim * 2,2)
        
    batched_cls_lhs = []
    doc_len = []
    for x in batch:
        _, cls_lhs, _ = bert(x['input_ids'], x['attention_mask'], return_dict=False)
        print(cls_lhs.shape)
        doc_len.append(cls_lhs.shape[0])
        batched_cls_lhs.append(cls_lhs)
    max_doc_len = max(doc_len)
    padded_doc = [pad_document(d,max_doc_len) for d in batched_cls_lhs]
    docs_tensor = torch.cat(padded_doc, 1)
    
    x, _ = lstm(docs_tensor)
    
    doc_outputs = []
    for i, size in enumerate(doc_len):
        doc_outputs.append(x[0:size, i, :])
    x = torch.cat(doc_outputs, 0)
    
    x = linear(x)

    return x

for data,target,_ in train_dl:
    forward(data)

Some weights of the model checkpoint at bert-base-chinese were not used when initializing BertModel: ['cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


torch.Size([46, 768])


Some weights of the model checkpoint at bert-base-chinese were not used when initializing BertModel: ['cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


torch.Size([16, 768])
