# 3주차 : 단락 연결 여부 확인

In [390]:
#!pip install transformers

In [391]:
from transformers import BertForMaskedLM, FillMaskPipeline
from transformers import TFBertForNextSentencePrediction, BertForNextSentencePrediction, AutoTokenizer, BertTokenizer
from transformers import BertConfig, AdamW
import tensorflow as tf
import torch
from torch import nn
import torch.nn.functional as F
from torch.nn.functional import softmax
import pytorch_lightning as pl
from pytorch_lightning.loggers import TensorBoardLogger
from pytorch_lightning.callbacks.model_checkpoint import ModelCheckpoint
from pytorch_lightning.callbacks import LearningRateMonitor
from tqdm import tqdm
from typing import Callable, Tuple
from seqeval.metrics import accuracy_score
import json
from sklearn.model_selection import train_test_split
from torch.utils.data import Dataset, DataLoader
import os
import random
import MeCab
from sklearn.metrics import confusion_matrix, accuracy_score, f1_score, precision_score, recall_score

In [392]:
#device = torch.device("cuda:0")   #gpu 사용
device = torch.device("cpu")

print('GPUs Available :', torch.cuda.device_count())

if torch.cuda.is_available():
    print('GPU running')
else:
    print('GPU not running')

GPUs Available : 1
GPU running


## 1. 문장 연결성 파악

2개 단락의 연결되는 문장이 자연스럽게 연결되는지 확인하는 것이 목적이다.  
이를 위해 KLUE(Korean Lnaguage Understanding Evaluation)를 통해 배포된 BERT 모델을 이용할 것이다. 

####  KLUE는 총 8가지 자연어이해(NLU) task로 구성되어 있다.  

    -Topic Classification  
    -Semantic Textual  
    -Natural Language Inference  
    -Named Entity Recognition  
    -Relation Extraction  
    -Dependency parsing  
    -Machine Reading Comprehension  
    -Dialogue State Tracking  

이 중에서 여기서 진행할 것은 Natural Language Inference(자연어추론)이다.  


## 방법: MASK에 들어갈 단어 예측
    -TFBertForMaskedLM 모델 이용
    2개 단락이 연결되는 부분을 마스킹해 모델에 넣었을 때,
    실제 정답이 결과값 5개 안에 있다면, 연결된 문장은 자연스럽다고 볼 수 있을 것이라 가정

In [393]:
model = BertForMaskedLM.from_pretrained('klue/bert-base')  
tokenizer = AutoTokenizer.from_pretrained("klue/bert-base")   #해당 모델이 학습되었을 당시 사용된 토크나이저

Some weights of the model checkpoint at klue/bert-base were not used when initializing BertForMaskedLM: ['cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForMaskedLM from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForMaskedLM from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [394]:
inputs = tokenizer('축구는 정말 재미있는 [MASK]다.', return_tensors='tf')  
print(inputs['input_ids'])
print(inputs['token_type_ids'])   #문장 길이만큼의 0 시퀀스
print(inputs['attention_mask'])   #실제 단어와 패딩 토큰을 구분하기 위한 어텐션 마스크

tf.Tensor([[   2 4713 2259 3944 6001 2259    4  809   18    3]], shape=(1, 10), dtype=int32)
tf.Tensor([[0 0 0 0 0 0 0 0 0 0]], shape=(1, 10), dtype=int32)
tf.Tensor([[1 1 1 1 1 1 1 1 1 1]], shape=(1, 10), dtype=int32)


In [395]:
pip = FillMaskPipeline(model=model, tokenizer=tokenizer)   #[MASK] 위치에 들어갈 상위 5개 출력
pip('축구는 정말 재미있는 [MASK]다.')

[{'score': 0.896362841129303,
  'token': 4559,
  'token_str': '스포츠',
  'sequence': '축구는 정말 재미있는 스포츠 다.'},
 {'score': 0.025958051905035973,
  'token': 568,
  'token_str': '거',
  'sequence': '축구는 정말 재미있는 거 다.'},
 {'score': 0.010034135542809963,
  'token': 3682,
  'token_str': '경기',
  'sequence': '축구는 정말 재미있는 경기 다.'},
 {'score': 0.007924516685307026,
  'token': 4713,
  'token_str': '축구',
  'sequence': '축구는 정말 재미있는 축구 다.'},
 {'score': 0.007844332605600357,
  'token': 5845,
  'token_str': '놀이',
  'sequence': '축구는 정말 재미있는 놀이 다.'}]

In [396]:
#원래 단어는 '지지자'
pip('더불어민주당 [MASK] 중 39.5%, 국민의당 지지자 중 28.8%, 기타 21%, 무당층의 16.2%, 개혁보수당 지지자 중 12.3%였다.')   

[{'score': 0.9840489625930786,
  'token': 11006,
  'token_str': '지지자',
  'sequence': '더불어민주당 지지자 중 39. 5 %, 국민의당 지지자 중 28. 8 %, 기타 21 %, 무당층의 16. 2 %, 개혁보수당 지지자 중 12. 3 % 였다.'},
 {'score': 0.01445611473172903,
  'token': 13755,
  'token_str': '지지층',
  'sequence': '더불어민주당 지지층 중 39. 5 %, 국민의당 지지자 중 28. 8 %, 기타 21 %, 무당층의 16. 2 %, 개혁보수당 지지자 중 12. 3 % 였다.'},
 {'score': 0.0006135448929853737,
  'token': 9440,
  'token_str': '응답자',
  'sequence': '더불어민주당 응답자 중 39. 5 %, 국민의당 지지자 중 28. 8 %, 기타 21 %, 무당층의 16. 2 %, 개혁보수당 지지자 중 12. 3 % 였다.'},
 {'score': 0.00022629096929449588,
  'token': 4315,
  'token_str': '지지',
  'sequence': '더불어민주당 지지 중 39. 5 %, 국민의당 지지자 중 28. 8 %, 기타 21 %, 무당층의 16. 2 %, 개혁보수당 지지자 중 12. 3 % 였다.'},
 {'score': 0.00022148765856400132,
  'token': 7243,
  'token_str': '유권자',
  'sequence': '더불어민주당 유권자 중 39. 5 %, 국민의당 지지자 중 28. 8 %, 기타 21 %, 무당층의 16. 2 %, 개혁보수당 지지자 중 12. 3 % 였다.'}]

In [397]:
#원래 단어는 '이런'
pip('모른다고 답하거나 응답하지 않은 비율은 1.5%였다. [MASK] 결과를 우리나라 인구수 5168만여명에 대입하면 촛불집회 참가 경험이 있는 국민은 1199만여명으로 나온다.')

[{'score': 0.4670316278934479,
  'token': 1504,
  'token_str': '이',
  'sequence': '모른다고 답하거나 응답하지 않은 비율은 1. 5 % 였다. 이 결과를 우리나라 인구수 5168만여명에 대입하면 촛불집회 참가 경험이 있는 국민은 1199만여명으로 나온다.'},
 {'score': 0.20466183125972748,
  'token': 3742,
  'token_str': '조사',
  'sequence': '모른다고 답하거나 응답하지 않은 비율은 1. 5 % 였다. 조사 결과를 우리나라 인구수 5168만여명에 대입하면 촛불집회 참가 경험이 있는 국민은 1199만여명으로 나온다.'},
 {'score': 0.1495237499475479,
  'token': 3686,
  'token_str': '이번',
  'sequence': '모른다고 답하거나 응답하지 않은 비율은 1. 5 % 였다. 이번 결과를 우리나라 인구수 5168만여명에 대입하면 촛불집회 참가 경험이 있는 국민은 1199만여명으로 나온다.'},
 {'score': 0.08464939147233963,
  'token': 3667,
  'token_str': '이런',
  'sequence': '모른다고 답하거나 응답하지 않은 비율은 1. 5 % 였다. 이런 결과를 우리나라 인구수 5168만여명에 대입하면 촛불집회 참가 경험이 있는 국민은 1199만여명으로 나온다.'},
 {'score': 0.03338053822517395,
  'token': 7509,
  'token_str': '설문',
  'sequence': '모른다고 답하거나 응답하지 않은 비율은 1. 5 % 였다. 설문 결과를 우리나라 인구수 5168만여명에 대입하면 촛불집회 참가 경험이 있는 국민은 1199만여명으로 나온다.'}]

## 2. 단락 유사도 파악 및 동일 문장 여부 파악
    2개 단락의 유사도 또는 연결성이 있는지,
    2개의 단락에서 문장이 끊어졌을 때 1개의 문장으로 붙여지는지 확인하는 것이 목적이다.

## 방법: 다음 문장을 예측
    -BertForNextSentencePrediction 모델 이용
    실제로 전후관계인 단락(0:True), 가짜로 묶어놓은 단락(1:False)을 학습시킨다.
    이후 모델에 테스트할 앞뒤 단락을 넣었을 때, True/False를 잘 판별해낸다면,
    두 단락의 유사도/연결성이 있는지, 나아가 한 문장으로 잘 붙는지 확인할 수 있을 것이라 가정

    https://towardsdatascience.com/how-to-use-bert-from-the-hugging-face-transformer-library-d373a22b0209

In [398]:
model = TFBertForNextSentencePrediction.from_pretrained('klue/bert-base', from_pt=True)
tokenizer = AutoTokenizer.from_pretrained("klue/bert-base")

Some weights of the PyTorch model were not used when initializing the TF 2.0 model TFBertForNextSentencePrediction: ['bert.embeddings.position_ids']
- This IS expected if you are initializing TFBertForNextSentencePrediction from a PyTorch model trained on another task or with another architecture (e.g. initializing a TFBertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFBertForNextSentencePrediction from a PyTorch model that you expect to be exactly identical (e.g. initializing a TFBertForSequenceClassification model from a BertForSequenceClassification model).
All the weights of TFBertForNextSentencePrediction were initialized from the PyTorch model.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFBertForNextSentencePrediction for predictions without further training.


In [399]:
# XML파일에서 추출한 이어지는 두 개의 문장
prompt = '이런 결과를 지난해 11월 행정자치부가 발표한 우리나라 인구수 5168만여명에 대입하면 ' 
next_sentence = ' 촛불집회 참가 경험이 있는 국민은 1199만여명으로 나온다.'
encoding = tokenizer(prompt, next_sentence, return_tensors='tf')

logits = model(encoding['input_ids'], token_type_ids=encoding['token_type_ids'])[0]

softmax = tf.keras.layers.Softmax()
probs = softmax(logits)
print('최종 예측 레이블 :', tf.math.argmax(probs, axis=-1).numpy())

최종 예측 레이블 : [0]


In [400]:
# XML파일에서 추출한 이어지는 제목, 문장
prompt = '서브프라임 모기지 사태로 촉발된 미국의 금융위기가 미국 금융기관을' 
next_sentence = ' 몰락시키고, 세계 굴지의 자동차 회사들을 생존위기로 내몬 데 이어 최근 언론사까지 벼랑 끝으로 내몰고 있다.'
encoding = tokenizer(prompt, next_sentence, return_tensors='tf')

logits = model(encoding['input_ids'], token_type_ids=encoding['token_type_ids'])[0]

softmax = tf.keras.layers.Softmax()
probs = softmax(logits)
print('최종 예측 레이블 :', tf.math.argmax(probs, axis=-1).numpy())

최종 예측 레이블 : [0]


In [401]:
# XML파일에서 추출한 상관없는 두 개의 문장
prompt = '75.3%는 전혀 참여한 적이 없다고 응답했고, 모른다고 답하거나 응답하지 않은 비율은 1.5%였다.' 
next_sentence = '이 목록에 오른 493개 차량 모델 중 LG화학과 삼성SDI의 배터리를 탑재한 차량은 하나도 없었다.'
encoding = tokenizer(prompt, next_sentence, return_tensors='tf')

logits = model(encoding['input_ids'], token_type_ids=encoding['token_type_ids'])[0]

softmax = tf.keras.layers.Softmax()
probs = softmax(logits)
print('최종 예측 레이블 :', tf.math.argmax(probs, axis=-1).numpy())

최종 예측 레이블 : [1]


In [402]:
# XML파일에서 추출한 상관없는 제목, 문장
prompt = '남의 집 제집이라 속여 팔았는데 사기 아니라고?' 
next_sentence = '충남 금산군이 환경부가 수변구역으로 지정한 임야에 버섯재배사 개발허가를 내줘 빈축을 사고 있다.'
encoding = tokenizer(prompt, next_sentence, return_tensors='tf')

logits = model(encoding['input_ids'], token_type_ids=encoding['token_type_ids'])[0]

softmax = tf.keras.layers.Softmax()
probs = softmax(logits)
print('최종 예측 레이블 :', tf.math.argmax(probs, axis=-1).numpy())

최종 예측 레이블 : [1]


### => TFBertForNextSentencePrediction.from_pretrained('klue/bert-base', from_pt=True) 에
### 모두의말뭉치 Corpus를 이어서 학습시킬 예정

## 데이터 준비

In [403]:
#모두의말뭉치 파일 내 json 파일 목록
def parse_paths(folder):
    for current, dirs, files in os.walk(folder):
        return [os.path.join(current, file) for file in files if file.endswith(".json")]

#json 파일 내 기사 내용 추출
def parse_contents(files):
    result = []
    for file in tqdm(files, desc="[Contents Parsing]"):    #진행상황 확인용
        with open(file, "r", encoding="utf-8") as f:
            contents = json.load(f)
            for doc in contents['document']:
                result_paragraph = []     #같은 기사 한문단
                paragraph_id = doc['id']  #같은 기사 id
                for paragraph in doc['paragraph']:
                    if paragraph['id'][-2:] == '.1':   #기사의 제목 부분은 제외
                        continue
                    elif paragraph['id'][:17]==paragraph_id[:17] and paragraph['id'][-2:]=='.2':  #같은 id(같은 단락의 기사)의 첫줄이라면
                        continue
                    elif paragraph['id'][:17]==paragraph_id[:17] and paragraph['id'][-2:]!='.2':  #같은 id(같은 단락의 기사)의 첫줄이 아닌 문장
                        result_paragraph.append(paragraph['form'])
                    else:   #다른 id(다른 단락의 기사)라면
                        result.append(result_paragraph)   #지금까지 id의 문장들은 result로
                        result_pragraph = []   #result_paragraph 비우기
                        paragraph_id = paragraph['id'][:17]
                        result_paragraph.append(paragraph['form'])
    print("[Contents Length] {0:,}".format(len(result)))
    
    for i in range(len(result)):
        pa = ''
        for sentence in result[i]:
            pa += f' {sentence}'
            result[i] = pa    
    return result
data_path = 'C:\BootCamp\CP2\Korpus'
data = parse_paths(data_path)
contents = parse_contents(data)



                                           





[Contents Parsing]: 100%|██████████| 35/35 [00:19<00:00,  1.80it/s]

[Contents Length] 314





In [404]:
#같은 기사는 한 문단으로 처리됨
contents[0]

' 전남 목포시는 최근 이목이 집중되고 있는 근대역사문화공간 재생활성화 사업을 근대문화재 보존과 활용이라는 당초 취지대로 차질없이 추진하겠다고 밝혔다. 이 사업은 목포 원도심인 유달‧만호동 일대에 산재해 있는 근대건축물 등 문화유산을 보존하고 보수‧정비하는 사업으로 금년부터 향후 5년간 총 사업비 500억원이 투입된다. 목포시는 원도심 일대의 근대경관을 회복하고 거주민 생활여건 개선과 관광인프라 확충 등을 통해 이 지역을 전국적인 근대 문화유산의 보고로 만들어 나간다는 계획이다. 아울러, 최근 언론의 집중 보도로 목포 근대문화재에 대해 관심이 많아진 이 기회를 문화유산 보존의 필요성과 당위성을 널리 알리고 이에 대한 사회적 합의를 이루는 계기로 만드는 노력도 기울이기로 했다. 올 해는 종합정비계획을 수립하고 역사문화공간 내 건축자산 매입 및 정비에 나선다. 개별문화재로 등록된 15개소를 중심으로 우선 매입하고, 역사적‧건축적 가치가 높은 건축물과 경관을 훼손하는 건축물도 매입해서 공공재로의 활용을 확대한다는 계획이다. 특히, 목포시는 건축자산 매입 시 공정하고 투명한 행정절차를 통해 투기자본 유입을 원천 차단한다. 또, 근대역사문화공간 내 보존 활용, 관리 및 지원 기준에 관한 조례를 제정해 젠트리피케이션 발생을 예방하고, 특정 투기세력들이 수혜를 받을 수 없도록 제도적 장치를 마련하기로 했다. 목포시는 근대역사문화공간 재생 활성화 시범사업이 목포의 역사적 가치를 보존하면서 지역발전도 함께 이룰 수 있는 중요한 기회이자, 소중한 문화유산을 지키기 위해 꼭 필요한 사업이라는 점을 강조했다. 아울러, 문화재청, 관련 기관 등과 잘 협력해 사업을 본래의 취지대로 흔들림없이 추진해서 반드시 근대문화재 보존활용의 성공 모델로 만들어 간다는 방침을 확고히 했다.'

In [405]:
len_paragraph = []
for i in contents:
    len_paragraph.append(len(i))
    
print(f'기사의 길이 : 최소 {min(len_paragraph)}자, 최대 길이 {max(len_paragraph)}자')

#앞문단 128자 + 뒷문단 128자 = 256자를 pre-trained 모델에 fine-tuning하기 위한 입력값으로 사용
#예를들어 최대길이인 4135자를 입력값으로 쓴다면, 4135 // 128 = 32개의 문단으로 나누고,
#앞-뒤관계가 True인 총 30쌍의 데이터를 얻을 수 있다.

기사의 길이 : 최소 406자, 최대 길이 4135자


## 데이터 준비

    -기사 총 314개를 각각 128자씩 분할

In [406]:
def paragraph_slicing(data, max_len):
    result_data = []
    for paragraph in data:
        result_paragraph = []
        for i in range(len(paragraph)//max_len +1):
            start_idx = max_len * i
            end_idx = start_idx + max_len
            result_paragraph.append(paragraph[start_idx:end_idx])   #[128자 text]
        result_data.append(result_paragraph)
    return result_data

news = paragraph_slicing(contents, 128)

In [407]:
total_128 = 0
for i in news:
    total_128 += len(i)
    
print('원래 문단 개수 :', len(news))
print('128자로 나눈 후 개수 :', total_128)
news[0]

원래 문단 개수 : 314
128자로 나눈 후 개수 : 2947


[' 전남 목포시는 최근 이목이 집중되고 있는 근대역사문화공간 재생활성화 사업을 근대문화재 보존과 활용이라는 당초 취지대로 차질없이 추진하겠다고 밝혔다. 이 사업은 목포 원도심인 유달‧만호동 일대에 산재해 있는 근대건축물 등 문화유산',
 '을 보존하고 보수‧정비하는 사업으로 금년부터 향후 5년간 총 사업비 500억원이 투입된다. 목포시는 원도심 일대의 근대경관을 회복하고 거주민 생활여건 개선과 관광인프라 확충 등을 통해 이 지역을 전국적인 근대 문화유산의 보고로 만',
 '들어 나간다는 계획이다. 아울러, 최근 언론의 집중 보도로 목포 근대문화재에 대해 관심이 많아진 이 기회를 문화유산 보존의 필요성과 당위성을 널리 알리고 이에 대한 사회적 합의를 이루는 계기로 만드는 노력도 기울이기로 했다. 올 ',
 '해는 종합정비계획을 수립하고 역사문화공간 내 건축자산 매입 및 정비에 나선다. 개별문화재로 등록된 15개소를 중심으로 우선 매입하고, 역사적‧건축적 가치가 높은 건축물과 경관을 훼손하는 건축물도 매입해서 공공재로의 활용을 확대한다',
 '는 계획이다. 특히, 목포시는 건축자산 매입 시 공정하고 투명한 행정절차를 통해 투기자본 유입을 원천 차단한다. 또, 근대역사문화공간 내 보존 활용, 관리 및 지원 기준에 관한 조례를 제정해 젠트리피케이션 발생을 예방하고, 특정 ',
 '투기세력들이 수혜를 받을 수 없도록 제도적 장치를 마련하기로 했다. 목포시는 근대역사문화공간 재생 활성화 시범사업이 목포의 역사적 가치를 보존하면서 지역발전도 함께 이룰 수 있는 중요한 기회이자, 소중한 문화유산을 지키기 위해 꼭',
 ' 필요한 사업이라는 점을 강조했다. 아울러, 문화재청, 관련 기관 등과 잘 협력해 사업을 본래의 취지대로 흔들림없이 추진해서 반드시 근대문화재 보존활용의 성공 모델로 만들어 간다는 방침을 확고히 했다.']

## Input data 만들기

위에서 TFBertForNextSentencePrediction 모델을 사용했던 형식을 보면

    prompt = '앞문장' 
    next_sentence = '뒷문장'
    encoding = tokenizer(prompt, next_sentence, return_tensors='tf')

    logits = model(encoding['input_ids'], token_type_ids=encoding['token_type_ids'])[0]

    softmax = tf.keras.layers.Softmax()
    probs = softmax(logits)
    print('최종 예측 레이블 :', tf.math.argmax(probs, axis=-1).numpy())

=>예를들어, 문단이 128자로 나눴을 때 총 5개로 나눠졌다면, 각 문단의 idx 0-1, 1-2, 2-3, 3-4(총 4번) 문장별로 입력되어야 한다.

In [408]:
i = 0  
tf_next_sentence = []
true, false = [], []

for i in range(len(news)):    #314개 기사를 하나씩 확인
    content = news[i]    #하나의 기사
    for j in range(len(content)):    #각 기사 내 문단을 하나씩 확인
        for j in range(len(content)-1):   
            true.append((content[j], content[j+1], 0))   #(앞문단, 진짜뒷문단, 0(True))
            
            while 0:
                random_news = random.randint(0, len(news))   #314개 중 무작위 기사를 뽑아서
                random_content = random.randint(0, len(news[random_news]))   #무작위로 한 문단 가져오기
                try:
                    false_text = news[random_news][random_content]
                    break
                except:
                    continue
            
            false.append((content[j], false_text, 1))   #(앞문단, 가짜뒷문단, 1(False))
            tf_next_sentence.extend(true)
            tf_next_sentence.extend(false)
            true, false = [], []
        j += 1
    i += 1

print('기존 기사 개수 :', len(news))
print('T/F 처리 후 데이터 개수 :', len(tf_next_sentence))

기존 기사 개수 : 314
T/F 처리 후 데이터 개수 : 64824


In [424]:
tf_next_sentence[:6]

[(' 전남 목포시는 최근 이목이 집중되고 있는 근대역사문화공간 재생활성화 사업을 근대문화재 보존과 활용이라는 당초 취지대로 차질없이 추진하겠다고 밝혔다. 이 사업은 목포 원도심인 유달‧만호동 일대에 산재해 있는 근대건축물 등 문화유산',
  '을 보존하고 보수‧정비하는 사업으로 금년부터 향후 5년간 총 사업비 500억원이 투입된다. 목포시는 원도심 일대의 근대경관을 회복하고 거주민 생활여건 개선과 관광인프라 확충 등을 통해 이 지역을 전국적인 근대 문화유산의 보고로 만',
  0),
 (' 전남 목포시는 최근 이목이 집중되고 있는 근대역사문화공간 재생활성화 사업을 근대문화재 보존과 활용이라는 당초 취지대로 차질없이 추진하겠다고 밝혔다. 이 사업은 목포 원도심인 유달‧만호동 일대에 산재해 있는 근대건축물 등 문화유산',
  "속 소통혁신정책관에게 부여한 '도민 갈등 관리'기능이 제대로 작동하지 않았던 점 역시 원 도정이 서둘러 해결해야 할 과제로 꼽힌다. 이달 중 수립할 예정인 갈등관리 종합계획 역시 예방적 갈등관리와 현안 공공갈등 조정 및 해소라는 ",
  1),
 ('을 보존하고 보수‧정비하는 사업으로 금년부터 향후 5년간 총 사업비 500억원이 투입된다. 목포시는 원도심 일대의 근대경관을 회복하고 거주민 생활여건 개선과 관광인프라 확충 등을 통해 이 지역을 전국적인 근대 문화유산의 보고로 만',
  '들어 나간다는 계획이다. 아울러, 최근 언론의 집중 보도로 목포 근대문화재에 대해 관심이 많아진 이 기회를 문화유산 보존의 필요성과 당위성을 널리 알리고 이에 대한 사회적 합의를 이루는 계기로 만드는 노력도 기울이기로 했다. 올 ',
  0),
 ('을 보존하고 보수‧정비하는 사업으로 금년부터 향후 5년간 총 사업비 500억원이 투입된다. 목포시는 원도심 일대의 근대경관을 회복하고 거주민 생활여건 개선과 관광인프라 확충 등을 통해 이 지역을 전국적인 근대 문화유산의 보고로 만',
  "속 소통혁신정책관에게 부여한 '도민 갈등 관리'기능이 제대로 작동하지 않았

In [410]:
train, test = train_test_split(tf_next_sentence, test_size=0.2, random_state=0, shuffle=True)
train, val = train_test_split(train, test_size=0.2, random_state=0, shuffle=True)
print("train :", len(train), "  val :", len(val), "  test :", len(test))

train : 41487   val : 10372   test : 12965


## 모델 생성
    CorpusDataset
    Preprocessor
    BERTNextSentenceModel
    Config

In [426]:
class CorpusDataset(Dataset):
    def __init__(self, sentences, transform: Callable):
        self.sentences = sentences
        self.transform = transform

    def __len__(self):
        return len(self.sentences)

    def __getitem__(self, idx):
        prompt = self.sentences[idx][0]
        next_sentence = self.sentences[idx][1]
        label = self.sentences[idx][2]
        (
            input_ids,
            attention_mask,
            token_type_ids,
            label, 
        ) = self.transform(prompt, next_sentence, label)

        return input_ids, attention_mask, token_type_ids, label

In [448]:
class Preprocessor :
    def __init__(self, max_len: int):
        self.tokenizer = BertTokenizer.from_pretrained("snunlp/KR-Medium", do_lower_case=False)
        self.max_len = max_len
        self.pad_token_id = 0

    def get_input_features(self, prompt, next_sentence, label
    ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]:
        """두 문장(prompt, next_sentence)에 대해 tokenize한 뒤 id로 변환, 두 문장을 이어붙인다.

        Args:
            prompt: 이전 문장
            next_sentence: 이어질 다음 문장
            label : 두 문장이 이어지면 0, 아니면 1

        Returns:
            feature를 리턴한다.
            input_ids : 각 토큰의 id, 2(CLS)로 시작, 3(SEP)
            attention_mask : padding은 0, 데이터가 존재하면 1
            token_type_ids : prompt 위치는 0, next_sentence 위치는 1, 1 뒤의 0은 패딩
            label : 정답이면 [1, 0], 오답이면 [0, 1]
        """
        tok_prompt = self.tokenizer.tokenize(prompt)
        tok_next_sen = self.tokenizer.tokenize(next_sentence)

        input_ids = [2] + self.tokenizer.convert_tokens_to_ids(tok_prompt) + [3]    # 처음과 끝 표시
        input_ids_next_sen = [2]+ self.tokenizer.convert_tokens_to_ids(tok_next_sen)+ [3]

        slicing_idx = 0
        if len(input_ids) + len(input_ids_next_sen) > self.max_len :
          slicing_idx = len(input_ids) + len(input_ids_next_sen) - self.max_len // 2 + 2
          input_ids = [2] + input_ids[slicing_idx:]
          input_ids_next_sen = input_ids_next_sen[:-slicing_idx] + [3]

        token_type_ids = [0]*len(input_ids)
        token_type_ids.extend([1]*len(input_ids_next_sen))
        input_ids.extend(input_ids_next_sen)
        attention_mask = [1] * len(token_type_ids)
        pad_length = self.max_len-len(input_ids)

        input_ids.extend([0] * pad_length)
        token_type_ids.extend([0] * pad_length) # pad : 0
        attention_mask.extend([0] * pad_length) # pad : 0

        input_ids = torch.tensor(input_ids, dtype=torch.int)
        attention_mask = torch.tensor(attention_mask, dtype=torch.int)
        token_type_ids = torch.tensor(token_type_ids, dtype=torch.int)

        label = [1.0, 0.] if label == 0 else [0., 1.0]
        label = torch.tensor(label, dtype=torch.float)
        return input_ids, attention_mask, token_type_ids, label

In [458]:
class BertOnlyNSPHead(pl.LightningModule):
    def __init__(self, config):
        super().__init__()
        self.seq_relationship = nn.Linear(config.hidden_size, 2)

    def forward(self, pooled_output):
        seq_relationship_score = self.seq_relationship(pooled_output)
        print(seq_relationship_score, len(seq_relationship_score))
        return seq_relationship_score

# NextSentencePredictorOutput
class BERTNextSentenceModel(pl.LightningModule):
    def __init__(self, config, dataset):
        super().__init__()
        print("init")
        self.config = config
        self.dataset = dataset
        self.labels_type = [0,1]
        self.pad_token_id = 0
        self.softmax = torch.nn.Softmax()
        self.bert_config = BertConfig.from_pretrained(
            self.config.bert_model, num_labels=2
        )
        self.model = BertForNextSentencePrediction.from_pretrained(
            self.config.bert_model, config=self.bert_config
        )
        self.cls = BertOnlyNSPHead(config)
        self.dropout = nn.Dropout(self.config.dropout_rate)
        self.linear = nn.Linear(
            self.bert_config.hidden_size, len(self.labels_type)
        )

    def forward(self,
        input_ids=None, attention_mask=None, token_type_ids=None, position_ids=None,
        head_mask=None, inputs_embeds=None, labels=None, output_attentions=None, output_hidden_states=None,
        return_dict=None, **kwargs, ):
        """
        return NextSentencePredictorOutput(
            loss=next_sentence_loss,
            logits=seq_relationship_scores,
            hidden_states=outputs.hidden_states,
            attentions=outputs.attentions,
        )

        """
        logits = self.model(input_ids, token_type_ids=token_type_ids).logits
        probs = self.softmax(logits)
        return probs

    def training_step(self, batch, batch_nb):
        input_ids, attention_mask, token_type_ids, label_ids = batch
        
        outputs = self(
            input_ids=input_ids,
            attention_mask=attention_mask,
            token_type_ids=token_type_ids,
            labels=label_ids,
        )

        loss = self._calculate_loss(outputs, label_ids) # slot_labels : labels
        # f1 = self._f1_score(outputs, label_ids)
        acc = self._calculate_accuracy(outputs, label_ids)
        tensorboard_logs = {'train_loss': loss, 'train_acc':acc}
        return {"loss": loss, "acc": acc, "log": tensorboard_logs}

    def training_end(self, batch, batch_nb):
        input_ids, attention_mask, token_type_ids, label_ids = batch

        outputs = self(
            input_ids=input_ids,
            attention_mask=attention_mask,
            token_type_ids=token_type_ids,
        )
        loss = self._calculate_loss(outputs, label_ids) # slot_labels : labels
        # f1 = self._f1_score(outputs, slot_label_ids)
        acc = self._calculate_accuracy(outputs, label_ids)

        tensorboard_logs = {'train_loss': loss, 'train_acc':acc}
        print("training_end : ", tensorboard_logs)
        return {"loss": loss, "acc": acc, "log": tensorboard_logs}

    def training_epoch_end(self, outputs):
        super().on_train_epoch_end()
        print("training_epoch_end")
            
        avg_loss = torch.stack([torch.tensor(x['loss']) for x in outputs]).mean()
        # val_f1 = torch.stack([torch.tensor(x['val_f1']) for x in outputs]).mean()
        acc = torch.stack([torch.tensor(x['acc']) for x in outputs]).mean()
        tensorboard_logs = {'loss': avg_loss, 'acc': acc}
        print('training_epoch_end : ', tensorboard_logs)
        self.log("validation_epoch_end : tensorboard_logs ", tensorboard_logs)

    def validation_step(self, batch, batch_nb):
        input_ids, attention_mask, token_type_ids, slot_label_ids = batch
        outputs = self(
            input_ids=input_ids,
            attention_mask=attention_mask,
            token_type_ids=token_type_ids,
        )
        loss = self._calculate_loss(outputs, slot_label_ids)
        # val_f1 = self._f1_score(outputs, slot_label_ids)
        val_acc = self._calculate_accuracy(outputs, slot_label_ids)
        return {"val_loss": loss, 'val_acc':val_acc}

    def validation_epoch_end(self, outputs):
      
        avg_loss = torch.stack([torch.tensor(x['val_loss']) for x in outputs]).mean()
        # val_f1 = torch.stack([torch.tensor(x['val_f1']) for x in outputs]).mean()
        val_acc = torch.stack([torch.tensor(x['val_acc']) for x in outputs]).mean()
        tensorboard_logs = {'val_loss': avg_loss, 'val_acc':val_acc}
        print('validation_epoch_end : ', tensorboard_logs)
        self.log('val_acc', val_acc)
        self.log('val_loss', avg_loss)
        return {'val_acc':val_acc, 'val_loss': avg_loss, 'log': tensorboard_logs}
    
    def validation_end(self, outputs):
        print('validation_end : ')

        val_loss = torch.stack([torch.tensor(x["val_loss"]) for x in outputs]).mean()
        val_acc = torch.stack([torch.tensor(x["val_acc"]) for x in outputs]).mean()
        # val_f1 = torch.stack([x["val_f1"] for x in outputs]).mean()
        tensorboard_logs = {
            "val_loss": val_loss,
            "val_acc" : val_acc
        }
        print("validation_end : ", tensorboard_logs)
        return {'val_acc':val_acc, 'val_loss': val_loss, 'log': tensorboard_logs}
    
    def test_step(self, batch, batch_nb):
        input_ids, attention_mask, token_type_ids, slot_label_ids = batch
        outputs = self(
            input_ids=input_ids,
            attention_mask=attention_mask,
            token_type_ids=token_type_ids,
        )

        # f1 = self._f1_score(gt_slot_labels, pred_slot_labels)
        acc = self._calculate_accuracy(outputs, slot_label_ids)
        loss = self._calculate_loss(outputs, slot_label_ids)
        return {"test_loss": loss, "test_acc": acc}

    def test_end(self, outputs):

        # test_f1 = torch.stack([x["test_f1"] for x in outputs]).mean()
        test_loss = torch.stack([torch.tensor(x["test_loss"]) for x in outputs]).mean()
        test_acc = torch.stack([torch.tensor(x["test_acc"]) for x in outputs]).mean()
        self.log("test_loss", test_loss)
        self.log("test_acc", test_acc)
        # self.log("test_f1", test_f1)
        return {"labels" : [x["labels"] for x in outputs], "test_loss": test_loss,  "test_acc": test_acc}
    
    def test_epoch_end(self, outputs):
        avg_loss = torch.stack([torch.tensor(x['test_loss']) for x in outputs]).mean()
        # f1 = torch.stack([x['test_f1'] for x in outputs]).mean()
        acc = torch.stack([torch.tensor(x['test_acc']) for x in outputs]).mean()
        tensorboard_logs = {'test_loss': avg_loss, 'test_acc':acc}
        print('test_epoch_end : ', tensorboard_logs)
        self.log("test_epoch_end : tensorboard_logs ", tensorboard_logs)
        return {'test_acc':acc, 'test_loss': avg_loss, 'log': tensorboard_logs}

    def predict_step(self, batch, batch_idx, dataloader_idx=0):   # prediction : forward(), predict_step()
        input_ids, attention_mask, token_type_ids, label_ids = batch    # slot_label은 없음.
        outputs = self(
            input_ids=input_ids,
            attention_mask=attention_mask,
            token_type_ids=token_type_ids,
        )

        return {'pred_labels':outputs, 'slot_label_ids': label_ids}


    def configure_optimizers(self):
        return AdamW(self.model.parameters(), lr=2e-5, eps=1e-8)

    def train_dataloader(self):
        return DataLoader(self.dataset["train"], batch_size=self.config.eval_batch_size)

    def val_dataloader(self):
        return DataLoader(self.dataset["val"], batch_size=self.config.eval_batch_size)

    def test_dataloader(self):
        return DataLoader(self.dataset["test"], batch_size=self.config.eval_batch_size)

    def pred_dataloader(self, dataset):
        return DataLoader(dataset, batch_size=1)

    def _calculate_loss(self, outputs, labels):   # 확률에서 얼마나 떨어져있는가?
        loss = F.cross_entropy(outputs, labels)
        return loss
        
    def _calculate_accuracy(self, outputs, labels):   # 0.5보다 크면 1, 아니면 0으로 labeling
        active_logits = torch.argmax(outputs, dim=1)
        active_labels = torch.argmax(labels, dim=1)
        accuracy = accuracy_score(active_logits, active_labels)
        return accuracy

    # def _f1_score(self, outputs, labels):
    #     print('_f1_score : ')

    #     active_logits = outputs.view(-1)
    #     active_labels = labels.view(-1)
    #     print(active_logits, active_logits)
    #     f1 = F1Score()
    #     f1 = f1(active_logits, active_logits)
    #     print(f1)
    #     return f1

    def _convert_ids_to_labels(self, outputs, slot_labels):
        _, y_hat = torch.max(outputs, dim=2)
        y_hat = y_hat.detach().cpu().numpy()
        slot_label_ids = slot_labels.detach().cpu().numpy()

        slot_label_map = {i: label for i, label in enumerate(self.slot_labels_type)}
        slot_gt_labels = [[] for _ in range(slot_label_ids.shape[0])]
        slot_pred_labels = [[] for _ in range(slot_label_ids.shape[0])]

        for i in range(slot_label_ids.shape[0]):
            for j in range(slot_label_ids.shape[1]):
                if slot_label_ids[i, j] != self.pad_token_id:
                    slot_gt_labels[i].append(slot_label_map[slot_label_ids[i][j]])
                    slot_pred_labels[i].append(slot_label_map[y_hat[i][j]])

        return slot_pred_labels, slot_gt_labels

In [459]:
class Config(BertConfig):
    def __init__(self) :
        super().__init__()
        self.task= 'kor_nextsentence_prediction_'
        self.log_path= data_path+'/logs'
        self.bert_model = 'snunlp/KR-Medium' #'klue/bert-base'   
        self.max_len= 256
        self.train_batch_size= 32
        self.eval_batch_size= 32
        self.dropout_rate= 0.1
        self.gpus= torch.cuda.device_count()
    
config = Config()

## 모델 학습

In [460]:
preprocessor = Preprocessor(config.max_len)

In [461]:
dataset = {}
dataset['train'] = CorpusDataset(train, preprocessor.get_input_features)
dataset['val'] = CorpusDataset(val, preprocessor.get_input_features)
dataset['test'] = CorpusDataset(test, preprocessor.get_input_features)

In [462]:
model = BERTNextSentenceModel(config, dataset).cuda()

init


Some weights of the model checkpoint at snunlp/KR-Medium were not used when initializing BertForNextSentencePrediction: ['cls.predictions.transform.dense.bias', 'cls.predictions.decoder.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForNextSentencePrediction from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForNextSentencePrediction from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [463]:
logger = TensorBoardLogger(
    save_dir=os.path.join(config.log_path, config.task), version=3, name=config.task
)

acc_checkpoint_callback = ModelCheckpoint(
    dirpath=data_path+'/sts/checkpoints/'+ config.task, 
    filename="{epoch}_{val_acc:2f}_{other_metric:.2f}",
    verbose=True,
    monitor='val_acc',
    mode='max',
    save_top_k=1,
    save_last=True)

lrmonitor_callback = LearningRateMonitor(logging_interval='step')

In [467]:
trainer = pl.Trainer(
    #gpus=config.gpus,
    callbacks=[acc_checkpoint_callback, lrmonitor_callback],
    logger=logger,
    max_epochs=2,
)

trainer.fit(model)

GPU available: True, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs

  | Name    | Type                          | Params
----------------------------------------------------------
0 | softmax | Softmax                       | 0     
1 | model   | BertForNextSentencePrediction | 101 M 
2 | cls     | BertOnlyNSPHead               | 1.5 K 
3 | dropout | Dropout                       | 0     
4 | linear  | Linear                        | 1.5 K 
----------------------------------------------------------
101 M     Trainable params
0         Non-trainable params
101 M     Total params
405.624   Total estimated model params size (MB)


Validation sanity check: 0it [00:00, ?it/s]



Validation sanity check:  50%|█████     | 1/2 [00:10<00:10, 10.83s/it]



Validation sanity check: 100%|██████████| 2/2 [00:21<00:00, 10.84s/it]validation_epoch_end :  {'val_loss': tensor(0.3689), 'val_acc': tensor(0.9688, dtype=torch.float64)}
                                                                      



Epoch 0:   0%|          | 0/1622 [00:00<?, ?it/s]



Epoch 0:   0%|          | 1/1622 [00:45<20:30:05, 45.53s/it, loss=0.364, v_num=3]



Epoch 0:   0%|          | 2/1622 [01:31<20:38:59, 45.89s/it, loss=0.348, v_num=3]



Epoch 0:   0%|          | 3/1622 [02:08<19:14:50, 42.80s/it, loss=0.351, v_num=3]



Epoch 0:   0%|          | 4/1622 [02:44<18:30:00, 41.16s/it, loss=0.349, v_num=3]



Epoch 0:   0%|          | 5/1622 [03:21<18:03:42, 40.21s/it, loss=0.342, v_num=3]



Epoch 0:   0%|          | 6/1622 [03:57<17:48:00, 39.65s/it, loss=0.337, v_num=3]



Epoch 0:   0%|          | 7/1622 [04:34<17:33:49, 39.15s/it, loss=0.334, v_num=3]



Epoch 0:   0%|          | 8/1622 [05:10<17:24:35, 38.83s/it, loss=0.331, v_num=3]



Epoch 0:   1%|          | 9/1622 [05:47<17:16:51, 38.57s/it, loss=0.33, v_num=3] 



Epoch 0:   1%|          | 10/1622 [06:24<17:13:20, 38.46s/it, loss=0.329, v_num=3]



Epoch 0:   1%|          | 11/1622 [07:02<17:11:19, 38.41s/it, loss=0.328, v_num=3]



Epoch 0:   1%|          | 12/1622 [07:40<17:09:23, 38.36s/it, loss=0.327, v_num=3]



Epoch 0:   1%|          | 13/1622 [08:18<17:07:58, 38.33s/it, loss=0.326, v_num=3]



Epoch 0:   1%|          | 14/1622 [08:56<17:06:44, 38.31s/it, loss=0.325, v_num=3]



Epoch 0:   1%|          | 15/1622 [09:34<17:05:06, 38.27s/it, loss=0.324, v_num=3]



Epoch 0:   1%|          | 16/1622 [10:10<17:01:22, 38.16s/it, loss=0.325, v_num=3]



Epoch 0:   1%|          | 17/1622 [10:46<16:57:54, 38.05s/it, loss=0.324, v_num=3]



Epoch 0:   1%|          | 18/1622 [11:23<16:55:05, 37.97s/it, loss=0.323, v_num=3]



Epoch 0:   1%|          | 19/1622 [11:59<16:52:19, 37.89s/it, loss=0.323, v_num=3]



Epoch 0:   1%|          | 20/1622 [12:35<16:49:15, 37.80s/it, loss=0.323, v_num=3]



Epoch 0:   1%|▏         | 21/1622 [13:12<16:46:35, 37.72s/it, loss=0.322, v_num=3]



Epoch 0:   1%|▏         | 22/1622 [13:48<16:44:38, 37.67s/it, loss=0.321, v_num=3]



Epoch 0:   1%|▏         | 23/1622 [14:25<16:42:37, 37.62s/it, loss=0.318, v_num=3]



Epoch 0:   1%|▏         | 24/1622 [15:01<16:40:30, 37.57s/it, loss=0.317, v_num=3]



Epoch 0:   2%|▏         | 25/1622 [15:38<16:38:52, 37.53s/it, loss=0.317, v_num=3]



Epoch 0:   2%|▏         | 26/1622 [16:14<16:37:01, 37.48s/it, loss=0.317, v_num=3]



Epoch 0:   2%|▏         | 27/1622 [16:50<16:35:15, 37.44s/it, loss=0.317, v_num=3]



Epoch 0:   2%|▏         | 28/1622 [17:26<16:33:19, 37.39s/it, loss=0.317, v_num=3]



Epoch 0:   2%|▏         | 29/1622 [18:03<16:31:31, 37.35s/it, loss=0.317, v_num=3]



Epoch 0:   2%|▏         | 30/1622 [18:39<16:29:51, 37.31s/it, loss=0.316, v_num=3]



Epoch 0:   2%|▏         | 31/1622 [19:15<16:28:23, 37.27s/it, loss=0.316, v_num=3]



Epoch 0:   2%|▏         | 32/1622 [19:51<16:27:06, 37.25s/it, loss=0.318, v_num=3]



Epoch 0:   2%|▏         | 33/1622 [20:27<16:25:25, 37.21s/it, loss=0.318, v_num=3]



Epoch 0:   2%|▏         | 34/1622 [21:04<16:24:12, 37.19s/it, loss=0.318, v_num=3]



Epoch 0:   2%|▏         | 35/1622 [21:40<16:22:46, 37.16s/it, loss=0.318, v_num=3]



Epoch 0:   2%|▏         | 36/1622 [22:16<16:21:27, 37.13s/it, loss=0.317, v_num=3]



Epoch 0:   2%|▏         | 37/1622 [22:52<16:20:04, 37.10s/it, loss=0.317, v_num=3]



Epoch 0:   2%|▏         | 38/1622 [23:29<16:18:55, 37.08s/it, loss=0.317, v_num=3]



Epoch 0:   2%|▏         | 39/1622 [24:05<16:17:42, 37.06s/it, loss=0.318, v_num=3]



Epoch 0:   2%|▏         | 40/1622 [24:41<16:16:29, 37.04s/it, loss=0.318, v_num=3]



Epoch 0:   3%|▎         | 41/1622 [25:17<16:15:28, 37.02s/it, loss=0.316, v_num=3]



Epoch 0:   3%|▎         | 42/1622 [25:54<16:14:26, 37.00s/it, loss=0.316, v_num=3]



Epoch 0:   3%|▎         | 43/1622 [26:30<16:13:27, 36.99s/it, loss=0.317, v_num=3]



Epoch 0:   3%|▎         | 44/1622 [27:06<16:12:20, 36.97s/it, loss=0.317, v_num=3]



Epoch 0:   3%|▎         | 45/1622 [27:42<16:11:10, 36.95s/it, loss=0.317, v_num=3]



Epoch 0:   3%|▎         | 46/1622 [28:19<16:10:17, 36.94s/it, loss=0.317, v_num=3]



Epoch 0:   3%|▎         | 47/1622 [28:55<16:09:09, 36.92s/it, loss=0.317, v_num=3]



Epoch 0:   3%|▎         | 48/1622 [29:31<16:08:10, 36.91s/it, loss=0.317, v_num=3]



Epoch 0:   3%|▎         | 49/1622 [30:07<16:07:19, 36.90s/it, loss=0.317, v_num=3]



Epoch 0:   3%|▎         | 50/1622 [30:47<16:08:08, 36.95s/it, loss=0.317, v_num=3]



Epoch 0:   3%|▎         | 51/1622 [31:24<16:07:21, 36.95s/it, loss=0.317, v_num=3]



Epoch 0:   3%|▎         | 52/1622 [32:00<16:06:16, 36.93s/it, loss=0.315, v_num=3]



Epoch 0:   3%|▎         | 53/1622 [32:36<16:05:23, 36.92s/it, loss=0.315, v_num=3]



Epoch 0:   3%|▎         | 54/1622 [33:13<16:04:35, 36.91s/it, loss=0.315, v_num=3]



Epoch 0:   3%|▎         | 55/1622 [33:49<16:03:35, 36.90s/it, loss=0.315, v_num=3]



Epoch 0:   3%|▎         | 56/1622 [34:25<16:02:39, 36.88s/it, loss=0.315, v_num=3]



Epoch 0:   4%|▎         | 57/1622 [35:01<16:01:45, 36.87s/it, loss=0.315, v_num=3]



Epoch 0:   4%|▎         | 58/1622 [35:38<16:01:03, 36.87s/it, loss=0.315, v_num=3]



Epoch 0:   4%|▎         | 59/1622 [36:14<16:00:12, 36.86s/it, loss=0.314, v_num=3]



Epoch 0:   4%|▎         | 60/1622 [36:50<15:59:19, 36.85s/it, loss=0.314, v_num=3]



Epoch 0:   4%|▍         | 61/1622 [37:27<15:58:35, 36.85s/it, loss=0.314, v_num=3]



Epoch 0:   4%|▍         | 62/1622 [38:03<15:57:33, 36.83s/it, loss=0.314, v_num=3]



Epoch 0:   4%|▍         | 63/1622 [38:39<15:56:36, 36.82s/it, loss=0.314, v_num=3]



Epoch 0:   4%|▍         | 64/1622 [39:15<15:55:43, 36.81s/it, loss=0.314, v_num=3]



Epoch 0:   4%|▍         | 65/1622 [39:52<15:54:59, 36.80s/it, loss=0.314, v_num=3]



Epoch 0:   4%|▍         | 66/1622 [40:28<15:54:16, 36.80s/it, loss=0.314, v_num=3]



Epoch 0:   4%|▍         | 67/1622 [41:04<15:53:25, 36.79s/it, loss=0.314, v_num=3]



Epoch 0:   4%|▍         | 68/1622 [41:40<15:52:29, 36.78s/it, loss=0.314, v_num=3]



Epoch 0:   4%|▍         | 69/1622 [42:17<15:51:48, 36.77s/it, loss=0.313, v_num=3]



Epoch 0:   4%|▍         | 70/1622 [42:53<15:50:55, 36.76s/it, loss=0.313, v_num=3]



Epoch 0:   4%|▍         | 71/1622 [43:29<15:50:08, 36.76s/it, loss=0.313, v_num=3]



Epoch 0:   4%|▍         | 72/1622 [44:05<15:49:20, 36.75s/it, loss=0.313, v_num=3]



Epoch 0:   5%|▍         | 73/1622 [44:42<15:48:35, 36.74s/it, loss=0.313, v_num=3]



Epoch 0:   5%|▍         | 74/1622 [45:18<15:47:50, 36.74s/it, loss=0.313, v_num=3]



Epoch 0:   5%|▍         | 75/1622 [45:54<15:47:04, 36.73s/it, loss=0.313, v_num=3]



Epoch 0:   5%|▍         | 76/1622 [46:31<15:46:21, 36.73s/it, loss=0.313, v_num=3]



Epoch 0:   5%|▍         | 77/1622 [47:07<15:45:35, 36.72s/it, loss=0.313, v_num=3]



Epoch 0:   5%|▍         | 78/1622 [47:43<15:44:45, 36.71s/it, loss=0.313, v_num=3]



Epoch 0:   5%|▍         | 79/1622 [48:20<15:44:05, 36.71s/it, loss=0.313, v_num=3]



Epoch 0:   5%|▍         | 80/1622 [48:56<15:43:19, 36.71s/it, loss=0.313, v_num=3]



Epoch 0:   5%|▍         | 81/1622 [49:32<15:42:37, 36.70s/it, loss=0.313, v_num=3]



Epoch 0:   5%|▌         | 82/1622 [50:08<15:41:47, 36.69s/it, loss=0.313, v_num=3]



Epoch 0:   5%|▌         | 83/1622 [50:44<15:40:57, 36.68s/it, loss=0.313, v_num=3]



Epoch 0:   5%|▌         | 84/1622 [51:20<15:40:11, 36.68s/it, loss=0.313, v_num=3]



Epoch 0:   5%|▌         | 85/1622 [51:56<15:39:20, 36.67s/it, loss=0.313, v_num=3]



Epoch 0:   5%|▌         | 86/1622 [52:33<15:38:36, 36.66s/it, loss=0.313, v_num=3]



Epoch 0:   5%|▌         | 87/1622 [53:09<15:37:46, 36.66s/it, loss=0.313, v_num=3]



Epoch 0:   5%|▌         | 88/1622 [53:44<15:36:57, 36.65s/it, loss=0.313, v_num=3]



Epoch 0:   5%|▌         | 89/1622 [54:20<15:36:01, 36.64s/it, loss=0.313, v_num=3]



Epoch 0:   6%|▌         | 90/1622 [54:56<15:35:09, 36.62s/it, loss=0.313, v_num=3]



Epoch 0:   6%|▌         | 91/1622 [55:32<15:34:26, 36.62s/it, loss=0.313, v_num=3]



Epoch 0:   6%|▌         | 92/1622 [56:08<15:33:46, 36.62s/it, loss=0.313, v_num=3]



Epoch 0:   6%|▌         | 93/1622 [56:45<15:33:01, 36.61s/it, loss=0.313, v_num=3]



Epoch 0:   6%|▌         | 94/1622 [57:20<15:32:13, 36.61s/it, loss=0.313, v_num=3]



Epoch 0:   6%|▌         | 95/1622 [57:56<15:31:25, 36.60s/it, loss=0.313, v_num=3]



Epoch 0:   6%|▌         | 96/1622 [58:32<15:30:37, 36.59s/it, loss=0.313, v_num=3]



Epoch 0:   6%|▌         | 97/1622 [59:09<15:30:01, 36.59s/it, loss=0.313, v_num=3]



Epoch 0:   6%|▌         | 98/1622 [59:45<15:29:25, 36.59s/it, loss=0.313, v_num=3]



Epoch 0:   6%|▌         | 99/1622 [1:00:22<15:28:44, 36.59s/it, loss=0.313, v_num=3]



Epoch 0:   6%|▌         | 100/1622 [1:00:58<15:28:02, 36.58s/it, loss=0.313, v_num=3]



Epoch 0:   6%|▌         | 101/1622 [1:01:34<15:27:19, 36.58s/it, loss=0.313, v_num=3]



Epoch 0:   6%|▋         | 102/1622 [1:02:10<15:26:38, 36.58s/it, loss=0.313, v_num=3]



Epoch 0:   6%|▋         | 103/1622 [1:02:47<15:25:55, 36.57s/it, loss=0.313, v_num=3]



Epoch 0:   6%|▋         | 104/1622 [1:03:23<15:25:12, 36.57s/it, loss=0.313, v_num=3]



Epoch 0:   6%|▋         | 105/1622 [1:03:59<15:24:32, 36.57s/it, loss=0.313, v_num=3]



Epoch 0:   7%|▋         | 106/1622 [1:04:35<15:23:51, 36.56s/it, loss=0.313, v_num=3]



Epoch 0:   7%|▋         | 107/1622 [1:05:11<15:23:07, 36.56s/it, loss=0.313, v_num=3]



Epoch 0:   7%|▋         | 108/1622 [1:05:48<15:22:26, 36.56s/it, loss=0.313, v_num=3]



Epoch 0:   7%|▋         | 109/1622 [1:06:24<15:21:47, 36.55s/it, loss=0.313, v_num=3]



Epoch 0:   7%|▋         | 110/1622 [1:07:00<15:21:06, 36.55s/it, loss=0.313, v_num=3]



Epoch 0:   7%|▋         | 111/1622 [1:07:36<15:20:26, 36.55s/it, loss=0.313, v_num=3]



Epoch 0:   7%|▋         | 112/1622 [1:08:13<15:19:43, 36.55s/it, loss=0.313, v_num=3]



Epoch 0:   7%|▋         | 113/1622 [1:08:49<15:19:01, 36.54s/it, loss=0.313, v_num=3]



Epoch 0:   7%|▋         | 114/1622 [1:09:25<15:18:21, 36.54s/it, loss=0.313, v_num=3]



Epoch 0:   7%|▋         | 115/1622 [1:10:01<15:17:40, 36.54s/it, loss=0.313, v_num=3]



Epoch 0:   7%|▋         | 116/1622 [1:10:37<15:16:56, 36.53s/it, loss=0.313, v_num=3]



Epoch 0:   7%|▋         | 117/1622 [1:11:13<15:16:07, 36.52s/it, loss=0.313, v_num=3]



Epoch 0:   7%|▋         | 118/1622 [1:11:49<15:15:26, 36.52s/it, loss=0.313, v_num=3]



Epoch 0:   7%|▋         | 119/1622 [1:12:25<15:14:41, 36.51s/it, loss=0.315, v_num=3]



Epoch 0:   7%|▋         | 120/1622 [1:13:01<15:13:58, 36.51s/it, loss=0.315, v_num=3]



Epoch 0:   7%|▋         | 121/1622 [1:13:37<15:13:18, 36.51s/it, loss=0.315, v_num=3]



Epoch 0:   8%|▊         | 122/1622 [1:14:13<15:12:35, 36.50s/it, loss=0.315, v_num=3]



Epoch 0:   8%|▊         | 123/1622 [1:14:49<15:11:54, 36.50s/it, loss=0.315, v_num=3]



Epoch 0:   8%|▊         | 124/1622 [1:15:25<15:11:13, 36.50s/it, loss=0.315, v_num=3]



Epoch 0:   8%|▊         | 125/1622 [1:16:02<15:10:35, 36.50s/it, loss=0.315, v_num=3]



Epoch 0:   8%|▊         | 126/1622 [1:16:38<15:09:58, 36.50s/it, loss=0.315, v_num=3]



Epoch 0:   8%|▊         | 127/1622 [1:17:14<15:09:16, 36.49s/it, loss=0.315, v_num=3]



Epoch 0:   8%|▊         | 128/1622 [1:17:50<15:08:30, 36.49s/it, loss=0.315, v_num=3]



Epoch 0:   8%|▊         | 129/1622 [1:18:26<15:07:52, 36.49s/it, loss=0.315, v_num=3]



Epoch 0:   8%|▊         | 130/1622 [1:19:02<15:07:12, 36.48s/it, loss=0.315, v_num=3]



Epoch 0:   8%|▊         | 131/1622 [1:19:38<15:06:30, 36.48s/it, loss=0.315, v_num=3]



Epoch 0:   8%|▊         | 132/1622 [1:20:14<15:05:46, 36.47s/it, loss=0.315, v_num=3]



Epoch 0:   8%|▊         | 133/1622 [1:20:50<15:05:08, 36.47s/it, loss=0.315, v_num=3]



Epoch 0:   8%|▊         | 134/1622 [1:21:27<15:04:30, 36.47s/it, loss=0.315, v_num=3]



Epoch 0:   8%|▊         | 135/1622 [1:22:03<15:03:50, 36.47s/it, loss=0.315, v_num=3]



Epoch 0:   8%|▊         | 136/1622 [1:22:39<15:03:12, 36.47s/it, loss=0.315, v_num=3]



Epoch 0:   8%|▊         | 137/1622 [1:23:15<15:02:31, 36.47s/it, loss=0.315, v_num=3]



Epoch 0:   9%|▊         | 138/1622 [1:23:51<15:01:48, 36.46s/it, loss=0.315, v_num=3]



Epoch 0:   9%|▊         | 139/1622 [1:24:27<15:01:08, 36.46s/it, loss=0.313, v_num=3]



Epoch 0:   9%|▊         | 140/1622 [1:25:04<15:00:36, 36.46s/it, loss=0.313, v_num=3]



Epoch 0:   9%|▊         | 141/1622 [1:25:42<15:00:15, 36.47s/it, loss=0.313, v_num=3]



Epoch 0:   9%|▉         | 142/1622 [1:26:22<15:00:10, 36.49s/it, loss=0.313, v_num=3]



Epoch 0:   9%|▉         | 143/1622 [1:27:01<15:00:06, 36.52s/it, loss=0.313, v_num=3]



Epoch 0:   9%|▉         | 144/1622 [1:27:39<14:59:47, 36.53s/it, loss=0.313, v_num=3]



Epoch 0:   9%|▉         | 145/1622 [1:28:15<14:59:02, 36.52s/it, loss=0.313, v_num=3]



Epoch 0:   9%|▉         | 146/1622 [1:28:51<14:58:23, 36.52s/it, loss=0.313, v_num=3]



Epoch 0:   9%|▉         | 147/1622 [1:29:28<14:57:47, 36.52s/it, loss=0.313, v_num=3]



Epoch 0:   9%|▉         | 148/1622 [1:30:04<14:57:07, 36.52s/it, loss=0.313, v_num=3]



Epoch 0:   9%|▉         | 149/1622 [1:30:40<14:56:23, 36.51s/it, loss=0.313, v_num=3]



Epoch 0:   9%|▉         | 150/1622 [1:31:16<14:55:45, 36.51s/it, loss=0.313, v_num=3]



Epoch 0:   9%|▉         | 151/1622 [1:31:53<14:55:08, 36.51s/it, loss=0.313, v_num=3]



Epoch 0:   9%|▉         | 152/1622 [1:32:29<14:54:30, 36.51s/it, loss=0.313, v_num=3]



Epoch 0:   9%|▉         | 153/1622 [1:33:05<14:53:49, 36.51s/it, loss=0.313, v_num=3]



Epoch 0:   9%|▉         | 154/1622 [1:33:41<14:53:08, 36.50s/it, loss=0.313, v_num=3]



Epoch 0:  10%|▉         | 155/1622 [1:34:17<14:52:25, 36.50s/it, loss=0.313, v_num=3]



Epoch 0:  10%|▉         | 156/1622 [1:34:53<14:51:47, 36.50s/it, loss=0.313, v_num=3]



Epoch 0:  10%|▉         | 157/1622 [1:35:30<14:51:09, 36.50s/it, loss=0.313, v_num=3]



Epoch 0:  10%|▉         | 158/1622 [1:36:05<14:50:26, 36.49s/it, loss=0.313, v_num=3]



Epoch 0:  10%|▉         | 159/1622 [1:36:41<14:49:42, 36.49s/it, loss=0.313, v_num=3]



Epoch 0:  10%|▉         | 160/1622 [1:37:17<14:49:02, 36.49s/it, loss=0.313, v_num=3]



Epoch 0:  10%|▉         | 161/1622 [1:37:53<14:48:17, 36.48s/it, loss=0.313, v_num=3]



Epoch 0:  10%|▉         | 162/1622 [1:38:29<14:47:34, 36.48s/it, loss=0.313, v_num=3]



Epoch 0:  10%|█         | 163/1622 [1:39:05<14:46:55, 36.47s/it, loss=0.313, v_num=3]



Epoch 0:  10%|█         | 164/1622 [1:39:40<14:46:11, 36.47s/it, loss=0.313, v_num=3]



Epoch 0:  10%|█         | 165/1622 [1:40:17<14:45:35, 36.47s/it, loss=0.313, v_num=3]



Epoch 0:  10%|█         | 166/1622 [1:40:53<14:44:56, 36.47s/it, loss=0.313, v_num=3]



Epoch 0:  10%|█         | 167/1622 [1:41:29<14:44:16, 36.47s/it, loss=0.313, v_num=3]



Epoch 0:  10%|█         | 168/1622 [1:42:06<14:43:39, 36.46s/it, loss=0.313, v_num=3]



Epoch 0:  10%|█         | 169/1622 [1:42:42<14:42:59, 36.46s/it, loss=0.313, v_num=3]



Epoch 0:  10%|█         | 170/1622 [1:43:19<14:42:31, 36.47s/it, loss=0.313, v_num=3]



Epoch 0:  11%|█         | 171/1622 [1:43:56<14:42:01, 36.47s/it, loss=0.313, v_num=3]



Epoch 0:  11%|█         | 172/1622 [1:44:33<14:41:22, 36.47s/it, loss=0.313, v_num=3]



Epoch 0:  11%|█         | 173/1622 [1:45:10<14:40:54, 36.48s/it, loss=0.313, v_num=3]



Epoch 0:  11%|█         | 174/1622 [1:45:46<14:40:17, 36.48s/it, loss=0.313, v_num=3]



Epoch 0:  11%|█         | 175/1622 [1:46:25<14:39:56, 36.49s/it, loss=0.313, v_num=3]



Epoch 0:  11%|█         | 176/1622 [1:47:03<14:39:33, 36.50s/it, loss=0.313, v_num=3]



Epoch 0:  11%|█         | 177/1622 [1:47:41<14:39:09, 36.50s/it, loss=0.313, v_num=3]



Epoch 0:  11%|█         | 178/1622 [1:48:18<14:38:41, 36.51s/it, loss=0.313, v_num=3]



Epoch 0:  11%|█         | 179/1622 [1:48:57<14:38:21, 36.52s/it, loss=0.313, v_num=3]



Epoch 0:  11%|█         | 180/1622 [1:49:35<14:37:56, 36.53s/it, loss=0.313, v_num=3]



Epoch 0:  11%|█         | 181/1622 [1:50:12<14:37:28, 36.54s/it, loss=0.313, v_num=3]



Epoch 0:  11%|█         | 182/1622 [1:50:51<14:37:09, 36.55s/it, loss=0.313, v_num=3]



Epoch 0:  11%|█▏        | 183/1622 [1:51:30<14:36:48, 36.56s/it, loss=0.313, v_num=3]



Epoch 0:  11%|█▏        | 184/1622 [1:52:08<14:36:23, 36.57s/it, loss=0.313, v_num=3]



Epoch 0:  11%|█▏        | 185/1622 [1:52:46<14:35:59, 36.58s/it, loss=0.313, v_num=3]



Epoch 0:  11%|█▏        | 186/1622 [1:53:24<14:35:36, 36.59s/it, loss=0.313, v_num=3]



Epoch 0:  12%|█▏        | 187/1622 [1:54:03<14:35:18, 36.60s/it, loss=0.313, v_num=3]



Epoch 0:  12%|█▏        | 188/1622 [1:54:42<14:34:58, 36.61s/it, loss=0.313, v_num=3]



Epoch 0:  12%|█▏        | 189/1622 [1:55:21<14:34:40, 36.62s/it, loss=0.313, v_num=3]



Epoch 0:  12%|█▏        | 190/1622 [1:56:00<14:34:16, 36.63s/it, loss=0.313, v_num=3]



Epoch 0:  12%|█▏        | 191/1622 [1:56:38<14:33:52, 36.64s/it, loss=0.313, v_num=3]



Epoch 0:  12%|█▏        | 192/1622 [1:57:17<14:33:34, 36.65s/it, loss=0.313, v_num=3]



Epoch 0:  12%|█▏        | 193/1622 [1:57:55<14:33:07, 36.66s/it, loss=0.313, v_num=3]



Epoch 0:  12%|█▏        | 194/1622 [1:58:34<14:32:45, 36.67s/it, loss=0.313, v_num=3]



Epoch 0:  12%|█▏        | 195/1622 [1:59:13<14:32:27, 36.68s/it, loss=0.313, v_num=3]



Epoch 0:  12%|█▏        | 196/1622 [1:59:51<14:31:58, 36.69s/it, loss=0.313, v_num=3]



Epoch 0:  12%|█▏        | 197/1622 [2:00:29<14:31:38, 36.70s/it, loss=0.313, v_num=3]



Epoch 0:  12%|█▏        | 198/1622 [2:01:09<14:31:21, 36.71s/it, loss=0.313, v_num=3]



Epoch 0:  12%|█▏        | 199/1622 [2:01:47<14:30:53, 36.72s/it, loss=0.313, v_num=3]



Epoch 0:  12%|█▏        | 200/1622 [2:02:26<14:30:35, 36.73s/it, loss=0.313, v_num=3]



Epoch 0:  12%|█▏        | 201/1622 [2:03:06<14:30:16, 36.75s/it, loss=0.313, v_num=3]



Epoch 0:  12%|█▏        | 202/1622 [2:03:45<14:29:56, 36.76s/it, loss=0.313, v_num=3]



Epoch 0:  13%|█▎        | 203/1622 [2:04:23<14:29:32, 36.77s/it, loss=0.313, v_num=3]



Epoch 0:  13%|█▎        | 204/1622 [2:05:02<14:29:13, 36.78s/it, loss=0.313, v_num=3]



Epoch 0:  13%|█▎        | 205/1622 [2:05:42<14:28:55, 36.79s/it, loss=0.313, v_num=3]



Epoch 0:  13%|█▎        | 206/1622 [2:06:22<14:28:41, 36.81s/it, loss=0.313, v_num=3]



Epoch 0:  13%|█▎        | 207/1622 [2:07:00<14:28:11, 36.81s/it, loss=0.313, v_num=3]



Epoch 0:  13%|█▎        | 208/1622 [2:07:37<14:27:34, 36.81s/it, loss=0.313, v_num=3]



Epoch 0:  13%|█▎        | 209/1622 [2:08:15<14:27:09, 36.82s/it, loss=0.313, v_num=3]



Epoch 0:  13%|█▎        | 210/1622 [2:08:53<14:26:36, 36.82s/it, loss=0.313, v_num=3]



Epoch 0:  13%|█▎        | 211/1622 [2:09:32<14:26:13, 36.83s/it, loss=0.313, v_num=3]



Epoch 0:  13%|█▎        | 212/1622 [2:10:11<14:25:50, 36.84s/it, loss=0.313, v_num=3]



Epoch 0:  13%|█▎        | 213/1622 [2:10:50<14:25:30, 36.86s/it, loss=0.313, v_num=3]



Epoch 0:  13%|█▎        | 214/1622 [2:11:29<14:25:11, 36.87s/it, loss=0.313, v_num=3]



Epoch 0:  13%|█▎        | 215/1622 [2:12:09<14:24:52, 36.88s/it, loss=0.313, v_num=3]



Epoch 0:  13%|█▎        | 216/1622 [2:12:48<14:24:32, 36.89s/it, loss=0.313, v_num=3]



Epoch 0:  13%|█▎        | 217/1622 [2:13:28<14:24:11, 36.91s/it, loss=0.313, v_num=3]



Epoch 0:  13%|█▎        | 218/1622 [2:14:07<14:23:50, 36.92s/it, loss=0.313, v_num=3]



Epoch 0:  14%|█▎        | 219/1622 [2:14:46<14:23:26, 36.93s/it, loss=0.313, v_num=3]



Epoch 0:  14%|█▎        | 220/1622 [2:15:26<14:23:05, 36.94s/it, loss=0.313, v_num=3]



Epoch 0:  14%|█▎        | 221/1622 [2:16:05<14:22:44, 36.95s/it, loss=0.313, v_num=3]



Epoch 0:  14%|█▎        | 222/1622 [2:16:44<14:22:23, 36.96s/it, loss=0.313, v_num=3]



Epoch 0:  14%|█▎        | 223/1622 [2:17:24<14:22:00, 36.97s/it, loss=0.313, v_num=3]



Epoch 0:  14%|█▍        | 224/1622 [2:18:03<14:21:36, 36.98s/it, loss=0.313, v_num=3]



Epoch 0:  14%|█▍        | 225/1622 [2:18:42<14:21:11, 36.99s/it, loss=0.313, v_num=3]



Epoch 0:  14%|█▍        | 226/1622 [2:19:21<14:20:48, 37.00s/it, loss=0.313, v_num=3]



Epoch 0:  14%|█▍        | 227/1622 [2:20:00<14:20:24, 37.01s/it, loss=0.313, v_num=3]



Epoch 0:  14%|█▍        | 228/1622 [2:20:40<14:20:02, 37.02s/it, loss=0.313, v_num=3]



Epoch 0:  14%|█▍        | 229/1622 [2:21:19<14:19:38, 37.03s/it, loss=0.313, v_num=3]



Epoch 0:  14%|█▍        | 230/1622 [2:21:58<14:19:17, 37.04s/it, loss=0.313, v_num=3]



Epoch 0:  14%|█▍        | 231/1622 [2:22:38<14:18:56, 37.05s/it, loss=0.313, v_num=3]



Epoch 0:  14%|█▍        | 232/1622 [2:23:17<14:18:32, 37.06s/it, loss=0.313, v_num=3]



Epoch 0:  14%|█▍        | 233/1622 [2:23:57<14:18:09, 37.07s/it, loss=0.313, v_num=3]



Epoch 0:  14%|█▍        | 234/1622 [2:24:32<14:17:24, 37.06s/it, loss=0.313, v_num=3]



Epoch 0:  14%|█▍        | 235/1622 [2:25:09<14:16:44, 37.06s/it, loss=0.313, v_num=3]



Epoch 0:  15%|█▍        | 236/1622 [2:25:45<14:16:02, 37.06s/it, loss=0.313, v_num=3]



Epoch 0:  15%|█▍        | 237/1622 [2:26:21<14:15:20, 37.05s/it, loss=0.313, v_num=3]



Epoch 0:  15%|█▍        | 238/1622 [2:26:57<14:14:37, 37.05s/it, loss=0.313, v_num=3]



Epoch 0:  15%|█▍        | 239/1622 [2:27:34<14:13:54, 37.05s/it, loss=0.313, v_num=3]



Epoch 0:  15%|█▍        | 240/1622 [2:28:10<14:13:12, 37.04s/it, loss=0.313, v_num=3]



Epoch 0:  15%|█▍        | 241/1622 [2:28:46<14:12:29, 37.04s/it, loss=0.313, v_num=3]



Epoch 0:  15%|█▍        | 242/1622 [2:29:22<14:11:46, 37.03s/it, loss=0.313, v_num=3]



Epoch 0:  15%|█▍        | 243/1622 [2:29:58<14:11:03, 37.03s/it, loss=0.313, v_num=3]



Epoch 0:  15%|█▌        | 244/1622 [2:30:34<14:10:20, 37.03s/it, loss=0.313, v_num=3]



Epoch 0:  15%|█▌        | 245/1622 [2:31:10<14:09:41, 37.02s/it, loss=0.313, v_num=3]



Epoch 0:  15%|█▌        | 246/1622 [2:31:46<14:08:59, 37.02s/it, loss=0.313, v_num=3]



Epoch 0:  15%|█▌        | 247/1622 [2:32:22<14:08:15, 37.01s/it, loss=0.313, v_num=3]



Epoch 0:  15%|█▌        | 248/1622 [2:32:58<14:07:31, 37.01s/it, loss=0.313, v_num=3]



Epoch 0:  15%|█▌        | 249/1622 [2:33:34<14:06:48, 37.01s/it, loss=0.313, v_num=3]



Epoch 0:  15%|█▌        | 250/1622 [2:34:10<14:06:06, 37.00s/it, loss=0.313, v_num=3]



Epoch 0:  15%|█▌        | 251/1622 [2:34:46<14:05:25, 37.00s/it, loss=0.313, v_num=3]



Epoch 0:  16%|█▌        | 252/1622 [2:35:22<14:04:44, 37.00s/it, loss=0.313, v_num=3]



Epoch 0:  16%|█▌        | 253/1622 [2:35:58<14:04:01, 36.99s/it, loss=0.313, v_num=3]



Epoch 0:  16%|█▌        | 254/1622 [2:36:35<14:03:20, 36.99s/it, loss=0.313, v_num=3]



Epoch 0:  16%|█▌        | 255/1622 [2:37:11<14:02:38, 36.99s/it, loss=0.313, v_num=3]



Epoch 0:  16%|█▌        | 256/1622 [2:37:47<14:01:57, 36.98s/it, loss=0.313, v_num=3]



Epoch 0:  16%|█▌        | 257/1622 [2:38:23<14:01:13, 36.98s/it, loss=0.313, v_num=3]



Epoch 0:  16%|█▌        | 258/1622 [2:38:58<14:00:29, 36.97s/it, loss=0.313, v_num=3]



Epoch 0:  16%|█▌        | 259/1622 [2:39:34<13:59:44, 36.97s/it, loss=0.313, v_num=3]



Epoch 0:  16%|█▌        | 260/1622 [2:40:10<13:59:02, 36.96s/it, loss=0.313, v_num=3]



Epoch 0:  16%|█▌        | 261/1622 [2:40:46<13:58:21, 36.96s/it, loss=0.313, v_num=3]



Epoch 0:  16%|█▌        | 262/1622 [2:41:22<13:57:38, 36.95s/it, loss=0.313, v_num=3]



Epoch 0:  16%|█▌        | 263/1622 [2:41:58<13:56:57, 36.95s/it, loss=0.313, v_num=3]



Epoch 0:  16%|█▋        | 264/1622 [2:42:34<13:56:18, 36.95s/it, loss=0.313, v_num=3]



Epoch 0:  16%|█▋        | 265/1622 [2:43:11<13:55:37, 36.95s/it, loss=0.313, v_num=3]



Epoch 0:  16%|█▋        | 266/1622 [2:43:47<13:54:56, 36.94s/it, loss=0.313, v_num=3]



Epoch 0:  16%|█▋        | 267/1622 [2:44:23<13:54:16, 36.94s/it, loss=0.313, v_num=3]



Epoch 0:  17%|█▋        | 268/1622 [2:44:59<13:53:35, 36.94s/it, loss=0.313, v_num=3]



Epoch 0:  17%|█▋        | 269/1622 [2:45:35<13:52:55, 36.94s/it, loss=0.313, v_num=3]



Epoch 0:  17%|█▋        | 270/1622 [2:46:12<13:52:14, 36.93s/it, loss=0.313, v_num=3]



Epoch 0:  17%|█▋        | 271/1622 [2:46:48<13:51:34, 36.93s/it, loss=0.313, v_num=3]



Epoch 0:  17%|█▋        | 272/1622 [2:47:24<13:50:53, 36.93s/it, loss=0.313, v_num=3]



Epoch 0:  17%|█▋        | 273/1622 [2:48:00<13:50:12, 36.93s/it, loss=0.313, v_num=3]



Epoch 0:  17%|█▋        | 274/1622 [2:48:36<13:49:31, 36.92s/it, loss=0.313, v_num=3]



Epoch 0:  17%|█▋        | 275/1622 [2:49:13<13:48:51, 36.92s/it, loss=0.313, v_num=3]



Epoch 0:  17%|█▋        | 276/1622 [2:49:49<13:48:11, 36.92s/it, loss=0.313, v_num=3]



Epoch 0:  17%|█▋        | 277/1622 [2:50:25<13:47:29, 36.91s/it, loss=0.313, v_num=3]



Epoch 0:  17%|█▋        | 278/1622 [2:51:01<13:46:47, 36.91s/it, loss=0.313, v_num=3]



Epoch 0:  17%|█▋        | 279/1622 [2:51:37<13:46:06, 36.91s/it, loss=0.313, v_num=3]



Epoch 0:  17%|█▋        | 280/1622 [2:52:13<13:45:26, 36.90s/it, loss=0.313, v_num=3]



Epoch 0:  17%|█▋        | 281/1622 [2:52:49<13:44:47, 36.90s/it, loss=0.313, v_num=3]



Epoch 0:  17%|█▋        | 282/1622 [2:53:26<13:44:07, 36.90s/it, loss=0.313, v_num=3]



Epoch 0:  17%|█▋        | 283/1622 [2:54:02<13:43:26, 36.90s/it, loss=0.313, v_num=3]



Epoch 0:  18%|█▊        | 284/1622 [2:54:38<13:42:45, 36.89s/it, loss=0.313, v_num=3]



Epoch 0:  18%|█▊        | 285/1622 [2:55:14<13:42:05, 36.89s/it, loss=0.313, v_num=3]



Epoch 0:  18%|█▊        | 286/1622 [2:55:50<13:41:24, 36.89s/it, loss=0.313, v_num=3]



Epoch 0:  18%|█▊        | 287/1622 [2:56:26<13:40:45, 36.89s/it, loss=0.313, v_num=3]



Epoch 0:  18%|█▊        | 288/1622 [2:57:03<13:40:06, 36.89s/it, loss=0.313, v_num=3]



Epoch 0:  18%|█▊        | 289/1622 [2:57:40<13:39:28, 36.89s/it, loss=0.313, v_num=3]



Epoch 0:  18%|█▊        | 290/1622 [2:58:15<13:38:46, 36.88s/it, loss=0.313, v_num=3]



Epoch 0:  18%|█▊        | 291/1622 [2:58:52<13:38:07, 36.88s/it, loss=0.313, v_num=3]



Epoch 0:  18%|█▊        | 292/1622 [2:59:27<13:37:25, 36.88s/it, loss=0.313, v_num=3]



Epoch 0:  18%|█▊        | 293/1622 [3:00:03<13:36:45, 36.87s/it, loss=0.313, v_num=3]



Epoch 0:  18%|█▊        | 294/1622 [3:00:40<13:36:05, 36.87s/it, loss=0.313, v_num=3]



Epoch 0:  18%|█▊        | 295/1622 [3:01:16<13:35:25, 36.87s/it, loss=0.313, v_num=3]



Epoch 0:  18%|█▊        | 296/1622 [3:01:52<13:34:44, 36.87s/it, loss=0.313, v_num=3]



Epoch 0:  18%|█▊        | 297/1622 [3:02:28<13:34:05, 36.86s/it, loss=0.313, v_num=3]



Epoch 0:  18%|█▊        | 298/1622 [3:03:05<13:33:25, 36.86s/it, loss=0.313, v_num=3]



Epoch 0:  18%|█▊        | 299/1622 [3:03:41<13:32:45, 36.86s/it, loss=0.313, v_num=3]



Epoch 0:  18%|█▊        | 300/1622 [3:04:17<13:32:06, 36.86s/it, loss=0.313, v_num=3]



Epoch 0:  19%|█▊        | 301/1622 [3:04:53<13:31:27, 36.86s/it, loss=0.313, v_num=3]



Epoch 0:  19%|█▊        | 302/1622 [3:05:30<13:30:48, 36.85s/it, loss=0.313, v_num=3]



Epoch 0:  19%|█▊        | 303/1622 [3:06:06<13:30:09, 36.85s/it, loss=0.313, v_num=3]



Epoch 0:  19%|█▊        | 304/1622 [3:06:42<13:29:30, 36.85s/it, loss=0.313, v_num=3]



Epoch 0:  19%|█▉        | 305/1622 [3:07:19<13:28:51, 36.85s/it, loss=0.313, v_num=3]



Epoch 0:  19%|█▉        | 306/1622 [3:07:55<13:28:11, 36.85s/it, loss=0.313, v_num=3]



Epoch 0:  19%|█▉        | 307/1622 [3:08:31<13:27:31, 36.85s/it, loss=0.313, v_num=3]



Epoch 0:  19%|█▉        | 308/1622 [3:09:07<13:26:51, 36.84s/it, loss=0.313, v_num=3]



Epoch 0:  19%|█▉        | 309/1622 [3:09:44<13:26:13, 36.84s/it, loss=0.313, v_num=3]



Epoch 0:  19%|█▉        | 310/1622 [3:10:20<13:25:34, 36.84s/it, loss=0.313, v_num=3]



Epoch 0:  19%|█▉        | 311/1622 [3:10:56<13:24:55, 36.84s/it, loss=0.313, v_num=3]



Epoch 0:  19%|█▉        | 312/1622 [3:11:32<13:24:15, 36.84s/it, loss=0.313, v_num=3]



Epoch 0:  19%|█▉        | 313/1622 [3:12:09<13:23:36, 36.83s/it, loss=0.313, v_num=3]



Epoch 0:  19%|█▉        | 314/1622 [3:12:45<13:22:56, 36.83s/it, loss=0.313, v_num=3]



Epoch 0:  19%|█▉        | 315/1622 [3:13:21<13:22:18, 36.83s/it, loss=0.313, v_num=3]



Epoch 0:  19%|█▉        | 316/1622 [3:13:58<13:21:38, 36.83s/it, loss=0.313, v_num=3]



Epoch 0:  20%|█▉        | 317/1622 [3:14:34<13:20:59, 36.83s/it, loss=0.313, v_num=3]



Epoch 0:  20%|█▉        | 318/1622 [3:15:10<13:20:22, 36.83s/it, loss=0.313, v_num=3]



Epoch 0:  20%|█▉        | 319/1622 [3:15:47<13:19:42, 36.82s/it, loss=0.313, v_num=3]



Epoch 0:  20%|█▉        | 320/1622 [3:16:23<13:19:03, 36.82s/it, loss=0.313, v_num=3]



Epoch 0:  20%|█▉        | 321/1622 [3:16:59<13:18:23, 36.82s/it, loss=0.313, v_num=3]



Epoch 0:  20%|█▉        | 322/1622 [3:17:35<13:17:45, 36.82s/it, loss=0.313, v_num=3]



Epoch 0:  20%|█▉        | 323/1622 [3:18:12<13:17:06, 36.82s/it, loss=0.313, v_num=3]



Epoch 0:  20%|█▉        | 324/1622 [3:18:48<13:16:26, 36.82s/it, loss=0.313, v_num=3]



Epoch 0:  20%|██        | 325/1622 [3:19:24<13:15:47, 36.81s/it, loss=0.313, v_num=3]



Epoch 0:  20%|██        | 326/1622 [3:20:00<13:15:08, 36.81s/it, loss=0.313, v_num=3]



Epoch 0:  20%|██        | 327/1622 [3:20:36<13:14:28, 36.81s/it, loss=0.313, v_num=3]



Epoch 0:  20%|██        | 328/1622 [3:21:13<13:13:49, 36.81s/it, loss=0.313, v_num=3]



Epoch 0:  20%|██        | 329/1622 [3:21:49<13:13:11, 36.81s/it, loss=0.313, v_num=3]



Epoch 0:  20%|██        | 330/1622 [3:22:25<13:12:32, 36.81s/it, loss=0.313, v_num=3]



Epoch 0:  20%|██        | 331/1622 [3:23:02<13:11:54, 36.80s/it, loss=0.313, v_num=3]



Epoch 0:  20%|██        | 332/1622 [3:23:38<13:11:14, 36.80s/it, loss=0.313, v_num=3]



Epoch 0:  21%|██        | 333/1622 [3:24:14<13:10:35, 36.80s/it, loss=0.313, v_num=3]



Epoch 0:  21%|██        | 334/1622 [3:24:50<13:09:55, 36.80s/it, loss=0.313, v_num=3]



Epoch 0:  21%|██        | 335/1622 [3:25:26<13:09:17, 36.80s/it, loss=0.313, v_num=3]



Epoch 0:  21%|██        | 336/1622 [3:26:03<13:08:39, 36.80s/it, loss=0.313, v_num=3]



Epoch 0:  21%|██        | 337/1622 [3:26:39<13:08:00, 36.79s/it, loss=0.313, v_num=3]



Epoch 0:  21%|██        | 338/1622 [3:27:15<13:07:21, 36.79s/it, loss=0.313, v_num=3]



Epoch 0:  21%|██        | 339/1622 [3:27:52<13:06:43, 36.79s/it, loss=0.313, v_num=3]



Epoch 0:  21%|██        | 340/1622 [3:28:28<13:06:04, 36.79s/it, loss=0.313, v_num=3]



Epoch 0:  21%|██        | 341/1622 [3:29:04<13:05:25, 36.79s/it, loss=0.313, v_num=3]



Epoch 0:  21%|██        | 342/1622 [3:29:41<13:04:47, 36.79s/it, loss=0.313, v_num=3]



Epoch 0:  21%|██        | 343/1622 [3:30:16<13:04:06, 36.78s/it, loss=0.313, v_num=3]



Epoch 0:  21%|██        | 344/1622 [3:30:53<13:03:29, 36.78s/it, loss=0.313, v_num=3]



Epoch 0:  21%|██▏       | 345/1622 [3:31:29<13:02:51, 36.78s/it, loss=0.313, v_num=3]



Epoch 0:  21%|██▏       | 346/1622 [3:32:06<13:02:12, 36.78s/it, loss=0.313, v_num=3]



Epoch 0:  21%|██▏       | 347/1622 [3:32:42<13:01:32, 36.78s/it, loss=0.313, v_num=3]



Epoch 0:  21%|██▏       | 348/1622 [3:33:18<13:00:53, 36.78s/it, loss=0.313, v_num=3]



Epoch 0:  22%|██▏       | 349/1622 [3:33:54<13:00:15, 36.78s/it, loss=0.313, v_num=3]



Epoch 0:  22%|██▏       | 350/1622 [3:34:31<12:59:37, 36.77s/it, loss=0.313, v_num=3]



Epoch 0:  22%|██▏       | 351/1622 [3:35:07<12:58:58, 36.77s/it, loss=0.313, v_num=3]



Epoch 0:  22%|██▏       | 352/1622 [3:35:43<12:58:18, 36.77s/it, loss=0.313, v_num=3]



Epoch 0:  22%|██▏       | 353/1622 [3:36:19<12:57:39, 36.77s/it, loss=0.313, v_num=3]



Epoch 0:  22%|██▏       | 354/1622 [3:36:55<12:57:00, 36.77s/it, loss=0.313, v_num=3]



Epoch 0:  22%|██▏       | 355/1622 [3:37:31<12:56:22, 36.77s/it, loss=0.313, v_num=3]



Epoch 0:  22%|██▏       | 356/1622 [3:38:08<12:55:43, 36.76s/it, loss=0.313, v_num=3]



Epoch 0:  22%|██▏       | 357/1622 [3:38:43<12:55:03, 36.76s/it, loss=0.313, v_num=3]



Epoch 0:  22%|██▏       | 358/1622 [3:39:20<12:54:24, 36.76s/it, loss=0.313, v_num=3]



Epoch 0:  22%|██▏       | 359/1622 [3:39:56<12:53:46, 36.76s/it, loss=0.313, v_num=3]



Epoch 0:  22%|██▏       | 360/1622 [3:40:32<12:53:07, 36.76s/it, loss=0.313, v_num=3]



Epoch 0:  22%|██▏       | 361/1622 [3:41:08<12:52:28, 36.76s/it, loss=0.313, v_num=3]



Epoch 0:  22%|██▏       | 362/1622 [3:41:44<12:51:49, 36.75s/it, loss=0.313, v_num=3]



Epoch 0:  22%|██▏       | 363/1622 [3:42:21<12:51:11, 36.75s/it, loss=0.313, v_num=3]



Epoch 0:  22%|██▏       | 364/1622 [3:42:57<12:50:34, 36.75s/it, loss=0.313, v_num=3]



Epoch 0:  23%|██▎       | 365/1622 [3:43:34<12:49:55, 36.75s/it, loss=0.313, v_num=3]



Epoch 0:  23%|██▎       | 366/1622 [3:44:10<12:49:17, 36.75s/it, loss=0.313, v_num=3]



Epoch 0:  23%|██▎       | 367/1622 [3:44:46<12:48:38, 36.75s/it, loss=0.313, v_num=3]



Epoch 0:  23%|██▎       | 368/1622 [3:45:22<12:47:59, 36.75s/it, loss=0.313, v_num=3]



Epoch 0:  23%|██▎       | 369/1622 [3:45:58<12:47:20, 36.74s/it, loss=0.313, v_num=3]



Epoch 0:  23%|██▎       | 370/1622 [3:46:35<12:46:42, 36.74s/it, loss=0.313, v_num=3]



Epoch 0:  23%|██▎       | 371/1622 [3:47:11<12:46:04, 36.74s/it, loss=0.313, v_num=3]



Epoch 0:  23%|██▎       | 372/1622 [3:47:47<12:45:26, 36.74s/it, loss=0.313, v_num=3]



Epoch 0:  23%|██▎       | 373/1622 [3:48:23<12:44:48, 36.74s/it, loss=0.313, v_num=3]



Epoch 0:  23%|██▎       | 374/1622 [3:49:00<12:44:09, 36.74s/it, loss=0.313, v_num=3]



Epoch 0:  23%|██▎       | 375/1622 [3:49:36<12:43:30, 36.74s/it, loss=0.313, v_num=3]



Epoch 0:  23%|██▎       | 376/1622 [3:50:12<12:42:51, 36.73s/it, loss=0.313, v_num=3]



Epoch 0:  23%|██▎       | 377/1622 [3:50:48<12:42:13, 36.73s/it, loss=0.313, v_num=3]



Epoch 0:  23%|██▎       | 378/1622 [3:51:24<12:41:35, 36.73s/it, loss=0.313, v_num=3]



Epoch 0:  23%|██▎       | 379/1622 [3:52:01<12:40:56, 36.73s/it, loss=0.313, v_num=3]



Epoch 0:  23%|██▎       | 380/1622 [3:52:37<12:40:18, 36.73s/it, loss=0.313, v_num=3]



Epoch 0:  23%|██▎       | 381/1622 [3:53:13<12:39:40, 36.73s/it, loss=0.313, v_num=3]



Epoch 0:  24%|██▎       | 382/1622 [3:53:49<12:39:00, 36.73s/it, loss=0.313, v_num=3]



Epoch 0:  24%|██▎       | 383/1622 [3:54:25<12:38:20, 36.72s/it, loss=0.313, v_num=3]



Epoch 0:  24%|██▎       | 384/1622 [3:55:01<12:37:41, 36.72s/it, loss=0.313, v_num=3]



Epoch 0:  24%|██▎       | 385/1622 [3:55:37<12:37:02, 36.72s/it, loss=0.313, v_num=3]



Epoch 0:  24%|██▍       | 386/1622 [3:56:13<12:36:23, 36.72s/it, loss=0.313, v_num=3]



Epoch 0:  24%|██▍       | 387/1622 [3:56:49<12:35:44, 36.72s/it, loss=0.313, v_num=3]



Epoch 0:  24%|██▍       | 388/1622 [3:57:25<12:35:05, 36.71s/it, loss=0.313, v_num=3]



Epoch 0:  24%|██▍       | 389/1622 [3:58:01<12:34:26, 36.71s/it, loss=0.313, v_num=3]



Epoch 0:  24%|██▍       | 390/1622 [3:58:37<12:33:47, 36.71s/it, loss=0.313, v_num=3]



Epoch 0:  24%|██▍       | 391/1622 [3:59:13<12:33:09, 36.71s/it, loss=0.313, v_num=3]



Epoch 0:  24%|██▍       | 392/1622 [3:59:49<12:32:30, 36.71s/it, loss=0.313, v_num=3]



Epoch 0:  24%|██▍       | 393/1622 [4:00:25<12:31:51, 36.71s/it, loss=0.313, v_num=3]



Epoch 0:  24%|██▍       | 394/1622 [4:01:02<12:31:15, 36.71s/it, loss=0.313, v_num=3]



Epoch 0:  24%|██▍       | 395/1622 [4:01:38<12:30:38, 36.71s/it, loss=0.313, v_num=3]



Epoch 0:  24%|██▍       | 396/1622 [4:02:14<12:29:59, 36.70s/it, loss=0.313, v_num=3]



Epoch 0:  24%|██▍       | 397/1622 [4:02:50<12:29:19, 36.70s/it, loss=0.313, v_num=3]



Epoch 0:  25%|██▍       | 398/1622 [4:03:27<12:28:42, 36.70s/it, loss=0.313, v_num=3]



Epoch 0:  25%|██▍       | 399/1622 [4:04:04<12:28:07, 36.70s/it, loss=0.313, v_num=3]



Epoch 0:  25%|██▍       | 400/1622 [4:04:41<12:27:33, 36.70s/it, loss=0.313, v_num=3]



Epoch 0:  25%|██▍       | 401/1622 [4:05:17<12:26:54, 36.70s/it, loss=0.313, v_num=3]



Epoch 0:  25%|██▍       | 402/1622 [4:05:54<12:26:16, 36.70s/it, loss=0.313, v_num=3]



Epoch 0:  25%|██▍       | 403/1622 [4:06:30<12:25:37, 36.70s/it, loss=0.313, v_num=3]



Epoch 0:  25%|██▍       | 404/1622 [4:07:06<12:24:58, 36.70s/it, loss=0.313, v_num=3]



Epoch 0:  25%|██▍       | 405/1622 [4:07:42<12:24:20, 36.70s/it, loss=0.313, v_num=3]



Epoch 0:  25%|██▌       | 406/1622 [4:08:18<12:23:42, 36.70s/it, loss=0.313, v_num=3]



Epoch 0:  25%|██▌       | 407/1622 [4:08:54<12:23:03, 36.69s/it, loss=0.315, v_num=3]



Epoch 0:  25%|██▌       | 408/1622 [4:09:30<12:22:23, 36.69s/it, loss=0.315, v_num=3]



Epoch 0:  25%|██▌       | 409/1622 [4:10:06<12:21:45, 36.69s/it, loss=0.315, v_num=3]



Epoch 0:  25%|██▌       | 410/1622 [4:10:42<12:21:06, 36.69s/it, loss=0.315, v_num=3]



Epoch 0:  25%|██▌       | 411/1622 [4:11:18<12:20:27, 36.69s/it, loss=0.315, v_num=3]



Epoch 0:  25%|██▌       | 412/1622 [4:11:54<12:19:48, 36.68s/it, loss=0.315, v_num=3]



Epoch 0:  25%|██▌       | 413/1622 [4:12:30<12:19:10, 36.68s/it, loss=0.315, v_num=3]



Epoch 0:  26%|██▌       | 414/1622 [4:13:06<12:18:32, 36.68s/it, loss=0.315, v_num=3]



Epoch 0:  26%|██▌       | 415/1622 [4:13:42<12:17:54, 36.68s/it, loss=0.315, v_num=3]



Epoch 0:  26%|██▌       | 416/1622 [4:14:19<12:17:16, 36.68s/it, loss=0.315, v_num=3]



Epoch 0:  26%|██▌       | 417/1622 [4:14:55<12:16:37, 36.68s/it, loss=0.315, v_num=3]



Epoch 0:  26%|██▌       | 418/1622 [4:15:30<12:15:59, 36.68s/it, loss=0.315, v_num=3]



Epoch 0:  26%|██▌       | 419/1622 [4:16:07<12:15:21, 36.68s/it, loss=0.315, v_num=3]



Epoch 0:  26%|██▌       | 420/1622 [4:16:43<12:14:42, 36.67s/it, loss=0.315, v_num=3]



Epoch 0:  26%|██▌       | 421/1622 [4:17:19<12:14:03, 36.67s/it, loss=0.315, v_num=3]



Epoch 0:  26%|██▌       | 422/1622 [4:17:55<12:13:25, 36.67s/it, loss=0.315, v_num=3]



Epoch 0:  26%|██▌       | 423/1622 [4:18:31<12:12:46, 36.67s/it, loss=0.315, v_num=3]



Epoch 0:  26%|██▌       | 424/1622 [4:19:07<12:12:07, 36.67s/it, loss=0.315, v_num=3]



Epoch 0:  26%|██▌       | 425/1622 [4:19:42<12:11:28, 36.67s/it, loss=0.315, v_num=3]



Epoch 0:  26%|██▋       | 426/1622 [4:20:18<12:10:49, 36.66s/it, loss=0.315, v_num=3]



Epoch 0:  26%|██▋       | 427/1622 [4:20:54<12:10:11, 36.66s/it, loss=0.313, v_num=3]



Epoch 0:  26%|██▋       | 428/1622 [4:21:30<12:09:33, 36.66s/it, loss=0.313, v_num=3]



Epoch 0:  26%|██▋       | 429/1622 [4:22:06<12:08:53, 36.66s/it, loss=0.313, v_num=3]



Epoch 0:  27%|██▋       | 430/1622 [4:22:42<12:08:15, 36.66s/it, loss=0.313, v_num=3]



Epoch 0:  27%|██▋       | 431/1622 [4:23:18<12:07:37, 36.66s/it, loss=0.313, v_num=3]



Epoch 0:  27%|██▋       | 432/1622 [4:23:55<12:07:00, 36.66s/it, loss=0.313, v_num=3]



Epoch 0:  27%|██▋       | 433/1622 [4:24:31<12:06:22, 36.65s/it, loss=0.313, v_num=3]



Epoch 0:  27%|██▋       | 434/1622 [4:25:07<12:05:43, 36.65s/it, loss=0.313, v_num=3]



Epoch 0:  27%|██▋       | 435/1622 [4:25:43<12:05:04, 36.65s/it, loss=0.313, v_num=3]



Epoch 0:  27%|██▋       | 436/1622 [4:26:19<12:04:27, 36.65s/it, loss=0.313, v_num=3]



Epoch 0:  27%|██▋       | 437/1622 [4:26:55<12:03:49, 36.65s/it, loss=0.313, v_num=3]



Epoch 0:  27%|██▋       | 438/1622 [4:27:31<12:03:11, 36.65s/it, loss=0.313, v_num=3]



Epoch 0:  27%|██▋       | 439/1622 [4:28:07<12:02:33, 36.65s/it, loss=0.313, v_num=3]



Epoch 0:  27%|██▋       | 440/1622 [4:28:43<12:01:54, 36.65s/it, loss=0.313, v_num=3]



Epoch 0:  27%|██▋       | 441/1622 [4:29:19<12:01:16, 36.64s/it, loss=0.313, v_num=3]



Epoch 0:  27%|██▋       | 442/1622 [4:29:56<12:00:38, 36.64s/it, loss=0.313, v_num=3]



Epoch 0:  27%|██▋       | 443/1622 [4:30:32<12:00:00, 36.64s/it, loss=0.313, v_num=3]



Epoch 0:  27%|██▋       | 444/1622 [4:31:08<11:59:23, 36.64s/it, loss=0.313, v_num=3]



Epoch 0:  27%|██▋       | 445/1622 [4:31:44<11:58:45, 36.64s/it, loss=0.313, v_num=3]



Epoch 0:  27%|██▋       | 446/1622 [4:32:20<11:58:07, 36.64s/it, loss=0.313, v_num=3]



Epoch 0:  28%|██▊       | 447/1622 [4:32:56<11:57:28, 36.64s/it, loss=0.313, v_num=3]



Epoch 0:  28%|██▊       | 448/1622 [4:33:33<11:56:51, 36.64s/it, loss=0.313, v_num=3]



Epoch 0:  28%|██▊       | 449/1622 [4:34:09<11:56:14, 36.64s/it, loss=0.313, v_num=3]



Epoch 0:  28%|██▊       | 450/1622 [4:34:45<11:55:36, 36.64s/it, loss=0.313, v_num=3]



Epoch 0:  28%|██▊       | 451/1622 [4:35:21<11:54:58, 36.63s/it, loss=0.313, v_num=3]



Epoch 0:  28%|██▊       | 452/1622 [4:35:57<11:54:19, 36.63s/it, loss=0.313, v_num=3]



Epoch 0:  28%|██▊       | 453/1622 [4:36:34<11:53:42, 36.63s/it, loss=0.313, v_num=3]



Epoch 0:  28%|██▊       | 454/1622 [4:37:10<11:53:04, 36.63s/it, loss=0.313, v_num=3]



Epoch 0:  28%|██▊       | 455/1622 [4:37:46<11:52:26, 36.63s/it, loss=0.313, v_num=3]



Epoch 0:  28%|██▊       | 456/1622 [4:38:22<11:51:48, 36.63s/it, loss=0.313, v_num=3]



Epoch 0:  28%|██▊       | 457/1622 [4:38:58<11:51:09, 36.63s/it, loss=0.313, v_num=3]



Epoch 0:  28%|██▊       | 458/1622 [4:39:34<11:50:30, 36.62s/it, loss=0.313, v_num=3]



Epoch 0:  28%|██▊       | 459/1622 [4:40:10<11:49:52, 36.62s/it, loss=0.313, v_num=3]



Epoch 0:  28%|██▊       | 460/1622 [4:40:45<11:49:14, 36.62s/it, loss=0.315, v_num=3]



Epoch 0:  28%|██▊       | 461/1622 [4:41:21<11:48:35, 36.62s/it, loss=0.315, v_num=3]



Epoch 0:  28%|██▊       | 462/1622 [4:41:57<11:47:57, 36.62s/it, loss=0.315, v_num=3]



Epoch 0:  29%|██▊       | 463/1622 [4:42:33<11:47:19, 36.62s/it, loss=0.315, v_num=3]



Epoch 0:  29%|██▊       | 464/1622 [4:43:10<11:46:43, 36.62s/it, loss=0.315, v_num=3]



Epoch 0:  29%|██▊       | 465/1622 [4:43:46<11:46:06, 36.62s/it, loss=0.315, v_num=3]



Epoch 0:  29%|██▊       | 466/1622 [4:44:22<11:45:27, 36.62s/it, loss=0.315, v_num=3]



Epoch 0:  29%|██▉       | 467/1622 [4:44:58<11:44:49, 36.61s/it, loss=0.315, v_num=3]



Epoch 0:  29%|██▉       | 468/1622 [4:45:34<11:44:11, 36.61s/it, loss=0.315, v_num=3]



Epoch 0:  29%|██▉       | 469/1622 [4:46:11<11:43:33, 36.61s/it, loss=0.315, v_num=3]



Epoch 0:  29%|██▉       | 470/1622 [4:46:47<11:42:56, 36.61s/it, loss=0.315, v_num=3]



Epoch 0:  29%|██▉       | 471/1622 [4:47:23<11:42:19, 36.61s/it, loss=0.315, v_num=3]



Epoch 0:  29%|██▉       | 472/1622 [4:47:59<11:41:41, 36.61s/it, loss=0.315, v_num=3]



Epoch 0:  29%|██▉       | 473/1622 [4:48:35<11:41:03, 36.61s/it, loss=0.315, v_num=3]



Epoch 0:  29%|██▉       | 474/1622 [4:49:12<11:40:27, 36.61s/it, loss=0.315, v_num=3]



Epoch 0:  29%|██▉       | 475/1622 [4:49:49<11:39:50, 36.61s/it, loss=0.315, v_num=3]



Epoch 0:  29%|██▉       | 476/1622 [4:50:25<11:39:13, 36.61s/it, loss=0.315, v_num=3]



Epoch 0:  29%|██▉       | 477/1622 [4:51:03<11:38:38, 36.61s/it, loss=0.315, v_num=3]



Epoch 0:  29%|██▉       | 478/1622 [4:51:39<11:38:00, 36.61s/it, loss=0.315, v_num=3]



Epoch 0:  30%|██▉       | 479/1622 [4:52:15<11:37:23, 36.61s/it, loss=0.315, v_num=3]



Epoch 0:  30%|██▉       | 480/1622 [4:52:51<11:36:46, 36.61s/it, loss=0.313, v_num=3]



Epoch 0:  30%|██▉       | 481/1622 [4:53:27<11:36:08, 36.61s/it, loss=0.313, v_num=3]



Epoch 0:  30%|██▉       | 482/1622 [4:54:03<11:35:30, 36.61s/it, loss=0.313, v_num=3]



Epoch 0:  30%|██▉       | 483/1622 [4:54:39<11:34:52, 36.60s/it, loss=0.313, v_num=3]



Epoch 0:  30%|██▉       | 484/1622 [4:55:15<11:34:13, 36.60s/it, loss=0.313, v_num=3]



Epoch 0:  30%|██▉       | 485/1622 [4:55:51<11:33:35, 36.60s/it, loss=0.313, v_num=3]



Epoch 0:  30%|██▉       | 486/1622 [4:56:27<11:32:58, 36.60s/it, loss=0.313, v_num=3]



Epoch 0:  30%|███       | 487/1622 [4:57:03<11:32:20, 36.60s/it, loss=0.313, v_num=3]



Epoch 0:  30%|███       | 488/1622 [4:57:40<11:31:43, 36.60s/it, loss=0.313, v_num=3]



Epoch 0:  30%|███       | 489/1622 [4:58:16<11:31:06, 36.60s/it, loss=0.313, v_num=3]



Epoch 0:  30%|███       | 490/1622 [4:58:53<11:30:29, 36.60s/it, loss=0.313, v_num=3]



Epoch 0:  30%|███       | 491/1622 [4:59:29<11:29:51, 36.60s/it, loss=0.313, v_num=3]



Epoch 0:  30%|███       | 492/1622 [5:00:05<11:29:13, 36.60s/it, loss=0.313, v_num=3]



Epoch 0:  30%|███       | 493/1622 [5:00:41<11:28:37, 36.60s/it, loss=0.313, v_num=3]



Epoch 0:  30%|███       | 494/1622 [5:01:18<11:27:59, 36.60s/it, loss=0.313, v_num=3]



Epoch 0:  31%|███       | 495/1622 [5:01:54<11:27:22, 36.59s/it, loss=0.313, v_num=3]



Epoch 0:  31%|███       | 496/1622 [5:02:30<11:26:43, 36.59s/it, loss=0.313, v_num=3]



Epoch 0:  31%|███       | 497/1622 [5:03:06<11:26:05, 36.59s/it, loss=0.313, v_num=3]



Epoch 0:  31%|███       | 498/1622 [5:03:42<11:25:28, 36.59s/it, loss=0.313, v_num=3]



Epoch 0:  31%|███       | 499/1622 [5:04:18<11:24:50, 36.59s/it, loss=0.313, v_num=3]



Epoch 0:  31%|███       | 500/1622 [5:04:54<11:24:13, 36.59s/it, loss=0.313, v_num=3]



Epoch 0:  31%|███       | 501/1622 [5:05:30<11:23:36, 36.59s/it, loss=0.313, v_num=3]



Epoch 0:  31%|███       | 502/1622 [5:06:07<11:22:58, 36.59s/it, loss=0.313, v_num=3]



Epoch 0:  31%|███       | 503/1622 [5:06:43<11:22:20, 36.59s/it, loss=0.313, v_num=3]



Epoch 0:  31%|███       | 504/1622 [5:07:19<11:21:43, 36.59s/it, loss=0.313, v_num=3]



Epoch 0:  31%|███       | 505/1622 [5:07:55<11:21:06, 36.59s/it, loss=0.313, v_num=3]



Epoch 0:  31%|███       | 506/1622 [5:08:32<11:20:29, 36.59s/it, loss=0.313, v_num=3]



Epoch 0:  31%|███▏      | 507/1622 [5:09:08<11:19:52, 36.58s/it, loss=0.313, v_num=3]



Epoch 0:  31%|███▏      | 508/1622 [5:09:44<11:19:14, 36.58s/it, loss=0.313, v_num=3]



Epoch 0:  31%|███▏      | 509/1622 [5:10:20<11:18:37, 36.58s/it, loss=0.313, v_num=3]



Epoch 0:  31%|███▏      | 510/1622 [5:10:57<11:18:00, 36.58s/it, loss=0.313, v_num=3]



Epoch 0:  32%|███▏      | 511/1622 [5:11:33<11:17:22, 36.58s/it, loss=0.313, v_num=3]



Epoch 0:  32%|███▏      | 512/1622 [5:12:09<11:16:45, 36.58s/it, loss=0.313, v_num=3]



Epoch 0:  32%|███▏      | 513/1622 [5:12:46<11:16:08, 36.58s/it, loss=0.313, v_num=3]



Epoch 0:  32%|███▏      | 514/1622 [5:13:22<11:15:31, 36.58s/it, loss=0.313, v_num=3]



Epoch 0:  32%|███▏      | 515/1622 [5:13:58<11:14:54, 36.58s/it, loss=0.313, v_num=3]



Epoch 0:  32%|███▏      | 516/1622 [5:14:34<11:14:16, 36.58s/it, loss=0.313, v_num=3]



Epoch 0:  32%|███▏      | 517/1622 [5:15:11<11:13:39, 36.58s/it, loss=0.313, v_num=3]



Epoch 0:  32%|███▏      | 518/1622 [5:15:47<11:13:03, 36.58s/it, loss=0.313, v_num=3]



Epoch 0:  32%|███▏      | 519/1622 [5:16:24<11:12:25, 36.58s/it, loss=0.313, v_num=3]



Epoch 0:  32%|███▏      | 520/1622 [5:17:00<11:11:48, 36.58s/it, loss=0.313, v_num=3]



Epoch 0:  32%|███▏      | 521/1622 [5:17:36<11:11:10, 36.58s/it, loss=0.313, v_num=3]



Epoch 0:  32%|███▏      | 522/1622 [5:18:12<11:10:33, 36.58s/it, loss=0.313, v_num=3]



Epoch 0:  32%|███▏      | 523/1622 [5:18:48<11:09:56, 36.58s/it, loss=0.313, v_num=3]



Epoch 0:  32%|███▏      | 524/1622 [5:19:34<11:09:39, 36.59s/it, loss=0.313, v_num=3]



Epoch 0:  32%|███▏      | 525/1622 [5:20:17<11:09:15, 36.60s/it, loss=0.313, v_num=3]



Epoch 0:  32%|███▏      | 526/1622 [5:20:55<11:08:41, 36.61s/it, loss=0.313, v_num=3]



Epoch 0:  32%|███▏      | 527/1622 [5:21:34<11:08:10, 36.61s/it, loss=0.313, v_num=3]



Epoch 0:  33%|███▎      | 528/1622 [5:22:12<11:07:35, 36.61s/it, loss=0.313, v_num=3]



Epoch 0:  33%|███▎      | 529/1622 [5:22:49<11:07:01, 36.62s/it, loss=0.313, v_num=3]



Epoch 0:  33%|███▎      | 530/1622 [5:23:27<11:06:27, 36.62s/it, loss=0.313, v_num=3]



Epoch 0:  33%|███▎      | 531/1622 [5:24:05<11:05:52, 36.62s/it, loss=0.313, v_num=3]



Epoch 0:  33%|███▎      | 532/1622 [5:24:42<11:05:16, 36.62s/it, loss=0.313, v_num=3]



Epoch 0:  33%|███▎      | 533/1622 [5:25:17<11:04:37, 36.62s/it, loss=0.313, v_num=3]



### 토크나이저 비교

Mecab  
    https://i-am-wendy.tistory.com/27  
    https://velog.io/@kjyggg/%ED%98%95%ED%83%9C%EC%86%8C-%EB%B6%84%EC%84%9D%EA%B8%B0-Mecab-%EC%82%AC%EC%9A%A9%ED%95%98%EA%B8%B0-A-to-Z%EC%84%A4%EC%B9%98%EB%B6%80%ED%84%B0-%EB%8B%A8%EC%96%B4-%EC%9A%B0%EC%84%A0%EC%88%9C%EC%9C%84-%EB%93%B1%EB%A1%9D%EA%B9%8C%EC%A7%80

In [None]:
#result[0]을 예를들어보자.
prompt = news[0][0]
next_sentence = news[0][1]

print(prompt)
print(next_sentence)

 전남 목포시는 최근 이목이 집중되고 있는 근대역사문화공간 재생활성화 사업을 근대문화재 보존과 활용이라는 당초 취지대로 차질없이 추진하겠다고 밝혔다. 이 사업은 목포 원도심인 유달‧만호동 일대에 산재해 있는 근대건축물 등 문화유산
을 보존하고 보수‧정비하는 사업으로 금년부터 향후 5년간 총 사업비 500억원이 투입된다. 목포시는 원도심 일대의 근대경관을 회복하고 거주민 생활여건 개선과 관광인프라 확충 등을 통해 이 지역을 전국적인 근대 문화유산의 보고로 만


In [None]:
kb_tokenizer = AutoTokenizer.from_pretrained("klue/bert-base")
mk_tokenizer = AutoTokenizer.from_pretrained('monologg/kobert')
sk_tokenizer = AutoTokenizer.from_pretrained('snunlp/KR-Medium')
mc_tokenizer = MeCab.Tagger()

print(f'klue/bert-base(의미 단위 토큰화) => {len(kb_tokenizer.tokenize(prompt))}토큰')
print(kb_tokenizer.tokenize(prompt))
print()
print(f'monologg/kobert(띄어쓰기 단위 토큰화) => {len(mk_tokenizer.tokenize(prompt))}토큰')
print(mk_tokenizer.tokenize(prompt))
print()
print(f'snunlp/KR-Medium(의미 단위 토큰화) => {len(sk_tokenizer.tokenize(prompt))}토큰')
print(sk_tokenizer.tokenize(prompt))
print()
mc = mc_tokenizer.parse(prompt).split('\n')
print(f"mecab(의미 단위 토큰화) => {len(mc)}토큰")
print(mc)

#klue/bert-base의 토크나이저의 결과가 가장 좋아보임

klue/bert-base(의미 단위 토큰화) => 65토큰
['전남', '목포시', '##는', '최근', '이목', '##이', '집중', '##되', '##고', '있', '##는', '근대', '##역', '##사', '##문화', '##공간', '재생', '##활', '##성', '##화', '사업', '##을', '근대', '##문화재', '보존', '##과', '활용', '##이', '##라는', '당초', '취지', '##대로', '차질', '##없이', '추진', '##하', '##겠다', '##고', '밝혔', '##다', '.', '이', '사업', '##은', '목포', '원도', '##심', '##인', '유', '##달', '‧', '만호', '##동', '일대', '##에', '산재', '##해', '있', '##는', '근대', '##건', '##축', '##물', '등', '문화유산']

monologg/kobert(띄어쓰기 단위 토큰화) => 31토큰
['[UNK]', '[UNK]', '최근', '[UNK]', '[UNK]', '있는', '[UNK]', '[UNK]', '[UNK]', '[UNK]', '[UNK]', '[UNK]', '[UNK]', '[UNK]', '[UNK]', '[UNK]', '[UNK]', '.', '이', '[UNK]', '[UNK]', '[UNK]', '[UNK]', '[UNK]', '[UNK]', '[UNK]', '[UNK]', '있는', '[UNK]', '등', '[UNK]']

snunlp/KR-Medium(의미 단위 토큰화) => 64토큰
['전남', '목포', '##시는', '최근', '이', '##목이', '집중', '##되고', '있는', '근대', '##역', '##사', '##문화', '##공', '##간', '재', '##생활', '##성화', '사업을', '근대', '##문화', '##재', '보존', '##과', '활용', '##이라는', '당초', '취', '##지', '##대로'

In [None]:
model = TFBertForNextSentencePrediction.from_pretrained('klue/bert-base', from_pt=True)
kb_tokenizer = AutoTokenizer.from_pretrained("klue/bert-base")

encoding = kb_tokenizer(prompt, next_sentence, return_tensors='tf', padding=True)
encoding
#input_ids, token_type_ids, attention_mask가 각각 (1, 131) shape로 되어있음
#131 = 128자의 토큰 + [CLS], [SEP], [SEP] 인 것 같다...?

Some weights of the PyTorch model were not used when initializing the TF 2.0 model TFBertForNextSentencePrediction: ['bert.embeddings.position_ids']
- This IS expected if you are initializing TFBertForNextSentencePrediction from a PyTorch model trained on another task or with another architecture (e.g. initializing a TFBertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFBertForNextSentencePrediction from a PyTorch model that you expect to be exactly identical (e.g. initializing a TFBertForSequenceClassification model from a BertForSequenceClassification model).
All the weights of TFBertForNextSentencePrediction were initialized from the PyTorch model.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFBertForNextSentencePrediction for predictions without further training.


{'input_ids': <tf.Tensor: shape=(1, 67), dtype=int32, numpy=
array([[    2,  4997, 31441,  2259,  3744, 11086,  2052,  4154,  2496,
         2088,  1513,  2259,  5496,  2331,  2063,  6977, 29107,  6951,
         2424,  2047,  2267,  3639,  2069,  5496, 19611,  6628,  2145,
         3987,  2052, 23548,  6051,  5811,  3770,  8436,  4087,  3835,
         2205, 18395,  2088,  3705,  2062,    18,  1504,  3639,  2073,
         9312, 17638,  2361,  2179,  1490,  2448,   122, 31241,  2328,
         6394,  2170, 12389,  2097,  1513,  2259,  5496,  2332,  2174,
         2266,   886, 11319,     3]])>, 'token_type_ids': <tf.Tensor: shape=(1, 67), dtype=int32, numpy=
array([[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0]])>, 'attention_mask': <tf.Tensor: shape=(1, 67), dtype=int32, numpy=
array([[1, 1, 1, 1, 1, 1, 1, 1, 1, 

In [None]:
#result[0]을 예를들어보자.
prompt = result[0][0]
next_sentence = result[0][1]
encoding = tokenizer(prompt, next_sentence, return_tensors='tf')

encoding
#input_ids, token_type_ids, attention_mask가 각각 (1, 131) shape로 되어있음
#131 = 128자의 글자 + 2(), 3(), + ...?

{'input_ids': <tf.Tensor: shape=(1, 131), dtype=int32, numpy=
array([[    2,  4997, 31441,  2259,  3744, 11086,  2052,  4154,  2496,
         2088,  1513,  2259,  5496,  2331,  2063,  6977, 29107,  6951,
         2424,  2047,  2267,  3639,  2069,  5496, 19611,  6628,  2145,
         3987,  2052, 23548,  6051,  5811,  3770,  8436,  4087,  3835,
         2205, 18395,  2088,  3705,  2062,    18,  1504,  3639,  2073,
         9312, 17638,  2361,  2179,  1490,  2448,   122, 31241,  2328,
         6394,  2170, 12389,  2097,  1513,  2259,  5496,  2332,  2174,
         2266,   886, 11319,     3,  1498,  6628, 19521,  4687,   122,
         5336,  2205,  2259,  3639,  6233, 17008,  3797,  4914,    25,
         2440,  2366,  1668,  7518,  4667,  2028,  2252,  2052,  5113,
         2897,  2062,    18, 31441,  2259, 17638,  2361,  6394,  2079,
         5496,  2382,  2133,  2069,  4585, 19521,  5884,  2114,  3799,
         2173,  2332,  4076,  2145,  4066,  2179,  5747,  7237,   886,
         2069, 

In [None]:
#kobert의 tokenizer를 사용하면?
from kobert_transformers.utils import get_tokenizer

to = get_tokenizer()
prompt = result[0][0]
next_sentence = result[0][1]
encoding = to(prompt, next_sentence, return_tensors='tf')

encoding
#shape이 왜 (1, 142)일까..?

The tokenizer class you load from this checkpoint is not the same type as the class this function is called from. It may result in unexpected tokenization. 
The tokenizer class you load from this checkpoint is 'BertTokenizer'. 
The class this function is called from is 'KoBertTokenizer'.


{'input_ids': <tf.Tensor: shape=(1, 142), dtype=int32, numpy=
array([[   2, 4018, 2068, 7728, 6705, 5760, 4525, 3647, 6217, 7096, 4389,
        5887, 3860, 1221, 5808, 6926, 6493, 6238, 5453, 3969, 6545, 6573,
        7941, 2610, 1221, 5808, 6238, 7191, 2355, 7264, 5468, 5146, 7103,
        1630, 4609, 5812, 4419, 6883, 4555, 7786, 2261,   54, 3647, 2609,
        7086, 2068, 7728, 3533, 5859, 6745, 7119, 3574, 5793,    0, 6150,
        7925, 5872, 3806, 6896, 2640, 7191, 7848, 3860, 1221, 5808, 5384,
        7463, 6241, 1815, 2132, 7063, 6516,    3,  517, 7088, 2355, 7264,
        7788, 2374,    0, 7227, 6441, 7794, 2609, 7078, 1235, 5717, 5036,
         611, 5713, 4512, 2609, 6441,  614, 6861, 7096, 4767, 5900,   54,
        2068, 7728, 6705, 5760, 3533, 5859, 6745, 3806, 7095, 1221, 5808,
        5424, 5474, 7088, 5155, 7788,  875, 6263, 2717, 6918,  843, 5468,
        1080, 7119, 7754, 5135, 1824, 4756, 3647, 4329, 7088, 4014, 7206,
        1221, 5808, 2132, 7063, 6516, 7095, 2358, 

In [None]:
logits = model(encoding['input_ids'], token_type_ids=encoding['token_type_ids'])[0]

logits

<tf.Tensor: shape=(1, 2), dtype=float32, numpy=array([[ 0.16655698, -0.5226557 ]], dtype=float32)>

In [None]:
softmax = tf.keras.layers.Softmax()
probs = softmax(logits)
print('최종 예측 레이블 :', tf.math.argmax(probs, axis=-1).numpy())

최종 예측 레이블 : [1]


In [None]:
train, test = train_test_split(result, test_size=0.1, random_state=2)

len(train), len(test)

(282, 32)

### Korpus 데이터로 학습이 잘되면, news_class9x13000 활용 예정

## 마무리
맞춤법 검사 : py-hanspell