# OUI

### package load 및 device 지정

In [1]:
import torch
from torch import nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import gluonnlp as nlp
import numpy as np
from tqdm.notebook import tqdm
import pandas as pd

In [2]:
from kobert import get_tokenizer
from kobert import get_pytorch_kobert_model

In [3]:
from transformers import AdamW
from transformers.optimization import get_cosine_schedule_with_warmup

In [4]:
import os

# Must be set before CUDA is initialised for the ordering to take effect.
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"

# GPU index used on the training server. Fall back to the first GPU (or the CPU)
# when that index does not exist, instead of failing later at the first `.to(device)`.
PREFERRED_GPU = 3
if torch.cuda.is_available():
    gpu_index = PREFERRED_GPU if torch.cuda.device_count() > PREFERRED_GPU else 0
    device = torch.device(f"cuda:{gpu_index}")
else:
    device = torch.device("cpu")
print(device)

cuda:3


### 데이터세트 구성

In [81]:
# Read the train / validation TSV files; `num_discard_samples=1` drops the first
# row of each file (presumably the header row -- confirm against the data files).
train_path = os.path.join("../dataset/train", "train.tsv")
valid_path = os.path.join("../dataset/test", "valid.tsv")
train_dataset = nlp.data.TSVDataset(train_path, num_discard_samples=1)
test_dataset = nlp.data.TSVDataset(valid_path, num_discard_samples=1)

In [82]:
# Load the pretrained KoBERT encoder and its vocabulary; files are cached under ".cache".
bertmodel, vocab = get_pytorch_kobert_model(cachedir=".cache")
# Tokenizer resource returned by kobert, wrapped together with the vocabulary.
# NOTE(review): lower=False presumably disables lower-casing -- confirm in the gluonnlp docs.
tokenizer = get_tokenizer()
tok = nlp.data.BERTSPTokenizer(tokenizer, vocab, lower=False)

using cached model. /home/j-j10a506/oui/KoBERT/.cache/kobert_v1.zip
using cached model. /home/j-j10a506/oui/KoBERT/.cache/kobert_news_wiki_ko_cased-1087f8699e.spiece
using cached model. /home/j-j10a506/oui/KoBERT/.cache/kobert_news_wiki_ko_cased-1087f8699e.spiece


In [83]:
# Mini-batch size used by every DataLoader built in this notebook.
batch_size = 128
# Passed as `max_seq_length` to the BERT sentence transform inside BERTDataset.
max_len = 100

In [84]:
class BERTDataset(Dataset):
    """Map-style dataset that pairs BERT input features with an integer label.

    Args:
        dataset: iterable of rows; only rows with exactly two fields are used.
        sent_idx: position of the sentence text inside a row.
        label_idx: position of the label inside a row.
        bert_tokenizer: tokenizer handed to ``nlp.data.BERTSentenceTransform``.
        max_len: value forwarded as ``max_seq_length``.
        pad: forwarded to the transform's ``pad`` option.
        pair: forwarded to the transform's ``pair`` option.
    """

    def __init__(self, dataset, sent_idx, label_idx, bert_tokenizer, max_len,
                 pad, pair):
        to_features = nlp.data.BERTSentenceTransform(
            bert_tokenizer, max_seq_length=max_len, pad=pad, pair=pair)

        self.sentences, self.labels = [], []
        for row in dataset:
            # Rows that are not exactly (text, label) are silently skipped.
            if len(row) == 2:
                self.sentences.append(to_features([row[sent_idx]]))
                self.labels.append(np.int32(row[label_idx]))

    def __getitem__(self, i):
        """Return the i-th feature tuple with the label appended as the last element."""
        features = self.sentences[i]
        return features + (self.labels[i],)

    def __len__(self):
        """Number of rows that were kept."""
        return len(self.labels)

In [85]:
# Tokenise both splits: column 0 holds the sentence, column 1 the label.
data_train = BERTDataset(train_dataset, sent_idx=0, label_idx=1,
                         bert_tokenizer=tok, max_len=max_len, pad=True, pair=False)
data_test = BERTDataset(test_dataset, sent_idx=0, label_idx=1,
                        bert_tokenizer=tok, max_len=max_len, pad=True, pair=False)

In [86]:
# Shuffle the training split each epoch so mini-batches do not follow the file order;
# the evaluation loader keeps a fixed order.
train_dataloader = DataLoader(data_train, batch_size=batch_size, shuffle=True, num_workers=5)
test_dataloader = DataLoader(data_test, batch_size=batch_size, num_workers=5)

### kobert load
* 하이퍼파라미터 조정
  * dropout 비율
  * learning rate
  * warmup_ratio
  * weight_decay
  * max_grad_norm

In [87]:
class BERTClassifier(nn.Module):
    """Linear classification head on top of a BERT encoder's pooled output.

    Args:
        bert: callable encoder; invoked with ``input_ids``, ``token_type_ids`` and
            ``attention_mask`` and expected to return a 2-tuple whose second item
            is the pooled representation fed to the classifier.
        hidden_size: width of the pooled representation.
        num_classes: number of output logits.
        dr_rate: dropout probability applied to the pooled output; a falsy value
            (``None`` or ``0``) disables dropout.
        params: unused; kept for backward compatibility with existing callers.
    """

    def __init__(self,
                 bert,
                 hidden_size = 768,
                 num_classes=6,
                 dr_rate=None,
                 params=None):
        super(BERTClassifier, self).__init__()
        self.bert = bert
        self.dr_rate = dr_rate

        self.classifier = nn.Linear(hidden_size , num_classes)
        if dr_rate:
            self.dropout = nn.Dropout(p=dr_rate)

    def gen_attention_mask(self, token_ids, valid_length):
        """Return a float mask shaped like ``token_ids``: 1.0 for the first
        ``valid_length[i]`` positions of row ``i`` and 0.0 elsewhere.

        ``valid_length`` needs one entry per row of ``token_ids``. The mask is
        built with a single broadcast comparison on ``token_ids``' device
        instead of one slice assignment per row.
        """
        lengths = torch.as_tensor(valid_length, device=token_ids.device).view(-1, 1)
        positions = torch.arange(token_ids.size(1), device=token_ids.device).unsqueeze(0)
        return (positions < lengths).float()

    def forward(self, token_ids, valid_length, segment_ids):
        """Compute class logits for a batch.

        Args:
            token_ids: integer tensor of token ids, one row per sample.
            valid_length: per-sample count of non-padding tokens.
            segment_ids: tensor of token-type ids, same shape as ``token_ids``.

        Returns:
            Tensor of shape ``(batch, num_classes)`` holding unnormalised logits.
        """
        # The mask is already float and already lives on token_ids' device.
        attention_mask = self.gen_attention_mask(token_ids, valid_length)

        _, pooler = self.bert(input_ids = token_ids, token_type_ids = segment_ids.long(), attention_mask = attention_mask)
        out = self.dropout(pooler) if self.dr_rate else pooler
        return self.classifier(out)

In [89]:
# Put the classification head on the pretrained encoder, then move it to the chosen device.
# (0.2282168316541426 is an alternative dr_rate value noted for this cell.)
model = BERTClassifier(bertmodel, dr_rate=0.5)
model = model.to(device)

### 학습
* best 모델 저장
* epoch 수: 10회 (`num_epochs = 10`)

In [90]:
## Training hyperparameters
warmup_ratio = 0.1 # fraction of the total steps used to ramp the learning rate up (alternative value: 0.2503085518907766)
num_epochs = 10 # number of passes over the training DataLoader
max_grad_norm = 1 # maximum norm used for gradient clipping
log_interval = 200 # a progress line is printed every `log_interval` batches
learning_rate = 5e-5 # optimizer learning rate (alternative value: 4.4397570365495904e-05)

In [91]:
# Parameters whose name contains one of these fragments get no weight decay.
no_decay = ['bias', 'LayerNorm.weight']

decay_params, no_decay_params = [], []
for param_name, param in model.named_parameters():
    if any(nd in param_name for nd in no_decay):
        no_decay_params.append(param)
    else:
        decay_params.append(param)

optimizer_grouped_parameters = [
    {'params': decay_params, 'weight_decay': 0.009198865305723895},
    {'params': no_decay_params, 'weight_decay': 0.0},
]

In [92]:
# AdamW over the two parameter groups (with / without weight decay).
optimizer = AdamW(optimizer_grouped_parameters, lr=learning_rate)
# Cross-entropy between the classifier logits and the integer class labels.
loss_fn = nn.CrossEntropyLoss()

In [93]:
# Total number of optimizer steps over the whole run, and how many of them are warmup.
steps_per_epoch = len(train_dataloader)
t_total = steps_per_epoch * num_epochs
warmup_step = int(t_total * warmup_ratio)

In [94]:
# Cosine learning-rate schedule with `warmup_step` warmup steps out of `t_total` training steps.
scheduler = get_cosine_schedule_with_warmup(optimizer, num_warmup_steps=warmup_step, num_training_steps=t_total)

In [95]:
def calc_accuracy(X,Y):
    """Fraction of rows in ``X`` whose arg-max column equals the label in ``Y``.

    Args:
        X: 2-D tensor of per-class scores, one row per sample.
        Y: 1-D tensor of integer class labels.

    Returns:
        NumPy scalar: correct predictions divided by the number of rows.
    """
    predicted = torch.max(X, dim=1).indices
    n_correct = (predicted == Y).sum().detach().cpu().numpy()
    return n_correct / predicted.shape[0]

In [96]:
# Fine-tune for `num_epochs` epochs; after each epoch evaluate on `test_dataloader`
# and save the weights whenever the accuracy beats the best seen so far.
best = 0.0
MODEL_PATH = "../myModel"
os.makedirs(MODEL_PATH, exist_ok=True)  # torch.save does not create missing directories

for e in range(num_epochs):
    train_acc = 0.0
    test_acc = 0.0

    model.train()
    for batch_id, (token_ids, valid_length, segment_ids, label) in tqdm(enumerate(train_dataloader), total=len(train_dataloader)):
        optimizer.zero_grad()
        token_ids = token_ids.long().to(device)
        segment_ids = segment_ids.long().to(device)
        label = label.long().to(device)
        out = model(token_ids, valid_length, segment_ids)
        loss = loss_fn(out, label)
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_grad_norm)
        optimizer.step()
        scheduler.step()  # Update learning rate schedule
        train_acc += calc_accuracy(out, label)
        if batch_id % log_interval == 0:
            print("epoch {} batch id {} loss {} train acc {}".format(e+1, batch_id+1, loss.item(), train_acc / (batch_id+1)))
    print("epoch {} train acc {}".format(e+1, train_acc / (batch_id+1)))

    model.eval()
    # No gradients are needed for evaluation; skipping graph construction saves memory and time.
    with torch.no_grad():
        for batch_id, (token_ids, valid_length, segment_ids, label) in tqdm(enumerate(test_dataloader), total=len(test_dataloader)):
            token_ids = token_ids.long().to(device)
            segment_ids = segment_ids.long().to(device)
            label = label.long().to(device)
            out = model(token_ids, valid_length, segment_ids)
            test_acc += calc_accuracy(out, label)

    # Mean of the per-batch accuracies, as in the training log above.
    epoch_test_acc = test_acc / (batch_id+1)
    print("epoch {} test acc {}".format(e+1, epoch_test_acc))
    if best < epoch_test_acc:
        torch.save(model.state_dict(), os.path.join(MODEL_PATH, "oui_20240327.pt"))
        best = epoch_test_acc

  0%|          | 0/671 [00:00<?, ?it/s]

epoch 1 batch id 1 loss 1.814565658569336 train acc 0.1640625
epoch 1 batch id 201 loss 1.4248244762420654 train acc 0.3113339552238806
epoch 1 batch id 401 loss 1.2490034103393555 train acc 0.3847023067331671
epoch 1 batch id 601 loss 1.0381593704223633 train acc 0.43924188851913476
epoch 1 train acc 0.4525079172876304


  0%|          | 0/168 [00:00<?, ?it/s]

epoch 1 test acc 0.5688887444960861


  0%|          | 0/671 [00:00<?, ?it/s]

epoch 2 batch id 1 loss 1.1160815954208374 train acc 0.6015625
epoch 2 batch id 201 loss 1.0192915201187134 train acc 0.562383395522388
epoch 2 batch id 401 loss 1.1564719676971436 train acc 0.566104270573566
epoch 2 batch id 601 loss 0.9732457995414734 train acc 0.5743682404326124
epoch 2 train acc 0.5777352995967389


  0%|          | 0/168 [00:00<?, ?it/s]

epoch 2 test acc 0.587013438723092


  0%|          | 0/671 [00:00<?, ?it/s]

epoch 3 batch id 1 loss 1.0886294841766357 train acc 0.5625
epoch 3 batch id 201 loss 0.9133548140525818 train acc 0.6084810323383084
epoch 3 batch id 401 loss 1.0644561052322388 train acc 0.611284289276808
epoch 3 batch id 601 loss 0.8846819996833801 train acc 0.6203852953410982
epoch 3 train acc 0.6230453340054353


  0%|          | 0/168 [00:00<?, ?it/s]

epoch 3 test acc 0.5813865786040443


  0%|          | 0/671 [00:00<?, ?it/s]

epoch 4 batch id 1 loss 0.9813555479049683 train acc 0.59375
epoch 4 batch id 201 loss 0.7628929615020752 train acc 0.6520133706467661
epoch 4 batch id 401 loss 0.9633272886276245 train acc 0.6560162094763092
epoch 4 batch id 601 loss 0.7674992084503174 train acc 0.6667273294509152
epoch 4 train acc 0.6701511407469097


  0%|          | 0/168 [00:00<?, ?it/s]

epoch 4 test acc 0.5883269885437051


  0%|          | 0/671 [00:00<?, ?it/s]

epoch 5 batch id 1 loss 0.8384386897087097 train acc 0.6640625
epoch 5 batch id 201 loss 0.655267596244812 train acc 0.7009483830845771
epoch 5 batch id 401 loss 0.9494495391845703 train acc 0.7059304862842892
epoch 5 batch id 601 loss 0.6989797353744507 train acc 0.7178010607321131
epoch 5 train acc 0.7211683341369335


  0%|          | 0/168 [00:00<?, ?it/s]

epoch 5 test acc 0.5772363472358121


  0%|          | 0/671 [00:00<?, ?it/s]

epoch 6 batch id 1 loss 0.6963568925857544 train acc 0.765625
epoch 6 batch id 201 loss 0.5153539776802063 train acc 0.753731343283582
epoch 6 batch id 401 loss 0.7021451592445374 train acc 0.7560785536159601
epoch 6 batch id 601 loss 0.5945965647697449 train acc 0.7652870216306157
epoch 6 train acc 0.7679679966248795


  0%|          | 0/168 [00:00<?, ?it/s]

epoch 6 test acc 0.577689910510437


  0%|          | 0/671 [00:00<?, ?it/s]

epoch 7 batch id 1 loss 0.5908110737800598 train acc 0.8125
epoch 7 batch id 201 loss 0.3998739719390869 train acc 0.7939987562189055
epoch 7 batch id 401 loss 0.6436739563941956 train acc 0.7960567331670823
epoch 7 batch id 601 loss 0.45449110865592957 train acc 0.8043235232945092
epoch 7 train acc 0.8069489951345665


  0%|          | 0/168 [00:00<?, ?it/s]

epoch 7 test acc 0.5832001946754729


  0%|          | 0/671 [00:00<?, ?it/s]

epoch 8 batch id 1 loss 0.5446958541870117 train acc 0.7890625
epoch 8 batch id 201 loss 0.2681998312473297 train acc 0.8267257462686567
epoch 8 batch id 401 loss 0.47047412395477295 train acc 0.8288263715710723
epoch 8 batch id 601 loss 0.39722174406051636 train acc 0.8349885607321131
epoch 8 train acc 0.8364032228017884


  0%|          | 0/168 [00:00<?, ?it/s]

epoch 8 test acc 0.5851647861627528


  0%|          | 0/671 [00:00<?, ?it/s]

epoch 9 batch id 1 loss 0.46010541915893555 train acc 0.8046875
epoch 9 batch id 201 loss 0.25465598702430725 train acc 0.8528451492537313
epoch 9 batch id 401 loss 0.5026413798332214 train acc 0.851932668329177
epoch 9 batch id 601 loss 0.352687269449234 train acc 0.8553062603993344
epoch 9 train acc 0.8561032973174366


  0%|          | 0/168 [00:00<?, ?it/s]

epoch 9 test acc 0.5866643478881278


  0%|          | 0/671 [00:00<?, ?it/s]

epoch 10 batch id 1 loss 0.40991273522377014 train acc 0.859375
epoch 10 batch id 201 loss 0.2087116837501526 train acc 0.863339552238806
epoch 10 batch id 401 loss 0.47499048709869385 train acc 0.8633299563591023
epoch 10 batch id 601 loss 0.4044005274772644 train acc 0.8653156198003328
epoch 10 train acc 0.8662211251862891


  0%|          | 0/168 [00:00<?, ?it/s]

epoch 10 test acc 0.5862808575913242


In [46]:
# MODEL_PATH = "../myModel"
# torch.save(model.state_dict(), os.path.join(MODEL_PATH, "oui_20240327.pt"))

In [108]:
# Load the held-out test split. NOTE: this rebinds `test_dataset`, `data_test` and
# `test_dataloader`, which previously referred to the validation split.
test_path = os.path.join("../dataset/test", "test.tsv")
test_dataset = nlp.data.TSVDataset(test_path, num_discard_samples=1)
data_test = BERTDataset(test_dataset, sent_idx=0, label_idx=1,
                        bert_tokenizer=tok, max_len=max_len, pad=True, pair=False)
test_dataloader = DataLoader(data_test, batch_size=batch_size, num_workers=5)

In [111]:
# Accuracy on the test split, reported as the mean of the per-batch accuracies.
model.eval()
test_acc = 0.0
# Inference only: disable autograd so no computation graph is kept per batch.
with torch.no_grad():
    for batch_id, (token_ids, valid_length, segment_ids, label) in tqdm(enumerate(test_dataloader), total=len(test_dataloader)):
        token_ids = token_ids.long().to(device)
        segment_ids = segment_ids.long().to(device)
        label = label.long().to(device)
        out = model(token_ids, valid_length, segment_ids)
        test_acc += calc_accuracy(out, label)
print("test acc {}".format(test_acc/(batch_id+1)))

  0%|          | 0/94 [00:00<?, ?it/s]

55.044270833333336 94
test acc 0.5855773492907802


In [26]:
from konlpy.tag import Kkma

In [115]:
class Oui:
    """Emotion classifier wrapper: builds a BERTClassifier with its optimizer and
    scheduler, and exposes training, checkpoint loading and sentence-level prediction.

    Args:
        bertmodel: pretrained BERT encoder handed to ``BERTClassifier``.
        vocab: vocabulary used to build the prediction tokenizer.
        device: device the classifier is moved to.
        train_dataloader: batches of (token_ids, valid_length, segment_ids, label) for training.
        test_dataloader: batches in the same format, evaluated after every epoch.
        fixed: if truthy use the built-in hyperparameters, otherwise sample them from ``hp``.
        hp: mapping ``name -> (values, kind)`` where kind is ``'float'`` (uniform between
            min and max), ``'int'`` (random integer between min and max, inclusive) or
            ``'max_grad_norm'`` (random choice among the listed values).
        log_interval: a progress line is printed every ``log_interval`` training batches.
        num_epochs: number of training epochs.
        batch_size: batch size used by ``predict``.
        max_len: maximum sequence length used by ``predict``.
    """

    def __init__(self, bertmodel, vocab, device, train_dataloader, test_dataloader, fixed, hp, log_interval, num_epochs, batch_size=128, max_len=100):
        self.num_epochs = num_epochs
        self.log_interval = log_interval
        self.batch_size = batch_size
        self.max_len = max_len
        self.train_dataloader = train_dataloader
        self.test_dataloader = test_dataloader

        # Hyper-parameter random sampling
        params = dict()
        if not fixed:
            for name, value_info in hp.items():
                if value_info[1] == 'max_grad_norm':
                    params[name] = np.random.choice(value_info[0])
                elif value_info[1] == 'int':
                    params[name] = np.random.randint(min(value_info[0]), max(value_info[0])+1)
                elif value_info[1] == 'float':
                    params[name] = np.random.uniform(min(value_info[0]), max(value_info[0]))
        else:
            params['dr_rate']=0.5 #0.3 #0.2282168316541426 #0.3
            params['learning_rate']=5e-5 #4.4397570365495904e-05 #5e-5
            params['max_grad_norm']=1 #3 #1
            params['warmup_ratio']=0.1 #0.2503085518907766 #0.1
            params['weight_decay']=0.01 #0.009198865305723895 #0.01

        self.params = params
        print(self.params)

        self.model = BERTClassifier(bertmodel, dr_rate=params["dr_rate"]).to(device)
        self.loss_fn = nn.CrossEntropyLoss()
        self._build_optimizer()

        # Resources used by predict()
        self.kkma = Kkma()
        tokenizer = get_tokenizer()
        self.tok = nlp.data.BERTSPTokenizer(tokenizer, vocab, lower=False)
        # NOTE: the two attribute names are the reverse of what they hold
        # (`emotion_to_idx` maps index -> emotion). They are kept unchanged because
        # predict() and possibly external callers rely on them.
        self.emotion_to_idx = {0:'분노', 1:'당황', 2:'슬픔', 3:'기쁨', 4:'불안', 5:"느긋"}
        self.idx_to_emotion = {"분노": 0, "당황":1, "슬픔":2, "기쁨":3, "불안":4, "느긋":5}

    def _build_optimizer(self):
        """(Re)create the optimizer and LR scheduler for the current ``self.model``."""
        # Biases and LayerNorm weights are excluded from weight decay.
        no_decay = ['bias', 'LayerNorm.weight']
        optimizer_grouped_parameters = [
            {'params': [p for n, p in self.model.named_parameters() if not any(nd in n for nd in no_decay)], 'weight_decay': self.params["weight_decay"]},
            {'params': [p for n, p in self.model.named_parameters() if any(nd in n for nd in no_decay)], 'weight_decay': 0.0}
        ]
        self.optimizer = AdamW(optimizer_grouped_parameters, lr=self.params["learning_rate"])

        t_total = len(self.train_dataloader) * self.num_epochs
        warmup_step = int(t_total * self.params["warmup_ratio"])
        self.scheduler = get_cosine_schedule_with_warmup(self.optimizer, num_warmup_steps=warmup_step, num_training_steps=t_total)

    # Prediction
    def predict(self, X, device=None, verbose=True):
        """Split ``X`` into sentences and predict one emotion label per sentence.

        Args:
            X: raw input text.
            device: device the batches are moved to; defaults to the device that
                currently holds the model parameters.
            verbose: when true, print the cleaned sentences, the logits and the
                running label list (the original debugging output).

        Returns:
            A list with one entry per batch, each a list of emotion labels.
        """
        # Local import: the notebook imports `re` in a later cell than this class definition.
        import re

        if device is None:
            device = next(self.model.parameters()).device

        X_split = self.kkma.sentences(X)
        # Keep only Latin letters, Hangul and whitespace, then drop spaces. Every
        # sentence gets a placeholder label 0 because BERTDataset expects (text, label) rows.
        sentences = [[re.sub(r'[^a-zA-Z가-힣ㄱ-ㅎㅏ-ㅣ\s]','',x).replace(" ", ""), 0]  for x in X_split]
        if verbose:
            print(sentences)
        data_test = BERTDataset(sentences, 0, 1, self.tok, self.max_len, True, False)
        # A handful of in-memory sentences does not justify spawning worker processes.
        test_dataloader = torch.utils.data.DataLoader(data_test, batch_size=self.batch_size, num_workers=0)

        self.model.eval()
        test_eval_list = []
        # Inference only: no computation graph is needed.
        with torch.no_grad():
            for token_ids, valid_length, segment_ids, _ in test_dataloader:
                token_ids = token_ids.long().to(device)
                segment_ids = segment_ids.long().to(device)

                out = self.model(token_ids, valid_length, segment_ids)

                test_eval = []
                for logits in out.detach().cpu().numpy():
                    if verbose:
                        print(logits)
                    test_eval.append(self.emotion_to_idx[int(np.argmax(logits))])
                    if verbose:
                        print(test_eval)
                test_eval_list.append(test_eval)
        return test_eval_list

    # Load a saved model
    def load(self, path, device, bertmodel):
        """Replace ``self.model`` with a classifier restored from the state dict at ``path``.

        The classifier is rebuilt with the configured dropout rate, and the optimizer
        and scheduler are rebuilt as well so that a later ``train()`` call updates
        the loaded weights rather than the parameters of the discarded model.
        """
        self.model = BERTClassifier(bertmodel, dr_rate=self.params["dr_rate"]).to(device)
        # NOTE: torch.load unpickles the file; only load checkpoints from a trusted source.
        model_state_dict = torch.load(path, map_location=device)
        self.model.load_state_dict(model_state_dict)
        self._build_optimizer()

    # Accuracy
    def calc_accuracy(self, X,y):
        """Fraction of rows of ``X`` whose arg-max column equals the label in ``y``."""
        max_vals, max_indices = torch.max(X, 1)
        train_acc = (max_indices == y).sum().detach().cpu().numpy()/max_indices.size()[0]
        return train_acc

    # Training
    def train(self, device):
        """Train for ``self.num_epochs`` epochs, evaluating on the test loader after each.

        The weights of the epoch with the highest test accuracy are snapshotted and
        restored into ``self.model`` at the end, so the returned model really is the
        best one rather than whatever the last epoch produced.

        Returns:
            Tuple ``(best_model, train_acc_at_best_epoch, test_acc_at_best_epoch)``.
        """
        self.train_verbose_dict = {}
        self.train_score_dict = {}
        self.test_score_dict = {}
        self.params_list = []

        max_acc = 0
        self.be = 0
        self.bestmodel = self.model
        best_state = None  # CPU copy of the best epoch's weights
        print(self.params)
        for e in range(self.num_epochs):
            train_acc = 0.0
            test_acc = 0.0

            train_loss = []
            train_scores = []
            self.model.train()
            for batch_id, (token_ids, valid_length, segment_ids, label) in tqdm(enumerate(self.train_dataloader), total=len(self.train_dataloader)):
                self.optimizer.zero_grad()
                token_ids = token_ids.long().to(device)
                segment_ids = segment_ids.long().to(device)
                label = label.long().to(device)

                out = self.model(token_ids, valid_length, segment_ids)
                loss = self.loss_fn(out, label)
                loss.backward()
                torch.nn.utils.clip_grad_norm_(self.model.parameters(), self.params["max_grad_norm"])

                self.optimizer.step()
                self.scheduler.step()  # Update learning rate schedule
                train_acc += self.calc_accuracy(out, label)

                if batch_id % self.log_interval == 0:
                    print("epoch {} batch id {} loss {} train acc {}".format(e+1, batch_id+1, loss.item(), train_acc / (batch_id+1)))
                    train_loss.append(loss.detach().cpu().numpy())
                    train_scores.append(train_acc / (batch_id+1))

            print("epoch {} train acc {}".format(e+1, train_acc / (batch_id+1)))
            self.train_score_dict[e] = train_acc / (batch_id+1)
            self.train_verbose_dict[e] = (train_loss, train_scores)

            self.model.eval()
            # Evaluation needs no gradients.
            with torch.no_grad():
                for batch_id, (token_ids, valid_length, segment_ids, label) in tqdm(enumerate(self.test_dataloader), total=len(self.test_dataloader)):
                    token_ids = token_ids.long().to(device)
                    segment_ids = segment_ids.long().to(device)
                    label = label.long().to(device)

                    out = self.model(token_ids, valid_length, segment_ids)
                    test_acc += self.calc_accuracy(out, label)

            print("epoch {} test acc {}".format(e+1, test_acc / (batch_id+1)))
            self.test_score_dict[e] = test_acc / (batch_id+1)
            if max_acc < self.test_score_dict[e]:
                max_acc = self.test_score_dict[e]
                self.be = e
                # Copy the tensors: state_dict() returns references that later epochs overwrite.
                best_state = {k: v.detach().cpu().clone() for k, v in self.model.state_dict().items()}

        if best_state is not None:
            self.model.load_state_dict(best_state)
        self.bestmodel = self.model

        return self.bestmodel, self.train_score_dict[self.be], self.test_score_dict[self.be]

In [80]:
# Keyword arguments unpacked into Oui(...): the hyperparameter search space plus run settings.
config = {
    # Each entry is (values, sampling kind) and is only consulted when Oui is created
    # with fixed=False: 'float' samples uniformly between the min and max of the values,
    # 'max_grad_norm' picks one of the listed values at random.
    'hp': {
        'dr_rate': ([1e-1, 5e-1], 'float'),
        'learning_rate': ([1e-5, 1e-3], 'float'),
        'max_grad_norm': ([1, 3, 5], 'max_grad_norm'),
        'warmup_ratio': ([1e-2, 3e-1], 'float'),
        'weight_decay': ([1e-4, 1e-2] , 'float'),
    },
    # A training progress line is printed every `log_interval` batches.
    'log_interval': 200,
    # Number of training epochs.
    'num_epochs': 10,
}

In [68]:
import re
import os

# Machine-specific JDK location (presumably required by konlpy's Kkma -- adjust per host).
os.environ['JAVA_HOME'] = '/home/j-j10a506/.jdk/jdk8u402-b06'

# Append the JDK's bin directory to PATH exactly once: re-running the cell must not
# keep growing PATH, and an unset PATH must not become the literal string "None".
java_bin = os.path.join(os.environ['JAVA_HOME'], 'bin')
current_path = os.environ.get('PATH', '')
if java_bin not in current_path.split(os.pathsep):
    os.environ['PATH'] = f"{current_path}{os.pathsep}{java_bin}" if current_path else java_bin

In [69]:
oui = Oui(bertmodel, vocab, device, train_dataloader, test_dataloader, True, **config)
oui.load("../myModel/oui_20240327.pt", device, bertmodel)

{'dr_rate': 0.2282168316541426, 'learning_rate': 4.4397570365495904e-05, 'max_grad_norm': 3, 'warmup_ratio': 0.2503085518907766, 'weight_decay': 0.009198865305723895}
using cached model. /home/j-j10a506/oui/KoBERT/.cache/kobert_news_wiki_ko_cased-1087f8699e.spiece


In [70]:
oui.predict("아 시끄러워. 소리 좀 줄였으면 좋겠다. 나 귀 안먹었다!!!!!!!!!!!!!!!!!!!!!!")

[['아시끄러워소리좀줄였으면좋겠다', 0], ['나귀안먹었다', 0]]
[ 0.93896294  0.26407677 -0.01857913  0.82859117 -0.20105541 -1.5416832 ]
['분노']
[ 1.4599817   0.11218978  0.78544724  0.5301367  -0.41732517 -2.646504  ]
['분노', '분노']


[['분노', '분노']]

In [71]:
oui.predict("오늘 떡볶이를 먹었는데 맛있었다!")

[['오늘떡볶이를먹었는데맛있었다', 0]]
[-1.6993369 -1.4869488 -1.2186148  2.990341  -1.2543203  3.6374328]
['느긋']


[['느긋']]

In [72]:
oui.predict("그래도 이정도면 성공했다...") #  {0:'분노', 1:'당황', 2:'슬픔', 3:'기쁨', 4:'불안', 5:"느긋"}

[['그래도이정도면성공했다', 0]]
[-0.00164447  0.12928875  1.0824149  -1.103029   -0.4248172   0.00994094]
['슬픔']


[['슬픔']]

In [73]:
oui.predict("오늘은 푹 쉬었다 기분이 좋다")

[['오늘은푹쉬었다기분이좋다', 0]]
[-1.7089108 -1.6795552 -1.1885542  4.944749  -1.527644   2.3833554]
['기쁨']


[['기쁨']]

In [74]:
oui.predict("오늘은 푹 쉬었다. 기분이 좋다.")

[['오늘은푹쉬었다', 0], ['기분이좋다', 0]]
[-1.4346611 -1.58216   -1.0334294  5.3622437 -1.4215934  1.2479757]
['기쁨']
[-1.2587606 -1.5102684 -1.4196774  5.9655056 -1.3495058  0.6854899]
['기쁨', '기쁨']


[['기쁨', '기쁨']]

In [75]:
oui.predict("이번주만 버티면 끝나 힘내자!!!")

[['이번주만버티면끝나힘내자', 0]]
[-0.70630693 -1.039113    1.2975603   1.0058483  -0.63473684  0.66562396]
['슬픔']


[['슬픔']]

In [76]:
oui.predict("넌 최고야아아아아아아")

[['넌최고야아아아아아아', 0]]
[-1.3186584 -1.5941619 -1.4129112  5.661271  -1.559342   1.4824171]
['기쁨']


[['기쁨']]

In [77]:
oui.predict("재택근무. '재즈'. '작업'. '카페'. '매장'. '독서'. '힐링'. '잠들기전'. '사무실'. '커피'. '뉴에이지'. '명상'. '재즈'. '낭만'")

[['재택근무재즈작업카페매장독서힐링잠들기전사무실커피뉴에이지명상재즈낭만', 0]]
[-1.0881397 -1.3204321 -1.3050059  5.7505355 -1.1264142  0.1805162]
['기쁨']


[['기쁨']]

In [78]:
oui.predict("떠나는 길에 니가 내게 말했지 너는 바라는 게 너무나 많아 잠깐이라도 널 안 바라보면 머리에 불이 나버린다니까 나는 흐르려는 눈물을 참고 하려던 얘길 어렵게 누르고 그래 미안해라는 한 마디로 너랑 나눈 날들 마무리했었지 달디달고 달디달고 달디단 밤양갱 밤양갱 내가 먹고 싶었던 건 달디단 밤양갱 밤양갱이야 떠나는 길에 니가 내게 말했지 너는 바라는 게 너무나 많아 아냐 내가 늘 바란 건 하나야 한 개뿐이야 달디단 밤양갱 달디달고 달디달고 달디단 밤양갱 밤양갱 내가 먹고 싶었던 건 달디단 밤양갱 밤양갱이야")

[['떠나는길에니가내게말했지너는바라는게너무나많아잠깐이라도널안바라보면머리에불이나버린다니까', 0], ['나는흐르려는눈물을참고하려던얘길어렵게누르고그래미안해라는한마디로너랑나눈날들마무리했었지달디달고달디달고달디단밤양갱밤양갱내가먹고싶었던건달디단밤양갱밤양갱이야떠나는길에니가내게말했지너는바라는게너무나많아아냐내가늘바란건하나야한개뿐이야달디단밤양갱달디달고달디달고달디단밤양갱밤양갱내가먹고싶었던건달디단밤양갱밤양갱이야', 0]]
[ 2.7328718   0.56082225  1.140427   -3.4001148   1.1493524  -3.031311  ]
['분노']
[ 0.40829292  1.291262    2.586315   -2.1231759  -0.58100814 -1.5883467 ]
['분노', '슬픔']


[['분노', '슬픔']]

In [97]:
oui.predict("오빠 나 완성했어!!!!!!! 대박이지?!?!?")

[['오빠나완성했어', 0], ['대박이지', 0]]
[-0.85560447 -0.38656846  0.3335202   0.9191572  -0.990793    1.2560804 ]
['느긋']
[-1.3730785 -1.5239209 -1.1816858  5.1094117 -1.2090582  1.4085342]
['느긋', '기쁨']


[['느긋', '기쁨']]

In [112]:
oui.predict("비온 뒤 무지개!")

[['비온뒤무지개', 0]]
[-0.86147183 -0.7022993  -0.50529355  3.856172   -0.38319296 -0.65039045]
['기쁨']


[['기쁨']]

In [116]:
oui = Oui(bertmodel, vocab, device, train_dataloader, test_dataloader, True, **config)
oui.load("../myModel/oui_20240327.pt", "cpu", bertmodel)

{'dr_rate': 0.5, 'learning_rate': 5e-05, 'max_grad_norm': 1, 'warmup_ratio': 0.1, 'weight_decay': 0.01}
using cached model. /home/j-j10a506/oui/KoBERT/.cache/kobert_news_wiki_ko_cased-1087f8699e.spiece


In [117]:
oui.predict("비온 뒤 무지개!", "cpu")

[['비온뒤무지개', 0]]
[-0.99661195 -0.8946776  -0.35201007  5.1848445  -0.70639396 -0.67166615]
['기쁨']


[['기쁨']]

In [118]:
oui.predict("떠나는 길에 니가 내게 말했지 너는 바라는 게 너무나 많아 잠깐이라도 널 안 바라보면 머리에 불이 나버린다니까 나는 흐르려는 눈물을 참고 하려던 얘길 어렵게 누르고 그래 미안해라는 한 마디로 너랑 나눈 날들 마무리했었지 달디달고 달디달고 달디단 밤양갱 밤양갱 내가 먹고 싶었던 건 달디단 밤양갱 밤양갱이야 떠나는 길에 니가 내게 말했지 너는 바라는 게 너무나 많아 아냐 내가 늘 바란 건 하나야 한 개뿐이야 달디단 밤양갱 달디달고 달디달고 달디단 밤양갱 밤양갱 내가 먹고 싶었던 건 달디단 밤양갱 밤양갱이야", "cpu")

[['떠나는길에니가내게말했지너는바라는게너무나많아잠깐이라도널안바라보면머리에불이나버린다니까', 0], ['나는흐르려는눈물을참고하려던얘길어렵게누르고그래미안해라는한마디로너랑나눈날들마무리했었지달디달고달디달고달디단밤양갱밤양갱내가먹고싶었던건달디단밤양갱밤양갱이야떠나는길에니가내게말했지너는바라는게너무나많아아냐내가늘바란건하나야한개뿐이야달디단밤양갱달디달고달디달고달디단밤양갱밤양갱내가먹고싶었던건달디단밤양갱밤양갱이야', 0]]
[ 2.5409856   0.15911539  1.4782271  -2.996636    1.7618265  -3.246282  ]
['분노']
[-0.19877008 -0.8851231   0.26179868  2.67469    -1.406069    0.51363176]
['분노', '기쁨']


[['분노', '기쁨']]