# OUI

### package load 및 device 지정

In [168]:
import torch
from torch import nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import gluonnlp as nlp
import numpy as np
from tqdm.notebook import tqdm
import pandas as pd
from konlpy.tag import Kkma

In [169]:
from kobert import get_tokenizer
from kobert import get_pytorch_kobert_model

In [170]:
from transformers import AdamW
from transformers.optimization import get_cosine_schedule_with_warmup

In [171]:
import re
import os
os.environ['JAVA_HOME'] = '/home/j-j10a506/.jdk/jdk8u402-b06'
os.environ['PATH'] = f"{os.environ.get('PATH')}:{os.environ.get('JAVA_HOME')}/bin"

In [172]:
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
device = torch.device("cuda:3")
print(device)

cuda:3


### 데이터세트 구성

In [173]:
class BERTDataset(Dataset):
    def __init__(self, dataset, sent_idx, label_idx, bert_tokenizer, max_len,
                 pad, pair):
        transform = nlp.data.BERTSentenceTransform(
            bert_tokenizer, max_seq_length=max_len, pad=pad, pair=pair)

        self.sentences = []
        self.labels = []
        for i in dataset:
            if len(i)!=2:
                continue

            self.sentences.append(transform([i[sent_idx]]))
            self.labels.append(np.int32(i[label_idx]))
        

    def __getitem__(self, i):
        return (self.sentences[i] + (self.labels[i], ))

    def __len__(self):
        return (len(self.labels))

In [259]:
class BERTClassifier(nn.Module):
    def __init__(self,
                 bert,
                 hidden_size = 768,
                 num_classes=6,
                 dr_rate=None,
                 params=None):
        super(BERTClassifier, self).__init__()
        self.bert = bert
        self.dr_rate = dr_rate
                 
        self.classifier = nn.Linear(hidden_size , num_classes)
        if dr_rate:
            self.dropout = nn.Dropout(p=dr_rate)
    
    def gen_attention_mask(self, token_ids, valid_length):
        attention_mask = torch.zeros_like(token_ids)
        for i, v in enumerate(valid_length):
            attention_mask[i][:v] = 1
        return attention_mask.float()

    def forward(self, token_ids, valid_length, segment_ids):

        print(token_ids.shape)
        print(valid_length.shape)
        print(segment_ids)
        attention_mask = self.gen_attention_mask(token_ids, valid_length)
        _, pooler = self.bert(input_ids = token_ids, token_type_ids = segment_ids.long(), attention_mask = attention_mask.float().to(token_ids.device))
        if self.dr_rate:
            out = self.dropout(pooler)
        else:
            out = pooler
        return self.classifier(out)

In [260]:
class Oui:
    def __init__(self, data_path, nc, batch_size=128, max_len=100):
        self.data_path = data_path
        self.batch_size = batch_size

        # pretrained model & tokenizer load
        self.bertmodel, vocab = get_pytorch_kobert_model(cachedir=".cache")
        tokenizer = get_tokenizer()
        self.tok = nlp.data.BERTSPTokenizer(tokenizer, vocab, lower=False)
        self.model = None
        self.max_len = max_len
        
        # dataset 로드
        self.__load_data__(data_path, batch_size, max_len, self.tok)

        # predict
        self.kkma = Kkma()
        self.emotion_to_idx = {0:'분노', 1:'당황', 2:'슬픔', 3:'기쁨', 4:'불안', 5:"느긋"}
        self.idx_to_emotion = {"분노": 0, "당황":1, "슬픔":2, "기쁨":3, "불안":4, "느긋":5}

    '''데이터 로드'''
    def __load_data__(self, data_path, batch_size, max_len, tok):
        train_dataset = nlp.data.TSVDataset(os.path.join(data_path, "train/train.tsv"), num_discard_samples=1)
        valid_dataset = nlp.data.TSVDataset(os.path.join(data_path, "test/valid.tsv"), num_discard_samples=1)
        test_dataset = nlp.data.TSVDataset(os.path.join(data_path, "test/test.tsv"), num_discard_samples=1)

        data_train = BERTDataset(train_dataset, 0, 1, tok, max_len, True, False)
        data_valid = BERTDataset(valid_dataset, 0, 1, tok, max_len, True, False)
        data_test = BERTDataset(test_dataset, 0, 1, tok, max_len, True, False)
        
        self.train_dataloader = DataLoader(data_train, batch_size=batch_size, num_workers=5)
        self.valid_dataloader = DataLoader(data_valid, batch_size=batch_size, num_workers=5)
        self.test_dataloader = DataLoader(data_test, batch_size=batch_size, num_workers=5)
        

    '''하이퍼파라미터 튜닝'''
    def __tune_parameters__(self, parameters):
        params = dict()
        for name, value_info in parameters.items():
            if value_info[1] == 'max_grad_norm':
                params[name] = np.random.choice(value_info[0])
            elif value_info[1] == 'int':
                params[name] = np.random.randint(min(value_info[0]), max(value_info[0])+1)
            elif value_info[1] == 'float':
                params[name] = np.random.uniform(min(value_info[0]), max(value_info[0]))
        return params

    '''모델 로드'''
    def __load_model__(self, dr_rate=0.5, lr=5e-5, wr=0.1, wd=0.01, device="cpu", num_epochs=10):
        if not self.model:
            self.model = BERTClassifier(self.bertmodel, dr_rate=dr_rate).to(device)
            no_decay = ['bias', 'LayerNorm.weight']
            optimizer_grouped_parameters = [
                {'params': [p for n, p in self.model.named_parameters() if not any(nd in n for nd in no_decay)], 'weight_decay': wd},
                {'params': [p for n, p in self.model.named_parameters() if any(nd in n for nd in no_decay)], 'weight_decay': 0.0}
            ]

            ## optimizer & loss
            self.optimizer = AdamW(optimizer_grouped_parameters, lr=lr)
            self.loss_fn = nn.CrossEntropyLoss()

            t_total = len(self.train_dataloader) * num_epochs
            warmup_step = int(t_total * wr)
            self.scheduler = get_cosine_schedule_with_warmup(self.optimizer, num_warmup_steps=warmup_step, num_training_steps=t_total)
        return self.model
    
    '''학습 함수'''
    def train(self,export_path, parameters=None, log_interval=200, num_epochs=10, device="cpu"):
        params = dict()
        if parameters:         # 하이퍼파라미터 튜닝
            params = self.__tune_parameters__(parameters)
        else:
            params['dr_rate']=0.5
            params['learning_rate']=5e-5 
            params['max_grad_norm']=1
            params['warmup_ratio']=0.1
            params['weight_decay']=0.01

        self.model = self.__load_model__(params['dr_rate'], params['learning_rate'], params['warmup_ratio'], params["weight_decay"], device)

        max_acc = 0.0
        best_epoch = 0
        for e in range(self.num_epochs):
            train_acc = 0.0
            test_acc = 0.0

            # 학습
            self.model.train()
            for batch_id, (token_ids, valid_length, segment_ids, label) in tqdm(enumerate(self.train_dataloader), total=len(self.train_dataloader)):
                self.optimizer.zero_grad()
                token_ids = token_ids.long().to(device)
                segment_ids = segment_ids.long().to(device)
                valid_length= valid_length
                label = label.long().to(device)
                
                out = self.model(token_ids, valid_length, segment_ids)
                loss = self.loss_fn(out, label)
                loss.backward()
                torch.nn.utils.clip_grad_norm_(self.model.parameters(), self.params["max_grad_norm"])
                
                self.optimizer.step()
                self.scheduler.step()  # Update learning rate schedule
                train_acc += self.__cal_acc__(out, label)
                
                if batch_id % self.log_interval == 0:
                    print("epoch {} batch id {} loss {} train acc {}".format(e+1, batch_id+1, loss.data.cpu().numpy(), train_acc / (batch_id+1)))

            train_acc =  train_acc/(batch_id+1)
            print("epoch {} train acc {}".format(e+1,train_acc))

            # 평가
            self.model.eval()
            for batch_id, (token_ids, valid_length, segment_ids, label) in tqdm(enumerate(self.valid_dataloader), total=len(self.valid_dataloader)):
                token_ids = token_ids.long().to(device)
                segment_ids = segment_ids.long().to(device)
                valid_length= valid_length
                label = label.long().to(device)
                
                out = self.model(token_ids, valid_length, segment_ids)
                valid_acc += self.__cal_acc__(out, label)

            valid_acc = valid_acc/(batch_id+1)
            print("epoch {} test acc {}".format(e+1, valid_acc))

            # best acc 모델 저장
            if max_acc < valid_acc: 
                max_acc = valid_acc
                best_epoch = e
                self.export_model(export_path, "best_{}_epoch{}_acc{}.pt".format(self.__today__formatted__(),best_epoch,max_acc*100//100))
        

    # '''예측 함수'''
    def predict(self, X, device="cpu"):
        #X_split = self.kkma.sentences(X)
        X_split = [X]
        sentences = [[re.sub('[^a-zA-Z가-힣ㄱ-ㅎㅏ-ㅣ\s]','',x).replace(" ", ""), 0]  for x in X_split]
        data_test = BERTDataset(sentences, 0, 1, self.tok, self.max_len, True, False)
        test_dataloader = torch.utils.data.DataLoader(data_test, batch_size=self.batch_size, num_workers=5)

        self.model = self.__load_model__(device)
        self.model.eval()
        test_eval_list = []
        for batch_id, (token_ids, valid_length, segment_ids, label) in enumerate(test_dataloader):
            token_ids = token_ids.long().to(device)
            segment_ids = segment_ids.long().to(device)
    
            valid_length= valid_length
            label = label.long().to(device)
    
            out = self.model(token_ids, valid_length, segment_ids)

            test_eval=[]
            for i in out:
                logits=i
                logits = logits.detach().cpu().numpy()
                
                positive_indices = np.where(logits > 0.5)[0]
                positive_values = logits[positive_indices]
                sorted_indices = positive_indices[np.argsort(positive_values)[::-1]]
                emotions = list(map(lambda x: self.emotion_to_idx[x], sorted_indices))
                test_eval.append((logits, emotions))
                #test_eval.append((logits,self.emotion_to_idx[np.argmax(logits)]))
            test_eval_list.append(test_eval)
        return test_eval_list

    '''정확도 계산'''
    def __cal_acc__(self, X, y):
        max_vals, max_indices = torch.max(X, 1)
        train_acc = (max_indices == y).sum().data.cpu().numpy()/max_indices.size()[0]
        return train_acc

    '''모델 저장'''
    def export_model(self, export_path, export_name):
        torch.save(model.state_dict(), os.path.join(export_path, export_name))
        print("************************************모델 저장************************************************")        

    '''모델 로드'''
    def import_model(self, path, device):
        self.model = self.__load_model__(device=device)
        model_state_dict = torch.load(path, map_location=device)
        self.model.load_state_dict(model_state_dict)
        print("************************************모델 로드************************************************")        

    ''' 날짜 포맷'''
    def __today__formatted__(self):
        today = datetime.datetime.today()
        return str(today.year) + str(today.month) + str(today.day)

In [261]:
config = {
    'parameters': {
        'dr_rate': ([1e-1, 5e-1], 'float'),
        'learning_rate': ([1e-5, 1e-3], 'float'),
        'max_grad_norm': ([1, 3, 5], 'max_grad_norm'),
        'warmup_ratio': ([1e-2, 3e-1], 'float'),
        'weight_decay': ([1e-4, 1e-2] , 'float'),
    },
    'log_interval': 200,
    'num_epochs': 10,
}

In [262]:
oui = Oui("../dataset", 6)
oui.import_model("../myModel/oui_20240327_acc59.pt", "cpu")

using cached model. /home/j-j10a506/oui/KoBERT/.cache/kobert_v1.zip
using cached model. /home/j-j10a506/oui/KoBERT/.cache/kobert_news_wiki_ko_cased-1087f8699e.spiece
using cached model. /home/j-j10a506/oui/KoBERT/.cache/kobert_news_wiki_ko_cased-1087f8699e.spiece
************************************모델 로드************************************************


In [263]:
oui.predict("오늘 떡볶이를 먹었는데 맛있었다!")

torch.Size([1, 100])
torch.Size([1])
tensor([[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         0, 0, 0, 0]])


[[(array([-1.6993368, -1.486949 , -1.218615 ,  2.9903407, -1.2543204,
           3.6374338], dtype=float32),
   ['느긋', '기쁨'])]]

In [264]:
oui.predict("그래도 이정도면 성공했다...")

torch.Size([1, 100])
torch.Size([1])
tensor([[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         0, 0, 0, 0]])


[[(array([-0.00164383,  0.12928936,  1.0824162 , -1.1030301 , -0.42481732,
           0.00993926], dtype=float32),
   ['슬픔'])]]

In [265]:
oui.predict("오늘은 푹 쉬었다. 기분이 좋다.")

torch.Size([1, 100])
torch.Size([1])
tensor([[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         0, 0, 0, 0]])


[[(array([-1.7089108, -1.6795555, -1.1885538,  4.9447465, -1.5276444,
           2.3833578], dtype=float32),
   ['기쁨', '느긋'])]]

In [180]:
oui.predict("다음주에 끝나!!! 힘내자!!!")

[[(array([-1.0346911 , -1.3176281 , -0.0993569 ,  4.19938   , -1.249438  ,
           0.57501316], dtype=float32),
   ['기쁨', '느긋'])]]

In [181]:
oui.predict("넌 최고야아아아아아아")

[[(array([-1.3186585, -1.5941617, -1.4129113,  5.6612716, -1.559342 ,
           1.4824166], dtype=float32),
   ['기쁨', '느긋'])]]

In [182]:
oui.predict("재택근무. '재즈'. '작업'. '카페'. '매장'. '독서'. '힐링'. '잠들기전'. '사무실'. '커피'. '뉴에이지'. '명상'. '재즈'. '낭만'")

[[(array([-1.0881395 , -1.3204317 , -1.3050059 ,  5.750535  , -1.1264136 ,
           0.18051589], dtype=float32),
   ['기쁨'])]]

In [183]:
oui.predict("떠나는 길에 니가 내게 말했지 너는 바라는 게 너무나 많아 잠깐이라도 널 안 바라보면 머리에 불이 나버린다니까 나는 흐르려는 눈물을 참고 하려던 얘길 어렵게 누르고 그래 미안해라는 한 마디로 너랑 나눈 날들 마무리했었지 달디달고 달디달고 달디단 밤양갱 밤양갱 내가 먹고 싶었던 건 달디단 밤양갱 밤양갱이야 떠나는 길에 니가 내게 말했지 너는 바라는 게 너무나 많아 아냐 내가 늘 바란 건 하나야 한 개뿐이야 달디단 밤양갱 달디달고 달디달고 달디단 밤양갱 밤양갱 내가 먹고 싶었던 건 달디단 밤양갱 밤양갱이야")

[[(array([ 0.4595502 ,  1.7228955 ,  1.844803  , -2.0612774 , -0.45642117,
          -1.3379616 ], dtype=float32),
   ['슬픔', '당황'])]]

In [184]:
oui.predict("오빠 나 완성했어!!!!!!! 대박이지?!?!?")

[[(array([-1.4889722, -1.4272162, -0.7195817,  3.7282648, -1.5424592,
           2.2768848], dtype=float32),
   ['기쁨', '느긋'])]]

In [185]:
oui.predict("비온 뒤 무지개!")

[[(array([-0.8614713 , -0.70230097, -0.50529486,  3.8561769 , -0.383195  ,
          -0.6503905 ], dtype=float32),
   ['기쁨'])]]

In [186]:
oui.predict("나 너무 화가 나 으악 이거 왜 이래")

[[(array([ 3.4871686 ,  0.03877295,  1.159696  , -2.6744626 , -0.11092299,
          -3.0513337 ], dtype=float32),
   ['분노', '슬픔'])]]

In [187]:
oui.predict("유경언니 잘가 고생해썽!!! 내일 봐아아아")

[[(array([-0.24827906,  0.06239401,  0.46597284,  1.6907028 , -0.7146384 ,
          -0.7499463 ], dtype=float32),
   ['기쁨'])]]

In [188]:
oui.predict("오늘 너무 피곤해 진짜 힘들었어... 나는 왜 이럴까")

[[(array([ 2.2673976,  0.9862835,  2.077407 , -3.8068776,  0.3757995,
          -2.9449425], dtype=float32),
   ['분노', '슬픔', '당황'])]]

In [189]:
oui.predict("너무 어렵다 이문제...")

[[(array([ 0.69671607,  0.98253715,  2.342915  , -3.6035538 ,  1.462371  ,
          -2.5276515 ], dtype=float32),
   ['슬픔', '불안', '당황', '분노'])]]

In [192]:
oui.predict("에러가 너무 많이 터진다. 할일이 너무 많다")

[[(array([ 2.1695783 ,  0.4398461 ,  0.93120855, -2.4109993 ,  1.3664936 ,
          -3.0241697 ], dtype=float32),
   ['분노', '불안', '슬픔'])]]

In [251]:
oui.predict("오늘 엄마랑 다이소에 갔다. 사고 싶은게 너무 많은데 못사서 슬펐다. 그래도 엄마랑 같이 다이소 구경해서 좋았다.")

torch.Size([1, 100])
torch.Size([1])
torch.Size([1, 100])


[[(array([-0.9345543 , -0.31555843,  1.2597854 ,  0.83889073, -1.3189985 ,
           0.7683024 ], dtype=float32),
   ['슬픔', '기쁨', '느긋'])]]

# OpenVino로 변환
* pt -> onnx -> openvino

### onnx 변환

In [246]:
print(oui.model)

BERTClassifier(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(8002, 768, padding_idx=1)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0-11): 12 x BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_aff

In [247]:
import torch.onnx

In [267]:
token_ids = torch.randint(0, 20, (1, 100)).to("cpu")
valid_length = torch.randint(10, 21, (1,)).to("cpu")
segment_ids = torch.zeros(1, 100).to("cpu")

In [275]:
torch.onnx.export(oui.model,               
                  (token_ids,valid_length,segment_ids),                   
                  "../myModel/oui_240328.onnx",   
                  export_params=True,       
                  input_names = ['input'],  
                  output_names = ['output'], opset_version=11)

torch.Size([1, 100])
torch.Size([1])
tensor([[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0.]])


  for i, v in enumerate(valid_length):


In [276]:
import onnx

onnx_model = onnx.load("../myModel/oui_240328.onnx")
onnx.checker.check_model(onnx_model)

In [286]:
import onnxruntime
bertmodel, vocab = get_pytorch_kobert_model(cachedir=".cache")
tokenizer = get_tokenizer()
tok = nlp.data.BERTSPTokenizer(tokenizer, vocab, lower=False)
max_len=100
batch_size=128

using cached model. /home/j-j10a506/oui/KoBERT/.cache/kobert_v1.zip
using cached model. /home/j-j10a506/oui/KoBERT/.cache/kobert_news_wiki_ko_cased-1087f8699e.spiece
using cached model. /home/j-j10a506/oui/KoBERT/.cache/kobert_news_wiki_ko_cased-1087f8699e.spiece


In [376]:
batch_size=128
kkma = Kkma()

In [384]:
 def onnx_predict(X, device="cpu"):
    #X_split = self.kkma.sentences(X)
    emotion_to_idx = {0:'분노', 1:'당황', 2:'슬픔', 3:'기쁨', 4:'불안', 5:"느긋"}
    X_split = [X]
    sentences = [[re.sub('[^a-zA-Z가-힣ㄱ-ㅎㅏ-ㅣ\s]','',x).replace(" ", ""), 0]  for x in X_split]
    data_test = BERTDataset(sentences, 0, 1, tok, max_len, True, False)
    test_dataloader = torch.utils.data.DataLoader(data_test, batch_size=batch_size, num_workers=5)
    test_eval_list = []
    model = onnxruntime.InferenceSession("../myModel/oui_240328.onnx")
    for batch_id, (token_ids, valid_length, segment_ids, label) in enumerate(test_dataloader):
        token_ids = token_ids.long().to(device)
        segment_ids = segment_ids.float().to(device)

        valid_length= valid_length.long()
        #print(valid_length.dtype)
        #print(token_ids.dtype)
        label = label.long().to(device)
        inputs_info = model.get_inputs()
        # for input_info in inputs_info:
        #     print("Input Name:", input_info.name)
        #     print("Input Type:", input_info.type)
        #     print("Input Shape:", input_info.shape)
        #     print("--------------------------------------------------")
        out = model.run(None, {"input":np.array(token_ids), "valid_length": np.array(valid_length), "inp": np.array(segment_ids)})

        test_eval=[]
        for i in out:
            logits=i[0]
            #print(logits)
            positive_indices = np.where(logits > 0.5)[0]
            positive_values = logits[positive_indices]
            sorted_indices = positive_indices[np.argsort(positive_values)[::-1]]
            emotions = list(map(lambda x: emotion_to_idx[x], sorted_indices))
            test_eval.append((logits, emotions))
            #test_eval.append((logits,self.emotion_to_idx[np.argmax(logits)]))
        test_eval_list.append(test_eval)
    return test_eval_list

In [385]:
onnx_predict("오늘 너무 피곤해 진짜 힘들었어... 나는 왜 이럴까")

[[(array([ 2.267396 ,  0.9862838,  2.0774078, -3.8068776,  0.3757988,
          -2.9449413], dtype=float32),
   ['분노', '슬픔', '당황'])]]

In [386]:
oui.predict("오늘 너무 피곤해 진짜 힘들었어... 나는 왜 이럴까")

torch.Size([1, 100])
torch.Size([1])
tensor([[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         0, 0, 0, 0]])


[[(array([ 2.2673976,  0.9862835,  2.077407 , -3.8068776,  0.3757995,
          -2.9449425], dtype=float32),
   ['분노', '슬픔', '당황'])]]

### openvino 변환

In [389]:
from openvino.runtime import Core

In [390]:
ie = Core()

In [394]:
network = ie.read_model(model="../myModel/oui_240328.xml", weights="../myModel/oui_240328.bin")
executable_network = ie.compile_model(model=network, device_name="CPU")
output_layer = next(iter(executable_network.outputs))

In [401]:
 def openvino_predict(X, device="cpu"):
    #X_split = self.kkma.sentences(X)
    emotion_to_idx = {0:'분노', 1:'당황', 2:'슬픔', 3:'기쁨', 4:'불안', 5:"느긋"}
    X_split = [X]
    sentences = [[re.sub('[^a-zA-Z가-힣ㄱ-ㅎㅏ-ㅣ\s]','',x).replace(" ", ""), 0]  for x in X_split]
    data_test = BERTDataset(sentences, 0, 1, tok, max_len, True, False)
    test_dataloader = torch.utils.data.DataLoader(data_test, batch_size=batch_size, num_workers=5)
    test_eval_list = []
    model = onnxruntime.InferenceSession("../myModel/oui_240328.onnx")
    for batch_id, (token_ids, valid_length, segment_ids, label) in enumerate(test_dataloader):
        token_ids = token_ids.long().to(device)
        segment_ids = segment_ids.float().to(device)

        valid_length= valid_length.long()
        label = label.long().to(device)
        inputs_info = model.get_inputs()
        #out = model.run(None, {"input":np.array(token_ids), "valid_length": np.array(valid_length), "inp": np.array(segment_ids)})
        out = executable_network([token_ids, valid_length, segment_ids])[output_layer]
        
        test_eval=[]
        for i in out:
            logits=i
            #print(logits)
            positive_indices = np.where(logits > 0.5)[0]
            positive_values = logits[positive_indices]
            sorted_indices = positive_indices[np.argsort(positive_values)[::-1]]
            emotions = list(map(lambda x: emotion_to_idx[x], sorted_indices))
            test_eval.append((logits, emotions))
            #test_eval.append((logits,self.emotion_to_idx[np.argmax(logits)]))
        test_eval_list.append(test_eval)
    return test_eval_list

In [400]:
openvino_predict("오늘 너무 피곤해 진짜 힘들었어... 나는 왜 이럴까")

[0 1 2]


[[(array([ 2.265625  ,  0.98046875,  2.078125  , -3.8125    ,  0.37304688,
          -2.953125  ], dtype=float32),
   ['분노', '슬픔', '당황'])]]