In [47]:
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from transformers import TextClassificationPipeline, pipeline

import shap
import numpy as np
import pandas as pd

In [2]:
# 저장한 모델 경로
model_path = 'models/models0506_klue/'

# 모델과 토크나이저 로드
model = AutoModelForSequenceClassification.from_pretrained(model_path)
tokenizer = AutoTokenizer.from_pretrained(model_path)

In [3]:
# 임의의 문자열
text = "이 영화 정말 재미있었어요!"

In [4]:
# 입력 데이터 토큰화
inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True)

In [5]:
# 모델 추론
with torch.no_grad():
    outputs = model(**inputs)
    predictions = torch.nn.functional.softmax(outputs.logits, dim=-1)

In [6]:
# 결과 출력
predicted_class = torch.argmax(predictions, dim=1).item()
confidence = predictions[0][predicted_class].item()
print(f"Predicted class: {predicted_class}, Confidence: {confidence:.4f}")

Predicted class: 1, Confidence: 0.5373


In [7]:
#데이터 로드
data = pd.read_csv('datas/ratings_test.txt', sep='\t')
data = data.dropna(how='any')

In [45]:
#데이터 로드2
dataset2 = pd.read_csv('datas/ratings_test.txt', sep='\t')
short_data = [v[:500] for v in dataset2["document"][:20]]

In [48]:
classifier = pipeline("sentiment-analysis", return_all_scores=True)
classifier(short_data[:2])

No model was supplied, defaulted to distilbert/distilbert-base-uncased-finetuned-sst-2-english and revision af0f99b (https://huggingface.co/distilbert/distilbert-base-uncased-finetuned-sst-2-english).
Using a pipeline without specifying a model name and revision in production is not recommended.


[[{'label': 'NEGATIVE', 'score': 0.7850591540336609},
  {'label': 'POSITIVE', 'score': 0.2149408757686615}],
 [{'label': 'NEGATIVE', 'score': 0.9802947640419006},
  {'label': 'POSITIVE', 'score': 0.019705262035131454}]]

In [49]:
explainer = shap.Explainer(classifier)

In [52]:
shap_values = explainer(short_data[:5])

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer:  60%|██████    | 3/5 [00:00<?, ?it/s]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer: 100%|██████████| 5/5 [00:36<00:00,  9.10s/it]

  0%|          | 0/498 [00:00<?, ?it/s]

PartitionExplainer explainer: 6it [00:53, 17.94s/it]                       


In [53]:
shap.plots.text(shap_values[:, :, "POSITIVE"])