# 한국어 비속어 처리

In [None]:
!pip install transformers datasets evaluate

In [None]:
from huggingface_hub import notebook_login

# Authenticate this notebook session with the Hugging Face Hub; the
# push_to_hub calls at the end of the notebook need a logged-in account.
notebook_login()

In [None]:
from transformers import (
    AutoTokenizer,
    DataCollatorWithPadding,
    AutoModelForSequenceClassification,
    TrainingArguments,
    Trainer,
)

import numpy as np
from evaluate import load

In [None]:
from datasets import load_dataset

# Load the `SJ-Donald/kor-hate-sentence` dataset from the Hugging Face Hub.
# The 'train' and 'validation' splits of the result are used further down.
hate_dataset = load_dataset("SJ-Donald/kor-hate-sentence")

In [None]:
# Bare expression: in a notebook cell this displays the loaded dataset object.
hate_dataset

In [None]:
# Pick out the training and validation splits by name.
hate_train, hate_eval = hate_dataset['train'], hate_dataset['validation']

In [None]:
import pandas as pd

def _to_labeled_frame(split):
  """Build a ``text`` / ``label`` / ``label_text`` DataFrame from one split.

  Args:
    split: Any object ``pd.DataFrame`` can consume that yields the columns
      '문장' (the sentence) and 'hate' (the numeric label).

  Returns:
    A new DataFrame with columns ``text``, ``label`` and ``label_text``,
    where ``label_text`` is 'toxic' when ``label == 1`` and 'not toxic'
    otherwise.
  """
  # .copy() detaches the result from the source frame, so adding a column
  # below is a plain assignment rather than a write into a slice.
  frame = pd.DataFrame(split)[['문장', 'hate']].copy()
  frame.columns = ['text', 'label']
  # Vectorised replacement for the row-by-row `df[col].iloc[i] = ...` loop.
  # That chained assignment triggers SettingWithCopyWarning and is a silent
  # no-op when pandas copy-on-write is enabled.
  frame['label_text'] = frame['label'].eq(1).map(
    {True: 'toxic', False: 'not toxic'})
  return frame


hate_train = _to_labeled_frame(hate_train)
hate_eval = _to_labeled_frame(hate_eval)

In [None]:
import pandas as pd
import datasets
from datasets import Dataset, DatasetDict
# Convert the prepared pandas DataFrames back into `datasets.Dataset` objects
# so they can be tokenized with `.map` and handed to the Trainer.
hate_train = Dataset.from_pandas(hate_train)
hate_eval = Dataset.from_pandas(hate_eval)

In [None]:
# Tokenizer for the `beomi/KcELECTRA-base` checkpoint; the same checkpoint
# name is used when the classification model is loaded.
tokenizer = AutoTokenizer.from_pretrained("beomi/KcELECTRA-base")

def preprocess_function(examples):
  """Tokenize the ``text`` field of ``examples`` with the module tokenizer.

  Truncation is enabled, so over-long inputs are cut to the tokenizer's
  maximum length. The tokenizer output is returned unchanged.
  """
  texts = examples["text"]
  encoded = tokenizer(texts, truncation=True)
  return encoded

# Tokenize both splits. batched=True makes `map` call preprocess_function on
# batches of rows instead of one row at a time.
hate_train = hate_train.map(preprocess_function, batched=True)
hate_eval = hate_eval.map(preprocess_function, batched=True)

In [None]:
# Korean profanity classifier
# Collator that pads the tokenized examples of each batch using `tokenizer`.
data_collator = DataCollatorWithPadding(tokenizer=tokenizer)
# Sequence-classification model on top of the KcELECTRA checkpoint.
# NOTE(review): no `num_labels` is passed, so the library default head size is
# used — presumably 2, matching the binary `label` column; confirm.
model = AutoModelForSequenceClassification.from_pretrained("beomi/KcELECTRA-base")

In [None]:
def compute_metrics(eval_pred):
  """Compute classification accuracy for one evaluation pass.

  Args:
    eval_pred: An object that unpacks into ``(logits, labels)``. The
      predicted class is the argmax over the last axis of ``logits``
      (presumably shaped ``(n_examples, n_classes)`` — confirm).

  Returns:
    ``{"accuracy": value}`` where ``value`` is a Python float: the fraction
    of predictions equal to the reference labels.
  """
  logits, labels = eval_pred
  predictions = np.argmax(logits, axis=-1)
  # Accuracy is just the mean of exact matches. Computing it directly avoids
  # re-loading the `evaluate` metric on every evaluation call.
  accuracy = float(np.mean(predictions == np.asarray(labels)))
  return {"accuracy": accuracy}

# Training configuration: checkpoints and logs go to ./hate, evaluation and
# checkpointing both happen once per epoch, batch size is 8 per device.
training_args = TrainingArguments(
  output_dir="hate",
  # NOTE(review): newer transformers releases rename this argument to
  # `eval_strategy` — confirm against the installed version.
  evaluation_strategy = "epoch",
  save_strategy = "epoch",
  learning_rate=2e-5,
  per_device_train_batch_size=8,
  per_device_eval_batch_size=8,
  # Alternative value left by the author: num_train_epochs=2. The active
  # value below trains for a fraction of one epoch — presumably a quick trial.
  num_train_epochs=0.2,
  weight_decay=0.01,
)

# Wire the model, training arguments, tokenized splits, collator and metric
# function into a single Trainer.
trainer = Trainer(
  model=model,
  args=training_args,
  train_dataset=hate_train,
  eval_dataset=hate_eval,
  # NOTE(review): recent transformers releases deprecate `tokenizer=` in
  # favour of `processing_class=` — confirm against the installed version.
  tokenizer=tokenizer,
  data_collator=data_collator,
  compute_metrics=compute_metrics,
)

# Run fine-tuning with the configuration above.
trainer.train()

In [None]:
!pip install openai

In [None]:
from openai import OpenAI
from google.colab import userdata

# Read the API key from the Colab secret named "OPENAI_KEY" instead of
# embedding a key literal in the notebook, where it would leak whenever the
# notebook is shared or committed.
client = OpenAI(api_key=userdata.get("OPENAI_KEY"))

def detect_hate(input_text, model="gpt-3.5-turbo"):
  """Ask an OpenAI chat model whether ``input_text`` is hate speech.

  Sends a Korean system prompt plus a user question that wraps
  ``input_text`` in triple backticks, using the module-level ``client``.

  Args:
    input_text: The sentence to classify.
    model: Chat model name to query; defaults to the model the notebook
      originally hard-coded.

  Returns:
    The message content of the first returned choice.
  """
  response = client.chat.completions.create(
    model=model,
    temperature=0,  # keep the answer as repeatable as the API allows
    messages=[
      {"role": "system",
       # Typo fix: the original prompt read "전문이가이다".
       "content": "너는 콘텐츠 운영 전문가이다."},
      {"role": "user",
       # f-string instead of `"...%s" % (input_text)`: the `%` operator
       # treats a tuple argument as the argument list, so a tuple input
       # would be unpacked or raise instead of being formatted as-is.
       "content": f"이게 혐오표현인가요?  ```{input_text}```"},
    ],
  )
  return response.choices[0].message.content


In [None]:
# Try the prompt on one sample Korean sentence; as the last expression of a
# notebook cell, the returned string is shown as the cell output.
detect_hate("그렇게 게임하면 어떡하냐 방송 접어라 허접아")

In [None]:
# Trainer.push_to_hub() takes a commit message as its first positional
# argument, not a repository id. The target repository comes from
# `hub_model_id`, which otherwise falls back to the name of `output_dir`.
# Set it explicitly so the model lands in the intended repo.
trainer.args.hub_model_id = "wonik-hi/ko_bad_content_trainer"
trainer.push_to_hub()

In [None]:
# Publish the processed train and eval splits as Hub datasets. Unlike
# Trainer.push_to_hub, Dataset.push_to_hub takes the repo id as its first
# argument.
hate_train.push_to_hub("wonik-hi/ko_bad_content_train")
hate_eval.push_to_hub("wonik-hi/ko_bad_content_eval")