In [2]:
import pandas as pd
import joblib
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.svm import SVC
from sklearn.metrics import classification_report, confusion_matrix

# Train, evaluate and persist an inquiry-type text classifier:
# TF-IDF features -> SVC, with string labels mapped to integer ids.
df = pd.read_csv("customer.csv")

# Rows lacking a text or a label cannot be used: a NaN document makes
# TfidfVectorizer raise, and a NaN mixed into string labels makes
# LabelEncoder raise. Discard them before extracting the arrays.
df = df.dropna(subset=['text', 'label'])

text = df['text'].values
label = df['label'].values

# Map the string labels to integer ids; the encoder is saved below so that
# predictions can be turned back into label names.
label_encoder = LabelEncoder()
label_encoded = label_encoder.fit_transform(label)

# 70/30 split, stratified on the encoded label so both parts keep the same
# class proportions; fixed seed for reproducibility.
text_train, text_test, label_train, label_test = train_test_split(
    text, label_encoded, test_size=0.3, random_state=3, stratify=label_encoded
)

# Fit the vocabulary / IDF weights on the training texts only, then reuse the
# fitted vectoriser for the test texts.
tfidf_vectorizer = TfidfVectorizer()
tfidf_train = tfidf_vectorizer.fit_transform(text_train)
tfidf_test = tfidf_vectorizer.transform(text_test)
svc_model = SVC(random_state=3)
svc_model.fit(tfidf_train, label_train)
label_pred = svc_model.predict(tfidf_test)

# Pass the complete list of class ids explicitly so that the matrix rows /
# columns and the report's target_names always line up with
# label_encoder.classes_, even if some class is absent from both the test
# labels and the predictions.
class_ids = list(range(len(label_encoder.classes_)))

print("문의 유형 Confusion Matrix: ")
print(confusion_matrix(label_test, label_pred, labels=class_ids))
print("\nClassification Report: ")
print(classification_report(
    label_test,
    label_pred,
    labels=class_ids,
    target_names=label_encoder.classes_,
))

# Persist the three fitted objects needed at inference time.
joblib.dump(svc_model, "svc_label_model.pkl")
joblib.dump(tfidf_vectorizer, "tfidf_vectorizer_label.pkl")
joblib.dump(label_encoder, "label_encoder.pkl")

print("모델 및 인코더 저장 완료")

문의 유형 Confusion Matrix: 
[[10  0  0  0  0  0]
 [ 0 11  0  0  0  0]
 [ 0  0 10  1  0  0]
 [ 0  0  0 11  0  0]
 [ 0  0  0  1  9  0]
 [ 0  0  2  0  0  9]]

Classification Report: 
              precision    recall  f1-score   support

          결제       1.00      1.00      1.00        10
          계정       1.00      1.00      1.00        11
          기타       0.83      0.91      0.87        11
          배송       0.85      1.00      0.92        11
          상품       1.00      0.90      0.95        10
          환불       1.00      0.82      0.90        11

    accuracy                           0.94        64
   macro avg       0.95      0.94      0.94        64
weighted avg       0.94      0.94      0.94        64

모델 및 인코더 저장 완료
