In [1]:
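# Multi-label classifier for the `target` column
# NOTE(review): the original header said "Google Drive based", but the paths used below are local Windows paths — confirm the intended environment.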
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.preprocessing import MultiLabelBinarizer
from sklearn.linear_model import LogisticRegression
from sklearn.multiclass import OneVsRestClassifier
from sklearn.model_selection import KFold, cross_val_predict
from sklearn.metrics import classification_report


In [2]:
# 1. Load the spreadsheet and keep only rows where both `description` and `target` are present
file_path = 'C:/flask_project/data/강원도_관광지_20_예시.xlsx'
df = pd.read_excel(file_path)
df_target = df.loc[:, ['description', 'target']].dropna(axis=0, how='any')

In [3]:
# 2. Convert the `target` column into a list of labels per row
def _split_tags(value):
    """Return the cleaned list of labels for a single `target` cell.

    Comma-separated strings are split and stripped. Cells that already hold a
    list/tuple are passed through, so re-running this cell does not re-split
    the stringified list into garbage labels such as "['가족'".
    Empty fragments (e.g. from a trailing or doubled comma) are dropped so
    they never become a spurious '' class.
    """
    parts = value if isinstance(value, (list, tuple)) else str(value).split(',')
    cleaned = (str(part).strip() for part in parts)
    return [tag for tag in cleaned if tag]


df_target['target'] = df_target['target'].apply(_split_tags)


In [7]:
# 3. Model input (X) and the binarized multi-label matrix (y)
X = df_target['description']
# Learn the label set first, then encode each row's label list against it
mlb = MultiLabelBinarizer().fit(df_target['target'])
y = mlb.transform(df_target['target'])

In [9]:
# 4. TF-IDF features (at most 1000 terms) and a one-vs-rest logistic-regression classifier
tfidf = TfidfVectorizer(max_features=1000)
X_vec = tfidf.fit_transform(X)
base_estimator = LogisticRegression(max_iter=1000)
clf = OneVsRestClassifier(base_estimator)

In [11]:
# 5. Out-of-fold predictions from shuffled 5-fold cross-validation (seeded for reproducibility)
# NOTE(review): X_vec was vectorized from all rows before splitting, so the TF-IDF
# statistics have seen the held-out rows — consider vectorizing inside each fold.
kf = KFold(n_splits=5, shuffle=True, random_state=42)
y_pred = cross_val_predict(estimator=clf, X=X_vec, y=y, cv=kf)

In [13]:
# 6. Evaluation: per-label precision / recall / F1 on the out-of-fold predictions
report = classification_report(y, y_pred, target_names=mlb.classes_)
# The report is pre-formatted multi-line text; print it so the table renders
# with real line breaks instead of a quoted string full of escaped "\n".
print(report)

'              precision    recall  f1-score   support\n\n          가족       0.64      1.00      0.78         9\n          연인       0.57      1.00      0.73         8\n\n   micro avg       0.61      1.00      0.76        17\n   macro avg       0.61      1.00      0.75        17\nweighted avg       0.61      1.00      0.76        17\n samples avg       0.61      1.00      0.74        17\n'

In [15]:
import joblib

# cross_val_predict only fits internal clones of `clf`; `clf` itself is still
# unfitted at this point. Train it on all rows before persisting, otherwise the
# pickled model cannot predict.
# The vectorizer is (re)fit here with the same settings used to build the
# training features so that the exact object the model was trained against can
# be saved alongside it.
vectorizer = TfidfVectorizer(max_features=1000)
clf.fit(vectorizer.fit_transform(X), y)

joblib.dump(clf, 'C:/flask_project/models/target_model.pkl')
joblib.dump(mlb, 'C:/flask_project/models/target_encoder.pkl')
# New text must be transformed with this fitted vectorizer before calling
# clf.predict, so it is stored next to the model and label encoder.
joblib.dump(vectorizer, 'C:/flask_project/models/target_vectorizer.pkl')

print("✅ target 관련 모델, 인코더 저장 완료!")

✅ target 관련 모델, 인코더 저장 완료!
