# 머신러닝 모델 저장하기 (joblib)

In [None]:
# !pip install joblib



In [8]:
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from joblib import dump

# Load the wine dataset and separate the 'class' label from the feature columns.
wine_df = pd.read_csv('./data/wine_simple.csv')
y = wine_df['class']
X = wine_df.drop(columns='class')

# Hold out 20% of the rows for testing, stratifying on the label; the fixed
# random_state keeps the split the same between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)

# Configure the random forest (seeded) and fit it on the training split.
model = RandomForestClassifier(
    n_estimators=100,
    max_depth=10,
    random_state=42,
)
model.fit(X_train, y_train)

# Predict on the held-out split and report the accuracy.
y_pred = model.predict(X_test)
print('accuracy_score :', accuracy_score(y_test, y_pred))

# Persist the fitted model to disk. Kept as the last expression of the cell so
# the notebook displays dump()'s return value (the list of written file names).
dump(model, 'rf_model.joblib')


accuracy_score : 0.8646153846153846


['rf_model.joblib']

In [None]:
# Reload the model saved to 'rf_model.joblib' and score it on the same test split.
# NOTE: joblib files are pickle-based; only load files from a trusted source.
from joblib import load

loaded_model = load('rf_model.joblib')

# y_pred is overwritten with the reloaded model's predictions.
y_pred = loaded_model.predict(X_test)

print('accuracy_score :', accuracy_score(y_true=y_test, y_pred=y_pred))

accuracy_score : 0.8646153846153846


In [None]:
# Check that the in-memory model and the model reloaded from disk agree:
# predict with each on the same test split, in that order.
preds, loaded_preds = (
    estimator.predict(X_test) for estimator in (model, loaded_model)
)

# True only when both prediction arrays have the same shape and elements.
np.array_equal(preds, loaded_preds)

True

In [12]:
# List the entries of the current directory to confirm that the saved model
# file ('rf_model.joblib') is present.
import os
os.listdir('./')

['customer_data.csv',
 'data',
 'data_generate.ipynb',
 'model_save.ipynb',
 'rf_model.joblib']

In [None]:
# Train a second forest from scratch. Unlike `model`, it is given no
# random_state and no max_depth, so it is not a seeded copy of the first model.
model2 = RandomForestClassifier(n_estimators=100).fit(X_train, y_train)
new_preds = model2.predict(X_test)

# Compare the retrained model's predictions with the original model's
# predictions (`preds`); the recorded run of this cell printed False.
print(np.array_equal(preds, new_preds))



False
