### 머신러닝 파이프라인

#### Pipeline 클래스

In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split

df = pd.read_csv("../../data/Classification/bands.csv")
X = df.drop('y', axis = 1)
y = df['y']
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state = 2022)

In [2]:
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import MinMaxScaler
from sklearn.svm import SVC

imputer = SimpleImputer(strategy = "mean")
scaler = MinMaxScaler()
model = SVC(kernel = "rbf")

In [3]:
from sklearn.pipeline import Pipeline
P = Pipeline([("imputer", imputer), ("scaler", scaler), ("model", model)])
P.fit(X_train, y_train)

Pipeline(steps=[('imputer', SimpleImputer()), ('scaler', MinMaxScaler()),
                ('model', SVC())])

In [4]:
display(P.predict(X_test))

array([0, 1, 0, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 0, 1, 0, 0, 1, 1, 1,
       1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1,
       1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1,
       1, 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1,
       1, 1, 0, 1, 0, 1, 0, 0, 1, 1, 0, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1,
       1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 0, 0, 0, 1, 1, 1, 0, 1,
       1, 1, 1], dtype=int64)

In [5]:
P = Pipeline([("imputer", imputer), ("scaler", scaler)])
P.fit(X_train, y_train)
display(P.transform(X_test)[0])

array([0.31578947, 0.13513514, 0.4175    , 0.39090909, 0.375     ,
       0.52919021, 0.16666667, 0.15794469, 0.43137255, 0.40924376,
       0.5298976 , 0.10813766, 0.00585399, 0.83333333, 0.35714286,
       0.3036564 , 0.66666667, 0.57058481, 1.        ])

#### 파이프라인 커스터마이징하기

In [6]:
imputer.fit(X_train, y_train)
X_train = imputer.transform(X_train)
scaler.fit(X_train, y_train)
X_train = scaler.transf
model.fit(X_train, y_train)

ValueError: Input contains NaN, infinity or a value too large for dtype('float64').

In [None]:
def my_pipeline(X, imputer, scaler, model):
    X = imputer.transform(X)
    X = scaler.transform(X)
    pred_Y = model.predict(X)
    return pred_Y

In [None]:
pred_Y = my_pipeline(X_test, imputer, scaler, model)
display(pred_Y[:5])

### 피클 모듈

In [32]:
import pickle
with open("my_pipeline.pckl", "wb") as f:
    pickle.dump(imputer, f)
    pickle.dump(scaler, f)
    pickle.dump(model, f)

In [33]:
with open("my_pipeline.pckl", "rb") as f:
    loaded_imputer = pickle.load(f)
    loaded_scaler = pickle.load(f)
    loaded_model = pickle.load(f)

In [35]:
my_pipeline_dict = {"imputer":imputer, "scaler":scaler, "model":model}
with open("my_pipeline_dict.pckl", "wb") as f:
    pickle.dump(my_pipeline_dict, f)