# pipeline

In [21]:
import pandas as pd
from sklearn.datasets import load_breast_cancer
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

from sklearn.pipeline import Pipeline
import pickle

In [5]:
cancer=load_breast_cancer()
df=pd.DataFrame(data=cancer.data,columns=cancer.feature_names)
df['target']=cancer.target
df_Original=df.copy()
df.head()

Unnamed: 0,mean radius,mean texture,mean perimeter,mean area,mean smoothness,mean compactness,mean concavity,mean concave points,mean symmetry,mean fractal dimension,...,worst texture,worst perimeter,worst area,worst smoothness,worst compactness,worst concavity,worst concave points,worst symmetry,worst fractal dimension,target
0,17.99,10.38,122.8,1001.0,0.1184,0.2776,0.3001,0.1471,0.2419,0.07871,...,17.33,184.6,2019.0,0.1622,0.6656,0.7119,0.2654,0.4601,0.1189,0
1,20.57,17.77,132.9,1326.0,0.08474,0.07864,0.0869,0.07017,0.1812,0.05667,...,23.41,158.8,1956.0,0.1238,0.1866,0.2416,0.186,0.275,0.08902,0
2,19.69,21.25,130.0,1203.0,0.1096,0.1599,0.1974,0.1279,0.2069,0.05999,...,25.53,152.5,1709.0,0.1444,0.4245,0.4504,0.243,0.3613,0.08758,0
3,11.42,20.38,77.58,386.1,0.1425,0.2839,0.2414,0.1052,0.2597,0.09744,...,26.5,98.87,567.7,0.2098,0.8663,0.6869,0.2575,0.6638,0.173,0
4,20.29,14.34,135.1,1297.0,0.1003,0.1328,0.198,0.1043,0.1809,0.05883,...,16.67,152.2,1575.0,0.1374,0.205,0.4,0.1625,0.2364,0.07678,0


## Scaler

In [6]:
scaler=StandardScaler()
df[cancer.feature_names]=scaler.fit_transform(df[cancer.feature_names])
df.head()

Unnamed: 0,mean radius,mean texture,mean perimeter,mean area,mean smoothness,mean compactness,mean concavity,mean concave points,mean symmetry,mean fractal dimension,...,worst texture,worst perimeter,worst area,worst smoothness,worst compactness,worst concavity,worst concave points,worst symmetry,worst fractal dimension,target
0,1.097064,-2.073335,1.269934,0.984375,1.568466,3.283515,2.652874,2.532475,2.217515,2.255747,...,-1.359293,2.303601,2.001237,1.307686,2.616665,2.109526,2.296076,2.750622,1.937015,0
1,1.829821,-0.353632,1.685955,1.908708,-0.826962,-0.487072,-0.023846,0.548144,0.001392,-0.868652,...,-0.369203,1.535126,1.890489,-0.375612,-0.430444,-0.146749,1.087084,-0.24389,0.28119,0
2,1.579888,0.456187,1.566503,1.558884,0.94221,1.052926,1.363478,2.037231,0.939685,-0.398008,...,-0.023974,1.347475,1.456285,0.527407,1.082932,0.854974,1.955,1.152255,0.201391,0
3,-0.768909,0.253732,-0.592687,-0.764464,3.283553,3.402909,1.915897,1.451707,2.867383,4.910919,...,0.133984,-0.249939,-0.550021,3.394275,3.893397,1.989588,2.175786,6.046041,4.93501,0
4,1.750297,-1.151816,1.776573,1.826229,0.280372,0.53934,1.371011,1.428493,-0.00956,-0.56245,...,-1.46677,1.338539,1.220724,0.220556,-0.313395,0.613179,0.729259,-0.868353,-0.3971,0


## Train/Test split

In [8]:
X=df.drop(columns=['target'])
y=df['target']
X_train, X_test, y_train, y_test = train_test_split(X,y,test_size=0.2,random_state=42)

X_train.shape, X.shape

((455, 30), (569, 30))

## Training

In [9]:
model = MLPClassifier(hidden_layer_sizes=(20, 10),
                      max_iter=1000,
                      early_stopping=True,
                      n_iter_no_change=50,
                      verbose=False)

model.fit(X_train, y_train)

In [10]:
model.score(X_test,y_test)

0.9736842105263158

In [11]:
model.predict(X_test)

array([1, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1,
       0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1,
       1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 1,
       0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0,
       1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 1, 1, 0, 1, 1,
       0, 1, 1, 0])

## pipeline

In [13]:
pipe=Pipeline(steps=[('scaler',scaler),('predictor',model)])
pipe

In [18]:
df_sample = df.sample(10).drop(columns=["target"])
df_sample

Unnamed: 0,mean radius,mean texture,mean perimeter,mean area,mean smoothness,mean compactness,mean concavity,mean concave points,mean symmetry,mean fractal dimension,...,worst radius,worst texture,worst perimeter,worst area,worst smoothness,worst compactness,worst concavity,worst concave points,worst symmetry,worst fractal dimension
310,-0.689385,-0.041805,-0.726555,-0.671746,-0.585,-0.981895,-0.916128,-0.965705,0.454109,-0.215137,...,-0.757754,0.142126,-0.784595,-0.698741,-0.441366,-0.925997,-0.926107,-0.870904,0.948415,-0.796094
73,-0.092956,-0.814392,-0.063393,-0.201331,0.308838,0.448373,-0.136966,0.045677,-0.546249,0.405774,...,0.062293,-0.784455,0.090513,-0.11986,0.382749,0.635726,0.027401,0.360776,-0.504352,1.055903
521,2.982918,0.537634,3.028755,3.373421,0.472519,2.013766,1.785325,2.532475,0.654911,0.651019,...,2.826844,0.204007,2.932082,3.096417,0.080281,1.046672,0.928856,2.02352,0.414547,0.707338
136,-0.686545,-0.60961,-0.710491,-0.65781,0.621966,-0.822323,-0.663898,-0.591176,-1.725504,-0.474559,...,-0.608655,-0.032117,-0.628517,-0.586937,-0.230954,-0.963529,-0.80401,-0.684074,-1.923146,-0.582743
269,-0.970559,0.256059,-0.925504,-0.881641,0.842579,0.46543,-0.054229,-0.522306,-0.524343,0.825384,...,-0.94827,-0.076084,-0.915951,-0.826541,0.049596,0.004675,-0.090138,-0.435575,-0.478468,0.169804
20,-0.297446,-0.833008,-0.261106,-0.383638,0.792763,0.429422,-0.541362,-0.459627,0.567289,0.753087,...,-0.366368,-0.844707,-0.332744,-0.439624,-0.051226,0.148443,-0.399099,-0.63611,0.458227,-0.11725
265,1.875263,2.753002,1.801287,2.17321,-0.118866,0.188738,0.601392,0.968328,-0.155598,-0.858729,...,3.359046,3.498337,3.179304,4.485168,0.338913,0.064472,0.345477,0.78103,-0.052992,-0.097854
119,1.085703,0.167631,0.915698,0.930337,-0.878202,-0.703498,-0.199239,0.181612,1.158741,-1.778754,...,0.892693,0.350566,0.653465,0.66874,-1.103288,-0.852841,-0.226868,0.059289,3.205219,-1.265465
314,-1.570681,-0.160486,-1.560245,-1.233456,0.785646,-0.869323,-1.114873,-1.26182,1.282873,1.529934,...,-1.515262,-0.527162,-1.507497,-1.125913,0.102199,-1.123391,-1.305831,-1.745063,0.39028,-0.154378
519,-0.39117,-0.602629,-0.389619,-0.458154,1.14859,0.139464,-0.62774,-0.48929,1.125883,0.486577,...,-0.376722,-0.641152,-0.406017,-0.450875,0.663299,-0.35856,-0.623143,-0.520083,0.275418,0.090005


pipe.predict(df_sample)

## Export pipeline

In [23]:
pickle.dump(pipe, open('pipe.pkl','wb'))

In [24]:
pipe=pickle.load(open('pipe.pkl','rb'))
pipe