In [1]:
import os
# Switch the working directory to the folder that sits at the same level as the
# `easymlops` package, so the relative paths used later (e.g. ./data/demo.csv)
# resolve.
# A relative chdir is not idempotent: re-running this cell would climb two more
# levels each time. Skip it once the `easymlops` folder is already visible.
# NOTE(review): assumes the target directory contains an `easymlops` folder, as
# the original comment states. If it does not, this behaves exactly like the
# unguarded call.
if not os.path.isdir("easymlops"):
    os.chdir("../../")

### 拆分数据

In [2]:
import pandas as pd
# Load the demo dataset (path is relative to the current working directory)
# and preview the first five rows.
demo_csv_path = "./data/demo.csv"
data = pd.read_csv(demo_csv_path)
data.head(5)

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S


In [3]:
# Hold out the last 500 rows for testing; everything before them is training data.
train_part = data[:-500]
test_part = data[-500:]

# The label column is separated from the features.
y_train = train_part["Survived"]
y_test = test_part["Survived"]

# Build the feature frames without the label, leaving the slices untouched
# instead of deleting the column in place.
x_train = train_part.drop(columns=["Survived"])
x_test = test_part.drop(columns=["Survived"])

### pipe构建

In [4]:
from easymlops import TablePipeLine
from easymlops.table.preprocessing import *
from easymlops.table.encoding import *
from easymlops.table.perfopt import *

In [5]:
# Columns handed to OneHotEncoding; drop_col=True is passed so, presumably, the
# source columns are removed after encoding (TODO confirm against easymlops docs).
onehot_cols = ["Pclass", "Sex", "Name", "Ticket", "Embarked", "Cabin"]

# Assemble the pipeline: input fixing -> NA filling -> clipping of "Age" ->
# one-hot encoding -> memory reduction -> dense-to-sparse conversion.
table = TablePipeLine()
(
    table.pipe(FixInput())
    .pipe(FillNa())
    .pipe(Clip(cols=["Age"], default_clip=(1, 99), name="clip_name"))
    .pipe(OneHotEncoding(cols=onehot_cols, drop_col=True))
    .pipe(ReduceMemUsage())
    .pipe(Dense2Sparse())
)

# Fit on the training features, then transform the same data and show its shape.
fitted_table = table.fit(x_train)
x_train_new = fitted_table.transform(x_train)
x_train_new.shape

(391, 814)

In [6]:
# Baseline: run the pipeline only up to the third-from-last layer (i.e. before
# ReduceMemUsage and Dense2Sparse) and report the frame's memory in KB.
baseline_frame = table.transform(x_train, run_to_layer=-3)
baseline_frame.memory_usage().sum() // 1024

312

In [7]:
# After ReduceMemUsage: run up to the second-from-last layer and report the
# frame's memory in KB.
reduced_frame = table.transform(x_train, run_to_layer=-2)
reduced_frame.memory_usage().sum() // 1024

312

In [8]:
# After both ReduceMemUsage and Dense2Sparse: run through the last layer and
# report the frame's memory in KB.
sparse_frame = table.transform(x_train, run_to_layer=-1)
sparse_frame.memory_usage().sum() // 1024

19

### 性能、一致性测试

In [9]:
# Run the pipeline's built-in consistency/performance check on the first
# ten held-out rows.
check_rows = x_test[:10]
table.auto_test(check_rows)


###################################################################
 1.一致性测试和性能测试:check_transform_function                      
###################################################################
(<class 'easymlops.table.preprocessing.core.FixInput'>) module check [transform] complete,speed:[1.69ms]/it,cpu:[0%],memory:[0K]
(<class 'easymlops.table.preprocessing.onevar_operation.FillNa'>) module check [transform] complete,speed:[0.0ms]/it,cpu:[0%],memory:[0K]
(clip_name) module check [transform] complete,speed:[0.0ms]/it,cpu:[0%],memory:[0K]
(<class 'easymlops.table.encoding.OneHotEncoding'>) module check [transform] complete,speed:[0.0ms]/it,cpu:[0%],memory:[0K]
(<class 'easymlops.table.perfopt.ReduceMemUsage'>) module check [transform] complete,speed:[9.38ms]/it,cpu:[17%],memory:[0K]
(<class 'easymlops.table.perfopt.Dense2Sparse'>) module check [transform] complete,speed:[10.94ms]/it,cpu:[19%],memory:[0K]

#############################################################################

### 持久化测试

In [10]:
# Persist the fitted pipeline to disk (path is relative to the working directory).
pipeline_file = "ml.pkl"
table.save(pipeline_file)

In [11]:
# Recreate a pipeline with the same layer layout as the one that was saved.
# NOTE(review): presumably load() expects a matching structure — confirm
# against the easymlops documentation.
encoded_cols = ["Pclass", "Sex", "Name", "Ticket", "Embarked", "Cabin"]

table = TablePipeLine()
(
    table.pipe(FixInput())
    .pipe(FillNa())
    .pipe(Clip(cols=["Age"], default_clip=(1, 99), name="clip_name"))
    .pipe(OneHotEncoding(cols=encoded_cols, drop_col=True))
    .pipe(ReduceMemUsage())
    .pipe(Dense2Sparse())
)

# Restore the state previously written to "ml.pkl".
table.load("ml.pkl")

In [12]:
# Repeat the consistency/performance check on the reloaded pipeline, using the
# first ten held-out rows.
reload_check_rows = x_test[:10]
table.auto_test(reload_check_rows)


###################################################################
 1.一致性测试和性能测试:check_transform_function                      
###################################################################
(<class 'easymlops.table.preprocessing.core.FixInput'>) module check [transform] complete,speed:[0.0ms]/it,cpu:[0%],memory:[0K]
(<class 'easymlops.table.preprocessing.onevar_operation.FillNa'>) module check [transform] complete,speed:[0.0ms]/it,cpu:[0%],memory:[0K]
(clip_name) module check [transform] complete,speed:[0.0ms]/it,cpu:[0%],memory:[0K]
(<class 'easymlops.table.encoding.OneHotEncoding'>) module check [transform] complete,speed:[0.0ms]/it,cpu:[0%],memory:[0K]
(<class 'easymlops.table.perfopt.ReduceMemUsage'>) module check [transform] complete,speed:[10.91ms]/it,cpu:[19%],memory:[0K]
(<class 'easymlops.table.perfopt.Dense2Sparse'>) module check [transform] complete,speed:[10.94ms]/it,cpu:[18%],memory:[0K]

#############################################################################

In [13]:
# Transform the first five held-out rows with the reloaded pipeline and
# display the result.
preview_rows = x_test[:5]
table.transform(preview_rows)

Unnamed: 0,PassengerId,Age,SibSp,Parch,Fare,Pclass_3,Pclass_1,Pclass_2,Sex_male,Sex_female,...,Cabin_C124,Cabin_C91,Cabin_E40,Cabin_T,Cabin_C128,Cabin_D37,Cabin_B35,Cabin_E50,Cabin_C82,Cabin_B96 B98
391,392,21.0,0,0,7.796875,1,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0
392,393,28.0,2,0,7.925781,1,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0
393,394,23.0,1,0,113.25,0,1,0,0,1,...,0,0,0,0,0,0,0,0,0,0
394,395,24.0,0,2,16.703125,1,0,0,0,1,...,0,0,0,0,0,0,0,0,0,0
395,396,22.0,0,0,7.796875,1,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0
