In [1]:
import os
# Move the working directory two levels up so that relative paths used later
# in the notebook (e.g. "./data/demo.csv") are resolved from there.
os.chdir("../../")  # directory at the same level as easymlops

### 拆分数据

In [2]:
import pandas as pd

# Load the demo table from ./data (relative to the current working directory)
# and preview its first five rows.
data = pd.read_csv(filepath_or_buffer="./data/demo.csv")
data.head(n=5)

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S


In [3]:
# Split `data` by row position: the first TRAIN_SIZE rows are used for
# training, every remaining row is held out for testing.
TRAIN_SIZE = 500
LABEL_COL = "Survived"

# Targets: the label column restricted to each partition.
y_train = data[LABEL_COL].iloc[:TRAIN_SIZE]
y_test = data[LABEL_COL].iloc[TRAIN_SIZE:]

# Features: every column except the label. `drop` returns a new frame, so
# neither `data` nor a slice of it is modified in place and the cell can be
# re-run safely.
x_train = data.iloc[:TRAIN_SIZE].drop(columns=[LABEL_COL])
x_test = data.iloc[TRAIN_SIZE:].drop(columns=[LABEL_COL])

### pipe构建

In [4]:
from easymlops import NLPPipeline
from easymlops.table.preprocessing import *
from easymlops.table.ensemble import *
from easymlops.nlp.preprocessing import *
from easymlops.nlp.representation import *
from easymlops.table.perfopt import *
from easymlops.table.classification import *

In [5]:
# Assemble the pipeline: input fixing, NA filling and column selection on
# "Name"/"Sex", text normalisation stages, three text representations wrapped
# in Parallel, removal of the raw text columns, and an LGBM classifier that
# is given y_train as its target.
nlp = NLPPipeline()
(
    nlp.pipe(FixInput())
    .pipe(FillNa(cols=["Name", "Sex"]))
    .pipe(SelectCols(cols=["Name", "Sex"]))
    .pipe(Lower())
    .pipe(RemovePunctuation())
    .pipe(
        Parallel(
            [
                LsiTopicModel(num_topics=4),
                Word2VecModel(embedding_size=4),
                TFIDF(),
            ]
        )
    )
    .pipe(DropCols(cols=["Name", "Sex"]))
    .pipe(
        LGBMClassification(
            y=y_train,
            support_sparse_input=True,
            native_init_params={"max_depth": 2},
            native_fit_params={"num_boost_round": 128},
        )
    )
)

# Fit on the training rows, transform the held-out rows, and preview the
# result (the recorded output has two columns, 0 and 1 -- presumably
# per-class probabilities; confirm against the library docs).
x_test_new = nlp.fit(x_train).transform(x_test)
x_test_new.head(5)

Unnamed: 0,0,1
500,0.903868,0.096132
501,0.092545,0.907455
502,0.092545,0.907455
503,0.201579,0.798421
504,0.200583,0.799417


In [6]:
# Total memory_usage() of the transform output, integer-divided by 1024
# (KiB if memory_usage() reports bytes, as pandas does).
# NOTE(review): run_to_layer=-1 presumably means "run through the last
# layer" -- confirm against the easymlops documentation.
(
    nlp.transform(x_test, run_to_layer=-1)
    .memory_usage()
    .sum()
    // 1024
)

6

### 性能、一致性测试

In [7]:
# Run the pipeline's auto_test on the first ten held-out rows; the recorded
# report lists a per-stage [transform] check with speed, CPU and memory.
nlp.auto_test(x_test.iloc[:10])


###################################################################
 1.一致性测试和性能测试:check_transform_function                      
###################################################################
(<class 'easymlops.table.preprocessing.core.FixInput'>) module check [transform] complete,speed:[0.0ms]/it,cpu:[0%],memory:[0K]
(<class 'easymlops.table.preprocessing.onevar_operation.FillNa'>) module check [transform] complete,speed:[0.0ms]/it,cpu:[0%],memory:[0K]
(<class 'easymlops.table.preprocessing.core.SelectCols'>) module check [transform] complete,speed:[0.0ms]/it,cpu:[0%],memory:[0K]
(<class 'easymlops.nlp.preprocessing.Lower'>) module check [transform] complete,speed:[0.0ms]/it,cpu:[0%],memory:[0K]
(<class 'easymlops.nlp.preprocessing.RemovePunctuation'>) module check [transform] complete,speed:[0.0ms]/it,cpu:[0%],memory:[0K]
(<class 'easymlops.table.ensemble.Parallel'>) module check [transform] complete,speed:[105.96ms]/it,cpu:[45%],memory:[3088K]
(<class 'easymlops.table.preproce

### 持久化测试

In [8]:
# Save the fitted pipeline `nlp` to the file "ml.pkl", relative to the
# current working directory.
nlp.save("ml.pkl")

In [9]:
# Declare a fresh pipeline with the same stages and arguments as the one that
# was fitted and saved earlier, then load the saved file into it.
# NOTE(review): presumably load() restores fitted state into a matching stage
# structure, which is why the stages are re-declared first -- confirm against
# the easymlops documentation.
nlp = NLPPipeline()
(
    nlp.pipe(FixInput())
    .pipe(FillNa(cols=["Name", "Sex"]))
    .pipe(SelectCols(cols=["Name", "Sex"]))
    .pipe(Lower())
    .pipe(RemovePunctuation())
    .pipe(
        Parallel(
            [
                LsiTopicModel(num_topics=4),
                Word2VecModel(embedding_size=4),
                TFIDF(),
            ]
        )
    )
    .pipe(DropCols(cols=["Name", "Sex"]))
    .pipe(
        LGBMClassification(
            y=y_train,
            support_sparse_input=True,
            native_init_params={"max_depth": 2},
            native_fit_params={"num_boost_round": 128},
        )
    )
)

# NOTE(review): the ".pkl" name suggests a pickle file; only load files from
# a trusted source.
nlp.load("ml.pkl")

In [10]:
# Repeat the auto_test run on the first ten held-out rows, this time against
# the pipeline that was just loaded from "ml.pkl".
nlp.auto_test(x_test.iloc[:10])


###################################################################
 1.一致性测试和性能测试:check_transform_function                      
###################################################################
(<class 'easymlops.table.preprocessing.core.FixInput'>) module check [transform] complete,speed:[0.0ms]/it,cpu:[0%],memory:[0K]
(<class 'easymlops.table.preprocessing.onevar_operation.FillNa'>) module check [transform] complete,speed:[0.0ms]/it,cpu:[0%],memory:[0K]
(<class 'easymlops.table.preprocessing.core.SelectCols'>) module check [transform] complete,speed:[0.0ms]/it,cpu:[0%],memory:[0K]
(<class 'easymlops.nlp.preprocessing.Lower'>) module check [transform] complete,speed:[0.0ms]/it,cpu:[0%],memory:[0K]
(<class 'easymlops.nlp.preprocessing.RemovePunctuation'>) module check [transform] complete,speed:[0.0ms]/it,cpu:[0%],memory:[0K]
(<class 'easymlops.table.ensemble.Parallel'>) module check [transform] complete,speed:[105.74ms]/it,cpu:[23%],memory:[824K]
(<class 'easymlops.table.preproces

In [11]:
# Output of the reloaded pipeline for the first five held-out rows.
nlp.transform(x_test.iloc[:5])

Unnamed: 0,0,1
500,0.903868,0.096132
501,0.092545,0.907455
502,0.092545,0.907455
503,0.201579,0.798421
504,0.200583,0.799417


In [12]:
# First five rows of x_test_new, which was computed by the pipeline before it
# was saved; shown so it can be compared with the reloaded pipeline's output.
x_test_new.head(5)

Unnamed: 0,0,1
500,0.903868,0.096132
501,0.092545,0.907455
502,0.092545,0.907455
503,0.201579,0.798421
504,0.200583,0.799417
