In [47]:
import statsmodels
import numpy as np
import pandas as pd
import pandashelpers
import sklearn
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LinearRegression
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.preprocessing import RobustScaler, FunctionTransformer, Imputer
from sklearn.pipeline import Pipeline, FeatureUnion
from sklearn.base import BaseEstimator, TransformerMixin
import seaborn as sns
from matplotlib import pyplot as plt
from scipy import stats

In [62]:
class ColumnSelector(BaseEstimator,TransformerMixin):
    def __init__(self,names=None):
        self.names=names
        
    def fit(self,X,y=None):        
        return self
    
    def transform(X):
        return X[self.names]
    
    def inverse_transform(X):
        return X

In [63]:
class DummyEncoder(BaseEstimator,TransformerMixin):
    def __init__(self):
        self.levels=pd.Index({})
        
    def fit(self,X,y=None):
        dummies = pd.get_dummies(X)
        self.levels = dummies.columns
        return self
    
    def transform(X):
        dummies = pd.get_dummies(X)
        for level in self.levels.difference(dummies.columns):
            dummies[level]=0
        return X[self.levels]

In [64]:
data = pd.read_csv('data.csv')

In [65]:
y_columns = pd.Index({'roughness',
                      'tension_strength',
                      'elongation',
                     })
x_features = pd.Index({'layer_height',
                       'infill_pattern',
                       'infill_density',
                       'nozzle_temperature',
                       'wall_thickness',
                      })

In [66]:
numeric_features = data[x_features].select_dtypes(include=np.number).columns
non_numeric_features = data[x_features].select_dtypes(exclude=np.number).columns

In [67]:
numeric_transformer = Pipeline(steps=[
    ('project', ColumnSelector(numeric_features)),
    ('imputer', Imputer(strategy='median')),
    ('scaler', RobustScaler())])

categorical_transformer = Pipeline(steps=[
    ('project', ColumnSelector(non_numeric_features)),
    ('imputer', Imputer(strategy='most_frequent')),
    ('dummies', DummyEncoder())])

target_transformer = Pipeline(steps=[
    ('project', ColumnSelector(y_columns)),
    ('scale', RobustScaler()),
    ('log', FunctionTransformer(func=np.log10)), 
])

output_transformer = target_transformer.inverse_transform

pipe = Pipeline(steps=[
    ('features',FeatureUnion([numeric_transformer,
                              categorical_transformer])),    
    ('adjust',target_transformer),
    ('regress', RandomForestRegressor()),
    ('inverse', output_transformer)])

TypeError: zip argument #1 must support iteration

In [None]:
X = data[x_features]
y = data[y_columns]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

In [None]:
fit(X_train, y_train)