In [329]:
import pandas as pd
from scipy import stats
import json
import copy


# Raise pandas' display limits (rows, columns, console width) for this session.
for option_name, option_value in [
    ('display.max_rows', 500),
    ('display.max_columns', 500),
    ('display.width', 1000),
]:
    pd.set_option(option_name, option_value)

In [330]:

class JSONFlattener:
    """Turn a list of flat, JSON-like dicts into a DataFrame with a fixed column set.

    ``fit`` collects the union of keys seen across the records and decides the
    output column name for each key; ``transform`` then emits one row per
    record using exactly those columns, filling keys a record lacks with
    ``None``.

    Parameters
    ----------
    depth : int, default 1
        Stored on the instance; not consulted by ``fit`` or ``transform``.
    prefix : str, default ''
        When non-empty, every output column is named ``'<prefix>_<key>'``;
        otherwise the key itself is used.
    """

    def __init__(self, depth = 1, prefix = ''):
        self.prefix = prefix
        self.depth = depth

    def fit(self, x):
        """Learn the column set from ``x``, an iterable of dicts.

        Sets ``possible_columns`` (sorted unique keys) and ``col_names``
        (key -> output column name). Returns ``self`` so calls can be chained.
        """
        seen_keys = set()
        for record in x:
            seen_keys.update(record.keys())
        self.possible_columns = sorted(seen_keys)

        self.col_names = dict()
        for key in self.possible_columns:
            if self.prefix:
                self.col_names[key] = '{0}_{1}'.format(self.prefix, key)
            else:
                self.col_names[key] = key
        return self

    def transform(self, x):
        """Return a DataFrame with one row per dict in ``x``.

        Only keys learned in ``fit`` are emitted; keys missing from a record
        become ``None`` and keys unknown to ``fit`` are ignored. ``fit`` must
        have been called first.
        """
        output_columns = [self.col_names[key] for key in self.possible_columns]
        data_dicts = [
            {self.col_names[key]: record.get(key) for key in self.possible_columns}
            for record in x
        ]
        # Passing the columns explicitly keeps the fitted column order and
        # yields a correctly shaped (empty) frame when x has no records.
        return pd.DataFrame(data_dicts, columns=output_columns)


In [331]:
# Location of the saved results file.
# NOTE(review): this is an absolute, machine-specific path; adjust it before
# running the notebook elsewhere.
PAST_RESULTS_PATH = r'C:\Users\TristanDelforge\Documents\data_files/7.json'

# Decode explicitly as UTF-8 (the standard encoding for JSON text) instead of
# relying on the platform's default locale encoding.
with open(PAST_RESULTS_PATH, 'r', encoding='utf-8') as f:
    past_results = json.load(f)
past_results.keys()

dict_keys(['models', 'datasets', 'results'])

In [333]:
# One flat row per model: its id, its type, and every entry of 'model_params'
# promoted to a top-level column. (A preliminary DataFrame that used to be
# built here was immediately overwritten and never displayed, so it is gone.)
model_dicts = [
    {
        'model_id': model['model_id'],
        'model_type': model['model_type'],
        **model['model_params'],
    }
    for model in past_results['models']
]
model_df = pd.DataFrame(model_dicts)
model_df.shape

(140, 9)

In [334]:

# Flatten every entry under past_results['datasets'] into two records that
# share the (dataset_id, transformation_id) pair:
#   * a transformation record: type, parameters and per-input-column descriptions
#   * a general-state record: the entry's 'general_description' values
transformation_dicts = []
general_state_dicts = []

for dataset_entries in past_results['datasets']:
    for entry in dataset_entries:
        transformation_row = {
            'dataset_id': entry['dataset_id'],
            'transformation_id': entry['transformation_id'],
            'transformation_type': entry['transformation_type'],
        }
        transformation_row.update(entry['transformation_parameters'])
        for position, description in enumerate(entry['input_column_descriptions']):
            # Prefix each key with the input column's position so that the
            # descriptions of different input columns get distinct columns.
            transformation_row.update({
                'input_column_{0}_{1}'.format(position, key): value
                for key, value in copy.deepcopy(description).items()
            })

        transformation_dicts.append(transformation_row)

        general_state_row = {
            'dataset_id': entry['dataset_id'],
            'transformation_id': entry['transformation_id'],
        }
        general_state_row.update(entry['general_description'])
        general_state_dicts.append(general_state_row)

transformations_df = pd.DataFrame(transformation_dicts)
general_state_df = pd.DataFrame(general_state_dicts)
transformations_df.shape, general_state_df.shape, model_df.shape


((1303, 35), (1303, 65), (140, 9))

In [None]:
# All result rows for one specific dataset / model pair.
# NOTE(review): in file order this cell precedes the cell that builds
# result_df, so it only works if that cell has already been run.
result_df.loc[
    result_df['dataset_id'].eq('260f8a9a8994498982d42b7eed231a7b')
    & result_df['model_id'].eq('21c63271de524214967f40336aed68d5')
]

In [359]:
 [  # raw result records for one specific dataset / model pair
     run
     for run in past_results['results']
     if run['dataset_id'] == '260f8a9a8994498982d42b7eed231a7b'
     and run['model_id'] == '21c63271de524214967f40336aed68d5'
 ]

[{'model_id': '21c63271de524214967f40336aed68d5',
  'dataset_id': '260f8a9a8994498982d42b7eed231a7b',
  'problem_type': 'regression',
  'strategy': 'run_random_pipelines',
  'validation_metrics': {'mean_absolute_error': 38163.34505392148,
   'mean_squared_error': 4529849436.304764,
   'r2_score': 0.009006941572918703,
   'mean_squared_log_error': 3.60097115117623},
  'test_metrics': {'mean_absolute_error': 38163.34505392148,
   'mean_squared_error': 4529849436.304764,
   'r2_score': 0.009006941572918703,
   'mean_squared_log_error': 3.60097115117623},
  'success': 1},
 {'model_id': '21c63271de524214967f40336aed68d5',
  'dataset_id': '260f8a9a8994498982d42b7eed231a7b',
  'problem_type': 'regression',
  'strategy': 'run_random_pipelines',
  'validation_metrics': {'mean_absolute_error': 38199.9561457893,
   'mean_squared_error': 4534247860.430823,
   'r2_score': 0.008044700368606605,
   'mean_squared_log_error': 3.618280450982836},
  'test_metrics': {'mean_absolute_error': 38199.956145789

In [356]:
# One row per entry in past_results['results']. Note that the
# 'validation_metrics' column holds only the validation mean_absolute_error,
# not the whole metrics dict.
result_dicts = [
    dict(
        model_id=run['model_id'],
        dataset_id=run['dataset_id'],
        validation_metrics=run['validation_metrics']['mean_absolute_error'],
    )
    for run in past_results['results']
]
result_df = pd.DataFrame(result_dicts)
result_df.shape

(1400, 3)

In [357]:
# Keep only rows that have a validation metric value, then preview the first
# 20 rows ordered by model and dataset.
result_df = result_df[result_df['validation_metrics'].notna()]
result_df.sort_values(by=['model_id', 'dataset_id']).head(n=20)

Unnamed: 0,model_id,dataset_id,validation_metrics
1201,00b5287b2bd34836965749d5e58e6bba,d791a57fe5e34f08b92b4e4878fcd8b3,37852.05
1211,00b5287b2bd34836965749d5e58e6bba,d791a57fe5e34f08b92b4e4878fcd8b3,37178.44
1221,00b5287b2bd34836965749d5e58e6bba,d791a57fe5e34f08b92b4e4878fcd8b3,35529.15
1231,00b5287b2bd34836965749d5e58e6bba,d791a57fe5e34f08b92b4e4878fcd8b3,38225.47
1241,00b5287b2bd34836965749d5e58e6bba,d791a57fe5e34f08b92b4e4878fcd8b3,38479.39
1251,00b5287b2bd34836965749d5e58e6bba,d791a57fe5e34f08b92b4e4878fcd8b3,38466.44
1261,00b5287b2bd34836965749d5e58e6bba,d791a57fe5e34f08b92b4e4878fcd8b3,38203.63
1271,00b5287b2bd34836965749d5e58e6bba,d791a57fe5e34f08b92b4e4878fcd8b3,36991.54
1281,00b5287b2bd34836965749d5e58e6bba,d791a57fe5e34f08b92b4e4878fcd8b3,38055.39
1291,00b5287b2bd34836965749d5e58e6bba,d791a57fe5e34f08b92b4e4878fcd8b3,37638.9


In [338]:
# Columns the two frames have in common (the keys an implicit merge would use).
set(general_state_df.columns).intersection(transformations_df.columns)

{'dataset_id', 'transformation_id'}

In [339]:
# Preview the first five rows of the general-state frame.
general_state_df.head(n=5)

Unnamed: 0,dataset_id,transformation_id,rows,columns,feature_columns_size,target_mean,target_var,target_skew,target_perc_unique,target_kurtosis,mean_mean,mean_var,mean_skew,mean_perc_unique,mean_kurtosis,var_mean,var_var,var_skew,var_perc_unique,var_kurtosis,skew_mean,skew_var,skew_skew,skew_perc_unique,skew_kurtosis,perc_unique_mean,perc_unique_var,perc_unique_skew,perc_unique_perc_unique,perc_unique_kurtosis,kurtosis_mean,kurtosis_var,kurtosis_skew,kurtosis_perc_unique,kurtosis_kurtosis,perc_of_values_mode_mean,perc_of_values_mode_var,perc_of_values_mode_skew,perc_of_values_mode_perc_unique,perc_of_values_mode_kurtosis,target_slope_mean,target_slope_var,target_slope_skew,target_slope_perc_unique,target_slope_kurtosis,target_r_value_mean,target_r_value_var,target_r_value_skew,target_r_value_perc_unique,target_r_value_kurtosis,target_p_value_mean,target_p_value_var,target_p_value_skew,target_p_value_perc_unique,target_p_value_kurtosis,nunique_mean,nunique_var,nunique_skew,nunique_perc_unique,nunique_kurtosis,target_f_stat_mean,target_f_stat_var,target_f_stat_skew,target_f_stat_perc_unique,target_f_stat_kurtosis
0,260f8a9a8994498982d42b7eed231a7b,1c7a2a6e7ae14c06b025ffb1659d73e4,13332,124,124,0.008065,11.135529,11.135529,0.008065,119.00813,1792.789004,6.402077,6.402077,0.322581,36.016924,-2.690683,2.491526,2.491526,0.322581,8.988515,-2.690683,2.491526,2.491526,0.322581,8.988515,0.004468,6.402689,6.402689,0.032258,36.021642,14.754817,5.331932,5.331932,0.322581,27.988896,0.3669,0.729624,0.729624,0.895161,-0.15619,2.936962,-0.735893,-0.735893,0.322581,-1.180441,0.02335,5.590404,5.590404,0.322581,30.057257,0.037184,5.239341,5.239341,0.951613,25.13529,411.53012,3.831147,3.831147,0.25,14.60629,38.765662,6.121918,6.121918,0.66129,42.356203
1,260f8a9a8994498982d42b7eed231a7b,baa100d3074540338e44efaff8085d94,13332,125,125,0.008,11.18034,11.18034,0.008,120.008065,1738.697457,6.479498,6.479498,0.328,37.01467,-2.648222,2.435777,2.435777,0.328,8.866827,-2.648222,2.435777,2.435777,0.328,8.866827,0.004397,6.479998,6.479998,0.04,37.018569,14.398459,5.379448,5.379448,0.328,28.630461,0.364677,0.731565,0.731565,0.896,-0.157388,2.785495,-0.6861,-0.6861,0.328,-1.222447,0.022044,5.654107,5.654107,0.328,30.855814,0.036889,5.262174,5.262174,0.952,25.377291,411.53012,3.831147,3.831147,0.248,14.60629,38.765662,6.121918,6.121918,0.656,42.356203
2,260f8a9a8994498982d42b7eed231a7b,80fa7737dd3142919695ede8a8fcb8be,13332,126,126,0.007937,11.224972,11.224972,0.007937,121.008,1698.298729,6.556145,6.556145,0.333333,38.013552,-2.567062,2.276786,2.276786,0.333333,7.994429,-2.567062,2.276786,2.276786,0.333333,7.994429,0.004305,6.556669,6.556669,0.047619,38.017691,14.056377,5.426552,5.426552,0.333333,29.270862,0.364807,0.732505,0.732505,0.888889,-0.136692,-24.286229,-6.532504,-6.532504,0.333333,37.825256,0.020873,5.718449,5.718449,0.333333,31.665242,0.036605,5.28492,5.28492,0.952381,25.619386,411.53012,3.831147,3.831147,0.246032,14.60629,38.765662,6.121918,6.121918,0.650794,42.356203
3,260f8a9a8994498982d42b7eed231a7b,d340de3a6f8f4d91a0f91ce2241be1b1,13332,162,162,0.006173,12.727922,12.727922,0.006173,157.006211,924.394273,8.885577,8.885577,0.277778,73.98341,-0.899716,0.583056,0.583056,0.277778,1.002514,-0.899716,0.583056,0.583056,0.277778,1.002514,0.002355,8.882765,8.882765,0.049383,73.95167,8.195573,6.615734,6.615734,0.277778,48.82793,0.500104,0.425449,0.425449,0.709877,-1.250925,94.624441,-5.844324,-5.844324,0.271605,36.738532,0.011284,7.768826,7.768826,0.277778,62.007701,0.230379,1.259524,1.259524,0.759259,-0.378646,411.53012,3.831147,3.831147,0.191358,14.60629,38.765662,6.121918,6.121918,0.506173,42.356203
4,260f8a9a8994498982d42b7eed231a7b,6c8b45cefea34f6b9496b182a0ec34c2,13332,175,175,0.005714,13.228757,13.228757,0.005714,170.005747,793.779121,9.588688,9.588688,0.268571,86.974656,-0.431375,0.202865,0.202865,0.268571,0.262808,-0.431375,0.202865,0.202865,0.268571,0.262808,0.002043,9.585523,9.585523,0.051429,86.935787,7.581935,7.152731,7.152731,0.268571,57.562202,0.528323,0.238117,0.238117,0.668571,-1.440135,2591.752446,1.108752,1.108752,0.262857,1.04432,0.011985,8.318669,8.318669,0.268571,72.228623,0.215011,1.378426,1.378426,0.714286,-0.064908,411.53012,3.831147,3.831147,0.177143,14.60629,38.765662,6.121918,6.121918,0.468571,42.356203


In [340]:
# Preview the first five rows of the model frame.
model_df.head(n=5)

Unnamed: 0,model_id,model_type,alpha,l1_ratio,objective,boosting_type,num_leaves,learning_rate,n_estimators
0,21c63271de524214967f40336aed68d5,ElasticNet,1.607837,0.600348,,,,,
1,227ad42c8e84480db5ce1fce8ca2f846,ElasticNet,1.826048,0.193469,,,,,
2,1472d7ca77ce412ab8d443c95e570923,ElasticNet,1.956714,0.048165,,,,,
3,95d166e820104ae6ad70522ce807bb2f,LGBMRegressor,,,mape,goss,40.0,0.160605,87.0
4,b4633c3faef44fb985abf8b496a67f21,ElasticNet,1.802553,0.554751,,,,,


In [322]:
# Preview the first five rows of the transformations frame.
transformations_df.head(n=5)

Unnamed: 0,dataset_id,transformation_id,transformation_type,input_column_0_target,input_column_0_type,input_column_0_mean,input_column_0_var,input_column_0_skew,input_column_0_perc_unique,input_column_0_kurtosis,input_column_0_perc_of_values_mode,input_column_0_target_slope,input_column_0_target_r_value,input_column_0_target_p_value,input_column_1_target,input_column_1_type,input_column_1_mean,input_column_1_var,input_column_1_skew,input_column_1_perc_unique,input_column_1_kurtosis,input_column_1_perc_of_values_mode,input_column_1_target_slope,input_column_1_target_r_value,input_column_1_target_p_value,num_of_output_columns,input_column_0_nunique,input_column_0_target_f_stat,max_features,norm,analyzer,max_df,binary,use_idf,ngram_range
0,74de0f823949466494fd7fb33a51e40e,332ae54a8db4479485731f5aef257981,standard_scaler,0,numeric,-147.807381,-1.946423,-1.946423,0.000525,1.787886,0.15129,5.227856,0.027757,0.001349673,,,,,,,,,,,,,,,,,,,,,
1,74de0f823949466494fd7fb33a51e40e,c16f0b42e5b14131ad81bc2612c66267,identity,0,numeric,-39.120762,-4.326942,-4.326942,0.000825,16.721221,0.136064,11.760928,0.036684,2.268453e-05,,,,,,,,,,,,,,,,,,,,,
2,74de0f823949466494fd7fb33a51e40e,11c3566c5f1c4a99b4c484f1806dcc92,sum,0,numeric,-30.189769,-4.986001,-4.986001,0.00075,22.860222,0.120987,7.712221,0.021314,0.01385163,0.0,numeric,-314.435494,-0.784512,-0.784512,0.000375,-1.384663,0.317432,-11.950664,-0.082398,1.582438e-21,,,,,,,,,,
3,74de0f823949466494fd7fb33a51e40e,79a90d466ab4449a8654a34ae201e171,one_hot_encoding,0,string,,,,,,0.388989,,,8.055766e-22,,,,,,,,,,,,37.0,7.0,18.729886,,,,,,,
4,74de0f823949466494fd7fb33a51e40e,c645b761280e4b47a774d6cfb428f4b9,max,0,numeric,-314.435494,-0.784512,-0.784512,0.000375,-1.384663,0.317432,-11.950664,-0.082398,1.582438e-21,0.0,numeric,-329.918167,-0.7043,-0.7043,0.000525,-1.504039,0.333933,-8.896932,-0.06225,6.2966e-13,,,,,,,,,,


In [323]:
# Preview the first five rows of the results frame.
result_df.head(n=5)

Unnamed: 0,model_id,dataset_id,validation_metrics
0,62101a63d4f44306ab0b74f2fdf9d33e,74de0f823949466494fd7fb33a51e40e,38089.386057
1,a1a58eab86ca4b88b4a8923792a368a6,74de0f823949466494fd7fb33a51e40e,38050.691729
2,51d11448d9bc4d5e8ad64cc3606c7740,74de0f823949466494fd7fb33a51e40e,59766.915511
3,d6a72b76c2ad44d9b54d522cbcd42a7b,74de0f823949466494fd7fb33a51e40e,36228.136096
4,87ceb07e4e1142a9b43cf3a9db21d567,74de0f823949466494fd7fb33a51e40e,38478.969693


In [347]:
# Inner-join the general-state and transformation records on whatever columns
# the two frames share, then preview the result.
merge_df = pd.merge(general_state_df, transformations_df, how='inner')
merge_df.head(n=5)

Unnamed: 0,dataset_id,transformation_id,rows,columns,feature_columns_size,target_mean,target_var,target_skew,target_perc_unique,target_kurtosis,mean_mean,mean_var,mean_skew,mean_perc_unique,mean_kurtosis,var_mean,var_var,var_skew,var_perc_unique,var_kurtosis,skew_mean,skew_var,skew_skew,skew_perc_unique,skew_kurtosis,perc_unique_mean,perc_unique_var,perc_unique_skew,perc_unique_perc_unique,perc_unique_kurtosis,kurtosis_mean,kurtosis_var,kurtosis_skew,kurtosis_perc_unique,kurtosis_kurtosis,perc_of_values_mode_mean,perc_of_values_mode_var,perc_of_values_mode_skew,perc_of_values_mode_perc_unique,perc_of_values_mode_kurtosis,target_slope_mean,target_slope_var,target_slope_skew,target_slope_perc_unique,target_slope_kurtosis,target_r_value_mean,target_r_value_var,target_r_value_skew,target_r_value_perc_unique,target_r_value_kurtosis,target_p_value_mean,target_p_value_var,target_p_value_skew,target_p_value_perc_unique,target_p_value_kurtosis,nunique_mean,nunique_var,nunique_skew,nunique_perc_unique,nunique_kurtosis,target_f_stat_mean,target_f_stat_var,target_f_stat_skew,target_f_stat_perc_unique,target_f_stat_kurtosis,transformation_type,input_column_0_target,input_column_0_type,input_column_0_mean,input_column_0_var,input_column_0_skew,input_column_0_perc_unique,input_column_0_kurtosis,input_column_0_perc_of_values_mode,input_column_0_target_slope,input_column_0_target_r_value,input_column_0_target_p_value,input_column_1_target,input_column_1_type,input_column_1_mean,input_column_1_var,input_column_1_skew,input_column_1_perc_unique,input_column_1_kurtosis,input_column_1_perc_of_values_mode,input_column_1_target_slope,input_column_1_target_r_value,input_column_1_target_p_value,input_column_0_nunique,input_column_0_target_f_stat,max_features,norm,analyzer,max_df,binary,use_idf,ngram_range,num_of_output_columns
0,260f8a9a8994498982d42b7eed231a7b,1c7a2a6e7ae14c06b025ffb1659d73e4,13332,124,124,0.008065,11.135529,11.135529,0.008065,119.00813,1792.789004,6.402077,6.402077,0.322581,36.016924,-2.690683,2.491526,2.491526,0.322581,8.988515,-2.690683,2.491526,2.491526,0.322581,8.988515,0.004468,6.402689,6.402689,0.032258,36.021642,14.754817,5.331932,5.331932,0.322581,27.988896,0.3669,0.729624,0.729624,0.895161,-0.15619,2.936962,-0.735893,-0.735893,0.322581,-1.180441,0.02335,5.590404,5.590404,0.322581,30.057257,0.037184,5.239341,5.239341,0.951613,25.13529,411.53012,3.831147,3.831147,0.25,14.60629,38.765662,6.121918,6.121918,0.66129,42.356203,sum,0,numeric,-331.248575,-0.704291,-0.704291,0.000525,-1.504039,0.333933,-8.964766,-0.062599,4.675794e-13,0.0,numeric,-147.807381,-1.946423,-1.946423,0.000525,1.787886,0.15129,5.227856,0.027757,0.00135,,,,,,,,,,
1,260f8a9a8994498982d42b7eed231a7b,baa100d3074540338e44efaff8085d94,13332,125,125,0.008,11.18034,11.18034,0.008,120.008065,1738.697457,6.479498,6.479498,0.328,37.01467,-2.648222,2.435777,2.435777,0.328,8.866827,-2.648222,2.435777,2.435777,0.328,8.866827,0.004397,6.479998,6.479998,0.04,37.018569,14.398459,5.379448,5.379448,0.328,28.630461,0.364677,0.731565,0.731565,0.896,-0.157388,2.785495,-0.6861,-0.6861,0.328,-1.222447,0.022044,5.654107,5.654107,0.328,30.855814,0.036889,5.262174,5.262174,0.952,25.377291,411.53012,3.831147,3.831147,0.248,14.60629,38.765662,6.121918,6.121918,0.656,42.356203,dictionary_encode,0,string,,,,,,0.381113,,,0.006862342,,,,,,,,,,,,7.0,2.961435,,,,,,,,
2,260f8a9a8994498982d42b7eed231a7b,80fa7737dd3142919695ede8a8fcb8be,13332,126,126,0.007937,11.224972,11.224972,0.007937,121.008,1698.298729,6.556145,6.556145,0.333333,38.013552,-2.567062,2.276786,2.276786,0.333333,7.994429,-2.567062,2.276786,2.276786,0.333333,7.994429,0.004305,6.556669,6.556669,0.047619,38.017691,14.056377,5.426552,5.426552,0.333333,29.270862,0.364807,0.732505,0.732505,0.888889,-0.136692,-24.286229,-6.532504,-6.532504,0.333333,37.825256,0.020873,5.718449,5.718449,0.333333,31.665242,0.036605,5.28492,5.28492,0.952381,25.619386,411.53012,3.831147,3.831147,0.246032,14.60629,38.765662,6.121918,6.121918,0.650794,42.356203,TextBOWVectorizer,0,string,,,,,,0.578158,,,2.225692e-06,,,,,,,,,,,,4.0,9.681076,36.0,l2,word,0.174263,True,False,"[1, 4]",
3,260f8a9a8994498982d42b7eed231a7b,d340de3a6f8f4d91a0f91ce2241be1b1,13332,162,162,0.006173,12.727922,12.727922,0.006173,157.006211,924.394273,8.885577,8.885577,0.277778,73.98341,-0.899716,0.583056,0.583056,0.277778,1.002514,-0.899716,0.583056,0.583056,0.277778,1.002514,0.002355,8.882765,8.882765,0.049383,73.95167,8.195573,6.615734,6.615734,0.277778,48.82793,0.500104,0.425449,0.425449,0.709877,-1.250925,94.624441,-5.844324,-5.844324,0.271605,36.738532,0.011284,7.768826,7.768826,0.277778,62.007701,0.230379,1.259524,1.259524,0.759259,-0.378646,411.53012,3.831147,3.831147,0.191358,14.60629,38.765662,6.121918,6.121918,0.506173,42.356203,TextBOWVectorizer,0,string,,,,,,0.265302,,,1.060653e-60,,,,,,,,,,,,2956.0,1.590571,13.0,l2,word,0.931799,False,True,"[2, 4]",
4,260f8a9a8994498982d42b7eed231a7b,6c8b45cefea34f6b9496b182a0ec34c2,13332,175,175,0.005714,13.228757,13.228757,0.005714,170.005747,793.779121,9.588688,9.588688,0.268571,86.974656,-0.431375,0.202865,0.202865,0.268571,0.262808,-0.431375,0.202865,0.202865,0.268571,0.262808,0.002043,9.585523,9.585523,0.051429,86.935787,7.581935,7.152731,7.152731,0.268571,57.562202,0.528323,0.238117,0.238117,0.668571,-1.440135,2591.752446,1.108752,1.108752,0.262857,1.04432,0.011985,8.318669,8.318669,0.268571,72.228623,0.215011,1.378426,1.378426,0.714286,-0.064908,411.53012,3.831147,3.831147,0.177143,14.60629,38.765662,6.121918,6.121918,0.468571,42.356203,sum,0,numeric,-147.778353,-1.946427,-1.946427,0.000525,1.787892,0.15129,5.215961,0.027694,0.001383772,0.0,numeric,0.0,0.0,0.0,0.0,-3.0,1.0,,0.0,1.0,,,,,,,,,,


In [348]:
# Attach the results, joining on the columns the two frames share.
# NOTE(review): the only shared column appears to be dataset_id, so every
# transformation row is paired with every result of its dataset; confirm this
# fan-out is intended.
merge_df = pd.merge(merge_df, result_df, how='inner')
merge_df.head(n=5)

Unnamed: 0,dataset_id,transformation_id,rows,columns,feature_columns_size,target_mean,target_var,target_skew,target_perc_unique,target_kurtosis,mean_mean,mean_var,mean_skew,mean_perc_unique,mean_kurtosis,var_mean,var_var,var_skew,var_perc_unique,var_kurtosis,skew_mean,skew_var,skew_skew,skew_perc_unique,skew_kurtosis,perc_unique_mean,perc_unique_var,perc_unique_skew,perc_unique_perc_unique,perc_unique_kurtosis,kurtosis_mean,kurtosis_var,kurtosis_skew,kurtosis_perc_unique,kurtosis_kurtosis,perc_of_values_mode_mean,perc_of_values_mode_var,perc_of_values_mode_skew,perc_of_values_mode_perc_unique,perc_of_values_mode_kurtosis,target_slope_mean,target_slope_var,target_slope_skew,target_slope_perc_unique,target_slope_kurtosis,target_r_value_mean,target_r_value_var,target_r_value_skew,target_r_value_perc_unique,target_r_value_kurtosis,target_p_value_mean,target_p_value_var,target_p_value_skew,target_p_value_perc_unique,target_p_value_kurtosis,nunique_mean,nunique_var,nunique_skew,nunique_perc_unique,nunique_kurtosis,target_f_stat_mean,target_f_stat_var,target_f_stat_skew,target_f_stat_perc_unique,target_f_stat_kurtosis,transformation_type,input_column_0_target,input_column_0_type,input_column_0_mean,input_column_0_var,input_column_0_skew,input_column_0_perc_unique,input_column_0_kurtosis,input_column_0_perc_of_values_mode,input_column_0_target_slope,input_column_0_target_r_value,input_column_0_target_p_value,input_column_1_target,input_column_1_type,input_column_1_mean,input_column_1_var,input_column_1_skew,input_column_1_perc_unique,input_column_1_kurtosis,input_column_1_perc_of_values_mode,input_column_1_target_slope,input_column_1_target_r_value,input_column_1_target_p_value,input_column_0_nunique,input_column_0_target_f_stat,max_features,norm,analyzer,max_df,binary,use_idf,ngram_range,num_of_output_columns,model_id,validation_metrics
0,260f8a9a8994498982d42b7eed231a7b,1c7a2a6e7ae14c06b025ffb1659d73e4,13332,124,124,0.008065,11.135529,11.135529,0.008065,119.00813,1792.789004,6.402077,6.402077,0.322581,36.016924,-2.690683,2.491526,2.491526,0.322581,8.988515,-2.690683,2.491526,2.491526,0.322581,8.988515,0.004468,6.402689,6.402689,0.032258,36.021642,14.754817,5.331932,5.331932,0.322581,27.988896,0.3669,0.729624,0.729624,0.895161,-0.15619,2.936962,-0.735893,-0.735893,0.322581,-1.180441,0.02335,5.590404,5.590404,0.322581,30.057257,0.037184,5.239341,5.239341,0.951613,25.13529,411.53012,3.831147,3.831147,0.25,14.60629,38.765662,6.121918,6.121918,0.66129,42.356203,sum,0,numeric,-331.248575,-0.704291,-0.704291,0.000525,-1.504039,0.333933,-8.964766,-0.062599,4.675794e-13,0.0,numeric,-147.807381,-1.946423,-1.946423,0.000525,1.787886,0.15129,5.227856,0.027757,0.00135,,,,,,,,,,,21c63271de524214967f40336aed68d5,38163.345054
1,260f8a9a8994498982d42b7eed231a7b,1c7a2a6e7ae14c06b025ffb1659d73e4,13332,124,124,0.008065,11.135529,11.135529,0.008065,119.00813,1792.789004,6.402077,6.402077,0.322581,36.016924,-2.690683,2.491526,2.491526,0.322581,8.988515,-2.690683,2.491526,2.491526,0.322581,8.988515,0.004468,6.402689,6.402689,0.032258,36.021642,14.754817,5.331932,5.331932,0.322581,27.988896,0.3669,0.729624,0.729624,0.895161,-0.15619,2.936962,-0.735893,-0.735893,0.322581,-1.180441,0.02335,5.590404,5.590404,0.322581,30.057257,0.037184,5.239341,5.239341,0.951613,25.13529,411.53012,3.831147,3.831147,0.25,14.60629,38.765662,6.121918,6.121918,0.66129,42.356203,sum,0,numeric,-331.248575,-0.704291,-0.704291,0.000525,-1.504039,0.333933,-8.964766,-0.062599,4.675794e-13,0.0,numeric,-147.807381,-1.946423,-1.946423,0.000525,1.787886,0.15129,5.227856,0.027757,0.00135,,,,,,,,,,,227ad42c8e84480db5ce1fce8ca2f846,38183.198311
2,260f8a9a8994498982d42b7eed231a7b,1c7a2a6e7ae14c06b025ffb1659d73e4,13332,124,124,0.008065,11.135529,11.135529,0.008065,119.00813,1792.789004,6.402077,6.402077,0.322581,36.016924,-2.690683,2.491526,2.491526,0.322581,8.988515,-2.690683,2.491526,2.491526,0.322581,8.988515,0.004468,6.402689,6.402689,0.032258,36.021642,14.754817,5.331932,5.331932,0.322581,27.988896,0.3669,0.729624,0.729624,0.895161,-0.15619,2.936962,-0.735893,-0.735893,0.322581,-1.180441,0.02335,5.590404,5.590404,0.322581,30.057257,0.037184,5.239341,5.239341,0.951613,25.13529,411.53012,3.831147,3.831147,0.25,14.60629,38.765662,6.121918,6.121918,0.66129,42.356203,sum,0,numeric,-331.248575,-0.704291,-0.704291,0.000525,-1.504039,0.333933,-8.964766,-0.062599,4.675794e-13,0.0,numeric,-147.807381,-1.946423,-1.946423,0.000525,1.787886,0.15129,5.227856,0.027757,0.00135,,,,,,,,,,,1472d7ca77ce412ab8d443c95e570923,38187.682821
3,260f8a9a8994498982d42b7eed231a7b,1c7a2a6e7ae14c06b025ffb1659d73e4,13332,124,124,0.008065,11.135529,11.135529,0.008065,119.00813,1792.789004,6.402077,6.402077,0.322581,36.016924,-2.690683,2.491526,2.491526,0.322581,8.988515,-2.690683,2.491526,2.491526,0.322581,8.988515,0.004468,6.402689,6.402689,0.032258,36.021642,14.754817,5.331932,5.331932,0.322581,27.988896,0.3669,0.729624,0.729624,0.895161,-0.15619,2.936962,-0.735893,-0.735893,0.322581,-1.180441,0.02335,5.590404,5.590404,0.322581,30.057257,0.037184,5.239341,5.239341,0.951613,25.13529,411.53012,3.831147,3.831147,0.25,14.60629,38.765662,6.121918,6.121918,0.66129,42.356203,sum,0,numeric,-331.248575,-0.704291,-0.704291,0.000525,-1.504039,0.333933,-8.964766,-0.062599,4.675794e-13,0.0,numeric,-147.807381,-1.946423,-1.946423,0.000525,1.787886,0.15129,5.227856,0.027757,0.00135,,,,,,,,,,,95d166e820104ae6ad70522ce807bb2f,76732.803132
4,260f8a9a8994498982d42b7eed231a7b,1c7a2a6e7ae14c06b025ffb1659d73e4,13332,124,124,0.008065,11.135529,11.135529,0.008065,119.00813,1792.789004,6.402077,6.402077,0.322581,36.016924,-2.690683,2.491526,2.491526,0.322581,8.988515,-2.690683,2.491526,2.491526,0.322581,8.988515,0.004468,6.402689,6.402689,0.032258,36.021642,14.754817,5.331932,5.331932,0.322581,27.988896,0.3669,0.729624,0.729624,0.895161,-0.15619,2.936962,-0.735893,-0.735893,0.322581,-1.180441,0.02335,5.590404,5.590404,0.322581,30.057257,0.037184,5.239341,5.239341,0.951613,25.13529,411.53012,3.831147,3.831147,0.25,14.60629,38.765662,6.121918,6.121918,0.66129,42.356203,sum,0,numeric,-331.248575,-0.704291,-0.704291,0.000525,-1.504039,0.333933,-8.964766,-0.062599,4.675794e-13,0.0,numeric,-147.807381,-1.946423,-1.946423,0.000525,1.787886,0.15129,5.227856,0.027757,0.00135,,,,,,,,,,,b4633c3faef44fb985abf8b496a67f21,38169.454395


In [349]:
# Columns shared by the model frame and the merged frame.
set(model_df.columns).intersection(merge_df.columns)

{'model_id'}

In [350]:
# Attach each model's type and parameters, joining on the columns the two
# frames share, then preview the result.
merge_df = pd.merge(merge_df, model_df, how='inner')
merge_df.head(n=5)


Unnamed: 0,dataset_id,transformation_id,rows,columns,feature_columns_size,target_mean,target_var,target_skew,target_perc_unique,target_kurtosis,mean_mean,mean_var,mean_skew,mean_perc_unique,mean_kurtosis,var_mean,var_var,var_skew,var_perc_unique,var_kurtosis,skew_mean,skew_var,skew_skew,skew_perc_unique,skew_kurtosis,perc_unique_mean,perc_unique_var,perc_unique_skew,perc_unique_perc_unique,perc_unique_kurtosis,kurtosis_mean,kurtosis_var,kurtosis_skew,kurtosis_perc_unique,kurtosis_kurtosis,perc_of_values_mode_mean,perc_of_values_mode_var,perc_of_values_mode_skew,perc_of_values_mode_perc_unique,perc_of_values_mode_kurtosis,target_slope_mean,target_slope_var,target_slope_skew,target_slope_perc_unique,target_slope_kurtosis,target_r_value_mean,target_r_value_var,target_r_value_skew,target_r_value_perc_unique,target_r_value_kurtosis,target_p_value_mean,target_p_value_var,target_p_value_skew,target_p_value_perc_unique,target_p_value_kurtosis,nunique_mean,nunique_var,nunique_skew,nunique_perc_unique,nunique_kurtosis,target_f_stat_mean,target_f_stat_var,target_f_stat_skew,target_f_stat_perc_unique,target_f_stat_kurtosis,transformation_type,input_column_0_target,input_column_0_type,input_column_0_mean,input_column_0_var,input_column_0_skew,input_column_0_perc_unique,input_column_0_kurtosis,input_column_0_perc_of_values_mode,input_column_0_target_slope,input_column_0_target_r_value,input_column_0_target_p_value,input_column_1_target,input_column_1_type,input_column_1_mean,input_column_1_var,input_column_1_skew,input_column_1_perc_unique,input_column_1_kurtosis,input_column_1_perc_of_values_mode,input_column_1_target_slope,input_column_1_target_r_value,input_column_1_target_p_value,input_column_0_nunique,input_column_0_target_f_stat,max_features,norm,analyzer,max_df,binary,use_idf,ngram_range,num_of_output_columns,model_id,validation_metrics,model_type,alpha,l1_ratio,objective,boosting_type,num_leaves,learning_rate,n_estimators
0,260f8a9a8994498982d42b7eed231a7b,1c7a2a6e7ae14c06b025ffb1659d73e4,13332,124,124,0.008065,11.135529,11.135529,0.008065,119.00813,1792.789004,6.402077,6.402077,0.322581,36.016924,-2.690683,2.491526,2.491526,0.322581,8.988515,-2.690683,2.491526,2.491526,0.322581,8.988515,0.004468,6.402689,6.402689,0.032258,36.021642,14.754817,5.331932,5.331932,0.322581,27.988896,0.3669,0.729624,0.729624,0.895161,-0.15619,2.936962,-0.735893,-0.735893,0.322581,-1.180441,0.02335,5.590404,5.590404,0.322581,30.057257,0.037184,5.239341,5.239341,0.951613,25.13529,411.53012,3.831147,3.831147,0.25,14.60629,38.765662,6.121918,6.121918,0.66129,42.356203,sum,0,numeric,-331.248575,-0.704291,-0.704291,0.000525,-1.504039,0.333933,-8.964766,-0.062599,4.675794e-13,0.0,numeric,-147.807381,-1.946423,-1.946423,0.000525,1.787886,0.15129,5.227856,0.027757,0.00135,,,,,,,,,,,21c63271de524214967f40336aed68d5,38163.345054,ElasticNet,1.607837,0.600348,,,,,
1,260f8a9a8994498982d42b7eed231a7b,1c7a2a6e7ae14c06b025ffb1659d73e4,13332,124,124,0.008065,11.135529,11.135529,0.008065,119.00813,1792.789004,6.402077,6.402077,0.322581,36.016924,-2.690683,2.491526,2.491526,0.322581,8.988515,-2.690683,2.491526,2.491526,0.322581,8.988515,0.004468,6.402689,6.402689,0.032258,36.021642,14.754817,5.331932,5.331932,0.322581,27.988896,0.3669,0.729624,0.729624,0.895161,-0.15619,2.936962,-0.735893,-0.735893,0.322581,-1.180441,0.02335,5.590404,5.590404,0.322581,30.057257,0.037184,5.239341,5.239341,0.951613,25.13529,411.53012,3.831147,3.831147,0.25,14.60629,38.765662,6.121918,6.121918,0.66129,42.356203,sum,0,numeric,-331.248575,-0.704291,-0.704291,0.000525,-1.504039,0.333933,-8.964766,-0.062599,4.675794e-13,0.0,numeric,-147.807381,-1.946423,-1.946423,0.000525,1.787886,0.15129,5.227856,0.027757,0.00135,,,,,,,,,,,21c63271de524214967f40336aed68d5,38199.956146,ElasticNet,1.607837,0.600348,,,,,
2,260f8a9a8994498982d42b7eed231a7b,1c7a2a6e7ae14c06b025ffb1659d73e4,13332,124,124,0.008065,11.135529,11.135529,0.008065,119.00813,1792.789004,6.402077,6.402077,0.322581,36.016924,-2.690683,2.491526,2.491526,0.322581,8.988515,-2.690683,2.491526,2.491526,0.322581,8.988515,0.004468,6.402689,6.402689,0.032258,36.021642,14.754817,5.331932,5.331932,0.322581,27.988896,0.3669,0.729624,0.729624,0.895161,-0.15619,2.936962,-0.735893,-0.735893,0.322581,-1.180441,0.02335,5.590404,5.590404,0.322581,30.057257,0.037184,5.239341,5.239341,0.951613,25.13529,411.53012,3.831147,3.831147,0.25,14.60629,38.765662,6.121918,6.121918,0.66129,42.356203,sum,0,numeric,-331.248575,-0.704291,-0.704291,0.000525,-1.504039,0.333933,-8.964766,-0.062599,4.675794e-13,0.0,numeric,-147.807381,-1.946423,-1.946423,0.000525,1.787886,0.15129,5.227856,0.027757,0.00135,,,,,,,,,,,21c63271de524214967f40336aed68d5,38082.021069,ElasticNet,1.607837,0.600348,,,,,
3,260f8a9a8994498982d42b7eed231a7b,1c7a2a6e7ae14c06b025ffb1659d73e4,13332,124,124,0.008065,11.135529,11.135529,0.008065,119.00813,1792.789004,6.402077,6.402077,0.322581,36.016924,-2.690683,2.491526,2.491526,0.322581,8.988515,-2.690683,2.491526,2.491526,0.322581,8.988515,0.004468,6.402689,6.402689,0.032258,36.021642,14.754817,5.331932,5.331932,0.322581,27.988896,0.3669,0.729624,0.729624,0.895161,-0.15619,2.936962,-0.735893,-0.735893,0.322581,-1.180441,0.02335,5.590404,5.590404,0.322581,30.057257,0.037184,5.239341,5.239341,0.951613,25.13529,411.53012,3.831147,3.831147,0.25,14.60629,38.765662,6.121918,6.121918,0.66129,42.356203,sum,0,numeric,-331.248575,-0.704291,-0.704291,0.000525,-1.504039,0.333933,-8.964766,-0.062599,4.675794e-13,0.0,numeric,-147.807381,-1.946423,-1.946423,0.000525,1.787886,0.15129,5.227856,0.027757,0.00135,,,,,,,,,,,21c63271de524214967f40336aed68d5,37589.436341,ElasticNet,1.607837,0.600348,,,,,
4,260f8a9a8994498982d42b7eed231a7b,1c7a2a6e7ae14c06b025ffb1659d73e4,13332,124,124,0.008065,11.135529,11.135529,0.008065,119.00813,1792.789004,6.402077,6.402077,0.322581,36.016924,-2.690683,2.491526,2.491526,0.322581,8.988515,-2.690683,2.491526,2.491526,0.322581,8.988515,0.004468,6.402689,6.402689,0.032258,36.021642,14.754817,5.331932,5.331932,0.322581,27.988896,0.3669,0.729624,0.729624,0.895161,-0.15619,2.936962,-0.735893,-0.735893,0.322581,-1.180441,0.02335,5.590404,5.590404,0.322581,30.057257,0.037184,5.239341,5.239341,0.951613,25.13529,411.53012,3.831147,3.831147,0.25,14.60629,38.765662,6.121918,6.121918,0.66129,42.356203,sum,0,numeric,-331.248575,-0.704291,-0.704291,0.000525,-1.504039,0.333933,-8.964766,-0.062599,4.675794e-13,0.0,numeric,-147.807381,-1.946423,-1.946423,0.000525,1.787886,0.15129,5.227856,0.027757,0.00135,,,,,,,,,,,21c63271de524214967f40336aed68d5,38197.062348,ElasticNet,1.607837,0.600348,,,,,


In [353]:
# All result rows recorded for one specific dataset / model pair.
result_df.loc[
    result_df['dataset_id'].eq('260f8a9a8994498982d42b7eed231a7b')
    & result_df['model_id'].eq('21c63271de524214967f40336aed68d5')
]

Unnamed: 0,model_id,dataset_id,validation_metrics
0,21c63271de524214967f40336aed68d5,260f8a9a8994498982d42b7eed231a7b,38163.345054
10,21c63271de524214967f40336aed68d5,260f8a9a8994498982d42b7eed231a7b,38199.956146
20,21c63271de524214967f40336aed68d5,260f8a9a8994498982d42b7eed231a7b,38082.021069
30,21c63271de524214967f40336aed68d5,260f8a9a8994498982d42b7eed231a7b,37589.436341
40,21c63271de524214967f40336aed68d5,260f8a9a8994498982d42b7eed231a7b,38197.062348
50,21c63271de524214967f40336aed68d5,260f8a9a8994498982d42b7eed231a7b,37431.546144
60,21c63271de524214967f40336aed68d5,260f8a9a8994498982d42b7eed231a7b,37581.787445
70,21c63271de524214967f40336aed68d5,260f8a9a8994498982d42b7eed231a7b,37893.186062
80,21c63271de524214967f40336aed68d5,260f8a9a8994498982d42b7eed231a7b,38193.276718
90,21c63271de524214967f40336aed68d5,260f8a9a8994498982d42b7eed231a7b,37785.907866


In [354]:
# Dimensions of result_df as a (rows, columns) tuple.
result_df.shape

(1375, 3)

In [352]:
# First 20 rows, narrowed to the identifier columns plus the validation metric.
(
    merge_df
    .loc[:, ['dataset_id', 'transformation_id', 'model_id', 'validation_metrics']]
    .head(20)
)

Unnamed: 0,dataset_id,transformation_id,model_id,validation_metrics
0,260f8a9a8994498982d42b7eed231a7b,1c7a2a6e7ae14c06b025ffb1659d73e4,21c63271de524214967f40336aed68d5,38163.345054
1,260f8a9a8994498982d42b7eed231a7b,1c7a2a6e7ae14c06b025ffb1659d73e4,21c63271de524214967f40336aed68d5,38199.956146
2,260f8a9a8994498982d42b7eed231a7b,1c7a2a6e7ae14c06b025ffb1659d73e4,21c63271de524214967f40336aed68d5,38082.021069
3,260f8a9a8994498982d42b7eed231a7b,1c7a2a6e7ae14c06b025ffb1659d73e4,21c63271de524214967f40336aed68d5,37589.436341
4,260f8a9a8994498982d42b7eed231a7b,1c7a2a6e7ae14c06b025ffb1659d73e4,21c63271de524214967f40336aed68d5,38197.062348
5,260f8a9a8994498982d42b7eed231a7b,1c7a2a6e7ae14c06b025ffb1659d73e4,21c63271de524214967f40336aed68d5,37431.546144
6,260f8a9a8994498982d42b7eed231a7b,1c7a2a6e7ae14c06b025ffb1659d73e4,21c63271de524214967f40336aed68d5,37581.787445
7,260f8a9a8994498982d42b7eed231a7b,1c7a2a6e7ae14c06b025ffb1659d73e4,21c63271de524214967f40336aed68d5,37893.186062
8,260f8a9a8994498982d42b7eed231a7b,1c7a2a6e7ae14c06b025ffb1659d73e4,21c63271de524214967f40336aed68d5,38193.276718
9,260f8a9a8994498982d42b7eed231a7b,1c7a2a6e7ae14c06b025ffb1659d73e4,21c63271de524214967f40336aed68d5,37785.907866


In [326]:
# Remove the identifier columns so only the validation metric and the
# descriptive feature columns remain in merge_df.
# errors='ignore' keeps this cell re-runnable: because merge_df is overwritten
# with its own result, a second execution would otherwise raise KeyError for
# columns that have already been dropped.
merge_df = merge_df.drop(
    columns=['model_id', 'dataset_id', 'transformation_id'],
    errors='ignore',
)

In [327]:
# Preview the first rows of merge_df (head() defaults to 5 rows).
merge_df.head()

Unnamed: 0,validation_metrics,transformation_type,input_column_0_target,input_column_0_type,input_column_0_mean,input_column_0_var,input_column_0_skew,input_column_0_perc_unique,input_column_0_kurtosis,input_column_0_perc_of_values_mode,input_column_0_target_slope,input_column_0_target_r_value,input_column_0_target_p_value,input_column_1_target,input_column_1_type,input_column_1_mean,input_column_1_var,input_column_1_skew,input_column_1_perc_unique,input_column_1_kurtosis,input_column_1_perc_of_values_mode,input_column_1_target_slope,input_column_1_target_r_value,input_column_1_target_p_value,num_of_output_columns,input_column_0_nunique,input_column_0_target_f_stat,max_features,norm,analyzer,max_df,binary,use_idf,ngram_range,rows,columns,feature_columns_size,target_mean,target_var,target_skew,target_perc_unique,target_kurtosis,mean_mean,mean_var,mean_skew,mean_perc_unique,mean_kurtosis,var_mean,var_var,var_skew,var_perc_unique,var_kurtosis,skew_mean,skew_var,skew_skew,skew_perc_unique,skew_kurtosis,perc_unique_mean,perc_unique_var,perc_unique_skew,perc_unique_perc_unique,perc_unique_kurtosis,kurtosis_mean,kurtosis_var,kurtosis_skew,kurtosis_perc_unique,kurtosis_kurtosis,perc_of_values_mode_mean,perc_of_values_mode_var,perc_of_values_mode_skew,perc_of_values_mode_perc_unique,perc_of_values_mode_kurtosis,target_slope_mean,target_slope_var,target_slope_skew,target_slope_perc_unique,target_slope_kurtosis,target_r_value_mean,target_r_value_var,target_r_value_skew,target_r_value_perc_unique,target_r_value_kurtosis,target_p_value_mean,target_p_value_var,target_p_value_skew,target_p_value_perc_unique,target_p_value_kurtosis,nunique_mean,nunique_var,nunique_skew,nunique_perc_unique,nunique_kurtosis,target_f_stat_mean,target_f_stat_var,target_f_stat_skew,target_f_stat_perc_unique,target_f_stat_kurtosis,model_type,alpha,l1_ratio,objective,boosting_type,num_leaves,learning_rate,n_estimators
0,38089.386057,standard_scaler,0,numeric,-147.807381,-1.946423,-1.946423,0.000525,1.787886,0.15129,5.227856,0.027757,0.00135,,,,,,,,,,,,,,,,,,,,,,13332,124,124,0.008065,11.135529,11.135529,0.008065,119.00813,1792.789004,6.402077,6.402077,0.322581,36.016924,-2.690683,2.491526,2.491526,0.322581,8.988515,-2.690683,2.491526,2.491526,0.322581,8.988515,0.004468,6.402689,6.402689,0.032258,36.021642,14.754817,5.331932,5.331932,0.322581,27.988896,0.3669,0.729624,0.729624,0.895161,-0.15619,2.936962,-0.735893,-0.735893,0.322581,-1.180441,0.02335,5.590404,5.590404,0.322581,30.057257,0.037184,5.239341,5.239341,0.951613,25.13529,411.53012,3.831147,3.831147,0.25,14.60629,38.765662,6.121918,6.121918,0.66129,42.356203,ElasticNet,0.537285,0.23927,,,,,
1,38272.772091,standard_scaler,0,numeric,-147.807381,-1.946423,-1.946423,0.000525,1.787886,0.15129,5.227856,0.027757,0.00135,,,,,,,,,,,,,,,,,,,,,,13332,124,124,0.008065,11.135529,11.135529,0.008065,119.00813,1792.789004,6.402077,6.402077,0.322581,36.016924,-2.690683,2.491526,2.491526,0.322581,8.988515,-2.690683,2.491526,2.491526,0.322581,8.988515,0.004468,6.402689,6.402689,0.032258,36.021642,14.754817,5.331932,5.331932,0.322581,27.988896,0.3669,0.729624,0.729624,0.895161,-0.15619,2.936962,-0.735893,-0.735893,0.322581,-1.180441,0.02335,5.590404,5.590404,0.322581,30.057257,0.037184,5.239341,5.239341,0.951613,25.13529,411.53012,3.831147,3.831147,0.25,14.60629,38.765662,6.121918,6.121918,0.66129,42.356203,ElasticNet,0.537285,0.23927,,,,,
2,38178.645415,standard_scaler,0,numeric,-147.807381,-1.946423,-1.946423,0.000525,1.787886,0.15129,5.227856,0.027757,0.00135,,,,,,,,,,,,,,,,,,,,,,13332,124,124,0.008065,11.135529,11.135529,0.008065,119.00813,1792.789004,6.402077,6.402077,0.322581,36.016924,-2.690683,2.491526,2.491526,0.322581,8.988515,-2.690683,2.491526,2.491526,0.322581,8.988515,0.004468,6.402689,6.402689,0.032258,36.021642,14.754817,5.331932,5.331932,0.322581,27.988896,0.3669,0.729624,0.729624,0.895161,-0.15619,2.936962,-0.735893,-0.735893,0.322581,-1.180441,0.02335,5.590404,5.590404,0.322581,30.057257,0.037184,5.239341,5.239341,0.951613,25.13529,411.53012,3.831147,3.831147,0.25,14.60629,38.765662,6.121918,6.121918,0.66129,42.356203,ElasticNet,0.537285,0.23927,,,,,
3,37523.111658,standard_scaler,0,numeric,-147.807381,-1.946423,-1.946423,0.000525,1.787886,0.15129,5.227856,0.027757,0.00135,,,,,,,,,,,,,,,,,,,,,,13332,124,124,0.008065,11.135529,11.135529,0.008065,119.00813,1792.789004,6.402077,6.402077,0.322581,36.016924,-2.690683,2.491526,2.491526,0.322581,8.988515,-2.690683,2.491526,2.491526,0.322581,8.988515,0.004468,6.402689,6.402689,0.032258,36.021642,14.754817,5.331932,5.331932,0.322581,27.988896,0.3669,0.729624,0.729624,0.895161,-0.15619,2.936962,-0.735893,-0.735893,0.322581,-1.180441,0.02335,5.590404,5.590404,0.322581,30.057257,0.037184,5.239341,5.239341,0.951613,25.13529,411.53012,3.831147,3.831147,0.25,14.60629,38.765662,6.121918,6.121918,0.66129,42.356203,ElasticNet,0.537285,0.23927,,,,,
4,38099.822809,standard_scaler,0,numeric,-147.807381,-1.946423,-1.946423,0.000525,1.787886,0.15129,5.227856,0.027757,0.00135,,,,,,,,,,,,,,,,,,,,,,13332,124,124,0.008065,11.135529,11.135529,0.008065,119.00813,1792.789004,6.402077,6.402077,0.322581,36.016924,-2.690683,2.491526,2.491526,0.322581,8.988515,-2.690683,2.491526,2.491526,0.322581,8.988515,0.004468,6.402689,6.402689,0.032258,36.021642,14.754817,5.331932,5.331932,0.322581,27.988896,0.3669,0.729624,0.729624,0.895161,-0.15619,2.936962,-0.735893,-0.735893,0.322581,-1.180441,0.02335,5.590404,5.590404,0.322581,30.057257,0.037184,5.239341,5.239341,0.951613,25.13529,411.53012,3.831147,3.831147,0.25,14.60629,38.765662,6.121918,6.121918,0.66129,42.356203,ElasticNet,0.537285,0.23927,,,,,


In [328]:
# Split merge_df into the target vector (validation_metrics) and the
# feature matrix (every remaining column).
y = merge_df.loc[:, 'validation_metrics']
x = merge_df.drop(columns='validation_metrics')


In [307]:
# Persist merge_df to CSV, omitting the index column.
# NOTE(review): the destination is an absolute, user-specific Windows path, so
# this cell only works on the original author's machine — consider deriving it
# from a configurable data directory.
merge_df.to_csv(r'C:\Users\TristanDelforge\Documents\data_files\metadata1.csv', index = False)


In [308]:
# Squared pairwise Pearson correlation (r^2) between the numeric columns of
# merge_df.
# merge_df also holds string columns (e.g. transformation_type, model_type).
# Older pandas dropped them silently inside DataFrame.corr(); pandas >= 2.0
# raises instead, so restrict to numeric/bool columns explicitly.
corr = merge_df.select_dtypes(include=['number', 'bool']).corr()
# Vectorised element-wise square; replaces applymap(lambda x: x*x), which is a
# per-element Python loop and is deprecated in recent pandas releases.
corr2 = corr ** 2
corr2

Unnamed: 0,validation_metrics,input_column_0_target,input_column_0_mean,input_column_0_var,input_column_0_skew,input_column_0_perc_unique,input_column_0_kurtosis,input_column_0_perc_of_values_mode,input_column_0_target_slope,input_column_0_target_r_value,input_column_0_target_p_value,input_column_1_target,input_column_1_mean,input_column_1_var,input_column_1_skew,input_column_1_perc_unique,input_column_1_kurtosis,input_column_1_perc_of_values_mode,input_column_1_target_slope,input_column_1_target_r_value,input_column_1_target_p_value,num_of_output_columns,input_column_0_nunique,input_column_0_target_f_stat,max_features,max_df,rows,columns,feature_columns_size,target_mean,target_var,target_skew,target_perc_unique,target_kurtosis,mean_mean,mean_var,mean_skew,mean_perc_unique,mean_kurtosis,var_mean,var_var,var_skew,var_perc_unique,var_kurtosis,skew_mean,skew_var,skew_skew,skew_perc_unique,skew_kurtosis,perc_unique_mean,perc_unique_var,perc_unique_skew,perc_unique_perc_unique,perc_unique_kurtosis,kurtosis_mean,kurtosis_var,kurtosis_skew,kurtosis_perc_unique,kurtosis_kurtosis,perc_of_values_mode_mean,perc_of_values_mode_var,perc_of_values_mode_skew,perc_of_values_mode_perc_unique,perc_of_values_mode_kurtosis,target_slope_mean,target_slope_var,target_slope_skew,target_slope_perc_unique,target_slope_kurtosis,target_r_value_mean,target_r_value_var,target_r_value_skew,target_r_value_perc_unique,target_r_value_kurtosis,target_p_value_mean,target_p_value_var,target_p_value_skew,target_p_value_perc_unique,target_p_value_kurtosis,nunique_mean,nunique_var,nunique_skew,nunique_perc_unique,nunique_kurtosis,target_f_stat_mean,target_f_stat_var,target_f_stat_skew,target_f_stat_perc_unique,target_f_stat_kurtosis,alpha,l1_ratio,num_leaves,learning_rate,n_estimators
validation_metrics,,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.009801443,0.004852457,0.0,0.0,0.0
input_column_0_target,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
input_column_0_mean,0.0,,1.0,0.007587989,0.007587989,0.009507109,0.0002884023,0.00645132,3.28156e-05,0.007709157,0.000590891,,5.275695e-05,1.168524e-05,1.168524e-05,3.176914e-05,6.858006e-05,9.592378e-05,6.971294e-05,0.001146993,0.0004215869,0.0225263,,,,,,6.992619e-05,6.992619e-05,0.0005322993,0.0001495217,0.0001495217,0.0005322993,6.991868e-05,1.084727e-05,0.001942997,0.001942997,0.0003988328,0.001693356,2.431718e-05,0.002275094,0.002275094,0.0005335454,0.001366404,2.431718e-05,0.002275094,0.002275094,0.0005335454,0.001366404,0.0007665178,0.0004953207,0.0004953207,0.001695209,0.0001954924,0.0003779708,0.0004975896,0.0004975896,0.0004857162,0.0008316504,0.0002050361,1.149667e-05,1.149667e-05,0.0001606026,4.505959e-05,5.879319e-05,0.001447176,0.001447176,0.0004911152,0.0002763538,2.493704e-06,4.704392e-06,4.704392e-06,0.0004721091,2.030171e-06,0.0009715088,0.002121488,0.002121488,8.05556e-05,0.003279781,8.817578e-29,3.331138e-13,3.331138e-13,0.0005322993,8.983422e-29,7.230565e-13,4.733122e-11,4.733122e-11,0.0005322993,7.596474e-29,0.0001018291,0.0005539173,0.001209358,0.001352031,0.0002765042
input_column_0_var,0.0,,0.007587989,1.0,1.0,0.01333791,0.03609877,0.4226829,0.004334479,0.08448053,0.06846254,,1.272492e-05,0.04545198,0.04545198,0.01406088,0.008676739,0.09272694,0.004972902,0.004391245,0.0185266,0.09216115,,,,,,0.145169,0.145169,0.1754917,0.1551772,0.1551772,0.1754917,0.1451678,9.751203e-07,0.1075035,0.1075035,0.01355626,0.1158833,0.2312225,0.0633789,0.0633789,0.01302181,0.01245674,0.2312225,0.0633789,0.0633789,0.01302181,0.01245674,0.01959734,0.111452,0.111452,0.05513614,0.1212152,0.02817415,0.007396542,0.007396542,0.01208954,0.002376064,0.1571683,0.1439139,0.1439139,0.09215099,0.1562493,0.006292428,0.0003382971,0.0003382971,0.01340647,0.002363299,0.02646289,0.05225649,0.05225649,0.01381952,0.08243529,0.04036997,0.05759539,0.05759539,0.08273175,0.04877249,4.13292e-28,4.74661e-12,4.74661e-12,0.1754917,9.148081e-28,1.2031e-10,5.59824e-10,5.59824e-10,0.1754917,1.105863e-28,0.0002963333,0.0001583737,0.0001358534,0.000990454,0.0006422972
input_column_0_skew,0.0,,0.007587989,1.0,1.0,0.01333791,0.03609877,0.4226829,0.004334479,0.08448053,0.06846254,,1.272492e-05,0.04545198,0.04545198,0.01406088,0.008676739,0.09272694,0.004972902,0.004391245,0.0185266,0.09216115,,,,,,0.145169,0.145169,0.1754917,0.1551772,0.1551772,0.1754917,0.1451678,9.751203e-07,0.1075035,0.1075035,0.01355626,0.1158833,0.2312225,0.0633789,0.0633789,0.01302181,0.01245674,0.2312225,0.0633789,0.0633789,0.01302181,0.01245674,0.01959734,0.111452,0.111452,0.05513614,0.1212152,0.02817415,0.007396542,0.007396542,0.01208954,0.002376064,0.1571683,0.1439139,0.1439139,0.09215099,0.1562493,0.006292428,0.0003382971,0.0003382971,0.01340647,0.002363299,0.02646289,0.05225649,0.05225649,0.01381952,0.08243529,0.04036997,0.05759539,0.05759539,0.08273175,0.04877249,4.13292e-28,4.74661e-12,4.74661e-12,0.1754917,9.148081e-28,1.2031e-10,5.59824e-10,5.59824e-10,0.1754917,1.105863e-28,0.0002963333,0.0001583737,0.0001358534,0.000990454,0.0006422972
input_column_0_perc_unique,0.0,,0.009507109,0.01333791,0.01333791,1.0,0.002811085,0.005391052,0.03387392,0.002355455,0.004223164,,0.0001145208,0.0002822946,0.0002822946,0.172326,0.001221467,0.008713218,0.1002676,0.001890641,0.003611756,1.184096e-05,,,,,,0.006050207,0.006050207,0.009084372,0.007104074,0.007104074,0.009084372,0.006050112,7.191603e-05,0.004369615,0.004369615,0.002278699,0.004145034,0.004792629,0.01184993,0.01184993,0.002073548,0.004883422,0.004792629,0.01184993,0.01184993,0.002073548,0.004883422,0.1409004,0.0270046,0.0270046,0.15068,0.009902075,0.0007265401,0.0005304997,0.0005304997,0.001754202,5.606729e-06,8.295768e-05,0.0001302653,0.0001302653,0.003803335,3.131009e-05,0.0005076242,0.003839406,0.003839406,0.001956195,0.0001987559,0.0004176888,0.002444474,0.002444474,0.001811903,0.003532124,6.973827e-05,2.827761e-05,2.827761e-05,0.003354822,1.272309e-05,7.070988999999999e-26,4.946754e-12,4.946754e-12,0.009084372,7.557942e-26,1.977365e-12,5.05954e-14,5.05954e-14,0.009084372,6.640223e-26,0.0008621813,9.419674e-05,0.0005026856,1.874872e-05,0.0006954078
input_column_0_kurtosis,0.0,,0.0002884023,0.03609877,0.03609877,0.002811085,1.0,0.001758136,0.0007814126,0.05853213,0.02397815,,0.0002044466,0.001627943,0.001627943,0.00135457,0.002106048,8.022159e-07,1.509983e-07,4.743965e-05,0.01359777,0.01362298,,,,,,0.001875752,0.001875752,0.001295859,0.001728458,0.001728458,0.001295859,0.001875766,9.412998e-06,0.0001961572,0.0001961572,0.06412315,0.0006775047,0.05412267,0.0008243571,0.0008243571,0.06691834,0.001314136,0.05412267,0.0008243571,0.0008243571,0.06691834,0.001314136,0.004958798,0.003070234,0.003070234,0.02147987,0.002982169,0.05071379,0.0724441,0.0724441,0.06738334,0.04963779,0.000156125,0.0004791085,0.0004791085,0.02002965,1.55163e-05,0.0006106979,5.133787e-05,5.133787e-05,0.0715333,0.001378776,0.003540403,0.002280425,0.002280425,0.07036701,0.002470869,0.02149031,0.009910523,0.009910523,0.02179685,0.006376404,3.5619840000000003e-28,2.560521e-11,2.560521e-11,0.001295859,1.267792e-29,6.71505e-13,1.798318e-11,1.798318e-11,0.001295859,2.3821980000000003e-28,0.0001316569,0.0001670622,0.0002610242,0.000719556,4.81533e-06
input_column_0_perc_of_values_mode,0.0,,0.00645132,0.4226829,0.4226829,0.005391052,0.001758136,1.0,0.00119529,0.01204047,0.3887793,,0.0007818903,0.04625732,0.04625732,0.002454143,0.005568056,0.1476202,0.000171921,0.001403828,0.09465525,0.00953839,0.2502257,0.01653072,0.07159742,0.02440251,,0.2046754,0.2046754,0.2358214,0.2162414,0.2162414,0.2358214,0.204674,0.0002251616,0.1630126,0.1630126,0.01146014,0.1670999,0.1425178,0.04824391,0.04824391,0.01287077,0.005553923,0.1425178,0.04824391,0.04824391,0.01287077,0.005553923,0.04496078,0.1644905,0.1644905,0.008752121,0.1768393,0.002496643,0.002707739,0.002707739,0.0134431,0.03163875,0.2483637,0.2105493,0.2105493,0.2013354,0.2356932,0.005633496,0.0006081697,0.0006081697,0.01340641,0.001691245,0.06441926,0.1453106,0.1453106,0.01194949,0.1794474,0.1879122,0.1855246,0.1855246,0.1974983,0.153841,3.491954e-26,7.105869e-13,7.105869e-13,0.2358214,3.891294e-26,7.798959e-11,5.438804e-08,5.438804e-08,0.2358214,3.9181429999999996e-26,0.0003538296,3.36577e-05,0.0006884259,0.0006129053,0.0009141812
input_column_0_target_slope,0.0,,3.28156e-05,0.004334479,0.004334479,0.03387392,0.0007814126,0.00119529,1.0,0.06977466,0.0003061145,,4.62062e-05,0.0007686317,0.0007686317,0.0631977,2.303884e-05,0.000871448,0.3239926,0.0109768,0.0004737061,0.02924218,,,,,,0.009227582,0.009227582,0.01097089,0.009970258,0.009970258,0.01097089,0.009227508,2.179444e-06,0.00993653,0.00993653,0.0004223239,0.01127771,0.005007877,0.003892934,0.003892934,0.0004117467,0.0008607901,0.005007877,0.003892934,0.003892934,0.0004117467,0.0008607901,0.02596269,3.692085e-05,3.692085e-05,0.06631066,0.0007794626,0.0001647316,0.0007191443,0.0007191443,0.0003547783,0.0003108343,5.163253e-05,5.636347e-05,5.636347e-05,0.008907285,0.004380037,0.01007644,0.007551904,0.007551904,0.0003405149,0.01670631,0.04976147,0.000122629,0.000122629,0.0004092876,0.00199278,0.001135743,0.001872587,0.001872587,0.006659152,0.00356923,5.231678e-29,5.353303e-13,5.353303e-13,0.01097089,2.694895e-29,6.474623e-12,4.913393e-12,4.913393e-12,0.01097089,6.574976e-29,0.0001256553,0.0001748848,0.001032412,7.579746e-07,6.812838e-07
input_column_0_target_r_value,0.0,,0.007709157,0.08448053,0.08448053,0.002355455,0.05853213,0.01204047,0.06977466,1.0,4.346651e-05,,0.002079879,0.0001178136,0.0001178136,0.0002166896,0.0004874939,0.0002022455,0.005144565,0.0006370351,0.0001736486,0.09064172,,,,,,0.002532446,0.002532446,0.002099723,0.002417297,0.002417297,0.002099723,0.002532453,0.0007919316,0.0007649656,0.0007649656,0.0001428229,0.001106279,0.002411291,4.001144e-05,4.001144e-05,0.0001151244,0.0005150346,0.002411291,4.001144e-05,4.001144e-05,0.0001151244,0.0005150346,1.533973e-05,0.001558865,0.001558865,0.0002093022,0.002045882,0.00118537,0.002466745,0.002466745,0.0001093428,0.003855842,0.001018622,0.0004886941,0.0004886941,0.00143964,0.0003833685,0.0007931487,0.009237781,0.009237781,0.0001995368,0.01010457,0.02675034,0.0005923962,0.0005923962,0.0001139296,0.0008979297,0.0003674871,0.00105344,0.00105344,0.001642142,0.001393119,6.712639e-29,5.140243e-12,5.140243e-12,0.002099723,6.062211e-29,1.263749e-13,5.199255e-11,5.199255e-11,0.002099723,7.674074e-29,0.0004166152,1.917969e-06,0.0001317457,0.0003395053,1.875699e-05


In [309]:
# Rank every column by its squared correlation with validation_metrics,
# strongest first.
(
    corr2[['validation_metrics']]
    .sort_values(by='validation_metrics', ascending=False)
)

Unnamed: 0,validation_metrics
alpha,0.009801
l1_ratio,0.004852
input_column_0_mean,0.0
perc_of_values_mode_var,0.0
target_slope_kurtosis,0.0
target_slope_perc_unique,0.0
target_slope_skew,0.0
target_slope_var,0.0
target_slope_mean,0.0
perc_of_values_mode_kurtosis,0.0
