In [1]:
# grid search params
from sklearn.utils import shuffle
import pandas as pd
import json

class grid_search_generator(object):
    """Enumerate a hyper-parameter grid into a CSV "work queue" and run a search over it.

    ``params`` maps each key to its list of candidate values.  A key that also
    appears in ``bindings`` is a *bound* key: each of its candidates is a dict,
    and ``bindings[key]`` lists the sub-keys of that dict that become separate
    CSV columns (so the sub-keys always vary together).  Every other key is a
    *single* key contributing one column.

    The configuration is persisted as JSON so that the static helpers
    (``search``, ``add_params``, ``delete_params``) can be used in a later
    session with nothing but the file paths.
    """

    def __init__(self, config_file_url='grid_search_config.json',
                 params=None, bindings=None, score_name_list=None, csv_url=None):
        """Create a generator from explicit arguments or from a saved config.

        Args:
            config_file_url: JSON file written (new generator) or read (otherwise).
            params: dict of key -> list of candidate values.
            bindings: dict of bound key -> list of sub-keys to expand into columns.
            score_name_list: names of the scores produced by the model runner.
            csv_url: if given, the full grid is written to this CSV immediately.

        A new generator is built only when ``params``, ``bindings`` and
        ``score_name_list`` are all supplied; otherwise everything is loaded
        from ``config_file_url``.
        """
        new_generator = (params is not None and bindings is not None
                         and score_name_list is not None)
        if new_generator:
            self.params = params
            self.binding = bindings
            self.score_name_list = score_name_list

            self.single_keys = [key for key in self.params if key not in self.binding]
            self.binding_keys = list(self.binding.keys())

            self._save_config(config_file_url)
        else:
            with open(config_file_url, 'r') as f:
                data = json.load(f)
            self.params = data['params']
            self.binding = data['binding']
            self.score_name_list = data['score_name_list']
            self.single_keys = data['single_keys']
            self.binding_keys = data['binding_keys']

        # Iteration order: single keys first, then bound keys.
        self.keys = list(self.single_keys) + list(self.binding_keys)
        # CSV column order: single keys, then each bound key's sub-keys.
        self.param_keys = list(self.single_keys)
        for bound_key in self.binding_keys:
            self.param_keys.extend(self.binding[bound_key])

        self._reset_config()

        if csv_url is not None:
            self.get_csv(csv_url)

    def _reset_config(self):
        """Rewind the enumeration to the first combination.

        The per-key upper bounds are recomputed from ``self.params`` so that
        callers may edit ``self.params`` and then re-enumerate.
        """
        self._idx = [0] * len(self.keys)
        self._ub = [len(self.params[key]) for key in self.keys]
        # An empty grid (no keys, or some key without candidates) has nothing
        # to enumerate; start terminated instead of failing inside the odometer.
        self._terminate = (not self.keys) or (0 in self._ub)

    def _save_config(self, config_file_url):
        """Write the current configuration to ``config_file_url`` as JSON."""
        data = {
            'params': self.params,
            'binding': self.binding,
            'score_name_list': self.score_name_list,
            'single_keys': self.single_keys,
            'binding_keys': self.binding_keys
        }
        with open(config_file_url, 'w') as f:
            json.dump(data, f)

    def _next_idx(self):
        """Advance the index vector like an odometer (position 0 moves fastest).

        Sets ``self._terminate`` to True once every position has wrapped back
        to zero, i.e. after the last combination has been produced.
        """
        for pos, bound in enumerate(self._ub):
            self._idx[pos] = (self._idx[pos] + 1) % bound
            if self._idx[pos] != 0:
                # No carry into the next position: enumeration continues.
                self._terminate = False
                return
        self._terminate = True

    def _next_param_list(self):
        """Return the current combination as a flat list and advance; None when done."""
        if self._terminate:
            return None

        n_single = len(self.single_keys)
        row = [self.params[self.keys[pos]][self._idx[pos]] for pos in range(n_single)]
        for pos in range(n_single, len(self.keys)):
            bound_key = self.keys[pos]
            bundle = self.params[bound_key][self._idx[pos]]
            row.extend(bundle[sub_key] for sub_key in self.binding[bound_key])
        self._next_idx()
        return row

    def _get_param_frame(self):
        """Enumerate the whole grid into a DataFrame with ``self.param_keys`` as columns."""
        rows = []
        self._reset_config()
        row = self._next_param_list()
        while row is not None:
            rows.append(row)
            row = self._next_param_list()
        return pd.DataFrame(rows, columns=self.param_keys)

    def get_csv(self, csv_url):
        """Write the full grid to ``csv_url`` (overwriting any existing file)."""
        self._get_param_frame().to_csv(csv_url, index=False)
        print('successfully generated grid search csv file\n')

    @staticmethod
    def _get_grid_search_param(keys, values):
        """Pair column names with one row of values."""
        return dict(zip(keys, values))

    @staticmethod
    def _get_next_param(csv_url, param_list):
        """Pop one randomly chosen pending combination from the CSV.

        The remaining rows are written back to ``csv_url``.  Values are paired
        with ``param_list`` by position, so the CSV column order must match.

        Returns:
            dict of parameter name -> value, or None when no rows are left.
        """
        df = pd.read_csv(csv_url)
        if df.empty:
            return None
        # Random permutation of the rows; the first one is consumed.
        df = df.sample(frac=1)
        param = grid_search_generator._get_grid_search_param(param_list, df.iloc[0].values)
        df.iloc[1:].to_csv(csv_url, index=False)
        return param

    @staticmethod
    def _recorder(values, record_csv, score_name_list, params, param_keys, label_list,
                  confusion_matrices, initialize):
        """Append one result row to ``record_csv`` and print the scores.

        Args:
            values: score values, in the order of ``score_name_list``.
            record_csv: path of the results CSV.
            score_name_list: names of the scores.
            params: dict holding the parameter values of this run.
            param_keys: parameter names, in column order.
            label_list: optional keys of ``confusion_matrices`` to record as extra columns.
            confusion_matrices: optional mapping label -> confusion matrix.
            initialize: True to (re)create the file with a header, False to append.
        """
        # Label columns are only meaningful when the runner supplied matrices.
        with_labels = label_list is not None and confusion_matrices is not None

        head = list(param_keys)
        head.extend(score_name_list)
        if with_labels:
            head.extend(label_list)

        content = [params[key] for key in param_keys]
        content.extend(values)
        if with_labels:
            content.extend(confusion_matrices[label] for label in label_list)

        # Let pandas open the file so newline handling is correct on every platform.
        pd.DataFrame([content], columns=head).to_csv(
            record_csv, header=initialize, index=False, mode='w' if initialize else 'a')

        print_str = ', '.join(score_name_list[i] + ':' + ('%.6f' % value)
                              for i, value in enumerate(values)) + '\n'
        print(print_str)

    @staticmethod
    def add_params(csv_url, param_dict, config_file_url='grid_search_config.json'):
        """Add new candidate values and queue the combinations they introduce.

        Args:
            csv_url: CSV of pending combinations; new rows are appended.
            param_dict: dict of key -> list of candidate values to add.  Keys
                must already exist in the saved configuration.
            config_file_url: JSON configuration to update.

        Values already present for a key (or repeated in ``param_dict``) are
        ignored.  The configuration is reloaded for every key so that each
        key's new rows are combined with the values added for earlier keys.
        """
        for key in param_dict.keys():
            generator = grid_search_generator(config_file_url=config_file_url)
            existing = list(generator.params[key])

            new_values = []
            for value in param_dict[key]:
                if value not in existing and value not in new_values:
                    new_values.append(value)
            if not new_values:
                # Nothing new for this key: no rows to add, config unchanged.
                continue

            # Temporarily restrict the key to the new values so that only the
            # not-yet-queued combinations are enumerated.
            generator.params[key] = new_values
            generator._get_param_frame().to_csv(csv_url, header=False, index=False, mode='a')
            generator.params[key].extend(existing)
            generator._save_config(config_file_url)
        print('successfully append the new parameters')

    @staticmethod
    def delete_params(csv_url, param_dict, config_file_url='grid_search_config.json'):
        """Remove candidate values from the pending CSV and from the configuration.

        Args:
            csv_url: CSV of pending combinations.
            param_dict: dict of column name -> list of values to remove.  A
                column name is either a single key or a sub-key of a bound key.
            config_file_url: JSON configuration to update.
        """
        remaining = pd.read_csv(csv_url)
        generator = grid_search_generator(config_file_url)

        for key in param_dict.keys():
            for value in param_dict[key]:
                remaining = remaining[remaining[key] != value]
        remaining.to_csv(csv_url, index=False)

        for key in param_dict.keys():
            if key in generator.single_keys:
                generator.params[key] = [value for value in generator.params[key]
                                         if value not in param_dict[key]]
            else:
                # Sub-key of a bound key: drop every bundle carrying a removed value.
                for bound_key in generator.binding_keys:
                    if key in generator.binding[bound_key]:
                        generator.params[bound_key] = [
                            bundle for bundle in generator.params[bound_key]
                            if bundle[key] not in param_dict[key]]
                        break
        generator._save_config(config_file_url)
        print('successfully delete the parameters')

    @staticmethod
    def search(remain_csv, record_csv, X, y, model_run, other_model_dependency_dict, label_list=None,
               config_file_url='grid_search_config.json', X_y_split=False, val_size=0.2, shuffle=True):
        """Run ``model_run`` on every pending combination, recording each result.

        Args:
            remain_csv: CSV of pending combinations; one row is consumed per run.
            record_csv: results CSV; it is recreated at the start of each call.
            X, y: data handed to ``model_run`` (split first when ``X_y_split``).
            model_run: callable.  Without splitting it is called as
                ``model_run(param, X, y, deps)`` and must return
                ``(scores, confusion_matrices)``; with splitting it is called as
                ``model_run(param, X_train, X_val, y_train, y_val, deps)`` and
                must return an iterable of scores.
            other_model_dependency_dict: passed through to ``model_run``.
            label_list: optional confusion-matrix labels to record as columns.
            config_file_url: JSON configuration written by the constructor.
            X_y_split: split ``X``/``y`` into train and validation parts first.
            val_size: validation fraction passed to ``train_test_split``.
            shuffle: ``shuffle`` argument passed to ``train_test_split``.
        """
        generator = grid_search_generator(config_file_url)
        if X_y_split:
            # Imported here because the notebook's import cell does not provide it.
            from sklearn.model_selection import train_test_split
            X_train, X_val, y_train, y_val = train_test_split(
                X, y, test_size=val_size, shuffle=shuffle)

        # Split mode yields scores only, so there are no matrices to record.
        confusion_matrices = None
        initialize = True
        param = grid_search_generator._get_next_param(remain_csv, generator.param_keys)
        while param is not None:
            print(param)
            if X_y_split:
                scores = list(model_run(param, X_train, X_val, y_train, y_val,
                                        other_model_dependency_dict))
            else:
                scores, confusion_matrices = model_run(param, X, y, other_model_dependency_dict)
            grid_search_generator._recorder(scores, record_csv, generator.score_name_list, param,
                                            generator.param_keys, label_list, confusion_matrices,
                                            initialize)
            initialize = False
            param = grid_search_generator._get_next_param(remain_csv, generator.param_keys)
            
# Completion marker: the class definition above prints nothing itself, so this
# confirms that the cell ran to the end.
print('done')

done


In [1]:
from sklearn.utils import shuffle
import pandas as pd
import json

class grid_search(object):
    """Grid search over a nested parameter specification, driven by a CSV work queue.

    ``params`` is a dict in which each value is one of:

    * a scalar            -> a single fixed candidate,
    * a list of scalars   -> the candidates for that key,
    * a dict              -> a nested group whose keys become ``outer:inner`` columns,
    * a list of dicts     -> alternative nested groups; every alternative must
                             produce the same column names.

    Constructing the object with ``params`` writes a JSON config (normalised
    params plus the flat column names) and, optionally, a CSV holding every
    combination.  The static helpers then operate on those files only.
    """

    def __init__(self, params=None, csv_url=None, config_file_url='grid_search_config.json'):
        """Build the config from ``params`` or load previously saved params.

        Args:
            params: nested parameter specification; it is normalised in place
                (scalars and nested dicts are wrapped in one-element lists).
            csv_url: if given together with ``params``, the CSV of all
                combinations is written here.
            config_file_url: JSON file written (with ``params``) or read (without).
        """
        if params is not None:
            self.params = params
            self._get_config(csv_url, config_file_url)
        else:
            with open(config_file_url, 'r') as f:
                self.params = json.load(f)['params']

    @staticmethod
    def _get_config_one_step(node):
        """Flatten one level of the specification.

        Normalises ``node`` in place and returns ``(key_list, rows)`` where
        ``key_list`` holds the flat column names and ``rows`` is the list of
        value combinations (each a flat list aligned with ``key_list``).

        Raises:
            ValueError: if the alternatives of a list-of-dicts entry do not all
                produce the same column names (their rows could not share columns).
        """
        key_list = []
        value_list = []
        for key in list(node):
            value = node[key]
            if isinstance(value, dict):
                sub_keys, sub_rows = grid_search._get_config_one_step(value)
                value_list.append(sub_rows)
                key_list.extend(key + ':' + name for name in sub_keys)
                node[key] = [value]
            elif not isinstance(value, list):
                key_list.append(key)
                value_list.append([value])
                node[key] = [value]
            elif value and isinstance(value[0], dict):
                # Alternatives: the rows of every alternative are pooled.
                pooled_rows = []
                shared_keys = None
                for alternative in value:
                    sub_keys, sub_rows = grid_search._get_config_one_step(alternative)
                    if shared_keys is not None and sub_keys != shared_keys:
                        raise ValueError(
                            "alternatives of %r must define the same keys in the same "
                            "order: %r vs %r" % (key, shared_keys, sub_keys))
                    shared_keys = sub_keys
                    pooled_rows.extend(sub_rows)
                value_list.append(pooled_rows)
                key_list.extend(key + ':' + name for name in shared_keys)
            else:
                # Plain list of candidates (an empty list yields an empty grid).
                key_list.append(key)
                value_list.append(value)

        return key_list, grid_search._get_param_chart(value_list)

    @staticmethod
    def _get_param_chart(value_list):
        """Return every combination taking one entry from each list in ``value_list``.

        The first list varies fastest.  An entry that is itself a list (a
        flattened nested row) is spliced into the combination.
        NOTE(review): because of that splicing, a plain candidate whose value is
        a list would be flattened as well - confirm no caller relies on
        list-valued candidates.
        """
        sizes = [len(candidates) for candidates in value_list]
        if 0 in sizes:
            # A key without candidates makes the whole product empty.
            return []

        rows = []
        idx = [0] * len(value_list)
        terminate = False
        while not terminate:
            row = []
            for pos, candidates in enumerate(value_list):
                entry = candidates[idx[pos]]
                if isinstance(entry, list):
                    row.extend(entry)
                else:
                    row.append(entry)
            rows.append(row)
            idx, terminate = grid_search._next_idx(idx, sizes)
        return rows

    @staticmethod
    def _next_idx(idx, ub):
        """Advance ``idx`` in place like an odometer (position 0 moves fastest).

        Returns:
            ``(idx, terminate)`` where ``terminate`` is True once every position
            has wrapped to zero (also for an empty ``idx``).
        """
        for pos, bound in enumerate(ub):
            idx[pos] = (idx[pos] + 1) % bound
            if idx[pos] != 0:
                return idx, False
        return idx, True

    def _get_config(self, csv_url, config_file_url):
        """Flatten ``self.params``, save the JSON config and optionally the CSV."""
        keys, values = grid_search._get_config_one_step(self.params)
        data = {'params': self.params, 'keys': keys}
        with open(config_file_url, 'w') as f:
            json.dump(data, f)
        if csv_url is not None:
            pd.DataFrame(values, columns=keys).to_csv(csv_url, index=False, mode='w')

    @staticmethod
    def _next_param_from_csv(csv_url, param_keys):
        """Pop one randomly chosen pending combination from the CSV.

        The row that is returned is the same row that is removed from the file;
        the remaining rows are written back to ``csv_url``.

        Returns:
            dict of column name -> value for ``param_keys``, or None when the
            CSV holds no more rows.
        """
        df = pd.read_csv(csv_url)
        if df.empty:
            return None
        df = df.sample(frac=1)
        # Positional access: after shuffling, label 0 is generally NOT the first row.
        chosen = df.iloc[0]
        param = {key: chosen[key] for key in param_keys}
        df.iloc[1:].to_csv(csv_url, index=False, mode='w')
        return param

    @staticmethod
    def _recorder(record_csv, score_name, scores, param, param_keys, initialize):
        """Write one result row (parameters followed by scores) and print the scores.

        Args:
            record_csv: path of the results CSV.
            score_name: names of the scores, in the order of ``scores``.
            scores: score values of this run.
            param: dict holding the parameter values of this run.
            param_keys: parameter names, in column order.
            initialize: True to (re)create the file with a header, False to append.
        """
        head = list(param_keys) + list(score_name)
        content = [param[key] for key in param_keys]
        content.extend(scores)

        pd.DataFrame([content], columns=head).to_csv(
            record_csv, header=initialize, index=False, mode='w' if initialize else 'a')

        print_str = ', '.join(score_name[i] + ':' + str(score)
                              for i, score in enumerate(scores)) + '\n'
        print(print_str)

    @staticmethod
    def _delete_params_one_step(params, key_list, values):
        """Remove ``values`` from the candidates addressed by ``key_list``.

        ``params`` is a list of dict nodes; ``key_list`` is the ``:``-split
        column name, walked one level per recursive call.
        """
        head = key_list[0]
        if len(key_list) == 1:
            for node in params:
                node[head] = [v for v in node[head] if v not in values]
        else:
            for node in params:
                grid_search._delete_params_one_step(node[head], key_list[1:], values)

    @staticmethod
    def add_params(remain_csv, params, config_file_url='grid_search_config.json'):
        """Not implemented yet: currently a no-op that changes neither file."""
        pass

    @staticmethod
    def delete_params(remain_csv, param_dict, config_file_url='grid_search_config.json'):
        """Remove candidate values from the pending CSV and from the saved params.

        Args:
            remain_csv: CSV of pending combinations.
            param_dict: dict of flat column name (``outer:inner``) -> value or
                list of values to remove.
            config_file_url: JSON configuration to update.
        """
        remaining = pd.read_csv(remain_csv)
        with open(config_file_url, 'r') as f:
            data = json.load(f)
        params = data['params']

        for key, values in param_dict.items():
            if not isinstance(values, list):
                values = [values]
            for value in values:
                remaining = remaining[remaining[key] != value]
            grid_search._delete_params_one_step([params], key.split(':'), values)

        remaining.to_csv(remain_csv, index=False, mode='w')
        with open(config_file_url, 'w') as f:
            json.dump(data, f)
        print('delete params successfully')

    @staticmethod
    def search(remain_csv, record_csv, X, y, model_run, score_name, dependency=None,
               config_file_url='grid_search_config.json', X_y_split=False, val_size=0.2, shuffle=True):
        """Run ``model_run`` on every pending combination, recording each result.

        Args:
            remain_csv: CSV of pending combinations; one row is consumed per run.
            record_csv: results CSV; it is recreated at the start of each call.
            X, y: data handed to ``model_run`` (split first when ``X_y_split``).
            model_run: callable returning the scores of one run.  It is called
                as ``model_run(param, X, y, dependency)``, or with
                ``X_train, X_val, y_train, y_val`` in place of ``X, y`` when
                ``X_y_split`` is True.
            score_name: names of the returned scores, used as column headers.
            dependency: passed through to ``model_run``.
            config_file_url: JSON configuration written by the constructor.
            X_y_split: split ``X``/``y`` into train and validation parts first.
            val_size: validation fraction passed to ``train_test_split``.
            shuffle: ``shuffle`` argument passed to ``train_test_split``.
        """
        if X_y_split:
            # Imported here because the notebook's import cell does not provide it.
            from sklearn.model_selection import train_test_split
            X_train, X_val, y_train, y_val = train_test_split(
                X, y, test_size=val_size, shuffle=shuffle)
        with open(config_file_url, 'r') as f:
            param_keys = json.load(f)['keys']

        initialize = True
        param = grid_search._next_param_from_csv(remain_csv, param_keys)
        while param is not None:
            print(param)
            if X_y_split:
                scores = model_run(param, X_train, X_val, y_train, y_val, dependency)
            else:
                scores = model_run(param, X, y, dependency)
            grid_search._recorder(record_csv, score_name, scores, param, param_keys, initialize)
            initialize = False
            param = grid_search._next_param_from_csv(remain_csv, param_keys)
        
# Completion marker: the class definition above prints nothing itself, so this
# confirms that the cell ran to the end.
print('done')

done


In [4]:
import numpy as np

def model_run(param, X, y, dependency):
    """Dummy model runner for trying out the grid search.

    All arguments are ignored.  Returns a two-element list: a fixed score of
    1.0 followed by a dict of hard-coded 2x2 arrays keyed by 'toxic' and
    'nontoxic'.
    """
    fake_matrices = {
        'toxic': np.array([[1, 2], [3, 4]]),
        'nontoxic': np.array([[5, 6], [7, 8]]),
    }
    fake_score = 1.0
    return [fake_score, fake_matrices]

# Example search space: 'cnn' has two alternative configurations. The first
# sweeps 'u' over two values, the second pins it to a single value.
params = {
    'cnn': [
        {'has': True, 'u': [1, 2]},
        {'has': False, 'u': 0},
    ],
}

grid_search_csv_url = 'a.csv'
grid_search_result_csv_url = 'b.csv'

# Constructing the object writes the JSON config and the CSV of all combinations.
grid_search(params=params, csv_url=grid_search_csv_url)

# Example: run the search with the dummy runner (disabled).
# grid_search.search(remain_csv=grid_search_csv_url, record_csv=grid_search_result_csv_url,
#                    X=None, y=None, model_run=model_run, dependency=None, score_name=['val_loss', 'c_m'])

# Show the generated combinations.
print(pd.read_csv(grid_search_csv_url))

# Example: delete a value and inspect the CSV and the saved config (disabled).
# grid_search.delete_params(grid_search_csv_url, {'embedding_param:embed_size:b': 3})
# print()
# print(pd.read_csv(grid_search_csv_url))
# print()
# with open('grid_search_config.json', 'r') as f: print(json.load(f)['params'])

print('done')

   cnn:has  cnn:u
0     True      1
1     True      2
2    False      0
done
