In [9]:
from xgboost import XGBRegressor
from sklearn.model_selection import GridSearchCV,KFold,cross_val_score
from sklearn.datasets import fetch_california_housing
import pandas as pd
import socket
from itertools import product
import numpy as np
import json

In [10]:
X,y = fetch_california_housing(return_X_y=True)

In [11]:
y

array([4.526, 3.585, 3.521, ..., 0.923, 0.847, 0.894])

In [95]:
params = [
        {
        'model':'xgboost',
        'min_child_weight': [1, 5, 10],
        'gamma': [0.5, 1, 1.5, 2, 5],
#         'subsample': [0.6, 0.8, 1.0],
#         'colsample_bytree': [0.6, 0.8, 1.0],
#         'max_depth': range(1, 11, 1),
#         'n_estimators':range(50, 400, 25),
        },
#         {
#         'model':'lightgbm',
#         'min_child_weight': [1, 5, 10],
#         'gamma': [0.5, 1, 1.5, 2, 5],
#         'subsample': [0.6, 0.8, 1.0],
#         'colsample_bytree': [0.6, 0.8, 1.0],
#         'max_depth': range(1, 11, 1),
#         'n_estimators':range(50, 425, 25),
#         },
]



def split_params(jobs_param,chunk):
    li = []
    for i,x in enumerate(jobs_param):
        model = x.pop('model')
        c_plane = [dict(zip(x, v)) for v in product(*x.values())]
        for y in c_plane:
            y.update( {"model":model})
        li+=c_plane
    
    split_list=np.array_split(li, chunk)
    json_list = [json.dumps(i.tolist()) for i in split_list]
    print(f"Total Param Length = {len(li)}, Each chunk = {len(split_list[0])}" )
    return json_list

split = split_params(params,1)


Total Param Length = 15, Each chunk = 15


In [98]:
import pika

connection = pika.BlockingConnection(pika.ConnectionParameters(host="localhost"))
channel = connection.channel()

channel.queue_declare(queue="task_queue", durable=True)

for msg in split:
    channel.basic_publish(
        exchange="",
        routing_key="task_queue",
        body=msg,
        properties=pika.BasicProperties(delivery_mode=2),  # make message persistent
    )
    print(" [x] Sent %r" % msg)
connection.close()

 [x] Sent '[{"min_child_weight": 1, "gamma": 0.5, "model": "xgboost"}, {"min_child_weight": 1, "gamma": 1, "model": "xgboost"}, {"min_child_weight": 1, "gamma": 1.5, "model": "xgboost"}, {"min_child_weight": 1, "gamma": 2, "model": "xgboost"}, {"min_child_weight": 1, "gamma": 5, "model": "xgboost"}, {"min_child_weight": 5, "gamma": 0.5, "model": "xgboost"}, {"min_child_weight": 5, "gamma": 1, "model": "xgboost"}, {"min_child_weight": 5, "gamma": 1.5, "model": "xgboost"}, {"min_child_weight": 5, "gamma": 2, "model": "xgboost"}, {"min_child_weight": 5, "gamma": 5, "model": "xgboost"}, {"min_child_weight": 10, "gamma": 0.5, "model": "xgboost"}, {"min_child_weight": 10, "gamma": 1, "model": "xgboost"}, {"min_child_weight": 10, "gamma": 1.5, "model": "xgboost"}, {"min_child_weight": 10, "gamma": 2, "model": "xgboost"}, {"min_child_weight": 10, "gamma": 5, "model": "xgboost"}]'


In [176]:
import json
j_data = json.loads(split[0])

X,y = fetch_california_housing(return_X_y=True)

def trainer(data:list,folds = 10):
    for i in data:
        model = i.pop('model')
        if model =="xgboost":
            regressor = XGBRegressor(objective ='reg:squarederror',**i)
            kfold = KFold(n_splits=folds)
            results = cross_val_score(regressor, X, y, cv=kfold,n_jobs=-1)
            print(regressor)
            print(results.mean())
        elif model == "lightgbm":
            pass
        elif model == "catboost":
            pass
        else:
            print(f"model {model} is not supported")


In [177]:
trainer(j_data)

XGBRegressor(base_score=0.5, booster='gbtree', colsample_bylevel=1,
       colsample_bynode=1, colsample_bytree=0.6, gamma=0.5,
       importance_type='gain', learning_rate=0.1, max_delta_step=0,
       max_depth=1, min_child_weight=1, missing=None, n_estimators=50,
       n_jobs=1, nthread=None, objective='reg:squarederror',
       random_state=0, reg_alpha=0, reg_lambda=1, scale_pos_weight=1,
       seed=None, silent=None, subsample=0.6, verbosity=1)
0.3876456166304725
XGBRegressor(base_score=0.5, booster='gbtree', colsample_bylevel=1,
       colsample_bynode=1, colsample_bytree=0.6, gamma=0.5,
       importance_type='gain', learning_rate=0.1, max_delta_step=0,
       max_depth=1, min_child_weight=1, missing=None, n_estimators=75,
       n_jobs=1, nthread=None, objective='reg:squarederror',
       random_state=0, reg_alpha=0, reg_lambda=1, scale_pos_weight=1,
       seed=None, silent=None, subsample=0.6, verbosity=1)
0.442291996739219
XGBRegressor(base_score=0.5, booster='gbtree', co

XGBRegressor(base_score=0.5, booster='gbtree', colsample_bylevel=1,
       colsample_bynode=1, colsample_bytree=0.6, gamma=0.5,
       importance_type='gain', learning_rate=0.1, max_delta_step=0,
       max_depth=2, min_child_weight=1, missing=None, n_estimators=150,
       n_jobs=1, nthread=None, objective='reg:squarederror',
       random_state=0, reg_alpha=0, reg_lambda=1, scale_pos_weight=1,
       seed=None, silent=None, subsample=0.6, verbosity=1)
0.5740705591012524
XGBRegressor(base_score=0.5, booster='gbtree', colsample_bylevel=1,
       colsample_bynode=1, colsample_bytree=0.6, gamma=0.5,
       importance_type='gain', learning_rate=0.1, max_delta_step=0,
       max_depth=2, min_child_weight=1, missing=None, n_estimators=175,
       n_jobs=1, nthread=None, objective='reg:squarederror',
       random_state=0, reg_alpha=0, reg_lambda=1, scale_pos_weight=1,
       seed=None, silent=None, subsample=0.6, verbosity=1)
0.5825329250479896
XGBRegressor(base_score=0.5, booster='gbtree',

XGBRegressor(base_score=0.5, booster='gbtree', colsample_bylevel=1,
       colsample_bynode=1, colsample_bytree=0.6, gamma=0.5,
       importance_type='gain', learning_rate=0.1, max_delta_step=0,
       max_depth=3, min_child_weight=1, missing=None, n_estimators=250,
       n_jobs=1, nthread=None, objective='reg:squarederror',
       random_state=0, reg_alpha=0, reg_lambda=1, scale_pos_weight=1,
       seed=None, silent=None, subsample=0.6, verbosity=1)
0.6291224978628234
XGBRegressor(base_score=0.5, booster='gbtree', colsample_bylevel=1,
       colsample_bynode=1, colsample_bytree=0.6, gamma=0.5,
       importance_type='gain', learning_rate=0.1, max_delta_step=0,
       max_depth=3, min_child_weight=1, missing=None, n_estimators=275,
       n_jobs=1, nthread=None, objective='reg:squarederror',
       random_state=0, reg_alpha=0, reg_lambda=1, scale_pos_weight=1,
       seed=None, silent=None, subsample=0.6, verbosity=1)
0.629626613993351
XGBRegressor(base_score=0.5, booster='gbtree', 

XGBRegressor(base_score=0.5, booster='gbtree', colsample_bylevel=1,
       colsample_bynode=1, colsample_bytree=0.6, gamma=0.5,
       importance_type='gain', learning_rate=0.1, max_delta_step=0,
       max_depth=4, min_child_weight=1, missing=None, n_estimators=350,
       n_jobs=1, nthread=None, objective='reg:squarederror',
       random_state=0, reg_alpha=0, reg_lambda=1, scale_pos_weight=1,
       seed=None, silent=None, subsample=0.6, verbosity=1)
0.6346814485408954
XGBRegressor(base_score=0.5, booster='gbtree', colsample_bylevel=1,
       colsample_bynode=1, colsample_bytree=0.6, gamma=0.5,
       importance_type='gain', learning_rate=0.1, max_delta_step=0,
       max_depth=4, min_child_weight=1, missing=None, n_estimators=375,
       n_jobs=1, nthread=None, objective='reg:squarederror',
       random_state=0, reg_alpha=0, reg_lambda=1, scale_pos_weight=1,
       seed=None, silent=None, subsample=0.6, verbosity=1)
0.6342065564515956
XGBRegressor(base_score=0.5, booster='gbtree',

KeyboardInterrupt: 

In [80]:
regressor = XGBRegressor(objective ='reg:squarederror',**json.loads(split[0])[0])

In [82]:
dir(regressor)

['_Booster',
 '__class__',
 '__delattr__',
 '__dict__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__getstate__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__le__',
 '__lt__',
 '__module__',
 '__ne__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__setstate__',
 '__sizeof__',
 '__str__',
 '__subclasshook__',
 '__weakref__',
 '_estimator_type',
 '_get_param_names',
 'apply',
 'base_score',
 'booster',
 'coef_',
 'colsample_bylevel',
 'colsample_bynode',
 'colsample_bytree',
 'evals_result',
 'feature_importances_',
 'fit',
 'gamma',
 'get_booster',
 'get_num_boosting_rounds',
 'get_params',
 'get_xgb_params',
 'importance_type',
 'intercept_',
 'kwargs',
 'learning_rate',
 'load_model',
 'max_delta_step',
 'max_depth',
 'min_child_weight',
 'missing',
 'n_estimators',
 'n_jobs',
 'nthread',
 'objective',
 'predict',
 'random_state',
 'reg_alpha',
 'reg_lambda',
 'save_model',
 'scale_pos_weight',


In [88]:
pd.DataFrame([regressor.get_params()])

Unnamed: 0,base_score,booster,colsample_bylevel,colsample_bynode,colsample_bytree,gamma,importance_type,learning_rate,max_delta_step,max_depth,...,objective,random_state,reg_alpha,reg_lambda,scale_pos_weight,seed,silent,subsample,verbosity,model
0,0.5,gbtree,1,1,0.6,0.5,gain,0.1,0,1,...,reg:squarederror,0,0,1,1,,,0.6,1,xgboost


In [87]:
regressor.get_params()

{'base_score': 0.5,
 'booster': 'gbtree',
 'colsample_bylevel': 1,
 'colsample_bynode': 1,
 'colsample_bytree': 0.6,
 'gamma': 0.5,
 'importance_type': 'gain',
 'learning_rate': 0.1,
 'max_delta_step': 0,
 'max_depth': 1,
 'min_child_weight': 1,
 'missing': None,
 'n_estimators': 100,
 'n_jobs': 1,
 'nthread': None,
 'objective': 'reg:squarederror',
 'random_state': 0,
 'reg_alpha': 0,
 'reg_lambda': 1,
 'scale_pos_weight': 1,
 'seed': None,
 'silent': None,
 'subsample': 0.6,
 'verbosity': 1,
 'model': 'xgboost'}