In [26]:
import pandas as pd
import numpy as np
import datetime
import os

import xgboost as xgb
from tensorboardX import SummaryWriter

#from sklearn.datasets import load_boston
from sklearn.model_selection import train_test_split

class TensorBoardCallback(xgb.callback.TrainingCallback):
    """XGBoost training callback that writes evaluation metrics to TensorBoard.

    Two ``SummaryWriter`` objects are created under
    ``runs/<experiment>/<timestamp>/``: one in ``train/`` and one in
    ``<data_name>/``. After every boosting round the most recent value of
    each metric in ``evals_log`` is written as a scalar, using the round
    number as the global step. Both writers are closed when training ends.

    Args:
        experiment: Sub-directory of ``runs/`` for this run. Falls back to
            ``"logs"`` when ``None`` or empty.
        data_name: Directory name of the writer used for every evaluation
            set that is not called ``"train"``. Falls back to ``"test"``
            when ``None`` or empty.
    """

    def __init__(self, experiment: str = None, data_name: str = None):
        super().__init__()
        self.experiment = experiment or "logs"
        self.data_name = data_name or "test"
        # Timestamp (second resolution) keeps separate runs in separate folders.
        self.datetime_ = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
        self.log_dir = f"runs/{self.experiment}/{self.datetime_}"
        self.train_writer = SummaryWriter(log_dir=os.path.join(self.log_dir, "train/"))
        # `data_name` is never empty after the fallback above, so this writer
        # is created unconditionally; `after_iteration` relies on it existing.
        self.test_writer = SummaryWriter(
            log_dir=os.path.join(self.log_dir, f"{self.data_name}/")
        )

    def after_iteration(
        self, model, epoch: int, evals_log: xgb.callback.TrainingCallback.EvalsLog
    ) -> bool:
        """Log the latest value of every metric for the round that just ended.

        Args:
            model: The booster being trained (not used here).
            epoch: Index of the boosting round; used as the scalar's step.
            evals_log: Mapping ``{dataset name: {metric name: history}}``.

        Returns:
            Always ``False``, i.e. this callback never requests early stopping.
        """
        if not evals_log:
            return False

        for data, metric in evals_log.items():
            # Only the set literally named "train" goes to the train writer;
            # every other evaluation set shares the second writer.
            writer = self.train_writer if data == "train" else self.test_writer
            for metric_name, log in metric.items():
                if not log:
                    # Nothing recorded yet for this metric; avoid IndexError.
                    continue
                latest = log[-1]
                # A tuple entry carries extra values after the score (e.g. a
                # spread next to the mean); only the first element is logged.
                score = latest[0] if isinstance(latest, tuple) else latest
                writer.add_scalar(metric_name, score, epoch)

        return False

    def after_training(self, model):
        """Close both writers once training has finished.

        Without this the event files are left open and the last buffered
        scalars may not reach disk until the process exits.

        Args:
            model: The trained booster handed over by XGBoost.

        Returns:
            ``model`` unchanged, as the callback protocol requires.
        """
        for writer in (self.train_writer, self.test_writer):
            writer.close()
        return model



In [27]:
# Location of the raw, whitespace-delimited text file.
data_url = "http://lib.stat.cmu.edu/datasets/boston"

# Skip the first 22 lines of the file and read the remainder without a header row.
raw_df = pd.read_csv(data_url, header=None, skiprows=22, sep=r"\s+")

# Rows are consumed in pairs: every even-indexed row contributes all of its
# columns, and the following odd-indexed row contributes its first two columns
# as additional features and its third column as the target.
raw_values = raw_df.values
even_rows = raw_values[::2, :]
odd_rows = raw_values[1::2, :]

data = np.hstack([even_rows, odd_rows[:, :2]])
target = odd_rows[:, 2]

In [28]:
# Show the assembled feature matrix (NumPy abbreviates large arrays in the repr).
data

array([[6.3200e-03, 1.8000e+01, 2.3100e+00, ..., 1.5300e+01, 3.9690e+02,
        4.9800e+00],
       [2.7310e-02, 0.0000e+00, 7.0700e+00, ..., 1.7800e+01, 3.9690e+02,
        9.1400e+00],
       [2.7290e-02, 0.0000e+00, 7.0700e+00, ..., 1.7800e+01, 3.9283e+02,
        4.0300e+00],
       ...,
       [6.0760e-02, 0.0000e+00, 1.1930e+01, ..., 2.1000e+01, 3.9690e+02,
        5.6400e+00],
       [1.0959e-01, 0.0000e+00, 1.1930e+01, ..., 2.1000e+01, 3.9345e+02,
        6.4800e+00],
       [4.7410e-02, 0.0000e+00, 1.1930e+01, ..., 2.1000e+01, 3.9690e+02,
        7.8800e+00]])

In [29]:
# Hold out 20% of the rows for evaluation; the fixed random_state makes the
# split reproducible from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    data, target, test_size=0.2, random_state=100
)

# Wrap each partition, together with its labels, in XGBoost's DMatrix container.
dtrain = xgb.DMatrix(X_train, label=y_train)
dtest = xgb.DMatrix(X_test, label=y_test)



In [30]:

# Squared-error regression objective, evaluated with RMSE.
params = {
    'objective': 'reg:squarederror',
    'eval_metric': 'rmse',
}

# Datasets evaluated after every boosting round, with the names they are reported under.
watchlist = [(dtrain, 'train'), (dtest, 'test')]

# Sends the per-round metrics to TensorBoard under runs/exp_1/<timestamp>/.
tensorboard_cb = TensorBoardCallback(experiment='exp_1', data_name='test')

bst = xgb.train(
    params,
    dtrain,
    num_boost_round=100,
    evals=watchlist,
    callbacks=[tensorboard_cb],
)

[0]	train-rmse:6.72100	test-rmse:7.46559
[1]	train-rmse:5.06958	test-rmse:5.82633
[2]	train-rmse:3.90621	test-rmse:4.85099
[3]	train-rmse:3.07580	test-rmse:4.21755
[4]	train-rmse:2.47553	test-rmse:3.73335
[5]	train-rmse:2.02599	test-rmse:3.50519
[6]	train-rmse:1.68859	test-rmse:3.39425
[7]	train-rmse:1.45239	test-rmse:3.28750
[8]	train-rmse:1.28438	test-rmse:3.22570
[9]	train-rmse:1.16120	test-rmse:3.21039
[10]	train-rmse:1.05986	test-rmse:3.17595
[11]	train-rmse:0.95726	test-rmse:3.16765
[12]	train-rmse:0.88131	test-rmse:3.14554
[13]	train-rmse:0.81894	test-rmse:3.13116
[14]	train-rmse:0.77485	test-rmse:3.13649
[15]	train-rmse:0.72495	test-rmse:3.11582
[16]	train-rmse:0.69172	test-rmse:3.11919
[17]	train-rmse:0.66702	test-rmse:3.10890
[18]	train-rmse:0.62824	test-rmse:3.11224
[19]	train-rmse:0.57492	test-rmse:3.11683
[20]	train-rmse:0.54343	test-rmse:3.12190
[21]	train-rmse:0.51630	test-rmse:3.11994
[22]	train-rmse:0.48323	test-rmse:3.11870
[23]	train-rmse:0.44460	test-rmse:3.11712
[2