http://joelgrus.com/2016/05/23/fizz-buzz-in-tensorflow/

In [1]:
import numpy as np
import lightgbm as lgb

In [2]:
def binary_encode(i, num_digits):
    return np.array([i >> d & 1 for d in range(num_digits)])

def fizz_buzz_encode(i):
    if   i % 15 == 0: return 3
    elif i % 5  == 0: return 2
    elif i % 3  == 0: return 1
    else:             return 0

def fizz_buzz(i, prediction):
    return [str(i), "fizz", "buzz", "fizzbuzz"][prediction]


In [3]:
NUM_DIGITS = 10
NUM_DATA = 15
X = np.array([binary_encode(i, NUM_DIGITS) for i in range(101, 2 ** NUM_DATA)])
y = np.array([fizz_buzz_encode(i) for i in range(101, 2 ** NUM_DATA)])

In [4]:
X.shape

(32667, 10)

In [5]:
X_train = X[100:]
y_train = y[100:]
X_valid = X[:100]
y_valid = y[:100]

In [6]:
import collections
collections.Counter(list(y_train))

Counter({0: 17369, 1: 8685, 2: 4342, 3: 2171})

In [7]:
lgbm_params = {
    'learning_rate': 0.1,
    'num_leaves': 8,
    'boosting_type' : 'gbdt',
    'objective': 'multiclass',
    'num_class': 4,
}

def lgbm_train(X_train_df, X_valid_df, y_train_df, y_valid_df, lgbm_params):
    lgb_train = lgb.Dataset(X_train_df, y_train_df)
    lgb_eval = lgb.Dataset(X_valid_df, y_valid_df, reference=lgb_train)

    # 上記のパラメータでモデルを学習する
    model = lgb.train(lgbm_params, lgb_train,
                      # モデルの評価用データを渡す
                      valid_sets=lgb_eval,
                      # 最大で 1000 ラウンドまで学習する
                      num_boost_round=1000,
                      # 10 ラウンド経過しても性能が向上しないときは学習を打ち切る
                      early_stopping_rounds=10)
    
    return model

In [8]:
model = lgbm_train(X_train, X_valid, y_train, y_valid, lgbm_params)

[1]	valid_0's multi_logloss: 1.35302
Training until validation scores don't improve for 10 rounds.
[2]	valid_0's multi_logloss: 1.32463
[3]	valid_0's multi_logloss: 1.30028
[4]	valid_0's multi_logloss: 1.27931
[5]	valid_0's multi_logloss: 1.2612
[6]	valid_0's multi_logloss: 1.24547
[7]	valid_0's multi_logloss: 1.2318
[8]	valid_0's multi_logloss: 1.21991
[9]	valid_0's multi_logloss: 1.20953
[10]	valid_0's multi_logloss: 1.20044
[11]	valid_0's multi_logloss: 1.19252
[12]	valid_0's multi_logloss: 1.18556
[13]	valid_0's multi_logloss: 1.17948
[14]	valid_0's multi_logloss: 1.17414
[15]	valid_0's multi_logloss: 1.16946
[16]	valid_0's multi_logloss: 1.16539
[17]	valid_0's multi_logloss: 1.16178
[18]	valid_0's multi_logloss: 1.15861
[19]	valid_0's multi_logloss: 1.15585
[20]	valid_0's multi_logloss: 1.15346
[21]	valid_0's multi_logloss: 1.15135
[22]	valid_0's multi_logloss: 1.14947
[23]	valid_0's multi_logloss: 1.1478
[24]	valid_0's multi_logloss: 1.14634
[25]	valid_0's multi_logloss: 1.14508


In [9]:
numbers = np.arange(1, 101)
X_test = np.transpose(binary_encode(numbers, NUM_DIGITS))

In [10]:
y_pred = model.predict(X_test, num_iteration=model.best_iteration)
y_pred_max = np.argmax(y_pred, axis=1)

In [11]:
y_pred_max

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0])

In [12]:
output = np.vectorize(fizz_buzz)(numbers, y_pred_max)

In [13]:
output

array(['1', '2', '3', '4', '5', '6', '7', '8', '9', '10', '11', '12', '13',
       '14', '15', '16', '17', '18', '19', '20', '21', '22', '23', '24',
       '25', '26', '27', '28', '29', '30', '31', '32', '33', '34', '35',
       '36', '37', '38', '39', '40', '41', '42', '43', '44', '45', '46',
       '47', '48', '49', '50', '51', '52', '53', '54', '55', '56', '57',
       '58', '59', '60', '61', '62', '63', '64', '65', '66', '67', '68',
       '69', '70', '71', '72', '73', '74', '75', '76', '77', '78', '79',
       '80', '81', '82', '83', '84', '85', '86', '87', '88', '89', '90',
       '91', '92', '93', '94', '95', '96', '97', '98', '99', '100'],
      dtype='<U3')