http://joelgrus.com/2016/05/23/fizz-buzz-in-tensorflow/

In [1]:
import numpy as np
import lightgbm as lgb

In [2]:
def binary_encode(i, num_digits):
    return np.array([i >> d & 1 for d in range(num_digits)])

def fizz_buzz_encode(i):
    if   i % 15 == 0: return 3
    elif i % 5  == 0: return 2
    elif i % 3  == 0: return 1
    else: return 0

def fizz_buzz(i, prediction):
    return [str(i), "fizz", "buzz", "fizzbuzz"][prediction]


In [3]:
NUM_DIGITS = 10
NUM_DATA = 12
X = np.array([binary_encode(i, NUM_DIGITS) for i in range(101, 2 ** NUM_DATA)])
y = np.array([fizz_buzz_encode(i) for i in range(101, 2 ** NUM_DATA)])

In [4]:
X.shape

(3995, 10)

In [5]:
X[0], y[0]

(array([1, 0, 1, 0, 0, 1, 1, 0, 0, 0]), 0)

In [6]:
X_train = X[100:]
y_train = y[100:]
X_valid = X[:100]
y_valid = y[:100]

In [7]:
import collections
c = collections.Counter(list(y_train))
c

Counter({0: 2077, 1: 1039, 2: 519, 3: 260})

In [8]:
rate = []
for i in range(4):
    rate.append(c[0]/c[i])
    print(rate[i])

1.0
1.9990375360923964
4.001926782273603
7.9884615384615385


In [9]:
def weights(n):
    return(rate[n])

w = [weights(i) for i in list(y_train)]

In [10]:
lgbm_params = {
    'boosting_type' : 'gbdt',
    'objective': 'multiclass',
    'num_class': 4,
    'learning_rate': 0.05,
#     'min_child_samples': 5,  # Minimum number of data need in a child(min_data_in_leaf)
    'subsample': 0.9,  # Subsample ratio of the training instance.
}

def lgbm_train(X_train_df, X_valid_df, y_train_df, y_valid_df, lgbm_params):
    lgb_train = lgb.Dataset(X_train_df, y_train_df, weight=w)
    lgb_eval = lgb.Dataset(X_valid_df, y_valid_df, reference=lgb_train)

    # 上記のパラメータでモデルを学習する
    model = lgb.train(lgbm_params, lgb_train,
                      # モデルの評価用データを渡す
                      valid_sets=lgb_eval,
                      # 最大で 1000 ラウンドまで学習する
                      num_boost_round=1000,
                      # 10 ラウンド経過しても性能が向上しないときは学習を打ち切る
                      early_stopping_rounds=10)
    
    return model

In [11]:
model = lgbm_train(X_train, X_valid, y_train, y_valid, lgbm_params)

[1]	valid_0's multi_logloss: 1.38614
Training until validation scores don't improve for 10 rounds.
[2]	valid_0's multi_logloss: 1.38663
[3]	valid_0's multi_logloss: 1.38717
[4]	valid_0's multi_logloss: 1.38791
[5]	valid_0's multi_logloss: 1.38823
[6]	valid_0's multi_logloss: 1.3886
[7]	valid_0's multi_logloss: 1.38918
[8]	valid_0's multi_logloss: 1.38899
[9]	valid_0's multi_logloss: 1.3882
[10]	valid_0's multi_logloss: 1.38796
[11]	valid_0's multi_logloss: 1.38807
Early stopping, best iteration is:
[1]	valid_0's multi_logloss: 1.38614


In [12]:
numbers = np.arange(1, 101)
X_test = np.transpose(binary_encode(numbers, NUM_DIGITS))

In [13]:
y_pred = model.predict(X_test, num_iteration=model.best_iteration)
y_pred_max = np.argmax(y_pred, axis=1)

In [14]:
y_pred

array([[ 0.24959967,  0.24954961,  0.24953958,  0.25131114],
       [ 0.24959967,  0.24954961,  0.24953958,  0.25131114],
       [ 0.24959967,  0.24954961,  0.24953958,  0.25131114],
       [ 0.24863578,  0.24833873,  0.25475665,  0.24826884],
       [ 0.24823994,  0.24846956,  0.25489086,  0.24839964],
       [ 0.25155656,  0.25125602,  0.24600211,  0.25118531],
       [ 0.25063177,  0.2508636 ,  0.24771164,  0.250793  ],
       [ 0.25097065,  0.252981  ,  0.25091023,  0.24513812],
       [ 0.24909929,  0.25044801,  0.24903932,  0.25141337],
       [ 0.25278928,  0.24756781,  0.25272842,  0.24691448],
       [ 0.2493372 ,  0.24973214,  0.24927717,  0.25165349],
       [ 0.24753015,  0.24926483,  0.25362379,  0.24958123],
       [ 0.24930207,  0.25093406,  0.25598144,  0.24378244],
       [ 0.25223375,  0.24677811,  0.24666434,  0.2543238 ],
       [ 0.25124469,  0.25475585,  0.24831741,  0.24568205],
       [ 0.24977664,  0.24972655,  0.24971651,  0.2507803 ],
       [ 0.24977664,  0.

In [15]:
y_pred_max

array([3, 3, 3, 2, 2, 0, 1, 1, 3, 0, 3, 2, 2, 3, 1, 3, 3, 3, 3, 2, 1, 0, 2,
       1, 3, 3, 3, 2, 3, 1, 3, 3, 3, 3, 3, 0, 1, 2, 2, 0, 3, 1, 3, 3, 1, 2,
       2, 3, 3, 3, 3, 0, 2, 2, 1, 3, 3, 3, 3, 1, 3, 2, 3, 3, 3, 3, 3, 2, 2,
       0, 1, 1, 3, 0, 3, 2, 2, 3, 1, 3, 3, 3, 3, 2, 0, 1, 2, 1, 3, 3, 3, 2,
       3, 1, 3, 3, 3, 3, 3, 1])

In [16]:
y_test = np.array([fizz_buzz_encode(i) for i in range(1, 101)])
y_test

array([0, 0, 1, 0, 2, 1, 0, 0, 1, 2, 0, 1, 0, 0, 3, 0, 0, 1, 0, 2, 1, 0, 0,
       1, 2, 0, 1, 0, 0, 3, 0, 0, 1, 0, 2, 1, 0, 0, 1, 2, 0, 1, 0, 0, 3, 0,
       0, 1, 0, 2, 1, 0, 0, 1, 2, 0, 1, 0, 0, 3, 0, 0, 1, 0, 2, 1, 0, 0, 1,
       2, 0, 1, 0, 0, 3, 0, 0, 1, 0, 2, 1, 0, 0, 1, 2, 0, 1, 0, 0, 3, 0, 0,
       1, 0, 2, 1, 0, 0, 1, 2])

In [17]:
from sklearn.metrics import accuracy_score
accuracy_score(y_test, y_pred_max)

0.11

In [18]:
output = np.vectorize(fizz_buzz)(numbers, y_pred_max)

In [19]:
output

array(['fizzbuzz', 'fizzbuzz', 'fizzbuzz', 'buzz', 'buzz', '6', 'fizz',
       'fizz', 'fizzbuzz', '10', 'fizzbuzz', 'buzz', 'buzz', 'fizzbuzz',
       'fizz', 'fizzbuzz', 'fizzbuzz', 'fizzbuzz', 'fizzbuzz', 'buzz',
       'fizz', '22', 'buzz', 'fizz', 'fizzbuzz', 'fizzbuzz', 'fizzbuzz',
       'buzz', 'fizzbuzz', 'fizz', 'fizzbuzz', 'fizzbuzz', 'fizzbuzz',
       'fizzbuzz', 'fizzbuzz', '36', 'fizz', 'buzz', 'buzz', '40',
       'fizzbuzz', 'fizz', 'fizzbuzz', 'fizzbuzz', 'fizz', 'buzz', 'buzz',
       'fizzbuzz', 'fizzbuzz', 'fizzbuzz', 'fizzbuzz', '52', 'buzz',
       'buzz', 'fizz', 'fizzbuzz', 'fizzbuzz', 'fizzbuzz', 'fizzbuzz',
       'fizz', 'fizzbuzz', 'buzz', 'fizzbuzz', 'fizzbuzz', 'fizzbuzz',
       'fizzbuzz', 'fizzbuzz', 'buzz', 'buzz', '70', 'fizz', 'fizz',
       'fizzbuzz', '74', 'fizzbuzz', 'buzz', 'buzz', 'fizzbuzz', 'fizz',
       'fizzbuzz', 'fizzbuzz', 'fizzbuzz', 'fizzbuzz', 'buzz', '85',
       'fizz', 'buzz', 'fizz', 'fizzbuzz', 'fizzbuzz', 'fizzbuzz', 'buzz',
 