http://joelgrus.com/2016/05/23/fizz-buzz-in-tensorflow/

In [1]:
import numpy as np
import lightgbm as lgb

In [2]:
def binary_encode(i, num_digits):
    return np.array([i >> d & 1 for d in range(num_digits)])

def fizz_buzz_encode(i):
    if   i % 15 == 0: return 3
    elif i % 5  == 0: return 2
    elif i % 3  == 0: return 1
    else: return 0

def fizz_buzz(i, prediction):
    return [str(i), "fizz", "buzz", "fizzbuzz"][prediction]


In [3]:
NUM_DIGITS = 10
NUM_DATA = 20
X = np.array([binary_encode(i, NUM_DIGITS) for i in range(101, 2 ** NUM_DATA)])
y = np.array([fizz_buzz_encode(i) for i in range(101, 2 ** NUM_DATA)])

In [4]:
X.shape

(1048475, 10)

In [5]:
X[0], y[0]

(array([1, 0, 1, 0, 0, 1, 1, 0, 0, 0]), 0)

In [6]:
X_train = X[100:]
y_train = y[100:]
X_valid = X[:100]
y_valid = y[:100]

In [7]:
import collections
c_train = collections.Counter(list(y_train))
c_valid = collections.Counter(list(y_valid))
c_train, c_valid

(Counter({0: 559133, 1: 279567, 2: 139783, 3: 69892}),
 Counter({0: 54, 1: 26, 2: 13, 3: 7}))

# Weight

In [8]:
rate = []
for i in range(4):
    rate.append(c_train[0]/c_train[i])
    print(rate[i])

1.0
1.9999964230399152
4.0000071539457585
7.999957076632518


In [9]:
def weights(n):
    return(rate[n])

w = [weights(i) for i in list(y_train)]

In [10]:
lgbm_params = {
    'boosting_type' : 'gbdt',
    'objective': 'multiclass',
    'num_class': 4,
    'learning_rate': 0.05,
    'min_child_samples': 5,  # Minimum number of data need in a child(min_data_in_leaf)
    'subsample': 0.9,  # Subsample ratio of the training instance.
}

def lgbm_train(X_train_df, X_valid_df, y_train_df, y_valid_df, lgbm_params):
    lgb_train = lgb.Dataset(X_train_df, y_train_df, weight=w)
    lgb_eval = lgb.Dataset(X_valid_df, y_valid_df, reference=lgb_train)

    # 上記のパラメータでモデルを学習する
    model = lgb.train(lgbm_params, lgb_train,
                      # モデルの評価用データを渡す
                      valid_sets=lgb_eval,
                      # 最大で 1000 ラウンドまで学習する
                      num_boost_round=1000,
                      # 10 ラウンド経過しても性能が向上しないときは学習を打ち切る
                      early_stopping_rounds=10)
    
    return model

In [11]:
model = lgbm_train(X_train, X_valid, y_train, y_valid, lgbm_params)

[1]	valid_0's multi_logloss: 1.3863
Training until validation scores don't improve for 10 rounds.
[2]	valid_0's multi_logloss: 1.3863
[3]	valid_0's multi_logloss: 1.3863
[4]	valid_0's multi_logloss: 1.38631
[5]	valid_0's multi_logloss: 1.38631
[6]	valid_0's multi_logloss: 1.38632
[7]	valid_0's multi_logloss: 1.38632
[8]	valid_0's multi_logloss: 1.38632
[9]	valid_0's multi_logloss: 1.38632
[10]	valid_0's multi_logloss: 1.38632
[11]	valid_0's multi_logloss: 1.38633
Early stopping, best iteration is:
[1]	valid_0's multi_logloss: 1.3863


In [12]:
numbers = np.arange(1, 101)
X_test = np.transpose(binary_encode(numbers, NUM_DIGITS))

In [13]:
y_pred = model.predict(X_test, num_iteration=model.best_iteration)
y_pred_max = np.argmax(y_pred, axis=1)

In [14]:
y_pred

array([[ 0.24999273,  0.24999254,  0.24999251,  0.25002222],
       [ 0.2500065 ,  0.25000632,  0.25000628,  0.2499809 ],
       [ 0.24999273,  0.24999254,  0.24999251,  0.25002222],
       [ 0.25000065,  0.24999931,  0.250001  ,  0.24999904],
       [ 0.24999965,  0.24999964,  0.25000133,  0.24999938],
       [ 0.25000065,  0.24999931,  0.250001  ,  0.24999904],
       [ 0.24999965,  0.24999964,  0.25000133,  0.24999938],
       [ 0.24999733,  0.2500085 ,  0.24999711,  0.24999705],
       [ 0.25000044,  0.2499992 ,  0.25000021,  0.25000015],
       [ 0.24999632,  0.25001156,  0.24999609,  0.24999603],
       [ 0.24999766,  0.25000752,  0.24999744,  0.24999738],
       [ 0.24999777,  0.25000778,  0.24999812,  0.24999633],
       [ 0.24999987,  0.24999881,  0.25000155,  0.24999977],
       [ 0.24999676,  0.25001084,  0.2499971 ,  0.24999531],
       [ 0.2499971 ,  0.25000713,  0.24999878,  0.24999699],
       [ 0.24999261,  0.24999242,  0.24999239,  0.25002258],
       [ 0.25000674,  0.

In [15]:
y_pred_max

array([3, 0, 3, 2, 2, 2, 2, 1, 0, 1, 1, 1, 2, 1, 1, 3, 0, 3, 0, 2, 2, 2, 2,
       1, 0, 1, 1, 1, 2, 1, 1, 0, 3, 0, 3, 0, 0, 2, 2, 1, 1, 1, 0, 1, 1, 2,
       2, 3, 0, 3, 0, 0, 2, 2, 0, 1, 1, 1, 0, 1, 1, 2, 0, 0, 3, 0, 3, 2, 2,
       2, 2, 1, 0, 1, 1, 1, 2, 2, 1, 3, 0, 3, 0, 0, 2, 2, 2, 1, 0, 1, 1, 1,
       2, 1, 1, 0, 3, 0, 3, 1])

In [16]:
y_test = np.array([fizz_buzz_encode(i) for i in range(1, 101)])
y_test

array([0, 0, 1, 0, 2, 1, 0, 0, 1, 2, 0, 1, 0, 0, 3, 0, 0, 1, 0, 2, 1, 0, 0,
       1, 2, 0, 1, 0, 0, 3, 0, 0, 1, 0, 2, 1, 0, 0, 1, 2, 0, 1, 0, 0, 3, 0,
       0, 1, 0, 2, 1, 0, 0, 1, 2, 0, 1, 0, 0, 3, 0, 0, 1, 0, 2, 1, 0, 0, 1,
       2, 0, 1, 0, 0, 3, 0, 0, 1, 0, 2, 1, 0, 0, 1, 2, 0, 1, 0, 0, 3, 0, 0,
       1, 0, 2, 1, 0, 0, 1, 2])

In [17]:
from sklearn.metrics import accuracy_score
accuracy_score(y_test, y_pred_max)

0.25

In [18]:
output = np.vectorize(fizz_buzz)(numbers, y_pred_max)
output

array(['fizzbuzz', '2', 'fizzbuzz', 'buzz', 'buzz', 'buzz', 'buzz', 'fizz',
       '9', 'fizz', 'fizz', 'fizz', 'buzz', 'fizz', 'fizz', 'fizzbuzz',
       '17', 'fizzbuzz', '19', 'buzz', 'buzz', 'buzz', 'buzz', 'fizz',
       '25', 'fizz', 'fizz', 'fizz', 'buzz', 'fizz', 'fizz', '32',
       'fizzbuzz', '34', 'fizzbuzz', '36', '37', 'buzz', 'buzz', 'fizz',
       'fizz', 'fizz', '43', 'fizz', 'fizz', 'buzz', 'buzz', 'fizzbuzz',
       '49', 'fizzbuzz', '51', '52', 'buzz', 'buzz', '55', 'fizz', 'fizz',
       'fizz', '59', 'fizz', 'fizz', 'buzz', '63', '64', 'fizzbuzz', '66',
       'fizzbuzz', 'buzz', 'buzz', 'buzz', 'buzz', 'fizz', '73', 'fizz',
       'fizz', 'fizz', 'buzz', 'buzz', 'fizz', 'fizzbuzz', '81',
       'fizzbuzz', '83', '84', 'buzz', 'buzz', 'buzz', 'fizz', '89',
       'fizz', 'fizz', 'fizz', 'buzz', 'fizz', 'fizz', '96', 'fizzbuzz',
       '98', 'fizzbuzz', 'fizz'],
      dtype='<U8')

# Sampling

In [19]:
from imblearn.datasets import make_imbalance

X_r, y_r = make_imbalance(X, y,
                      sampling_strategy={0: c_train[3], 1: c_train[3], 2: c_train[3], 3: c_train[3]},
                      random_state=123)

In [20]:
lgbm_params = {
    'boosting_type' : 'gbdt',
    'objective': 'multiclass',
    'num_class': 4,
    'learning_rate': 0.05,
#     'min_child_samples': 5,  # Minimum number of data need in a child(min_data_in_leaf)
    'subsample': 0.9,  # Subsample ratio of the training instance.
}

def lgbm_train(X_train_df, X_valid_df, y_train_df, y_valid_df, lgbm_params):
    lgb_train = lgb.Dataset(X_train_df, y_train_df, weight=None)
    lgb_eval = lgb.Dataset(X_valid_df, y_valid_df, reference=lgb_train)

    # 上記のパラメータでモデルを学習する
    model = lgb.train(lgbm_params, lgb_train,
                      # モデルの評価用データを渡す
                      valid_sets=lgb_eval,
                      # 最大で 1000 ラウンドまで学習する
                      num_boost_round=1000,
                      # 10 ラウンド経過しても性能が向上しないときは学習を打ち切る
                      early_stopping_rounds=10)
    
    return model

In [21]:
model = lgbm_train(X_r, X_valid, y_r, y_valid, lgbm_params)

[1]	valid_0's multi_logloss: 1.38624
Training until validation scores don't improve for 10 rounds.
[2]	valid_0's multi_logloss: 1.38621
[3]	valid_0's multi_logloss: 1.38612
[4]	valid_0's multi_logloss: 1.3861
[5]	valid_0's multi_logloss: 1.38609
[6]	valid_0's multi_logloss: 1.38605
[7]	valid_0's multi_logloss: 1.38603
[8]	valid_0's multi_logloss: 1.38605
[9]	valid_0's multi_logloss: 1.386
[10]	valid_0's multi_logloss: 1.38593
[11]	valid_0's multi_logloss: 1.38591
[12]	valid_0's multi_logloss: 1.38591
[13]	valid_0's multi_logloss: 1.386
[14]	valid_0's multi_logloss: 1.38597
[15]	valid_0's multi_logloss: 1.38588
[16]	valid_0's multi_logloss: 1.38595
[17]	valid_0's multi_logloss: 1.38585
[18]	valid_0's multi_logloss: 1.38584
[19]	valid_0's multi_logloss: 1.38573
[20]	valid_0's multi_logloss: 1.38564
[21]	valid_0's multi_logloss: 1.3856
[22]	valid_0's multi_logloss: 1.38555
[23]	valid_0's multi_logloss: 1.38546
[24]	valid_0's multi_logloss: 1.38542
[25]	valid_0's multi_logloss: 1.38537
[26

In [22]:
y_pred = model.predict(X_test, num_iteration=model.best_iteration)
y_pred_max = np.argmax(y_pred, axis=1)

In [23]:
accuracy_score(y_test, y_pred_max)

0.20000000000000001

In [24]:
output = np.vectorize(fizz_buzz)(numbers, y_pred_max)
output

array(['buzz', 'fizz', 'buzz', 'buzz', 'buzz', 'buzz', 'buzz', 'buzz',
       'buzz', 'buzz', 'buzz', 'buzz', 'fizz', 'buzz', 'fizz', '16',
       'buzz', 'buzz', 'buzz', 'buzz', '21', 'fizzbuzz', 'fizzbuzz', '24',
       'buzz', '26', '27', 'buzz', '29', '30', 'fizz', '32', '33', '34',
       'fizz', 'fizzbuzz', '37', 'buzz', '39', 'buzz', 'fizz', 'buzz',
       'fizz', '44', '45', '46', '47', 'fizzbuzz', 'fizz', 'fizzbuzz',
       'fizz', 'buzz', 'fizz', 'buzz', 'fizzbuzz', 'buzz', 'fizz', 'buzz',
       'fizz', 'buzz', 'fizz', 'buzz', 'fizz', 'fizz', 'buzz', 'fizz',
       'buzz', 'fizz', 'buzz', 'fizz', 'buzz', 'buzz', 'buzz', 'buzz',
       'buzz', 'buzz', 'fizz', 'buzz', 'fizz', 'buzz', 'fizzbuzz', 'buzz',
       'fizzbuzz', 'buzz', '85', 'buzz', 'buzz', 'buzz', 'fizz', 'buzz',
       'fizz', 'buzz', '93', 'buzz', 'fizz', 'buzz', 'fizz', 'buzz',
       'buzz', 'fizzbuzz'],
      dtype='<U8')