In [1]:
import lightgbm as lgb
import pandas as pd
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import GridSearchCV
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.datasets import  make_classification

In [2]:
# Load the iris dataset and hold out 20% of the samples for evaluation.
# The split is seeded so that Restart & Run All reproduces the same
# train/test partition (the outputs recorded in this notebook came from an
# unseeded split and will not match exactly).
SEED = 42
iris = load_iris()
data = iris.data
target = iris.target
X_train, X_test, y_train, y_test = train_test_split(
    data, target, test_size=0.2, random_state=SEED
)

In [3]:
# LightGBM regressor; note that it is fitted on the integer iris class
# index, i.e. the class label is treated as a numeric target.
gbm = lgb.LGBMRegressor(
    objective='regression',
    num_leaves=31,
    learning_rate=0.05,
    n_estimators=20,
)

In [4]:
# Train on the training split while monitoring L1 error on the held-out
# split; training stops early if that score fails to improve for 5 rounds.
# NOTE(review): the test split doubles as the early-stopping validation set,
# so the RMSE reported further down is not a fully independent estimate.
gbm.fit(
    X_train,
    y_train,
    eval_set=[(X_test, y_test)],
    eval_metric='l1',
    early_stopping_rounds=5,
)

[1]	valid_0's l2: 0.705124	valid_0's l1: 0.741123
Training until validation scores don't improve for 5 rounds.
[2]	valid_0's l2: 0.644377	valid_0's l1: 0.70894
[3]	valid_0's l2: 0.589432	valid_0's l1: 0.678366
[4]	valid_0's l2: 0.539728	valid_0's l1: 0.649321
[5]	valid_0's l2: 0.494761	valid_0's l1: 0.621728
[6]	valid_0's l2: 0.454074	valid_0's l1: 0.595515
[7]	valid_0's l2: 0.417255	valid_0's l1: 0.570613
[8]	valid_0's l2: 0.380118	valid_0's l1: 0.545885
[9]	valid_0's l2: 0.349944	valid_0's l1: 0.523345
[10]	valid_0's l2: 0.319344	valid_0's l1: 0.500954
[11]	valid_0's l2: 0.294608	valid_0's l1: 0.480551
[12]	valid_0's l2: 0.269374	valid_0's l1: 0.460277
[13]	valid_0's l2: 0.24909	valid_0's l1: 0.441807
[14]	valid_0's l2: 0.230724	valid_0's l1: 0.424261
[15]	valid_0's l2: 0.212082	valid_0's l1: 0.406808
[16]	valid_0's l2: 0.196957	valid_0's l1: 0.390942
[17]	valid_0's l2: 0.18154	valid_0's l1: 0.376096
[18]	valid_0's l2: 0.169081	valid_0's l1: 0.361703
[19]	valid_0's l2: 0.155533	valid

LGBMRegressor(boosting_type='gbdt', class_weight=None, colsample_bytree=1.0,
       importance_type='split', learning_rate=0.05, max_depth=-1,
       min_child_samples=20, min_child_weight=0.001, min_split_gain=0.0,
       n_estimators=20, n_jobs=-1, num_leaves=31, objective='regression',
       random_state=None, reg_alpha=0.0, reg_lambda=0.0, silent=True,
       subsample=1.0, subsample_for_bin=200000, subsample_freq=0)

In [5]:
# Predict on the held-out split using only the trees up to the best
# early-stopping iteration.
y_pred = gbm.predict(
    X_test,
    num_iteration=gbm.best_iteration_,
)

In [6]:
# Root-mean-squared error of the regression predictions on the test split.
rmse = mean_squared_error(y_test, y_pred) ** 0.5
print('The rmse of prediction is:', rmse)

The rmse of prediction is: 0.38086186779531483


In [7]:
# One importance value per input feature, in column order of X_train.
importances = list(gbm.feature_importances_)
print('Feature importances:', importances)

Feature importances: [22, 4, 23, 17]


In [8]:
# Unfitted base regressor handed to the grid search below; learning_rate and
# n_estimators are supplied by the search grid.
estimator = lgb.LGBMRegressor(
    num_leaves=31,
)

In [9]:
# Hyper-parameter grid: 3 learning rates x 2 ensemble sizes = 6 candidates.
param_grid = dict(
    learning_rate=[0.01, 0.1, 1],
    n_estimators=[20, 40],
)

In [10]:
# Exhaustive search over param_grid with an explicit 3-fold CV.
# The recorded repr shows cv='warn', i.e. the scikit-learn default that was
# 3 folds at the time and later changed to 5; pinning cv keeps the search
# identical across scikit-learn versions and silences the FutureWarning.
# NOTE: this rebinds `gbm`, which previously held the fitted LGBMRegressor.
gbm = GridSearchCV(estimator, param_grid, cv=3)

In [11]:
# Run the grid search on the training split; with refit=True (the default
# shown in the repr below) the best candidate is refitted on all of it.
gbm.fit(X=X_train, y=y_train)



GridSearchCV(cv='warn', error_score='raise-deprecating',
       estimator=LGBMRegressor(boosting_type='gbdt', class_weight=None, colsample_bytree=1.0,
       importance_type='split', learning_rate=0.1, max_depth=-1,
       min_child_samples=20, min_child_weight=0.001, min_split_gain=0.0,
       n_estimators=100, n_jobs=-1, num_leaves=31, objective=None,
       random_state=None, reg_alpha=0.0, reg_lambda=0.0, silent=True,
       subsample=1.0, subsample_for_bin=200000, subsample_freq=0),
       fit_params=None, iid='warn', n_jobs=None,
       param_grid={'learning_rate': [0.01, 0.1, 1], 'n_estimators': [20, 40]},
       pre_dispatch='2*n_jobs', refit=True, return_train_score='warn',
       scoring=None, verbose=0)

In [12]:
# Report the winning hyper-parameter combination.
best_params = gbm.best_params_
print('Best parameters found by grid search are:', best_params)

Best parameters found by grid search are: {'learning_rate': 0.1, 'n_estimators': 40}


In [13]:
# Sanity check: (n_samples, n_features) of the training features.
X_train.shape

(120, 4)

In [14]:
# Peek at the first training sample (one row of four feature values).
X_train[0]

array([5.4, 3. , 4.5, 1.5])

In [15]:
# Inspect the training labels; at this point they are still integer class
# indices (they are one-hot encoded in a later cell).
y_train

array([1, 1, 1, 2, 0, 0, 0, 2, 2, 2, 2, 0, 2, 0, 0, 0, 0, 0, 1, 0, 2, 1,
       2, 1, 2, 0, 0, 1, 1, 1, 1, 2, 2, 1, 2, 1, 1, 2, 1, 0, 0, 1, 1, 0,
       2, 0, 2, 0, 2, 1, 2, 0, 1, 2, 2, 1, 0, 1, 1, 2, 1, 2, 0, 0, 1, 1,
       1, 2, 0, 1, 0, 1, 1, 0, 0, 2, 2, 0, 2, 1, 1, 1, 2, 0, 0, 1, 0, 2,
       0, 0, 2, 0, 1, 2, 1, 0, 1, 0, 2, 2, 1, 1, 1, 2, 0, 1, 2, 2, 2, 2,
       2, 2, 1, 1, 2, 0, 1, 0, 2, 1])

In [16]:
# Sanity check: one label per training sample.
y_train.shape

(120,)

In [18]:
from keras.utils import to_categorical

In [19]:
# One-hot encode the integer class labels for the Keras classifier.
# - num_classes is passed explicitly so both splits always get one column per
#   iris class, even if a split happens to lack the highest class index
#   (to_categorical otherwise infers the width from the largest label seen).
# - The ndim guard makes the cell idempotent: re-running it will not encode
#   the already one-hot matrix a second time.
# NOTE: this overwrites y_train / y_test in place, so the LightGBM cells
# above (which need integer labels) must be run before this one.
n_classes = len(iris.target_names)
if y_train.ndim == 1:
    y_train = to_categorical(y_train, num_classes=n_classes)
if y_test.ndim == 1:
    y_test = to_categorical(y_test, num_classes=n_classes)

In [22]:
# First training label after one-hot encoding (a length-3 indicator vector).
y_train[0]

array([0., 1., 0.], dtype=float32)

In [32]:
from keras.callbacks import TensorBoard

In [17]:
import keras
from keras import Model
from keras.models import Sequential
from keras.layers import Dense, Activation

In [41]:
# Fully connected classifier: 4 input features -> four 32-unit ReLU layers
# -> 3-way softmax (one probability per iris class).
model = Sequential([
    Dense(32, input_shape=(4,), activation='relu'),
    Dense(32, activation='relu'),
    Dense(32, activation='relu'),
    Dense(32, activation='relu'),
    Dense(3, activation='softmax'),
])

In [42]:
# Categorical cross-entropy matches the one-hot targets and softmax output;
# accuracy is tracked as an additional metric during training.
model.compile(
    loss='categorical_crossentropy',
    optimizer='rmsprop',
    metrics=['accuracy'],
)

In [43]:
# Train the network for 20 passes over the training split in mini-batches
# of 32 samples; the returned History object is the cell's displayed value.
model.fit(
    X_train,
    y_train,
    batch_size=32,
    epochs=20,
)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.callbacks.History at 0x7fc41a0c6550>

In [48]:
# A single sample indexed with [0] is 1-D; compare with the [0:1] slice used
# for prediction below, which keeps the leading batch axis.
X_train[0].shape

(4,)

In [50]:
# True one-hot label of the first training sample, for comparison with the
# model's predicted probabilities for the same sample.
y_train[0]

array([0., 1., 0.], dtype=float32)

In [49]:
# Class probabilities for the first training sample; the 0:1 slice (rather
# than [0]) keeps the input 2-D, i.e. a batch containing one row.
model.predict(X_train[0:1])

array([[0.01603369, 0.52876365, 0.45520264]], dtype=float32)

In [51]:
# Class probabilities for every test sample: one row per sample, one column
# per class.
model.predict(X_test)

array([[3.0017562e-02, 6.7974997e-01, 2.9023251e-01],
       [2.7794661e-02, 7.0236915e-01, 2.6983622e-01],
       [4.4556404e-03, 2.8829730e-01, 7.0724708e-01],
       [8.6175367e-02, 7.8270656e-01, 1.3111809e-01],
       [2.2785461e-03, 2.6520711e-01, 7.3251438e-01],
       [9.8114729e-01, 1.8476533e-02, 3.7616325e-04],
       [9.8380196e-01, 1.5915118e-02, 2.8284747e-04],
       [2.6922335e-03, 1.7234580e-01, 8.2496190e-01],
       [2.1314460e-03, 1.4464018e-01, 8.5322839e-01],
       [9.7779435e-01, 2.1683944e-02, 5.2178651e-04],
       [1.0960707e-03, 1.1860984e-01, 8.8029402e-01],
       [9.8423612e-01, 1.5496837e-02, 2.6703480e-04],
       [1.4674448e-03, 1.8161945e-01, 8.1691313e-01],
       [3.7191235e-02, 7.5317854e-01, 2.0963027e-01],
       [9.9142390e-01, 8.4697967e-03, 1.0633280e-04],
       [4.3053970e-02, 7.1285373e-01, 2.4409230e-01],
       [9.7326332e-01, 2.6100431e-02, 6.3625234e-04],
       [9.8126721e-01, 1.8391557e-02, 3.4123610e-04],
       [3.5606246e-03, 2.598

In [53]:
import numpy as np

In [57]:
# Predicted class index per test sample: position of the largest softmax
# probability in each row, collected into a plain list.
xdata = list(np.argmax(model.predict(X_test), axis=1))

In [58]:
# Show the true class index of every test sample.
# This is computed directly from the one-hot y_test instead of reading
# `data`: in file order `data` is only rebound to this label list in a later
# cell, so on a top-to-bottom run it would still hold the iris feature matrix.
[np.argmax(one_hot) for one_hot in y_test]

[1,
 1,
 1,
 1,
 2,
 0,
 0,
 2,
 2,
 0,
 2,
 0,
 2,
 1,
 0,
 1,
 0,
 0,
 2,
 0,
 0,
 1,
 0,
 0,
 2,
 2,
 2,
 2,
 0,
 0]

In [56]:
# True class index per test sample, recovered from the one-hot y_test.
# NOTE: this rebinds `data`, which earlier in the notebook referred to the
# iris feature matrix.
data = list(np.argmax(y_test, axis=1))

In [59]:
# List every misclassified test sample.
# In the printed line, "x" is the true class (from `data`) and "y" is the
# class predicted by the network (from `xdata`).
for x, y in zip(data, xdata):
    if x == y:
        continue  # correct prediction, nothing to report
    print("x =", x, "y =", y)

x = 1 y = 2
