In [1]:
import pandas as pd

# Read the iris measurements; the path is resolved relative to the working directory.
df = pd.read_csv(filepath_or_buffer='iris.csv')
# Preview the first five rows as a quick sanity check of the parsed columns.
df.head(n=5)

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
0,5.1,3.5,1.4,0.2,setosa
1,4.9,3.0,1.4,0.2,setosa
2,4.7,3.2,1.3,0.2,setosa
3,4.6,3.1,1.5,0.2,setosa
4,5.0,3.6,1.4,0.2,setosa


In [2]:
# Distinct species names, in order of first appearance in the frame.
labels = df['species'].drop_duplicates().tolist()
print(labels)

['setosa', 'versicolor', 'virginica']


In [3]:
# Encode each species name as its integer position in `labels`.
# A dict lookup through Series.map replaces the per-row list.index() scan;
# every value is guaranteed to be a key because `labels` was built from this column.
label_ids = {name: idx for idx, name in enumerate(labels)}
df['label'] = df['species'].map(label_ids)
# Fixed seed so the previewed rows are the same on every Restart & Run All.
df.sample(5, random_state=6)

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species,label
92,5.8,2.6,4.0,1.2,versicolor,1
35,5.0,3.2,1.2,0.2,setosa,0
61,5.9,3.0,4.2,1.5,versicolor,1
71,6.1,2.8,4.0,1.3,versicolor,1
129,7.2,3.0,5.8,1.6,virginica,2


In [4]:
# Dimensions of the frame as a (rows, columns) tuple.
df.shape

(150, 6)

In [5]:
# Number of rows in the frame.
print(len(df))

150


In [6]:
# Columns used as model inputs.
x_columns = [
    'sepal_length',
    'sepal_width',
    'petal_length',
    'petal_width',
]
# Column used as the prediction target (kept as a one-element list).
y_columns = ['label']


In [8]:
import lightgbm as lgb
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split


# Separate the input columns from the target column.
x = df.loc[:, x_columns]
y = df.loc[:, y_columns]

# Hold out 25% of the rows for evaluation; the fixed seed keeps the split repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.25,
    random_state=6,
)
x_train.head()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width
95,5.7,3.0,4.2,1.2
49,5.0,3.3,1.4,0.2
17,5.1,3.5,1.4,0.3
103,6.3,2.9,5.6,1.8
71,6.1,2.8,4.0,1.3


In [10]:
# Training configuration passed to LightGBM as a dict.
# NOTE(review): the target column holds class ids (0/1/2) but the objective is
# 'regression', so the ids are fitted as continuous values — confirm this is intended.
params = {
    'boosting_type': 'gbdt',
    'objective': 'regression',
    'metric': {'l1', 'l2'},
    'num_leaves': 31,
    'learning_rate': 0.05,
    'feature_fraction': 0.9,
    'bagging_fraction': 0.8,
    'bagging_freq': 5,
    'verbose': -1,
}

# Wrap the training split in LightGBM's dataset container.
lgb_train = lgb.Dataset(data=x_train, label=y_train)

print('Starting training...')
# Fit the booster, then write the trained model to disk.
gbm = lgb.train(params=params, train_set=lgb_train)
gbm.save_model(filename='model2.bst')

Starting training...


<lightgbm.basic.Booster at 0x7fe023ac5d00>

In [11]:
print('Starting predicting...')
# Score the held-out rows with the trained booster.
y_pred = gbm.predict(data=x_test, num_iteration=gbm.best_iteration)
print(y_pred)

# Root-mean-squared error between the true targets and the raw predictions.
rmse_test = mean_squared_error(y_true=y_test, y_pred=y_pred) ** 0.5
print(f'The RMSE of prediction is: {rmse_test}')

Starting predicting...
[-0.03151512  2.00980354 -0.03151512 -0.02001995  1.76678096  1.03989302
  1.31833273 -0.01778496  2.00980354  1.05113177  1.69548143  1.03912075
  1.99763405  2.0259589   0.98571954  1.07531014  2.0259589   0.85892188
  1.03347305 -0.01434713  0.02374981  2.01213831  0.43587068 -0.01778496
  1.05687502  1.06759109  0.9969583   2.0259589   0.01362066  0.8023872
  0.02571026  1.06558206 -0.00645296  0.02571026  1.22574072  2.0259589
  1.03989302  1.78293632]
The RMSE of prediction is: 0.2508680061926446
