In [2]:
import biosppy.signals.ecg as ecg
import lightgbm as lgb
import numpy as np
import xgboost as xgb
from sklearn.metrics import f1_score
from sklearn.model_selection import train_test_split

This means that in case of installing LightGBM from PyPI via the ``pip install lightgbm`` command, you won't need to install the gcc compiler anymore.
Instead of that, you'll need to install the OpenMP library, which is required for running LightGBM on the system with the Apple Clang compiler.
You can install the OpenMP library by the following command: ``brew install libomp``.


In [16]:
# Supplementary training features: skip the header row and read columns 1-20
# (column 0 is not loaded — presumably a row id; confirm against the CSV).
X_add = np.loadtxt(
    'data/otherfeature_complete.csv',
    delimiter=',',
    skiprows=1,
    usecols=range(1, 21),
)

In [12]:
# Training features from rpeakfeature.csv: header row skipped, columns 1-8 read.
X = np.loadtxt(
    'data/rpeakfeature.csv',
    delimiter=',',
    skiprows=1,
    usecols=range(1, 9),
)
# Training labels: header row skipped, only column 1 read
# (column 0 is not loaded — presumably a row id; confirm against the CSV).
y = np.loadtxt(
    'data/y_train.csv',
    delimiter=',',
    skiprows=1,
    usecols=range(1, 2),
)

In [28]:
# Join the two feature tables column-wise so each row holds the columns of X
# followed by the columns of X_add. A single vectorized hstack replaces the
# per-row Python loop, produces a proper 2-D array, and raises if the two
# tables do not have the same number of rows instead of silently ignoring
# extra rows.
newX = np.hstack([X, X_add])

In [32]:
# Hold out 33% of the rows for evaluation; the fixed random_state keeps the
# split reproducible between runs.
train_X, eval_X, train_Y, eval_Y = train_test_split(newX, y, test_size=0.33, random_state=42)
xg_train = xgb.DMatrix(train_X, label=train_Y)
xg_eval = xgb.DMatrix(eval_X, label=eval_Y)

# setup parameters for xgboost
param = {}
# use softmax multi-class classification (predict() returns class indices)
param['objective'] = 'multi:softmax'
# learning rate (step-size shrinkage applied after each boosting round)
param['eta'] = 0.2
# minimum loss reduction required to make a further split
param['gamma'] = 1.0
param['max_depth'] = 6
# NOTE(review): newer XGBoost releases replace `silent` with `verbosity` —
# confirm against the installed version before changing.
param['silent'] = 1
# row / column subsampling ratios per tree
param['subsample'] = 0.8
param['colsample_bytree'] = 0.9
param['min_child_weight'] = 20
param['num_class'] = 4


def micro_f1_eval(preds, dmatrix):
    """Custom evaluation metric for xgb.train: micro-averaged F1.

    Args:
        preds: predictions produced by the booster for `dmatrix`.
        dmatrix: the xgb.DMatrix being evaluated; its labels are the ground truth.

    Returns:
        A ("f1", score) tuple as expected from a custom eval function.
    """
    # f1_score's signature is (y_true, y_pred): labels first, predictions second.
    return "f1", f1_score(dmatrix.get_label(), preds, average='micro')


watchlist = [(xg_train, 'train'), (xg_eval, 'eval')]
num_round = 60
bst = xgb.train(param,
                xg_train,
                num_round,
                watchlist,
                feval=micro_f1_eval)

# Score the held-out split. The reported number is a micro-averaged F1
# (higher is better), not an error rate.
pred = bst.predict(xg_eval)
F1 = f1_score(eval_Y, pred, average='micro')
print('Eval micro-F1 using softmax = {}'.format(F1))


[0]	train-merror:0.294049	eval-merror:0.330965	train-f1:0.705951	eval-f1:0.669035
[1]	train-merror:0.248833	eval-merror:0.290705	train-f1:0.751167	eval-f1:0.709295
[2]	train-merror:0.237165	eval-merror:0.294849	train-f1:0.762835	eval-f1:0.705151
[3]	train-merror:0.226663	eval-merror:0.280639	train-f1:0.773337	eval-f1:0.719361
[4]	train-merror:0.217328	eval-merror:0.271166	train-f1:0.782672	eval-f1:0.728834
[5]	train-merror:0.214702	eval-merror:0.271758	train-f1:0.785298	eval-f1:0.728242
[6]	train-merror:0.209452	eval-merror:0.261693	train-f1:0.790548	eval-f1:0.738307
[7]	train-merror:0.205076	eval-merror:0.262877	train-f1:0.794924	eval-f1:0.737123
[8]	train-merror:0.204492	eval-merror:0.259325	train-f1:0.795508	eval-f1:0.740675
[9]	train-merror:0.196908	eval-merror:0.256365	train-f1:0.803092	eval-f1:0.743635
[10]	train-merror:0.187573	eval-merror:0.25222	train-f1:0.812427	eval-f1:0.74778
[11]	train-merror:0.186406	eval-merror:0.251036	train-f1:0.813594	eval-f1:0.748964
[12]	train-merro

In [33]:
# Test-set features from rpeakfeature_test.csv: header row skipped, columns 1-8 read
# (the same column range as the training feature file).
test_X = np.loadtxt(
    'data/rpeakfeature_test.csv',
    delimiter=',',
    skiprows=1,
    usecols=range(1, 9),
)

In [36]:
# Supplementary test-set features: header row skipped, columns 1-20 read
# (the same column range as the supplementary training file).
test_X_add = np.loadtxt(
    'data/test_otherfeature_complete.csv',
    delimiter=',',
    skiprows=1,
    usecols=range(1, 21),
)

In [37]:
# Join the two test feature tables column-wise (columns of test_X followed by
# columns of test_X_add). A single vectorized hstack replaces the per-row
# Python loop, yields a 2-D array, and raises if the row counts differ
# instead of silently ignoring extra rows.
test_X_new = np.hstack([test_X, test_X_add])

In [40]:
# Predict a class for every test row with the trained booster and write the
# results to submission.csv as "id,y" rows, where id is the row's position.
xg_test = xgb.DMatrix(test_X_new)
y_pred = bst.predict(xg_test)
# The context manager guarantees the file is flushed and closed even if a
# write fails part-way through.
with open("submission.csv", "w") as f:
    f.write("id,y\n")
    for i, label in enumerate(y_pred):
        f.write("{},{}\n".format(i, label))