In [3]:
import biosppy.signals.ecg as ecg
import lightgbm as lgb
import numpy as np
import xgboost as xgb
from sklearn.metrics import f1_score
from sklearn.model_selection import train_test_split

In [12]:
# Both CSV files are comma-separated and start with a single header row.
csv_opts = dict(delimiter=',', skiprows=1)

# Features: keep CSV columns 1 through 8; column 0 is not read.
X = np.loadtxt('data/rpeakfeature.csv', usecols=tuple(range(1, 9)), **csv_opts)
# Labels: keep only CSV column 1; column 0 is not read.
y = np.loadtxt('data/y_train.csv', usecols=(1,), **csv_opts)

In [27]:
# Hold out 20% of the rows for evaluation. The shuffle is seeded so the
# partition -- and therefore every score reported below -- is the same each
# time the notebook is re-run from a fresh kernel.
SPLIT_SEED = 42
train_X, eval_X, train_Y, eval_Y = train_test_split(
    X, y, test_size=0.2, random_state=SPLIT_SEED
)
# NOTE(review): if the classes are imbalanced, passing stratify=y would keep
# the class proportions equal across both partitions -- confirm before enabling.

# xgb.train consumes DMatrix objects; labels are attached to each partition.
xg_train = xgb.DMatrix(train_X, label=train_Y)
xg_eval = xgb.DMatrix(eval_X, label=eval_Y)

# Booster hyper-parameters handed to xgb.train.
param = {
    # multi-class softmax objective
    'objective': 'multi:softmax',
    # learning rate (shrinkage applied to each boosting step)
    'eta': 0.2,
    # minimum loss reduction required to make a further split
    'gamma': 1.0,
    # maximum depth of each tree
    'max_depth': 6,
    # suppress xgboost's own log messages
    # NOTE(review): newer xgboost releases replace this key with 'verbosity' -- confirm installed version
    'silent': 1,
    # fraction of training rows sampled for each tree
    'subsample': 0.8,
    # fraction of feature columns sampled for each tree
    'colsample_bytree': 0.9,
    # minimum sum of instance weight needed in a child node
    'min_child_weight': 20,
    # number of target classes (required by the multi-class objective)
    'num_class': 4,
}


def micro_f1_eval(preds, dmatrix):
    """Custom xgboost evaluation metric: micro-averaged F1.

    Args:
        preds: the predictions xgboost passes in for the rows of ``dmatrix``.
        dmatrix: the ``xgb.DMatrix`` being evaluated; its labels are used as
            the ground truth.

    Returns:
        A ``(name, value)`` tuple, which is what ``xgb.train`` expects from
        the callable given as ``feval``.
    """
    # sklearn's signature is f1_score(y_true, y_pred): labels first, predictions second.
    return "f1", f1_score(dmatrix.get_label(), preds, average='micro')


# Metrics are reported for both partitions after every boosting round.
watchlist = [(xg_train, 'train'), (xg_eval, 'eval')]
num_round = 60
bst = xgb.train(param,
                xg_train,
                num_round,
                watchlist,
                feval=micro_f1_eval)

# Predict on the held-out partition and score the predictions against its labels.
pred = bst.predict(xg_eval)
F1 = f1_score(eval_Y, pred, average='micro')
# The printed value is a micro-averaged F1 score on the eval split (higher is
# better), not an error rate, so the message is labelled accordingly.
print('Eval micro-F1 using softmax = {}'.format(F1))


[0]	train-merror:0.26924	eval-merror:0.270508	train-f1:0.73076	eval-f1:0.729492
[1]	train-merror:0.260445	eval-merror:0.265625	train-f1:0.739555	eval-f1:0.734375
[2]	train-merror:0.24945	eval-merror:0.269531	train-f1:0.75055	eval-f1:0.730469
[3]	train-merror:0.244075	eval-merror:0.266602	train-f1:0.755925	eval-f1:0.733398
[4]	train-merror:0.242609	eval-merror:0.259766	train-f1:0.757391	eval-f1:0.740234
[5]	train-merror:0.239189	eval-merror:0.256836	train-f1:0.760811	eval-f1:0.743164
[6]	train-merror:0.236746	eval-merror:0.260742	train-f1:0.763254	eval-f1:0.739258
[7]	train-merror:0.232348	eval-merror:0.268555	train-f1:0.767652	eval-f1:0.731445
[8]	train-merror:0.229172	eval-merror:0.260742	train-f1:0.770828	eval-f1:0.739258
[9]	train-merror:0.229416	eval-merror:0.260742	train-f1:0.770584	eval-f1:0.739258
[10]	train-merror:0.227462	eval-merror:0.257812	train-f1:0.772538	eval-f1:0.742188
[11]	train-merror:0.225018	eval-merror:0.258789	train-f1:0.774982	eval-f1:0.741211
[12]	train-merror:

In [None]:
# NOTE(review): test_X is not defined in any cell shown in this notebook -- the
# test features must be loaded before this cell can run; confirm their source.
xg_test = xgb.DMatrix(test_X)
y_pred = bst.predict(xg_test)

# Write one "id,y" row per prediction, using the row position as the id.
# The context manager closes the file even if a write raises part-way through.
with open("submission.csv", "w") as f:
    f.write("id,y\n")
    for i, label in enumerate(y_pred):
        f.write("{},{}\n".format(i, label))