In [1]:
import numpy as np
import os
import random
import lightgbm as lgb

from os.path import join as pjoin
from collections import defaultdict
from functools import reduce

from keras.layers import Dense, Input, BatchNormalization, Activation
from keras.models import Model
from keras.optimizers import Adam, SGD, RMSprop
from keras.callbacks import ReduceLROnPlateau, LearningRateScheduler, EarlyStopping

from sklearn.svm import SVC

from hashlib import sha256

This means that in case of installing LightGBM from PyPI via the ``pip install lightgbm`` command, you don't need to install the gcc compiler anymore.
Instead of that, you need to install the OpenMP library, which is required for running LightGBM on the system with the Apple Clang compiler.
You can install the OpenMP library by the following command: ``brew install libomp``.
Using TensorFlow backend.


In [2]:
base = 128
datadir = 'data'
dataset_size=1e6
filename = 'Base{}.txt'.format(base)
train_split=0.8
val_split=0.1
test_split=0.1
batch_size=500
lr = 1e-1

In [3]:
def get_data(preprocess=True, use_hash=True):
    data = []
    for row in open(pjoin(datadir, filename)):
        if len(row.split()) == 2:
            inp, target = row.split()
            if use_hash:
                int_val = int(inp, 2)
                bytes_val = int_val.to_bytes(16, 'little', signed=False)
                inp = bin(int(sha256(bytes_val).hexdigest(), 16))[2:]
                inp = ''.join(['0']*(256 - len(inp))) + inp
            data.append([np.array([int(x) if preprocess else float(-1 if x == '0' else 1) for x in inp]), float(target)])
    if preprocess: # Magick preprocessing
        new_data = []
        size = len(data)
        for x, y in data:
            s = np.zeros_like(x)
            lamb = reduce(lambda a, b: a^b, x, 0)
            for i in range(len(x)):
                s[i] = float(-1 if lamb == 1 else 1)
                lamb ^= x[i]
            new_data.append([s, y])
        data = new_data
    random.shuffle(data)
    train_count = int(dataset_size*train_split)
    val_count = int(dataset_size*val_split)
    test_count = int(dataset_size*test_split)
    return data[:train_count], data[train_count:train_count+val_count], \
           data[train_count+val_count:train_count+val_count+test_count]

In [4]:
train_data, val_data, test_data = get_data(preprocess=True, use_hash=False)
x_train, y_train = list(map(np.array, zip(*train_data)))
x_val, y_val = list(map(np.array, zip(*test_data)))
x_test, y_test = list(map(np.array, zip(*test_data)))

In [6]:
print(x_train[:10])

[[0. 0. 0. ... 0. 1. 0.]
 [1. 1. 1. ... 0. 1. 1.]
 [1. 0. 1. ... 1. 1. 0.]
 ...
 [1. 0. 1. ... 0. 1. 1.]
 [1. 0. 1. ... 1. 1. 1.]
 [1. 0. 0. ... 0. 0. 1.]]


In [7]:
def nn_model():
    optimizer = RMSprop(lr)
    inp = Input((base,))
    x = Dense(base//2, activation='relu')(inp)
    x = Dense(base//4, activation='relu')(x)
    x = Dense(1, activation='sigmoid')(x)
    model = Model(inp, x)
    model.compile(loss='binary_crossentropy', metrics=['binary_accuracy'], optimizer=optimizer)
    return model
    

In [8]:
def step_decay_schedule(initial_lr=1e-3, decay_factor=0.9, min_lr=1e-4):
    def schedule(epoch):
        return max(min_lr, initial_lr * (decay_factor ** (epoch)))
    
    return LearningRateScheduler(schedule)

In [9]:
def cosine_anneal_schedule(t, alpha_zero=1e-2):
    T, M = 80, 10
    cos_inner = np.pi * ((t+1) % (T // M))
    cos_inner /= T // M
    cos_out = np.cos(cos_inner) + 1
    return max(float(alpha_zero / 2 * cos_out), alpha_zero / 10)

In [10]:
model = nn_model()
early_stopping_callback = EarlyStopping(monitor='loss', patience=15, min_delta=1e-4)
lr_callback = LearningRateScheduler(cosine_anneal_schedule)

lr=1e-3
fl = False
callbacks = [early_stopping_callback, lr_callback] if fl else [early_stopping_callback]
model.fit(x_train, y_train,
        epochs=100,
        batch_size=10000,
        validation_data=(x_test, y_test),
        callbacks = callbacks, verbose=1)

Train on 800000 samples, validate on 100000 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100

KeyboardInterrupt: 

In [10]:
def reset_lr(min_lr=1e-3, mult=0.99):
    def callback(env):
        last_lr = env.params['learning_rate']
        env.params['learning_rate'] = last_lr*mult if last_lr > min_lr else lr
    callback.before_iteration = True
    callback.order = 0
    return callback

In [7]:
def get_datasets():
    train_dataset = lgb.Dataset(x_train, label=y_train, \
                         feature_name=['c{}'.format(i) for i in range(2*base)], \
                         categorical_feature=['c{}'.format(i) for i in range(2*base)])
    val_dataset = lgb.Dataset(x_val, label=y_val, \
                             feature_name=['c{}'.format(i) for i in range(2*base)], \
                             categorical_feature=['c{}'.format(i) for i in range(2*base)])
    return train_dataset, val_dataset

lr=0.1

param = {'num_leaves': 2047, 
         'num_trees':1000, 
         'objective':'binary', 
         'learning_rate' : lr,
         'boosting': 'dart',
         'max_bin': 2047}
param['metric'] = ['auc', 'binary_logloss']

train_dataset, val_dataset = get_datasets()

num_round = 1000
bst = lgb.train(param, train_dataset, num_round, valid_sets=[val_dataset])#, \
                    #callbacks=[reset_lr(lr/0.1)], early_stopping_rounds=100)



[1]	valid_0's binary_logloss: 0.66565	valid_0's auc: 0.499484
[2]	valid_0's binary_logloss: 0.665788	valid_0's auc: 0.498389
[3]	valid_0's binary_logloss: 0.665905	valid_0's auc: 0.499506
[4]	valid_0's binary_logloss: 0.666022	valid_0's auc: 0.498854
[5]	valid_0's binary_logloss: 0.666175	valid_0's auc: 0.498084
[6]	valid_0's binary_logloss: 0.666335	valid_0's auc: 0.49762
[7]	valid_0's binary_logloss: 0.66656	valid_0's auc: 0.496647
[8]	valid_0's binary_logloss: 0.66644	valid_0's auc: 0.496736
[9]	valid_0's binary_logloss: 0.666642	valid_0's auc: 0.495592
[10]	valid_0's binary_logloss: 0.666778	valid_0's auc: 0.495528
[11]	valid_0's binary_logloss: 0.666913	valid_0's auc: 0.495569
[12]	valid_0's binary_logloss: 0.666792	valid_0's auc: 0.495295
[13]	valid_0's binary_logloss: 0.666945	valid_0's auc: 0.495151
[14]	valid_0's binary_logloss: 0.667064	valid_0's auc: 0.495034
[15]	valid_0's binary_logloss: 0.667176	valid_0's auc: 0.495447
[16]	valid_0's binary_logloss: 0.667263	valid_0's auc

KeyboardInterrupt: 

In [9]:
clf = SVC(kernel='poly', verbose=2)
clf.fit(x_train[:50000], y_train[:50000])

[LibSVM]

SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma='auto_deprecated',
  kernel='poly', max_iter=-1, probability=False, random_state=None,
  shrinking=True, tol=0.001, verbose=2)

In [10]:
res = clf.predict(x_test[:10000])
print(1 - np.abs(np.array(res) - np.array(y_test[:10000])).sum() / len(y_test[:10000]))

0.5981000000000001


In [4]:
train_data, val_data, test_data = get_data(preprocess=False, use_hash=True)
x_train, y_train = list(map(np.array, zip(*train_data)))
x_val, y_val = list(map(np.array, zip(*test_data)))
x_test, y_test = list(map(np.array, zip(*test_data)))

In [7]:
def glorot_uniform(shape):
    limit = 1./(shape[0]+1)
    return np.random.uniform(-limit, limit, size=shape)

def sigmoid(x):                                        
    return 1 / (1 + np.exp(-x))

data=train_data
batch_size=100
gen_count=100
x, y = zip(*([random.choice(data) for _ in range(batch_size)]))

def get_random_W(shape=[256, 1]):
    return np.random.uniform(-1., 1., shape)

def mutate_W(W):
    return W + glorot_uniform(W.shape)

def get_random_generation(shape=[256, 1], count=100):
    return [get_random_W(shape) for _ in range(count)]

def mutate_generation(gen):
    return [mutate_W(W) for W in gen]

def fitness(W):
    return (-np.log(sigmoid(y*np.dot(x, W)))).sum()

def fitness_generation(gen):
    return [fitness(W) for W in gen]

gen = get_random_generation(count=gen_count)
gen = list(sorted(gen, key=fitness))
best_fit = fitness(gen[0])
best_W = gen[0]
print(best_fit)

while 1:
    new_gen = get_random_generation(count=gen_count)
    mutable_gen = mutate_generation(gen)
    gen = gen + new_gen + mutable_gen
    gen = list(sorted(gen, key=fitness))
    gen = gen[:int(0.75*gen_count)] + gen[int(-0.25*gen_count):]
    fit = fitness(gen[0])
    if fit < best_fit:
        best_fit = fit
        best_W = gen[0]
        print(best_fit)


17227.597538119793
17224.578364337853
16990.002201553805
16986.959354908606
16964.786487317513
16963.606681434474
16945.004348009355
16919.483325350422
16455.746300057974
16447.626269232736
16441.225462073744
16419.38148198875
16411.66474733617
16399.718979039142
16385.994537941948
16362.615995001644
16359.341725856448
16348.109221359675
16334.77894710577
16333.054257098484
16309.936770916727
16299.780927786309
16269.11030191036
16258.713266116822
16222.475743586177
16217.982701001278
16204.532277923354
16199.902334742808
15426.106585310992
15418.056512874258
15390.07206603875
15362.892409760481
15350.215450582986
15338.499013264716
15324.605001600075
15293.761346116049
15292.738539599675
15292.37375987031
15282.104377888376
15273.465967344979
15254.960538432819
15241.876634015896
15233.928231494909
15224.940675695201
15220.551318904356
15204.83592881941
15198.088102318008
15182.04021904036
15173.379655227329
15163.194950055144
15132.927579021218
15118.878445722039
15090.449227851072
1

10415.397885753584
10406.42452822269
10386.91996663858
10383.64562390386
10374.441329774725
10347.798164314638
10334.826321853603
10333.8106680312
10326.284142827615
10320.982379306348
10314.473937886114
10309.420709886264
10297.701450944154
10277.636366273679
10270.936307651495
10257.532456878354
10237.929348754406
10226.647128426297
10202.835248922966
10193.261576008099
10185.253128756303
10163.796344856122
10129.097217762743
10100.986989462663
10099.67358127822
10097.222916010795
10089.919110838016
10088.84313936341
10080.558596562943
10069.076712606271
10049.37016559481
10047.63950931663
10043.825616517432
10026.547982573575
10015.81506027211
10007.660180796896
9995.093705397316
9985.137399585816
9973.30604832045
9959.129252711396
9947.340446544245
9938.403790241722
9934.03368728963
9916.82267047308
9914.663948363906
9892.687097919574
9885.405969441344
9883.83074274231
9868.271132302118
9859.831483276812
9848.140459287859
9839.768867545494
9830.916627411581
9830.34804037229
9817.43

6041.802466889234
6027.564395485969
6017.243992147639
6016.043334103766
6011.753497698044
6010.5729784682635
6006.720995088364
5994.28602121483
5988.154439524854
5972.762159393118
5971.919868638419
5954.9839623312855
5949.46454577881
5943.948669939869
5936.711189357513
5933.3632559790385
5919.306241182788
5914.266771798964
5912.150658485593
5901.846039611997
5895.359620018357
5895.27387591735
5894.008480835552
5884.840866076091
5881.089062405737
5877.376885776466
5869.290068513847
5867.975319946119
5855.945918460824
5851.443774508554
5848.76332142013
5837.116532936052
5832.670196947102
5820.502039994139
5815.972347032987
5812.958793090032
5809.436602650951
5804.669180711594
5786.447910760856
5780.27421490102
5777.439108900098
5767.4872693265
5762.29173813799
5759.097259228716
5758.9297759656065
5747.07029809436
5735.425606168541
5717.009552362549
5716.78242261438
5712.763514786789
5707.5077126005035
5706.0832832120195
5697.679802637774
5697.581882375977
5694.746062995346
5685.046512350

3538.388944938849
3536.1719234893108
3535.155758982408
3530.9442133265743
3526.4975989863
3525.115673027469
3519.964308128952
3511.7117277331045
3505.5516818474484
3502.8488238628506
3502.6423478237366
3500.337078028183
3497.156610593081
3487.404464028409
3481.2978508440733
3476.2469326573837
3476.1430264761784
3466.584949635268
3463.214832337505
3459.637825518712
3457.6203675182564
3456.559157166921
3449.6224525909392
3446.7662947244025
3437.474411128341
3433.5005504902483
3431.6184762923403
3429.9375425965154
3424.330658979091
3417.2043202229565
3415.3651227534083
3412.166680319059
3407.986170202534
3401.2431201209574
3399.5359005524765
3395.0311131695867
3390.127633380068
3389.747365634043
3387.628004552478
3384.5843884799133
3383.1224516518914
3375.969230563109
3370.2681975691794
3367.6402775912343
3367.1646472462303
3361.663937249803
3358.3258619574435
3353.502022048155
3352.503455867611
3347.778631703617
3345.05357312692
3340.373135601957
3338.3855327226256
3338.306067475483
3331

2488.0584199666437
2487.3615349400225
2486.7831934038286
2486.315209197382
2484.4105687344395
2482.7831668510144
2482.2647192776426
2480.8783556287726
2479.757825618248
2479.5483663951977
2479.0159027502996
2478.4703696822817
2477.297356007669
2475.9830198626273
2474.610174200165
2473.3159266418606
2471.6032094187462
2471.1975814411217
2470.5657725098213
2470.148301283425
2469.8456708339695
2469.6460313940743
2467.8094825184317
2466.889078782494
2466.1126429044216
2464.401263937692
2464.3246374010478
2463.6435305032537
2463.191693267862
2461.848975557141
2461.485918735991
2460.7078301058564
2460.360315432773
2460.052139012115
2458.7366454963885
2457.9628836013444
2457.2403043592444
2456.9394601942445
2456.232580138168
2455.19941906441
2454.92638447461
2454.388300022914
2453.61323611407
2451.9059124246305
2451.5368776554837
2450.611374344744
2450.1171273855593
2449.5202495469453
2448.996415463543
2447.462505767795
2446.7551177372206
2446.633491136972
2445.5540202285215
2443.252416239444

2275.9939935930156
2275.722942259121
2275.6492891342536
2275.387217304962
2275.29801362027
2274.9559125085907
2274.67619441461
2274.620485793348
2274.5032623933957
2274.319321289734
2274.0513912349325
2273.886100424444
2273.63476381714
2273.20791446953
2272.9278007937737
2272.8665189491358
2272.8166991570138
2272.616614074317
2272.446233855563
2272.07132367134
2271.7723747407963
2271.7310669534377
2271.6727103506937
2271.615609942795
2271.465572311532
2271.1993415955876
2271.0678700181256
2270.915001981882
2270.488065532107
2270.266421880289
2269.8641464229995
2269.5723024574127
2269.3952755879936
2269.249499821915
2268.9766088359534
2268.6916143255357
2268.5720121540335
2268.3921794805256
2268.1925549668126
2267.9256239000697
2267.6887964409207
2267.512573564032
2267.322780687074
2267.176832017421
2267.0736251557846
2266.894361247522
2266.6864490162143
2266.5997539858354
2266.465223646595
2266.4646390122007
2266.300091411981
2266.0269430536764
2265.970043973418
2265.911530546381
2265.

2232.271370351243
2232.2605334323644
2232.25789171216
2232.1977977566557
2232.1472722022977
2232.09586430968
2232.0698209355287
2231.9793331122555
2231.978672393115
2231.962779479357
2231.9423042975104
2231.9236878817455
2231.871049000318
2231.841485684773
2231.812259174274
2231.7779060704197
2231.6993153332933
2231.6524601348738
2231.632002893012
2231.6063379927973
2231.5781309508293
2231.486108371038
2231.482008128888
2231.4435668471915
2231.4118800444026
2231.378403401044
2231.351333073159
2231.317332454102
2231.290578464547
2231.2724834812147
2231.248571854816
2231.202824385219
2231.183743487531
2231.1512018874937
2231.1280470021607
2231.071143868322
2231.0402742266306
2231.0271034909997
2230.974561532698
2230.943014648717
2230.9252789076586
2230.915784226188
2230.874775401846
2230.8186011089956
2230.8008192775246
2230.771397484278
2230.7286705454935
2230.654684319976
2230.6499197068
2230.6323060580958
2230.5743870350434
2230.566648368191
2230.5133597310323
2230.4722606526084
2230.

2222.388657319951
2222.3797847301325
2222.3725097743636
2222.364039522726
2222.3575372139803
2222.3436564201334
2222.3419000586846
2222.3354844213563
2222.3303619213143
2222.3050816167665
2222.2969434644033
2222.2784901102323
2222.274382484243
2222.265254063762
2222.252485988401
2222.2275939967926
2222.217809854963
2222.206691320756
2222.1990516271967
2222.1784772016135
2222.172737155881
2222.1630707504246
2222.1417683116906
2222.139003518581
2222.1263274553326
2222.121508716864
2222.104156225111
2222.099708166684
2222.08875995985
2222.0804242565173
2222.076532079068
2222.0490650649635
2222.041871303636
2222.0369876976806
2222.01677017619
2222.0152070619174
2222.0051491630165
2221.9907633635144
2221.977578167357
2221.9751089628817
2221.9528555208462
2221.9377196779265
2221.921777773904
2221.911140152588
2221.906447012341
2221.8926496143786
2221.8865853244897
2221.8781914049187
2221.8755464664973
2221.868328636935
2221.854129684627
2221.8480974656
2221.842086959996
2221.840089789037
222

2219.3334458939453
2219.3289322277515
2219.324643471162
2219.318480745187
2219.318262134451
2219.3166721753687
2219.3156578564276
2219.3120376451398
2219.305701333942
2219.302926226681
2219.301517708722
2219.2981533569778
2219.288845685465
2219.2866768105755
2219.2856609128016
2219.2856281894037
2219.2804350358765
2219.2728267379466
2219.2695382959014
2219.2695155585093
2219.2619642349023
2219.2604556874644
2219.256325177019
2219.2545808721347
2219.2523585164417
2219.247718381501
2219.246916443937
2219.24214589284
2219.2379515126513
2219.2342316876593
2219.2337016707893
2219.2310996420397
2219.227382036245
2219.2247784021747
2219.2234391909633
2219.2221921870823
2219.2168115877284
2219.2155819721906
2219.213641697522
2219.2121294257645
2219.207285727726
2219.205447201575
2219.2013105112724
2219.1999498235145
2219.1952836269543
2219.190664187747
2219.1901361852047
2219.186698290476
2219.1777475282997
2219.1765958354395
2219.1702936823963
2219.16682087109
2219.1662014887856
2219.16277613

2218.441489778529
2218.440521515031
2218.44040655095
2218.439056658853
2218.4380003763245
2218.437602411232
2218.4372828191476
2218.436366018288
2218.436124231731
2218.434859004665
2218.4338764179683
2218.433206354246
2218.4316043383583
2218.4312330550047
2218.429617641926
2218.4286397697942
2218.427507358601
2218.426817196522
2218.425682523123
2218.4247397935815
2218.4245227184956
2218.424126181485
2218.421174487981
2218.420621259652
2218.4197701257785
2218.4196581497995
2218.4189494495163
2218.4184660415794
2218.4169031956712
2218.416516586752
2218.415049978652
2218.4136989668414
2218.413247356556
2218.4123070957908
2218.4117848956885
2218.409508694892
2218.409085594409
2218.408027243713
2218.406266980787
2218.405797270503
2218.405796038683
2218.403862826653
2218.4020797604626
2218.400561851795
2218.4001847753034
2218.39945747835
2218.3977932213784
2218.397676525302
2218.397269189502
2218.3968022461004
2218.3960658954734
2218.3939124189583
2218.393540666033
2218.3930799007417
2218.39

2218.1886476275295
2218.188476662801
2218.1879764588643
2218.1877874198626
2218.187297687079
2218.187164732614
2218.186692805889
2218.18647937409
2218.186478069869
2218.1857362375345
2218.1851548599434
2218.1849179761043
2218.184766037227
2218.1847078728943
2218.184207855955
2218.184034184538
2218.183988906758
2218.1832620103155
2218.182790838121
2218.1822137194076
2218.1819895685485
2218.1818194764996
2218.1812507364184
2218.1808391448562
2218.180620690399
2218.1802755822314
2218.179894316754
2218.179386534765
2218.1790121453955
2218.1789108826747
2218.1785319753917
2218.1785248122287
2218.178293476153
2218.1778866265704
2218.177557411668
2218.177358877154
2218.1771852754323
2218.1769939227443
2218.1769731881786
2218.1764995054227
2218.176337871833
2218.175969053199
2218.1756496857943
2218.175591174649
2218.1755240814864
2218.1753762822414
2218.1748944237133
2218.1747535976783
2218.174476173752
2218.174436373919
2218.1741415197557
2218.173853225234
2218.1736962718974
2218.173677762692

2218.1066384193045
2218.1066128354214
2218.1065604025034
2218.1064894820856
2218.106383088483
2218.1063712484547
2218.1062491350845
2218.1061736948086
2218.106111516083
2218.1060159437548
2218.105977362498
2218.105802228953
2218.105683424499
2218.105527263527
2218.1054848169542
2218.105445636018
2218.1053453438053
2218.1053370514574
2218.1053072251298
2218.1051988087756
2218.105122855419
2218.1051100503373
2218.104944935925
2218.10479241473
2218.10467193042
2218.104545178486
2218.10444651508
2218.1043573892407
2218.104321274339
2218.1042780979005
2218.1041871740845
2218.104071800676
2218.103889750885
2218.103832838724
2218.1038044772163
2218.1036974799827
2218.1036412730195
2218.1035614482403
2218.1034786160667
2218.103413824788
2218.1033925609936
2218.1033032387804
2218.1031948708605
2218.1030463793363
2218.103030547767
2218.102951224319
2218.1029463348987
2218.102815371259
2218.1027380905393
2218.1026294902586
2218.102580678836
2218.1024787222996
2218.102377534588
2218.1023688975006


KeyboardInterrupt: 

In [9]:
-np.log(sigmoid(y*np.dot(x, best_W)))

array([[1.37889700e-13, 1.37889700e-13, 1.37889700e-13, ...,
        1.37889700e-13, 6.93147181e-01, 6.93147181e-01],
       [1.94164462e-10, 1.94164462e-10, 1.94164462e-10, ...,
        1.94164462e-10, 6.93147181e-01, 6.93147181e-01],
       [2.12787696e-06, 2.12787696e-06, 2.12787696e-06, ...,
        2.12787696e-06, 6.93147181e-01, 6.93147181e-01],
       ...,
       [4.67919774e-06, 4.67919774e-06, 4.67919774e-06, ...,
        4.67919774e-06, 6.93147181e-01, 6.93147181e-01],
       [1.95536095e-06, 1.95536095e-06, 1.95536095e-06, ...,
        1.95536095e-06, 6.93147181e-01, 6.93147181e-01],
       [1.73562916e-06, 1.73562916e-06, 1.73562916e-06, ...,
        1.73562916e-06, 6.93147181e-01, 6.93147181e-01]])